Files
rocm-systems/util/simple_heap.h
T
Longlong Yao ccb3950068 wsl/hsakmt: clean up namespace
Signed-off-by: Longlong Yao <Longlong.Yao@amd.com>
Reviewed-by: lyndonli <Lyndon.Li@amd.com>
Reviewed-by: Flora Cui <flora.cui@amd.com>
Part-of: <http://10.67.69.192/wsl/libhsakmt/-/merge_requests/7>
2025-11-05 18:53:35 +08:00

364 řádky
12 KiB
C++

////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// A simple best fit memory allocator with eager compaction. Manages block sub-allocation.
// For use when memory efficiency is more important than allocation speed.
// O(log n) time.
#ifndef HSA_RUNTME_CORE_UTIL_SIMPLE_HEAP_H_
#define HSA_RUNTME_CORE_UTIL_SIMPLE_HEAP_H_
#include <map>
#include <deque>
#include <utility>
#include "core/util/utils.h"
namespace wsl {
// Best-fit sub-allocator layered on top of a block allocator.
//
// Memory is obtained from `Allocator` in blocks. Every block is split into fragments that are
// tracked per block in address order (block_list_). Free fragments are additionally indexed by
// size (free_list_) so the smallest adequate fragment can be found with a lower_bound lookup.
// Neighbouring free fragments are merged as soon as a fragment is freed, and a block that becomes
// entirely free is either parked in block_cache_ or handed back to the block allocator.
//
// Operations this class invokes on `Allocator`:
//   - copy construction
//   - void* alloc(size_t request, size_t& actual)  (the block length is read back from `actual`)
//   - void  free(void* ptr, size_t length)
//   - size_t block_size() const
//
// The class contains no locking of its own.
template <typename Allocator> class SimpleHeap {
 private:
  // Book-keeping record for one contiguous region inside a block.
  struct Fragment_T {
    typedef std::multimap<size_t, uintptr_t>::iterator ptr_t;
    // Position of this fragment in free_list_, or free_list_.end() when it has no entry there.
    ptr_t free_list_entry_;
    struct {
      size_t size : 62;  // Length of the fragment in bytes.
      bool discard : 1;  // Owning block has been marked by discardBlock().
      bool free : 1;     // Fragment is currently not allocated.
    };

    Fragment_T(ptr_t Iterator, size_t Len, bool Free)
        : free_list_entry_(Iterator), size(Len), discard(false), free(Free) {}
    Fragment_T() = default;
  };

  // Base address and length of a whole block held in the block cache.
  struct Block {
    uintptr_t base_ptr_;
    size_t length_;

    Block(uintptr_t base, size_t length) : base_ptr_(base), length_(length) {}
    Block() = default;
  };

  Allocator block_allocator_;

  // Free fragments keyed by size; the mapped value is the fragment's start address.
  std::multimap<size_t, uintptr_t> free_list_;

  // Block base address -> (fragment start address -> fragment record).
  std::map<uintptr_t, std::map<uintptr_t, Fragment_T>> block_list_;

  // Completely free blocks kept for reuse. New entries are pushed at the back; alloc() reuses from
  // the back and balance() releases from the front.
  std::deque<Block> block_cache_;

  // Size of blocks that are at least partially in use. Discarded blocks are not counted.
  size_t in_use_size_;

  // Total size of block cache
  size_t cache_size_;

  __forceinline bool isFree(const Fragment_T& node) { return node.free; }

  // Marks node as allocated and detaches it from the free list index (does not erase the entry).
  __forceinline void setUsed(Fragment_T& node) {
    node.free = false;
    node.free_list_entry_ = free_list_.end();
  }

  // Marks node as free and records where its free list entry lives. Passing free_list_.end()
  // marks the node free without making it allocatable.
  __forceinline void setFree(Fragment_T& node, typename Fragment_T::ptr_t Iterator) {
    node.free_list_entry_ = Iterator;
    node.free = true;
  }

  // Builds a record for an allocated fragment of Len bytes.
  __forceinline Fragment_T makeFragment(size_t Len) {
    return Fragment_T(free_list_.end(), Len, false);
  }

  // Builds a record for a free fragment of Len bytes whose free list entry is Iterator.
  __forceinline Fragment_T makeFragment(typename Fragment_T::ptr_t Iterator, size_t Len) {
    return Fragment_T(Iterator, Len, true);
  }

  // Erases node's free list entry, if it has one. Safe to call repeatedly.
  __forceinline void removeFreeListEntry(Fragment_T& node) {
    if (node.free_list_entry_ != free_list_.end()) {
      free_list_.erase(node.free_list_entry_);
      node.free_list_entry_ = free_list_.end();
    }
  }

  // Flags node as belonging to a discarded block and makes it unreachable for alloc().
  __forceinline void discard(Fragment_T& node) {
    removeFreeListEntry(node);
    node.discard = true;
  }

 public:
  explicit SimpleHeap(const Allocator& BlockAllocator = Allocator())
      : block_allocator_(BlockAllocator), in_use_size_(0), cache_size_(0) {}

  // Releases the cached blocks. Blocks that still contain allocations are not released here.
  ~SimpleHeap() {
    trim();
    // Leak here may be due to the user. Check is for debugging only.
    // assert(in_use_size_ == 0 && "Leak in SimpleHeap.");
  }

  SimpleHeap(const SimpleHeap& rhs) = delete;
  SimpleHeap(SimpleHeap&& rhs) = delete;
  SimpleHeap& operator=(const SimpleHeap& rhs) = delete;
  SimpleHeap& operator=(SimpleHeap&& rhs) = delete;

  // Allocates `bytes` bytes and returns the start address.
  //
  // Lookup order:
  //   1. Smallest free fragment that can hold the request. The returned address is aligned to
  //      GPU_HUGE_PAGE_SIZE when bytes >= GPU_HUGE_PAGE_SIZE, otherwise to DEFAULT_GPU_PAGE_SIZE;
  //      any leading padding and trailing remainder stay on the free list.
  //   2. The most recently cached block, when bytes < default_block_size().
  //   3. A new block from the block allocator. In cases 2 and 3 the block base is returned as is.
  //
  // A request larger than default_block_size() gets a block of its own which is immediately
  // marked for discard, so nothing else is sub-allocated from it.
  //
  // The alignment mask below assumes both page-size constants are powers of two - TODO confirm.
  // NOTE(review): bytes == 0 is not handled specially; the used and free records would share one
  // key in the fragment map. Confirm callers never pass 0.
  void* alloc(size_t bytes) {
    // Find best fit.
    uintptr_t base;
    size_t size;

    // For bytes >= 2MB, the requested mem should be aligned
    size_t align_bytes = bytes;
    const int retry = bytes >= GPU_HUGE_PAGE_SIZE ? 1 : 0;
    const size_t align =
        bytes >= GPU_HUGE_PAGE_SIZE ? GPU_HUGE_PAGE_SIZE : DEFAULT_GPU_PAGE_SIZE;

    for (int i = 0; i <= retry; i++) {
      auto free_fragment = free_list_.lower_bound(align_bytes);
      if (free_fragment == free_list_.end()) break;

      uintptr_t addr = free_fragment->second;
      size = free_fragment->first;
      assert(size >= bytes && "SimpleHeap: map lower_bound failure.");

      // Find the containing block and fragment
      auto it = block_list_.upper_bound(addr);
      --it;
      auto& frag_map = it->second;
      auto fragment = frag_map.find(addr);
      assert(fragment != frag_map.end() && "Inconsistency in SimpleHeap.");
      assert(size == fragment->second.size && "Inconsistency in SimpleHeap.");

      // Offset of addr past the previous alignment boundary.
      size_t delta = addr & (align - 1);
      if (!delta) {
        // The fragment already starts at an aligned address.
        base = addr;
        free_list_.erase(free_fragment);

        // Sub-allocate from fragment.
        fragment->second.size = bytes;
        setUsed(fragment->second);

        // Record remaining free space.
        if (size > bytes) {
          free_fragment = free_list_.insert(std::make_pair(size - bytes, base + bytes));
          frag_map[base + bytes] = makeFragment(free_fragment, size - bytes);
        }
      } else {
        // If this is the first request and the fragment cannot hold the alignment padding plus
        // the request, ask for a bigger hole. A second pass only runs when retry == 1; otherwise
        // the loop ends and the block paths below are used.
        if (i == 0 && size < bytes + align - delta) {
          align_bytes += align;
          continue;
        }

        uintptr_t aligned_base = addr + align - delta;
        base = aligned_base;

        // Erase the old free list entry.
        free_list_.erase(free_fragment);

        // fragment 1 - free (padding in front of the aligned address)
        free_fragment = free_list_.insert(std::make_pair(aligned_base - addr, addr));
        frag_map[addr] = makeFragment(free_fragment, aligned_base - addr);

        // fragment 2 - used
        frag_map[base] = makeFragment(bytes);

        // fragment 3 - free (whatever is left behind the allocation)
        if (size > aligned_base - addr + bytes) {
          free_fragment = free_list_.insert(
              std::make_pair(size - (aligned_base - addr) - bytes, aligned_base + bytes));
          frag_map[aligned_base + bytes] =
              makeFragment(free_fragment, size - (aligned_base - addr) - bytes);
        }
      }
      return reinterpret_cast<void*>(base);
    }

    // No usable fragment, check block cache
    if (bytes < default_block_size() && !block_cache_.empty()) {
      const auto& block = block_cache_.back();
      base = block.base_ptr_;
      size = block.length_;
      block_cache_.pop_back();
      cache_size_ -= size;
    } else {  // Alloc new block - new block may be larger than default.
      void* ptr = block_allocator_.alloc(bytes, size);
      base = reinterpret_cast<uintptr_t>(ptr);
      assert(ptr != nullptr && "Block allocation failed, Allocator is expected to throw.");
    }

    in_use_size_ += size;
    assert(size >= bytes && "Alloc exceeds block size.");

    // Sub alloc and insert free region.
    if (size > bytes) {
      auto free_fragment = free_list_.insert(std::make_pair(size - bytes, base + bytes));
      block_list_[base][base + bytes] = makeFragment(free_fragment, size - bytes);
    }

    // Track used region
    block_list_[base][base] = makeFragment(bytes);

    // Disallow multiple suballocation from large blocks.
    // Prevents a small allocation from retaining a large block.
    if (bytes > default_block_size()) {
      bool err = discardBlock(reinterpret_cast<void*>(base));
      assert(err && "Large block discard failed.");
      (void)err;  // Only read by the assert; keeps NDEBUG builds free of unused-variable warnings.
    }

    return reinterpret_cast<void*>(base);
  }

  // Releases an allocation previously returned by alloc().
  //
  // Returns true when ptr is nullptr or the allocation was released. Returns false when ptr is not
  // the start address of a fragment that is currently allocated (unknown address or double free).
  //
  // The fragment is merged with free neighbours. If that leaves the block with a single fragment,
  // the block is returned to the block allocator (discarded block) or moved to the block cache,
  // and balance() is run.
  bool free(void* ptr) {
    if (ptr == nullptr) return true;

    uintptr_t base = reinterpret_cast<uintptr_t>(ptr);

    // Find fragment and validate.
    auto frag_map_it = block_list_.upper_bound(base);
    if (frag_map_it == block_list_.begin()) return false;
    --frag_map_it;
    auto& frag_map = frag_map_it->second;
    auto fragment = frag_map.find(base);
    if (fragment == frag_map.end() || isFree(fragment->second)) return false;

    bool discard = fragment->second.discard;

    // Merge lower
    if (fragment != frag_map.begin()) {
      auto lower = fragment;
      --lower;
      if (isFree(lower->second)) {
        removeFreeListEntry(lower->second);
        lower->second.size += fragment->second.size;
        frag_map.erase(fragment);
        fragment = lower;
      }
    }

    // Merge upper
    {
      auto upper = fragment;
      ++upper;
      if ((upper != frag_map.end()) && isFree(upper->second)) {
        removeFreeListEntry(upper->second);
        fragment->second.size += upper->second.size;
        frag_map.erase(upper);
      }
    }

    // Release whole free blocks.
    if (frag_map.size() == 1) {
      Block block(fragment->first, fragment->second.size);
      block_list_.erase(frag_map_it);

      // Discard or add to the block cache.
      if (discard) {
        // in_use_size_ was already reduced when the block was marked for discard.
        block_allocator_.free(reinterpret_cast<void*>(block.base_ptr_), block.length_);
      } else {
        block_cache_.push_back(block);
        cache_size_ += block.length_;
        in_use_size_ -= block.length_;
      }

      balance();

      // Don't publish free space since block was moved to the cache.
      return true;
    }

    // Don't report free memory if discarding the fragment. It still has to be flagged as free,
    // without a free list entry, so that fragments of the same block freed later can merge with it
    // and the block is eventually released; it also lets a repeated free() of ptr be rejected.
    if (discard) {
      setFree(fragment->second, free_list_.end());
      return true;
    }

    // Report free fragment
    auto free_entry =
        free_list_.insert(std::make_pair(size_t(fragment->second.size), fragment->first));
    setFree(fragment->second, free_entry);
    return true;
  }

  // Release old blocks when over cache limit: while more than one block is cached and the cache
  // is larger than twice the in-use size, the block at the front of the cache is released.
  void balance() {
    while ((block_cache_.size() > 1) && (cache_size_ > in_use_size_ * 2)) {
      const auto& block = block_cache_.front();
      block_allocator_.free(reinterpret_cast<void*>(block.base_ptr_), block.length_);
      cache_size_ -= block.length_;
      block_cache_.pop_front();
    }
  }

  // Returns every cached block to the block allocator and empties the cache.
  void trim() {
    for (const auto& block : block_cache_)
      block_allocator_.free(reinterpret_cast<void*>(block.base_ptr_), block.length_);
    block_cache_.clear();
    cache_size_ = 0;
  }

  // Total size of the blocks currently held in the block cache.
  size_t cache_size() const { return cache_size_; }

  // Block size reported by the block allocator.
  size_t default_block_size() const { return block_allocator_.block_size(); }

  // Prevent reuse of the block containing ptr. No further fragments will be allocated from the
  // block and the block will not be added to the block cache when it is free.
  //
  // Returns true when ptr is nullptr, when the block was already discarded, or when the block was
  // marked successfully. Returns false when ptr does not lie inside a tracked block.
  bool discardBlock(void* ptr) {
    if (ptr == nullptr) return true;

    uintptr_t base = reinterpret_cast<uintptr_t>(ptr);

    // Find the block and validate that ptr lies inside it.
    auto frag_map_it = block_list_.upper_bound(base);
    if (frag_map_it == block_list_.begin()) return false;
    --frag_map_it;
    auto& frag_map = frag_map_it->second;
    if ((base < frag_map.begin()->first) ||
        (frag_map.rbegin()->first + frag_map.rbegin()->second.size <= base))
      return false;

    // Is block already discarded?
    if (frag_map.begin()->second.discard) return true;

    // Mark all fragments for discard and compute block size. Removes freelist records for all
    // fragments in the block.
    size_t size = 0;
    for (auto& frag : frag_map) {
      discard(frag.second);
      size += frag.second.size;
    }

    // Remove discarded block from in-use tracking and rebalance the block cache.
    in_use_size_ -= size;
    balance();
    return true;
  }
};
} // namespace wsl
#endif // HSA_RUNTME_CORE_UTIL_SIMPLE_HEAP_H_