diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h index 8be4644ae7..25e4113115 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h @@ -103,6 +103,8 @@ class MemoryRegion : public core::MemoryRegion { hsa_status_t Free(void* address, size_t size) const; + hsa_status_t IPCFragmentExport(void* address) const; + hsa_status_t GetInfo(hsa_region_info_t attribute, void* value) const; hsa_status_t GetPoolInfo(hsa_amd_memory_pool_info_t attribute, diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h index 6281413d1b..35cbaa47a2 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h @@ -95,6 +95,9 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> { virtual hsa_status_t Free(void* address, size_t size) const = 0; + // Prepares suballocated memory for IPC export. + virtual hsa_status_t IPCFragmentExport(void* address) const = 0; + // Translate memory properties into HSA region attribute. virtual hsa_status_t GetInfo(hsa_region_info_t attribute, void* value) const = 0; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp index b8f4076f8f..59ee5e64e4 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp @@ -262,6 +262,12 @@ hsa_status_t MemoryRegion::Free(void* address, size_t size) const { return HSA_STATUS_SUCCESS; } +// TODO: Look into a better name and/or making this process transparent to exporting. +hsa_status_t MemoryRegion::IPCFragmentExport(void* address) const { + if (!fragment_allocator_.discardBlock(address)) return HSA_STATUS_ERROR_INVALID_ALLOCATION; + return HSA_STATUS_SUCCESS; +} + hsa_status_t MemoryRegion::GetInfo(hsa_region_info_t attribute, void* value) const { switch (attribute) { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index dd3dfab487..53b6e00917 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -904,6 +904,11 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han (reinterpret_cast(ptr) - reinterpret_cast(block.base)) / 4096; // Holds size in (4K?) pages in thunk handle: Mark as a fragment and denote offset. handle->handle[6] |= 0x80000000 | offset; + // Mark block for IPC. Prevents reallocation of exported memory. + ScopedAcquire lock(&memory_lock_); + hsa_status_t err = allocation_map_[ptr].region->IPCFragmentExport(ptr); + assert(err == HSA_STATUS_SUCCESS && "Region inconsistent with address map."); + return err; } else { if (hsaKmtShareMemory(ptr, len, reinterpret_cast(handle)) != HSAKMT_STATUS_SUCCESS) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/util/simple_heap.h b/projects/rocr-runtime/runtime/hsa-runtime/core/util/simple_heap.h index 361f71389a..2584f3c2bd 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/util/simple_heap.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/util/simple_heap.h @@ -58,9 +58,14 @@ template class SimpleHeap { struct Fragment_T { typedef std::multimap::iterator ptr_t; ptr_t free_list_entry_; - size_t size; + struct { + size_t size : 62; + bool discard : 1; + bool free : 1; + }; - Fragment_T(ptr_t Iterator, size_t Len) : free_list_entry_(Iterator), size(Len) {} + Fragment_T(ptr_t Iterator, size_t Len, bool Free) + : free_list_entry_(Iterator), size(Len), discard(false), free(Free) {} Fragment_T() = default; }; @@ -81,16 +86,30 @@ template class SimpleHeap { size_t in_use_size_; size_t cache_size_; - __forceinline bool isFree(const Fragment_T& node) { - return node.free_list_entry_ != free_list_.end(); + __forceinline bool isFree(const Fragment_T& node) { return node.free; } + __forceinline void setUsed(Fragment_T& node) { + node.free = false; + node.free_list_entry_ = free_list_.end(); } - __forceinline void setUsed(Fragment_T& node) { node.free_list_entry_ = free_list_.end(); } __forceinline void setFree(Fragment_T& node, typename Fragment_T::ptr_t Iterator) { node.free_list_entry_ = Iterator; + node.free = true; + } + __forceinline Fragment_T makeFragment(size_t Len) { + return Fragment_T(free_list_.end(), Len, false); } - __forceinline Fragment_T makeFragment(size_t Len) { return Fragment_T(free_list_.end(), Len); } __forceinline Fragment_T makeFragment(typename Fragment_T::ptr_t Iterator, size_t Len) { - return Fragment_T(Iterator, Len); + return Fragment_T(Iterator, Len, true); + } + __forceinline void removeFreeListEntry(Fragment_T& node) { + if (node.free_list_entry_ != free_list_.end()) { + free_list_.erase(node.free_list_entry_); + node.free_list_entry_ = free_list_.end(); + } + } + __forceinline void discard(Fragment_T& node) { + removeFreeListEntry(node); + node.discard = true; } public: @@ -185,12 +204,14 @@ template class SimpleHeap { auto fragment = frag_map.find(base); if (fragment == frag_map.end() || isFree(fragment->second)) return false; + bool discard = fragment->second.discard; + // Merge lower if (fragment != frag_map.begin()) { auto lower = fragment; lower--; if (isFree(lower->second)) { - free_list_.erase(lower->second.free_list_entry_); + removeFreeListEntry(lower->second); lower->second.size += fragment->second.size; frag_map.erase(fragment); fragment = lower; @@ -202,19 +223,26 @@ template class SimpleHeap { auto upper = fragment; upper++; if ((upper != frag_map.end()) && isFree(upper->second)) { - free_list_.erase(upper->second.free_list_entry_); + removeFreeListEntry(upper->second); fragment->second.size += upper->second.size; frag_map.erase(upper); } } - // Move whole free blocks to block cache + // Release whole free blocks. if (frag_map.size() == 1) { - in_use_size_ -= fragment->second.size; - cache_size_ += fragment->second.size; - block_cache_.push_back(Block(fragment->first, fragment->second.size)); + Block block(fragment->first, fragment->second.size); + in_use_size_ -= block.length_; block_list_.erase(frag_map_it); + // Discard or add to the block cache. + if (discard) { + block_allocator_.free(reinterpret_cast(block.base_ptr_), block.length_); + } else { + block_cache_.push_back(block); + cache_size_ += block.length_; + } + // Release old blocks when over cache limit. while ((block_cache_.size() > 1) && (cache_size_ > in_use_size_ * 2)) { const auto& block = block_cache_.front(); @@ -227,9 +255,12 @@ template class SimpleHeap { return true; } + // Don't report free memory if discarding the fragment. + if (discard) return true; + // Report free fragment const auto& freeEntry = - free_list_.insert(std::make_pair(fragment->second.size, fragment->first)); + free_list_.insert(std::make_pair(size_t(fragment->second.size), fragment->first)); setFree(fragment->second, freeEntry); return true; @@ -243,6 +274,28 @@ template class SimpleHeap { } size_t max_alloc() const { return block_allocator_.block_size(); } + + // Prevent reuse of the block containing ptr. No further fragments will be allocated from the + // block and the block will not be added to the block cache when it is free. + bool discardBlock(void* ptr) { + if (ptr == nullptr) return true; + + uintptr_t base = reinterpret_cast(ptr); + + // Find block validate. + auto frag_map_it = block_list_.upper_bound(base); + if (frag_map_it == block_list_.begin()) return false; + frag_map_it--; + auto& frag_map = frag_map_it->second; + if ((base < frag_map.begin()->first) || + (frag_map.rbegin()->first + frag_map.rbegin()->second.size <= base)) + return false; + + // Mark all fragments for discard. Removes freelist records for all fragments in the block. + for (auto& frag : frag_map) discard(frag.second); + + return true; + } }; #endif // HSA_RUNTME_CORE_UTIL_SIMPLE_HEAP_H_