From d554725903ba5cae7cbdb7f8c6ec72ec55fcaca2 Mon Sep 17 00:00:00 2001 From: Sean Keely Date: Wed, 29 Apr 2020 23:12:17 -0500 Subject: [PATCH] Correct IPC fragment reuse. Memory from the suballocator may be exported via IPC. If this happens then the allocating process should not reuse that memory since it would still be connected to the remote process. IPC exported memory must be released back to the driver. Change-Id: I2ab0c814f63191f753fc3640cc4140ee144bf07f [ROCm/ROCR-Runtime commit: 29b660c91e5d2955327acbe6e764be55a45e7598] --- .../hsa-runtime/core/inc/amd_memory_region.h | 2 + .../hsa-runtime/core/inc/memory_region.h | 3 + .../core/runtime/amd_memory_region.cpp | 6 ++ .../hsa-runtime/core/runtime/runtime.cpp | 5 ++ .../hsa-runtime/core/util/simple_heap.h | 81 +++++++++++++++---- 5 files changed, 83 insertions(+), 14 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h index 8be4644ae7..25e4113115 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h @@ -103,6 +103,8 @@ class MemoryRegion : public core::MemoryRegion { hsa_status_t Free(void* address, size_t size) const; + hsa_status_t IPCFragmentExport(void* address) const; + hsa_status_t GetInfo(hsa_region_info_t attribute, void* value) const; hsa_status_t GetPoolInfo(hsa_amd_memory_pool_info_t attribute, diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h index 6281413d1b..35cbaa47a2 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h @@ -95,6 +95,9 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> { virtual hsa_status_t Free(void* address, size_t size) const = 0; + // Prepares suballocated memory for IPC export. + virtual hsa_status_t IPCFragmentExport(void* address) const = 0; + // Translate memory properties into HSA region attribute. virtual hsa_status_t GetInfo(hsa_region_info_t attribute, void* value) const = 0; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp index b8f4076f8f..59ee5e64e4 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp @@ -262,6 +262,12 @@ hsa_status_t MemoryRegion::Free(void* address, size_t size) const { return HSA_STATUS_SUCCESS; } +// TODO: Look into a better name and/or making this process transparent to exporting. +hsa_status_t MemoryRegion::IPCFragmentExport(void* address) const { + if (!fragment_allocator_.discardBlock(address)) return HSA_STATUS_ERROR_INVALID_ALLOCATION; + return HSA_STATUS_SUCCESS; +} + hsa_status_t MemoryRegion::GetInfo(hsa_region_info_t attribute, void* value) const { switch (attribute) { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index dd3dfab487..53b6e00917 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -904,6 +904,11 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han (reinterpret_cast(ptr) - reinterpret_cast(block.base)) / 4096; // Holds size in (4K?) pages in thunk handle: Mark as a fragment and denote offset. handle->handle[6] |= 0x80000000 | offset; + // Mark block for IPC. Prevents reallocation of exported memory. + ScopedAcquire lock(&memory_lock_); + hsa_status_t err = allocation_map_[ptr].region->IPCFragmentExport(ptr); + assert(err == HSA_STATUS_SUCCESS && "Region inconsistent with address map."); + return err; } else { if (hsaKmtShareMemory(ptr, len, reinterpret_cast(handle)) != HSAKMT_STATUS_SUCCESS) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/util/simple_heap.h b/projects/rocr-runtime/runtime/hsa-runtime/core/util/simple_heap.h index 361f71389a..2584f3c2bd 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/util/simple_heap.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/util/simple_heap.h @@ -58,9 +58,14 @@ template class SimpleHeap { struct Fragment_T { typedef std::multimap::iterator ptr_t; ptr_t free_list_entry_; - size_t size; + struct { + size_t size : 62; + bool discard : 1; + bool free : 1; + }; - Fragment_T(ptr_t Iterator, size_t Len) : free_list_entry_(Iterator), size(Len) {} + Fragment_T(ptr_t Iterator, size_t Len, bool Free) + : free_list_entry_(Iterator), size(Len), discard(false), free(Free) {} Fragment_T() = default; }; @@ -81,16 +86,30 @@ template class SimpleHeap { size_t in_use_size_; size_t cache_size_; - __forceinline bool isFree(const Fragment_T& node) { - return node.free_list_entry_ != free_list_.end(); + __forceinline bool isFree(const Fragment_T& node) { return node.free; } + __forceinline void setUsed(Fragment_T& node) { + node.free = false; + node.free_list_entry_ = free_list_.end(); } - __forceinline void setUsed(Fragment_T& node) { node.free_list_entry_ = free_list_.end(); } __forceinline void setFree(Fragment_T& node, typename Fragment_T::ptr_t Iterator) { node.free_list_entry_ = Iterator; + node.free = true; + } + __forceinline Fragment_T makeFragment(size_t Len) { + return Fragment_T(free_list_.end(), Len, false); } - __forceinline Fragment_T makeFragment(size_t Len) { return Fragment_T(free_list_.end(), Len); } __forceinline Fragment_T makeFragment(typename Fragment_T::ptr_t Iterator, size_t Len) { - return Fragment_T(Iterator, Len); + return Fragment_T(Iterator, Len, true); + } + __forceinline void removeFreeListEntry(Fragment_T& node) { + if (node.free_list_entry_ != free_list_.end()) { + free_list_.erase(node.free_list_entry_); + node.free_list_entry_ = free_list_.end(); + } + } + __forceinline void discard(Fragment_T& node) { + removeFreeListEntry(node); + node.discard = true; } public: @@ -185,12 +204,14 @@ template class SimpleHeap { auto fragment = frag_map.find(base); if (fragment == frag_map.end() || isFree(fragment->second)) return false; + bool discard = fragment->second.discard; + // Merge lower if (fragment != frag_map.begin()) { auto lower = fragment; lower--; if (isFree(lower->second)) { - free_list_.erase(lower->second.free_list_entry_); + removeFreeListEntry(lower->second); lower->second.size += fragment->second.size; frag_map.erase(fragment); fragment = lower; @@ -202,19 +223,26 @@ template class SimpleHeap { auto upper = fragment; upper++; if ((upper != frag_map.end()) && isFree(upper->second)) { - free_list_.erase(upper->second.free_list_entry_); + removeFreeListEntry(upper->second); fragment->second.size += upper->second.size; frag_map.erase(upper); } } - // Move whole free blocks to block cache + // Release whole free blocks. if (frag_map.size() == 1) { - in_use_size_ -= fragment->second.size; - cache_size_ += fragment->second.size; - block_cache_.push_back(Block(fragment->first, fragment->second.size)); + Block block(fragment->first, fragment->second.size); + in_use_size_ -= block.length_; block_list_.erase(frag_map_it); + // Discard or add to the block cache. + if (discard) { + block_allocator_.free(reinterpret_cast(block.base_ptr_), block.length_); + } else { + block_cache_.push_back(block); + cache_size_ += block.length_; + } + // Release old blocks when over cache limit. while ((block_cache_.size() > 1) && (cache_size_ > in_use_size_ * 2)) { const auto& block = block_cache_.front(); @@ -227,9 +255,12 @@ template class SimpleHeap { return true; } + // Don't report free memory if discarding the fragment. + if (discard) return true; + // Report free fragment const auto& freeEntry = - free_list_.insert(std::make_pair(fragment->second.size, fragment->first)); + free_list_.insert(std::make_pair(size_t(fragment->second.size), fragment->first)); setFree(fragment->second, freeEntry); return true; @@ -243,6 +274,28 @@ template class SimpleHeap { } size_t max_alloc() const { return block_allocator_.block_size(); } + + // Prevent reuse of the block containing ptr. No further fragments will be allocated from the + // block and the block will not be added to the block cache when it is free. + bool discardBlock(void* ptr) { + if (ptr == nullptr) return true; + + uintptr_t base = reinterpret_cast(ptr); + + // Find block validate. + auto frag_map_it = block_list_.upper_bound(base); + if (frag_map_it == block_list_.begin()) return false; + frag_map_it--; + auto& frag_map = frag_map_it->second; + if ((base < frag_map.begin()->first) || + (frag_map.rbegin()->first + frag_map.rbegin()->second.size <= base)) + return false; + + // Mark all fragments for discard. Removes freelist records for all fragments in the block. + for (auto& frag : frag_map) discard(frag.second); + + return true; + } }; #endif // HSA_RUNTME_CORE_UTIL_SIMPLE_HEAP_H_