Correct IPC fragment reuse.

Memory from the suballocator may be exported via IPC.  If this
happens then the allocating process should not reuse that memory
since it would still be connected to the remote process.  IPC exported
memory must be released back to the driver.

Change-Id: I2ab0c814f63191f753fc3640cc4140ee144bf07f


[ROCm/ROCR-Runtime commit: 29b660c91e]
This commit is contained in:
Sean Keely
2020-04-29 23:12:17 -05:00
rodzic 355cf758a9
commit d554725903
5 zmienionych plików z 83 dodań i 14 usunięć
@@ -103,6 +103,8 @@ class MemoryRegion : public core::MemoryRegion {
hsa_status_t Free(void* address, size_t size) const;
hsa_status_t IPCFragmentExport(void* address) const;
hsa_status_t GetInfo(hsa_region_info_t attribute, void* value) const;
hsa_status_t GetPoolInfo(hsa_amd_memory_pool_info_t attribute,
@@ -95,6 +95,9 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> {
virtual hsa_status_t Free(void* address, size_t size) const = 0;
// Prepares suballocated memory for IPC export.
virtual hsa_status_t IPCFragmentExport(void* address) const = 0;
// Translate memory properties into HSA region attribute.
virtual hsa_status_t GetInfo(hsa_region_info_t attribute,
void* value) const = 0;
@@ -262,6 +262,12 @@ hsa_status_t MemoryRegion::Free(void* address, size_t size) const {
return HSA_STATUS_SUCCESS;
}
// TODO: Look into a better name and/or making this process transparent to exporting.
hsa_status_t MemoryRegion::IPCFragmentExport(void* address) const {
if (!fragment_allocator_.discardBlock(address)) return HSA_STATUS_ERROR_INVALID_ALLOCATION;
return HSA_STATUS_SUCCESS;
}
hsa_status_t MemoryRegion::GetInfo(hsa_region_info_t attribute,
void* value) const {
switch (attribute) {
@@ -904,6 +904,11 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
(reinterpret_cast<uint8_t*>(ptr) - reinterpret_cast<uint8_t*>(block.base)) / 4096;
// Holds size in (4K?) pages in thunk handle: Mark as a fragment and denote offset.
handle->handle[6] |= 0x80000000 | offset;
// Mark block for IPC. Prevents reallocation of exported memory.
ScopedAcquire<KernelMutex> lock(&memory_lock_);
hsa_status_t err = allocation_map_[ptr].region->IPCFragmentExport(ptr);
assert(err == HSA_STATUS_SUCCESS && "Region inconsistent with address map.");
return err;
} else {
if (hsaKmtShareMemory(ptr, len, reinterpret_cast<HsaSharedMemoryHandle*>(handle)) !=
HSAKMT_STATUS_SUCCESS)
@@ -58,9 +58,14 @@ template <typename Allocator> class SimpleHeap {
struct Fragment_T {
typedef std::multimap<size_t, uintptr_t>::iterator ptr_t;
ptr_t free_list_entry_;
size_t size;
struct {
size_t size : 62;
bool discard : 1;
bool free : 1;
};
Fragment_T(ptr_t Iterator, size_t Len) : free_list_entry_(Iterator), size(Len) {}
Fragment_T(ptr_t Iterator, size_t Len, bool Free)
: free_list_entry_(Iterator), size(Len), discard(false), free(Free) {}
Fragment_T() = default;
};
@@ -81,16 +86,30 @@ template <typename Allocator> class SimpleHeap {
size_t in_use_size_;
size_t cache_size_;
__forceinline bool isFree(const Fragment_T& node) {
return node.free_list_entry_ != free_list_.end();
__forceinline bool isFree(const Fragment_T& node) { return node.free; }
__forceinline void setUsed(Fragment_T& node) {
node.free = false;
node.free_list_entry_ = free_list_.end();
}
__forceinline void setUsed(Fragment_T& node) { node.free_list_entry_ = free_list_.end(); }
__forceinline void setFree(Fragment_T& node, typename Fragment_T::ptr_t Iterator) {
node.free_list_entry_ = Iterator;
node.free = true;
}
__forceinline Fragment_T makeFragment(size_t Len) {
return Fragment_T(free_list_.end(), Len, false);
}
__forceinline Fragment_T makeFragment(size_t Len) { return Fragment_T(free_list_.end(), Len); }
__forceinline Fragment_T makeFragment(typename Fragment_T::ptr_t Iterator, size_t Len) {
return Fragment_T(Iterator, Len);
return Fragment_T(Iterator, Len, true);
}
__forceinline void removeFreeListEntry(Fragment_T& node) {
if (node.free_list_entry_ != free_list_.end()) {
free_list_.erase(node.free_list_entry_);
node.free_list_entry_ = free_list_.end();
}
}
__forceinline void discard(Fragment_T& node) {
removeFreeListEntry(node);
node.discard = true;
}
public:
@@ -185,12 +204,14 @@ template <typename Allocator> class SimpleHeap {
auto fragment = frag_map.find(base);
if (fragment == frag_map.end() || isFree(fragment->second)) return false;
bool discard = fragment->second.discard;
// Merge lower
if (fragment != frag_map.begin()) {
auto lower = fragment;
lower--;
if (isFree(lower->second)) {
free_list_.erase(lower->second.free_list_entry_);
removeFreeListEntry(lower->second);
lower->second.size += fragment->second.size;
frag_map.erase(fragment);
fragment = lower;
@@ -202,19 +223,26 @@ template <typename Allocator> class SimpleHeap {
auto upper = fragment;
upper++;
if ((upper != frag_map.end()) && isFree(upper->second)) {
free_list_.erase(upper->second.free_list_entry_);
removeFreeListEntry(upper->second);
fragment->second.size += upper->second.size;
frag_map.erase(upper);
}
}
// Move whole free blocks to block cache
// Release whole free blocks.
if (frag_map.size() == 1) {
in_use_size_ -= fragment->second.size;
cache_size_ += fragment->second.size;
block_cache_.push_back(Block(fragment->first, fragment->second.size));
Block block(fragment->first, fragment->second.size);
in_use_size_ -= block.length_;
block_list_.erase(frag_map_it);
// Discard or add to the block cache.
if (discard) {
block_allocator_.free(reinterpret_cast<void*>(block.base_ptr_), block.length_);
} else {
block_cache_.push_back(block);
cache_size_ += block.length_;
}
// Release old blocks when over cache limit.
while ((block_cache_.size() > 1) && (cache_size_ > in_use_size_ * 2)) {
const auto& block = block_cache_.front();
@@ -227,9 +255,12 @@ template <typename Allocator> class SimpleHeap {
return true;
}
// Don't report free memory if discarding the fragment.
if (discard) return true;
// Report free fragment
const auto& freeEntry =
free_list_.insert(std::make_pair(fragment->second.size, fragment->first));
free_list_.insert(std::make_pair(size_t(fragment->second.size), fragment->first));
setFree(fragment->second, freeEntry);
return true;
@@ -243,6 +274,28 @@ template <typename Allocator> class SimpleHeap {
}
size_t max_alloc() const { return block_allocator_.block_size(); }
// Prevent reuse of the block containing ptr. No further fragments will be allocated from the
// block and the block will not be added to the block cache when it is free.
bool discardBlock(void* ptr) {
if (ptr == nullptr) return true;
uintptr_t base = reinterpret_cast<uintptr_t>(ptr);
// Find block validate.
auto frag_map_it = block_list_.upper_bound(base);
if (frag_map_it == block_list_.begin()) return false;
frag_map_it--;
auto& frag_map = frag_map_it->second;
if ((base < frag_map.begin()->first) ||
(frag_map.rbegin()->first + frag_map.rbegin()->second.size <= base))
return false;
// Mark all fragments for discard. Removes freelist records for all fragments in the block.
for (auto& frag : frag_map) discard(frag.second);
return true;
}
};
#endif // HSA_RUNTME_CORE_UTIL_SIMPLE_HEAP_H_