From f5dc99bf9e7c9826141febc044f9e3532392d4e6 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Thu, 22 Sep 2022 17:32:39 +0000 Subject: [PATCH] Use user requested size for memory fragments The amount of memory requested by the user may be aligned-up internally to the memory pool granularity. The extra padded memory should not be considered when validating pointers from the user. Also return the user-requested size when the user queries pointer information. Change-Id: I28b25448ea03c836b44fafdb34b7330cf6887424 [ROCm/ROCR-Runtime commit: 39632a713e47f137ac499769a91c30c7410031eb] --- .../runtime/hsa-runtime/core/inc/runtime.h | 9 ++++---- .../hsa-runtime/core/runtime/runtime.cpp | 23 ++++++++++--------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h index e20da516f1..6ca4e52253 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h @@ -373,9 +373,9 @@ class Runtime { static void AsyncEventsLoop(void*); struct AllocationRegion { - AllocationRegion() : region(NULL), size(0), user_ptr(nullptr) {} - AllocationRegion(const MemoryRegion* region_arg, size_t size_arg) - : region(region_arg), size(size_arg), user_ptr(nullptr) {} + AllocationRegion() : region(NULL), size(0), size_requested(0), user_ptr(nullptr) {} + AllocationRegion(const MemoryRegion* region_arg, size_t size_arg, size_t size_requested) + : region(region_arg), size(size_arg), size_requested(size_requested), user_ptr(nullptr) {} struct notifier_t { void* ptr; @@ -384,7 +384,8 @@ class Runtime { }; const MemoryRegion* region; - size_t size; + size_t size; /* actual size = align_up(size_requested, granularity) */ + size_t size_requested; /* size requested by user */ void* user_ptr; std::unique_ptr> notifiers; }; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp 
b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 2c20375045..e85a224883 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -285,11 +285,12 @@ hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent, hsa_status_t Runtime::AllocateMemory(const MemoryRegion* region, size_t size, MemoryRegion::AllocateFlags alloc_flags, void** address) { + size_t size_requested = size; // region->Allocate(...) may align-up size to granularity hsa_status_t status = region->Allocate(size, alloc_flags, address); // Track the allocation result so that it could be freed properly. if (status == HSA_STATUS_SUCCESS) { ScopedAcquire lock(&memory_lock_); - allocation_map_[*address] = AllocationRegion(region, size); + allocation_map_[*address] = AllocationRegion(region, size, size_requested); } return status; @@ -815,15 +816,15 @@ hsa_status_t Runtime::PtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, voi if (fragment != allocation_map_.begin()) { fragment--; if ((fragment->first <= ptr) && - (ptr < reinterpret_cast(fragment->first) + fragment->second.size)) { - // agent and host address must match here. Only lock memory is allowed to have differing - // addresses but lock memory has type HSA_EXT_POINTER_TYPE_LOCKED and cannot be - // suballocated. - retInfo.agentBaseAddress = const_cast(fragment->first); - retInfo.hostBaseAddress = retInfo.agentBaseAddress; - retInfo.sizeInBytes = fragment->second.size; - retInfo.userData = fragment->second.user_ptr; - allocation_map_entry_found = true; + (ptr < reinterpret_cast(fragment->first) + fragment->second.size_requested)) { + // agent and host address must match here. Only lock memory is allowed to have differing + // addresses but lock memory has type HSA_EXT_POINTER_TYPE_LOCKED and cannot be + // suballocated. 
+ retInfo.agentBaseAddress = const_cast(fragment->first); + retInfo.hostBaseAddress = retInfo.agentBaseAddress; + retInfo.sizeInBytes = fragment->second.size_requested; + retInfo.userData = fragment->second.user_ptr; + allocation_map_entry_found = true; } } } // end lock scope @@ -959,7 +960,7 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, len = Min(len, importSize - fragOffset); } ScopedAcquire lock(&memory_lock_); - allocation_map_[importAddress] = AllocationRegion(nullptr, len); + allocation_map_[importAddress] = AllocationRegion(nullptr, len, len); }; if ((importHandle.handle[6] & 0x80000000) != 0) {