From d4707ff16616ab54758affebcb7f19831112fb03 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Tue, 10 Nov 2020 14:56:06 -0500 Subject: [PATCH] SWDEV-259566 - Adding support to retrieve handle for offsetted pointer. Change-Id: I4a700c31a9ab481c4ea43923af57e83247555ab0 [ROCm/clr commit: 95e3a6d9859af6a29587137e66710286ea6b5fda] --- projects/clr/rocclr/device/device.hpp | 4 +- projects/clr/rocclr/device/rocm/rocdevice.cpp | 94 +++++++++++++------ projects/clr/rocclr/device/rocm/rocdevice.hpp | 4 +- 3 files changed, 70 insertions(+), 32 deletions(-) mode change 100644 => 100755 projects/clr/rocclr/device/device.hpp mode change 100644 => 100755 projects/clr/rocclr/device/rocm/rocdevice.cpp mode change 100644 => 100755 projects/clr/rocclr/device/rocm/rocdevice.hpp diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp old mode 100644 new mode 100755 index eb79e6eaf9..8bfbba872b --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -1776,12 +1776,12 @@ class Device : public RuntimeObject { //! Checks if OCL runtime can use code object manager for compilation bool ValidateComgr(); - virtual bool IpcCreate(void* dev_ptr, size_t* mem_size, void* handle) { + virtual bool IpcCreate(void* dev_ptr, size_t* mem_size, void* handle, size_t* mem_offset) const { ShouldNotReachHere(); return false; } - virtual bool IpcAttach(const void* handle, size_t mem_size, + virtual bool IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, unsigned int flags, void** dev_ptr) const { ShouldNotReachHere(); return false; diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp old mode 100644 new mode 100755 index 28714eda23..c2b101b84a --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -2065,7 +2065,7 @@ void Device::updateFreeMemory(size_t size, bool free) { ClPrint(amd::LOG_INFO, amd::LOG_MEM, "device=0x%lx, freeMem_ = 0x%x", this, freeMem_.load()); } -bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle) { +bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle, size_t* mem_offset) const { hsa_status_t hsa_status = HSA_STATUS_SUCCESS; amd::Memory* amd_mem_obj = amd::MemObjMap::FindMemObj(dev_ptr); @@ -2074,7 +2074,7 @@ bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle) { return false; } - // Get the starting pointer from the amd::Memory object + // Get the original pointer from the amd::Memory object void* orig_dev_ptr = nullptr; if (amd_mem_obj->getSvmPtr() != nullptr) { orig_dev_ptr = amd_mem_obj->getSvmPtr(); @@ -2084,15 +2084,27 @@ bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle) { ShouldNotReachHere(); } - if (orig_dev_ptr != dev_ptr) { - DevLogPrintfError("Handle can only be created for Original Dev Ptr: 0x%x", orig_dev_ptr); + // Check if the dev_ptr is lesser than original dev_ptr + if (orig_dev_ptr > dev_ptr) { + //If this happens, then revisit FindMemObj logic + DevLogPrintfError("Original dev_ptr: 0x%x cannot be greater than dev_ptr: 0x%x", + orig_dev_ptr, dev_ptr); return false; } + //Calculate the memory offset from the original base ptr + *mem_offset = reinterpret_cast
(dev_ptr) - reinterpret_cast
(orig_dev_ptr); *mem_size = amd_mem_obj->getSize(); + //Check if the dev_ptr is greater than memory allocated + if (*mem_offset > *mem_size) { + DevLogPrintfError("Memory offset: %u cannot be greater than size of " + "original memory allocated: %u", *mem_size, *mem_offset); + return false; + } + // Pass the pointer and memory size to retrieve the handle - hsa_status = hsa_amd_ipc_memory_create(dev_ptr, amd::alignUp(*mem_size, alloc_granularity()), + hsa_status = hsa_amd_ipc_memory_create(orig_dev_ptr, amd::alignUp(*mem_size, alloc_granularity()), reinterpret_cast(handle)); if (hsa_status != HSA_STATUS_SUCCESS) { @@ -2103,36 +2115,51 @@ bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle) { return true; } -bool Device::IpcAttach(const void* handle, size_t mem_size, unsigned int flags, - void** dev_ptr) const { +bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, + unsigned int flags, void** dev_ptr) const { amd::Memory* amd_mem_obj = nullptr; hsa_status_t hsa_status = HSA_STATUS_SUCCESS; + void* orig_dev_ptr = nullptr; // Retrieve the devPtr from the handle - hsa_status - = hsa_amd_ipc_memory_attach(reinterpret_cast(handle), - mem_size, (1 + p2p_agents_.size()), p2p_agents_list_, dev_ptr); + hsa_status = hsa_amd_ipc_memory_attach(reinterpret_cast(handle), + mem_size, (1 + p2p_agents_.size()), p2p_agents_list_, + &orig_dev_ptr); if (hsa_status != HSA_STATUS_SUCCESS) { LogPrintfError("HSA failed to attach IPC memory with status: %d \n", hsa_status); return false; } - // Create an amd Memory object for the pointer - amd_mem_obj = new (context()) amd::Buffer(context(), flags, mem_size, *dev_ptr); + amd_mem_obj = amd::MemObjMap::FindMemObj(orig_dev_ptr); if (amd_mem_obj == nullptr) { - LogError("failed to create a mem object!"); - return false; + + // Memory does not exist, create an amd Memory object for the pointer + amd_mem_obj = new (context()) amd::Buffer(context(), flags, mem_size, orig_dev_ptr); + if (amd_mem_obj == nullptr) { + LogError("failed to create a mem object!"); + return false; + } + + if (!amd_mem_obj->create(nullptr)) { + LogError("failed to create a svm hidden buffer!"); + amd_mem_obj->release(); + return false; + } + + // Add the original mem_ptr to the MemObjMap with newly created amd_mem_obj + amd::MemObjMap::AddMemObj(orig_dev_ptr, amd_mem_obj); + + } else { + //Memory already exists, just retain the old one. + amd_mem_obj->retain(); } - if (!amd_mem_obj->create(nullptr)) { - LogError("failed to create a svm hidden buffer!"); - amd_mem_obj->release(); - return false; - } + //Make sure the mem_offset doesnt overflow the allocated memory + guarantee((mem_offset < mem_size) && "IPC mem offset greater than allocated size"); - // Add the memory to the MemObjMap - amd::MemObjMap::AddMemObj(*dev_ptr, amd_mem_obj); + // Return offsetted device pointer and maintain offsetted_ptr to orig_dev_ptr in map + *dev_ptr = reinterpret_cast
(orig_dev_ptr) + mem_offset; return true; } @@ -2146,15 +2173,26 @@ bool Device::IpcDetach (void* dev_ptr) const { return false; } - // Detach the memory from HSA - hsa_status = hsa_amd_ipc_memory_detach(dev_ptr); - if (hsa_status != HSA_STATUS_SUCCESS) { - LogPrintfError("HSA failed to detach memory with status: %d \n", hsa_status); - return false; + // Get the original pointer from the amd::Memory object + void* orig_dev_ptr = nullptr; + if (amd_mem_obj->getSvmPtr() != nullptr) { + orig_dev_ptr = amd_mem_obj->getSvmPtr(); + } else if (amd_mem_obj->getHostMem() != nullptr) { + orig_dev_ptr = amd_mem_obj->getHostMem(); + } else { + ShouldNotReachHere(); } - amd::MemObjMap::RemoveMemObj(dev_ptr); - amd_mem_obj->release(); + if (amd_mem_obj->release() == 0) { + amd::MemObjMap::RemoveMemObj(orig_dev_ptr); + + // Detach the memory from HSA + hsa_status = hsa_amd_ipc_memory_detach(orig_dev_ptr); + if (hsa_status != HSA_STATUS_SUCCESS) { + LogPrintfError("HSA failed to detach memory with status: %d \n", hsa_status); + return false; + } + } return true; } diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp old mode 100644 new mode 100755 index fd1e40d10d..82d33ccbb4 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -444,8 +444,8 @@ class Device : public NullDevice { // Update the global free memory size void updateFreeMemory(size_t size, bool free); - virtual bool IpcCreate(void* dev_ptr, size_t* mem_size, void* handle); - virtual bool IpcAttach(const void* handle, size_t mem_size, + virtual bool IpcCreate(void* dev_ptr, size_t* mem_size, void* handle, size_t* mem_offset) const; + virtual bool IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, unsigned int flags, void** dev_ptr) const; virtual bool IpcDetach (void* dev_ptr) const;