From d52d16c8e6159150792df39683a67072d860c7ae Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Fri, 15 Mar 2024 11:31:19 -0400 Subject: [PATCH] SWDEV-413997 - Fixing multiple device cases. Change-Id: I10ad3fbfca887e92cd81f68392fa1acf753cbd2b --- hipamd/src/hip_vm.cpp | 6 +- rocclr/device/device.cpp | 114 ++++++++++++++++++++++++++++++ rocclr/device/device.hpp | 28 ++++++++ rocclr/device/pal/paldevice.cpp | 30 +++----- rocclr/device/rocm/rocdevice.cpp | 18 ++--- rocclr/device/rocm/rocdevice.hpp | 1 + rocclr/device/rocm/rocvirtual.cpp | 25 ++++--- rocclr/platform/memory.cpp | 5 +- 8 files changed, 177 insertions(+), 50 deletions(-) diff --git a/hipamd/src/hip_vm.cpp b/hipamd/src/hip_vm.cpp index f8dd197c13..a6c996db37 100644 --- a/hipamd/src/hip_vm.cpp +++ b/hipamd/src/hip_vm.cpp @@ -314,12 +314,12 @@ hipError_t hipMemUnmap(void* ptr, size_t size) { HIP_RETURN(hipErrorInvalidValue); } - amd::Memory* vaddr_mem_obj = amd::MemObjMap::FindVirtualMemObj(ptr); - if (vaddr_mem_obj == nullptr && vaddr_mem_obj->getSize() != size) { + amd::Memory* vaddr_sub_obj = amd::MemObjMap::FindMemObj(ptr); + if (vaddr_sub_obj == nullptr && vaddr_sub_obj->getSize() != size) { HIP_RETURN(hipErrorInvalidValue); } - amd::Memory* phys_mem_obj = vaddr_mem_obj->getUserData().phys_mem_obj; + amd::Memory* phys_mem_obj = vaddr_sub_obj->getUserData().phys_mem_obj; if (phys_mem_obj == nullptr) { HIP_RETURN(hipErrorInvalidValue); } diff --git a/rocclr/device/device.cpp b/rocclr/device/device.cpp index a2c3532f5a..c4e465dcd8 100644 --- a/rocclr/device/device.cpp +++ b/rocclr/device/device.cpp @@ -376,6 +376,120 @@ amd::Memory* MemObjMap::FindVirtualMemObj(const void* k) { } } +//================================================================================================== +bool Device::ValidateVirtualAddressRange(amd::Memory* vaddr_base_obj, amd::Memory* vaddr_sub_obj) { + + // Check if the start of the subbuffer is >= to base start. + if (vaddr_base_obj->getSvmPtr() > vaddr_sub_obj->getSvmPtr()) { + LogError("Sub buffer cannot start with addr lesser than base_start."); + return false; + } + + // Check if the new size belongs to the vaddr_base_obj range. + address vaddr_base_end = reinterpret_cast
(vaddr_base_obj->getSvmPtr()) + + vaddr_base_obj->getSize(); + address vaddr_sub_end = reinterpret_cast
(vaddr_sub_obj->getSvmPtr()) + + vaddr_sub_obj->getSize(); + + if (vaddr_sub_end > vaddr_base_end) { + LogError("Sub buffer memory end cannot be greater than base_end. Return nullptr"); + return false; + } + + return true; +} + +//================================================================================================== +amd::Memory* Device::CreateVirtualBuffer(amd::Context& device_context, void* vptr, size_t size, + int deviceId, bool parent, bool kForceAlloc) { + + amd::Memory* vaddr_base_obj = nullptr; + amd::Memory* vaddr_sub_obj = nullptr; + + if (parent) { + vaddr_base_obj = new (device_context) amd::Buffer(device_context, CL_MEM_VA_RANGE_AMD, size, + vptr); + if (vaddr_base_obj == nullptr) { + LogError("failed to new a va range curr_mem_obj object!"); + return nullptr; + } + // This curr_mem_obj->create() does not create an actual memory but stores the memory info + // with given vptr on ROCr backend. + constexpr bool kSysMemAlloc = false; + constexpr bool kSkipAlloc = false; + if (!vaddr_base_obj->create(nullptr, kSysMemAlloc, kSkipAlloc, kForceAlloc)) { + LogError("failed to create a va range mem object"); + vaddr_base_obj->release(); + return nullptr; + } + + amd::MemObjMap::AddVirtualMemObj(vaddr_base_obj->getSvmPtr(), vaddr_base_obj); + } else { + // If not parent, but sub-buffer/child, then validate the address range + vaddr_base_obj = amd::MemObjMap::FindVirtualMemObj(vptr); + if (vaddr_base_obj == nullptr) { + LogPrintfError("Cannot find entry in VirtualMemObjMap: 0x%x \n", vptr); + return nullptr; + } + + size_t offset = (reinterpret_cast
(vptr) + - reinterpret_cast
(vaddr_base_obj->getSvmPtr())); + vaddr_sub_obj = new (device_context) amd::Buffer(*vaddr_base_obj, CL_MEM_VA_RANGE_AMD, offset, + size); + + // This curr_mem_obj->create() does not create an actual memory but stores the memory info + // with given vptr on ROCr backend. + constexpr bool kSysMemAlloc = false; + constexpr bool kSkipAlloc = false; + if (!vaddr_sub_obj->create(nullptr, kSysMemAlloc, kSkipAlloc, kForceAlloc)) { + LogError("failed to create a va range mem object"); + vaddr_sub_obj->release(); + return nullptr; + } + + vaddr_sub_obj->getUserData().deviceId = deviceId; + + if (!ValidateVirtualAddressRange(vaddr_base_obj, vaddr_sub_obj)) { + LogError("Validation failed on address range, returning nullptr"); + return nullptr; + } + } + + if (vptr != nullptr) { + // Assert to make sure that amd::Memory object has set the right ptr. + guarantee(vptr == (parent ? vaddr_base_obj->getSvmPtr() : vaddr_sub_obj->getSvmPtr()), + "amd::Memory object does not have the right ptr"); + } + + return parent ? vaddr_base_obj : vaddr_sub_obj; +} + +//================================================================================================== +bool Device::DestroyVirtualBuffer(amd::Memory* vaddr_mem_obj) { + + // Argument nullptr check. + if (vaddr_mem_obj == nullptr || vaddr_mem_obj->getSvmPtr() == nullptr) { + LogPrintfError("Mem obj passed is nullptr, vaddr_mem_obj: %p \n", vaddr_mem_obj); + return false; + } + + if (vaddr_mem_obj->parent() == nullptr) { + // If parent is nullptr, then vaddr_mem_obj is the parent. + amd::MemObjMap::RemoveVirtualMemObj(vaddr_mem_obj->getSvmPtr()); + return true; + } else { + // If parent is not nullptr, this is the sub-buffer object. + amd::Memory* vaddr_base_obj = amd::MemObjMap::FindVirtualMemObj(vaddr_mem_obj->getSvmPtr()); + if (vaddr_base_obj == nullptr) { + LogPrintfError("Cannot find mem obj for ptr: 0x%x", vaddr_mem_obj->getSvmPtr()); + return false; + } + vaddr_base_obj->removeSubBuffer(vaddr_mem_obj); + } + + return true; +} + void MemObjMap::UpdateAccess(amd::Device *peerDev) { if (peerDev == nullptr) { return; diff --git a/rocclr/device/device.hpp b/rocclr/device/device.hpp index 401c6613d8..f5e814cb67 100644 --- a/rocclr/device/device.hpp +++ b/rocclr/device/device.hpp @@ -1797,6 +1797,34 @@ class Device : public RuntimeObject { */ virtual void svmFree(void* ptr) const = 0; + /** + * Validatates Virtual Address range between parent and sub-buffer. + * + * @param vaddr_base_obj Parent/base object of the virtual address. + * @param vaddr_sub_obj Sub Buffer object of the virtual address. + */ + static bool ValidateVirtualAddressRange(amd::Memory* vaddr_base_obj, amd::Memory* vaddr_sub_obj); + + /** + * Abstracts the Virtual Buffer creation and memobj/virtual memobj add/delete logic. + * + * @param device_context Context the virtual buffer should be created. + * @param vptr virtual ptr to store in the buffer object. + * @param size Size of the buffer + * @param deviceId deviceId + * @param parent base_obj or sub_obj + * @param ForceAlloc force_alloc + */ + amd::Memory* CreateVirtualBuffer(Context& device_context, void* vptr, size_t size, + int deviceId, bool parent, bool kForceAlloc = false); + + /** + * Deletes Virtual Buffer and creates memob + * + * @param vaddr_mem_obj amd::Memory object of parent/sub buffer. + */ + bool DestroyVirtualBuffer(amd::Memory* vaddr_mem_obj); + /** * Reserve a VA range with no backing store * diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index 5b4d8b339c..9601a8b15f 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -2443,31 +2443,21 @@ void Device::svmFree(void* ptr) const { // ================================================================================================ void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) { - // create a hidden buffer, which will allocated on the device later - auto mem = new (GlbCtx()) amd::Buffer(GlbCtx(), CL_MEM_VA_RANGE_AMD, size, addr); - if (mem == nullptr) { - LogError("failed to new a va range mem object!"); - return nullptr; - } - - constexpr bool kSysMemAlloc = false; - constexpr bool kSkipAlloc = false; - constexpr bool kForceAlloc = true; - // Force the alloc now for VA_Range reservation. - if (!mem->create(nullptr, kSysMemAlloc, kSkipAlloc, kForceAlloc)) { - LogError("failed to create a va range mem object"); - mem->release(); - return nullptr; - } - + amd::Memory* mem = CreateVirtualBuffer(context(), addr, size, -1, true, true); + assert(mem != nullptr); return mem->getSvmPtr(); } // ================================================================================================ void Device::virtualFree(void* addr) { - auto va = amd::MemObjMap::FindVirtualMemObj(addr); - if (nullptr != va) { - va->release(); + auto vaddr_mem_obj = amd::MemObjMap::FindVirtualMemObj(addr); + if (vaddr_mem_obj == nullptr) { + LogPrintfError("Cannot find any mem_obj for addr: 0x%x \n", addr); + return; + } + + if (!vaddr_mem_obj->getContext().devices()[0]->DestroyVirtualBuffer(vaddr_mem_obj)) { + LogPrintfError("Cannot destroy mem_obj:0x%x for addr: 0x%x \n", vaddr_mem_obj, addr); } } diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index d4fb779e4b..091f1814a5 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -2434,22 +2434,12 @@ void* Device::virtualAlloc(void* req_addr, size_t size, size_t alignment) { return nullptr; } - // This mem->create() does not create an actual memory but stores the memory info with given vptr. - auto mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, vptr); + constexpr bool kParent = true; + amd::Memory* mem = CreateVirtualBuffer(context(), vptr, size, -1, kParent); if (mem == nullptr) { - LogError("failed to new a va range mem object!"); - return nullptr; + LogPrintfError("Cannot create Virtual Buffer for vptr: %p of size: %u", vptr, size); } - if (!mem->create(nullptr, false)) { - LogError("failed to create a va range mem object"); - mem->release(); - return nullptr; - } - - // Assert to make sure that amd::Memory object has set the right ptr. - guarantee(vptr == mem->getSvmPtr(), "amd::Memory object does not have the right ptr"); - return mem->getSvmPtr(); } @@ -2459,6 +2449,8 @@ void Device::virtualFree(void* addr) { LogPrintfError("Cannot find the Virtual MemObj entry for this addr 0x%x", addr); } + memObj->getContext().devices()[0]->DestroyVirtualBuffer(memObj); + hsa_status_t hsa_status = hsa_amd_vmem_address_free(memObj->getSvmPtr(), memObj->getSize()); if (hsa_status != HSA_STATUS_SUCCESS) { LogPrintfError("Failed hsa_amd_vmem_address_free. Failed with status:%d \n", hsa_status); diff --git a/rocclr/device/rocm/rocdevice.hpp b/rocclr/device/rocm/rocdevice.hpp index 89dbfe9b60..c2240d8987 100644 --- a/rocclr/device/rocm/rocdevice.hpp +++ b/rocclr/device/rocm/rocdevice.hpp @@ -221,6 +221,7 @@ class NullDevice : public amd::Device { ShouldNotReachHere(); return; } + void* virtualAlloc(void* req_addr, size_t size, size_t alignment) override { ShouldNotReachHere(); return nullptr; diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index 2e42a88778..53a8451f9c 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -2573,8 +2573,8 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) { profilingBegin(vcmd); // Find the amd::Memory object for virtual ptr. vcmd.ptr() is vaddr. - amd::Memory* vaddr_mem_obj = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr()); - if (vaddr_mem_obj == nullptr || !(vaddr_mem_obj->getMemFlags() & CL_MEM_VA_RANGE_AMD)) { + amd::Memory* vaddr_base_obj = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr()); + if (vaddr_base_obj == nullptr || !(vaddr_base_obj->getMemFlags() & CL_MEM_VA_RANGE_AMD)) { profilingEnd(vcmd); return; } @@ -2585,15 +2585,18 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) { // If Physical address is not set, then it is map command. If set, it is unmap command. if (phys_mem_obj != nullptr) { + constexpr bool kParent = false; + amd::Memory* vaddr_sub_obj = phys_mem_obj->getContext().devices()[0]->CreateVirtualBuffer( + phys_mem_obj->getContext(), const_cast(vcmd.ptr()), + vcmd.size(), phys_mem_obj->getUserData().deviceId, kParent); // Map the physical to virtual address the hsa api hsa_amd_vmem_alloc_handle_t opaque_hsa_handle; opaque_hsa_handle.handle = phys_mem_obj->getUserData().hsa_handle; - if ((hsa_status = hsa_amd_vmem_map(vaddr_mem_obj->getSvmPtr(), vcmd.size(), - vaddr_mem_obj->getOffset(), opaque_hsa_handle, 0)) == HSA_STATUS_SUCCESS) { + if ((hsa_status = hsa_amd_vmem_map(vaddr_sub_obj->getSvmPtr(), vcmd.size(), + vaddr_sub_obj->getOffset(), opaque_hsa_handle, 0)) == HSA_STATUS_SUCCESS) { assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) == nullptr); - // Now that we have mapped physical addr to virtual addr, make an entry in the MemObjMap. - amd::MemObjMap::AddMemObj(vcmd.ptr(), vaddr_mem_obj); - vaddr_mem_obj->getUserData().phys_mem_obj = phys_mem_obj; + amd::MemObjMap::AddMemObj(vcmd.ptr(), vaddr_sub_obj); + vaddr_sub_obj->getUserData().phys_mem_obj = phys_mem_obj; } else { LogError("HSA Command: hsa_amd_vmem_map failed!"); } @@ -2602,12 +2605,14 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) { Barriers().WaitCurrent(); // Unmap the object, since the physical addr is set. - if ((hsa_status = hsa_amd_vmem_unmap(vaddr_mem_obj->getSvmPtr(), vcmd.size())) + if ((hsa_status = hsa_amd_vmem_unmap(vaddr_base_obj->getSvmPtr(), vcmd.size())) == HSA_STATUS_SUCCESS) { // assert the va is mapped and needs to be removed - assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) != nullptr); + amd::Memory* vaddr_sub_obj = amd::MemObjMap::FindMemObj(vcmd.ptr()); + assert(vaddr_sub_obj != nullptr); + vaddr_sub_obj->getContext().devices()[0]->DestroyVirtualBuffer(vaddr_sub_obj); amd::MemObjMap::RemoveMemObj(vcmd.ptr()); - vaddr_mem_obj->getUserData().phys_mem_obj = nullptr; + vaddr_sub_obj->getUserData().phys_mem_obj = nullptr; } else { LogError("HSA Command: hsa_amd_vmem_unmap failed"); } diff --git a/rocclr/platform/memory.cpp b/rocclr/platform/memory.cpp index 91933e4049..6f3f027c39 100644 --- a/rocclr/platform/memory.cpp +++ b/rocclr/platform/memory.cpp @@ -320,10 +320,7 @@ bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool force } } } - // Add a VA range into VA range map - if (getMemFlags() & CL_MEM_VA_RANGE_AMD) { - amd::MemObjMap::AddVirtualMemObj(getSvmPtr(), this); - } + // Store the unique id for each memory allocation uniqueId_ = ++numAllocs; return true;