From 558a26cf98f7b9c671b5985b721d13e941305345 Mon Sep 17 00:00:00 2001 From: "Arandjelovic, Marko" Date: Mon, 12 May 2025 16:55:25 +0200 Subject: [PATCH] SWDEV-511204 - Mapped virtual memory should use device instead of host context (#213) Since the sub-buffer(virtual memory that is mapped to device memory) is associated with device memory, it should utilize the device context instead of the host context. The original implementation caused hipMemcpyPeer to not take the P2P path, as the memory object was treated as host memory. [ROCm/clr commit: a7492c516dc432bfa724101cf0ad996911acd739] --- projects/clr/hipamd/src/hip_memory.cpp | 12 ++++++++---- projects/clr/rocclr/device/device.cpp | 10 ++++++---- projects/clr/rocclr/platform/memory.hpp | 10 ++++++++-- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index a0b6bcf6b7..caf5fa79f6 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -539,7 +539,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, hip::Stream* pStream = &stream; switch (type) { case hipWriteBuffer: - if (queueDevice != dstMemory->GetDeviceById()) { + if (queueDevice != dstMemory->GetDeviceById() && + !(dstMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) { pStream = hip::getNullStream(dstMemory->GetDeviceById()->context()); amd::Command* cmd = stream.getLastQueuedCommand(true); if (cmd != nullptr) { @@ -551,7 +552,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, copyMetadata); break; case hipReadBuffer: - if (queueDevice != srcMemory->GetDeviceById()) { + if (queueDevice != srcMemory->GetDeviceById() && + !(srcMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) { pStream = hip::getNullStream(srcMemory->GetDeviceById()->context()); amd::Command* cmd = stream.getLastQueuedCommand(true); if (cmd != nullptr) { @@ -589,7 +591,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, } else if (srcMemory->GetDeviceById() != dstMemory->GetDeviceById()) { // Scenarios such as DtoH where dst is pinned memory if ((queueDevice != srcMemory->GetDeviceById()) && - (dstMemory->getContext().devices().size() != 1)) { + (dstMemory->getContext().devices().size() != 1) && + !(srcMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) { pStream = hip::getNullStream(srcMemory->GetDeviceById()->context()); amd::Command* cmd = stream.getLastQueuedCommand(true); if (cmd != nullptr) { @@ -597,7 +600,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, } // Scenarios such as HtoD where src is pinned memory } else if ((queueDevice != dstMemory->GetDeviceById()) && - (srcMemory->getContext().devices().size() != 1)) { + (srcMemory->getContext().devices().size() != 1) && + !(dstMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) { pStream = hip::getNullStream(dstMemory->GetDeviceById()->context()); amd::Command* cmd = stream.getLastQueuedCommand(true); if (cmd != nullptr) { diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index f02d5b7acb..d3201126d3 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -537,10 +537,12 @@ amd::Memory* Device::CreateVirtualBuffer(amd::Context& device_context, void* vpt } assert(vaddr_base_obj->getMemFlags() & CL_MEM_VA_RANGE_AMD); - size_t offset = (reinterpret_cast
(vptr) - - reinterpret_cast
(vaddr_base_obj->getSvmPtr())); - Context& ctx = vaddr_base_obj->getContext(); - vaddr_sub_obj = new (ctx) amd::Buffer(*vaddr_base_obj,CL_MEM_VA_RANGE_AMD, offset, size); + size_t offset = + (reinterpret_cast
(vptr) - reinterpret_cast
(vaddr_base_obj->getSvmPtr())); + vaddr_sub_obj = + new (device_context) amd::Buffer(device_context, CL_MEM_VA_RANGE_AMD, size, vptr); + vaddr_sub_obj->SetParent(vaddr_base_obj); + vaddr_sub_obj->setOrigin(offset); // This curr_mem_obj->create() does not create an actual memory but stores the memory info // with given vptr on ROCr backend. diff --git a/projects/clr/rocclr/platform/memory.hpp b/projects/clr/rocclr/platform/memory.hpp index cdd3f4ff3a..d0ca853c98 100644 --- a/projects/clr/rocclr/platform/memory.hpp +++ b/projects/clr/rocclr/platform/memory.hpp @@ -329,11 +329,18 @@ class Memory : public amd::RuntimeObject { // Accessors Memory* parent() const { return parent_; } - void SetParent(amd::Memory* parent) { parent_ = parent; } + void SetParent(amd::Memory* parent) { + parent_ = parent; + if (parent != nullptr) { + parent_->isParent_ = true; + parent_->retain(); + } + } bool isParent() const { return isParent_; } bool ImageView() const { return image_view_; } size_t getOrigin() const { return origin_; } + void setOrigin(size_t origin) { origin_ = origin; } size_t getSize() const { return size_; } Flags getMemFlags() const { return flags_; } Type getType() const { return type_; } @@ -721,4 +728,3 @@ class IpcBuffer : public Buffer { }; } // namespace amd -