diff --git a/rocclr/device/rocm/rocblit.cpp b/rocclr/device/rocm/rocblit.cpp index 780289d2ea..cf57b6b45d 100644 --- a/rocclr/device/rocm/rocblit.cpp +++ b/rocclr/device/rocm/rocblit.cpp @@ -2186,6 +2186,7 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds bool p2p = (&gpuMem(srcMemory).dev() != &gpuMem(dstMemory).dev()) && ((sizeIn[0] > ROC_P2P_SDMA_SIZE * Ki) || !gpu().IsPendingDispatch()); bool asan = false; + bool ipcShared = srcMemory.owner()->ipcShared() || dstMemory.owner()->ipcShared(); #if defined(__clang__) #if __has_feature(address_sanitizer) asan = true; @@ -2193,7 +2194,7 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds #endif if (setup_.disableHwlCopyBuffer_ || (!srcMemory.isHostMemDirectAccess() && !dstMemory.isHostMemDirectAccess() && - !(p2p || asan))) { + !(p2p || asan) && !ipcShared)) { uint blitType = BlitCopyBuffer; size_t dim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 4b4472bb13..9cabf95ade 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -2184,7 +2184,7 @@ bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, amd_mem_obj->release(); return false; } - + amd_mem_obj->setIpcShared(true); // Add the original mem_ptr to the MemObjMap with newly created amd_mem_obj amd::MemObjMap::AddMemObj(orig_dev_ptr, amd_mem_obj); diff --git a/rocclr/platform/memory.hpp b/rocclr/platform/memory.hpp index 8182378d1a..cc2de2ed51 100644 --- a/rocclr/platform/memory.hpp +++ b/rocclr/platform/memory.hpp @@ -197,6 +197,7 @@ class Memory : public amd::RuntimeObject { uint32_t svmPtrCommited_ : 1; //!< svm host address committed flag uint32_t canBeCached_ : 1; //!< flag to if the object can be cached uint32_t p2pAccess_ : 1; //!< Memory object allows P2P access + uint32_t ipcShared_ : 1; //!< Memory shared between processes }; uint32_t flagsEx_; }; @@ 
-378,6 +379,13 @@ class Memory : public amd::RuntimeObject { //! Check if this objects allows P2P access bool P2PAccess() const { return p2pAccess_; } + //! Mark whether this object is shared between processes (IPC) + void setIpcShared(bool ipcShared) { + ipcShared_ = ipcShared; + } + //! Check if this object is shared between processes (IPC) + bool ipcShared() const { return ipcShared_; } + //! Returns the base device memory object for possible P2P access device::Memory* BaseP2PMemory() const { return deviceMemories_[0].value_; } device::Memory* svmBase() const { return svmBase_; } //!< Returns SVM base for MGPU case