From 8434feed1668ed73a028c5e2428b482bf9752bf2 Mon Sep 17 00:00:00 2001 From: Todd tiantuo Li Date: Fri, 30 Jun 2023 03:33:21 -0700 Subject: [PATCH] SWDEV-333557 - 1. Add support for IPC memory to enable hipIpcGetMemHandle() and hipIpcOpenMemHandle() in PAL path. Set interprocess flag for device allocations in HIP PAL. 2. Fix PAL IPC path with ResourceCache and SubAllocation. 3. Minor fixes for IpcBuffer constructor and IpcAttach. 4. Remove redundant checks that may go wrong. Change-Id: Ie9d99847c7c2c7b3b2aaefaaf60d23bf71b68635 --- rocclr/device/device.cpp | 15 ++++----------- rocclr/device/pal/paldevice.cpp | 5 +++++ rocclr/device/pal/palresource.cpp | 8 +++++++- rocclr/platform/memory.cpp | 2 ++ rocclr/platform/memory.hpp | 8 +++++++- 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/rocclr/device/device.cpp b/rocclr/device/device.cpp index 3e8a284dec..b0cab834fa 100644 --- a/rocclr/device/device.cpp +++ b/rocclr/device/device.cpp @@ -844,16 +844,12 @@ bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle, size_t* me } // Calculate the memory offset from the original base ptr - *mem_offset = reinterpret_cast
(dev_ptr) - reinterpret_cast
(orig_dev_ptr); + *mem_offset = reinterpret_cast
(dev_ptr) + - reinterpret_cast
(orig_dev_ptr) + + amd_mem_obj->getOffset(); + *mem_size = amd_mem_obj->getSize(); - // Check if the dev_ptr is greater than memory allocated - if (*mem_offset > *mem_size) { - DevLogPrintfError( - "Memory offset: %u cannot be greater than size of original memory allocated: %u", *mem_size, - *mem_offset); - return false; - } auto dev_mem = static_cast(amd_mem_obj->getDeviceMemory(*this)); auto result = dev_mem->ExportHandle(handle); @@ -882,9 +878,6 @@ bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, u if (mem_obj_exist == nullptr) { // Add the original mem_ptr to the MemObjMap with newly created amd_mem_obj amd::MemObjMap::AddMemObj(amd_mem_obj->getSvmPtr(), amd_mem_obj); - - // Make sure the mem_offset doesnt overflow the allocated memory - guarantee((mem_offset < mem_size), "IPC mem offset greater than allocated size"); } else { amd_mem_obj->release(); amd_mem_obj = mem_obj_exist; diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index bc7f5ef3f3..f11ef5b409 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -2266,6 +2266,11 @@ void* Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_ constexpr bool kForceAllocation = true; alignment = std::max(alignment, static_cast(info_.memBaseAddrAlign_)); + if (amd::IS_HIP) { + //set interprocess for IPC memory support + flags |= ROCCLR_MEM_INTERPROCESS; + } + amd::Memory* mem = nullptr; freeCPUMem_ = false; if (nullptr == svmPtr) { diff --git a/rocclr/device/pal/palresource.cpp b/rocclr/device/pal/palresource.cpp index 552c5395a5..c5058bf56d 100644 --- a/rocclr/device/pal/palresource.cpp +++ b/rocclr/device/pal/palresource.cpp @@ -1040,7 +1040,8 @@ bool Resource::CreateIpc(CreateParams* params) { if (nullptr == memRef_) { return false; } - params->owner_->setSvmPtr(reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr)); + offset_ += params->owner_->getOffset(); + 
params->owner_->setSvmPtr(reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr + offset_)); return true; } @@ -1178,6 +1179,7 @@ bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr) { params->owner_->setSvmPtr( reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr + subOffset_)); offset_ += static_cast(subOffset_); + params->owner_->setOffset(offset_); } return true; } @@ -2055,6 +2057,10 @@ bool CoarseMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) { createInfo.heaps[0] = Pal::GpuHeapInvisible; createInfo.heaps[1] = Pal::GpuHeapLocal; createInfo.mallPolicy = static_cast(device_->settings().mallPolicy_); + if (amd::IS_HIP) { + //set interprocess for IPC memory support + createInfo.flags.interprocess = 1; + } GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo); if (mem_ref != nullptr) { // Workaround: some chunk memory are not guaranteed to be resident during initial allocation. diff --git a/rocclr/platform/memory.cpp b/rocclr/platform/memory.cpp index 65e7f4881c..6320731b9e 100644 --- a/rocclr/platform/memory.cpp +++ b/rocclr/platform/memory.cpp @@ -94,6 +94,7 @@ Memory::Memory(Context& context, Type type, Flags flags, size_t size, void* svmP vDev_(NULL), mapCount_(0), svmHostAddress_(svmPtr), + resOffset_(0), flagsEx_(0), lockMemoryOps_("Memory Ops Lock", true) { svmPtrCommited_ = (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) ? 
true : false; @@ -117,6 +118,7 @@ Memory::Memory(Memory& parent, Flags flags, size_t origin, size_t size, Type typ vDev_(NULL), mapCount_(0), svmHostAddress_(parent.getSvmPtr()), + resOffset_(0), flagsEx_(0), lockMemoryOps_("Memory Ops Lock", true) { svmPtrCommited_ = parent.isSvmPtrCommited(); diff --git a/rocclr/platform/memory.hpp b/rocclr/platform/memory.hpp index 0ccc652998..aae60102e3 100644 --- a/rocclr/platform/memory.hpp +++ b/rocclr/platform/memory.hpp @@ -194,6 +194,7 @@ class Memory : public amd::RuntimeObject { device::VirtualDevice* vDev_; //!< Memory object belongs to a virtual device only std::atomic_uint mapCount_; //!< Keep track of number of mappings for a memory object void* svmHostAddress_; //!< svm host address; + size_t resOffset_; //!< resource offset union { struct { uint32_t isParent_ : 1; //!< This object is a parent @@ -372,6 +373,10 @@ class Memory : public amd::RuntimeObject { void* getSvmPtr() const { return svmHostAddress_; } //!< svm pointer accessor; void setSvmPtr(void* ptr) { svmHostAddress_ = ptr; } //!< svm pointer setter; + + size_t getOffset() const { return resOffset_; } //!< resource offset accessor; + void setOffset(size_t offset) { resOffset_ = offset; } //!< resource offset setter; + bool isSvmPtrCommited() const { return svmPtrCommited_; } //!< svm host address committed accessor; @@ -679,8 +684,9 @@ public: class IpcBuffer : public Buffer { public: IpcBuffer(Context& context, Flags flags, size_t offset, size_t size, const void* handle) - : Buffer(context, flags, offset, size), handle_(handle) { + : Buffer(context, flags, size), handle_(handle) { setIpcShared(true); + setOffset(offset); } virtual void initDeviceMemory();