// Patch summary (descriptive preamble placed ahead of the first `diff --git` header):
//
// rocclr/device/device.cpp
//   * Device::IpcCreate: *mem_offset becomes (dev_ptr - orig_dev_ptr) plus amd_mem_obj->getOffset().
//     The branch that logged an error and returned false when *mem_offset > *mem_size is removed.
//   * Device::IpcAttach: the guarantee(mem_offset < mem_size, ...) on the newly-added object is removed.
// rocclr/device/pal/paldevice.cpp
//   * Device::svmAlloc: ORs ROCCLR_MEM_INTERPROCESS into flags when amd::IS_HIP is set.
// rocclr/device/pal/palresource.cpp
//   * Resource::CreateIpc: adds params->owner_->getOffset() to offset_ and sets the owner's SVM
//     pointer to gpuVirtAddr + offset_ instead of the bare gpuVirtAddr.
//   * Resource::CreateSvm: after offset_ is advanced by subOffset_, stores it on the owner via setOffset().
//   * CoarseMemorySubAllocator::CreateChunk: sets createInfo.flags.interprocess = 1 when amd::IS_HIP is set.
// rocclr/platform/memory.cpp
//   * Both Memory constructors shown initialise the new resOffset_ member to 0.
// rocclr/platform/memory.hpp
//   * Adds size_t resOffset_ with getOffset()/setOffset() accessors.
//   * IpcBuffer's constructor now calls Buffer(context, flags, size) and records `offset` via
//     setOffset(offset) instead of forwarding it to the Buffer constructor.
//
// NOTE(review): this copy of the patch is extraction-damaged -- hunk lines are collapsed together and
// the template arguments of the casts are missing (e.g. `reinterpret_cast` immediately followed by
// `(dev_ptr)`, `static_cast(` with no `<...>`). Restore it from the original commit before applying.
// NOTE(review): both offset range checks are deleted and no replacement validation is visible in
// these hunks -- confirm the offset is bounded elsewhere.
diff --git a/rocclr/device/device.cpp b/rocclr/device/device.cpp index 3e8a284dec..b0cab834fa 100644 --- a/rocclr/device/device.cpp +++ b/rocclr/device/device.cpp @@ -844,16 +844,12 @@ bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle, size_t* me } // Calculate the memory offset from the original base ptr - *mem_offset = reinterpret_cast
(dev_ptr) - reinterpret_cast
(orig_dev_ptr); + *mem_offset = reinterpret_cast
(dev_ptr) + - reinterpret_cast
(orig_dev_ptr) + + amd_mem_obj->getOffset(); + *mem_size = amd_mem_obj->getSize(); - // Check if the dev_ptr is greater than memory allocated - if (*mem_offset > *mem_size) { - DevLogPrintfError( - "Memory offset: %u cannot be greater than size of original memory allocated: %u", *mem_size, - *mem_offset); - return false; - } auto dev_mem = static_cast(amd_mem_obj->getDeviceMemory(*this)); auto result = dev_mem->ExportHandle(handle); @@ -882,9 +878,6 @@ bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, u if (mem_obj_exist == nullptr) { // Add the original mem_ptr to the MemObjMap with newly created amd_mem_obj amd::MemObjMap::AddMemObj(amd_mem_obj->getSvmPtr(), amd_mem_obj); - - // Make sure the mem_offset doesnt overflow the allocated memory - guarantee((mem_offset < mem_size), "IPC mem offset greater than allocated size"); } else { amd_mem_obj->release(); amd_mem_obj = mem_obj_exist; diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index bc7f5ef3f3..f11ef5b409 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -2266,6 +2266,11 @@ void* Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_ constexpr bool kForceAllocation = true; alignment = std::max(alignment, static_cast(info_.memBaseAddrAlign_)); + if (amd::IS_HIP) { + //set interprocess for IPC memory support + flags |= ROCCLR_MEM_INTERPROCESS; + } + amd::Memory* mem = nullptr; freeCPUMem_ = false; if (nullptr == svmPtr) { diff --git a/rocclr/device/pal/palresource.cpp b/rocclr/device/pal/palresource.cpp index 552c5395a5..c5058bf56d 100644 --- a/rocclr/device/pal/palresource.cpp +++ b/rocclr/device/pal/palresource.cpp @@ -1040,7 +1040,8 @@ bool Resource::CreateIpc(CreateParams* params) { if (nullptr == memRef_) { return false; } - params->owner_->setSvmPtr(reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr)); + offset_ += params->owner_->getOffset(); + 
params->owner_->setSvmPtr(reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr + offset_)); return true; } @@ -1178,6 +1179,7 @@ bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr) { params->owner_->setSvmPtr( reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr + subOffset_)); offset_ += static_cast(subOffset_); + params->owner_->setOffset(offset_); } return true; } @@ -2055,6 +2057,10 @@ bool CoarseMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) { createInfo.heaps[0] = Pal::GpuHeapInvisible; createInfo.heaps[1] = Pal::GpuHeapLocal; createInfo.mallPolicy = static_cast(device_->settings().mallPolicy_); + if (amd::IS_HIP) { + //set interprocess for IPC memory support + createInfo.flags.interprocess = 1; + } GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo); if (mem_ref != nullptr) { // Workaround: some chunk memory are not guaranteed to be resident during initial allocation. diff --git a/rocclr/platform/memory.cpp b/rocclr/platform/memory.cpp index 65e7f4881c..6320731b9e 100644 --- a/rocclr/platform/memory.cpp +++ b/rocclr/platform/memory.cpp @@ -94,6 +94,7 @@ Memory::Memory(Context& context, Type type, Flags flags, size_t size, void* svmP vDev_(NULL), mapCount_(0), svmHostAddress_(svmPtr), + resOffset_(0), flagsEx_(0), lockMemoryOps_("Memory Ops Lock", true) { svmPtrCommited_ = (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) ? 
true : false; @@ -117,6 +118,7 @@ Memory::Memory(Memory& parent, Flags flags, size_t origin, size_t size, Type typ vDev_(NULL), mapCount_(0), svmHostAddress_(parent.getSvmPtr()), + resOffset_(0), flagsEx_(0), lockMemoryOps_("Memory Ops Lock", true) { svmPtrCommited_ = parent.isSvmPtrCommited(); diff --git a/rocclr/platform/memory.hpp b/rocclr/platform/memory.hpp index 0ccc652998..aae60102e3 100644 --- a/rocclr/platform/memory.hpp +++ b/rocclr/platform/memory.hpp @@ -194,6 +194,7 @@ class Memory : public amd::RuntimeObject { device::VirtualDevice* vDev_; //!< Memory object belongs to a virtual device only std::atomic_uint mapCount_; //!< Keep track of number of mappings for a memory object void* svmHostAddress_; //!< svm host address; + size_t resOffset_; //!< resource offset union { struct { uint32_t isParent_ : 1; //!< This object is a parent @@ -372,6 +373,10 @@ class Memory : public amd::RuntimeObject { void* getSvmPtr() const { return svmHostAddress_; } //!< svm pointer accessor; void setSvmPtr(void* ptr) { svmHostAddress_ = ptr; } //!< svm pointer setter; + + size_t getOffset() const { return resOffset_; } //!< resource offset accessor; + void setOffset(size_t offset) { resOffset_ = offset; } //!< resource offset setter; + bool isSvmPtrCommited() const { return svmPtrCommited_; } //!< svm host address committed accessor; @@ -679,8 +684,9 @@ public: class IpcBuffer : public Buffer { public: IpcBuffer(Context& context, Flags flags, size_t offset, size_t size, const void* handle) - : Buffer(context, flags, offset, size), handle_(handle) { + : Buffer(context, flags, size), handle_(handle) { setIpcShared(true); + setOffset(offset); } virtual void initDeviceMemory();