From af5944dc71ea28b804669cd9005e89d2ca0170f9 Mon Sep 17 00:00:00 2001 From: German Date: Tue, 6 Jun 2023 16:56:09 -0400 Subject: [PATCH] SWDEV-311270 - Add IPC support for memory pools Initial implementation for hipMemPoolExportToShareableHandle, hipMemPoolImportFromShareableHandle, hipMemPoolExportPointer and hipMemPoolImportPointer Change-Id: I0ebdc48e9163b394ded560adca6c38bbc5aee7d1 [ROCm/clr commit: 1a0c3e4dc45237716c08a6b74be5ff6405422ead] --- projects/clr/hipamd/src/hip_mempool.cpp | 63 ++++++++- projects/clr/hipamd/src/hip_mempool_impl.cpp | 50 ++++++- projects/clr/hipamd/src/hip_mempool_impl.hpp | 77 ++++++++--- projects/clr/rocclr/device/pal/paldevice.cpp | 122 +++++++++++++++++- projects/clr/rocclr/device/pal/paldevice.hpp | 9 ++ .../clr/rocclr/device/pal/palresource.cpp | 34 ++++- .../clr/rocclr/device/pal/palresource.hpp | 17 ++- projects/clr/rocclr/device/pal/palvirtual.cpp | 2 +- projects/clr/rocclr/os/os.hpp | 9 ++ projects/clr/rocclr/os/os_posix.cpp | 41 ++++++ projects/clr/rocclr/os/os_win32.cpp | 36 ++++++ projects/clr/rocclr/platform/memory.cpp | 8 ++ projects/clr/rocclr/platform/memory.hpp | 26 +++- 13 files changed, 454 insertions(+), 40 deletions(-) diff --git a/projects/clr/hipamd/src/hip_mempool.cpp b/projects/clr/hipamd/src/hip_mempool.cpp index 36dd674e55..eac005a962 100644 --- a/projects/clr/hipamd/src/hip_mempool.cpp +++ b/projects/clr/hipamd/src/hip_mempool.cpp @@ -236,7 +236,7 @@ hipError_t hipMemPoolCreate(hipMemPool_t* mem_pool, const hipMemPoolProps* pool_ HIP_RETURN(hipErrorInvalidValue); } auto device = g_devices[pool_props->location.id]; - auto pool = new hip::MemoryPool(device); + auto pool = new hip::MemoryPool(device, pool_props->handleTypes != hipMemHandleTypeNone); if (pool == nullptr) { HIP_RETURN(hipErrorInvalidValue); } @@ -298,7 +298,15 @@ hipError_t hipMemPoolExportToShareableHandle( if (mem_pool == nullptr || shared_handle == nullptr || flags == -1) { HIP_RETURN(hipErrorInvalidValue); } - 
HIP_RETURN(hipErrorNotSupported); + + auto mpool = reinterpret_cast(mem_pool); + auto handle = mpool->Export(); + if (!handle) { + HIP_RETURN(hipErrorInvalidValue); + } + *reinterpret_cast(shared_handle) = handle; + + HIP_RETURN(hipSuccess); } // ================================================================================================ @@ -311,7 +319,26 @@ hipError_t hipMemPoolImportFromShareableHandle( if (mem_pool == nullptr || shared_handle == nullptr || flags == -1) { HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN(hipErrorNotSupported); + + auto device = g_devices[0]; + auto pool = new hip::MemoryPool(device); + if (pool == nullptr) { + HIP_RETURN(hipErrorOutOfMemory); + } + // Note: The interface casts the integer value of file handle under Linux into void*, + // but compiler may not allow to cast it back. Hence, make a cast with a union... + union { + amd::Os::FileDesc desc; + void* ptr; + } handle; + handle.ptr = shared_handle; + if (!pool->Import(handle.desc)) { + pool->release(); + HIP_RETURN(hipErrorOutOfMemory); + } + *mem_pool = reinterpret_cast(pool); + + HIP_RETURN(hipSuccess); } // ================================================================================================ @@ -320,7 +347,22 @@ hipError_t hipMemPoolExportPointer(hipMemPoolPtrExportData* export_data, void* p if (export_data == nullptr || ptr == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN(hipErrorNotSupported); + + size_t offset = 0; + auto memory = getMemoryObject(ptr, offset); + if (memory != nullptr) { + auto id = memory->getUserData().deviceId; + // Note: export_data must point to 64 bytes of shared memory + auto shared = reinterpret_cast(export_data); + + if (!g_devices[id]->devices()[0]->IpcCreate(ptr, + &shared->size_, &shared->handle_[0], &shared->offset_)) { + HIP_RETURN(hipErrorOutOfMemory); + } + } else { + HIP_RETURN(hipErrorOutOfMemory); + } + HIP_RETURN(hipSuccess); } // 
================================================================================================ @@ -332,6 +374,15 @@ hipError_t hipMemPoolImportPointer( if (mem_pool == nullptr || export_data == nullptr || ptr == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - - HIP_RETURN(hipErrorNotSupported); + auto mpool = reinterpret_cast(mem_pool); + auto shared = reinterpret_cast(export_data); + if (!mpool->Device()->devices()[0]->IpcAttach( + &shared->handle_[0], shared->size_, shared->offset_, 0, ptr)) { + HIP_RETURN(hipErrorOutOfMemory); + } + size_t offset = 0; + auto memory = getMemoryObject(*ptr, offset); + mpool->AddBusyMemory(memory); + mpool->retain(); + HIP_RETURN(hipSuccess); } diff --git a/projects/clr/hipamd/src/hip_mempool_impl.cpp b/projects/clr/hipamd/src/hip_mempool_impl.cpp index 45b7bdc680..afdc35e3eb 100644 --- a/projects/clr/hipamd/src/hip_mempool_impl.cpp +++ b/projects/clr/hipamd/src/hip_mempool_impl.cpp @@ -168,8 +168,8 @@ void* MemoryPool::AllocateMemory(size_t size, hip::Stream* stream, void* dptr) { if (dev_info.maxMemAllocSize_ < size) { return nullptr; } - - dev_ptr = amd::SvmBuffer::malloc(*context, 0, size, dev_info.memBaseAddrAlign_, nullptr); + cl_svm_mem_flags flags = (state_.interprocess_) ? 
ROCCLR_MEM_INTERPROCESS : 0; + dev_ptr = amd::SvmBuffer::malloc(*context, flags, size, dev_info.memBaseAddrAlign_, nullptr); if (dev_ptr == nullptr) { size_t free = 0, total =0; hipError_t err = hipMemGetInfo(&free, &total); @@ -425,10 +425,56 @@ void MemoryPool::GetAccess(hip::Device* device, hipMemAccessFlags* flags) { } } +// ================================================================================================ void MemoryPool::FreeAllMemory(hip::Stream* stream) { while (!busy_heap_.Allocations().empty()) { FreeMemory(busy_heap_.Allocations().begin()->first, stream); } } +// ================================================================================================ +amd::Os::FileDesc MemoryPool::Export() { + amd::ScopedLock lock(lock_pool_ops_); + if (shared_ != nullptr) { + return shared_->handle_; + } + + constexpr uint32_t kFileNameSize = 20; + char file_name[kFileNameSize]; + // Generate a unique name from the mempool pointer + // Note: Windows can accept an unnamed allocation + snprintf(file_name, kFileNameSize, "%p", this); + amd::Os::FileDesc handle{}; + shared_ = reinterpret_cast(amd::Os::CreateIpcMemory( + file_name, sizeof(SharedMemPool), &handle)); + if (shared_ != nullptr) { + shared_->handle_ = handle; + shared_->state_ = state_.value_; + shared_->access_size_ = 0; + memset(shared_->access_, 0, sizeof(SharedAccess) * kMaxMgpuAccess); + assert((access_map_.size() <= kMaxMgpuAccess) && "Can't support more GPU(s) in shared access" ); + for (auto it : access_map_) { + shared_->access_[shared_->access_size_] = SharedAccess{it.first->deviceId(), it.second}; + shared_->access_size_++; + } + } + return handle; +} + +// ================================================================================================ +bool MemoryPool::Import(amd::Os::FileDesc handle) { + amd::ScopedLock lock(lock_pool_ops_); + bool result = false; + auto shared = reinterpret_cast( + amd::Os::OpenIpcMemory(nullptr, handle, sizeof(SharedMemPool))); + + if (shared 
!= nullptr) { + state_.value_ = shared->state_; + for (uint32_t i = 0; i < shared->access_size_; ++i) { + access_map_[g_devices[shared->access_[i].device_id_]] = shared->access_[i].flags_; + } + result = true; + } + return result; +} } diff --git a/projects/clr/hipamd/src/hip_mempool_impl.hpp b/projects/clr/hipamd/src/hip_mempool_impl.hpp index 75674742c0..2541e7a490 100644 --- a/projects/clr/hipamd/src/hip_mempool_impl.hpp +++ b/projects/clr/hipamd/src/hip_mempool_impl.hpp @@ -31,6 +31,12 @@ namespace hip { class Device; class Stream; +struct SharedMemPointer { + size_t offset_; + size_t size_; + char handle_[IHIP_IPC_MEM_HANDLE_SIZE]; +}; + struct MemoryTimestamp { MemoryTimestamp(hip::Stream* stream, hip::Event* event = nullptr): event_(event) { if (stream != nullptr) { @@ -160,16 +166,34 @@ private: /// hipMemPoolReuseAllowOpportunistic option will validate if HIP event, /// associated with memory is done, then reuse can be performed. class MemoryPool : public amd::ReferenceCountedObject { -public: - MemoryPool(hip::Device* device): - busy_heap_(device), - free_heap_(device), - lock_pool_ops_("Pool operations", true), device_(device) { - device_->AddMemoryPool(this); - state_.event_dependencies_ = 1; - state_.opportunistic_ = 1; - state_.internal_dependencies_ = 1; - } + public: + struct SharedAccess { + int device_id_; //!< Device ID for access with a specified shared resource + hipMemAccessFlags flags_; //!< Flags which define access type + }; + + static constexpr uint32_t kMaxMgpuAccess = 32; + struct SharedMemPool { + amd::Os::FileDesc handle_; //!< File descriptor for shared memory + uint32_t state_; //!< Memory pool state + uint32_t access_size_; //!< The number of entries in access array + SharedAccess access_[kMaxMgpuAccess]; //!< The list of devices for access + }; + + MemoryPool(hip::Device* device, bool interprocess = false) + : busy_heap_(device), + free_heap_(device), + lock_pool_ops_("Pool operations", true), + device_(device), + shared_(nullptr) 
{ + device_->AddMemoryPool(this); + state_.value_ = 0; + state_.event_dependencies_ = 1; + state_.opportunistic_ = 1; + state_.internal_dependencies_ = 1; + state_.interprocess_ = interprocess; + } + virtual ~MemoryPool() { if (!busy_heap_.IsEmpty()) { LogError("Shouldn't destroy pool with busy allocations!"); @@ -177,6 +201,10 @@ public: ReleaseAllMemory(); // Remove memory pool from the list of all pool on the current device device_->RemoveMemoryPool(this); + if (shared_ != nullptr) { + // Note: The app supposes to close the handle... Double close in Windows will cause a crash + amd::Os::CloseIpcMemory(0, shared_, sizeof(SharedMemPool)); + } } /// The same stream can reuse memory without HIP event validation @@ -186,9 +214,7 @@ public: bool FreeMemory(amd::Memory* memory, hip::Stream* stream); /// Check if memory is active and belongs to the busy heap - bool IsBusyMemory(amd::Memory* memory) const { - return busy_heap_.IsActiveMemory(memory); - } + bool IsBusyMemory(amd::Memory* memory) const { return busy_heap_.IsActiveMemory(memory); } /// Releases all allocations from free_heap_. It can be called on Stream or Device synchronization /// @note The caller must make sure it's safe to release memory @@ -200,6 +226,10 @@ public: /// Releases all allocations in MemoryPool void ReleaseAllMemory(); + /// Place the allocated memory into the busy heap + void AddBusyMemory(amd::Memory* memory) { + busy_heap_.AddMemory(memory, nullptr); + } /// Trims the pool until it has only min_bytes_to_hold void TrimTo(size_t min_bytes_to_hold); @@ -221,6 +251,12 @@ public: /// Frees all busy memory void FreeAllMemory(hip::Stream* stream = nullptr); + /// Exports memory pool into an OS specific handle + amd::Os::FileDesc Export(); + + /// Imports memory pool from an OS specific handle + bool Import(amd::Os::FileDesc handle); + /// Accessors for the pool state bool EventDependencies() const { return (state_.event_dependencies_) ? 
true : false; } bool Opportunistic() const { return (state_.opportunistic_) ? true : false; } @@ -233,15 +269,22 @@ private: Heap busy_heap_; //!< Heap of busy allocations Heap free_heap_; //!< Heap of freed allocations - struct { - uint32_t event_dependencies_ : 1; //!< Event dependencies tracking is enabled - uint32_t opportunistic_ : 1; //!< HIP event check is enabled - uint32_t internal_dependencies_ : 1; //!< Runtime adds internal events to handle memory dependencies + union { + struct { + uint32_t event_dependencies_ : 1; //!< Event dependencies tracking is enabled + uint32_t opportunistic_ : 1; //!< HIP event check is enabled + uint32_t internal_dependencies_ : 1; //!< Runtime adds internal events to handle memory + //!< dependencies + uint32_t interprocess_ : 1; //!< Memory pool can be used in interprocess communications + }; + uint32_t value_; } state_; amd::Monitor lock_pool_ops_; //!< Access to the pool must be lock protected std::map access_map_; //!< Map of access to the pool from devices hip::Device* device_; //!< Hip device the heap will reside + SharedMemPool* shared_; //!< Pointer to shared memory for IPC }; + } // Mamespace hip diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index 8610ceeb8e..3bfaf31844 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -332,7 +332,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, memcpy(info_.uuid_ + 4, &palProp.pciProperties.busNumber, sizeof(uint32_t)); memcpy(info_.uuid_ + 8, &palProp.pciProperties.deviceNumber, sizeof(uint32_t)); memcpy(info_.uuid_ + 12, &palProp.pciProperties.functionNumber, sizeof(uint32_t)); - + info_.maxWorkItemDimensions_ = 3; info_.maxComputeUnits_ = settings().enableWgpMode_ @@ -1571,7 +1571,10 @@ pal::Memory* Device::createBuffer(amd::Memory& owner, bool directAccess) const { type = Resource::P2PAccess; } } - + params.interprocess_ = 
(owner.getMemFlags() & ROCCLR_MEM_INTERPROCESS) ? true : false; + if (owner.ipcShared()) { + type = Resource::IpcMemory; + } // Create memory object result = gpuMemory->create(type, ¶ms); @@ -2342,6 +2345,116 @@ void Device::virtualFree(void* addr) { } } +// ================================================================================================ +bool Device::IpcCreate(void* dev_ptr, size_t* mem_size, void* handle, size_t* mem_offset) const { + hsa_status_t hsa_status = HSA_STATUS_SUCCESS; + + amd::Memory* amd_mem_obj = amd::MemObjMap::FindMemObj(dev_ptr); + if (amd_mem_obj == nullptr) { + DevLogPrintfError("Cannot retrieve amd_mem_obj for dev_ptr: 0x%x", dev_ptr); + return false; + } + + // Get the original pointer from the amd::Memory object + void* orig_dev_ptr = nullptr; + if (amd_mem_obj->getSvmPtr() != nullptr) { + orig_dev_ptr = amd_mem_obj->getSvmPtr(); + } else if (amd_mem_obj->getHostMem() != nullptr) { + orig_dev_ptr = amd_mem_obj->getHostMem(); + } else { + ShouldNotReachHere(); + } + + // Check if the dev_ptr is lesser than original dev_ptr + if (orig_dev_ptr > dev_ptr) { + // If this happens, then revisit FindMemObj logic + DevLogPrintfError("Original dev_ptr: 0x%x cannot be greater than dev_ptr: 0x%x", orig_dev_ptr, + dev_ptr); + return false; + } + + // Calculate the memory offset from the original base ptr + *mem_offset = reinterpret_cast
(dev_ptr) - reinterpret_cast
(orig_dev_ptr); + *mem_size = amd_mem_obj->getSize(); + + // Check if the dev_ptr is greater than memory allocated + if (*mem_offset > *mem_size) { + DevLogPrintfError( + "Memory offset: %u cannot be greater than size of original memory allocated: %u", *mem_size, + *mem_offset); + return false; + } + auto dev_mem = getGpuMemory(amd_mem_obj); + *reinterpret_cast(handle) = dev_mem->ExportHandle(); + + return true; +} + +// ================================================================================================ +bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, unsigned int flags, + void** dev_ptr) const { + amd::Memory* amd_mem_obj = nullptr; + + // Note: ROCr path has a validation for duplicated IPC memory, but PAL currently can't + // identify the duplicates + + // Create an amd Memory object for the handle + amd_mem_obj = new (context()) amd::IpcBuffer(context(), flags, mem_offset, mem_size, + *reinterpret_cast(const_cast(handle))); + if (amd_mem_obj == nullptr) { + LogError("failed to create a mem object!"); + return false; + } + + if (!amd_mem_obj->create(nullptr)) { + LogError("failed to create a svm hidden buffer!"); + amd_mem_obj->release(); + return false; + } + + // Add the original mem_ptr to the MemObjMap with newly created amd_mem_obj + amd::MemObjMap::AddMemObj(amd_mem_obj->getSvmPtr(), amd_mem_obj); + + // Make sure the mem_offset doesnt overflow the allocated memory + guarantee((mem_offset < mem_size), "IPC mem offset greater than allocated size"); + + *dev_ptr = amd_mem_obj->getSvmPtr(); + + return true; +} + +// ================================================================================================ +bool Device::IpcDetach(void* dev_ptr) const { + hsa_status_t hsa_status = HSA_STATUS_SUCCESS; + + amd::Memory* amd_mem_obj = amd::MemObjMap::FindMemObj(dev_ptr); + if (amd_mem_obj == nullptr) { + DevLogPrintfError("Memory object for the ptr: 0x%x cannot be null \n", dev_ptr); + return false; + } + + if 
(!amd_mem_obj->ipcShared()) { + DevLogPrintfError("Memory object for the ptr: 0x%x is not ipcShared \n", dev_ptr); + return false; + } + + // Get the original pointer from the amd::Memory object + void* orig_dev_ptr = nullptr; + if (amd_mem_obj->getSvmPtr() != nullptr) { + orig_dev_ptr = amd_mem_obj->getSvmPtr(); + } else if (amd_mem_obj->getHostMem() != nullptr) { + orig_dev_ptr = amd_mem_obj->getHostMem(); + } else { + ShouldNotReachHere(); + } + + if (amd_mem_obj->release() == 0) { + amd::MemObjMap::RemoveMemObj(orig_dev_ptr); + } + + return true; +} + // ================================================================================================ bool Device::AcquireExclusiveGpuAccess() { // Lock the virtual GPU list @@ -2508,8 +2621,7 @@ bool Device::createBlitProgram() { if (info().cooperativeGroups_) { extraBlits.append(GwsInitSourceCode); } - } - else { + } else { if (settings().oclVersion_ >= OpenCL20) { extraBlits = iDev()->GetDispatchKernelSource(); if (settings().useLightning_) { @@ -2553,7 +2665,7 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI return result; } - +// ================================================================================================ bool Device::importExtSemaphore(void** extSemaphore, const amd::Os::FileDesc& handle, amd::ExternalSemaphoreHandleType sem_handle_type) { Pal::ExternalQueueSemaphoreOpenInfo palOpenInfo = {}; diff --git a/projects/clr/rocclr/device/pal/paldevice.hpp b/projects/clr/rocclr/device/pal/paldevice.hpp index 2bbd1c99f1..3efc171729 100644 --- a/projects/clr/rocclr/device/pal/paldevice.hpp +++ b/projects/clr/rocclr/device/pal/paldevice.hpp @@ -533,6 +533,15 @@ class Device : public NullDevice { virtual void* virtualAlloc(void* addr, size_t size, size_t alignment); virtual void virtualFree(void* addr); + //! 
Creates IPC memory handle from a provided SVM pointer + virtual bool IpcCreate(void* dev_ptr, size_t* mem_size, + void* handle, size_t* mem_offset) const override; + //! Attach IPC memory to the current device + virtual bool IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, unsigned int flags, + void** dev_ptr) const override; + //! Detach IPC memory from the current device + virtual bool IpcDetach(void* dev_ptr) const override; + //! Returns SRD manger object SrdManager& srds() const { return *srdManager_; } diff --git a/projects/clr/rocclr/device/pal/palresource.cpp b/projects/clr/rocclr/device/pal/palresource.cpp index a8d75ac4cf..5dbc4a895f 100644 --- a/projects/clr/rocclr/device/pal/palresource.cpp +++ b/projects/clr/rocclr/device/pal/palresource.cpp @@ -1027,6 +1027,22 @@ bool Resource::CreateInterop(CreateParams* params) { return true; } +// ================================================================================================ +bool Resource::CreateIpc(CreateParams* params) { + Pal::ExternalGpuMemoryOpenInfo gpuMemOpenInfo = {}; + Pal::ExternalResourceOpenInfo& openInfo = gpuMemOpenInfo.resourceInfo; + + openInfo.hExternalResource = reinterpret_cast(params->owner_)->Handle(); + openInfo.flags.ntHandle = false; + + memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo); + if (nullptr == memRef_) { + return false; + } + params->owner_->setSvmPtr(reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr)); + return true; +} + // ================================================================================================ bool Resource::CreateP2PAccess(CreateParams* params) { if (params->owner_->asImage()) { @@ -1123,6 +1139,7 @@ bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr) { createInfo.flags.useReservedGpuVa = false; createInfo.pReservedGpuVaOwner = nullptr; } + createInfo.flags.interprocess = desc_.interprocess_; if (!dev().settings().svmFineGrainSystem_) { memRef_ = dev().resourceCache().findGpuMemory(&desc_, 
createInfo.size, createInfo.alignment, createInfo.pReservedGpuVaOwner, &subOffset_); @@ -1141,6 +1158,8 @@ bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr) { createInfo.pReservedGpuVaOwner = params->svmBase_->iMem(); } memTypeToHeap(&createInfo); + createInfo.flags.interprocess = desc_.interprocess_; + memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment, createInfo.pReservedGpuVaOwner, &subOffset_); if (memRef_ == nullptr) { @@ -1210,6 +1229,8 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear if (dev().settings().disablePersistent_ && (memoryType() == Persistent)) { desc_.type_ = RemoteUSWC; } + desc_.interprocess_ = (nullptr != params) ? params->interprocess_ : false; + switch (memoryType()) { case OGLInterop: case D3D9Interop: @@ -1242,6 +1263,8 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear } return true; } + case IpcMemory: + return CreateIpc(params); default: break; } @@ -1313,6 +1336,14 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear return true; } +// ================================================================================================ +void* Resource::ExportHandle() const { + Pal::GpuMemoryExportInfo exportInfo = {}; + // Set default flags in case they are not provided by application + exportInfo.accessFlags = GENERIC_READ | GENERIC_WRITE; + Pal::OsExternalHandle handle = iMem()->ExportExternalHandle(exportInfo); + return reinterpret_cast(handle); +} // ================================================================================================ void Resource::free() { if (memRef_ == nullptr) { @@ -2246,7 +2277,8 @@ GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal (size > (sizeRes >> 1)) && ((it.second->iMem()->Desc().gpuVirtAddr % alignment) == 0) && (entry->isAllocExecute_ == desc->isAllocExecute_) && (entry->SVMRes_ == desc->SVMRes_) && - 
(entry->gl2CacheDisabled_ == desc->gl2CacheDisabled_)) { + (entry->gl2CacheDisabled_ == desc->gl2CacheDisabled_) && + (entry->interprocess_ == desc->interprocess_)) { ref = it.second; cacheSize_ -= sizeRes; if (entry->type_ == Resource::Local) { diff --git a/projects/clr/rocclr/device/pal/palresource.hpp b/projects/clr/rocclr/device/pal/palresource.hpp index 2a0e17ff39..5350442671 100644 --- a/projects/clr/rocclr/device/pal/palresource.hpp +++ b/projects/clr/rocclr/device/pal/palresource.hpp @@ -103,7 +103,8 @@ class Resource : public amd::HeapObject { amd::Memory* owner_; //!< Resource's owner VirtualGPU* gpu_; //!< Resource won't be shared between multiple queues const Resource* svmBase_; //!< SVM base for MGPU allocations - CreateParams() : owner_(nullptr), gpu_(nullptr), svmBase_(nullptr) {} + bool interprocess_; //!< Resource can be used in interprocess communication + CreateParams() : owner_(nullptr), gpu_(nullptr), svmBase_(nullptr), interprocess_(false) {} }; struct PinnedParams : public CreateParams { @@ -176,7 +177,8 @@ class Resource : public amd::HeapObject { Shader, //!< resource is a shader P2PAccess, //!< resource is a shared resource for P2P access VkInterop, //!< resource is a Vulkan memory object - VaRange //!< reousrce is a virtual address range + VaRange, //!< resource is a virtual address range + IpcMemory //!< resource is an IPC memory object }; //! Resource map flags @@ -213,6 +215,7 @@ class Resource : public amd::HeapObject { uint isDoppTexture_ : 1; //!< PAL resource is for a DOPP desktop texture uint gl2CacheDisabled_ : 1;//!< PAL resource is allocated with GPU L2 cache disabled. uint reserved_va_ : 1; //!< PAL resource was allocated for a reserved VA + uint interprocess_ : 1; //!< PAL resource can be shared between processes }; uint state_; }; @@ -430,9 +433,17 @@ class Resource : public amd::HeapObject { */ bool CreateP2PAccess(CreateParams* params //!< special parameters for resource allocation ); + //! 
Returns an export handle for the interprocess communication + void* ExportHandle() const; protected: - /*! \brief Creates a PAL iamge object, associated with the resource + /*! \brief Creates a PAL memory object, from IPC handle + * + * \return True if we succesfully created a PAL resource + */ + bool CreateIpc(CreateParams* params); + + /*! \brief Creates a PAL iamge object, associated with the resource * * \return True if we succesfully created a PAL resource */ diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index a1e3e92cc2..e57de246b2 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -1048,7 +1048,7 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs, dev().rgpCaptureMgr()->RegisterTimedQueue(2 * index() + 1, queue(SdmaEngine).iQueue_, &dbg_vmid); } - + return true; } diff --git a/projects/clr/rocclr/os/os.hpp b/projects/clr/rocclr/os/os.hpp index ab026bcac5..b4649846b5 100644 --- a/projects/clr/rocclr/os/os.hpp +++ b/projects/clr/rocclr/os/os.hpp @@ -131,6 +131,15 @@ class Os : AllStatic { // Given a valid mmaped ptr with correct size, unmaps the ptr from memory static bool MemoryUnmapFile(const void* mmap_ptr, size_t mmap_size); + // Given a valid filename create system memory that can be shared between processes + static void* CreateIpcMemory(const char* fname, size_t size, FileDesc* desc); + + // Given a valid file descriptor open IPC memory + static void* OpenIpcMemory(const char* fname, const FileDesc desc, size_t size); + + // Given a valid file descriptor close IPC memory + static void CloseIpcMemory(const FileDesc desc, const void* ptr, size_t size); + private: static constexpr size_t FILE_PATH_MAX_LENGTH = 1024; diff --git a/projects/clr/rocclr/os/os_posix.cpp b/projects/clr/rocclr/os/os_posix.cpp index c41c39d338..e48a4bcfc7 100644 --- a/projects/clr/rocclr/os/os_posix.cpp +++ 
b/projects/clr/rocclr/os/os_posix.cpp @@ -910,6 +910,47 @@ int Os::getProcessId() { return ::getpid(); } +// ================================================================================================ +void* Os::CreateIpcMemory(const char* fname, size_t size, FileDesc* desc) { + *desc = shm_open(fname, O_RDWR | O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); + if (*desc < 0) { + return nullptr; + } + + int status = ftruncate(*desc, size); + if (status != 0) { + return nullptr; + } + + auto addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *desc, 0); + return addr; +} + +// ================================================================================================ +void* Os::OpenIpcMemory(const char* fname, const FileDesc desc, size_t size) { + FileDesc handle = desc; + if (fname != nullptr) { + handle = shm_open(fname, O_RDWR, S_IRWXU|S_IRWXG|S_IRWXO); + } + + if (handle < 0) { + return nullptr; + } + + auto addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, handle, 0); + return addr; +} + +// ================================================================================================ +void Os::CloseIpcMemory(const FileDesc desc, const void* ptr, size_t size) { + if (ptr != nullptr) { + munmap(const_cast(ptr), size); + } + if (desc != 0) { + close(desc); + } +} + } // namespace amd #endif // !defined(_WIN32) && !defined(__CYGWIN__) diff --git a/projects/clr/rocclr/os/os_win32.cpp b/projects/clr/rocclr/os/os_win32.cpp index 5b9c3ec1b7..d304c61e63 100644 --- a/projects/clr/rocclr/os/os_win32.cpp +++ b/projects/clr/rocclr/os/os_win32.cpp @@ -945,6 +945,42 @@ int Os::getProcessId() { return ::_getpid(); } +// ================================================================================================ +void* Os::CreateIpcMemory(const char* fname, size_t size, FileDesc* desc) { + void* addr = nullptr; + *desc = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, + 0, static_cast(size), fname); + if (*desc != 0) { + addr = MapViewOfFile(*desc, 
FILE_MAP_ALL_ACCESS, 0, 0, size); + } + + return addr; +} + +// ================================================================================================ +void* Os::OpenIpcMemory(const char* fname, const FileDesc desc, size_t size) { + void* addr = nullptr; + FileDesc handle = desc; + if (fname != nullptr) { + handle = CreateFileMapping(desc, NULL, PAGE_READWRITE, 0, static_cast(size), fname); + } + if (handle != 0) { + addr = MapViewOfFile(handle, FILE_MAP_ALL_ACCESS, 0, 0, size); + } + + return addr; +} + +// ================================================================================================ +void Os::CloseIpcMemory(const FileDesc desc, const void* ptr, size_t size) { + if (ptr != nullptr) { + UnmapViewOfFile(ptr); + } + if (desc != nullptr) { + CloseHandle(desc); + } +} + } // namespace amd #endif // _WIN32 || __CYGWIN__ diff --git a/projects/clr/rocclr/platform/memory.cpp b/projects/clr/rocclr/platform/memory.cpp index b239d51b8c..b71f3af15d 100644 --- a/projects/clr/rocclr/platform/memory.cpp +++ b/projects/clr/rocclr/platform/memory.cpp @@ -1535,6 +1535,14 @@ void SvmBuffer::memFill(void* dst, const void* src, size_t srcSize, size_t times } } +// ================================================================================================ bool SvmBuffer::malloced(const void* ptr) { return Contains(reinterpret_cast(ptr)); } +// ================================================================================================ +void IpcBuffer::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(IpcBuffer)); + memset(deviceMemories_, 0, NumDevicesWithP2P() * sizeof(DeviceMemory)); +} + } // namespace amd diff --git a/projects/clr/rocclr/platform/memory.hpp b/projects/clr/rocclr/platform/memory.hpp index 1837698e95..847e848a8e 100644 --- a/projects/clr/rocclr/platform/memory.hpp +++ b/projects/clr/rocclr/platform/memory.hpp @@ -37,11 +37,12 @@ #include #include #include -#define 
CL_MEM_FOLLOW_USER_NUMA_POLICY (1u << 31) -#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30) -#define ROCCLR_MEM_INTERNAL_MEMORY (1u << 29) -#define CL_MEM_VA_RANGE_AMD (1u << 28) -#define ROCCLR_MEM_HSA_UNCACHED (1u << 27) +#define CL_MEM_FOLLOW_USER_NUMA_POLICY (1u << 31) +#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30) +#define ROCCLR_MEM_INTERNAL_MEMORY (1u << 29) +#define CL_MEM_VA_RANGE_AMD (1u << 28) +#define ROCCLR_MEM_HSA_UNCACHED (1u << 27) +#define ROCCLR_MEM_INTERPROCESS (1u << 26) namespace device { class Memory; @@ -672,6 +673,21 @@ public: bool isArena() { return true; } }; +class IpcBuffer : public Buffer { + public: + IpcBuffer(Context& context, Flags flags, size_t offset, size_t size, amd::Os::FileDesc handle) + : Buffer(context, flags, offset, size), handle_(handle) { + setIpcShared(true); + } + + virtual void initDeviceMemory(); + amd::Os::FileDesc Handle() const { return handle_; } + + private: + amd::Os::FileDesc handle_; //!< Ipc handle, associated with this memory object +}; + + } // namespace amd #endif // MEMORY_H_