From dc34af61d77681282da4418a6efbb112337f90be Mon Sep 17 00:00:00 2001
From: "Assiouras, Ioannis"
Date: Tue, 12 Aug 2025 21:23:09 +0100
Subject: [PATCH] SWDEV-543340 - Remove shared memory objects after IPC event cleanup (#745)

This change ensures that shared memory objects (e.g., files in /dev/shm) are unlinked once all related IPC events have been destroyed.
---
 hipamd/src/hip_code_object.cpp | 20 +++++++++-----------
 hipamd/src/hip_event.hpp | 3 +++
 rocclr/os/os.hpp | 3 +++
 rocclr/os/os_posix.cpp | 1 +
 rocclr/os/os_win32.cpp | 3 +++
 5 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/hipamd/src/hip_code_object.cpp b/hipamd/src/hip_code_object.cpp
index 4336fdf0ea..846ca299a7 100644
--- a/hipamd/src/hip_code_object.cpp
+++ b/hipamd/src/hip_code_object.cpp
@@ -284,9 +284,7 @@ hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) {
 for (auto& hostVar : hostVarsIter->second) {
 auto varIter = vars_.find(hostVar);
 if (varIter == vars_.end()) {
- LogPrintfError(
- "removeFatBinary: Unable to find module 0x%x hostVar 0x%x",
- module, hostVar);
+ LogPrintfError("removeFatBinary: Unable to find module 0x%x hostVar 0x%x", module, hostVar);
 } else {
 delete varIter->second;
 vars_.erase(varIter);
@@ -325,8 +323,8 @@ hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) {
 for (auto& hostFunc : hostFuncsIter->second) {
 auto funcIter = functions_.find(hostFunc);
 if (funcIter == functions_.end()) {
- LogPrintfError("removeFatBinary: Unable to find module 0x%x hostFunc 0x%x",
- module, hostFunc);
+ LogPrintfError("removeFatBinary: Unable to find module 0x%x hostFunc 0x%x", module,
+ hostFunc);
 } else {
 delete funcIter->second;
 functions_.erase(funcIter);
@@ -343,8 +341,8 @@ hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) {
 delete moduleIter->second;
 modules_.erase(moduleIter);
 } else {
- LogPrintfError("removeFatBinary: Unable to find module 0x%x via hostModule 0x%x",
- module, hostModule);
+ LogPrintfError("removeFatBinary: Unable to find module 0x%x via hostModule 0x%x", module,
+ hostModule);
 }
 module_to_hostModule_.erase(hostModuleIter);
 }
@@ -383,7 +381,7 @@ hipError_t StatCO::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, i
 }

 // Lazy load
- FatBinaryInfo **module = it->second->moduleInfo();
+ FatBinaryInfo** module = it->second->moduleInfo();
 if (*(module) == nullptr) {
 amd::ScopedLock lock(sclock_);
 if (*(module) == nullptr) {
@@ -405,7 +403,7 @@ hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hos
 }

 // Lazy load
- FatBinaryInfo **module = it->second->moduleInfo();
+ FatBinaryInfo** module = it->second->moduleInfo();
 if (*(module) == nullptr) {
 hipError_t err = digestFatBinary(module_to_hostModule_[module], *module);
 assert(err == hipSuccess);
@@ -437,7 +435,7 @@ hipError_t StatCO::getStatGlobalVar(const void* hostVar, int deviceId, hipDevice
 }

 // Lazy load
- FatBinaryInfo **module = it->second->moduleInfo();
+ FatBinaryInfo** module = it->second->moduleInfo();
 if (*(module) == nullptr) {
 hipError_t err = digestFatBinary(module_to_hostModule_[module], *module);
 assert(err == hipSuccess);
@@ -464,7 +462,7 @@ hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) {
 for (auto& vecIter : managedVars_) {
 for (auto& var : vecIter.second) {
 // Lazy load
- FatBinaryInfo **module = var->moduleInfo();
+ FatBinaryInfo** module = var->moduleInfo();
 if (*(module) == nullptr) {
 err = digestFatBinary(module_to_hostModule_[module], *module);
 assert(err == hipSuccess);
diff --git a/hipamd/src/hip_event.hpp b/hipamd/src/hip_event.hpp
index 456bf3d0a6..33d80dfbf2 100644
--- a/hipamd/src/hip_event.hpp
+++ b/hipamd/src/hip_event.hpp
@@ -200,6 +200,9 @@ class IPCEvent : public Event {
 if (!amd::Os::MemoryUnmapFile(ipc_evt_.ipc_shmem_, sizeof(hip::ihipIpcEventShmem_t))) {
 // print hipErrorInvalidHandle;
 }
+ if (owners == 0) {
+ amd::Os::shm_unlink(ipc_evt_.ipc_name_);
+ }
 }
 }
 IPCEvent() : Event(hipEventInterprocess) {}
diff --git a/rocclr/os/os.hpp b/rocclr/os/os.hpp
index e8b7c0d2ad..d830361229 100644
--- a/rocclr/os/os.hpp
+++ b/rocclr/os/os.hpp
@@ -285,6 +285,9 @@ class Os : AllStatic {
 //! Deletes file
 static int unlink(const std::string& path);

+ //! Removes the shared memory object name
+ static int shm_unlink(const std::string& path);
+
 // Library routines:
 //
 typedef bool (*SymbolCallback)(std::string, const void*, void*);
diff --git a/rocclr/os/os_posix.cpp b/rocclr/os/os_posix.cpp
index 6ee367842c..8837c575b5 100644
--- a/rocclr/os/os_posix.cpp
+++ b/rocclr/os/os_posix.cpp
@@ -650,6 +650,7 @@ std::string Os::getTempFileName() {
 }

 int Os::unlink(const std::string& path) { return ::unlink(path.c_str()); }
+int Os::shm_unlink(const std::string& path) { return ::shm_unlink(path.c_str()); }

 #if defined(ATI_ARCH_X86)
 void Os::cpuid(int regs[4], int info) {
diff --git a/rocclr/os/os_win32.cpp b/rocclr/os/os_win32.cpp
index 68a87f15f7..b933dfd452 100644
--- a/rocclr/os/os_win32.cpp
+++ b/rocclr/os/os_win32.cpp
@@ -505,6 +505,9 @@ std::string Os::getTempFileName() {

 int Os::unlink(const std::string& path) { return ::_unlink(path.c_str()); }

+// shm_unlink is a nop on windows
+int Os::shm_unlink(const std::string& path) { return 0; }
+
 void Os::cpuid(int regs[4], int info) { return __cpuid(regs, info); }

 uint64_t Os::xgetbv(uint32_t ecx) { return (uint64_t)_xgetbv(ecx); }