From 7f195e2996f716cf6cca4c9d1718f6a1ba8a6fdf Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Fri, 12 Apr 2024 11:22:15 -0400 Subject: [PATCH] SWDEV-444670 - Enable teardown class Force implicit runtime teardown with a global destructor. Change-Id: Iabe63dedf5b94fefc98668585c45a61607120669 [ROCm/clr commit: c95a75a2bff92d1a58e91ebb77fe80ae7b3fa9a0] --- projects/clr/hipamd/src/hip_context.cpp | 2 ++ projects/clr/hipamd/src/hip_internal.hpp | 2 +- projects/clr/rocclr/device/rocm/rocdevice.cpp | 7 +++--- projects/clr/rocclr/device/rocm/rocmemory.cpp | 6 ++--- projects/clr/rocclr/platform/memory.cpp | 1 - projects/clr/rocclr/platform/runtime.cpp | 23 +++++++++++++++---- projects/clr/rocclr/platform/runtime.hpp | 10 ++++++++ 7 files changed, 37 insertions(+), 14 deletions(-) diff --git a/projects/clr/hipamd/src/hip_context.cpp b/projects/clr/hipamd/src/hip_context.cpp index 06e700ee19..6289a9891c 100644 --- a/projects/clr/hipamd/src/hip_context.cpp +++ b/projects/clr/hipamd/src/hip_context.cpp @@ -61,6 +61,7 @@ void init(bool* status) { return; } g_devices.push_back(device); + amd::RuntimeTearDown::RegisterObject(device); } amd::Context* hContext = new amd::Context(devices, amd::Context::Info()); @@ -73,6 +74,7 @@ void init(bool* status) { hContext->release(); } host_context = hContext; + amd::RuntimeTearDown::RegisterObject(hContext); PlatformState::instance().init(); *status = true; diff --git a/projects/clr/hipamd/src/hip_internal.hpp b/projects/clr/hipamd/src/hip_internal.hpp index d93c445d4d..1c3f230442 100644 --- a/projects/clr/hipamd/src/hip_internal.hpp +++ b/projects/clr/hipamd/src/hip_internal.hpp @@ -445,7 +445,7 @@ public: }; /// HIP Device class - class Device { + class Device : public amd::ReferenceCountedObject { amd::Monitor lock_{"Device lock", true}; amd::Monitor streamSetLock{"Guards device stream set"}; std::unordered_set streamSet; diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index ff5eebc005..9fd3600cb7 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -259,7 +259,7 @@ Device::~Device() { p2p_stage_ = nullptr; } if (nullptr != mg_sync_) { - amd::SvmBuffer::free(GlbCtx(), mg_sync_); + GlbCtx().svmFree(mg_sync_); mg_sync_ = nullptr; } if (glb_ctx_ != nullptr) { @@ -619,9 +619,8 @@ bool Device::init() { // Allocate mgpu sync buffer for cooperative launches if (amd::IS_HIP) { - mg_sync_ = reinterpret_cast
(amd::SvmBuffer::malloc( - *glb_ctx_, (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS), - kMGInfoSizePerDevice * devices.size(), kMGInfoSizePerDevice)); + mg_sync_ = reinterpret_cast
(glb_ctx_->svmAlloc(kMGInfoSizePerDevice * devices.size(), + kMGInfoSizePerDevice, (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS))); if (mg_sync_ == nullptr) { LogError("mgpu sync buffer alloc failed"); return false; diff --git a/projects/clr/rocclr/device/rocm/rocmemory.cpp b/projects/clr/rocclr/device/rocm/rocmemory.cpp index 88b3ba37a5..cd94068904 100644 --- a/projects/clr/rocclr/device/rocm/rocmemory.cpp +++ b/projects/clr/rocclr/device/rocm/rocmemory.cpp @@ -615,7 +615,7 @@ Buffer::Buffer(const roc::Device& dev, size_t size) : roc::Memory(dev, size) {} Buffer::~Buffer() { if (owner() == nullptr) { - dev().hostFree(deviceMemory_, size()); + dev().memFree(deviceMemory_, size()); } else { destroy(); @@ -663,7 +663,7 @@ void Buffer::destroy() { ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "[ROCClr] munmap failed \n"); } } else { - dev().hostFree(deviceMemory_, size()); + dev().memFree(deviceMemory_, size()); } } else if (memFlags & ROCCLR_MEM_HSA_SIGNAL_MEMORY) { if (HSA_STATUS_SUCCESS != hsa_signal_destroy(signal_)) { @@ -672,7 +672,7 @@ void Buffer::destroy() { } deviceMemory_ = nullptr; } else { - dev().hostFree(deviceMemory_, size()); + dev().memFree(deviceMemory_, size()); } } else { dev().memFree(deviceMemory_, size()); diff --git a/projects/clr/rocclr/platform/memory.cpp b/projects/clr/rocclr/platform/memory.cpp index 6f3f027c39..d2629d6377 100644 --- a/projects/clr/rocclr/platform/memory.cpp +++ b/projects/clr/rocclr/platform/memory.cpp @@ -1521,7 +1521,6 @@ bool SvmBuffer::Contains(uintptr_t ptr) { // The allocation flags are ignored for now. void* SvmBuffer::malloc(Context& context, cl_svm_mem_flags flags, size_t size, size_t alignment, const amd::Device* curDev) { - bool atomics = (flags & CL_MEM_SVM_ATOMICS) != 0; void* ret = context.svmAlloc(size, alignment, flags, curDev); if (ret == nullptr) { LogError("Unable to allocate aligned memory"); diff --git a/projects/clr/rocclr/platform/runtime.cpp b/projects/clr/rocclr/platform/runtime.cpp index 7f80919c46..571668cc70 100644 --- a/projects/clr/rocclr/platform/runtime.cpp +++ b/projects/clr/rocclr/platform/runtime.cpp @@ -100,11 +100,24 @@ void Runtime::tearDown() { initialized_ = false; } -class RuntimeTearDown : public amd::HeapObject { -public: - RuntimeTearDown() {} - ~RuntimeTearDown() { /*Runtime::tearDown();*/ } -} runtime_tear_down; +std::vector RuntimeTearDown::external_; + +RuntimeTearDown::~RuntimeTearDown() { +#ifndef _WIN32 + if (amd::IS_HIP) { + for (auto it: external_) { + it->release(); + } + Runtime::tearDown(); + } +#endif +} + +void RuntimeTearDown::RegisterObject(ReferenceCountedObject* obj) { + external_.push_back(obj); +} + +class RuntimeTearDown runtime_tear_down; uint ReferenceCountedObject::retain() { return referenceCount_.fetch_add(1, std::memory_order_relaxed) + 1; diff --git a/projects/clr/rocclr/platform/runtime.hpp b/projects/clr/rocclr/platform/runtime.hpp index f582e70323..a1c3d25e1a 100644 --- a/projects/clr/rocclr/platform/runtime.hpp +++ b/projects/clr/rocclr/platform/runtime.hpp @@ -91,6 +91,16 @@ public: inline bool Runtime::initialized() { return initialized_; } +class RuntimeTearDown : public HeapObject { + static std::vector external_; + +public: + RuntimeTearDown() {} + ~RuntimeTearDown(); + + static void RegisterObject(ReferenceCountedObject* obj); +}; + } // namespace amd #endif /*RUNTIME_HPP_*/