From 60110b6c015fbd051f35c0bbefb997aee7a2479b Mon Sep 17 00:00:00 2001 From: "Sang, Tao" Date: Wed, 23 Apr 2025 15:08:07 -0400 Subject: [PATCH] SWDEV-518831 - fix streams' sync issue in mthreads (#123) * SWDEV-518831 - fix streams' sync issue in mthreads 1. Fix sync issue of null stream and non-null streams in multithreads. 2. Remove assert(GetSubmissionBatch() == nullptr) as it is invalid in multithreads. 3. Update getActiveQueues() to deal with the state of being terminated. [ROCm/clr commit: 27aad09bd4b452344174d0f8543d9c3db10999f7] --- projects/clr/hipamd/src/hip_device.cpp | 1 + projects/clr/hipamd/src/hip_stream.cpp | 1 + projects/clr/rocclr/device/device.cpp | 15 +++++++++++++++ projects/clr/rocclr/device/device.hpp | 5 +---- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/projects/clr/hipamd/src/hip_device.cpp b/projects/clr/hipamd/src/hip_device.cpp index e2a8ff0098..1647f4b735 100644 --- a/projects/clr/hipamd/src/hip_device.cpp +++ b/projects/clr/hipamd/src/hip_device.cpp @@ -201,6 +201,7 @@ void Device::WaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stre // Get the last valid command waitForStream(active_stream); } + command->release(); } } diff --git a/projects/clr/hipamd/src/hip_stream.cpp b/projects/clr/hipamd/src/hip_stream.cpp index a1a474baea..4764fcffca 100644 --- a/projects/clr/hipamd/src/hip_stream.cpp +++ b/projects/clr/hipamd/src/hip_stream.cpp @@ -73,6 +73,7 @@ bool Stream::Create() { // ================================================================================================ void Stream::Destroy(hip::Stream* stream, bool forceDestroy) { + stream->device().removeFromActiveQueues(stream); stream->device_->RemoveStream(stream); stream->SetForceDestroy(forceDestroy); stream->release(); diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index 0dfec19dfb..2e9ef8155c 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -1107,6 +1107,21 @@ void Device::IpcDetach(void* dev_ptr) const { } } +std::vector Device::getActiveQueues() { + amd::ScopedLock lock(activeQueuesLock_); + for (auto it = activeQueues.begin(); it != activeQueues.end();) { + if ((*it)->referenceCount() == 0) { + // It is being terminated in HostQueue::terminate(). + // We should not wait for commands in a queue being terminated. + it = activeQueues.erase(it); + } else { + // In case the queue will be destroyed in Stream::Destroy(). + (*it)->retain(); + ++it; + } + } + return std::vector(activeQueues.begin(), activeQueues.end()); +} } // namespace amd namespace amd::device { diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index ac401cb28a..56566728ac 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -2119,10 +2119,7 @@ class Device : public RuntimeObject { } //! Returns the queues that have at least one submitted command - std::vector getActiveQueues() { - amd::ScopedLock lock(activeQueuesLock_); - return std::vector(activeQueues.begin(), activeQueues.end()); - } + std::vector getActiveQueues(); //! Adds the queue to the set of active command queues void addToActiveQueues(amd::CommandQueue* commandQueue) {