diff --git a/projects/clr/hipamd/src/hip_device.cpp b/projects/clr/hipamd/src/hip_device.cpp index 9f6a8e3d07..20889b0fe6 100644 --- a/projects/clr/hipamd/src/hip_device.cpp +++ b/projects/clr/hipamd/src/hip_device.cpp @@ -257,15 +257,30 @@ void Device::destroyAllStreams() { } // ================================================================================================ -void Device::SyncAllStreams( bool cpu_wait) { +void Device::SyncAllStreams(bool cpu_wait, bool wait_blocking_streams_only) { // Make a local copy to avoid stalls for GPU finish with multiple threads std::vector streams; streams.reserve(streamSet.size()); { amd::ScopedLock lock(streamSetLock); - for (auto it : streamSet) { - streams.push_back(it); - it->retain(); + if (wait_blocking_streams_only) { + auto null_stream = GetNullStream(); + for (auto it : streamSet) { + if (it != null_stream && (it->Flags() & hipStreamNonBlocking) == 0) { + streams.push_back(it); + it->retain(); + } + } + // Add null stream to the end of the list so that wait happens after all blocking streams. 
+ if (null_stream != nullptr) { + streams.push_back(null_stream); + null_stream->retain(); + } + } else { + for (auto it : streamSet) { + streams.push_back(it); + it->retain(); + } } } for (auto it : streams) { diff --git a/projects/clr/hipamd/src/hip_internal.hpp b/projects/clr/hipamd/src/hip_internal.hpp index d0a6dca57d..47749c012c 100644 --- a/projects/clr/hipamd/src/hip_internal.hpp +++ b/projects/clr/hipamd/src/hip_internal.hpp @@ -595,7 +595,7 @@ public: void destroyAllStreams(); - void SyncAllStreams( bool cpu_wait = true); + void SyncAllStreams( bool cpu_wait = true, bool wait_blocking_streams_only = false); bool StreamCaptureBlocking(); diff --git a/projects/clr/hipamd/src/hip_stream.cpp b/projects/clr/hipamd/src/hip_stream.cpp index 9373749772..76a732acd7 100644 --- a/projects/clr/hipamd/src/hip_stream.cpp +++ b/projects/clr/hipamd/src/hip_stream.cpp @@ -357,13 +357,23 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) { HIP_RETURN(hipErrorStreamCaptureUnsupported); } } - bool wait = (stream == nullptr || stream == hipStreamLegacy) ? true : false; - auto hip_stream = hip::getStream(stream, wait); - // Wait for the current host queue - hip_stream->finish(); - // Release freed memory for all memory pools on the device - hip_stream->GetDevice()->ReleaseFreedMemory(); + if (stream == nullptr) { + // CPU-wait on the null stream and on blocking streams only; non-blocking streams are skipped + constexpr bool WaitblockingStreamOnly = true; + getCurrentDevice()->SyncAllStreams(true, WaitblockingStreamOnly); + + // Release freed memory for all memory pools on the device + getCurrentDevice()->ReleaseFreedMemory(); + } else { + constexpr bool wait = false; + auto hip_stream = hip::getStream(stream, wait); + + // Wait for the current host queue. NOTE(review): hipStreamLegacy had wait == true before; confirm + hip_stream->finish(); + // Release freed memory for all memory pools on the device + hip_stream->GetDevice()->ReleaseFreedMemory(); + } return hipSuccess; }