From 4aed0e4a01782b8129f0e0eeecba19e308db8553 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Mon, 27 Sep 2021 17:56:25 -0400 Subject: [PATCH] SWDEV-296301 - Avoid deadlocks in the hostcall path Change the scope of hostcall buffer access lock during destruction. Make sure wait() returns the signal value after timeout. That matches ROCr behaviour for HSA signal wait. Change-Id: I3df34207e0c2e21972ec8052777e5742bda1dca0 [ROCm/clr commit: 9a9d10a10b36d5e01815ac29e4fab1b458bfffd7] --- projects/clr/rocclr/device/devhostcall.cpp | 23 +++++++++++--------- projects/clr/rocclr/device/pal/palsignal.cpp | 4 ++-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/projects/clr/rocclr/device/devhostcall.cpp b/projects/clr/rocclr/device/devhostcall.cpp index bedc8de63e..c1dbeb47b7 100644 --- a/projects/clr/rocclr/device/devhostcall.cpp +++ b/projects/clr/rocclr/device/devhostcall.cpp @@ -286,10 +286,12 @@ void HostcallListener::consumePackets() { return; } - amd::ScopedLock lock{listenerLock}; + if (!idle()) { + amd::ScopedLock lock{listenerLock}; - for (auto ii : buffers_) { - ii->processPackets(messages_); + for (auto ii : buffers_) { + ii->processPackets(messages_); + } } } @@ -389,14 +391,15 @@ bool enableHostcalls(const amd::Device &dev, void* bfr, uint32_t numPackets) { } void disableHostcalls(void* bfr) { - amd::ScopedLock lock(listenerLock); - if (!hostcallListener) { - return; + { + amd::ScopedLock lock(listenerLock); + if (!hostcallListener) { + return; + } + assert(bfr && "expected a hostcall buffer"); + auto buffer = reinterpret_cast<HostcallBuffer*>(bfr); + hostcallListener->removeBuffer(buffer); } - assert(bfr && "expected a hostcall buffer"); - auto buffer = reinterpret_cast<HostcallBuffer*>(bfr); - hostcallListener->removeBuffer(buffer); - if (hostcallListener->idle()) { hostcallListener->terminate(); delete hostcallListener; diff --git a/projects/clr/rocclr/device/pal/palsignal.cpp b/projects/clr/rocclr/device/pal/palsignal.cpp index 0764647419..9b5efaaefe 100644 --- 
a/projects/clr/rocclr/device/pal/palsignal.cpp +++ b/projects/clr/rocclr/device/pal/palsignal.cpp @@ -129,7 +129,7 @@ uint64_t Signal::Wait(uint64_t value, device::Signal::Condition c, uint64_t time float timeoutInSec = timeout / (1000 * 1000); result = event_.Wait(timeoutInSec); - if (result != Pal::Result::Success) { + if ((result != Pal::Result::Success) && (result != Pal::Result::Timeout)) { return -1; } @@ -142,7 +142,7 @@ uint64_t Signal::Wait(uint64_t value, device::Signal::Condition c, uint64_t time auto end = amd::Os::timeNanos(); auto duration = 1000 * (end - start); // convert to us if (duration >= timeout) { - return -1; + return amdSignal_->value; } if (!cmp(amdSignal_->value, value)) {