From abc4fcabb5347fcf4dd1c69c9d1d187f1eec7475 Mon Sep 17 00:00:00 2001
From: "Shweta.Khatri"
Date: Thu, 25 Apr 2024 11:51:53 -0400
Subject: [PATCH] Fix soft hang on AQLQueue destruction with a timeout

Add timeout to AQLQueue destructor signal wait to prevent indefinite hang

Change-Id: I6c6c98a7bdd27d39569af1d667aa9aa7e9596535
Signed-off-by: Chris Freehill

[ROCm/ROCR-Runtime commit: 4e9647704defa97ecc7d531a25f0b589c38f27bc]
---
 .../runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp
index 39321ef012..3dad89cf69 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp
@@ -365,8 +365,11 @@ AqlQueue::~AqlQueue() {
   // Remove kfd exception handler
   exceptionState |= ERROR_HANDLER_TERMINATE;
   while ((exceptionState & ERROR_HANDLER_DONE) != ERROR_HANDLER_DONE) {
+    const uint64_t timeout_ms = 5000;
+
     exception_signal_->StoreRelease(-1ull);
-    exception_signal_->WaitRelaxed(HSA_SIGNAL_CONDITION_NE, -1ull, -1ull, HSA_WAIT_STATE_BLOCKED);
+    exception_signal_->WaitRelaxed(HSA_SIGNAL_CONDITION_NE, -1ull, timeout_ms,
+                                   HSA_WAIT_STATE_BLOCKED);
   }
 
   Inactivate();