From 102c19adf3d860768486af2a4894b8871ed4993c Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Fri, 26 Nov 2021 15:17:03 -0500 Subject: [PATCH] SWDEV-294669 - Avoid stall when the new signal was created Stall in the host thread could occur earlier than the app expects. Make sure rutnime can grow the signals to the queue size without any stall. Also adding a new signal to the end of the pool could break the dependency chain on signal reuse. The new logic will insert the new signal after current to keep the chain intact. Change-Id: I9c90b98515907db8b677528263c3e88cd9581a14 --- rocclr/device/rocm/rocvirtual.cpp | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index af4ec3105f..20fe2972db 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -347,6 +347,7 @@ bool VirtualGPU::HwQueueTracker::Create() { // ================================================================================================ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( hsa_signal_value_t init_val, Timestamp* ts, uint32_t queue_size) { + bool new_signal = false; // If queue size grows, then add more signals to avoid more frequent stalls if (queue_size > signal_list_.size()) { std::unique_ptr signal(new ProfilingSignal()); @@ -357,19 +358,28 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( uint32_t num_agents = (settings.system_scope_signal_) ? 0 : 1; if (HSA_STATUS_SUCCESS == hsa_signal_create(0, num_agents, agents, &signal->signal_)) { - signal_list_.push_back(signal.release()); + // Find valid new index + ++current_id_ %= signal_list_.size(); + // Insert the new signal into the current slot and ignore any wait + signal_list_.insert(signal_list_.begin() + current_id_, signal.release()); + new_signal = true; } } } - // Find valid index - ++current_id_ %= signal_list_.size(); - // Make sure the previous operation on the current signal is done - WaitCurrent(); + // If it's the new signal, then the wait can be avoided. + // That will allow to grow the list of signals without stalls + if (!new_signal) { + // Find valid index + ++current_id_ %= signal_list_.size(); - // Have to wait the next signal in the queue to avoid a race condition between - // a GPU waiter(which may be not triggered yet) and CPU signal reset below - WaitNext(); + // Make sure the previous operation on the current signal is done + WaitCurrent(); + + // Have to wait the next signal in the queue to avoid a race condition between + // a GPU waiter(which may be not triggered yet) and CPU signal reset below + WaitNext(); + } if (signal_list_[current_id_]->referenceCount() > 1) { // The signal was assigned to the global marker's event, hence runtime can't reuse it