From 861b9fb84c5ecb7bbfa07a3e02acdeae709623f8 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Fri, 26 Nov 2021 15:17:03 -0500 Subject: [PATCH] SWDEV-294669 - Avoid stall when the new signal was created Stall in the host thread could occur earlier than the app expects. Make sure the runtime can grow the signals to the queue size without any stall. Also, adding a new signal to the end of the pool could break the dependency chain on signal reuse. The new logic will insert the new signal after current to keep the chain intact. Change-Id: I9c90b98515907db8b677528263c3e88cd9581a14 [ROCm/clr commit: 102c19adf3d860768486af2a4894b8871ed4993c] --- .../clr/rocclr/device/rocm/rocvirtual.cpp | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index af4ec3105f..20fe2972db 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -347,6 +347,7 @@ bool VirtualGPU::HwQueueTracker::Create() { // ================================================================================================ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( hsa_signal_value_t init_val, Timestamp* ts, uint32_t queue_size) { + bool new_signal = false; // If queue size grows, then add more signals to avoid more frequent stalls if (queue_size > signal_list_.size()) { std::unique_ptr signal(new ProfilingSignal()); @@ -357,19 +358,28 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( uint32_t num_agents = (settings.system_scope_signal_) ? 
0 : 1; if (HSA_STATUS_SUCCESS == hsa_signal_create(0, num_agents, agents, &signal->signal_)) { - signal_list_.push_back(signal.release()); + // Find valid new index + ++current_id_ %= signal_list_.size(); + // Insert the new signal into the current slot and ignore any wait + signal_list_.insert(signal_list_.begin() + current_id_, signal.release()); + new_signal = true; } } } - // Find valid index - ++current_id_ %= signal_list_.size(); - // Make sure the previous operation on the current signal is done - WaitCurrent(); + // If it's the new signal, then the wait can be avoided. + // That will allow to grow the list of signals without stalls + if (!new_signal) { + // Find valid index + ++current_id_ %= signal_list_.size(); - // Have to wait the next signal in the queue to avoid a race condition between - // a GPU waiter(which may be not triggered yet) and CPU signal reset below - WaitNext(); + // Make sure the previous operation on the current signal is done + WaitCurrent(); + + // Have to wait the next signal in the queue to avoid a race condition between + // a GPU waiter(which may be not triggered yet) and CPU signal reset below + WaitNext(); + } if (signal_list_[current_id_]->referenceCount() > 1) { // The signal was assigned to the global marker's event, hence runtime can't reuse it