diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index 61560617ac..492ef6996c 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -1259,13 +1259,21 @@ void VirtualGPU::dispatchBarrierValuePacket(uint16_t packetHeader, bool resolveD // Dependent signal and external signal cant be true at the same time assert(resolveDepSignal & (signal.handle != 0) == 0); if (resolveDepSignal) { - auto wait_signal = Barriers().WaitingSignal(); - if (wait_signal.size() > 0) { - assert(wait_signal.size() == 1 && "Only one dep signal allowed for BarrierValue"); - barrier_value_packet_.signal = wait_signal[0]; + auto wait_signals = Barriers().WaitingSignal(); + if (wait_signals.size() > 0) { + barrier_value_packet_.signal = wait_signals[0]; barrier_value_packet_.value = kInitSignalValueOne; barrier_value_packet_.mask = std::numeric_limits::max(); barrier_value_packet_.cond = HSA_SIGNAL_CONDITION_LT; + for (uint32_t i = 1; i < wait_signals.size(); ++i) { + uint32_t j = (i - 1) % 5; + barrier_packet_.dep_signal[j] = wait_signals[i]; + constexpr bool kSkipSignal = true; + // If runtime reached the packet limit or the count limit, then flush the barrier + if ((j == 4) || ((i + 1) == wait_signals.size())) { + dispatchBarrierPacket(kNopPacketHeader, kSkipSignal); + } + } } } diff --git a/rocclr/platform/commandqueue.cpp b/rocclr/platform/commandqueue.cpp index 0b84e12e42..9eaa1a6d2d 100644 --- a/rocclr/platform/commandqueue.cpp +++ b/rocclr/platform/commandqueue.cpp @@ -158,13 +158,13 @@ void HostQueue::finish(bool cpu_wait) { if (command == NULL) { return; } - ClPrint(LOG_DEBUG, LOG_CMD, "Marker queued to ensure finish"); + ClPrint(LOG_DEBUG, LOG_CMD, "Marker queued to %p for finish", this); command->enqueue(); } // Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status static constexpr bool kWaitCompletion = true; if (cpu_wait || !device().IsHwEventReady(command->event(), kWaitCompletion)) { - ClPrint(LOG_DEBUG, LOG_CMD, "HW Event not ready, awaiting completion instead"); + ClPrint(LOG_DEBUG, LOG_CMD, "No HW event || cpu wait=%d, await command completion", cpu_wait); command->awaitCompletion(); if (IS_HIP) { @@ -181,7 +181,7 @@ void HostQueue::finish(bool cpu_wait) { } command->release(); - ClPrint(LOG_DEBUG, LOG_CMD, "All commands finished"); + ClPrint(LOG_DEBUG, LOG_CMD, "All commands finished for host queue : %p", this); } void HostQueue::loop(device::VirtualDevice* virtualDevice) {