diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 66c652c7fc..282cb56c4c 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -2863,7 +2863,11 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_eve bool active_wait = !(hip_event_flags & kHipEventBlockingSync) && ActiveWait(); return WaitForSignal(reinterpret_cast(hw_event)->signal_, active_wait); } - return (hsa_signal_load_relaxed(reinterpret_cast(hw_event)->signal_) == 0); + + auto signal = reinterpret_cast(hw_event)->signal_; + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Check HW event = 0x%lx", signal.handle); + + return (hsa_signal_load_relaxed(signal) == 0); } // ================================================================================================ diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index 18d696a589..23b3102ca7 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -422,7 +422,20 @@ bool VirtualGPU::HwQueueTracker::Create() { // ================================================================================================ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( - hsa_signal_value_t init_val, Timestamp* ts) { + hsa_signal_value_t init_val, Timestamp* ts, bool attach_signal) { + + amd::Command* cmd = gpu_.command(); + // If no signal is needed, decrement the refcount and clear the hw_event of current command + if (!attach_signal) { + if (nullptr != cmd) { + if (cmd->HwEvent() != nullptr) { + reinterpret_cast(cmd->HwEvent())->release(); + } + cmd->SetHwEvent(nullptr); + } + return hsa_signal_t {0}; + } + bool new_signal = false; // Peep signal +2 ahead to see if its done @@ -503,8 +516,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( prof_signal->engine_ = engine_; prof_signal->flags_.isPacketDispatch_ = false; - // Store the HW event - amd::Command* cmd = gpu_.command(); + if (nullptr != cmd) { // Release any existing HwEvent before setting new one for the same command if (cmd->HwEvent() != nullptr) { @@ -1026,24 +1038,25 @@ bool VirtualGPU::dispatchGenericAqlPacket( fence_state_ = static_cast(expected_fence_state); - if (timestamp_ != nullptr || attach_signal) { - // Get active signal for current dispatch if profiling is necessary - packet->completion_signal = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_); - - if (std::is_same::value) { - // If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can - // retrieve this correlation ID to attribute waves to specific dispatch locations. - if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH)) { - auto dispatchPacket = reinterpret_cast(packet); - dispatchPacket->reserved2 = timestamp_->command().profilingInfo().correlation_id_; - } - - ProfilingSignal* current_signal = Barriers().GetLastSignal(); - current_signal->flags_.isPacketDispatch_ = true; + bool attachSignal = timestamp_ != nullptr || attach_signal; + // Get active signal for current dispatch if profiling is necessary + packet->completion_signal = Barriers().ActiveSignal(kInitSignalValueOne, + timestamp_, attachSignal); + if (std::is_same::value + && timestamp_ != nullptr) { + // If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can + // retrieve this correlation ID to attribute waves to specific dispatch locations. + if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH) ) { + auto dispatchPacket = reinterpret_cast(packet); + dispatchPacket->reserved2 = timestamp_->command().profilingInfo().correlation_id_; } + + ProfilingSignal* current_signal = Barriers().GetLastSignal(); + current_signal->flags_.isPacketDispatch_ = true; } + // Make sure the slot is free for usage while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= sw_queue_size) { amd::Os::yield(); diff --git a/rocclr/device/rocm/rocvirtual.hpp b/rocclr/device/rocm/rocvirtual.hpp index 43c927ff6e..4e8cd7fad3 100644 --- a/rocclr/device/rocm/rocvirtual.hpp +++ b/rocclr/device/rocm/rocvirtual.hpp @@ -254,7 +254,7 @@ class VirtualGPU : public device::VirtualDevice { //! Finds a free signal for the upcomming operation hsa_signal_t ActiveSignal(hsa_signal_value_t init_val = kInitSignalValueOne, - Timestamp* ts = nullptr); + Timestamp* ts = nullptr, bool attach_signal = true); //! Wait for the curent active signal. Can idle the queue bool WaitCurrent() { diff --git a/rocclr/platform/command.cpp b/rocclr/platform/command.cpp index ebf47b8eb4..3f4fd6a24c 100644 --- a/rocclr/platform/command.cpp +++ b/rocclr/platform/command.cpp @@ -277,7 +277,6 @@ bool Event::notifyCmdQueue(bool cpu_wait) { notified_.clear(); return false; } - ClPrint(LOG_DEBUG, LOG_CMD, "Queue marker to command queue: %p", queue); command->enqueue(); // Save notification, associated with the current event notify_event_ = command; @@ -290,7 +289,6 @@ bool Event::notifyCmdQueue(bool cpu_wait) { notified_.clear(); return false; } - ClPrint(LOG_DEBUG, LOG_CMD, "Queue marker to command queue: %p", queue); command->enqueue(); command->release(); } @@ -356,8 +354,8 @@ void Command::enqueue() { Agent::postEventCreate(as_cl(static_cast(this)), type_); } - ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p", - amd::activity_prof::getOclCommandKindString(this->type()), this); + ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p to queue: %p", + amd::activity_prof::getOclCommandKindString(this->type()), this, queue_); // Direct dispatch logic below will submit the command immediately, but the command status // update will occur later after flush() with a wait diff --git a/rocclr/platform/command.hpp b/rocclr/platform/command.hpp index 68cad257f6..0aebbc4b72 100644 --- a/rocclr/platform/command.hpp +++ b/rocclr/platform/command.hpp @@ -213,7 +213,7 @@ class Event : public RuntimeObject { //! Returns the callback for this event const CallBackEntry* Callback() const { return callbacks_; } - // Saves HW event, associated with the current command + //! Saves HW event, associated with the current command void SetHwEvent(void* hw_event) { hw_event_ = hw_event; } //! Returns HW event, associated with the current command