SWDEV-521647 - Fix tracking of hw_event (#206)

- When a command may have two packets (like the device heap initializer) and there is no signal on the main kernel packet, the tracking was broken because it marked the HW event of the command as the first packet's signal. - If no completion signal is attached to the second packet, clear the HW event for the command.
2025-04-25 08:46:44 -07:00
@@ -2863,7 +2863,11 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_eve
    bool active_wait = !(hip_event_flags & kHipEventBlockingSync) && ActiveWait();
    return WaitForSignal(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_, active_wait);
  }
-  return (hsa_signal_load_relaxed(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_) == 0);
+
+  auto signal = reinterpret_cast<ProfilingSignal*>(hw_event)->signal_;
+  ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Check HW event = 0x%lx", signal.handle);
+
+  return (hsa_signal_load_relaxed(signal) == 0);
 }

 // ================================================================================================
@@ -422,7 +422,20 @@ bool VirtualGPU::HwQueueTracker::Create() {

 // ================================================================================================
 hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
-    hsa_signal_value_t init_val, Timestamp* ts) {
+    hsa_signal_value_t init_val, Timestamp* ts, bool attach_signal) {
+
+  amd::Command* cmd = gpu_.command();
+  // If no signal is needed, decrement the refcount and clear the hw_event of current command
+  if (!attach_signal) {
+    if (nullptr != cmd) {
+      if (cmd->HwEvent() != nullptr) {
+        reinterpret_cast<ProfilingSignal*>(cmd->HwEvent())->release();
+      }
+      cmd->SetHwEvent(nullptr);
+    }
+    return hsa_signal_t {0};
+  }
+
  bool new_signal = false;

  // Peep signal +2 ahead to see if its done
@@ -503,8 +516,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
  prof_signal->engine_ = engine_;
  prof_signal->flags_.isPacketDispatch_ = false;

-  // Store the HW event
-  amd::Command* cmd = gpu_.command();
+
  if (nullptr != cmd) {
    // Release any existing HwEvent before setting new one for the same command
    if (cmd->HwEvent() != nullptr) {
@@ -1026,24 +1038,25 @@ bool VirtualGPU::dispatchGenericAqlPacket(

  fence_state_ = static_cast<Device::CacheState>(expected_fence_state);

-  if (timestamp_ != nullptr || attach_signal) {
-    // Get active signal for current dispatch if profiling is necessary
-    packet->completion_signal = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_);
-
-    if (std::is_same<decltype(packet), hsa_kernel_dispatch_packet_t*>::value) {
-      // If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
-      // retrieve this correlation ID to attribute waves to specific dispatch locations.
-      if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH)) {
-        auto dispatchPacket = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet);
-        dispatchPacket->reserved2 = timestamp_->command().profilingInfo().correlation_id_;
-      }
-
-      ProfilingSignal* current_signal = Barriers().GetLastSignal();
-      current_signal->flags_.isPacketDispatch_ = true;
+  bool attachSignal = timestamp_ != nullptr || attach_signal;
+  // Get active signal for current dispatch if profiling is necessary
+  packet->completion_signal = Barriers().ActiveSignal(kInitSignalValueOne,
+                                                      timestamp_, attachSignal);

+  if (std::is_same<decltype(packet), hsa_kernel_dispatch_packet_t*>::value
+      && timestamp_ != nullptr) {
+    // If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
+    // retrieve this correlation ID to attribute waves to specific dispatch locations.
+    if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH) ) {
+      auto dispatchPacket = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet);
+      dispatchPacket->reserved2 = timestamp_->command().profilingInfo().correlation_id_;
    }
+
+    ProfilingSignal* current_signal = Barriers().GetLastSignal();
+    current_signal->flags_.isPacketDispatch_ = true;
  }

+
  // Make sure the slot is free for usage
  while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= sw_queue_size) {
    amd::Os::yield();
@@ -254,7 +254,7 @@ class VirtualGPU : public device::VirtualDevice {

    //! Finds a free signal for the upcomming operation
    hsa_signal_t ActiveSignal(hsa_signal_value_t init_val = kInitSignalValueOne,
-                              Timestamp* ts = nullptr);
+                              Timestamp* ts = nullptr, bool attach_signal = true);

    //! Wait for the curent active signal. Can idle the queue
    bool WaitCurrent() {
@@ -277,7 +277,6 @@ bool Event::notifyCmdQueue(bool cpu_wait) {
        notified_.clear();
        return false;
      }
-      ClPrint(LOG_DEBUG, LOG_CMD, "Queue marker to command queue: %p", queue);
      command->enqueue();
      // Save notification, associated with the current event
      notify_event_ = command;
@@ -290,7 +289,6 @@ bool Event::notifyCmdQueue(bool cpu_wait) {
        notified_.clear();
        return false;
      }
-      ClPrint(LOG_DEBUG, LOG_CMD, "Queue marker to command queue: %p", queue);
      command->enqueue();
      command->release();
    }
@@ -356,8 +354,8 @@ void Command::enqueue() {
    Agent::postEventCreate(as_cl(static_cast<Event*>(this)), type_);
  }

-  ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p",
-          amd::activity_prof::getOclCommandKindString(this->type()), this);
+  ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p to queue: %p",
+          amd::activity_prof::getOclCommandKindString(this->type()), this, queue_);

  // Direct dispatch logic below will submit the command immediately, but the command status
  // update will occur later after flush() with a wait
@@ -213,7 +213,7 @@ class Event : public RuntimeObject {
  //! Returns the callback for this event
  const CallBackEntry* Callback() const { return callbacks_; }

-  // Saves HW event, associated with the current command
+  //! Saves HW event, associated with the current command
  void SetHwEvent(void* hw_event) { hw_event_ = hw_event; }

  //! Returns HW event, associated with the current command