SWDEV-521647 - Fix tracking of hw_event (#206)

- When a command may have two packets (like the device heap
  initializer) and there is no signal on the main kernel packet, the
  tracking was broken because it recorded the first packet's signal as
  the HW event of the command.
- Make sure that if no completion signal is attached to the second
  packet, the HW event for the command is cleared.
This commit is contained in:
Kudchadker, Saleel
2025-04-25 08:46:44 -07:00
کامیت شده توسط GitHub
والد ce24936970
کامیت 072fb0804e
5فایلهای تغییر یافته به همراه39 افزوده شده و 24 حذف شده
@@ -2863,7 +2863,11 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_eve
bool active_wait = !(hip_event_flags & kHipEventBlockingSync) && ActiveWait();
return WaitForSignal(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_, active_wait);
}
return (hsa_signal_load_relaxed(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_) == 0);
auto signal = reinterpret_cast<ProfilingSignal*>(hw_event)->signal_;
ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Check HW event = 0x%lx", signal.handle);
return (hsa_signal_load_relaxed(signal) == 0);
}
// ================================================================================================
@@ -422,7 +422,20 @@ bool VirtualGPU::HwQueueTracker::Create() {
// ================================================================================================
hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
hsa_signal_value_t init_val, Timestamp* ts) {
hsa_signal_value_t init_val, Timestamp* ts, bool attach_signal) {
amd::Command* cmd = gpu_.command();
// If no signal is needed, decrement the refcount and clear the hw_event of current command
if (!attach_signal) {
if (nullptr != cmd) {
if (cmd->HwEvent() != nullptr) {
reinterpret_cast<ProfilingSignal*>(cmd->HwEvent())->release();
}
cmd->SetHwEvent(nullptr);
}
return hsa_signal_t {0};
}
bool new_signal = false;
// Peek at the signal +2 ahead to see if it's done
@@ -503,8 +516,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
prof_signal->engine_ = engine_;
prof_signal->flags_.isPacketDispatch_ = false;
// Store the HW event
amd::Command* cmd = gpu_.command();
if (nullptr != cmd) {
// Release any existing HwEvent before setting new one for the same command
if (cmd->HwEvent() != nullptr) {
@@ -1026,24 +1038,25 @@ bool VirtualGPU::dispatchGenericAqlPacket(
fence_state_ = static_cast<Device::CacheState>(expected_fence_state);
if (timestamp_ != nullptr || attach_signal) {
// Get active signal for current dispatch if profiling is necessary
packet->completion_signal = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_);
if (std::is_same<decltype(packet), hsa_kernel_dispatch_packet_t*>::value) {
// If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
// retrieve this correlation ID to attribute waves to specific dispatch locations.
if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH)) {
auto dispatchPacket = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet);
dispatchPacket->reserved2 = timestamp_->command().profilingInfo().correlation_id_;
}
ProfilingSignal* current_signal = Barriers().GetLastSignal();
current_signal->flags_.isPacketDispatch_ = true;
bool attachSignal = timestamp_ != nullptr || attach_signal;
// Get active signal for current dispatch if profiling is necessary
packet->completion_signal = Barriers().ActiveSignal(kInitSignalValueOne,
timestamp_, attachSignal);
if (std::is_same<decltype(packet), hsa_kernel_dispatch_packet_t*>::value
&& timestamp_ != nullptr) {
// If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
// retrieve this correlation ID to attribute waves to specific dispatch locations.
if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH) ) {
auto dispatchPacket = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet);
dispatchPacket->reserved2 = timestamp_->command().profilingInfo().correlation_id_;
}
ProfilingSignal* current_signal = Barriers().GetLastSignal();
current_signal->flags_.isPacketDispatch_ = true;
}
// Make sure the slot is free for usage
while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= sw_queue_size) {
amd::Os::yield();
@@ -254,7 +254,7 @@ class VirtualGPU : public device::VirtualDevice {
//! Finds a free signal for the upcoming operation
hsa_signal_t ActiveSignal(hsa_signal_value_t init_val = kInitSignalValueOne,
Timestamp* ts = nullptr);
Timestamp* ts = nullptr, bool attach_signal = true);
//! Wait for the current active signal. Can idle the queue
bool WaitCurrent() {
+2 -4
مشاهده پرونده
@@ -277,7 +277,6 @@ bool Event::notifyCmdQueue(bool cpu_wait) {
notified_.clear();
return false;
}
ClPrint(LOG_DEBUG, LOG_CMD, "Queue marker to command queue: %p", queue);
command->enqueue();
// Save notification, associated with the current event
notify_event_ = command;
@@ -290,7 +289,6 @@ bool Event::notifyCmdQueue(bool cpu_wait) {
notified_.clear();
return false;
}
ClPrint(LOG_DEBUG, LOG_CMD, "Queue marker to command queue: %p", queue);
command->enqueue();
command->release();
}
@@ -356,8 +354,8 @@ void Command::enqueue() {
Agent::postEventCreate(as_cl(static_cast<Event*>(this)), type_);
}
ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p",
amd::activity_prof::getOclCommandKindString(this->type()), this);
ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p to queue: %p",
amd::activity_prof::getOclCommandKindString(this->type()), this, queue_);
// Direct dispatch logic below will submit the command immediately, but the command status
// update will occur later after flush() with a wait
+1 -1
مشاهده پرونده
@@ -213,7 +213,7 @@ class Event : public RuntimeObject {
//! Returns the callback for this event
const CallBackEntry* Callback() const { return callbacks_; }
// Saves HW event, associated with the current command
//! Saves HW event, associated with the current command
void SetHwEvent(void* hw_event) { hw_event_ = hw_event; }
//! Returns HW event, associated with the current command