SWDEV-521647 - Fix tracking of hw_event (#206)
- When a command can consist of two packets (for example, the device heap initializer followed by the main kernel packet) and no signal is attached to the main kernel packet, HW event tracking was broken: the command's HW event was set to the first packet's signal. - If no completion signal is attached to the second packet, clear the HW event for the command.
This commit is contained in:
zatwierdzone przez
GitHub
rodzic
ce24936970
commit
072fb0804e
@@ -2863,7 +2863,11 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_eve
|
||||
bool active_wait = !(hip_event_flags & kHipEventBlockingSync) && ActiveWait();
|
||||
return WaitForSignal(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_, active_wait);
|
||||
}
|
||||
return (hsa_signal_load_relaxed(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_) == 0);
|
||||
|
||||
auto signal = reinterpret_cast<ProfilingSignal*>(hw_event)->signal_;
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Check HW event = 0x%lx", signal.handle);
|
||||
|
||||
return (hsa_signal_load_relaxed(signal) == 0);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
|
||||
@@ -422,7 +422,20 @@ bool VirtualGPU::HwQueueTracker::Create() {
|
||||
|
||||
// ================================================================================================
|
||||
hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
|
||||
hsa_signal_value_t init_val, Timestamp* ts) {
|
||||
hsa_signal_value_t init_val, Timestamp* ts, bool attach_signal) {
|
||||
|
||||
amd::Command* cmd = gpu_.command();
|
||||
// If no signal is needed, decrement the refcount and clear the hw_event of current command
|
||||
if (!attach_signal) {
|
||||
if (nullptr != cmd) {
|
||||
if (cmd->HwEvent() != nullptr) {
|
||||
reinterpret_cast<ProfilingSignal*>(cmd->HwEvent())->release();
|
||||
}
|
||||
cmd->SetHwEvent(nullptr);
|
||||
}
|
||||
return hsa_signal_t {0};
|
||||
}
|
||||
|
||||
bool new_signal = false;
|
||||
|
||||
// Peep signal +2 ahead to see if its done
|
||||
@@ -503,8 +516,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
|
||||
prof_signal->engine_ = engine_;
|
||||
prof_signal->flags_.isPacketDispatch_ = false;
|
||||
|
||||
// Store the HW event
|
||||
amd::Command* cmd = gpu_.command();
|
||||
|
||||
if (nullptr != cmd) {
|
||||
// Release any existing HwEvent before setting new one for the same command
|
||||
if (cmd->HwEvent() != nullptr) {
|
||||
@@ -1026,24 +1038,25 @@ bool VirtualGPU::dispatchGenericAqlPacket(
|
||||
|
||||
fence_state_ = static_cast<Device::CacheState>(expected_fence_state);
|
||||
|
||||
if (timestamp_ != nullptr || attach_signal) {
|
||||
// Get active signal for current dispatch if profiling is necessary
|
||||
packet->completion_signal = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_);
|
||||
|
||||
if (std::is_same<decltype(packet), hsa_kernel_dispatch_packet_t*>::value) {
|
||||
// If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
|
||||
// retrieve this correlation ID to attribute waves to specific dispatch locations.
|
||||
if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH)) {
|
||||
auto dispatchPacket = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet);
|
||||
dispatchPacket->reserved2 = timestamp_->command().profilingInfo().correlation_id_;
|
||||
}
|
||||
|
||||
ProfilingSignal* current_signal = Barriers().GetLastSignal();
|
||||
current_signal->flags_.isPacketDispatch_ = true;
|
||||
bool attachSignal = timestamp_ != nullptr || attach_signal;
|
||||
// Get active signal for current dispatch if profiling is necessary
|
||||
packet->completion_signal = Barriers().ActiveSignal(kInitSignalValueOne,
|
||||
timestamp_, attachSignal);
|
||||
|
||||
if (std::is_same<decltype(packet), hsa_kernel_dispatch_packet_t*>::value
|
||||
&& timestamp_ != nullptr) {
|
||||
// If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
|
||||
// retrieve this correlation ID to attribute waves to specific dispatch locations.
|
||||
if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH) ) {
|
||||
auto dispatchPacket = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet);
|
||||
dispatchPacket->reserved2 = timestamp_->command().profilingInfo().correlation_id_;
|
||||
}
|
||||
|
||||
ProfilingSignal* current_signal = Barriers().GetLastSignal();
|
||||
current_signal->flags_.isPacketDispatch_ = true;
|
||||
}
|
||||
|
||||
|
||||
// Make sure the slot is free for usage
|
||||
while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= sw_queue_size) {
|
||||
amd::Os::yield();
|
||||
|
||||
@@ -254,7 +254,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
|
||||
//! Finds a free signal for the upcoming operation
|
||||
hsa_signal_t ActiveSignal(hsa_signal_value_t init_val = kInitSignalValueOne,
|
||||
Timestamp* ts = nullptr);
|
||||
Timestamp* ts = nullptr, bool attach_signal = true);
|
||||
|
||||
//! Wait for the current active signal. Can idle the queue
|
||||
bool WaitCurrent() {
|
||||
|
||||
@@ -277,7 +277,6 @@ bool Event::notifyCmdQueue(bool cpu_wait) {
|
||||
notified_.clear();
|
||||
return false;
|
||||
}
|
||||
ClPrint(LOG_DEBUG, LOG_CMD, "Queue marker to command queue: %p", queue);
|
||||
command->enqueue();
|
||||
// Save notification, associated with the current event
|
||||
notify_event_ = command;
|
||||
@@ -290,7 +289,6 @@ bool Event::notifyCmdQueue(bool cpu_wait) {
|
||||
notified_.clear();
|
||||
return false;
|
||||
}
|
||||
ClPrint(LOG_DEBUG, LOG_CMD, "Queue marker to command queue: %p", queue);
|
||||
command->enqueue();
|
||||
command->release();
|
||||
}
|
||||
@@ -356,8 +354,8 @@ void Command::enqueue() {
|
||||
Agent::postEventCreate(as_cl(static_cast<Event*>(this)), type_);
|
||||
}
|
||||
|
||||
ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p",
|
||||
amd::activity_prof::getOclCommandKindString(this->type()), this);
|
||||
ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p to queue: %p",
|
||||
amd::activity_prof::getOclCommandKindString(this->type()), this, queue_);
|
||||
|
||||
// Direct dispatch logic below will submit the command immediately, but the command status
|
||||
// update will occur later after flush() with a wait
|
||||
|
||||
@@ -213,7 +213,7 @@ class Event : public RuntimeObject {
|
||||
//! Returns the callback for this event
|
||||
const CallBackEntry* Callback() const { return callbacks_; }
|
||||
|
||||
// Saves HW event, associated with the current command
|
||||
//! Saves HW event, associated with the current command
|
||||
void SetHwEvent(void* hw_event) { hw_event_ = hw_event; }
|
||||
|
||||
//! Returns HW event, associated with the current command
|
||||
|
||||
Reference in New Issue
Block a user