SWDEV-543498 - Some compute Ubertrace profiles are missing queue timing data (#1146)

This commit is contained in:
pcritchl-amd
2025-10-22 08:56:33 -07:00
committed by GitHub
parent ee93c9ddab
commit 63a991a8b9
2 changed files with 54 additions and 1 deletions
@@ -57,6 +57,37 @@ UberTraceCaptureMgr* UberTraceCaptureMgr::Create(Pal::IPlatform* platform, const
return mgr;
}
static void UberTraceStateChangeCallback(const GpuUtil::TraceSession& pTraceSession,
GpuUtil::TraceSessionState newState,
void* pPrivateData)
{
UberTraceCaptureMgr* mgr = static_cast<UberTraceCaptureMgr*>(pPrivateData);
switch (newState)
{
// boundary for prepare-phase dispatches
case GpuUtil::TraceSessionState::Preparing:
// boundary for detailed capture
case GpuUtil::TraceSessionState::Running:
// boundary for end of detailed trace
#if (PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 939)
case GpuUtil::TraceSessionState::Postamble:
#endif
// boundary for end of trace
case GpuUtil::TraceSessionState::Waiting:
{
VirtualGPU* current_gpu = mgr->GetCurrentGPU();
if (current_gpu != nullptr) {
bool flush_success = current_gpu->queue(MainEngine).flush();
assert(flush_success);
}
break;
}
default:
break;
}
}
// ================================================================================================
UberTraceCaptureMgr::UberTraceCaptureMgr(Pal::IPlatform* platform, const Device& device)
: device_(device),
@@ -67,7 +98,8 @@ UberTraceCaptureMgr::UberTraceCaptureMgr(Pal::IPlatform* platform, const Device&
trace_session_(platform->GetTraceSession()),
trace_controller_(nullptr),
code_object_trace_source_(nullptr),
queue_timings_trace_source_(nullptr) {}
queue_timings_trace_source_(nullptr),
registered_trace_state_callback_(false) {}
// ================================================================================================
UberTraceCaptureMgr::~UberTraceCaptureMgr() { DestroyUberTraceResources(); }
@@ -116,6 +148,13 @@ bool UberTraceCaptureMgr::CreateUberTraceResources(Pal::IPlatform* platform) {
break;
}
result = trace_session_->RegisterTraceStateChangeCallback(UberTraceStateChangeCallback, this);
if (result != Pal::Result::Success) {
break;
}
registered_trace_state_callback_ = true;
success = true;
} while (false);
@@ -150,6 +189,11 @@ void UberTraceCaptureMgr::DestroyUberTraceResources() {
delete queue_timings_trace_source_;
queue_timings_trace_source_ = nullptr;
}
if (registered_trace_state_callback_) {
trace_session_->UnregisterTraceStateChangeCallback(UberTraceStateChangeCallback, this);
registered_trace_state_callback_ = false;
}
}
// ================================================================================================
@@ -182,7 +226,9 @@ void UberTraceCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel
GpuUtil::RenderOpCounts opCounts = {
.dispatchCount = 1u,
};
current_gpu_ = gpu;
trace_controller_->RecordRenderOps(pQueue, opCounts);
current_gpu_ = nullptr;
if (trace_session_->GetTraceSessionState() == GpuUtil::TraceSessionState::Running) {
RgpSqttMarkerEventType apiEvent = RgpSqttMarkerEventType::CmdNDRangeKernel;
@@ -67,6 +67,10 @@ class UberTraceCaptureMgr final : public ICaptureMgr {
size_t elf_binary_size, Pal::IGpuMemory* pGpuMemory,
size_t offset) override;
VirtualGPU* GetCurrentGPU() {
return current_gpu_;
}
private:
UberTraceCaptureMgr(Pal::IPlatform* platform, const Device& device);
bool Init(Pal::IPlatform* platform);
@@ -96,6 +100,9 @@ class UberTraceCaptureMgr final : public ICaptureMgr {
GpuUtil::CodeObjectTraceSource* code_object_trace_source_;
GpuUtil::QueueTimingsTraceSource* queue_timings_trace_source_;
VirtualGPU* current_gpu_;
bool registered_trace_state_callback_;
PAL_DISALLOW_DEFAULT_CTOR(UberTraceCaptureMgr);
PAL_DISALLOW_COPY_AND_ASSIGN(UberTraceCaptureMgr);
};