Files
rocm-systems/shared/amdgpu-windows-interop/pal/inc/gpuUtil/palQueueTimingsTraceSource.h
T
2025-11-05 15:38:23 -05:00

237 baris
10 KiB
C++

/*
***********************************************************************************************************************
*
* Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
#pragma once
#include "palGpuUtil.h"
#include "palTraceSession.h"
#include "palGpaSession.h"
#include <atomic>
struct SqttQueueEventRecord;
struct SqttQueueInfoRecord;
namespace Pal
{
class Platform;
}
namespace GpuUtil
{
namespace TraceChunk
{
/// "QueueInfo" RDF chunk identifier & version
constexpr char QueueInfoChunkId[TextIdentifierSize] = "QueueInfo";
constexpr Pal::uint32 QueueInfoChunkVersion = 1;
/// Enum describing logical queue types
enum class QueueType : Pal::uint8
{
Unknown = 0,
Universal = 1,
Compute = 2,
Dma = 3,
Encode = 4,
Decode = 5,
Security = 6,
VideoProcessor = 7
};
/// Enum describing hardware engine types
enum class HwEngineType : Pal::uint8
{
Unknown = 0,
Universal = 1,
Compute = 2,
ExclusiveCompute = 3,
Dma = 4,
Decode = 5,
Encode = 6,
HighPriorityUniversal = 7,
HighPriorityGraphics = 8,
Security = 9,
Vpe = 10
};
/// Structure describing a queue's properties
struct QueueInfo
{
Pal::uint32 pciId; ///< The ID of the GPU queried
Pal::uint64 queueId; ///< API-specific queue ID
Pal::uint64 queueContext; ///< OS-level queue context value from Windows KMD to correlate with ETW data.
/// Only applicable to D3D on Windows; 0 otherwise.
QueueType queueType; ///< The logical queue type
HwEngineType engineType; ///< The hardware engine that the queue is mapped to
};
// ------------------------------------------------------------------------------------------- //
/// "QueueEvent" RDF chunk identifier & version
constexpr char QueueEventChunkId[TextIdentifierSize] = "QueueEvent";
constexpr Pal::uint32 QueueEventChunkVersion = 1;
/// The type of queue-level timings event
enum class QueueEventType : Pal::uint32
{
CmdBufSubmit = 0,
SignalSemaphore = 1,
WaitSemaphore = 2,
Present = 3
};
/// Structure describing a queue-level timings event
struct QueueEvent
{
Pal::uint32 pciId; ///< The ID of the GPU queried
Pal::uint64 queueId; ///< The API-specific queue ID which triggered the event
QueueEventType eventType; ///< The type of the queue-timing event
Pal::uint32 sqttCmdBufId; ///< [`CmdBufSubmit` only; 0 otherwise]
/// SQTT command buffer ID matching CmdBufStart user data marker
Pal::uint64 frameIndex; ///< [`CmdBufSubmit` & `Present` only; 0 otherwise]
/// Global frame index incremented for each "Present" call
Pal::uint32 submitSubIndex; ///< [`CmdBufSubmit` only; 0 otherwise]
/// Sub-index of event within submission.
/// When there is only one CmdBuffer per submission, `submitSubIndex` is 0.
/// When there are multiple command buffers per submission, `submitSubIndex`
/// is incremented by one for each command buffer within the submission.
Pal::uint64 apiEventId; ///< [`CmdBufSubmit`] API-specific command buffer ID signaled
/// [`SignalSemaphore`] API-specific semaphore ID signaled
/// [`WaitSemaphore`] API-specific semaphore ID waited on
/// [`Present`] N/A (set to 0)
Pal::uint64 cpuTimestamp; ///< CPU start timestamp of when this event is triggered in clock cycle units
Pal::uint64 gpuTimestamp1; ///< [`CmdBufSubmit`] GPU timestamp when the HW execution of command buffer began
/// [`SignalSemaphore`] GPU timestamp when the HW signaled the queue semaphore
/// [`WaitSemaphore`] GPU timestamp when HW finished waiting on the semaphore
/// [`Present`] GPU timestamp when HW processed the Present call
///
/// All timestamps are expressed in clock cycle units.
Pal::uint64 gpuTimestamp2; ///< [`CmdBufSubmit` only; 0 otherwise]
/// GPU timestamp when the HW execution of command buffer finished
};
} // namespace TraceChunk
// QueueTimings Trace Source name & version
constexpr char QueueTimingsTraceSourceName[] = "queuetimings";
constexpr Pal::uint32 QueueTimingsTraceSourceVersion = 2;
// =====================================================================================================================
// This trace source captures queue timings data through GPA session & produces "QueueInfo" and "QueueEvent" RDF chunks
class QueueTimingsTraceSource : public ITraceSource
{
public:
explicit QueueTimingsTraceSource(Pal::IPlatform* pPlatform);
virtual ~QueueTimingsTraceSource();
// ==== TraceSource Native Functions ========================================================================== //
Pal::Result Init(Pal::IDevice* pDevice);
Pal::Result RegisterTimedQueue(Pal::IQueue* pQueue,
Pal::uint64 queueId,
Pal::uint64 queueContext);
Pal::Result UnregisterTimedQueue(Pal::IQueue* pQueue);
Pal::Result TimedSubmit(Pal::IQueue* pQueue,
const Pal::MultiSubmitInfo& submitInfo,
const TimedSubmitInfo& timedSubmitInfo);
Pal::Result TimedSignalQueueSemaphore(Pal::IQueue* pQueue,
Pal::IQueueSemaphore* pQueueSemaphore,
const TimedQueueSemaphoreInfo& timedSignalInfo,
Pal::uint64 value = 0);
Pal::Result TimedWaitQueueSemaphore(Pal::IQueue* pQueue,
Pal::IQueueSemaphore* pQueueSemaphore,
const TimedQueueSemaphoreInfo& timedWaitInfo,
Pal::uint64 value = 0);
Pal::Result TimedQueuePresent(Pal::IQueue* pQueue,
const TimedQueuePresentInfo& timedPresentInfo);
Pal::Result ExternalTimedWaitQueueSemaphore(Pal::uint64 queueContext,
Pal::uint64 cpuSubmissionTimestamp,
Pal::uint64 cpuCompletionTimestamp,
const TimedQueueSemaphoreInfo& timedWaitInfo);
Pal::Result ExternalTimedSignalQueueSemaphore(Pal::uint64 queueContext,
Pal::uint64 cpuSubmissionTimestamp,
Pal::uint64 cpuCompletionTimestamp,
const TimedQueueSemaphoreInfo& timedSignalInfo);
bool IsTimingInProgress() const;
// ==== Base Class Overrides =================================================================================== //
virtual void OnConfigUpdated(DevDriver::StructuredValue* pJsonConfig) override { };
virtual Pal::uint64 QueryGpuWorkMask() const override { return 0; }
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 908
virtual void OnTraceAccepted(Pal::uint32 gpuIndex, Pal::ICmdBuffer* pCmdBuf) override;
#else
virtual void OnTraceAccepted() override;
#endif
virtual void OnTraceBegin(Pal::uint32 gpuIndex, Pal::ICmdBuffer* pCmdBuf) override { };
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 939
virtual void OnPostambleEnd(
Pal::uint32 gpuIndex,
Pal::ICmdBuffer* pCmdBuf) override;
virtual void OnTraceEnd(
Pal::uint32 gpuIndex,
Pal::ICmdBuffer* pCmdBuf) override {};
#else
virtual void OnTraceEnd(
Pal::uint32 gpuIndex,
Pal::ICmdBuffer* pCmdBuf) override;
#endif
virtual void OnTraceFinished() override;
virtual const char* GetName() const override { return QueueTimingsTraceSourceName; }
virtual Pal::uint32 GetVersion() const override { return QueueTimingsTraceSourceVersion; }
private:
void WriteQueueInfoChunks(
const SqttQueueInfoRecord* pQueueInfoRecords,
size_t numQueueInfoRecords);
void WriteQueueEventChunks(
const SqttQueueInfoRecord* pQueueInfoRecords,
size_t numQueueInfoRecords,
const SqttQueueEventRecord* pQueueEventRecords,
size_t numQueueEventRecords);
void ReportInternalError(const char* pErrorMsg, Pal::Result result);
Pal::IPlatform* const m_pPlatform; // IPlatform owning the parent TraceSession
GpaSession* m_pGpaSession; // Handle to GpaSession object for tracking queue timings
bool m_traceIsHealthy; // Internal flag for tracking resource and state health
std::atomic<bool> m_timingInProgress; // Flag for tracking if queue timings operations are ongoing
};
} // namespace GpuUtil