SWDEV-489003 - [Ubertrace] OCL/HIP profiles are missing event instrumentation
Adds UberTrace support for pre-dispatch markers and barrier begin/end markers. Moves shared definitions out of palgpuopen.hpp into shared header palcapturemgr.hpp. Change-Id: I9f464c689e7ff12c54eca043fc1ad65e1836a64f
This commit is contained in:
committato da
Rakesh Roy
parent
c6e25b2be7
commit
541c449ce2
@@ -29,6 +29,227 @@ class Device;
|
||||
class VirtualGPU;
|
||||
class HSAILKernel;
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerIdentifier - Identifiers for RGP SQ thread-tracing markers (Table 1).
// The identifier is stored in the 4-bit `identifier` bit-field that begins every marker
// structure, so every enumerator must stay within the range [0x0, 0xF].
enum RgpSqttMarkerIdentifier : uint32_t {
  RgpSqttMarkerIdentifierEvent = 0x0,
  RgpSqttMarkerIdentifierCbStart = 0x1,
  RgpSqttMarkerIdentifierCbEnd = 0x2,
  RgpSqttMarkerIdentifierBarrierStart = 0x3,
  RgpSqttMarkerIdentifierBarrierEnd = 0x4,
  RgpSqttMarkerIdentifierUserEvent = 0x5,
  RgpSqttMarkerIdentifierGeneralApi = 0x6,
  RgpSqttMarkerIdentifierSync = 0x7,
  RgpSqttMarkerIdentifierPresent = 0x8,
  RgpSqttMarkerIdentifierLayoutTransition = 0x9,
  RgpSqttMarkerIdentifierRenderPass = 0xA,
  RgpSqttMarkerIdentifierReserved2 = 0xB,
  RgpSqttMarkerIdentifierBindPipeline = 0xC,
  RgpSqttMarkerIdentifierReserved4 = 0xD,
  RgpSqttMarkerIdentifierReserved5 = 0xE,
  RgpSqttMarkerIdentifierReserved6 = 0xF
};

// The last enumerator is also the largest value; it must fit the 4-bit identifier field
static_assert(RgpSqttMarkerIdentifierReserved6 <= 0xF,
              "RGP SQTT marker identifiers must fit in 4 bits");
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerEventType - Command type reported through the `apiType` field of an event marker.
// `Invalid` is a sentinel only: it does not fit the 24-bit `apiType` field and must never be
// written into a marker.
enum class RgpSqttMarkerEventType : uint32_t {
  CmdNDRangeKernel = 0,
  CmdScheduler = 1,
  CmdCopyBuffer = 2,
  CmdCopyImageToBuffer = 3,
  CmdCopyBufferToImage = 4,
  CmdFillBuffer = 5,
  CmdCopyImage = 6,
  CmdFillImage = 7,
  CmdPipelineBarrier = 8,
  InternalUnknown = 26,
  Invalid = 0xffffffff
};
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerEvent - "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker.
// These are generated ahead of draws or dispatches for commands that trigger generation of waves
// i.e. draws/dispatches (Table 4).
//
// The marker is exactly three dwords. Each dword can be accessed either through its named
// bit-fields or as a raw value (dword01..dword03).
struct RgpSqttMarkerEvent {
  union {
    struct {
      uint32_t identifier : 4;     // Identifier for this marker
      uint32_t extDwords : 3;      // Number of extra dwords following this marker
      uint32_t apiType : 24;       // The API type for this command
      uint32_t hasThreadDims : 1;  // Whether thread dimensions are included
    };

    uint32_t dword01;  // The first dword
  };

  union {
    // Some information about the vertex/instance/draw register indices. These values are not
    // always valid because they are not available for one reason or another:
    //
    // - If vertex offset index or instance offset index are not (together) valid, they are both
    //   equal to 0
    // - If draw index is not valid, it is equal to the vertex offset index
    struct {
      uint32_t cbID : 20;                 // Command buffer ID for this marker
      uint32_t vertexOffsetRegIdx : 4;    // SPI userdata register index for the first vertex
                                          // offset
      uint32_t instanceOffsetRegIdx : 4;  // SPI userdata register index for the first instance
                                          // offset
      uint32_t drawIndexRegIdx : 4;       // SPI userdata register index for the draw index
                                          // (multi draw indirect)
    };

    uint32_t dword02;  // The second dword
  };

  union {
    uint32_t cmdID;    // Command index within the command buffer
    uint32_t dword03;  // The third dword
  };
};

// Markers are emitted as raw dwords, so the layout must not pick up padding
static_assert(sizeof(RgpSqttMarkerEvent) == 3 * sizeof(uint32_t),
              "RgpSqttMarkerEvent must be exactly 3 dwords");
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerEventWithDims - Per-dispatch specific marker where workgroup dims are included
|
||||
struct RgpSqttMarkerEventWithDims {
|
||||
RgpSqttMarkerEvent
|
||||
event; // Per-draw/dispatch marker. API type should be Dispatch, threadDim = 1
|
||||
uint32_t threadX; // Work group count in X
|
||||
uint32_t threadY; // Work group count in Y
|
||||
uint32_t threadZ; // Work group count in Z
|
||||
};
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerBarrierStart - "Barrier Start" RGP SQTT instrumentation marker (Table 5).
//
// The marker is exactly two dwords. Each dword can be accessed either through its named
// bit-fields or as a raw value. Note that `dword02` overlays both `driverReason` and `internal`,
// so a store to `dword02` replaces both fields.
struct RgpSqttMarkerBarrierStart {
  union {
    struct {
      uint32_t identifier : 4;  // Identifier for this marker
      uint32_t extDwords : 3;   // Number of extra dwords following this marker
      uint32_t cbId : 20;       // Command buffer ID within queue
      uint32_t reserved : 5;    // Reserved
    };

    uint32_t dword01;  // The first dword
  };

  union {
    struct {
      uint32_t driverReason : 31;  // Lower 31 bits of the barrier reason
      uint32_t internal : 1;       // Top bit of the second dword
    };

    uint32_t dword02;  // The second dword
  };
};

// Markers are emitted as raw dwords, so the layout must not pick up padding
static_assert(sizeof(RgpSqttMarkerBarrierStart) == 2 * sizeof(uint32_t),
              "RgpSqttMarkerBarrierStart must be exactly 2 dwords");
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerBarrierEnd - "Barrier End" RGP SQTT instrumentation marker (Table 6).
//
// The marker is exactly two dwords: the first carries the header and the pipeline-stall flags,
// the second carries the cache-operation flags and the layout-transition count.
struct RgpSqttMarkerBarrierEnd {
  union {
    struct {
      uint32_t identifier : 4;      // Identifier for this marker
      uint32_t extDwords : 3;       // Number of extra dwords following this marker
      uint32_t cbId : 20;           // Command buffer ID within queue
      uint32_t waitOnEopTs : 1;     // Issued EOP_TS VGT event followed by a WAIT_REG_MEM for that
                                    // timestamp to be written. Quintessential full pipeline stall.
      uint32_t vsPartialFlush : 1;  // Stall at ME waiting for all prior VS waves to complete.
      uint32_t psPartialFlush : 1;  // Stall at ME waiting for all prior PS waves to complete.
      uint32_t csPartialFlush : 1;  // Stall at ME waiting for all prior CS waves to complete.
      uint32_t pfpSyncMe : 1;       // Stall PFP until ME is at same point in command stream.
    };

    uint32_t dword01;  // The first dword
  };

  union {
    struct {
      uint32_t syncCpDma : 1;  // Issue dummy CP-DMA command to confirm all prior CP-DMAs have
                               // completed.
      uint32_t invalTcp : 1;   // Invalidate the L1 vector caches.
      uint32_t invalSqI : 1;   // Invalidate the SQ instruction caches
      uint32_t invalSqK : 1;   // Invalidate the SQ constant caches (i.e. L1 scalar caches)
      uint32_t flushTcc : 1;   // Flush L2
      uint32_t invalTcc : 1;   // Invalidate L2
      uint32_t flushCb : 1;    // Flush CB caches (including DCC, cmask, fmask)
      uint32_t invalCb : 1;    // Invalidate CB caches (including DCC, cmask, fmask)
      uint32_t flushDb : 1;    // Flush DB caches (including htile)
      uint32_t invalDb : 1;    // Invalidate DB caches (including htile)
      uint32_t numLayoutTransitions : 16;  // Number of layout transitions following this packet
      uint32_t reserved : 6;               // Reserved for future expansion. Always 0
    };

    uint32_t dword02;  // The second dword
  };
};

// Markers are emitted as raw dwords, so the layout must not pick up padding
static_assert(sizeof(RgpSqttMarkerBarrierEnd) == 2 * sizeof(uint32_t),
              "RgpSqttMarkerBarrierEnd must be exactly 2 dwords");
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerPipelineBind - RGP SQ thread-tracing marker written whenever a pipeline is bound
// (Table 12).
//
// The marker is exactly three dwords: one header dword followed by the 64-bit API PSO hash,
// which can be accessed either as a two-element array or as dword02/dword03.
struct RgpSqttMarkerPipelineBind {
  union {
    struct {
      uint32_t identifier : 4;  // Identifier for this marker
      uint32_t extDwords : 3;   // Number of extra dwords following this marker
      uint32_t bindPoint : 1;   // The bind point of the pipeline within a queue
                                // 0 = graphics bind point
                                // 1 = compute bind point
      uint32_t cbID : 20;       // A command buffer ID encoded as per Table 13.
      uint32_t reserved : 4;    // Reserved
    };

    uint32_t dword01;  // The first dword
  };

  union {
    uint32_t apiPsoHash[2];  // The API PSO hash of the pipeline being bound
    struct {
      uint32_t dword02;  // The second dword
      uint32_t dword03;  // The third dword
    };
  };
};

// Markers are emitted as raw dwords, so the layout must not pick up padding
static_assert(sizeof(RgpSqttMarkerPipelineBind) == 3 * sizeof(uint32_t),
              "RgpSqttMarkerPipelineBind must be exactly 3 dwords");
|
||||
|
||||
// RGP SQTT Instrumentation Specification version (API-independent)
inline constexpr uint32_t RgpSqttInstrumentationSpecVersion = 1;

// RGP SQTT Instrumentation Specification version for Vulkan-specific tables
inline constexpr uint32_t RgpSqttInstrumentationApiVersion = 0;
|
||||
|
||||
// RgpSqttMarkerUserEventType - Data types used in RGP SQ thread-tracing markers for a user
// event. The value is carried in the `dataType` field of RgpSqttMarkerUserEvent.
enum RgpSqttMarkerUserEventType : uint32_t {
  RgpSqttMarkerUserEventTrigger = 0x0,
  RgpSqttMarkerUserEventPop = 0x1,
  RgpSqttMarkerUserEventPush = 0x2,
  RgpSqttMarkerUserEventObjectName = 0x3,
  RgpSqttMarkerUserEventReserved1 = 0x4,
  RgpSqttMarkerUserEventReserved2 = 0x5,
  RgpSqttMarkerUserEventReserved3 = 0x6,
  RgpSqttMarkerUserEventReserved4 = 0x7,
};
|
||||
|
||||
// RgpSqttMarkerUserEvent - RGP SQ thread-tracing marker for a user event.
// A single dword that can be accessed either through its bit-fields or as a raw value.
union RgpSqttMarkerUserEvent {
  struct {
    uint32_t identifier : 4;  // Identifier for this marker
    uint32_t extDwords : 8;   // Number of extra dwords following this marker
    uint32_t dataType : 8;    // The type for this marker
    uint32_t reserved : 12;   // reserved
  };

  uint32_t dword01;  // The first dword
};

// Markers are emitted as raw dwords, so the layout must not pick up padding
static_assert(sizeof(RgpSqttMarkerUserEvent) == sizeof(uint32_t),
              "RgpSqttMarkerUserEvent must be exactly 1 dword");
|
||||
|
||||
// Size, in dwords, of the RgpSqttMarkerUserEvent header
inline constexpr uint32_t RgpSqttMarkerUserEventWordCount = 1;

// The max length of a user event marker string, in dwords
inline constexpr size_t RgpSqttMaxUserEventStringLengthInDwords = 1024;
|
||||
|
||||
// RgpSqttMarkerUserEvent - RGP SQ thread-tracing marker for an user event with a string (push and
|
||||
// trigger data types)
|
||||
struct RgpSqttMarkerUserEventWithString {
|
||||
RgpSqttMarkerUserEvent header;
|
||||
|
||||
uint32_t stringLength; // Length of the string (in characters)
|
||||
uint32_t stringData[RgpSqttMaxUserEventStringLengthInDwords]; // String data in UTF-8 format
|
||||
};
|
||||
|
||||
// ================================================================================================
|
||||
class ICaptureMgr {
|
||||
public:
|
||||
|
||||
@@ -453,7 +453,8 @@ void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size
|
||||
WriteComputeBindMarker(gpu, kernel.prog().ApiHash());
|
||||
|
||||
WriteUserEventMarker(gpu, RgpSqttMarkerUserEventObjectName, kernel.name());
|
||||
// Write disaptch marker
|
||||
|
||||
// Write dispatch marker
|
||||
WriteEventWithDimsMarker(gpu, apiEvent, static_cast<uint32_t>(x), static_cast<uint32_t>(y),
|
||||
static_cast<uint32_t>(z));
|
||||
}
|
||||
|
||||
@@ -89,227 +89,6 @@ class HandlerServer;
|
||||
} // namespace DevDriver
|
||||
|
||||
namespace amd::pal {
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerIdentifier - Identifiers for RGP SQ thread-tracing markers (Table 1).
// The identifier is stored in the 4-bit `identifier` bit-field that begins every marker
// structure, so every enumerator must stay within the range [0x0, 0xF].
enum RgpSqttMarkerIdentifier : uint32_t {
  RgpSqttMarkerIdentifierEvent = 0x0,
  RgpSqttMarkerIdentifierCbStart = 0x1,
  RgpSqttMarkerIdentifierCbEnd = 0x2,
  RgpSqttMarkerIdentifierBarrierStart = 0x3,
  RgpSqttMarkerIdentifierBarrierEnd = 0x4,
  RgpSqttMarkerIdentifierUserEvent = 0x5,
  RgpSqttMarkerIdentifierGeneralApi = 0x6,
  RgpSqttMarkerIdentifierSync = 0x7,
  RgpSqttMarkerIdentifierPresent = 0x8,
  RgpSqttMarkerIdentifierLayoutTransition = 0x9,
  RgpSqttMarkerIdentifierRenderPass = 0xA,
  RgpSqttMarkerIdentifierReserved2 = 0xB,
  RgpSqttMarkerIdentifierBindPipeline = 0xC,
  RgpSqttMarkerIdentifierReserved4 = 0xD,
  RgpSqttMarkerIdentifierReserved5 = 0xE,
  RgpSqttMarkerIdentifierReserved6 = 0xF
};

// The last enumerator is also the largest value; it must fit the 4-bit identifier field
static_assert(RgpSqttMarkerIdentifierReserved6 <= 0xF,
              "RGP SQTT marker identifiers must fit in 4 bits");
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerEventType - Command type reported through the `apiType` field of an event marker.
// `Invalid` is a sentinel only: it does not fit the 24-bit `apiType` field and must never be
// written into a marker.
enum class RgpSqttMarkerEventType : uint32_t {
  CmdNDRangeKernel = 0,
  CmdScheduler = 1,
  CmdCopyBuffer = 2,
  CmdCopyImageToBuffer = 3,
  CmdCopyBufferToImage = 4,
  CmdFillBuffer = 5,
  CmdCopyImage = 6,
  CmdFillImage = 7,
  CmdPipelineBarrier = 8,
  InternalUnknown = 26,
  Invalid = 0xffffffff
};
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerEvent - "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker.
// These are generated ahead of draws or dispatches for commands that trigger generation of waves
// i.e. draws/dispatches (Table 4).
//
// The marker is exactly three dwords. Each dword can be accessed either through its named
// bit-fields or as a raw value (dword01..dword03).
struct RgpSqttMarkerEvent {
  union {
    struct {
      uint32_t identifier : 4;     // Identifier for this marker
      uint32_t extDwords : 3;      // Number of extra dwords following this marker
      uint32_t apiType : 24;       // The API type for this command
      uint32_t hasThreadDims : 1;  // Whether thread dimensions are included
    };

    uint32_t dword01;  // The first dword
  };

  union {
    // Some information about the vertex/instance/draw register indices. These values are not
    // always valid because they are not available for one reason or another:
    //
    // - If vertex offset index or instance offset index are not (together) valid, they are both
    //   equal to 0
    // - If draw index is not valid, it is equal to the vertex offset index
    struct {
      uint32_t cbID : 20;                 // Command buffer ID for this marker
      uint32_t vertexOffsetRegIdx : 4;    // SPI userdata register index for the first vertex
                                          // offset
      uint32_t instanceOffsetRegIdx : 4;  // SPI userdata register index for the first instance
                                          // offset
      uint32_t drawIndexRegIdx : 4;       // SPI userdata register index for the draw index
                                          // (multi draw indirect)
    };

    uint32_t dword02;  // The second dword
  };

  union {
    uint32_t cmdID;    // Command index within the command buffer
    uint32_t dword03;  // The third dword
  };
};

// Markers are emitted as raw dwords, so the layout must not pick up padding
static_assert(sizeof(RgpSqttMarkerEvent) == 3 * sizeof(uint32_t),
              "RgpSqttMarkerEvent must be exactly 3 dwords");
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerEventWithDims - Per-dispatch specific marker where workgroup dims are included
|
||||
struct RgpSqttMarkerEventWithDims {
|
||||
RgpSqttMarkerEvent
|
||||
event; // Per-draw/dispatch marker. API type should be Dispatch, threadDim = 1
|
||||
uint32_t threadX; // Work group count in X
|
||||
uint32_t threadY; // Work group count in Y
|
||||
uint32_t threadZ; // Work group count in Z
|
||||
};
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerBarrierStart - "Barrier Start" RGP SQTT instrumentation marker (Table 5).
//
// The marker is exactly two dwords. Each dword can be accessed either through its named
// bit-fields or as a raw value. Note that `dword02` overlays both `driverReason` and `internal`,
// so a store to `dword02` replaces both fields.
struct RgpSqttMarkerBarrierStart {
  union {
    struct {
      uint32_t identifier : 4;  // Identifier for this marker
      uint32_t extDwords : 3;   // Number of extra dwords following this marker
      uint32_t cbId : 20;       // Command buffer ID within queue
      uint32_t reserved : 5;    // Reserved
    };

    uint32_t dword01;  // The first dword
  };

  union {
    struct {
      uint32_t driverReason : 31;  // Lower 31 bits of the barrier reason
      uint32_t internal : 1;       // Top bit of the second dword
    };

    uint32_t dword02;  // The second dword
  };
};

// Markers are emitted as raw dwords, so the layout must not pick up padding
static_assert(sizeof(RgpSqttMarkerBarrierStart) == 2 * sizeof(uint32_t),
              "RgpSqttMarkerBarrierStart must be exactly 2 dwords");
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerBarrierEnd - "Barrier End" RGP SQTT instrumentation marker (Table 6).
//
// The marker is exactly two dwords: the first carries the header and the pipeline-stall flags,
// the second carries the cache-operation flags and the layout-transition count.
struct RgpSqttMarkerBarrierEnd {
  union {
    struct {
      uint32_t identifier : 4;      // Identifier for this marker
      uint32_t extDwords : 3;       // Number of extra dwords following this marker
      uint32_t cbId : 20;           // Command buffer ID within queue
      uint32_t waitOnEopTs : 1;     // Issued EOP_TS VGT event followed by a WAIT_REG_MEM for that
                                    // timestamp to be written. Quintessential full pipeline stall.
      uint32_t vsPartialFlush : 1;  // Stall at ME waiting for all prior VS waves to complete.
      uint32_t psPartialFlush : 1;  // Stall at ME waiting for all prior PS waves to complete.
      uint32_t csPartialFlush : 1;  // Stall at ME waiting for all prior CS waves to complete.
      uint32_t pfpSyncMe : 1;       // Stall PFP until ME is at same point in command stream.
    };

    uint32_t dword01;  // The first dword
  };

  union {
    struct {
      uint32_t syncCpDma : 1;  // Issue dummy CP-DMA command to confirm all prior CP-DMAs have
                               // completed.
      uint32_t invalTcp : 1;   // Invalidate the L1 vector caches.
      uint32_t invalSqI : 1;   // Invalidate the SQ instruction caches
      uint32_t invalSqK : 1;   // Invalidate the SQ constant caches (i.e. L1 scalar caches)
      uint32_t flushTcc : 1;   // Flush L2
      uint32_t invalTcc : 1;   // Invalidate L2
      uint32_t flushCb : 1;    // Flush CB caches (including DCC, cmask, fmask)
      uint32_t invalCb : 1;    // Invalidate CB caches (including DCC, cmask, fmask)
      uint32_t flushDb : 1;    // Flush DB caches (including htile)
      uint32_t invalDb : 1;    // Invalidate DB caches (including htile)
      uint32_t numLayoutTransitions : 16;  // Number of layout transitions following this packet
      uint32_t reserved : 6;               // Reserved for future expansion. Always 0
    };

    uint32_t dword02;  // The second dword
  };
};

// Markers are emitted as raw dwords, so the layout must not pick up padding
static_assert(sizeof(RgpSqttMarkerBarrierEnd) == 2 * sizeof(uint32_t),
              "RgpSqttMarkerBarrierEnd must be exactly 2 dwords");
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerPipelineBind - RGP SQ thread-tracing marker written whenever a pipeline is bound
// (Table 12).
//
// The marker is exactly three dwords: one header dword followed by the 64-bit API PSO hash,
// which can be accessed either as a two-element array or as dword02/dword03.
struct RgpSqttMarkerPipelineBind {
  union {
    struct {
      uint32_t identifier : 4;  // Identifier for this marker
      uint32_t extDwords : 3;   // Number of extra dwords following this marker
      uint32_t bindPoint : 1;   // The bind point of the pipeline within a queue
                                // 0 = graphics bind point
                                // 1 = compute bind point
      uint32_t cbID : 20;       // A command buffer ID encoded as per Table 13.
      uint32_t reserved : 4;    // Reserved
    };

    uint32_t dword01;  // The first dword
  };

  union {
    uint32_t apiPsoHash[2];  // The API PSO hash of the pipeline being bound
    struct {
      uint32_t dword02;  // The second dword
      uint32_t dword03;  // The third dword
    };
  };
};

// Markers are emitted as raw dwords, so the layout must not pick up padding
static_assert(sizeof(RgpSqttMarkerPipelineBind) == 3 * sizeof(uint32_t),
              "RgpSqttMarkerPipelineBind must be exactly 3 dwords");
|
||||
|
||||
|
||||
// RGP SQTT Instrumentation Specification version (API-independent)
inline constexpr uint32_t RgpSqttInstrumentationSpecVersion = 1;

// RGP SQTT Instrumentation Specification version for Vulkan-specific tables
inline constexpr uint32_t RgpSqttInstrumentationApiVersion = 0;
|
||||
|
||||
// RgpSqttMarkerUserEventType - Data types used in RGP SQ thread-tracing markers for a user
// event. The value is carried in the `dataType` field of RgpSqttMarkerUserEvent.
enum RgpSqttMarkerUserEventType : uint32_t {
  RgpSqttMarkerUserEventTrigger = 0x0,
  RgpSqttMarkerUserEventPop = 0x1,
  RgpSqttMarkerUserEventPush = 0x2,
  RgpSqttMarkerUserEventObjectName = 0x3,
  RgpSqttMarkerUserEventReserved1 = 0x4,
  RgpSqttMarkerUserEventReserved2 = 0x5,
  RgpSqttMarkerUserEventReserved3 = 0x6,
  RgpSqttMarkerUserEventReserved4 = 0x7,
};
|
||||
|
||||
// RgpSqttMarkerUserEvent - RGP SQ thread-tracing marker for a user event.
// A single dword that can be accessed either through its bit-fields or as a raw value.
union RgpSqttMarkerUserEvent {
  struct {
    uint32_t identifier : 4;  // Identifier for this marker
    uint32_t extDwords : 8;   // Number of extra dwords following this marker
    uint32_t dataType : 8;    // The type for this marker
    uint32_t reserved : 12;   // reserved
  };

  uint32_t dword01;  // The first dword
};

// Markers are emitted as raw dwords, so the layout must not pick up padding
static_assert(sizeof(RgpSqttMarkerUserEvent) == sizeof(uint32_t),
              "RgpSqttMarkerUserEvent must be exactly 1 dword");
|
||||
|
||||
// Size, in dwords, of the RgpSqttMarkerUserEvent header
inline constexpr uint32_t RgpSqttMarkerUserEventWordCount = 1;

// The max length of a user event marker string, in dwords
inline constexpr size_t RgpSqttMaxUserEventStringLengthInDwords = 1024;
|
||||
|
||||
// RgpSqttMarkerUserEvent - RGP SQ thread-tracing marker for an user event with a string (push and
|
||||
// trigger data types)
|
||||
struct RgpSqttMarkerUserEventWithString {
|
||||
RgpSqttMarkerUserEvent header;
|
||||
|
||||
uint32_t stringLength; // Length of the string (in characters)
|
||||
uint32_t stringData[RgpSqttMaxUserEventStringLengthInDwords]; // String data in UTF-8 format
|
||||
};
|
||||
|
||||
// ================================================================================================
|
||||
// This class provides functionality to interact with the GPU Open Developer Mode message passing
|
||||
|
||||
@@ -20,6 +20,10 @@
|
||||
|
||||
#include "device/pal/palubercapturemgr.hpp"
|
||||
#include "device/pal/paldevice.hpp"
|
||||
#include "device/pal/palvirtual.hpp"
|
||||
#include "device/pal/palprogram.hpp"
|
||||
#include "device/pal/palkernel.hpp"
|
||||
#include "device/pal/palblit.hpp"
|
||||
|
||||
#include "palPlatform.h"
|
||||
#include "palTraceSession.h"
|
||||
@@ -58,6 +62,8 @@ UberTraceCaptureMgr::UberTraceCaptureMgr(Pal::IPlatform* platform, const Device&
|
||||
: device_(device),
|
||||
dev_driver_server_(platform->GetDevDriverServer()),
|
||||
global_disp_count_(1), // Must start from 1 according to RGP spec
|
||||
user_event_(nullptr),
|
||||
current_event_id_(0),
|
||||
trace_session_(platform->GetTraceSession()),
|
||||
trace_controller_(nullptr),
|
||||
code_object_trace_source_(nullptr),
|
||||
@@ -74,6 +80,12 @@ bool UberTraceCaptureMgr::CreateUberTraceResources(Pal::IPlatform* platform) {
|
||||
bool success = false;
|
||||
|
||||
do {
|
||||
// Create the user event RGP marker
|
||||
user_event_ = new RgpSqttMarkerUserEventWithString;
|
||||
if (user_event_ == nullptr) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Initialize the renderop trace controller
|
||||
trace_controller_ = new GpuUtil::RenderOpTraceController(platform, device_.iDev());
|
||||
if (trace_controller_ == nullptr) {
|
||||
@@ -115,7 +127,11 @@ bool UberTraceCaptureMgr::CreateUberTraceResources(Pal::IPlatform* platform) {
|
||||
|
||||
// ================================================================================================
|
||||
void UberTraceCaptureMgr::DestroyUberTraceResources() {
|
||||
// Deallocate and unregister all created trace controllers & trace sources
|
||||
// RGP user event marker
|
||||
if (user_event_ != nullptr) {
|
||||
delete user_event_;
|
||||
user_event_ = nullptr;
|
||||
}
|
||||
|
||||
// RenderOp TraceController
|
||||
if (trace_controller_ != nullptr) {
|
||||
@@ -169,6 +185,39 @@ void UberTraceCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel
|
||||
trace_controller_->RecordRenderOp(pQueue,
|
||||
GpuUtil::RenderOpTraceController::RenderOp::RenderOpDispatch);
|
||||
|
||||
if (trace_session_->GetTraceSessionState() == GpuUtil::TraceSessionState::Running) {
|
||||
RgpSqttMarkerEventType apiEvent = RgpSqttMarkerEventType::CmdNDRangeKernel;
|
||||
|
||||
if (kernel.prog().isInternal()) {
|
||||
constexpr RgpSqttMarkerEventType ApiEvents[KernelBlitManager::BlitTotal] = {
|
||||
RgpSqttMarkerEventType::CmdCopyImage,
|
||||
RgpSqttMarkerEventType::CmdCopyImage,
|
||||
RgpSqttMarkerEventType::CmdCopyImageToBuffer,
|
||||
RgpSqttMarkerEventType::CmdCopyBufferToImage,
|
||||
RgpSqttMarkerEventType::CmdCopyBuffer,
|
||||
RgpSqttMarkerEventType::CmdCopyBuffer,
|
||||
RgpSqttMarkerEventType::CmdCopyBuffer,
|
||||
RgpSqttMarkerEventType::CmdCopyBuffer,
|
||||
RgpSqttMarkerEventType::CmdFillBuffer,
|
||||
RgpSqttMarkerEventType::CmdFillImage,
|
||||
RgpSqttMarkerEventType::CmdScheduler};
|
||||
|
||||
for (uint i = 0; i < KernelBlitManager::BlitTotal; ++i) {
|
||||
if (kernel.name().compare(BlitName[i]) == 0) {
|
||||
apiEvent = ApiEvents[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write the hash value
|
||||
WriteComputeBindMarker(gpu, kernel.prog().ApiHash());
|
||||
|
||||
// Write dispatch marker
|
||||
WriteEventWithDimsMarker(gpu, apiEvent, static_cast<uint32_t>(x), static_cast<uint32_t>(y),
|
||||
static_cast<uint32_t>(z));
|
||||
}
|
||||
|
||||
// Increment the global dispatch counter
|
||||
global_disp_count_++;
|
||||
}
|
||||
@@ -204,16 +253,6 @@ bool UberTraceCaptureMgr::IsQueueTimingActive() const {
|
||||
(queue_timings_trace_source_->IsTimingInProgress()));
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void UberTraceCaptureMgr::WriteBarrierStartMarker(const VirtualGPU* gpu,
|
||||
const Pal::Developer::BarrierData& data) const {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void UberTraceCaptureMgr::WriteBarrierEndMarker(const VirtualGPU* gpu,
|
||||
const Pal::Developer::BarrierData& data) const {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool UberTraceCaptureMgr::RegisterTimedQueue(uint32_t queue_id,
|
||||
Pal::IQueue* iQueue,
|
||||
@@ -290,4 +329,105 @@ uint64_t UberTraceCaptureMgr::AddElfBinary(const void* exe_binary, size_t exe_bi
|
||||
return elfBinaryInfo.originalHash;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void UberTraceCaptureMgr::WriteMarker(const VirtualGPU* gpu, const void* data,
|
||||
size_t data_size) const {
|
||||
assert((data_size % sizeof(uint32_t)) == 0);
|
||||
assert((data_size / sizeof(uint32_t)) > 0);
|
||||
|
||||
Pal::RgpMarkerSubQueueFlags subQueueFlags = {};
|
||||
subQueueFlags.includeMainSubQueue = 1;
|
||||
|
||||
gpu->queue(MainEngine).iCmd()->CmdInsertRgpTraceMarker(
|
||||
subQueueFlags, static_cast<uint32_t>(data_size / sizeof(uint32_t)), data);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
// Inserts a compute bind marker
|
||||
void UberTraceCaptureMgr::WriteComputeBindMarker(const VirtualGPU* gpu, uint64_t api_hash) const {
|
||||
RgpSqttMarkerPipelineBind marker = {};
|
||||
marker.identifier = RgpSqttMarkerIdentifierBindPipeline;
|
||||
marker.cbID = gpu->queue(MainEngine).cmdBufId();
|
||||
marker.bindPoint = 1;
|
||||
|
||||
memcpy(marker.apiPsoHash, &api_hash, sizeof(api_hash));
|
||||
WriteMarker(gpu, &marker, sizeof(marker));
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
// Inserts an RGP pre-dispatch marker
|
||||
void UberTraceCaptureMgr::WriteEventWithDimsMarker(const VirtualGPU* gpu,
|
||||
RgpSqttMarkerEventType apiType,
|
||||
uint32_t x, uint32_t y, uint32_t z) const {
|
||||
assert(apiType != RgpSqttMarkerEventType::Invalid);
|
||||
|
||||
RgpSqttMarkerEvent event = {};
|
||||
event.identifier = RgpSqttMarkerIdentifierEvent;
|
||||
event.apiType = static_cast<uint32_t>(apiType);
|
||||
event.cmdID = current_event_id_++;
|
||||
event.cbID = gpu->queue(MainEngine).cmdBufId();
|
||||
|
||||
RgpSqttMarkerEventWithDims eventWithDims = {};
|
||||
eventWithDims.event = event;
|
||||
eventWithDims.event.hasThreadDims = 1;
|
||||
eventWithDims.threadX = x;
|
||||
eventWithDims.threadY = y;
|
||||
eventWithDims.threadZ = z;
|
||||
|
||||
WriteMarker(gpu, &eventWithDims, sizeof(eventWithDims));
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void UberTraceCaptureMgr::WriteBarrierStartMarker(const VirtualGPU* gpu,
|
||||
const Pal::Developer::BarrierData& data) const {
|
||||
if (trace_session_->GetTraceSessionState() == GpuUtil::TraceSessionState::Running) {
|
||||
amd::ScopedLock traceLock(&trace_mutex_);
|
||||
|
||||
RgpSqttMarkerBarrierStart marker = {};
|
||||
marker.cbId = gpu->queue(MainEngine).cmdBufId();
|
||||
marker.identifier = RgpSqttMarkerIdentifierBarrierStart;
|
||||
marker.internal = true;
|
||||
marker.dword02 = data.reason;
|
||||
|
||||
WriteMarker(gpu, &marker, sizeof(marker));
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void UberTraceCaptureMgr::WriteBarrierEndMarker(const VirtualGPU* gpu,
|
||||
const Pal::Developer::BarrierData& data) const {
|
||||
if (trace_session_->GetTraceSessionState() == GpuUtil::TraceSessionState::Running) {
|
||||
amd::ScopedLock traceLock(&trace_mutex_);
|
||||
|
||||
// Copy the operations part and include the same data from previous markers
|
||||
// within the same barrier sequence to create a full picture of all cache
|
||||
// syncs and pipeline stalls.
|
||||
Pal::Developer::BarrierOperations operations = data.operations;
|
||||
operations.pipelineStalls.u16All |= 0;
|
||||
operations.caches.u16All |= 0;
|
||||
|
||||
RgpSqttMarkerBarrierEnd marker = {};
|
||||
marker.identifier = RgpSqttMarkerIdentifierBarrierEnd;
|
||||
marker.cbId = gpu->queue(MainEngine).cmdBufId();
|
||||
marker.numLayoutTransitions = 0;
|
||||
marker.waitOnEopTs = operations.pipelineStalls.eopTsBottomOfPipe;
|
||||
marker.vsPartialFlush = operations.pipelineStalls.vsPartialFlush;
|
||||
marker.psPartialFlush = operations.pipelineStalls.psPartialFlush;
|
||||
marker.csPartialFlush = operations.pipelineStalls.csPartialFlush;
|
||||
marker.pfpSyncMe = operations.pipelineStalls.pfpSyncMe;
|
||||
marker.syncCpDma = operations.pipelineStalls.syncCpDma;
|
||||
marker.invalTcp = operations.caches.invalTcp;
|
||||
marker.invalSqI = operations.caches.invalSqI$;
|
||||
marker.invalSqK = operations.caches.invalSqK$;
|
||||
marker.flushTcc = operations.caches.flushTcc;
|
||||
marker.invalTcc = operations.caches.invalTcc;
|
||||
marker.flushCb = operations.caches.flushCb;
|
||||
marker.invalCb = operations.caches.invalCb;
|
||||
marker.flushDb = operations.caches.flushDb;
|
||||
marker.invalDb = operations.caches.invalDb;
|
||||
|
||||
WriteMarker(gpu, &marker, sizeof(marker));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace amd::pal
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "device/pal/palcapturemgr.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
|
||||
namespace DevDriver
|
||||
{
|
||||
@@ -80,10 +81,18 @@ class UberTraceCaptureMgr final : public ICaptureMgr {
|
||||
bool CreateUberTraceResources(Pal::IPlatform* platform);
|
||||
void DestroyUberTraceResources();
|
||||
|
||||
void WriteMarker(const VirtualGPU* gpu, const void* data, size_t data_size) const;
|
||||
void WriteComputeBindMarker(const VirtualGPU* gpu, uint64_t api_hash) const;
|
||||
void WriteEventWithDimsMarker(const VirtualGPU* gpu, RgpSqttMarkerEventType apiType, uint32_t x,
|
||||
uint32_t y, uint32_t z) const;
|
||||
|
||||
const Device& device_;
|
||||
DevDriver::DevDriverServer* dev_driver_server_;
|
||||
uint64_t global_disp_count_;
|
||||
RgpSqttMarkerUserEventWithString* user_event_;
|
||||
mutable uint32_t current_event_id_;
|
||||
|
||||
mutable amd::Monitor trace_mutex_;
|
||||
GpuUtil::TraceSession* trace_session_;
|
||||
GpuUtil::RenderOpTraceController* trace_controller_;
|
||||
GpuUtil::CodeObjectTraceSource* code_object_trace_source_;
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user