Add realtime clock agent info to ATT records (#319)

* Add agent info to ATT records

* Fix names

* Update projects/aqlprofile/src/pm4/sqtt_builder.h
This commit is contained in:
Giovanni Lenzi Baraldi
2025-08-14 13:35:00 +02:00
committato da GitHub
parent 9df2c1ec68
commit 75eb06dc18
5 ha cambiato i file con 54 aggiunte e 20 eliminazioni
@@ -224,6 +224,7 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL:
trace_config.perfCTRL = ((p->value & 0x1F) << 8) | 0xFFFF007F;
trace_config.perfPeriod = p->value + 1;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME:
if (trace_config.perfcounters.size() >= 8) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+35 -8
Vedi File
@@ -37,14 +37,22 @@ namespace pm4_builder {
class CmdBuffer;
class CmdBuilder;
constexpr size_t ATT_CODEOBJ_OPCODE = 4;
constexpr size_t ATT_TIMESTAMP_OPCODE = 5;
// Opcodes stored in att_decoder_packet_header_t::opcode.
// Values are spelled out explicitly so each opcode's numeric value is visible at a glance.
enum ATT_OPCODES {
  // Header opcode used for code-object packets.
  ATT_CODEOBJ_OPCODE = 4,
  // Header opcode used for timestamp packets.
  ATT_TIMESTAMP_OPCODE = 5,
  // Header opcode used for agent-info packets (see ATT_AGENT_INFO_TYPE).
  ATT_AGENT_INFO_OPCODE = 6,
};
// Sub-types placed in the header's `type` field when the opcode is ATT_AGENT_INFO_OPCODE.
enum ATT_AGENT_INFO_TYPE {
  // Payload is the agent timestamp frequency divided by 1000.
  ATT_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ = 0,
  // Payload is derived from CUs per SE, the perf-counter count and the perf period.
  ATT_AGENT_INFO_TYPE_COUNTER_FREQUENCY = 1,
};
// Header word for ATT decoder packets. The bit-fields give structured access;
// u32All exposes the same storage as one raw value, which is what gets written
// into the command buffer.
union att_decoder_packet_header_t {
struct {
// Packet opcode (an ATT_OPCODES value).
unsigned int opcode : 8;
// Opcode-specific sub-type, e.g. an ATT_AGENT_INFO_TYPE value or a channel index.
unsigned int type : 4;
// NOTE(review): `reserved` and `data20` are both listed as 20-bit fields; with
// opcode and type that adds up to 52 bits, more than a single unsigned int.
// This looks like a superseded line kept next to its replacement - confirm
// that only one of the two is meant to remain.
unsigned int reserved : 20;
// 20-bit payload; agent-info packets store their value here.
unsigned int data20 : 20;
};
// The whole header viewed as a single unsigned int.
unsigned int u32All;
};
@@ -160,7 +168,9 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
explicit GpuSqttBuilder(const AgentInfo* agent_info)
: builder(acquire_ip_offset_table(agent_info)),
xcc_number_(agent_info->xcc_num),
se_number_total(agent_info->se_num) {}
se_number_total(agent_info->se_num),
timestamp_freq(agent_info->timestamp_freq),
cu_per_se(agent_info->cu_num / agent_info->se_num) {}
// Overrides the base-class accessor and reports Primitives::TT_CONTROL_UTC_ERR_MASK.
size_t GetUTCErrorMask() const override {
  return Primitives::TT_CONTROL_UTC_ERR_MASK;
}
@@ -406,6 +416,21 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, header.u32All);
builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, 524801);
att_decoder_packet_header_t packet{};
packet.opcode = ATT_AGENT_INFO_OPCODE;
if (config->enable_rt_timestamp)
{
packet.type = ATT_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ;
packet.data20 = this->timestamp_freq / 1000;
builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, packet.u32All);
}
if (Primitives::GFXIP_LEVEL == 9 && config->perfcounters.size())
{
packet.type = ATT_AGENT_INFO_TYPE_COUNTER_FREQUENCY;
packet.data20 = (1 + cu_per_se) * ((config->perfcounters.size() + 3) & ~3) * config->perfPeriod;
builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, packet.u32All);
}
if (Primitives::GFXIP_LEVEL == 9 && config->enable_rt_timestamp)
{
for (size_t xcc = 0; xcc < GetXCCNumber(); xcc++)
@@ -566,7 +591,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
att_decoder_packet_header_t header{};
header.opcode = ATT_CODEOBJ_OPCODE;
header.type = channel;
header.reserved = 0;
header.data20 = 0;
auto userdata_channel = Primitives::SQ_THREAD_TRACE_USERDATA_2;
SetGRBMToBroadcast(cmd_buffer);
@@ -580,7 +605,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
att_decoder_packet_header_t header{};
header.opcode = ATT_TIMESTAMP_OPCODE;
header.type = 0;
header.reserved = 0;
header.data20 = 0;
SetGRBMToBroadcast(cmd_buffer);
builder.BuildGPUClockPacket(cmd_buffer, addr, Primitives::SQ_THREAD_TRACE_USERDATA_3, header.u32All);
@@ -594,8 +619,10 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
builder.BuildWritePConfigRegPacket(cmdbuf, reg, value);
}
size_t se_number_total;
size_t xcc_number_;
size_t se_number_total{};
size_t xcc_number_{};
uint32_t timestamp_freq{};
uint32_t cu_per_se{};
};
} // namespace pm4_builder
@@ -41,6 +41,7 @@ struct TraceConfig {
// PERF
uint32_t perfMASK = ~0u;
uint32_t perfCTRL = 0;
uint32_t perfPeriod = 0;
std::vector<std::pair<size_t, size_t>> perfcounters{};
// GC configurations used by both TT and SPM
uint32_t se_number = 0;
@@ -242,6 +242,8 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
&agent_info->simds_per_cu);
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
&agent_info->se_num);
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY),
&agent_info->timestamp_freq);
if (hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC),
&agent_info->xcc_num) != HSA_STATUS_SUCCESS) {
@@ -72,16 +72,16 @@ typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
// handle, name, max queue size, max wavefront size, etc.
struct AgentInfo {
// Handle of Agent
hsa_agent_t dev_id;
hsa_agent_t dev_id{};
// Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
uint32_t dev_type;
uint32_t dev_type{};
// APU flag
bool is_apu;
bool is_apu{};
// Agent system index
uint32_t dev_index;
uint32_t dev_index{};
// GFXIP name
char gfxip[64];
@@ -90,13 +90,13 @@ struct AgentInfo {
char name[64];
// Max size of Wavefront size
uint32_t max_wave_size;
uint32_t max_wave_size{};
// Max size of Queue buffer
uint32_t max_queue_size;
uint32_t max_queue_size{};
// Hsail profile supported by agent
hsa_profile_t profile;
hsa_profile_t profile{};
// CPU/GPU/kern-arg memory pools
hsa_amd_memory_pool_t cpu_pool;
@@ -104,16 +104,16 @@ struct AgentInfo {
hsa_amd_memory_pool_t kern_arg_pool;
// The number of compute unit available in the agent.
uint32_t cu_num;
uint32_t cu_num{};
// Maximum number of waves possible in a Compute Unit.
uint32_t waves_per_cu;
uint32_t waves_per_cu{};
// Number of SIMD's per compute unit CU
uint32_t simds_per_cu;
uint32_t simds_per_cu{};
// Number of Shader Engines (SE) in Gpu
uint32_t se_num;
uint32_t se_num{};
// Number of MI3000 XCC
uint32_t xcc_num{1};
@@ -125,7 +125,10 @@ struct AgentInfo {
uint32_t bdf_id{0};
// Number of Shader Arrays Per Shader Engines in Gpu
uint32_t shader_arrays_per_se;
uint32_t shader_arrays_per_se{};
// Timestamp frequency for realtime clock
uint32_t timestamp_freq{0};
};
// HSA timer class