Add realtime clock agent info to ATT records (#319)
* Add agent info to ATT records
* Fix names
* Update projects/aqlprofile/src/pm4/sqtt_builder.h
This commit is contained in:
committed by
GitHub
parent
9df2c1ec68
commit
75eb06dc18
@@ -224,6 +224,7 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL:
|
||||
trace_config.perfCTRL = ((p->value & 0x1F) << 8) | 0xFFFF007F;
|
||||
trace_config.perfPeriod = p->value + 1;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME:
|
||||
if (trace_config.perfcounters.size() >= 8) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
@@ -37,14 +37,22 @@ namespace pm4_builder {
|
||||
class CmdBuffer;
|
||||
class CmdBuilder;
|
||||
|
||||
constexpr size_t ATT_CODEOBJ_OPCODE = 4;
|
||||
constexpr size_t ATT_TIMESTAMP_OPCODE = 5;
|
||||
// Opcode values written into the `opcode` field of att_decoder_packet_header_t.
// Only the first enumerator has an explicit value; the rest follow sequentially.
enum ATT_OPCODES {
|
||||
ATT_CODEOBJ_OPCODE = 4,  // header opcode for the packet that also carries a channel in `type`
|
||||
ATT_TIMESTAMP_OPCODE,  // = 5 (implicit); header opcode handed to BuildGPUClockPacket
|
||||
ATT_AGENT_INFO_OPCODE,  // = 6 (implicit); header opcode for agent-info packets (see ATT_AGENT_INFO_TYPE)
|
||||
};
|
||||
|
||||
// Sub-type stored in the `type` field of a packet whose opcode is ATT_AGENT_INFO_OPCODE.
enum ATT_AGENT_INFO_TYPE {
|
||||
ATT_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ = 0,  // payload is the agent timestamp frequency divided by 1000
|
||||
ATT_AGENT_INFO_TYPE_COUNTER_FREQUENCY,  // = 1 (implicit); payload is derived from cu_per_se, the perfcounter count and perfPeriod
|
||||
};
|
||||
|
||||
// Header word for ATT decoder packets. The anonymous bit-field struct and
// `u32All` share storage, so the fields can be set individually and the header
// emitted as a single unsigned int.
union att_decoder_packet_header_t {
|
||||
struct {
|
||||
unsigned int opcode : 8;  // one of ATT_OPCODES
|
||||
unsigned int type : 4;  // opcode-specific sub-type (e.g. an ATT_AGENT_INFO_TYPE value or a channel)
|
||||
unsigned int reserved : 20;  // NOTE(review): this view lists both `reserved` and `data20`; presumably `data20` replaces `reserved` — confirm against the actual header
|
||||
unsigned int data20 : 20;  // 20-bit payload
|
||||
};
|
||||
unsigned int u32All;  // the whole header as one value, as passed to the packet builders
|
||||
};
|
||||
@@ -160,7 +168,9 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
|
||||
explicit GpuSqttBuilder(const AgentInfo* agent_info)
|
||||
: builder(acquire_ip_offset_table(agent_info)),
|
||||
xcc_number_(agent_info->xcc_num),
|
||||
se_number_total(agent_info->se_num) {}
|
||||
se_number_total(agent_info->se_num),
|
||||
timestamp_freq(agent_info->timestamp_freq),
|
||||
cu_per_se(agent_info->cu_num / agent_info->se_num) {}
|
||||
|
||||
// Returns TT_CONTROL_UTC_ERR_MASK
|
||||
// Exposes the Primitives-level UTC error mask through the SqttBuilder interface.
size_t GetUTCErrorMask() const override {
  return Primitives::TT_CONTROL_UTC_ERR_MASK;
}
|
||||
@@ -406,6 +416,21 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
|
||||
builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, header.u32All);
|
||||
builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, 524801);
|
||||
|
||||
att_decoder_packet_header_t packet{};
|
||||
packet.opcode = ATT_AGENT_INFO_OPCODE;
|
||||
|
||||
if (config->enable_rt_timestamp)
|
||||
{
|
||||
packet.type = ATT_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ;
|
||||
packet.data20 = this->timestamp_freq / 1000;
|
||||
builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, packet.u32All);
|
||||
}
|
||||
if (Primitives::GFXIP_LEVEL == 9 && config->perfcounters.size())
|
||||
{
|
||||
packet.type = ATT_AGENT_INFO_TYPE_COUNTER_FREQUENCY;
|
||||
packet.data20 = (1 + cu_per_se) * ((config->perfcounters.size() + 3) & ~3) * config->perfPeriod;
|
||||
builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, packet.u32All);
|
||||
}
|
||||
if (Primitives::GFXIP_LEVEL == 9 && config->enable_rt_timestamp)
|
||||
{
|
||||
for (size_t xcc = 0; xcc < GetXCCNumber(); xcc++)
|
||||
@@ -566,7 +591,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
|
||||
att_decoder_packet_header_t header{};
|
||||
header.opcode = ATT_CODEOBJ_OPCODE;
|
||||
header.type = channel;
|
||||
header.reserved = 0;
|
||||
header.data20 = 0;
|
||||
auto userdata_channel = Primitives::SQ_THREAD_TRACE_USERDATA_2;
|
||||
|
||||
SetGRBMToBroadcast(cmd_buffer);
|
||||
@@ -580,7 +605,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
|
||||
att_decoder_packet_header_t header{};
|
||||
header.opcode = ATT_TIMESTAMP_OPCODE;
|
||||
header.type = 0;
|
||||
header.reserved = 0;
|
||||
header.data20 = 0;
|
||||
|
||||
SetGRBMToBroadcast(cmd_buffer);
|
||||
builder.BuildGPUClockPacket(cmd_buffer, addr, Primitives::SQ_THREAD_TRACE_USERDATA_3, header.u32All);
|
||||
@@ -594,8 +619,10 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
|
||||
builder.BuildWritePConfigRegPacket(cmdbuf, reg, value);
|
||||
}
|
||||
|
||||
size_t se_number_total;
|
||||
size_t xcc_number_;
|
||||
size_t se_number_total{};
|
||||
size_t xcc_number_{};
|
||||
uint32_t timestamp_freq{};
|
||||
uint32_t cu_per_se{};
|
||||
};
|
||||
|
||||
} // namespace pm4_builder
|
||||
|
||||
@@ -41,6 +41,7 @@ struct TraceConfig {
|
||||
// PERF
|
||||
uint32_t perfMASK = ~0u;
|
||||
uint32_t perfCTRL = 0;
|
||||
uint32_t perfPeriod = 0;
|
||||
std::vector<std::pair<size_t, size_t>> perfcounters{};
|
||||
// GC configurations used by both TT and SPM
|
||||
uint32_t se_number = 0;
|
||||
|
||||
@@ -242,6 +242,8 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
&agent_info->simds_per_cu);
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
|
||||
&agent_info->se_num);
|
||||
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY),
|
||||
&agent_info->timestamp_freq);
|
||||
|
||||
if (hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC),
|
||||
&agent_info->xcc_num) != HSA_STATUS_SUCCESS) {
|
||||
|
||||
@@ -72,16 +72,16 @@ typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
|
||||
// handle, name, max queue size, max wavefront size, etc.
|
||||
struct AgentInfo {
|
||||
// Handle of Agent
|
||||
hsa_agent_t dev_id;
|
||||
hsa_agent_t dev_id{};
|
||||
|
||||
// Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
|
||||
uint32_t dev_type;
|
||||
uint32_t dev_type{};
|
||||
|
||||
// APU flag
|
||||
bool is_apu;
|
||||
bool is_apu{};
|
||||
|
||||
// Agent system index
|
||||
uint32_t dev_index;
|
||||
uint32_t dev_index{};
|
||||
|
||||
// GFXIP name
|
||||
char gfxip[64];
|
||||
@@ -90,13 +90,13 @@ struct AgentInfo {
|
||||
char name[64];
|
||||
|
||||
// Maximum wavefront size
|
||||
uint32_t max_wave_size;
|
||||
uint32_t max_wave_size{};
|
||||
|
||||
// Max size of Queue buffer
|
||||
uint32_t max_queue_size;
|
||||
uint32_t max_queue_size{};
|
||||
|
||||
// Hsail profile supported by agent
|
||||
hsa_profile_t profile;
|
||||
hsa_profile_t profile{};
|
||||
|
||||
// CPU/GPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t cpu_pool;
|
||||
@@ -104,16 +104,16 @@ struct AgentInfo {
|
||||
hsa_amd_memory_pool_t kern_arg_pool;
|
||||
|
||||
// The number of compute units available in the agent.
|
||||
uint32_t cu_num;
|
||||
uint32_t cu_num{};
|
||||
|
||||
// Maximum number of waves possible in a Compute Unit.
|
||||
uint32_t waves_per_cu;
|
||||
uint32_t waves_per_cu{};
|
||||
|
||||
// Number of SIMDs per compute unit (CU)
|
||||
uint32_t simds_per_cu;
|
||||
uint32_t simds_per_cu{};
|
||||
|
||||
// Number of Shader Engines (SE) in Gpu
|
||||
uint32_t se_num;
|
||||
uint32_t se_num{};
|
||||
|
||||
// Number of MI3000 XCC
|
||||
uint32_t xcc_num{1};
|
||||
@@ -125,7 +125,10 @@ struct AgentInfo {
|
||||
uint32_t bdf_id{0};
|
||||
|
||||
// Number of Shader Arrays per Shader Engine in the GPU
|
||||
uint32_t shader_arrays_per_se;
|
||||
uint32_t shader_arrays_per_se{};
|
||||
|
||||
// Timestamp frequency for realtime clock
|
||||
uint32_t timestamp_freq{0};
|
||||
};
|
||||
|
||||
// HSA timer class
|
||||
|
||||
Reference in a new issue
Block a user