ATT: emit agent-info packets (realtime clock frequency in kHz, perf-counter
frequency) into the thread trace so the decoder can interpret timestamps and
counter samples.

Review fixes folded into this patch:
  * perfPeriod is derived from the same 5-bit field that is programmed into
    perfCTRL, so both always describe the same period.
  * cu_per_se no longer divides by zero when AgentInfo::se_num is 0.
  * timestamp_freq is uint64_t, the size that the
    HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY query writes.

diff --git a/projects/aqlprofile/src/core/threadtrace.cpp b/projects/aqlprofile/src/core/threadtrace.cpp
index 8697191e63..b52af77897 100644
--- a/projects/aqlprofile/src/core/threadtrace.cpp
+++ b/projects/aqlprofile/src/core/threadtrace.cpp
@@ -224,6 +224,7 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
         break;
       case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL:
         trace_config.perfCTRL = ((p->value & 0x1F) << 8) | 0xFFFF007F;
+        trace_config.perfPeriod = (p->value & 0x1F) + 1;  // same 5-bit field as perfCTRL
         break;
       case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME:
         if (trace_config.perfcounters.size() >= 8) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
diff --git a/projects/aqlprofile/src/pm4/sqtt_builder.h b/projects/aqlprofile/src/pm4/sqtt_builder.h
index 36320c57f3..5de27768de 100644
--- a/projects/aqlprofile/src/pm4/sqtt_builder.h
+++ b/projects/aqlprofile/src/pm4/sqtt_builder.h
@@ -37,14 +37,22 @@ namespace pm4_builder {
 class CmdBuffer;
 class CmdBuilder;
 
-constexpr size_t ATT_CODEOBJ_OPCODE = 4;
-constexpr size_t ATT_TIMESTAMP_OPCODE = 5;
+enum ATT_OPCODES {
+  ATT_CODEOBJ_OPCODE = 4,
+  ATT_TIMESTAMP_OPCODE,
+  ATT_AGENT_INFO_OPCODE,  // header.type is an ATT_AGENT_INFO_TYPE, value is in data20
+};
+
+enum ATT_AGENT_INFO_TYPE {
+  ATT_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ = 0,
+  ATT_AGENT_INFO_TYPE_COUNTER_FREQUENCY,
+};
 
 union att_decoder_packet_header_t {
   struct {
     unsigned int opcode : 8;
     unsigned int type : 4;
-    unsigned int reserved : 20;
+    unsigned int data20 : 20;  // opcode-specific payload; written as 0 when unused
   };
   unsigned int u32All;
 };
@@ -160,7 +168,9 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
   explicit GpuSqttBuilder(const AgentInfo* agent_info)
       : builder(acquire_ip_offset_table(agent_info)),
         xcc_number_(agent_info->xcc_num),
-        se_number_total(agent_info->se_num) {}
+        se_number_total(agent_info->se_num),
+        timestamp_freq(agent_info->timestamp_freq),
+        cu_per_se(agent_info->se_num ? agent_info->cu_num / agent_info->se_num : 0) {}
 
   // Returns TT_CONTROL_UTC_ERR_MASK
   virtual size_t GetUTCErrorMask() const override { return Primitives::TT_CONTROL_UTC_ERR_MASK; };
@@ -406,6 +416,21 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
     builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, header.u32All);
     builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, 524801);
 
+    att_decoder_packet_header_t packet{};
+    packet.opcode = ATT_AGENT_INFO_OPCODE;
+
+    if (config->enable_rt_timestamp)
+    {
+      packet.type = ATT_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ;
+      packet.data20 = this->timestamp_freq / 1000;  // Hz -> kHz
+      builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, packet.u32All);
+    }
+    if (Primitives::GFXIP_LEVEL == 9 && config->perfcounters.size())
+    {
+      packet.type = ATT_AGENT_INFO_TYPE_COUNTER_FREQUENCY;
+      packet.data20 = (1 + cu_per_se) * ((config->perfcounters.size() + 3) & ~3) * config->perfPeriod;
+      builder.BuildWriteUConfigRegPacket(cmd_buffer, userdata_channel, packet.u32All);
+    }
     if (Primitives::GFXIP_LEVEL == 9 && config->enable_rt_timestamp)
     {
       for (size_t xcc = 0; xcc < GetXCCNumber(); xcc++)
@@ -566,7 +591,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
     att_decoder_packet_header_t header{};
     header.opcode = ATT_CODEOBJ_OPCODE;
     header.type = channel;
-    header.reserved = 0;
+    header.data20 = 0;
 
     auto userdata_channel = Primitives::SQ_THREAD_TRACE_USERDATA_2;
     SetGRBMToBroadcast(cmd_buffer);
@@ -580,7 +605,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
     att_decoder_packet_header_t header{};
     header.opcode = ATT_TIMESTAMP_OPCODE;
     header.type = 0;
-    header.reserved = 0;
+    header.data20 = 0;
     SetGRBMToBroadcast(cmd_buffer);
     builder.BuildGPUClockPacket(cmd_buffer, addr, Primitives::SQ_THREAD_TRACE_USERDATA_3,
                                 header.u32All);
@@ -594,8 +619,10 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
     builder.BuildWritePConfigRegPacket(cmdbuf, reg, value);
   }
 
-  size_t se_number_total;
-  size_t xcc_number_;
+  size_t se_number_total{};
+  size_t xcc_number_{};
+  uint64_t timestamp_freq{};  // Hz, copied from AgentInfo::timestamp_freq
+  uint32_t cu_per_se{};       // 0 when AgentInfo::se_num is 0
 };
 
 }  // namespace pm4_builder
diff --git a/projects/aqlprofile/src/pm4/trace_config.h b/projects/aqlprofile/src/pm4/trace_config.h
index 0c0b7a7cf3..f2cee49a23 100644
--- a/projects/aqlprofile/src/pm4/trace_config.h
+++ b/projects/aqlprofile/src/pm4/trace_config.h
@@ -41,6 +41,7 @@ struct TraceConfig {
   // PERF
   uint32_t perfMASK = ~0u;
   uint32_t perfCTRL = 0;
+  uint32_t perfPeriod = 0;  // stays 0 until a PERFCOUNTER_CTRL parameter is parsed
   std::vector> perfcounters{};
   // GC configurations used by both TT and SPM
   uint32_t se_number = 0;
diff --git a/projects/aqlprofile/src/util/hsa_rsrc_factory.cpp b/projects/aqlprofile/src/util/hsa_rsrc_factory.cpp
index 07aada76d7..8288c1b10f 100644
--- a/projects/aqlprofile/src/util/hsa_rsrc_factory.cpp
+++ b/projects/aqlprofile/src/util/hsa_rsrc_factory.cpp
@@ -242,6 +242,8 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
                        &agent_info->simds_per_cu);
     hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
                        &agent_info->se_num);
+    hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY),
+                       &agent_info->timestamp_freq);
 
     if (hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC),
                            &agent_info->xcc_num) != HSA_STATUS_SUCCESS) {
diff --git a/projects/aqlprofile/src/util/hsa_rsrc_factory.h b/projects/aqlprofile/src/util/hsa_rsrc_factory.h
index 33c935aa35..7e3d2b9cbc 100644
--- a/projects/aqlprofile/src/util/hsa_rsrc_factory.h
+++ b/projects/aqlprofile/src/util/hsa_rsrc_factory.h
@@ -72,16 +72,16 @@ typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
 // handle, name, max queue size, max wavefront size, etc.
 struct AgentInfo {
   // Handle of Agent
-  hsa_agent_t dev_id;
+  hsa_agent_t dev_id{};
 
   // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
-  uint32_t dev_type;
+  uint32_t dev_type{};
 
   // APU flag
-  bool is_apu;
+  bool is_apu{};
 
   // Agent system index
-  uint32_t dev_index;
+  uint32_t dev_index{};
 
   // GFXIP name
   char gfxip[64];
@@ -90,13 +90,13 @@ struct AgentInfo {
   char name[64];
 
   // Max size of Wavefront size
-  uint32_t max_wave_size;
+  uint32_t max_wave_size{};
 
   // Max size of Queue buffer
-  uint32_t max_queue_size;
+  uint32_t max_queue_size{};
 
   // Hsail profile supported by agent
-  hsa_profile_t profile;
+  hsa_profile_t profile{};
 
   // CPU/GPU/kern-arg memory pools
   hsa_amd_memory_pool_t cpu_pool;
@@ -104,16 +104,16 @@ struct AgentInfo {
   hsa_amd_memory_pool_t kern_arg_pool;
 
   // The number of compute unit available in the agent.
-  uint32_t cu_num;
+  uint32_t cu_num{};
 
   // Maximum number of waves possible in a Compute Unit.
-  uint32_t waves_per_cu;
+  uint32_t waves_per_cu{};
 
   // Number of SIMD's per compute unit CU
-  uint32_t simds_per_cu;
+  uint32_t simds_per_cu{};
 
   // Number of Shader Engines (SE) in Gpu
-  uint32_t se_num;
+  uint32_t se_num{};
 
   // Number of MI3000 XCC
   uint32_t xcc_num{1};
@@ -125,7 +125,10 @@ struct AgentInfo {
   uint32_t bdf_id{0};
 
   // Number of Shader Arrays Per Shader Engines in Gpu
-  uint32_t shader_arrays_per_se;
+  uint32_t shader_arrays_per_se{};
+
+  // Timestamp frequency for realtime clock, in Hz (the HSA query writes a uint64_t)
+  uint64_t timestamp_freq{0};
 };
 
 // HSA timer class