Support ATT trace buffer sizes wider than 32 bits.

* aql_profile_v2.h: add AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH, which
  carries the upper 32 bits of the buffer size.
* threadtrace.cpp: accumulate buffer_size as uint64_t from the low/high
  parameters and drop the VM_ID_MASK, MASK, TOKEN_MASK, TOKEN_MASK2,
  SAMPLE_RATE and K_CONCURRENT cases.
* sqtt_builder.h: reject a non-zero buffer whose per-SE step is out of range,
  keep sqtt_size in 64 bits, and pass data_buffer_ptr (not data_buffer_size)
  as the buffer argument of the second BuildCacheFlushPacket call.
* trace_config.h: widen TraceConfig::data_buffer_size to uint64_t.

diff --git a/src/core/include/aqlprofile-sdk/aql_profile_v2.h b/src/core/include/aqlprofile-sdk/aql_profile_v2.h
index b20024e340..9abb749d38 100644
--- a/src/core/include/aqlprofile-sdk/aql_profile_v2.h
+++ b/src/core/include/aqlprofile-sdk/aql_profile_v2.h
@@ -248,9 +248,17 @@ typedef enum {
 hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
                                      aqlprofile_pmc_info_type_t attribute, void* value);
 
+typedef enum aqlprofile_att_parameter_name_ext_t
+{
+  /**
+   * Upper 32 bits of the ATT buffer size: HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE + 1
+   */
+  AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH = 11,
+} aqlprofile_att_parameter_name_ext_t;
+
 // Profile parameter object
 typedef struct {
-  hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
+  hsa_ven_amd_aqlprofile_parameter_name_t parameter_name; // Or aqlprofile_att_parameter_name_ext_t
   union {
     uint32_t value;
     struct {
diff --git a/src/core/threadtrace.cpp b/src/core/threadtrace.cpp
index e5f96e75a9..013d06c19b 100644
--- a/src/core/threadtrace.cpp
+++ b/src/core/threadtrace.cpp
@@ -190,7 +190,7 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
   trace_config.se_mask = 0x11111111;
 
   const size_t se_number_total = pm4_factory->GetShaderEnginesNumber();
-  size_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;
+  uint64_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;
 
   if (profile.parameters)
     for (const auto* p = profile.parameters; p < profile.parameters + profile.parameter_count; p++)
@@ -204,32 +204,6 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
                 "ThreadTraceConfig: CuId must be between 0 and 15, TargetCu", p->value);
           trace_config.targetCu = p->value;
           break;
-        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
-          trace_config.vmIdMask = p->value;
-          break;
-        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
-          if ((p->value & 0x50) != 0)
-            throw aql_profile::aql_profile_exc_val(
-                "ThreadTraceConfig: Mask should have bits [4,6] set to Zero, Mask", p->value);
-          trace_config.deprecated_mask = p->value;
-          trace_config.targetCu = p->value & 0xF;
-          break;
-        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
-          if ((p->value & 0xFF000000) != 0)
-            throw aql_profile::aql_profile_exc_val(
-                "ThreadTraceConfig: TokenMask should have bits [31:25] set to Zero, TokenMask",
-                p->value);
-          trace_config.deprecated_tokenMask = p->value;
-          break;
-        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
-          trace_config.deprecated_tokenMask2 = p->value;
-          break;
-        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE:
-          trace_config.sampleRate = p->value;
-          break;
-        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT:
-          trace_config.concurrent = p->value;
-          break;
         case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION:
           trace_config.simd_sel = p->value & 0xF;
           break;
@@ -237,7 +211,10 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
           trace_config.occupancy_mode = p->value ? 1 : 0;
           break;
         case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE:
-          buffer_size = p->value;
+          buffer_size = (buffer_size & ~static_cast<uint64_t>(UINT32_MAX)) | p->value;
+          break;
+        case AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH:
+          buffer_size = (buffer_size & UINT32_MAX) | (uint64_t(p->value) << 32); // High 32 bits
           break;
         case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK:
           trace_config.perfMASK = p->value;
diff --git a/src/pm4/sqtt_builder.h b/src/pm4/sqtt_builder.h
index 3c38bb5173..996fdd661a 100644
--- a/src/pm4/sqtt_builder.h
+++ b/src/pm4/sqtt_builder.h
@@ -222,6 +222,18 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
     const uint64_t se_number_xcc = se_number_total / GetXCCNumber();
     uint64_t base_addr = reinterpret_cast<uint64_t>(config->data_buffer_ptr);
     const uint64_t base_step = GetBaseStep(config->data_buffer_size, config->se_mask);
+
+    // Old v1 API calls this with buffer == 0 first
+    if (config->data_buffer_size > 0)
+    {
+      // Per-SE limits: below 16GB on gfx{9, 10, 12}, below 512MB on gfx11; at least 128KB (32 pages).
+      if (base_step >= (1ull << 34) || (Primitives::GFXIP_LEVEL == 11 && base_step >= (1ull << 29)))
+        throw std::runtime_error("SQTT Buffer size too high");
+      else if (base_step < (1ull << 17))
+        throw std::runtime_error("SQTT Buffer size too low");
+    }
+
+
     config->capacity_per_se = base_step;
     config->capacity_per_disabled_se = 1 << Primitives::TT_BUFF_ALIGN_SHIFT;
 
@@ -331,7 +343,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
     const unsigned baddr_lo = Low32(base_addr >> Primitives::TT_BUFF_ALIGN_SHIFT);
     const unsigned baddr_hi = High32(base_addr >> Primitives::TT_BUFF_ALIGN_SHIFT);
 
-    const uint32_t sqtt_size = bMaskedIn ? base_step : config->capacity_per_disabled_se;
+    const uint64_t sqtt_size = bMaskedIn ? base_step : config->capacity_per_disabled_se;
     const uint32_t ctrl_val = Primitives::sqtt_ctrl_value(true);
 
     Select_GRBM_SE_SH0(cmd_buffer, index);
@@ -432,7 +444,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
     // Initialize cache flush request object
     builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->control_buffer_ptr),
                                   config->control_buffer_size);
-    builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->data_buffer_size),
+    builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->data_buffer_ptr),
                                   config->data_buffer_size);
     // Program zero size of thread trace buffer
     builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::SQ_THREAD_TRACE_SIZE_ADDR,
diff --git a/src/pm4/trace_config.h b/src/pm4/trace_config.h
index 3112edab91..8fbd94f13f 100644
--- a/src/pm4/trace_config.h
+++ b/src/pm4/trace_config.h
@@ -54,7 +54,7 @@ struct TraceConfig {
   void* control_buffer_ptr = nullptr;
   uint32_t control_buffer_size = 0;
   void* data_buffer_ptr = nullptr;
-  uint32_t data_buffer_size = 0;
+  uint64_t data_buffer_size = 0;
 
   // concurrent kernels mode
   uint32_t concurrent = 0;