Adding high bits for ATT buffer size (#171)

* Adding high bits for ATT buffer size

* Copilot review comments

* Add buffer limits

* Update src/pm4/sqtt_builder.h

---------

Co-authored-by: Giovanni <gbaraldi@amd.com>
Этот коммит содержится в:
Baraldi, Giovanni
2025-07-23 00:53:55 +02:00
коммит произвёл GitHub
родитель 6f236ffb5f
Коммит 0bb1a61e82
4 изменённых файлов: 29 добавлений и 32 удалений
+9 -1
Просмотреть файл
@@ -248,9 +248,17 @@ typedef enum {
hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
aqlprofile_pmc_info_type_t attribute, void* value);
/**
 * Additional ATT parameter names that extend the values of
 * hsa_ven_amd_aqlprofile_parameter_name_t.
 */
typedef enum aqlprofile_att_parameter_name_ext_t {
  /**
   * Numerically equal to
   * HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE + 1.
   * Carries the high 32 bits of the ATT buffer size.
   */
  AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH = 11,
} aqlprofile_att_parameter_name_ext_t;
// Profile parameter object
typedef struct {
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name; // Or aqlprofile_att_parameter_name_ext_t
union {
uint32_t value;
struct {
+5 -28
Просмотреть файл
@@ -190,7 +190,7 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
trace_config.se_mask = 0x11111111;
const size_t se_number_total = pm4_factory->GetShaderEnginesNumber();
size_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;
uint64_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;
if (profile.parameters)
for (const auto* p = profile.parameters; p < profile.parameters + profile.parameter_count; p++)
@@ -204,32 +204,6 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
"ThreadTraceConfig: CuId must be between 0 and 15, TargetCu", p->value);
trace_config.targetCu = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
trace_config.vmIdMask = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
if ((p->value & 0x50) != 0)
throw aql_profile::aql_profile_exc_val<uint32_t>(
"ThreadTraceConfig: Mask should have bits [4,6] set to Zero, Mask", p->value);
trace_config.deprecated_mask = p->value;
trace_config.targetCu = p->value & 0xF;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
if ((p->value & 0xFF000000) != 0)
throw aql_profile::aql_profile_exc_val<uint32_t>(
"ThreadTraceConfig: TokenMask should have bits [31:25] set to Zero, TokenMask",
p->value);
trace_config.deprecated_tokenMask = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
trace_config.deprecated_tokenMask2 = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE:
trace_config.sampleRate = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT:
trace_config.concurrent = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION:
trace_config.simd_sel = p->value & 0xF;
break;
@@ -237,7 +211,10 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
trace_config.occupancy_mode = p->value ? 1 : 0;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE:
buffer_size = p->value;
buffer_size = (buffer_size & ~static_cast<uint64_t>(UINT32_MAX)) | p->value;
break;
case AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH:
buffer_size = (buffer_size & UINT32_MAX) | (uint64_t(p->value) << 32); // High 32 bits
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK:
trace_config.perfMASK = p->value;
+14 -2
Просмотреть файл
@@ -222,6 +222,18 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
const uint64_t se_number_xcc = se_number_total / GetXCCNumber();
uint64_t base_addr = reinterpret_cast<uint64_t>(config->data_buffer_ptr);
const uint64_t base_step = GetBaseStep(config->data_buffer_size, config->se_mask);
// Old v1 API calls this with buffer == 0 first
if (config->data_buffer_size > 0)
{
// Max 16GB for gfx{9, 10, 12} and 512MB for gfx11. Min of 32 pages per SE.
if (base_step >= (1ul<<34) || (Primitives::GFXIP_LEVEL == 11 && base_step >= (1ul<<29)))
throw std::runtime_error("SQTT Buffer size too high");
else if (base_step < (1ul<<17))
throw std::runtime_error("SQTT Buffer size too low");
}
config->capacity_per_se = base_step;
config->capacity_per_disabled_se = 1 << Primitives::TT_BUFF_ALIGN_SHIFT;
@@ -331,7 +343,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
const unsigned baddr_lo = Low32(base_addr >> Primitives::TT_BUFF_ALIGN_SHIFT);
const unsigned baddr_hi = High32(base_addr >> Primitives::TT_BUFF_ALIGN_SHIFT);
const uint32_t sqtt_size = bMaskedIn ? base_step : config->capacity_per_disabled_se;
const uint64_t sqtt_size = bMaskedIn ? base_step : config->capacity_per_disabled_se;
const uint32_t ctrl_val = Primitives::sqtt_ctrl_value(true);
Select_GRBM_SE_SH0(cmd_buffer, index);
@@ -432,7 +444,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
// Initialize cache flush request object
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->control_buffer_ptr),
config->control_buffer_size);
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->data_buffer_size),
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->data_buffer_ptr),
config->data_buffer_size);
// Program zero size of thread trace buffer
builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::SQ_THREAD_TRACE_SIZE_ADDR,
+1 -1
Просмотреть файл
@@ -54,7 +54,7 @@ struct TraceConfig {
void* control_buffer_ptr = nullptr;
uint32_t control_buffer_size = 0;
void* data_buffer_ptr = nullptr;
uint32_t data_buffer_size = 0;
uint64_t data_buffer_size = 0;
// concurrent kernels mode
uint32_t concurrent = 0;