Merge commit '51c5343bf891848443cab2230615fdb287e3b918' into develop

Этот коммит содержится в:
systems-assistant[bot]
2025-07-30 15:21:32 +00:00
родитель a1568172c9 51c5343bf8
Коммит a7cb68e38d
9 изменённых файлов: 38 добавлений и 37 удалений
+1 -1
Просмотреть файл
@@ -618,7 +618,7 @@ class gfx10_cntx_prim {
// Indicates the size of buffer to use per Shader Engine instance.
// The size is specified in terms of 4KB blocks
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
static uint32_t sqtt_buffer_size_value(uint64_t size_val, uint32_t base_hi) {
#if SQTT_PRIM_ENABLED
uint32_t size{0};
size = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT) |
+1 -1
Просмотреть файл
@@ -639,7 +639,7 @@ class gfx11_cntx_prim {
// Indicates the size of buffer to use per Shader Engine instance.
// The size is specified in terms of 4KB blocks
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
static uint32_t sqtt_buffer_size_value(uint64_t size_val, uint32_t base_hi) {
#if SQTT_PRIM_ENABLED
uint32_t sq_thread_trace_buf0_size =
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT) |
+1 -1
Просмотреть файл
@@ -560,7 +560,7 @@ class gfx12_cntx_prim {
// Indicates the size of buffer to use per Shader Engine instance.
// The size is specified in terms of 4KB blocks
static uint32_t sqtt_buffer0_size_value(uint32_t size_val) {
static uint32_t sqtt_buffer0_size_value(uint64_t size_val) {
uint32_t sq_thread_trace_buf0_size{0};
sq_thread_trace_buf0_size =
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT);
+1 -1
Просмотреть файл
@@ -683,7 +683,7 @@ class gfx9_cntx_prim {
// Indicates the size of buffer to use per Shader Engine instance.
// The size is specified in terms of 4KB blocks
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
static uint32_t sqtt_buffer_size_value(uint64_t size_val, uint32_t base_hi) {
uint32_t sq_thread_trace_size =
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_SIZE, SIZE, (size_val >> TT_BUFF_ALIGN_SHIFT));
return sq_thread_trace_size;
+5 -1
Просмотреть файл
@@ -51,8 +51,12 @@ class Gfx9Factory : public Pm4Factory {
class Mi100Factory : public Gfx9Factory {
public:
explicit Mi100Factory(const AgentInfo* agent_info);
virtual int GetAccumLowID() const override { return 1; }
virtual int GetAccumHiID() const override { return 158; }
virtual uint32_t GetSpmSampleDelayMax() { return 0x34; }
virtual uint32_t GetSpmSampleDelayMax() { return 0x34; }
protected:
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
+9 -1
Просмотреть файл
@@ -248,9 +248,17 @@ typedef enum {
hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
aqlprofile_pmc_info_type_t attribute, void* value);
/**
 * Extension values for the parameter-name field of a profile parameter object.
 * They are used in addition to the hsa_ven_amd_aqlprofile_parameter_name_t values.
 */
typedef enum aqlprofile_att_parameter_name_ext_t
{
/**
* Upper 32 bits of the ATT buffer size; the lower 32 bits are supplied through
* HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE.
* Numerically equal to HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE + 1.
*/
AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH = 11,
} aqlprofile_att_parameter_name_ext_t;
// Profile parameter object
typedef struct {
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name; // Or aqlprofile_att_parameter_name_ext_t
union {
uint32_t value;
struct {
+5 -28
Просмотреть файл
@@ -190,7 +190,7 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
trace_config.se_mask = 0x11111111;
const size_t se_number_total = pm4_factory->GetShaderEnginesNumber();
size_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;
uint64_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;
if (profile.parameters)
for (const auto* p = profile.parameters; p < profile.parameters + profile.parameter_count; p++)
@@ -204,32 +204,6 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
"ThreadTraceConfig: CuId must be between 0 and 15, TargetCu", p->value);
trace_config.targetCu = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
trace_config.vmIdMask = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
if ((p->value & 0x50) != 0)
throw aql_profile::aql_profile_exc_val<uint32_t>(
"ThreadTraceConfig: Mask should have bits [4,6] set to Zero, Mask", p->value);
trace_config.deprecated_mask = p->value;
trace_config.targetCu = p->value & 0xF;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
if ((p->value & 0xFF000000) != 0)
throw aql_profile::aql_profile_exc_val<uint32_t>(
"ThreadTraceConfig: TokenMask should have bits [31:25] set to Zero, TokenMask",
p->value);
trace_config.deprecated_tokenMask = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
trace_config.deprecated_tokenMask2 = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE:
trace_config.sampleRate = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT:
trace_config.concurrent = p->value;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION:
trace_config.simd_sel = p->value & 0xF;
break;
@@ -237,7 +211,10 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
trace_config.occupancy_mode = p->value ? 1 : 0;
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE:
buffer_size = p->value;
buffer_size = (buffer_size & ~static_cast<uint64_t>(UINT32_MAX)) | p->value;
break;
case AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH:
buffer_size = (buffer_size & UINT32_MAX) | (uint64_t(p->value) << 32); // High 32 bits
break;
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK:
trace_config.perfMASK = p->value;
+14 -2
Просмотреть файл
@@ -222,6 +222,18 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
const uint64_t se_number_xcc = se_number_total / GetXCCNumber();
uint64_t base_addr = reinterpret_cast<uint64_t>(config->data_buffer_ptr);
const uint64_t base_step = GetBaseStep(config->data_buffer_size, config->se_mask);
// Old v1 API calls this with buffer == 0 first, so an empty buffer is not validated.
if (config->data_buffer_size > 0)
{
  // Per-SE limits: max 16GB for gfx{9, 10, 12} and 512MB for gfx11; min of 32 pages per SE.
  // The bounds are built from a 64-bit operand on purpose: `1ul` is only 32 bits wide on
  // LLP64/ILP32 targets, where shifting it by 34 would be undefined behavior.
  if (base_step >= (uint64_t{1} << 34) ||
      (Primitives::GFXIP_LEVEL == 11 && base_step >= (uint64_t{1} << 29)))
    throw std::runtime_error("SQTT Buffer size too high");
  else if (base_step < (uint64_t{1} << 17))
    throw std::runtime_error("SQTT Buffer size too low");
}
config->capacity_per_se = base_step;
config->capacity_per_disabled_se = 1 << Primitives::TT_BUFF_ALIGN_SHIFT;
@@ -331,7 +343,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
const unsigned baddr_lo = Low32(base_addr >> Primitives::TT_BUFF_ALIGN_SHIFT);
const unsigned baddr_hi = High32(base_addr >> Primitives::TT_BUFF_ALIGN_SHIFT);
const uint32_t sqtt_size = bMaskedIn ? base_step : config->capacity_per_disabled_se;
const uint64_t sqtt_size = bMaskedIn ? base_step : config->capacity_per_disabled_se;
const uint32_t ctrl_val = Primitives::sqtt_ctrl_value(true);
Select_GRBM_SE_SH0(cmd_buffer, index);
@@ -432,7 +444,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
// Initialize cache flush request object
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->control_buffer_ptr),
config->control_buffer_size);
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->data_buffer_size),
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->data_buffer_ptr),
config->data_buffer_size);
// Program zero size of thread trace buffer
builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::SQ_THREAD_TRACE_SIZE_ADDR,
+1 -1
Просмотреть файл
@@ -54,7 +54,7 @@ struct TraceConfig {
void* control_buffer_ptr = nullptr;
uint32_t control_buffer_size = 0;
void* data_buffer_ptr = nullptr;
uint32_t data_buffer_size = 0;
uint64_t data_buffer_size = 0;
// concurrent kernels mode
uint32_t concurrent = 0;