Merge commit '51c5343bf891848443cab2230615fdb287e3b918' into develop
Этот коммит содержится в:
@@ -618,7 +618,7 @@ class gfx10_cntx_prim {
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
|
||||
static uint32_t sqtt_buffer_size_value(uint64_t size_val, uint32_t base_hi) {
|
||||
#if SQTT_PRIM_ENABLED
|
||||
uint32_t size{0};
|
||||
size = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT) |
|
||||
|
||||
@@ -639,7 +639,7 @@ class gfx11_cntx_prim {
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
|
||||
static uint32_t sqtt_buffer_size_value(uint64_t size_val, uint32_t base_hi) {
|
||||
#if SQTT_PRIM_ENABLED
|
||||
uint32_t sq_thread_trace_buf0_size =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT) |
|
||||
|
||||
@@ -560,7 +560,7 @@ class gfx12_cntx_prim {
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
static uint32_t sqtt_buffer0_size_value(uint32_t size_val) {
|
||||
static uint32_t sqtt_buffer0_size_value(uint64_t size_val) {
|
||||
uint32_t sq_thread_trace_buf0_size{0};
|
||||
sq_thread_trace_buf0_size =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT);
|
||||
|
||||
@@ -683,7 +683,7 @@ class gfx9_cntx_prim {
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
|
||||
static uint32_t sqtt_buffer_size_value(uint64_t size_val, uint32_t base_hi) {
|
||||
uint32_t sq_thread_trace_size =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_SIZE, SIZE, (size_val >> TT_BUFF_ALIGN_SHIFT));
|
||||
return sq_thread_trace_size;
|
||||
|
||||
@@ -51,8 +51,12 @@ class Gfx9Factory : public Pm4Factory {
|
||||
class Mi100Factory : public Gfx9Factory {
|
||||
public:
|
||||
explicit Mi100Factory(const AgentInfo* agent_info);
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; }
|
||||
|
||||
virtual int GetAccumHiID() const override { return 158; }
|
||||
|
||||
virtual uint32_t GetSpmSampleDelayMax() { return 0x34; }
|
||||
virtual uint32_t GetSpmSampleDelayMax() { return 0x34; }
|
||||
|
||||
protected:
|
||||
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
|
||||
|
||||
@@ -248,9 +248,17 @@ typedef enum {
|
||||
hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
|
||||
aqlprofile_pmc_info_type_t attribute, void* value);
|
||||
|
||||
typedef enum aqlprofile_att_parameter_name_ext_t
|
||||
{
|
||||
/**
|
||||
* High 32 bits of the ATT buffer size; equals HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE + 1
|
||||
*/
|
||||
AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH = 11,
|
||||
} aqlprofile_att_parameter_name_ext_t;
|
||||
|
||||
// Profile parameter object
|
||||
typedef struct {
|
||||
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
|
||||
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name; // Or aqlprofile_att_parameter_name_ext_t
|
||||
union {
|
||||
uint32_t value;
|
||||
struct {
|
||||
|
||||
@@ -190,7 +190,7 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
|
||||
trace_config.se_mask = 0x11111111;
|
||||
|
||||
const size_t se_number_total = pm4_factory->GetShaderEnginesNumber();
|
||||
size_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;
|
||||
uint64_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;
|
||||
|
||||
if (profile.parameters)
|
||||
for (const auto* p = profile.parameters; p < profile.parameters + profile.parameter_count; p++)
|
||||
@@ -204,32 +204,6 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
|
||||
"ThreadTraceConfig: CuId must be between 0 and 15, TargetCu", p->value);
|
||||
trace_config.targetCu = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
|
||||
trace_config.vmIdMask = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
|
||||
if ((p->value & 0x50) != 0)
|
||||
throw aql_profile::aql_profile_exc_val<uint32_t>(
|
||||
"ThreadTraceConfig: Mask should have bits [4,6] set to Zero, Mask", p->value);
|
||||
trace_config.deprecated_mask = p->value;
|
||||
trace_config.targetCu = p->value & 0xF;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
|
||||
if ((p->value & 0xFF000000) != 0)
|
||||
throw aql_profile::aql_profile_exc_val<uint32_t>(
|
||||
"ThreadTraceConfig: TokenMask should have bits [31:25] set to Zero, TokenMask",
|
||||
p->value);
|
||||
trace_config.deprecated_tokenMask = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
|
||||
trace_config.deprecated_tokenMask2 = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE:
|
||||
trace_config.sampleRate = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT:
|
||||
trace_config.concurrent = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION:
|
||||
trace_config.simd_sel = p->value & 0xF;
|
||||
break;
|
||||
@@ -237,7 +211,10 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
|
||||
trace_config.occupancy_mode = p->value ? 1 : 0;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE:
|
||||
buffer_size = p->value;
|
||||
buffer_size = (buffer_size & ~static_cast<uint64_t>(UINT32_MAX)) | p->value;
|
||||
break;
|
||||
case AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH:
|
||||
buffer_size = (buffer_size & UINT32_MAX) | (uint64_t(p->value) << 32); // High 32 bits
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK:
|
||||
trace_config.perfMASK = p->value;
|
||||
|
||||
@@ -222,6 +222,18 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
|
||||
const uint64_t se_number_xcc = se_number_total / GetXCCNumber();
|
||||
uint64_t base_addr = reinterpret_cast<uint64_t>(config->data_buffer_ptr);
|
||||
const uint64_t base_step = GetBaseStep(config->data_buffer_size, config->se_mask);
|
||||
|
||||
// Old v1 API calls this with buffer == 0 first
|
||||
if (config->data_buffer_size > 0)
|
||||
{
|
||||
// Per-SE size must be below 16GB for gfx{9, 10, 12} and below 512MB for gfx11, with a minimum of 32 pages (128KB) per SE.
|
||||
if (base_step >= (1ul<<34) || (Primitives::GFXIP_LEVEL == 11 && base_step >= (1ul<<29)))
|
||||
throw std::runtime_error("SQTT Buffer size too high");
|
||||
else if (base_step < (1ul<<17))
|
||||
throw std::runtime_error("SQTT Buffer size too low");
|
||||
}
|
||||
|
||||
|
||||
config->capacity_per_se = base_step;
|
||||
config->capacity_per_disabled_se = 1 << Primitives::TT_BUFF_ALIGN_SHIFT;
|
||||
|
||||
@@ -331,7 +343,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
|
||||
|
||||
const unsigned baddr_lo = Low32(base_addr >> Primitives::TT_BUFF_ALIGN_SHIFT);
|
||||
const unsigned baddr_hi = High32(base_addr >> Primitives::TT_BUFF_ALIGN_SHIFT);
|
||||
const uint32_t sqtt_size = bMaskedIn ? base_step : config->capacity_per_disabled_se;
|
||||
const uint64_t sqtt_size = bMaskedIn ? base_step : config->capacity_per_disabled_se;
|
||||
const uint32_t ctrl_val = Primitives::sqtt_ctrl_value(true);
|
||||
|
||||
Select_GRBM_SE_SH0(cmd_buffer, index);
|
||||
@@ -432,7 +444,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
|
||||
// Initialize cache flush request object
|
||||
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->control_buffer_ptr),
|
||||
config->control_buffer_size);
|
||||
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->data_buffer_size),
|
||||
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->data_buffer_ptr),
|
||||
config->data_buffer_size);
|
||||
// Program zero size of thread trace buffer
|
||||
builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::SQ_THREAD_TRACE_SIZE_ADDR,
|
||||
|
||||
@@ -54,7 +54,7 @@ struct TraceConfig {
|
||||
void* control_buffer_ptr = nullptr;
|
||||
uint32_t control_buffer_size = 0;
|
||||
void* data_buffer_ptr = nullptr;
|
||||
uint32_t data_buffer_size = 0;
|
||||
uint64_t data_buffer_size = 0;
|
||||
|
||||
// concurrent kernels mode
|
||||
uint32_t concurrent = 0;
|
||||
|
||||
Ссылка в новой задаче
Block a user