[aqlprofile] Misc formatting (#810)
* Remove duplicated code segment and apply formatting to some code regions
Цей коміт міститься в:
зафіксовано
GitHub
джерело
b6ffa43226
коміт
aa90a83e42
@@ -26,27 +26,29 @@
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
|
||||
#include "src/def/gpu_block_info.h"
|
||||
|
||||
#define COPY_DATA_SEL_REG 0 ///< Mem-mapped register
|
||||
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4
|
||||
#define COPY_DATA_SEL_COUNT_1DW 0 ///< Copy 1 word (32 bits)
|
||||
|
||||
// Counter Select Register value lambdas
|
||||
#define select_value(reg_name) \
|
||||
#define SELECT_VALUE(reg_name) \
|
||||
[](const counter_des_t& counter_des) { \
|
||||
uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id); \
|
||||
return select; \
|
||||
}
|
||||
#define select_value_t2(reg_name) \
|
||||
#define SELECT_VALUE_T2(reg_name) \
|
||||
[](const counter_des_t& counter_des) { \
|
||||
uint32_t select = SET_REG_FIELD_BITS(reg_name, PERFCOUNTER_SELECT, counter_des.id); \
|
||||
return select; \
|
||||
}
|
||||
#define select_value_t3(reg_name) \
|
||||
#define SELECT_VALUE_T3(reg_name) \
|
||||
[](const counter_des_t& counter_des) { \
|
||||
uint32_t select = SET_REG_FIELD_BITS(reg_name, CNTR_SEL0, counter_des.id); \
|
||||
return select; \
|
||||
}
|
||||
#define mc_select_value(reg_name) \
|
||||
#define MC_SELECT_VALUE(reg_name) \
|
||||
[](const counter_des_t& counter_des) { \
|
||||
uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id) | \
|
||||
SET_REG_FIELD_BITS(reg_name, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM) | \
|
||||
@@ -388,47 +390,47 @@ class gfx9_cntx_prim {
|
||||
|
||||
// MC registers values
|
||||
static auto constexpr mc_select_value_MC_VM_L2_PERFCOUNTER0_CFG =
|
||||
mc_select_value(MC_VM_L2_PERFCOUNTER0_CFG);
|
||||
MC_SELECT_VALUE(MC_VM_L2_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_ATC_L2_PERFCOUNTER0_CFG =
|
||||
mc_select_value(ATC_L2_PERFCOUNTER0_CFG);
|
||||
MC_SELECT_VALUE(ATC_L2_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_ATC_PERFCOUNTER0_CFG =
|
||||
mc_select_value(ATC_PERFCOUNTER0_CFG);
|
||||
MC_SELECT_VALUE(ATC_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_GCEA_PERFCOUNTER0_CFG =
|
||||
mc_select_value(GCEA_PERFCOUNTER0_CFG);
|
||||
MC_SELECT_VALUE(GCEA_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_RPB_PERFCOUNTER0_CFG =
|
||||
mc_select_value(RPB_PERFCOUNTER0_CFG);
|
||||
MC_SELECT_VALUE(RPB_PERFCOUNTER0_CFG);
|
||||
|
||||
static uint32_t mc_reset_value() { return MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM; }
|
||||
static uint32_t mc_start_value() { return MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM; }
|
||||
|
||||
static auto constexpr select_value_CB_PERFCOUNTER0_SELECT = select_value(CB_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_DB_PERFCOUNTER0_SELECT = select_value(DB_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CB_PERFCOUNTER0_SELECT = SELECT_VALUE(CB_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_DB_PERFCOUNTER0_SELECT = SELECT_VALUE(DB_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GRBM_PERFCOUNTER0_SELECT =
|
||||
select_value(GRBM_PERFCOUNTER0_SELECT);
|
||||
SELECT_VALUE(GRBM_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GRBM_SE0_PERFCOUNTER_SELECT =
|
||||
select_value(GRBM_SE0_PERFCOUNTER_SELECT);
|
||||
SELECT_VALUE(GRBM_SE0_PERFCOUNTER_SELECT);
|
||||
static auto constexpr select_value_PA_SU_PERFCOUNTER0_SELECT =
|
||||
select_value(PA_SU_PERFCOUNTER0_SELECT);
|
||||
SELECT_VALUE(PA_SU_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_PA_SC_PERFCOUNTER0_SELECT =
|
||||
select_value(PA_SC_PERFCOUNTER0_SELECT);
|
||||
SELECT_VALUE(PA_SC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SPI_PERFCOUNTER0_SELECT =
|
||||
select_value(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TA_PERFCOUNTER0_SELECT = select_value(TA_PERFCOUNTER0_SELECT);
|
||||
SELECT_VALUE(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TA_PERFCOUNTER0_SELECT = SELECT_VALUE(TA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCA_PERFCOUNTER0_SELECT =
|
||||
select_value(TCA_PERFCOUNTER0_SELECT);
|
||||
SELECT_VALUE(TCA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCC_PERFCOUNTER0_SELECT =
|
||||
select_value(TCC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TD_PERFCOUNTER0_SELECT = select_value(TD_PERFCOUNTER0_SELECT);
|
||||
SELECT_VALUE(TCC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TD_PERFCOUNTER0_SELECT = SELECT_VALUE(TD_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCP_PERFCOUNTER0_SELECT =
|
||||
select_value(TCP_PERFCOUNTER0_SELECT);
|
||||
SELECT_VALUE(TCP_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_VGT_PERFCOUNTER0_SELECT =
|
||||
select_value(VGT_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_IA_PERFCOUNTER0_SELECT = select_value(IA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_WD_PERFCOUNTER0_SELECT = select_value(WD_PERFCOUNTER0_SELECT);
|
||||
SELECT_VALUE(VGT_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_IA_PERFCOUNTER0_SELECT = SELECT_VALUE(IA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_WD_PERFCOUNTER0_SELECT = SELECT_VALUE(WD_PERFCOUNTER0_SELECT);
|
||||
|
||||
// static auto constexpr select_value_SX_PERFCOUNTER0_SELECT =
|
||||
// select_value_t2(SX_PERFCOUNTER0_SELECT); static auto constexpr
|
||||
// select_value_GDS_PERFCOUNTER0_SELECT = select_value_t2(GDS_PERFCOUNTER0_SELECT);
|
||||
// SELECT_VALUE_T2(SX_PERFCOUNTER0_SELECT); static auto constexpr
|
||||
// select_value_GDS_PERFCOUNTER0_SELECT = SELECT_VALUE_T2(GDS_PERFCOUNTER0_SELECT);
|
||||
|
||||
static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = [](const counter_des_t& counter_des) {
|
||||
return (uint32_t)0;
|
||||
@@ -437,9 +439,9 @@ class gfx9_cntx_prim {
|
||||
[](const counter_des_t& counter_des) { return (uint32_t)0; };
|
||||
|
||||
static auto constexpr select_value_CPC_PERFCOUNTER0_SELECT =
|
||||
select_value_t3(CPC_PERFCOUNTER0_SELECT);
|
||||
SELECT_VALUE_T3(CPC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPF_PERFCOUNTER0_SELECT =
|
||||
select_value_t3(CPF_PERFCOUNTER0_SELECT);
|
||||
SELECT_VALUE_T3(CPF_PERFCOUNTER0_SELECT);
|
||||
|
||||
static uint32_t spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcc_perfcounter0_select =
|
||||
@@ -726,4 +728,9 @@ class gfx9_cntx_prim {
|
||||
} // namespace gfx9
|
||||
} // namespace gfxip
|
||||
|
||||
#undef SELECT_VALUE
|
||||
#undef SELECT_VALUE_T2
|
||||
#undef SELECT_VALUE_T3
|
||||
#undef MC_SELECT_VALUE
|
||||
|
||||
#endif // _GFX9_PRIMITIVES_H_
|
||||
|
||||
@@ -301,9 +301,10 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
|
||||
// TODO: In order to get a different event for each instance with WGP counter blocks, we
|
||||
// need to loop through the WGPs instead of blindly broadcasting to every instance. Fortunately,
|
||||
// this is not a common practice
|
||||
const uint32_t grbm_value = (block_info->instance_count > 1 && !(block_info->attr & CounterBlockWgpAttr))
|
||||
? Primitives::grbm_inst_index_value(block_des.index)
|
||||
: Primitives::grbm_broadcast_value();
|
||||
const uint32_t grbm_value =
|
||||
(block_info->instance_count > 1 && !(block_info->attr & CounterBlockWgpAttr))
|
||||
? Primitives::grbm_inst_index_value(block_des.index)
|
||||
: Primitives::grbm_broadcast_value();
|
||||
SetGrbmGfxIndex(cmd_buffer, grbm_value, block_info->attr);
|
||||
// Reset counters
|
||||
if (block_info->attr & CounterBlockMcAttr) {
|
||||
@@ -592,41 +593,44 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
|
||||
grbm_value = Primitives::grbm_se_index_value(se_index);
|
||||
}
|
||||
|
||||
bool bIsWGPcounter11 = Primitives::GFXIP_LEVEL == 11 && (block_info->attr & CounterBlockSqAttr);
|
||||
bool bIsWGPcounter12 = Primitives::GFXIP_LEVEL >= 12 && (block_info->attr & CounterBlockWgpAttr);
|
||||
bool bIsWGPcounter11 =
|
||||
Primitives::GFXIP_LEVEL == 11 && (block_info->attr & CounterBlockSqAttr);
|
||||
bool bIsWGPcounter12 =
|
||||
Primitives::GFXIP_LEVEL >= 12 && (block_info->attr & CounterBlockWgpAttr);
|
||||
|
||||
if (bIsWGPcounter11) {
|
||||
for (int wgp=0; wgp<wgp_per_sa; wgp++) {
|
||||
grbm_value = Primitives::grbm_se_sh_wgp_index_value(se_index, sarray, wgp);
|
||||
SetGrbmGfxIndex(cmd_buffer, grbm_value);
|
||||
builder.BuildCopyCounterDataPacket(
|
||||
cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi,
|
||||
reinterpret_cast<uint32_t*>(data_buffer) + read_counter, 1);
|
||||
read_counter += 2;
|
||||
}
|
||||
} else if (bIsWGPcounter12) {
|
||||
for (int wgp=0; wgp<wgp_per_sa; wgp++) {
|
||||
if (block_info->instance_count > 1)
|
||||
grbm_value = Primitives::grbm_inst_se_sh_wgp_index_value(block_des.index, se_index, sarray, wgp);
|
||||
else
|
||||
if (bIsWGPcounter11) {
|
||||
for (int wgp = 0; wgp < wgp_per_sa; wgp++) {
|
||||
grbm_value = Primitives::grbm_se_sh_wgp_index_value(se_index, sarray, wgp);
|
||||
SetGrbmGfxIndex(cmd_buffer, grbm_value);
|
||||
uint32_t dw_mask = reg_info.register_addr_hi.offset ? 3 : 1;
|
||||
SetGrbmGfxIndex(cmd_buffer, grbm_value);
|
||||
builder.BuildCopyCounterDataPacket(
|
||||
cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi,
|
||||
reinterpret_cast<uint32_t*>(data_buffer) + read_counter, 1);
|
||||
read_counter += 2;
|
||||
}
|
||||
} else if (bIsWGPcounter12) {
|
||||
for (int wgp = 0; wgp < wgp_per_sa; wgp++) {
|
||||
if (block_info->instance_count > 1)
|
||||
grbm_value = Primitives::grbm_inst_se_sh_wgp_index_value(block_des.index,
|
||||
se_index, sarray, wgp);
|
||||
else
|
||||
grbm_value = Primitives::grbm_se_sh_wgp_index_value(se_index, sarray, wgp);
|
||||
SetGrbmGfxIndex(cmd_buffer, grbm_value);
|
||||
uint32_t dw_mask = reg_info.register_addr_hi.offset ? 3 : 1;
|
||||
builder.BuildCopyCounterDataPacket(
|
||||
cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi,
|
||||
reinterpret_cast<uint32_t*>(data_buffer) + read_counter, dw_mask);
|
||||
if (data_buffer && (dw_mask == 1))
|
||||
*(reinterpret_cast<uint32_t*>(data_buffer) + read_counter + 1) = 0;
|
||||
read_counter += 2;
|
||||
}
|
||||
} else {
|
||||
SetGrbmGfxIndex(cmd_buffer, grbm_value, block_info->attr);
|
||||
builder.BuildCopyCounterDataPacket(
|
||||
cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi,
|
||||
reinterpret_cast<uint32_t*>(data_buffer) + read_counter, dw_mask);
|
||||
if (data_buffer && (dw_mask == 1))
|
||||
*(reinterpret_cast<uint32_t*>(data_buffer) + read_counter + 1) = 0;
|
||||
reinterpret_cast<uint32_t*>(data_buffer) + read_counter, 3);
|
||||
read_counter += 2;
|
||||
}
|
||||
} else {
|
||||
SetGrbmGfxIndex(cmd_buffer, grbm_value, block_info->attr);
|
||||
builder.BuildCopyCounterDataPacket(
|
||||
cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi,
|
||||
reinterpret_cast<uint32_t*>(data_buffer) + read_counter, 3);
|
||||
read_counter += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Reset Grbm to its default state - broadcast
|
||||
@@ -641,54 +645,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
|
||||
SetGrbmBroadcast(cmd_buffer, counters_vec.get_attr());
|
||||
|
||||
uint32_t sdma_mask = 0;
|
||||
if (counters_vec.get_attr() & CounterBlockAidAttr)
|
||||
for (const auto& counter_des : counters_vec) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
const auto& block_des = counter_des.block_des;
|
||||
const auto* reg_table = get_reg_table(counter_des);
|
||||
const auto& reg_info = reg_table[counter_des.index];
|
||||
|
||||
if (!(block_info->attr & CounterBlockAidAttr))
|
||||
// skip all non-AID blocks
|
||||
continue;
|
||||
|
||||
// MI300 AID blocks: UMC/RPB/ATC/SDMA event insert master XCC PRED_EXEC packet here
|
||||
PrecExecBuilder<Builder> prec_exec_builder(builder, cmd_buffer, VIRTUALXCCID_SELECT,
|
||||
xcc_number_ > 1);
|
||||
|
||||
const auto target_aid_index = GetTargetAid(counter_des);
|
||||
uint64_t smn_control_addr = get_smn_addr(reg_info.control_addr, target_aid_index);
|
||||
|
||||
if (block_info->attr & CounterBlockUmcAttr) {
|
||||
// Stop UMC
|
||||
} else if (block_info->attr & (CounterBlockRpbAttr | CounterBlockAtcAttr)) {
|
||||
// Stop RPB/ATC
|
||||
builder.BuildWritePConfigRegPacket(cmd_buffer, smn_control_addr, 0);
|
||||
} else if (block_info->attr & CounterBlockSdmaAttr) {
|
||||
// Stop SDMA
|
||||
if (reg_info.control_addr.offset == 0) {
|
||||
// MI100: stopped per instance
|
||||
const uint32_t mask = 1u << counter_des.block_des.index;
|
||||
if ((sdma_mask & mask) == 0) {
|
||||
sdma_mask |= mask;
|
||||
auto control_addr = (reg_info.control_addr.offset == 0) ? reg_info.select_addr
|
||||
: reg_info.control_addr;
|
||||
builder.BuildWritePConfigRegPacket(cmd_buffer, control_addr,
|
||||
Primitives::sdma_stop_value(counter_des));
|
||||
}
|
||||
} else if (xcc_number_ > 1) {
|
||||
// MI300 SDMA event: insert master XCC PRED_EXEC packet here
|
||||
builder.BuildWritePConfigRegPacket(cmd_buffer, smn_control_addr,
|
||||
Primitives::sdma_stop_value(counter_des));
|
||||
} else {
|
||||
// MI200: stopped per counter to choose which counter to read
|
||||
builder.BuildWritePConfigRegPacket(cmd_buffer, reg_info.control_addr,
|
||||
Primitives::sdma_stop_value(counter_des));
|
||||
}
|
||||
}
|
||||
}
|
||||
sdma_mask = 0;
|
||||
if (counters_vec.get_attr() & CounterBlockAidAttr)
|
||||
if (counters_vec.get_attr() & CounterBlockAidAttr) {
|
||||
for (const auto& counter_des : counters_vec) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
const auto& block_des = counter_des.block_des;
|
||||
@@ -734,6 +691,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Issue barrier command to wait for commands to complete
|
||||
SetPerfmonCntl(cmd_buffer, Primitives::cp_perfmon_cntl_stop_value(), counters_vec.get_attr());
|
||||
@@ -755,7 +713,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
|
||||
SetPerfmonCntl(cmd_buffer, Primitives::cp_perfmon_cntl_read_value(), counters_vec.get_attr());
|
||||
|
||||
// counters have UMC events: MI300 Loop over MI300 XCCs for each counter_des
|
||||
if (counters_attr & CounterBlockAidAttr)
|
||||
if (counters_attr & CounterBlockAidAttr) {
|
||||
for (const auto& counter_des : counters_vec) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
const auto& block_des = counter_des.block_des;
|
||||
@@ -809,6 +767,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
|
||||
read_counter += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t xcc_selected = 0; xcc_selected < xcc_number_; ++xcc_selected) {
|
||||
PrecExecBuilder<Builder> prec_exec_builder(builder, cmd_buffer, xcc_selected,
|
||||
xcc_number_ > 1);
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача