diff --git a/projects/aqlprofile/gfxip/gfx9/gfx9_primitives.h b/projects/aqlprofile/gfxip/gfx9/gfx9_primitives.h index 1e391abbba..9a14317817 100644 --- a/projects/aqlprofile/gfxip/gfx9/gfx9_primitives.h +++ b/projects/aqlprofile/gfxip/gfx9/gfx9_primitives.h @@ -26,27 +26,29 @@ #include #include +#include "src/def/gpu_block_info.h" + #define COPY_DATA_SEL_REG 0 ///< Mem-mapped register #define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4 #define COPY_DATA_SEL_COUNT_1DW 0 ///< Copy 1 word (32 bits) // Counter Select Register value lambdas -#define select_value(reg_name) \ +#define SELECT_VALUE(reg_name) \ [](const counter_des_t& counter_des) { \ uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id); \ return select; \ } -#define select_value_t2(reg_name) \ +#define SELECT_VALUE_T2(reg_name) \ [](const counter_des_t& counter_des) { \ uint32_t select = SET_REG_FIELD_BITS(reg_name, PERFCOUNTER_SELECT, counter_des.id); \ return select; \ } -#define select_value_t3(reg_name) \ +#define SELECT_VALUE_T3(reg_name) \ [](const counter_des_t& counter_des) { \ uint32_t select = SET_REG_FIELD_BITS(reg_name, CNTR_SEL0, counter_des.id); \ return select; \ } -#define mc_select_value(reg_name) \ +#define MC_SELECT_VALUE(reg_name) \ [](const counter_des_t& counter_des) { \ uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id) | \ SET_REG_FIELD_BITS(reg_name, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM) | \ @@ -388,47 +390,47 @@ class gfx9_cntx_prim { // MC registers values static auto constexpr mc_select_value_MC_VM_L2_PERFCOUNTER0_CFG = - mc_select_value(MC_VM_L2_PERFCOUNTER0_CFG); + MC_SELECT_VALUE(MC_VM_L2_PERFCOUNTER0_CFG); static auto constexpr mc_select_value_ATC_L2_PERFCOUNTER0_CFG = - mc_select_value(ATC_L2_PERFCOUNTER0_CFG); + MC_SELECT_VALUE(ATC_L2_PERFCOUNTER0_CFG); static auto constexpr mc_select_value_ATC_PERFCOUNTER0_CFG = - mc_select_value(ATC_PERFCOUNTER0_CFG); + MC_SELECT_VALUE(ATC_PERFCOUNTER0_CFG); static auto constexpr mc_select_value_GCEA_PERFCOUNTER0_CFG = - mc_select_value(GCEA_PERFCOUNTER0_CFG); + MC_SELECT_VALUE(GCEA_PERFCOUNTER0_CFG); static auto constexpr mc_select_value_RPB_PERFCOUNTER0_CFG = - mc_select_value(RPB_PERFCOUNTER0_CFG); + MC_SELECT_VALUE(RPB_PERFCOUNTER0_CFG); static uint32_t mc_reset_value() { return MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM; } static uint32_t mc_start_value() { return MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM; } - static auto constexpr select_value_CB_PERFCOUNTER0_SELECT = select_value(CB_PERFCOUNTER0_SELECT); - static auto constexpr select_value_DB_PERFCOUNTER0_SELECT = select_value(DB_PERFCOUNTER0_SELECT); + static auto constexpr select_value_CB_PERFCOUNTER0_SELECT = SELECT_VALUE(CB_PERFCOUNTER0_SELECT); + static auto constexpr select_value_DB_PERFCOUNTER0_SELECT = SELECT_VALUE(DB_PERFCOUNTER0_SELECT); static auto constexpr select_value_GRBM_PERFCOUNTER0_SELECT = - select_value(GRBM_PERFCOUNTER0_SELECT); + SELECT_VALUE(GRBM_PERFCOUNTER0_SELECT); static auto constexpr select_value_GRBM_SE0_PERFCOUNTER_SELECT = - select_value(GRBM_SE0_PERFCOUNTER_SELECT); + SELECT_VALUE(GRBM_SE0_PERFCOUNTER_SELECT); static auto constexpr select_value_PA_SU_PERFCOUNTER0_SELECT = - select_value(PA_SU_PERFCOUNTER0_SELECT); + SELECT_VALUE(PA_SU_PERFCOUNTER0_SELECT); static auto constexpr select_value_PA_SC_PERFCOUNTER0_SELECT = - select_value(PA_SC_PERFCOUNTER0_SELECT); + SELECT_VALUE(PA_SC_PERFCOUNTER0_SELECT); static auto constexpr select_value_SPI_PERFCOUNTER0_SELECT = - select_value(SPI_PERFCOUNTER0_SELECT); - static auto constexpr select_value_TA_PERFCOUNTER0_SELECT = select_value(TA_PERFCOUNTER0_SELECT); + SELECT_VALUE(SPI_PERFCOUNTER0_SELECT); + static auto constexpr select_value_TA_PERFCOUNTER0_SELECT = SELECT_VALUE(TA_PERFCOUNTER0_SELECT); static auto constexpr select_value_TCA_PERFCOUNTER0_SELECT = - select_value(TCA_PERFCOUNTER0_SELECT); + SELECT_VALUE(TCA_PERFCOUNTER0_SELECT); static auto constexpr select_value_TCC_PERFCOUNTER0_SELECT = - select_value(TCC_PERFCOUNTER0_SELECT); - static auto constexpr select_value_TD_PERFCOUNTER0_SELECT = select_value(TD_PERFCOUNTER0_SELECT); + SELECT_VALUE(TCC_PERFCOUNTER0_SELECT); + static auto constexpr select_value_TD_PERFCOUNTER0_SELECT = SELECT_VALUE(TD_PERFCOUNTER0_SELECT); static auto constexpr select_value_TCP_PERFCOUNTER0_SELECT = - select_value(TCP_PERFCOUNTER0_SELECT); + SELECT_VALUE(TCP_PERFCOUNTER0_SELECT); static auto constexpr select_value_VGT_PERFCOUNTER0_SELECT = - select_value(VGT_PERFCOUNTER0_SELECT); - static auto constexpr select_value_IA_PERFCOUNTER0_SELECT = select_value(IA_PERFCOUNTER0_SELECT); - static auto constexpr select_value_WD_PERFCOUNTER0_SELECT = select_value(WD_PERFCOUNTER0_SELECT); + SELECT_VALUE(VGT_PERFCOUNTER0_SELECT); + static auto constexpr select_value_IA_PERFCOUNTER0_SELECT = SELECT_VALUE(IA_PERFCOUNTER0_SELECT); + static auto constexpr select_value_WD_PERFCOUNTER0_SELECT = SELECT_VALUE(WD_PERFCOUNTER0_SELECT); // static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = - // select_value_t2(SX_PERFCOUNTER0_SELECT); static auto constexpr - // select_value_GDS_PERFCOUNTER0_SELECT = select_value_t2(GDS_PERFCOUNTER0_SELECT); + // SELECT_VALUE_T2(SX_PERFCOUNTER0_SELECT); static auto constexpr + // select_value_GDS_PERFCOUNTER0_SELECT = SELECT_VALUE_T2(GDS_PERFCOUNTER0_SELECT); static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = [](const counter_des_t& counter_des) { return (uint32_t)0; @@ -437,9 +439,9 @@ class gfx9_cntx_prim { [](const counter_des_t& counter_des) { return (uint32_t)0; }; static auto constexpr select_value_CPC_PERFCOUNTER0_SELECT = - select_value_t3(CPC_PERFCOUNTER0_SELECT); + SELECT_VALUE_T3(CPC_PERFCOUNTER0_SELECT); static auto constexpr select_value_CPF_PERFCOUNTER0_SELECT = - select_value_t3(CPF_PERFCOUNTER0_SELECT); + SELECT_VALUE_T3(CPF_PERFCOUNTER0_SELECT); static uint32_t spm_select_value(const counter_des_t& counter_des) { uint32_t tcc_perfcounter0_select = @@ -726,4 +728,9 @@ class gfx9_cntx_prim { } // namespace gfx9 } // namespace gfxip +#undef SELECT_VALUE +#undef SELECT_VALUE_T2 +#undef SELECT_VALUE_T3 +#undef MC_SELECT_VALUE + #endif // _GFX9_PRIMITIVES_H_ diff --git a/projects/aqlprofile/src/pm4/pmc_builder.h b/projects/aqlprofile/src/pm4/pmc_builder.h index 1a98631c89..96052dfd19 100644 --- a/projects/aqlprofile/src/pm4/pmc_builder.h +++ b/projects/aqlprofile/src/pm4/pmc_builder.h @@ -301,9 +301,10 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives { // TODO: In order to get different event for different instance with WGP counter blocks, we // need to loop through WGP instead of blindly broadcast instance. Fortunately, this // is not a common practice - const uint32_t grbm_value = (block_info->instance_count > 1 && !(block_info->attr & CounterBlockWgpAttr)) - ? Primitives::grbm_inst_index_value(block_des.index) - : Primitives::grbm_broadcast_value(); + const uint32_t grbm_value = + (block_info->instance_count > 1 && !(block_info->attr & CounterBlockWgpAttr)) + ? Primitives::grbm_inst_index_value(block_des.index) + : Primitives::grbm_broadcast_value(); SetGrbmGfxIndex(cmd_buffer, grbm_value, block_info->attr); // Reset counters if (block_info->attr & CounterBlockMcAttr) { @@ -592,41 +593,44 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives { grbm_value = Primitives::grbm_se_index_value(se_index); } - bool bIsWGPcounter11 = Primitives::GFXIP_LEVEL == 11 && (block_info->attr & CounterBlockSqAttr); - bool bIsWGPcounter12 = Primitives::GFXIP_LEVEL >= 12 && (block_info->attr & CounterBlockWgpAttr); + bool bIsWGPcounter11 = + Primitives::GFXIP_LEVEL == 11 && (block_info->attr & CounterBlockSqAttr); + bool bIsWGPcounter12 = + Primitives::GFXIP_LEVEL >= 12 && (block_info->attr & CounterBlockWgpAttr); - if (bIsWGPcounter11) { - for (int wgp=0; wgp(data_buffer) + read_counter, 1); - read_counter += 2; - } - } else if (bIsWGPcounter12) { - for (int wgp=0; wgpinstance_count > 1) - grbm_value = Primitives::grbm_inst_se_sh_wgp_index_value(block_des.index, se_index, sarray, wgp); - else + if (bIsWGPcounter11) { + for (int wgp = 0; wgp < wgp_per_sa; wgp++) { grbm_value = Primitives::grbm_se_sh_wgp_index_value(se_index, sarray, wgp); - SetGrbmGfxIndex(cmd_buffer, grbm_value); - uint32_t dw_mask = reg_info.register_addr_hi.offset ? 3 : 1; + SetGrbmGfxIndex(cmd_buffer, grbm_value); + builder.BuildCopyCounterDataPacket( + cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi, + reinterpret_cast(data_buffer) + read_counter, 1); + read_counter += 2; + } + } else if (bIsWGPcounter12) { + for (int wgp = 0; wgp < wgp_per_sa; wgp++) { + if (block_info->instance_count > 1) + grbm_value = Primitives::grbm_inst_se_sh_wgp_index_value(block_des.index, + se_index, sarray, wgp); + else + grbm_value = Primitives::grbm_se_sh_wgp_index_value(se_index, sarray, wgp); + SetGrbmGfxIndex(cmd_buffer, grbm_value); + uint32_t dw_mask = reg_info.register_addr_hi.offset ? 3 : 1; + builder.BuildCopyCounterDataPacket( + cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi, + reinterpret_cast(data_buffer) + read_counter, dw_mask); + if (data_buffer && (dw_mask == 1)) + *(reinterpret_cast(data_buffer) + read_counter + 1) = 0; + read_counter += 2; + } + } else { + SetGrbmGfxIndex(cmd_buffer, grbm_value, block_info->attr); builder.BuildCopyCounterDataPacket( cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi, - reinterpret_cast(data_buffer) + read_counter, dw_mask); - if (data_buffer && (dw_mask == 1)) - *(reinterpret_cast(data_buffer) + read_counter + 1) = 0; + reinterpret_cast(data_buffer) + read_counter, 3); read_counter += 2; } - } else { - SetGrbmGfxIndex(cmd_buffer, grbm_value, block_info->attr); - builder.BuildCopyCounterDataPacket( - cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi, - reinterpret_cast(data_buffer) + read_counter, 3); - read_counter += 2; } - } } } // Reset Grbm to its default state - broadcast @@ -641,54 +645,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives { SetGrbmBroadcast(cmd_buffer, counters_vec.get_attr()); uint32_t sdma_mask = 0; - if (counters_vec.get_attr() & CounterBlockAidAttr) - for (const auto& counter_des : counters_vec) { - const auto* block_info = counter_des.block_info; - const auto& block_des = counter_des.block_des; - const auto* reg_table = get_reg_table(counter_des); - const auto& reg_info = reg_table[counter_des.index]; - - if (!(block_info->attr & CounterBlockAidAttr)) - // skip all non-AID blocks - continue; - - // MI300 AID blocks: UMC/RPB/ATC/SDMA event insert master XCC PRED_EXEC packet here - PrecExecBuilder prec_exec_builder(builder, cmd_buffer, VIRTUALXCCID_SELECT, - xcc_number_ > 1); - - const auto target_aid_index = GetTargetAid(counter_des); - uint64_t smn_control_addr = get_smn_addr(reg_info.control_addr, target_aid_index); - - if (block_info->attr & CounterBlockUmcAttr) { - // Stop UMC - } else if (block_info->attr & (CounterBlockRpbAttr | CounterBlockAtcAttr)) { - // Stop RPB/ATC - builder.BuildWritePConfigRegPacket(cmd_buffer, smn_control_addr, 0); - } else if (block_info->attr & CounterBlockSdmaAttr) { - // Stop SDMA - if (reg_info.control_addr.offset == 0) { - // MI100: stopped per instance - const uint32_t mask = 1u << counter_des.block_des.index; - if ((sdma_mask & mask) == 0) { - sdma_mask |= mask; - auto control_addr = (reg_info.control_addr.offset == 0) ? reg_info.select_addr - : reg_info.control_addr; - builder.BuildWritePConfigRegPacket(cmd_buffer, control_addr, - Primitives::sdma_stop_value(counter_des)); - } - } else if (xcc_number_ > 1) { - // MI300 SDMA event: insert master XCC PRED_EXEC packet here - builder.BuildWritePConfigRegPacket(cmd_buffer, smn_control_addr, - Primitives::sdma_stop_value(counter_des)); - } else { - // MI200: stopped per counter to choose which counter to read - builder.BuildWritePConfigRegPacket(cmd_buffer, reg_info.control_addr, - Primitives::sdma_stop_value(counter_des)); - } - } - } - sdma_mask = 0; - if (counters_vec.get_attr() & CounterBlockAidAttr) + if (counters_vec.get_attr() & CounterBlockAidAttr) { for (const auto& counter_des : counters_vec) { const auto* block_info = counter_des.block_info; const auto& block_des = counter_des.block_des; @@ -734,6 +691,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives { } } } + } // Issue barrier command to wait commands to complete SetPerfmonCntl(cmd_buffer, Primitives::cp_perfmon_cntl_stop_value(), counters_vec.get_attr()); @@ -755,7 +713,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives { SetPerfmonCntl(cmd_buffer, Primitives::cp_perfmon_cntl_read_value(), counters_vec.get_attr()); // counters have UMC events: MI300 Loop over MI300 XCCs for each counter_des - if (counters_attr & CounterBlockAidAttr) + if (counters_attr & CounterBlockAidAttr) { for (const auto& counter_des : counters_vec) { const auto* block_info = counter_des.block_info; const auto& block_des = counter_des.block_des; @@ -809,6 +767,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives { read_counter += 2; } } + } for (size_t xcc_selected = 0; xcc_selected < xcc_number_; ++xcc_selected) { PrecExecBuilder prec_exec_builder(builder, cmd_buffer, xcc_selected, xcc_number_ > 1);