diff --git a/gfxip/gfx12/gfx12_block_info.h b/gfxip/gfx12/gfx12_block_info.h index 9d046adc4b..6c0316c044 100644 --- a/gfxip/gfx12/gfx12_block_info.h +++ b/gfxip/gfx12/gfx12_block_info.h @@ -97,15 +97,15 @@ enum SpmSeBlockId { SPM_SE_BLOCK_NAME_LAST = SPM_SE_BLOCK_NAME_UTCL1, }; -namespace gfx1201 { -// IP versions for Radeon RX 9070 -// ip_block : gc_12_0_1 +namespace gfx1200 { // ip_block : athub_4_1_0 +// ip_block : gc_12_0_0 +// ip_block : sdma_7_0_0 // Number of block instances // Reference: global_features.h (from gfxip header file package) static const uint32_t ChaCounterBlockNumInstances = 1; -static const uint32_t ChcCounterBlockNumInstances = 4; +static const uint32_t ChcCounterBlockNumInstances = 2; static const uint32_t CpcCounterBlockNumInstances = 1; static const uint32_t CpfCounterBlockNumInstances = 1; static const uint32_t CpgCounterBlockNumInstances = 1; @@ -113,12 +113,12 @@ static const uint32_t GcmcVmL2CounterBlockNumInstances = 1; static const uint32_t GcrCounterBlockNumInstances = 1; static const uint32_t Gcutcl2CounterBlockNumInstances = 1; static const uint32_t Gcvml2CounterBlockNumInstances = 1; -static const uint32_t GcEaCpwdCounterBlockNumInstances = 36; -static const uint32_t GcEaSeCounterBlockNumInstances = 4; +static const uint32_t GcEaCpwdCounterBlockNumInstances = 18; +static const uint32_t GcEaSeCounterBlockNumInstances = 8; static const uint32_t Gl1aCounterBlockNumInstances = 1; static const uint32_t Gl1cCounterBlockNumInstances = 4; static const uint32_t Gl2aCounterBlockNumInstances = 4; -static const uint32_t Gl2cCounterBlockNumInstances = 32; +static const uint32_t Gl2cCounterBlockNumInstances = 16; static const uint32_t GrbmCounterBlockNumInstances = 1; static const uint32_t GrbmhCounterBlockNumInstances = 1; static const uint32_t RlcCounterBlockNumInstances = 1; @@ -190,6 +190,18 @@ static const uint32_t TaCounterBlockMaxEvent = 254; static const uint32_t TcpCounterBlockMaxEvent = 99; static const uint32_t 
TdCounterBlockMaxEvent = 271; static const uint32_t Utcl1CounterBlockMaxEvent = 71; +} // namespace gfx1200 + +namespace gfx1201 { +// ip_block : athub_4_1_0 +// ip_block : gc_12_0_1 +// ip_block : sdma_7_0_1 + +// Number of block instances +static const uint32_t ChcCounterBlockNumInstances = 4; +static const uint32_t GcEaCpwdCounterBlockNumInstances = 36; +static const uint32_t GcEaSeCounterBlockNumInstances = 4; +static const uint32_t Gl2cCounterBlockNumInstances = 32; } // namespace gfx1201 static const uint32_t SdmaCounterBlockMaxInstances = 8; diff --git a/gfxip/gfx12/gfx12_block_table.h b/gfxip/gfx12/gfx12_block_table.h index f60bd2dcb8..56266e31a7 100644 --- a/gfxip/gfx12/gfx12_block_table.h +++ b/gfxip/gfx12/gfx12_block_table.h @@ -56,7 +56,7 @@ namespace gfxip { namespace gfx12 { -namespace gfx1201 { +namespace gfx1200 { // Counter register info - Auto-generated from chip_offset_byte.h, edit with extra caution static const CounterRegInfo ChaCounterRegAddr[] = {REG_INFO_4(CHA)}; static const CounterRegInfo ChcCounterRegAddr[] = {REG_INFO_4(CHC)}; @@ -142,7 +142,15 @@ static const GpuBlockInfo SqcCounterBlockInfo = {"SQ", __BLOCK_ID_HSA(SQ), SqcCo static const GpuBlockInfo TaCounterBlockInfo = {"TA", __BLOCK_ID_HSA(TA), TaCounterBlockNumInstances, TaCounterBlockMaxEvent, TaCounterBlockNumCounters, TaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TaBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TA}; static const GpuBlockInfo TdCounterBlockInfo = {"TD", __BLOCK_ID_HSA(TD), TdCounterBlockNumInstances, TdCounterBlockMaxEvent, TdCounterBlockNumCounters, TdCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TD}; static const GpuBlockInfo TcpCounterBlockInfo = {"TCP", __BLOCK_ID_HSA(TCP), TcpCounterBlockNumInstances, TcpCounterBlockMaxEvent, TcpCounterBlockNumCounters, 
TcpCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TCP}; -} // namespace gfx12xx +} // namespace gfx1200 + +namespace gfx1201 { +static const GpuBlockInfo Gl2cCounterBlockInfo = {"GL2C", __BLOCK_ID_HSA(GL2C), gfx1201::Gl2cCounterBlockNumInstances, Gl2cCounterBlockMaxEvent, Gl2cCounterBlockNumCounters, Gl2cCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr}; +static const GpuBlockInfo ChcCounterBlockInfo = {"CHC", __BLOCK_ID(CHC), gfx1201::ChcCounterBlockNumInstances, ChcCounterBlockMaxEvent, ChcCounterBlockNumCounters, ChcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr}; +static const GpuBlockInfo GceaCounterBlockInfo = {"GCEA", __BLOCK_ID_HSA(GCEA), gfx1201::GcEaCpwdCounterBlockNumInstances, GcEaCpwdCounterBlockMaxEvent, GcEaCpwdCounterBlockNumCounters, GcEaCpwdCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr}; +static const GpuBlockInfo GceaSeCounterBlockInfo = {"GCEA_SE", __BLOCK_ID(GCEA_SE), gfx1201::GcEaSeCounterBlockNumInstances, GcEaSeCounterBlockMaxEvent, GcEaSeCounterBlockNumCounters, GcEaSeCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr}; +} // namespace gfx1201 + } // namespace gfx12 } // namespace gfxip diff --git a/gfxip/gfx12/gfx12_primitives.h b/gfxip/gfx12/gfx12_primitives.h index aae8c25981..6c9726ffdc 100644 --- a/gfxip/gfx12/gfx12_primitives.h +++ b/gfxip/gfx12/gfx12_primitives.h @@ -44,7 +44,7 @@ class gfx12_cntx_prim { REG_32B_ADDR(GC, 0, regCOMPUTE_PERFCOUNT_ENABLE); static constexpr Register RLC_PERFMON_CLK_CNTL_ADDR = REG_32B_ADDR(GC, 0, regRLC_PERFMON_CNTL); // REG_32B_ADDR(GC, 0, regRLC_PERFMON_CLK_CNTL); - static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, regCP_PERFMON_CNTL_1); + static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, regCP_PERFMON_CNTL); static constexpr 
Register COMPUTE_THREAD_TRACE_ENABLE_ADDR = REG_32B_ADDR(GC, 0, regCOMPUTE_THREAD_TRACE_ENABLE); @@ -241,29 +241,29 @@ class gfx12_cntx_prim { return grbm_gfx_index; } - // CP_PERFMON_CNTL_1 value to reset counters + // CP_PERFMON_CNTL value to reset counters static uint32_t cp_perfmon_cntl_reset_value() { uint32_t cp_perfmon_cntl{0}; return cp_perfmon_cntl; } - // CP_PERFMON_CNTL_1 value to start counters + // CP_PERFMON_CNTL value to start counters static uint32_t cp_perfmon_cntl_start_value() { - uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 1); + uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1); return cp_perfmon_cntl; } - // CP_PERFMON_CNTL_1 value to stop/freeze counters + // CP_PERFMON_CNTL value to stop/freeze counters static uint32_t cp_perfmon_cntl_stop_value() { - uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 2) | - SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_SAMPLE_ENABLE, 1); + uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 2) | + SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_SAMPLE_ENABLE, 1); return cp_perfmon_cntl; } - // CP_PERFMON_CNTL_1 value to stop/freeze counters + // CP_PERFMON_CNTL value to sample/read counters static uint32_t cp_perfmon_cntl_read_value() { - uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 1) | - SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_SAMPLE_ENABLE, 1); + uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1) | + SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_SAMPLE_ENABLE, 1); return cp_perfmon_cntl; } @@ -421,12 +421,12 @@ class gfx12_cntx_prim { } static uint32_t cp_perfmon_cntl_spm_start_value() { uint32_t cp_perfmon_cntl{0}; - cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, SPM_PERFMON_STATE, 1); + cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 1); return cp_perfmon_cntl; } static uint32_t 
cp_perfmon_cntl_spm_stop_value() { uint32_t cp_perfmon_cntl{0}; - cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, SPM_PERFMON_STATE, 2); + cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 2); return cp_perfmon_cntl; } static uint32_t rlc_spm_muxsel_data(const uint32_t& value, const counter_des_t& counter_des, diff --git a/src/core/gfx12_factory.cpp b/src/core/gfx12_factory.cpp index 9b9b86e1b6..92f4617a4a 100644 --- a/src/core/gfx12_factory.cpp +++ b/src/core/gfx12_factory.cpp @@ -78,6 +78,7 @@ void Gfx12Factory::ConstructBuilders(const AgentInfo* agent_info) { } void Gfx12Factory::ConstructTable(const AgentInfo* agent_info) { + auto agent_name = std::string_view(agent_info->name).substr(0, 7); // Global blocks block_table_[__BLOCK_ID(CHA)] = &ChaCounterBlockInfo; block_table_[__BLOCK_ID(CHC)] = &ChcCounterBlockInfo; @@ -108,6 +109,13 @@ void Gfx12Factory::ConstructTable(const AgentInfo* agent_info) { block_table_[__BLOCK_ID_HSA(TA)] = &TaCounterBlockInfo; block_table_[__BLOCK_ID_HSA(TCP)] = &TcpCounterBlockInfo; block_table_[__BLOCK_ID_HSA(TD)] = &TdCounterBlockInfo; + + if (agent_name == "gfx1201") { + block_table_[__BLOCK_ID(CHC)] = &gfx1201::ChcCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(GCEA)] = &gfx1201::GceaCounterBlockInfo; + block_table_[__BLOCK_ID(GCEA_SE)] = &gfx1201::GceaSeCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(GL2C)] = &gfx1201::Gl2cCounterBlockInfo; + } } // Pm4Factory create mathods diff --git a/src/def/gfx12_def.h b/src/def/gfx12_def.h index e12f5c1fed..3024f1e21d 100644 --- a/src/def/gfx12_def.h +++ b/src/def/gfx12_def.h @@ -28,10 +28,22 @@ #include "util/reg_offsets.h" #include "linux/registers/gc/gc_12_0_0_offset.h" #include "linux/registers/gc/gc_12_0_0_sh_mask.h" +// Rename CP_PERFMON_CNTL_1 to CP_PERFMON_CNTL for better compatibility +// CP_PERFMON_CNTL_1 +#define regCP_PERFMON_CNTL_BASE_IDX regCP_PERFMON_CNTL_1_BASE_IDX +#define regCP_PERFMON_CNTL regCP_PERFMON_CNTL_1 +#define 
CP_PERFMON_CNTL__PERFMON_STATE__SHIFT CP_PERFMON_CNTL_1__PERFMON_STATE__SHIFT +#define CP_PERFMON_CNTL__SPM_PERFMON_STATE__SHIFT CP_PERFMON_CNTL_1__SPM_PERFMON_STATE__SHIFT +#define CP_PERFMON_CNTL__PERFMON_ENABLE_MODE__SHIFT CP_PERFMON_CNTL_1__PERFMON_ENABLE_MODE__SHIFT +#define CP_PERFMON_CNTL__PERFMON_SAMPLE_ENABLE__SHIFT CP_PERFMON_CNTL_1__PERFMON_SAMPLE_ENABLE__SHIFT +#define CP_PERFMON_CNTL__PERFMON_STATE_MASK CP_PERFMON_CNTL_1__PERFMON_STATE_MASK +#define CP_PERFMON_CNTL__SPM_PERFMON_STATE_MASK CP_PERFMON_CNTL_1__SPM_PERFMON_STATE_MASK +#define CP_PERFMON_CNTL__PERFMON_ENABLE_MODE_MASK CP_PERFMON_CNTL_1__PERFMON_ENABLE_MODE_MASK +#define CP_PERFMON_CNTL__PERFMON_SAMPLE_ENABLE_MASK CP_PERFMON_CNTL_1__PERFMON_SAMPLE_ENABLE_MASK #include "linux/packets/nvd.h" #include "gfxip/gfx12/gfx12_block_info.h" using namespace gfxip::gfx12; -using namespace gfxip::gfx12::gfx1201; +using namespace gfxip::gfx12::gfx1200; #include "gfxip/gfx12/gfx12_primitives.h" #include "gfxip/gfx12/gfx12_block_table.h"