diff --git a/gfxip/gfx12/gfx12_block_info.h b/gfxip/gfx12/gfx12_block_info.h index 990ee61171..9d046adc4b 100644 --- a/gfxip/gfx12/gfx12_block_info.h +++ b/gfxip/gfx12/gfx12_block_info.h @@ -20,45 +20,16 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - #ifndef _GFX12_BLOCKINFO_H_ #define _GFX12_BLOCKINFO_H_ namespace gfxip { namespace gfx12 { -#define __BLOCK_ID(block) HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_##block -// Private PMC Counter BlockId is defined here -// Pubclic PMC Counter BlockId is defined in hsa_ven_amd_aqlprofile.h +#define __BLOCK_ID_HSA(block) HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_##block +#define __BLOCK_ID(block) AQLPROFILE_BLOCK_NAME_##block enum CounterBlockId { - __BLOCK_ID(RLC) = HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, - __BLOCK_ID(CPG), - __BLOCK_ID(GRBMH), - __BLOCK_ID(GRBMA), - __BLOCK_ID(SQG), - - // mem blocks - __BLOCK_ID(CHA), - __BLOCK_ID(CHC), - __BLOCK_ID(GLARBA), - __BLOCK_ID(GLARBC), - __BLOCK_ID(GC_CANE), - __BLOCK_ID(GC_FFBM), - __BLOCK_ID(GC_NHTTLB), - __BLOCK_ID(GC_L2TLB), - __BLOCK_ID(GC_UTCL1), - __BLOCK_ID(GC_UTCL2), - __BLOCK_ID(GC_VML2), - - __BLOCK_ID(GCEA_SE), - - // New SDMA Perfmon interface, comparing to the original SDMA PerfCnt. gfx12 - // supports both and they should provide the same counter events. 
We might - // remove SDMA PerfCnt support in aqlprofile in the future since it is easier - // to program Perfmon - __BLOCK_ID(SDMA_PM), - // Counters retrieved by KFD - IommuV2CounterBlockId, + IommuV2CounterBlockId = AQLPROFILE_BLOCKS_NUMBER, KernelDriverCounterBlockId, CpPipeStatsCounterBlockId, @@ -130,155 +101,100 @@ namespace gfx1201 { // IP versions for Radeon RX 9070 // ip_block : gc_12_0_1 // ip_block : athub_4_1_0 -// ip_block : umc_8_14_0 -// ip_block : df_4_15_1 -// ip_block : pcie_6_1_0 // Number of block instances // Reference: global_features.h (from gfxip header file package) -// rspm_config.pm (from design configuration files) -// The following default values are generated from Radeon RX 9070, the first product of the -// RDNA 4 lineup. It could change for other products, and the change will be made in -// [PRODUCT_NAME]_factory.h -// -static const uint32_t GrbmCounterBlockNumInstances = 1; -static const uint32_t RlcCounterBlockNumInstances = 1; -static const uint32_t CpgCounterBlockNumInstances = 1; -static const uint32_t CpcCounterBlockNumInstances = 1; -static const uint32_t CpfCounterBlockNumInstances = 1; -static const uint32_t GcrCounterBlockNumInstances = 1; -static const uint32_t Ge1CounterBlockNumInstances = 1; -static const uint32_t Gl2aCounterBlockNumInstances = 4; // GFX_CPWD__NUM_GL2A_PER_CPWD -static const uint32_t Gl2cCounterBlockNumInstances = 32; // GFX_CPWD__NUM_GL2C_PER_CPWD -static const uint32_t GceaCounterBlockNumInstances = 36; // GFX_CPWD__NUM_EA_PER_CPWD -static const uint32_t ChaCounterBlockNumInstances = 1; -static const uint32_t ChcCounterBlockNumInstances = 4; // GFX_CPWD__NUM_CHC -static const uint32_t Ge2DistCounterBlockNumInstances = 1; -static const uint32_t SdmaCounterBlockNumInstances = 2; // GFX_CPWD__NUM_SDMA_PER_CPWD -static const uint32_t GcVml2CounterBlockNumInstances = 1; -static const uint32_t GcMcVml2CounterBlockNumInstances = 1; -static const uint32_t GcUtcl2CounterBlockNumInstances = 1; -static const 
uint32_t GrbmhCounterBlockNumInstances = 1; -static const uint32_t CbCounterBlockNumInstances = 2; // GFX_SE__NUM_RB_PER_SA -static const uint32_t DbCounterBlockNumInstances = 2; // GFX_SE__NUM_RB_PER_SA -static const uint32_t SuCounterBlockNumInstances = 1; // GFX_SE__NUM_PA_PER_SE -static const uint32_t SxCounterBlockNumInstances = 1; -static const uint32_t ScCounterBlockNumInstances = 2; // GFX_SE__NUM_PACKER_PER_SA -static const uint32_t TaCounterBlockNumInstances = 2; // GFX_SE__NUM_ROWS_PER_WGP -static const uint32_t TdCounterBlockNumInstances = 2; // GFX_SE__NUM_ROWS_PER_WGP -static const uint32_t TcpCounterBlockNumInstances = 2; // GFX_SE__NUM_ROWS_PER_WGP -static const uint32_t SpiCounterBlockNumInstances = 1; -static const uint32_t SqgCounterBlockNumInstances = 1; -static const uint32_t Gl1aCounterBlockNumInstances = 1; -static const uint32_t RmiCounterBlockNumInstances = 2; // GFX_SE__NUM_RMI_PER_SA -static const uint32_t Gl1cCounterBlockNumInstances = 4; // GFX_SE__NUM_GL1C_PER_SA -static const uint32_t SqcCounterBlockNumInstances = 1; -static const uint32_t PcCounterBlockNumInstances = 1; -static const uint32_t GceaSeCounterBlockNumInstances = 4; -static const uint32_t GeCounterBlockNumInstances = 1; -static const uint32_t WgsCounterBlockNumInstances = 1; -static const uint32_t Gl1xaCounterBlockNumInstances = 1; -static const uint32_t Gl1xcCounterBlockNumInstances = 4; // GFX_SE__NUM_GL1C_PER_SA -static const uint32_t GcUtcl1CounterBlockNumInstances = 2; +static const uint32_t ChaCounterBlockNumInstances = 1; +static const uint32_t ChcCounterBlockNumInstances = 4; +static const uint32_t CpcCounterBlockNumInstances = 1; +static const uint32_t CpfCounterBlockNumInstances = 1; +static const uint32_t CpgCounterBlockNumInstances = 1; +static const uint32_t GcmcVmL2CounterBlockNumInstances = 1; +static const uint32_t GcrCounterBlockNumInstances = 1; +static const uint32_t Gcutcl2CounterBlockNumInstances = 1; +static const uint32_t 
Gcvml2CounterBlockNumInstances = 1; +static const uint32_t GcEaCpwdCounterBlockNumInstances = 36; +static const uint32_t GcEaSeCounterBlockNumInstances = 4; +static const uint32_t Gl1aCounterBlockNumInstances = 1; +static const uint32_t Gl1cCounterBlockNumInstances = 4; +static const uint32_t Gl2aCounterBlockNumInstances = 4; +static const uint32_t Gl2cCounterBlockNumInstances = 32; +static const uint32_t GrbmCounterBlockNumInstances = 1; +static const uint32_t GrbmhCounterBlockNumInstances = 1; +static const uint32_t RlcCounterBlockNumInstances = 1; +static const uint32_t RpbCounterBlockNumInstances = 1; +static const uint32_t SdmaCounterBlockNumInstances = 2; +static const uint32_t SpiCounterBlockNumInstances = 1; +static const uint32_t SqcCounterBlockNumInstances = 1; +static const uint32_t SqgCounterBlockNumInstances = 1; +static const uint32_t TaCounterBlockNumInstances = 2; +static const uint32_t TcpCounterBlockNumInstances = 2; +static const uint32_t TdCounterBlockNumInstances = 2; +static const uint32_t Utcl1CounterBlockNumInstances = 2; -static const uint32_t SdmaCounterBlockMaxInstances = 8; -static const uint32_t UmcCounterBlockMaxInstances = 32; - -// Number of block counter registers - Auto-generated from chip_offset_byte.h, edit with extra -// caution Reference: chip_offset_byte.h (from gfxip header file package) The following default -// values are generated from Radeon RX 9070, the first product of the RDNA 4 lineup. 
It could change -// for other products, and the change will be made in [PRODUCT_NAME]_factory.h -// -static const uint32_t GrbmCounterBlockNumCounters = 2; -static const uint32_t RlcCounterBlockNumCounters = 2; -static const uint32_t CpgCounterBlockNumCounters = 2; -static const uint32_t CpcCounterBlockNumCounters = 2; -static const uint32_t CpfCounterBlockNumCounters = 2; -static const uint32_t GcrCounterBlockNumCounters = 2; -static const uint32_t PhCounterBlockNumCounters = 8; -static const uint32_t Ge1CounterBlockNumCounters = 4; -static const uint32_t Gl2aCounterBlockNumCounters = 4; -static const uint32_t Gl2cCounterBlockNumCounters = 4; -static const uint32_t GceaCounterBlockNumCounters = 2; -static const uint32_t ChaCounterBlockNumCounters = 4; -static const uint32_t ChcCounterBlockNumCounters = 4; -static const uint32_t Ge2DistCounterBlockNumCounters = 4; -static const uint32_t SdmaCounterBlockNumCounters = 2; -static const uint32_t GcVml2CounterBlockNumCounters = 2; -static const uint32_t GcMcVml2CounterBlockNumCounters = 1; -static const uint32_t GcUtcl2CounterBlockNumCounters = 1; -static const uint32_t GrbmhCounterBlockNumCounters = 2; -static const uint32_t CbCounterBlockNumCounters = 4; -static const uint32_t DbCounterBlockNumCounters = 4; -static const uint32_t SuCounterBlockNumCounters = 4; -static const uint32_t SxCounterBlockNumCounters = 4; -static const uint32_t PaScCounterBlockNumCounters = 8; -static const uint32_t TaCounterBlockNumCounters = 2; -static const uint32_t TdCounterBlockNumCounters = 2; -static const uint32_t TcpCounterBlockNumCounters = 4; -static const uint32_t SpiCounterBlockNumCounters = 6; -static const uint32_t SqgCounterBlockNumCounters = 8; -static const uint32_t Gl1aCounterBlockNumCounters = 4; -static const uint32_t RmiCounterBlockNumCounters = 4; -static const uint32_t Gl1cCounterBlockNumCounters = 4; -static const uint32_t SqcCounterBlockNumCounters = 16; -static const uint32_t PcCounterBlockNumCounters = 4; -static 
const uint32_t GceaSeCounterBlockNumCounters = 2; -static const uint32_t GeCounterBlockNumCounters = 4; -static const uint32_t WgsCounterBlockNumCounters = 2; -static const uint32_t Gl1xaCounterBlockNumCounters = 4; -static const uint32_t Gl1xcCounterBlockNumCounters = 4; -static const uint32_t GcUtcl1CounterBlockNumCounters = 4; +// Number of block counter registers - Auto-generated from chip_offset_byte.h, edit with extra caution +// Reference: chip_offset_byte.h (from gfxip header file package) +static const uint32_t ChaCounterBlockNumCounters = 4; +static const uint32_t ChcCounterBlockNumCounters = 4; +static const uint32_t CpcCounterBlockNumCounters = 2; +static const uint32_t CpfCounterBlockNumCounters = 2; +static const uint32_t CpgCounterBlockNumCounters = 2; +static const uint32_t GcmcVmL2CounterBlockNumCounters = 8; +static const uint32_t GcrCounterBlockNumCounters = 2; +static const uint32_t Gcutcl2CounterBlockNumCounters = 4; +static const uint32_t Gcvml2CounterBlockNumCounters = 2; +static const uint32_t GcEaCpwdCounterBlockNumCounters = 2; +static const uint32_t GcEaSeCounterBlockNumCounters = 2; +static const uint32_t Gl1aCounterBlockNumCounters = 4; +static const uint32_t Gl1cCounterBlockNumCounters = 4; +static const uint32_t Gl2aCounterBlockNumCounters = 4; +static const uint32_t Gl2cCounterBlockNumCounters = 4; +static const uint32_t GrbmCounterBlockNumCounters = 2; +static const uint32_t GrbmhCounterBlockNumCounters = 2; +static const uint32_t RlcCounterBlockNumCounters = 2; +static const uint32_t RpbCounterBlockNumCounters = 4; +static const uint32_t SdmaCounterBlockNumCounters = 2; +static const uint32_t SpiCounterBlockNumCounters = 6; +static const uint32_t SqcCounterBlockNumCounters = 16; +static const uint32_t SqgCounterBlockNumCounters = 8; +static const uint32_t TaCounterBlockNumCounters = 2; +static const uint32_t TcpCounterBlockNumCounters = 4; +static const uint32_t TdCounterBlockNumCounters = 2; +static const uint32_t 
Utcl1CounterBlockNumCounters = 4; // Block counters max event value - Auto-generated from chip_enum.h, edit with extra caution // Reference: chip_enum.h (from gfxip header file package) -// The following default values are generated from Radeon RX 9070, the first product of the -// RDNA 4 lineup. It could change for other products, and the change will be made in -// [PRODUCT_NAME]_factory.h -// -static const uint32_t GrbmCounterBlockMaxEvent = 51; -static const uint32_t RlcCounterBlockMaxEvent = 6; -static const uint32_t CpgCounterBlockMaxEvent = 30; -static const uint32_t CpcCounterBlockMaxEvent = 55; -static const uint32_t CpfCounterBlockMaxEvent = 4; -static const uint32_t GcrCounterBlockMaxEvent = 151; -static const uint32_t PhCounterBlockMaxEvent = 1023; -static const uint32_t Ge1CounterBlockMaxEvent = 54; -static const uint32_t Gl2aCounterBlockMaxEvent = 114; -static const uint32_t Gl2cCounterBlockMaxEvent = 249; -static const uint32_t GceaCounterBlockMaxEvent = 32; -static const uint32_t ChaCounterBlockMaxEvent = 25; -static const uint32_t ChcCounterBlockMaxEvent = 94; -static const uint32_t Ge2DistCounterBlockMaxEvent = 188; -static const uint32_t SdmaCounterBlockMaxEvent = 125; -static const uint32_t GcVml2CounterBlockMaxEvent = 90; -static const uint32_t GcMcVml2CounterBlockMaxEvent = - 1; // This is handled by GCMC_VM_L2_PERFCOUNTER0_CFG -static const uint32_t GcUtcl2CounterBlockMaxEvent = 36; -static const uint32_t GrbmhCounterBlockMaxEvent = 25; -static const uint32_t CbCounterBlockMaxEvent = 315; -static const uint32_t DbCounterBlockMaxEvent = 441; -static const uint32_t PaSuCounterBlockMaxEvent = 828; -static const uint32_t SxCounterBlockMaxEvent = 81; -static const uint32_t ScCounterBlockMaxEvent = 821; -static const uint32_t TaCounterBlockMaxEvent = 254; -static const uint32_t TdCounterBlockMaxEvent = 271; -static const uint32_t TcpCounterBlockMaxEvent = 99; -static const uint32_t SpiCounterBlockMaxEvent = 318; -static const uint32_t 
SqgCounterBlockMaxEvent = 45; -static const uint32_t Gl1aCounterBlockMaxEvent = 21; -static const uint32_t RmiCounterBlockMaxEvent = 138; -static const uint32_t Gl1cCounterBlockMaxEvent = 121; -static const uint32_t SqcCounterBlockMaxEvent = 511; -static const uint32_t PcCounterBlockMaxEvent = 164; -static const uint32_t GceaSeCounterBlockMaxEvent = 32; -static const uint32_t GeCounterBlockMaxEvent = 103; -static const uint32_t WgsCounterBlockMaxEvent = 4; -static const uint32_t Gl1xaCounterBlockMaxEvent = 21; -static const uint32_t Gl1xcCounterBlockMaxEvent = 109; -static const uint32_t GcUtcl1CounterBlockMaxEvent = 71; +static const uint32_t ChaCounterBlockMaxEvent = 25; +static const uint32_t ChcCounterBlockMaxEvent = 94; +static const uint32_t CpcCounterBlockMaxEvent = 55; +static const uint32_t CpfCounterBlockMaxEvent = 4; +static const uint32_t CpgCounterBlockMaxEvent = 30; +static const uint32_t GcmcVmL2CounterBlockMaxEvent = 90; +static const uint32_t GcrCounterBlockMaxEvent = 151; +static const uint32_t Gcutcl2CounterBlockMaxEvent = 36; +static const uint32_t Gcvml2CounterBlockMaxEvent = 90; +static const uint32_t GcEaCpwdCounterBlockMaxEvent = 32; +static const uint32_t GcEaSeCounterBlockMaxEvent = 32; +static const uint32_t Gl1aCounterBlockMaxEvent = 21; +static const uint32_t Gl1cCounterBlockMaxEvent = 121; +static const uint32_t Gl2aCounterBlockMaxEvent = 114; +static const uint32_t Gl2cCounterBlockMaxEvent = 249; +static const uint32_t GrbmCounterBlockMaxEvent = 51; +static const uint32_t GrbmhCounterBlockMaxEvent = 25; +static const uint32_t RlcCounterBlockMaxEvent = 6; +static const uint32_t SdmaCounterBlockMaxEvent = 125; +static const uint32_t SpiCounterBlockMaxEvent = 318; +static const uint32_t SqcCounterBlockMaxEvent = 511; +static const uint32_t SqgCounterBlockMaxEvent = 45; +static const uint32_t TaCounterBlockMaxEvent = 254; +static const uint32_t TcpCounterBlockMaxEvent = 99; +static const uint32_t TdCounterBlockMaxEvent = 271; +static 
const uint32_t Utcl1CounterBlockMaxEvent = 71; } // namespace gfx1201 +static const uint32_t SdmaCounterBlockMaxInstances = 8; +static const uint32_t UmcCounterBlockMaxInstances = 32; + } // namespace gfx12 } // namespace gfxip diff --git a/gfxip/gfx12/gfx12_block_table.h b/gfxip/gfx12/gfx12_block_table.h index 1eaa2a6a1e..f60bd2dcb8 100644 --- a/gfxip/gfx12/gfx12_block_table.h +++ b/gfxip/gfx12/gfx12_block_table.h @@ -43,54 +43,52 @@ #define REG_INFO_7(BLOCK) REG_INFO_WITH_CTRL_7(BLOCK, REG_32B_NULL) #define REG_INFO_8(BLOCK) REG_INFO_WITH_CTRL_8(BLOCK, REG_32B_NULL) +#define REG_INFO_WITH_CFG(BLOCK, INDEX) \ + {REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER##INDEX##_CFG), REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER_HI)} +#define REG_INFO_WITH_CFG_1(BLOCK) REG_INFO_WITH_CFG(BLOCK, 0) +#define REG_INFO_WITH_CFG_2(BLOCK) REG_INFO_WITH_CFG_1(BLOCK), REG_INFO_WITH_CFG(BLOCK, 1) +#define REG_INFO_WITH_CFG_3(BLOCK) REG_INFO_WITH_CFG_2(BLOCK), REG_INFO_WITH_CFG(BLOCK, 2) +#define REG_INFO_WITH_CFG_4(BLOCK) REG_INFO_WITH_CFG_3(BLOCK), REG_INFO_WITH_CFG(BLOCK, 3) +#define REG_INFO_WITH_CFG_5(BLOCK) REG_INFO_WITH_CFG_4(BLOCK), REG_INFO_WITH_CFG(BLOCK, 4) +#define REG_INFO_WITH_CFG_6(BLOCK) REG_INFO_WITH_CFG_5(BLOCK), REG_INFO_WITH_CFG(BLOCK, 5) +#define REG_INFO_WITH_CFG_7(BLOCK) REG_INFO_WITH_CFG_6(BLOCK), REG_INFO_WITH_CFG(BLOCK, 6) +#define REG_INFO_WITH_CFG_8(BLOCK) REG_INFO_WITH_CFG_7(BLOCK), REG_INFO_WITH_CFG(BLOCK, 7) + namespace gfxip { namespace gfx12 { namespace gfx1201 { // Counter register info - Auto-generated from chip_offset_byte.h, edit with extra caution -static const CounterRegInfo GrbmCounterRegAddr[] = {REG_INFO_2(GRBM)}; -static const CounterRegInfo RlcCounterRegAddr[] = {REG_INFO_2(RLC)}; -static const CounterRegInfo CpgCounterRegAddr[] = {REG_INFO_2(CPG)}; -static const CounterRegInfo CpcCounterRegAddr[] = {REG_INFO_2(CPC)}; -static const 
CounterRegInfo CpfCounterRegAddr[] = {REG_INFO_2(CPF)}; -static const CounterRegInfo GcrCounterRegAddr[] = {REG_INFO_WITH_CTRL_2(GCR, REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL))}; -static const CounterRegInfo PaPhCounterRegAddr[] = {REG_INFO_8(PA_PH)}; -static const CounterRegInfo Ge1CounterRegAddr[] = {REG_INFO_4(GE1)}; -static const CounterRegInfo Gl2aCounterRegAddr[] = {REG_INFO_4(GL2A)}; -static const CounterRegInfo Gl2cCounterRegAddr[] = {REG_INFO_4(GL2C)}; -static const CounterRegInfo GceaCounterRegAddr[] = {REG_INFO_2(GC_EA_CPWD)}; static const CounterRegInfo ChaCounterRegAddr[] = {REG_INFO_4(CHA)}; static const CounterRegInfo ChcCounterRegAddr[] = {REG_INFO_4(CHC)}; -static const CounterRegInfo Ge2CounterRegAddr[] = {REG_INFO_4(GE2_DIST)}; -static const CounterRegInfo SdmaCounterRegAddr[] = {REG_INFO_2(SDMA0), REG_INFO_2(SDMA1)}; -//static const CounterRegInfo GcVml2CounterRegAddr[] = {REG_INFO_2(GCVML2)}; -//static const CounterRegInfo GcMcVml2CounterRegAddr[] = {REG_INFO_1(GCMC_VM_L2)}; -//static const CounterRegInfo GcUtcl2CounterRegAddr[] = {REG_INFO_1(GCUTCL2)}; -static const CounterRegInfo GrbmhCounterRegAddr[] = {REG_INFO_2(GRBMH)}; -static const CounterRegInfo CbCounterRegAddr[] = {REG_INFO_4(CB)}; -static const CounterRegInfo DbCounterRegAddr[] = {REG_INFO_4(DB)}; -static const CounterRegInfo PaSuCounterRegAddr[] = {REG_INFO_4(PA_SU)}; -static const CounterRegInfo SxCounterRegAddr[] = {REG_INFO_4(SX)}; -static const CounterRegInfo PaScCounterRegAddr[] = {REG_INFO_8(PA_SC)}; -static const CounterRegInfo TaCounterRegAddr[] = {REG_INFO_2(TA)}; -static const CounterRegInfo TdCounterRegAddr[] = {REG_INFO_2(TD)}; -static const CounterRegInfo TcpCounterRegAddr[] = {REG_INFO_4(TCP)}; -static const CounterRegInfo SpiCounterRegAddr[] = {REG_INFO_6(SPI)}; -static const CounterRegInfo SqgCounterRegAddr[] = {REG_INFO_WITH_CTRL_8(SQG, REG_32B_ADDR(GC, 0, regSQG_PERFCOUNTER_CTRL))}; +static const CounterRegInfo CpcCounterRegAddr[] = {REG_INFO_2(CPC)}; +static const 
CounterRegInfo CpfCounterRegAddr[] = {REG_INFO_2(CPF)}; +static const CounterRegInfo CpgCounterRegAddr[] = {REG_INFO_2(CPG)}; +static const CounterRegInfo GcmcVmL2CounterRegAddr[] = {REG_INFO_WITH_CFG_8(GCMC_VM_L2)}; +static const CounterRegInfo GcrCounterRegAddr[] = {REG_INFO_WITH_CTRL_2(GCR, REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL))}; +static const CounterRegInfo Gcutcl2CounterRegAddr[] = {REG_INFO_WITH_CFG_4(GCUTCL2)}; +// static const CounterRegInfo Gcvml2CounterRegAddr[] = {REG_INFO_2(GCVML2)}; +static const CounterRegInfo GcEaCpwdCounterRegAddr[] = {REG_INFO_2(GC_EA_CPWD)}; +static const CounterRegInfo GcEaSeCounterRegAddr[] = {REG_INFO_2(GC_EA_SE)}; static const CounterRegInfo Gl1aCounterRegAddr[] = {REG_INFO_4(GL1A)}; -static const CounterRegInfo RmiCounterRegAddr[] = {REG_INFO_4(RMI)}; static const CounterRegInfo Gl1cCounterRegAddr[] = {REG_INFO_4(GL1C)}; -//static const CounterRegInfo SqcCounterRegAddr[] = {REG_INFO_WITH_CTRL_16(SQ, regSQ_PERFCOUNTER_CTRL)}; -static const CounterRegInfo PcCounterRegAddr[] = {REG_INFO_4(PC)}; -static const CounterRegInfo GeCounterRegAddr[] = {REG_INFO_4(GE2_SE)}; -static const CounterRegInfo GceaSeCounterRegAddr[] = {REG_INFO_2(GC_EA_SE)}; -// static const CounterRegInfo WgsCounterRegAddr[] = {REG_INFO_2(WGS)}; -static const CounterRegInfo Gl1xaCounterRegAddr[] = {REG_INFO_4(GL1XA)}; -static const CounterRegInfo Gl1xcCounterRegAddr[] = {REG_INFO_4(GL1XC)}; -static const CounterRegInfo GcUtcl1CounterRegAddr[] = {REG_INFO_4(UTCL1)}; +static const CounterRegInfo Gl2aCounterRegAddr[] = {REG_INFO_4(GL2A)}; +static const CounterRegInfo Gl2cCounterRegAddr[] = {REG_INFO_4(GL2C)}; +static const CounterRegInfo GrbmCounterRegAddr[] = {REG_INFO_2(GRBM)}; +static const CounterRegInfo GrbmhCounterRegAddr[] = {REG_INFO_2(GRBMH)}; +static const CounterRegInfo RlcCounterRegAddr[] = {REG_INFO_2(RLC)}; +static const CounterRegInfo SdmaCounterRegAddr[] = {REG_INFO_2(SDMA0), REG_INFO_2(SDMA1)}; +static const CounterRegInfo SpiCounterRegAddr[] 
= {REG_INFO_6(SPI)}; +//static const CounterRegInfo SqcCounterRegAddr[] = {REG_INFO_WITH_CTRL_16(SQ, REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL))}; +static const CounterRegInfo SqgCounterRegAddr[] = {REG_INFO_WITH_CTRL_8(SQG, REG_32B_ADDR(GC, 0, regSQG_PERFCOUNTER_CTRL))}; +static const CounterRegInfo TaCounterRegAddr[] = {REG_INFO_2(TA)}; +static const CounterRegInfo TcpCounterRegAddr[] = {REG_INFO_4(TCP)}; +static const CounterRegInfo TdCounterRegAddr[] = {REG_INFO_2(TD)}; +static const CounterRegInfo Utcl1CounterRegAddr[] = {REG_INFO_4(UTCL1)}; // Special handling of SQC: -// SQC only supports 32bit PMC, only regSQ_PERFCOUNTER#even_number#_SELECT is -// used by PMC. regSQ_PERFCOUNTER#odd_number#_SELECT is used only by SPM +// SQC only supports 32bit PMC. +// regSQ_PERFCOUNTER#even_number#_SELECT is used by PMC and SPM +// regSQ_PERFCOUNTER#odd_number#_SELECT is used by SPM only static const CounterRegInfo SqcCounterRegAddr[] = { {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_LO), REG_32B_NULL}, {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER2_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER1_LO), REG_32B_NULL}, @@ -101,57 +99,50 @@ static const CounterRegInfo SqcCounterRegAddr[] = { {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER12_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER6_LO), REG_32B_NULL}, {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER14_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER7_LO), REG_32B_NULL}}; -// Special handling of GCVML2: -static const CounterRegInfo GcVml2CounterRegAddr[] = { +// Special handling of GCVML2 (SPM only): +static const CounterRegInfo Gcvml2CounterRegAddr[] = { {REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_SELECT), REG_32B_NULL, REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_LO), REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_HI)}, 
{REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_SELECT), REG_32B_NULL, REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_LO), REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_HI)}}; -// Special handling of GCMC_VM_L2: -static const CounterRegInfo GcMcVml2CounterRegAddr[] = { - {REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER0_CFG), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_HI)}}; - -// Special handling of GCUTCL2: Not sure if this is SPM-only -static const CounterRegInfo GcUtcl2CounterRegAddr[] = { - {REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER0_CFG), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_HI)}}; - // Global blocks: ATCL2 CHA CHC CPC CPF CPG EA FFBM GCR GL2A GL2C GRBM RLC SDMA VML2 UTCL2 // (Grphics only - not supported in ROCm): GE1 GE2_DIST PH // (Grphics only): CPG is for graphics, but it is not physically removed for compute products // (Not enabled for gfx12): CHCG GDS GUS -static const GpuBlockInfo GcAtcl2CounterBlockInfo = {"ATCL2", __BLOCK_ID(ATCL2)}; // Placeholder now -static const GpuBlockInfo ChaCounterBlockInfo = {"CHA", __BLOCK_ID(CHA), ChaCounterBlockNumInstances, ChaCounterBlockMaxEvent, ChaCounterBlockNumCounters, ChaCounterRegAddr, gfx12_cntx_prim::select_value_Cha, CounterBlockTcAttr}; -static const GpuBlockInfo ChcCounterBlockInfo = {"CHC", __BLOCK_ID(CHC), ChcCounterBlockNumInstances, ChcCounterBlockMaxEvent, ChcCounterBlockNumCounters, ChcCounterRegAddr, gfx12_cntx_prim::select_value_Chc, CounterBlockTcAttr}; -static const GpuBlockInfo CpcCounterBlockInfo = {"CPC", __BLOCK_ID(CPC), CpcCounterBlockNumInstances, CpcCounterBlockMaxEvent, CpcCounterBlockNumCounters, CpcCounterRegAddr, gfx12_cntx_prim::select_value_Cpc, CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPC}; -static const GpuBlockInfo CpfCounterBlockInfo = {"CPF", 
__BLOCK_ID(CPF), CpfCounterBlockNumInstances, CpfCounterBlockMaxEvent, CpfCounterBlockNumCounters, CpfCounterRegAddr, gfx12_cntx_prim::select_value_Cpf, CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPF}; -static const GpuBlockInfo CpgCounterBlockInfo = {"CPG", __BLOCK_ID(CPG), CpgCounterBlockNumInstances, CpgCounterBlockMaxEvent, CpgCounterBlockNumCounters, CpgCounterRegAddr, gfx12_cntx_prim::select_value_Cpg, CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPG}; -static const GpuBlockInfo GceaCounterBlockInfo = {"GCEA", __BLOCK_ID(GCEA), GceaCounterBlockNumInstances, GceaCounterBlockMaxEvent, GceaCounterBlockNumCounters, GceaCounterRegAddr, gfx12_cntx_prim::select_value_Gcea, 0}; +static const GpuBlockInfo Gl2aCounterBlockInfo = {"GL2A", __BLOCK_ID_HSA(GL2A), Gl2aCounterBlockNumInstances, Gl2aCounterBlockMaxEvent, Gl2aCounterBlockNumCounters, Gl2aCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr}; +static const GpuBlockInfo Gl2cCounterBlockInfo = {"GL2C", __BLOCK_ID_HSA(GL2C), Gl2cCounterBlockNumInstances, Gl2cCounterBlockMaxEvent, Gl2cCounterBlockNumCounters, Gl2cCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr}; +static const GpuBlockInfo Atcl2CounterBlockInfo = {"ATCL2", __BLOCK_ID_HSA(ATCL2)}; // Placeholder now static const GpuBlockInfo GcFfbmCounterBlockInfo = {"GC_FFBM", __BLOCK_ID(GC_FFBM)}; // Placeholder now -static const GpuBlockInfo GcrCounterBlockInfo = {"GCR", __BLOCK_ID(GCR), GcrCounterBlockNumInstances, GcrCounterBlockMaxEvent, GcrCounterBlockNumCounters, GcrCounterRegAddr, gfx12_cntx_prim::select_value_Gcr, CounterBlockTcAttr}; -static const GpuBlockInfo Gl2aCounterBlockInfo = {"GL2A", __BLOCK_ID(GL2A), Gl2aCounterBlockNumInstances, Gl2aCounterBlockMaxEvent, Gl2aCounterBlockNumCounters, Gl2aCounterRegAddr, gfx12_cntx_prim::select_value_Gl2a, CounterBlockTcAttr}; -static const GpuBlockInfo Gl2cCounterBlockInfo = {"GL2C", __BLOCK_ID(GL2C), 
Gl2cCounterBlockNumInstances, Gl2cCounterBlockMaxEvent, Gl2cCounterBlockNumCounters, Gl2cCounterRegAddr, gfx12_cntx_prim::select_value_Gl2c, CounterBlockTcAttr}; -static const GpuBlockInfo GrbmCounterBlockInfo = {"GRBM", __BLOCK_ID(GRBM), GrbmCounterBlockNumInstances, GrbmCounterBlockMaxEvent, GrbmCounterBlockNumCounters, GrbmCounterRegAddr, gfx12_cntx_prim::select_value_Grbm, CounterBlockGRBMAttr}; -static const GpuBlockInfo RlcCounterBlockInfo = {"RLC", __BLOCK_ID(RLC), RlcCounterBlockNumInstances, RlcCounterBlockMaxEvent, RlcCounterBlockNumCounters, RlcCounterRegAddr, gfx12_cntx_prim::select_value_Rlc, 0}; -static const GpuBlockInfo SdmaPmCounterBlockInfo = {"SDMA_PM", __BLOCK_ID(SDMA_PM), SdmaCounterBlockNumInstances, SdmaCounterBlockMaxEvent, SdmaCounterBlockNumCounters, SdmaCounterRegAddr, gfx12_cntx_prim::select_value_SdmaPm, CounterBlockExplInstAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_SDMA}; -static const GpuBlockInfo GcVml2CounterBlockInfo = {"GC_VML2", __BLOCK_ID(GC_VML2)}; // Placeholder now -static const GpuBlockInfo GcUtcl2CounterBlockInfo = {"GC_UTCL2", __BLOCK_ID(GC_UTCL2)}; // Placeholder now +static const GpuBlockInfo GcUtcl2CounterBlockInfo = {"GC_UTCL2", __BLOCK_ID(GC_UTCL2), 1, Gcutcl2CounterBlockMaxEvent, Gcutcl2CounterBlockNumCounters, Gcutcl2CounterRegAddr, gfx12_cntx_prim::mc_select_value, CounterBlockRpbAttr|CounterBlockAidAttr}; +static const GpuBlockInfo GcVml2CounterBlockInfo = {"GC_VML2", __BLOCK_ID(GC_VML2), 1, GcmcVmL2CounterBlockMaxEvent, GcmcVmL2CounterBlockNumCounters, GcmcVmL2CounterRegAddr, gfx12_cntx_prim::mc_select_value, CounterBlockRpbAttr|CounterBlockAidAttr}; +static const GpuBlockInfo GcVml2SpmCounterBlockInfo = {"GC_VML2_SPM", __BLOCK_ID(GC_VML2_SPM), 1, Gcvml2CounterBlockMaxEvent, Gcvml2CounterBlockNumCounters, Gcvml2CounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr}; +static const GpuBlockInfo ChaCounterBlockInfo = {"CHA", __BLOCK_ID(CHA), ChaCounterBlockNumInstances, 
ChaCounterBlockMaxEvent, ChaCounterBlockNumCounters, ChaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr}; +static const GpuBlockInfo ChcCounterBlockInfo = {"CHC", __BLOCK_ID(CHC), ChcCounterBlockNumInstances, ChcCounterBlockMaxEvent, ChcCounterBlockNumCounters, ChcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr}; +static const GpuBlockInfo CpcCounterBlockInfo = {"CPC", __BLOCK_ID_HSA(CPC), CpcCounterBlockNumInstances, CpcCounterBlockMaxEvent, CpcCounterBlockNumCounters, CpcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPC}; +static const GpuBlockInfo CpfCounterBlockInfo = {"CPF", __BLOCK_ID_HSA(CPF), CpfCounterBlockNumInstances, CpfCounterBlockMaxEvent, CpfCounterBlockNumCounters, CpfCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPF}; +static const GpuBlockInfo CpgCounterBlockInfo = {"CPG", __BLOCK_ID(CPG), CpgCounterBlockNumInstances, CpgCounterBlockMaxEvent, CpgCounterBlockNumCounters, CpgCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPG}; +static const GpuBlockInfo GcrCounterBlockInfo = {"GCR", __BLOCK_ID_HSA(GCR), GcrCounterBlockNumInstances, GcrCounterBlockMaxEvent, GcrCounterBlockNumCounters, GcrCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr}; +static const GpuBlockInfo GceaCounterBlockInfo = {"GCEA", __BLOCK_ID_HSA(GCEA), GcEaCpwdCounterBlockNumInstances, GcEaCpwdCounterBlockMaxEvent, GcEaCpwdCounterBlockNumCounters, GcEaCpwdCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr}; +static const GpuBlockInfo GrbmCounterBlockInfo = {"GRBM", __BLOCK_ID_HSA(GRBM), GrbmCounterBlockNumInstances, GrbmCounterBlockMaxEvent, GrbmCounterBlockNumCounters, GrbmCounterRegAddr, gfx12_cntx_prim::select_value, 
CounterBlockDfltAttr|CounterBlockGRBMAttr}; +static const GpuBlockInfo RlcCounterBlockInfo = {"RLC", __BLOCK_ID(RLC), RlcCounterBlockNumInstances, RlcCounterBlockMaxEvent, RlcCounterBlockNumCounters, RlcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr}; +static const GpuBlockInfo SdmaCounterBlockInfo = {"SDMA", __BLOCK_ID_HSA(SDMA), SdmaCounterBlockNumInstances, SdmaCounterBlockMaxEvent, SdmaCounterBlockNumCounters, SdmaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockExplInstAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_SDMA}; // SE blocks: EA_SE GL2A GL2C GRBMH SPI SQG UTCL1 // (Grphics only - not supported in ROCm): GE GL1XA GL1XC PA PC WGS -static const GpuBlockInfo GceaSeCounterBlockInfo = {"GCEA_SE", __BLOCK_ID(GCEA_SE), GceaSeCounterBlockNumInstances, GceaSeCounterBlockMaxEvent, GceaSeCounterBlockNumCounters, GceaSeCounterRegAddr, gfx12_cntx_prim::select_value_GceaSe, CounterBlockSeAttr}; -static const GpuBlockInfo GrbmhCounterBlockInfo = {"GRBMH", __BLOCK_ID(GRBMH), GrbmhCounterBlockNumInstances, GrbmhCounterBlockMaxEvent, GrbmhCounterBlockNumCounters, GrbmhCounterRegAddr, gfx12_cntx_prim::select_value_Grbmh, CounterBlockSeAttr}; -static const GpuBlockInfo SpiCounterBlockInfo = {"SPI", __BLOCK_ID(SPI), SpiCounterBlockNumInstances, SpiCounterBlockMaxEvent, SpiCounterBlockNumCounters, SpiCounterRegAddr, gfx12_cntx_prim::select_value_Spi, CounterBlockSeAttr|CounterBlockSPIAttr, NULL, SPM_SE_BLOCK_NAME_SPI}; +static const GpuBlockInfo GceaSeCounterBlockInfo = {"GCEA_SE", __BLOCK_ID(GCEA_SE), GcEaSeCounterBlockNumInstances, GcEaSeCounterBlockMaxEvent, GcEaSeCounterBlockNumCounters, GcEaSeCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr}; +static const GpuBlockInfo GrbmhCounterBlockInfo = {"GRBMH", __BLOCK_ID(GRBMH), GrbmhCounterBlockNumInstances, GrbmhCounterBlockMaxEvent, GrbmhCounterBlockNumCounters, GrbmhCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr}; +static 
const GpuBlockInfo SpiCounterBlockInfo = {"SPI", __BLOCK_ID_HSA(SPI), SpiCounterBlockNumInstances, SpiCounterBlockMaxEvent, SpiCounterBlockNumCounters, SpiCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSPIAttr, NULL, SPM_SE_BLOCK_NAME_SPI}; static const GpuBlockInfo SqgCounterBlockInfo = {"SQG", __BLOCK_ID(SQG), SqgCounterBlockNumInstances, SqgCounterBlockMaxEvent, SqgCounterBlockNumCounters, SqgCounterRegAddr, gfx12_cntx_prim::sq_select_value, CounterBlockSeAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQG}; -static const GpuBlockInfo GcUtcl1CounterBlockInfo = {"GC_UTCL1", __BLOCK_ID(GC_UTCL1), GcUtcl1CounterBlockNumInstances, GcUtcl1CounterBlockMaxEvent, GcUtcl1CounterBlockNumCounters, GcUtcl1CounterRegAddr, gfx12_cntx_prim::select_value_GcUtcl1, CounterBlockSeAttr, NULL, SPM_SE_BLOCK_NAME_UTCL1}; +static const GpuBlockInfo GcUtcl1CounterBlockInfo = {"GC_UTCL1", __BLOCK_ID(GC_UTCL1), Utcl1CounterBlockNumInstances, Utcl1CounterBlockMaxEvent, Utcl1CounterBlockNumCounters, Utcl1CounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr, NULL, SPM_SE_BLOCK_NAME_UTCL1}; // SA blocks: GL1A GL1C // (Grphics only - not supported in ROCm): CB DB SC SX // (Not enabled for gfx12): GL1CG -static const GpuBlockInfo Gl1aCounterBlockInfo = {"GL1A", __BLOCK_ID(GL1A), Gl1aCounterBlockNumInstances, Gl1aCounterBlockMaxEvent, Gl1aCounterBlockNumCounters, Gl1aCounterRegAddr, gfx12_cntx_prim::select_value_Gl1a, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr}; -static const GpuBlockInfo Gl1cCounterBlockInfo = {"GL1C", __BLOCK_ID(GL1C), Gl1cCounterBlockNumInstances, Gl1cCounterBlockMaxEvent, Gl1cCounterBlockNumCounters, Gl1cCounterRegAddr, gfx12_cntx_prim::select_value_Gl1c, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr}; +static const GpuBlockInfo Gl1aCounterBlockInfo = {"GL1A", __BLOCK_ID_HSA(GL1A), Gl1aCounterBlockNumInstances, Gl1aCounterBlockMaxEvent, Gl1aCounterBlockNumCounters, Gl1aCounterRegAddr, 
gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr}; +static const GpuBlockInfo Gl1cCounterBlockInfo = {"GL1C", __BLOCK_ID_HSA(GL1C), Gl1cCounterBlockNumInstances, Gl1cCounterBlockMaxEvent, Gl1cCounterBlockNumCounters, Gl1cCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr}; // WGP blocks: SQC TA TCP TD -static const GpuBlockInfo SqcCounterBlockInfo = {"SQ", __BLOCK_ID(SQ), SqcCounterBlockNumInstances, SqcCounterBlockMaxEvent, SqcCounterBlockNumCounters, SqcCounterRegAddr, gfx12_cntx_prim::sq_select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQC}; -static const GpuBlockInfo TaCounterBlockInfo = {"TA", __BLOCK_ID(TA), TaCounterBlockNumInstances, TaCounterBlockMaxEvent, TaCounterBlockNumCounters, TaCounterRegAddr, gfx12_cntx_prim::select_value_Ta, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TaBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TA}; -static const GpuBlockInfo TdCounterBlockInfo = {"TD", __BLOCK_ID(TD), TdCounterBlockNumInstances, TdCounterBlockMaxEvent, TdCounterBlockNumCounters, TdCounterRegAddr, gfx12_cntx_prim::select_value_Td, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TD}; -static const GpuBlockInfo TcpCounterBlockInfo = {"TCP", __BLOCK_ID(TCP), TcpCounterBlockNumInstances, TcpCounterBlockMaxEvent, TcpCounterBlockNumCounters, TcpCounterRegAddr, gfx12_cntx_prim::select_value_Tcp, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TCP}; -} // namespace gfx1201 +static const GpuBlockInfo SqcCounterBlockInfo = {"SQ", __BLOCK_ID_HSA(SQ), SqcCounterBlockNumInstances, SqcCounterBlockMaxEvent, SqcCounterBlockNumCounters, SqcCounterRegAddr, gfx12_cntx_prim::sq_select_value, 
CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQC}; +static const GpuBlockInfo TaCounterBlockInfo = {"TA", __BLOCK_ID_HSA(TA), TaCounterBlockNumInstances, TaCounterBlockMaxEvent, TaCounterBlockNumCounters, TaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TaBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TA}; +static const GpuBlockInfo TdCounterBlockInfo = {"TD", __BLOCK_ID_HSA(TD), TdCounterBlockNumInstances, TdCounterBlockMaxEvent, TdCounterBlockNumCounters, TdCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TD}; +static const GpuBlockInfo TcpCounterBlockInfo = {"TCP", __BLOCK_ID_HSA(TCP), TcpCounterBlockNumInstances, TcpCounterBlockMaxEvent, TcpCounterBlockNumCounters, TcpCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TCP}; +} // namespace gfx12xx } // namespace gfx12 } // namespace gfxip diff --git a/gfxip/gfx12/gfx12_primitives.h b/gfxip/gfx12/gfx12_primitives.h index b7f7838665..aae8c25981 100644 --- a/gfxip/gfx12/gfx12_primitives.h +++ b/gfxip/gfx12/gfx12_primitives.h @@ -32,23 +32,6 @@ #define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4 ///< Privileged memory performance counter #define COPY_DATA_SEL_COUNT_1DW 0 ///< Copy 1 word (32 bits) -// Counter Select Register value lambdas -#define select_value(reg_name) \ - [](const counter_des_t& counter_des) { \ - uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id); \ - return select; \ - } -#define select_value_t2(reg_name) \ - [](const counter_des_t& counter_des) { \ - uint32_t select = SET_REG_FIELD_BITS(reg_name, PERFCOUNTER_SELECT, counter_des.id); \ - return select; \ - } -#define select_value_blank() \ - [](const counter_des_t& 
counter_des) { \ - uint32_t select = 0; \ - return select; \ - } - namespace gfxip { namespace gfx12 { @@ -156,7 +139,7 @@ class gfx12_cntx_prim { } gfx; }; - static const uint32_t SQ_BLOCK_ID = __BLOCK_ID(SQ); + static const uint32_t SQ_BLOCK_ID = __BLOCK_ID_HSA(SQ); static const uint32_t SQ_BLOCK_SPM_ID = SPM_SE_BLOCK_NAME_SQG; static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG; @@ -254,7 +237,7 @@ class gfx12_cntx_prim { uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) | SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) | - SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_side<<6) | (wgp_index << 2) | (instance_index << 1))); + SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_side<<6) | (wgp_index << 2) | instance_index)); return grbm_gfx_index; } @@ -365,34 +348,22 @@ class gfx12_cntx_prim { static uint32_t mc_config_value(const counter_des_t& counter_des) { return counter_des.index; } // MC registers values + static uint32_t mc_select_value(const counter_des_t& counter_des) { + uint32_t perfcounter0_cfg = + SET_REG_FIELD_BITS(GCUTCL2_PERFCOUNTER0_CFG, PERF_SEL, counter_des.id) | + SET_REG_FIELD_BITS(GCUTCL2_PERFCOUNTER0_CFG, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM) | + SET_REG_FIELD_BITS(GCUTCL2_PERFCOUNTER0_CFG, ENABLE, 1); + return perfcounter0_cfg; + } static uint32_t mc_reset_value() { return MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM; } static uint32_t mc_start_value() { return MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM; } - static auto constexpr select_value_Cha= select_value(CHA_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Chc= select_value(CHC_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Cpc= select_value(CPC_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Cpf= select_value(CPF_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Cpg= select_value(CPG_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Gcea= select_value_blank(); // 
register not present - static auto constexpr select_value_Gcr= select_value(GCR_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Gl2a= select_value(GL2A_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Gl2c= select_value(GL2C_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Grbm= select_value(GRBM_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Rlc= select_value_t2(RLC_PERFCOUNTER0_SELECT); - static auto constexpr select_value_SdmaPm= select_value_blank(); // register not present - static auto constexpr select_value_GcVml2= select_value_blank(); // register not present - static auto constexpr select_value_GcUtcl2= select_value_blank(); // register not present - static auto constexpr select_value_GceaSe= select_value_blank(); // register not present - static auto constexpr select_value_Grbmh= select_value(GRBMH_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Spi= select_value(SPI_PERFCOUNTER0_SELECT); - static auto constexpr select_value_GcUtcl1= select_value_blank(); // register not present - static auto constexpr select_value_Gl1a= select_value(GL1A_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Gl1c= select_value(GL1C_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Ta= select_value(TA_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Td= select_value(TD_PERFCOUNTER0_SELECT); - static auto constexpr select_value_Tcp= select_value(TCP_PERFCOUNTER0_SELECT); - static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = select_value_blank(); - + static uint32_t select_value(const counter_des_t& counter_des) { + uint32_t perfcounter0_select = + SET_REG_FIELD_BITS(CPC_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id); + return perfcounter0_select; + } + static uint32_t spm_select_value(const counter_des_t& counter_des) { uint32_t tcp_perfcounter0_select = SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) | diff --git a/src/core/gfx10_factory.cpp 
b/src/core/gfx10_factory.cpp index c2d0730fc1..6962e2dd31 100644 --- a/src/core/gfx10_factory.cpp +++ b/src/core/gfx10_factory.cpp @@ -48,7 +48,7 @@ class Gfx10Factory : public Pm4Factory { // void ConstructTable(const AgentInfo* agent_info); void Init(const AgentInfo* agent_info); // void ConstructBuilders(const AgentInfo* agent_info); - static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER]; + static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER]; }; // Gfx builders init @@ -81,7 +81,7 @@ void Gfx10Factory::Init(const AgentInfo* agent_info) { } // GFX10 block table -const GpuBlockInfo* Gfx10Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = { +const GpuBlockInfo* Gfx10Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = { &CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo, NULL /*&GrbmSeCounterBlockInfo*/, &SpiCounterBlockInfo, &SqCounterBlockInfo, NULL /*&SqCsCounterBlockInfo*/, NULL /*GFX8 SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo, diff --git a/src/core/gfx11_factory.cpp b/src/core/gfx11_factory.cpp index e174fdfe80..f06c2be333 100644 --- a/src/core/gfx11_factory.cpp +++ b/src/core/gfx11_factory.cpp @@ -48,7 +48,7 @@ class Gfx11Factory : public Pm4Factory { // void ConstructTable(const AgentInfo* agent_info); void Init(const AgentInfo* agent_info); // void ConstructBuilders(const AgentInfo* agent_info); - static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER]; + static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER]; }; // Gfx builders init @@ -81,7 +81,7 @@ void Gfx11Factory::Init(const AgentInfo* agent_info) { } // GFX11 block table -const GpuBlockInfo* Gfx11Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = { +const GpuBlockInfo* Gfx11Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = { &CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo, NULL /*&GrbmSeCounterBlockInfo*/, &SpiCounterBlockInfo, 
&SqCounterBlockInfo, NULL /*&SqCsCounterBlockInfo*/, NULL /*GFX8 SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo, diff --git a/src/core/gfx12_factory.cpp b/src/core/gfx12_factory.cpp index 7392378a92..9b9b86e1b6 100644 --- a/src/core/gfx12_factory.cpp +++ b/src/core/gfx12_factory.cpp @@ -79,31 +79,35 @@ void Gfx12Factory::ConstructBuilders(const AgentInfo* agent_info) { void Gfx12Factory::ConstructTable(const AgentInfo* agent_info) { // Global blocks - block_table_[__BLOCK_ID(CHA)] = &ChaCounterBlockInfo; - block_table_[__BLOCK_ID(CHC)] = &ChcCounterBlockInfo; - block_table_[__BLOCK_ID(CPC)] = &CpcCounterBlockInfo; - block_table_[__BLOCK_ID(CPF)] = &CpfCounterBlockInfo; - block_table_[__BLOCK_ID(CPG)] = &CpgCounterBlockInfo; - block_table_[__BLOCK_ID(GCEA)] = &GceaCounterBlockInfo; - block_table_[__BLOCK_ID(GCR)] = &GcrCounterBlockInfo; - block_table_[__BLOCK_ID(GL2A)] = &Gl2aCounterBlockInfo; - block_table_[__BLOCK_ID(GL2C)] = &Gl2cCounterBlockInfo; - block_table_[__BLOCK_ID(GRBM)] = &GrbmCounterBlockInfo; - block_table_[__BLOCK_ID(RLC)] = &RlcCounterBlockInfo; - block_table_[__BLOCK_ID(SDMA_PM)] = &SdmaPmCounterBlockInfo; + block_table_[__BLOCK_ID(CHA)] = &ChaCounterBlockInfo; + block_table_[__BLOCK_ID(CHC)] = &ChcCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(CPC)] = &CpcCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(CPF)] = &CpfCounterBlockInfo; + block_table_[__BLOCK_ID(CPG)] = &CpgCounterBlockInfo; + block_table_[__BLOCK_ID(GC_UTCL2)] = &GcUtcl2CounterBlockInfo; + block_table_[__BLOCK_ID(GC_VML2)] = &GcVml2CounterBlockInfo; + block_table_[__BLOCK_ID(GC_VML2_SPM)] = &GcVml2SpmCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(GCEA)] = &GceaCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(GCR)] = &GcrCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(GL2A)] = &Gl2aCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(GL2C)] = &Gl2cCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(GRBM)] = &GrbmCounterBlockInfo; + block_table_[__BLOCK_ID(RLC)] = &RlcCounterBlockInfo; + 
block_table_[__BLOCK_ID_HSA(SDMA)] = &SdmaCounterBlockInfo; // SE blocks - block_table_[__BLOCK_ID(GCEA_SE)] = &GceaSeCounterBlockInfo; - block_table_[__BLOCK_ID(GRBMH)] = &GrbmhCounterBlockInfo; - block_table_[__BLOCK_ID(SPI)] = &SpiCounterBlockInfo; - block_table_[__BLOCK_ID(SQ)] = &SqcCounterBlockInfo; - block_table_[__BLOCK_ID(GC_UTCL1)] = &GcUtcl1CounterBlockInfo; + block_table_[__BLOCK_ID(GC_UTCL1)] = &GcUtcl1CounterBlockInfo; + block_table_[__BLOCK_ID(GCEA_SE)] = &GceaSeCounterBlockInfo; + block_table_[__BLOCK_ID(GRBMH)] = &GrbmhCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(SPI)] = &SpiCounterBlockInfo; + block_table_[__BLOCK_ID(SQG)] = &SqgCounterBlockInfo; // SA blocks - block_table_[__BLOCK_ID(GL1A)] = &Gl1aCounterBlockInfo; - block_table_[__BLOCK_ID(GL1C)] = &Gl1cCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(GL1A)] = &Gl1aCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(GL1C)] = &Gl1cCounterBlockInfo; // WGP blocks - block_table_[__BLOCK_ID(TA)] = &TaCounterBlockInfo; - block_table_[__BLOCK_ID(TCP)] = &TcpCounterBlockInfo; - block_table_[__BLOCK_ID(TD)] = &TdCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(SQ)] = &SqcCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(TA)] = &TaCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(TCP)] = &TcpCounterBlockInfo; + block_table_[__BLOCK_ID_HSA(TD)] = &TdCounterBlockInfo; } // Pm4Factory create mathods diff --git a/src/core/gfx908_factory.cpp b/src/core/gfx908_factory.cpp index 4c5403c7c2..f2bcb32134 100644 --- a/src/core/gfx908_factory.cpp +++ b/src/core/gfx908_factory.cpp @@ -28,11 +28,11 @@ namespace aql_profile { -const GpuBlockInfo* Mi100Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {}; +const GpuBlockInfo* Mi100Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {}; Mi100Factory::Mi100Factory(const AgentInfo* agent_info) : Gfx9Factory(block_table_, sizeof(block_table_), agent_info) { - for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) { + for (unsigned i = 0; i < 
AQLPROFILE_BLOCKS_NUMBER; ++i) { const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[i]; if (base_table_ptr == NULL) continue; GpuBlockInfo* block_info = nullptr; diff --git a/src/core/gfx90a_factory.cpp b/src/core/gfx90a_factory.cpp index eb40050cc4..d82d80f42a 100644 --- a/src/core/gfx90a_factory.cpp +++ b/src/core/gfx90a_factory.cpp @@ -37,14 +37,14 @@ class Mi200Factory : public Gfx9Factory { virtual int GetAccumHiID() const override { return 185; }; protected: - static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER]; + static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER]; }; -const GpuBlockInfo* Mi200Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {}; +const GpuBlockInfo* Mi200Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {}; Mi200Factory::Mi200Factory(const AgentInfo* agent_info) : Gfx9Factory(block_table_, sizeof(block_table_), agent_info) { - for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) { + for (unsigned i = 0; i < AQLPROFILE_BLOCKS_NUMBER; ++i) { const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[i]; if (base_table_ptr == NULL) continue; GpuBlockInfo* block_info = nullptr; diff --git a/src/core/gfx940_factory.cpp b/src/core/gfx940_factory.cpp index e2f3fa159c..cc9c877b10 100644 --- a/src/core/gfx940_factory.cpp +++ b/src/core/gfx940_factory.cpp @@ -31,7 +31,7 @@ namespace aql_profile { class Mi300Factory : public Mi100Factory { public: explicit Mi300Factory(const AgentInfo* agent_info) : Mi100Factory(agent_info) { - for (unsigned blockname_id = 0; blockname_id < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; + for (unsigned blockname_id = 0; blockname_id < AQLPROFILE_BLOCKS_NUMBER; ++blockname_id) { const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[blockname_id]; if (base_table_ptr == NULL) continue; diff --git a/src/core/gfx9_factory.cpp b/src/core/gfx9_factory.cpp index 9b50cdc308..892df3b96e 100644 --- a/src/core/gfx9_factory.cpp +++ 
b/src/core/gfx9_factory.cpp @@ -76,7 +76,7 @@ void Gfx9Factory::Print(const GpuBlockInfo* block_info) { } // GFX9 block table -const GpuBlockInfo* Gfx9Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = { +const GpuBlockInfo* Gfx9Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = { &CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo, &GrbmSeCounterBlockInfo, &SpiCounterBlockInfo, &SqCounterBlockInfo, &SqCsCounterBlockInfo, NULL /*GFX? SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo, &TcaCounterBlockInfo, diff --git a/src/core/gfx9_factory.h b/src/core/gfx9_factory.h index a1c51f086f..eddde234f1 100644 --- a/src/core/gfx9_factory.h +++ b/src/core/gfx9_factory.h @@ -42,7 +42,7 @@ class Gfx9Factory : public Pm4Factory { protected: void Init(const AgentInfo* agent_info); - static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER]; + static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER]; static void Print(const GpuBlockInfo* block_info); }; @@ -53,7 +53,7 @@ class Mi100Factory : public Gfx9Factory { explicit Mi100Factory(const AgentInfo* agent_info); protected: - static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER]; + static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER]; }; } // namespace aql_profile diff --git a/src/core/include/aql_profile_v2.h b/src/core/include/aql_profile_v2.h index 7fd763b7ff..b20024e340 100644 --- a/src/core/include/aql_profile_v2.h +++ b/src/core/include/aql_profile_v2.h @@ -50,6 +50,49 @@ typedef enum { AQLPROFILE_AGENT_VERSION_LAST } aqlprofile_agent_version_t; +/** + * @brief Enums for counter blocks. + * AQLPROFILE_BLOCK_NAME_RESERVED_X are blocks reserved for NPI. Reserving them here keeps the + * enum values consistent between mainline and NPI. 
+ * TODO: Move all counter blocks here from hsa_ven_amd_aqlprofile.h + */ +typedef enum { + // Blocks reserved for NPI support + AQLPROFILE_BLOCK_NAME_RESERVED_0 = HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, + AQLPROFILE_BLOCK_NAME_RESERVED_1, + AQLPROFILE_BLOCK_NAME_RESERVED_2, + AQLPROFILE_BLOCK_NAME_RESERVED_3, + AQLPROFILE_BLOCK_NAME_RESERVED_4, + AQLPROFILE_BLOCK_NAME_RESERVED_5, + + // Blocks available for most ASICs, but not currently in use + AQLPROFILE_BLOCK_NAME_CPG, + AQLPROFILE_BLOCK_NAME_RLC, + + // New blocks for gc_12_0_x + AQLPROFILE_BLOCK_NAME_CHA, + AQLPROFILE_BLOCK_NAME_CHC, + AQLPROFILE_BLOCK_NAME_GC_CANE, + AQLPROFILE_BLOCK_NAME_GC_FFBM, + AQLPROFILE_BLOCK_NAME_GC_L2TLB, + AQLPROFILE_BLOCK_NAME_GC_UTCL1, + AQLPROFILE_BLOCK_NAME_GC_UTCL2, + AQLPROFILE_BLOCK_NAME_GC_VML2, + AQLPROFILE_BLOCK_NAME_GC_VML2_SPM, + AQLPROFILE_BLOCK_NAME_GCEA_SE, + AQLPROFILE_BLOCK_NAME_GRBMH, + AQLPROFILE_BLOCK_NAME_SQG, + + // Blocks reserved for NPI support + AQLPROFILE_BLOCK_NAME_RESERVED_6, + AQLPROFILE_BLOCK_NAME_RESERVED_7, + AQLPROFILE_BLOCK_NAME_RESERVED_8, + AQLPROFILE_BLOCK_NAME_RESERVED_9, + + // Add new blocks above + AQLPROFILE_BLOCKS_NUMBER +} aqlprofile_block_name_t; + /** * @brief Flags to describe which agents can access given buffer. */ diff --git a/src/pm4/pmc_builder.h b/src/pm4/pmc_builder.h index 82b562932e..398cc4369d 100644 --- a/src/pm4/pmc_builder.h +++ b/src/pm4/pmc_builder.h @@ -285,9 +285,14 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives { // std::endl; // Set GRBM index to access proper block instance - const uint32_t grbm_value = (block_info->instance_count > 1) + // + // TODO: To select different events on different instances of a WGP counter block, we + // need to loop through the WGPs instead of blindly broadcasting to all instances. Fortunately, this + // is not a common practice. + const uint32_t grbm_value = (block_info->instance_count > 1 && !(block_info->attr & CounterBlockWgpAttr)) ? 
Primitives::grbm_inst_index_value(block_des.index) : Primitives::grbm_broadcast_value(); + builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::GRBM_GFX_INDEX_ADDR, grbm_value); // Reset counters if (block_info->attr & CounterBlockMcAttr) { @@ -602,9 +607,12 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives { else grbm_value = Primitives::grbm_se_sh_wgp_index_value(se_index, sarray, wgp); builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::GRBM_GFX_INDEX_ADDR, grbm_value); + uint32_t dw_mask = reg_info.register_addr_hi.offset ? 3 : 1; builder.BuildCopyCounterDataPacket( cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi, - reinterpret_cast(data_buffer) + read_counter, 1); + reinterpret_cast(data_buffer) + read_counter, dw_mask); + if (data_buffer && (dw_mask == 1)) + *(reinterpret_cast(data_buffer) + read_counter + 1) = 0; read_counter += 2; } } else { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fdfbfdd59d..70f3001e58 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -31,7 +31,7 @@ find_package(Clang REQUIRED CONFIG ## Building test executable add_executable ( ${EXE_NAME} ${KERN_SRC} ${CTRL_SRC} ${UTIL_SRC} ) -target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${API_PATH} ${ROCM_ROOT_DIR}/include ${TEST_DIR}/parser/ ) +target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${API_PATH} ${ROCM_ROOT_DIR}/include ${TEST_DIR}/parser/ ${TEST_DIR}/../src/core/include) target_link_libraries( ${EXE_NAME} PRIVATE pthread hsa-runtime64::hsa-runtime64 dl ) install(TARGETS ${EXE_NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME} COMPONENT tests) diff --git a/test/app/test.cpp b/test/app/test.cpp index a47e9766c5..12260b76c3 100644 --- a/test/app/test.cpp +++ b/test/app/test.cpp @@ -22,6 +22,7 @@ #include "hsa/hsa_ext_amd.h" +#include "aql_profile_v2.h" #include #include @@ -275,6 +276,65 @@ int main(int argc, char* argv[]) { }; events_count = sizeof(events_arr1) / 
sizeof(hsa_ven_amd_aqlprofile_event_t); events_arr = events_arr1; + } else if (TestHsa::HsaAgentName() == "gfx12") { + const hsa_ven_amd_aqlprofile_event_t events_arr1[] = { + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHA, 0, 25 /*ALWAYS*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHA, 0, 0 /*BUSY*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHC, 0, 0 /*ALWAYS*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHC, 0, 1 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 0 /*ALWAYS*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 25 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0, 0 /*ALWAYS*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0, 24 /*BUSY*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CPG, 0, 0 /*ALWAYS*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CPG, 0, 51 /*BUSY*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_UTCL2, 0, 1}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_VML2, 0, 5}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA, 0, 3}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA, 0, 4}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR, 0, 6}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR, 0, 22}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A, 0, 1 /*ALWAYS*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A, 0, 2 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C, 0, 1 /*ALWAYS*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C, 0, 2 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0, 0 /*ALWAYS*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0, 2 /*GUI_ACTIVE*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_RLC, 0, 2}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_RLC, 0, 5}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 0, 0 /*ALWAYS*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 0, 2 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 1, 0 /*ALWAYS*/}, + 
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 1, 2 /*BUSY*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_UTCL1, 0, 1}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_UTCL1, 0, 2}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GCEA_SE, 0, 3}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GCEA_SE, 0, 4}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GRBMH, 0, 0 /*ALWAYS*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GRBMH, 0, 19}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0, 46 /*CSN_BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0, 47 /*CSN_NUM_THREADGROUPS*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_SQG, 0,14 /*ALWAYS*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_SQG, 0, 15 /*BUSY*/}, + {(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_SQG, 0, 19 /*WAVES*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A, 0, 21 /*ALWAYS*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A, 0, 0 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C, 0, 0 /*ALWAYS*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C, 0, 1 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 2 /*ALWAYS*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 3 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 4 /*WAVES*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0, 15 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0, 1 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0, 96 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0, 10 /*REQ_READ*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0, 14 /*REQ_WRITE*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1, 96 /*BUSY*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1, 10 /*REQ_READ*/}, + {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1, 14 /*REQ_WRITE*/}, + }; + events_count = sizeof(events_arr1) / sizeof(hsa_ven_amd_aqlprofile_event_t); + events_arr = events_arr1; } else { const hsa_ven_amd_aqlprofile_event_t 
events_arr1[] = { {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 4 /*WAVES*/}, @@ -294,7 +354,7 @@ int main(int argc, char* argv[]) { } else { const int block_index_max = 16; const int event_id_max = 128; - for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) { + for (unsigned i = 0; i < AQLPROFILE_BLOCKS_NUMBER; ++i) { for (unsigned j = 0; j < block_index_max; ++j) { for (unsigned k = 0; k <= event_id_max; k += scan_step) { fflush(stdout);