[gfx12] Finalize support for gfx12 new counter blocks (#81)

1. Move all new gfx12 block enums to aql_profile_v2.h. hsa_ven_amd_aqlprofile.h will be left untouched.
2. Reorganize counter info in gfx12_block_info.h to be purely alphabetically ordered for easy comparison between different IP versions. Also use auto-generated block names from IP header files whenever possible to reduce manual editing.
3. Remove unused counter info from graphics blocks.
4. Added UTCL2 and VML2 support
5. Added all gfx12 blocks to ctrl test
This commit is contained in:
Ma, Bing
2025-05-29 15:14:09 -07:00
gecommit door GitHub
bovenliggende 1ed169e30c
commit b20803e95d
15 gewijzigde bestanden met toevoegingen van 322 en 329 verwijderingen
+88 -172
Bestand weergeven
@@ -20,45 +20,16 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef _GFX12_BLOCKINFO_H_
#define _GFX12_BLOCKINFO_H_
namespace gfxip {
namespace gfx12 {
#define __BLOCK_ID(block) HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_##block
// Private PMC Counter BlockId is defined here
// Public PMC Counter BlockId is defined in hsa_ven_amd_aqlprofile.h
#define __BLOCK_ID_HSA(block) HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_##block
#define __BLOCK_ID(block) AQLPROFILE_BLOCK_NAME_##block
enum CounterBlockId {
__BLOCK_ID(RLC) = HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER,
__BLOCK_ID(CPG),
__BLOCK_ID(GRBMH),
__BLOCK_ID(GRBMA),
__BLOCK_ID(SQG),
// mem blocks
__BLOCK_ID(CHA),
__BLOCK_ID(CHC),
__BLOCK_ID(GLARBA),
__BLOCK_ID(GLARBC),
__BLOCK_ID(GC_CANE),
__BLOCK_ID(GC_FFBM),
__BLOCK_ID(GC_NHTTLB),
__BLOCK_ID(GC_L2TLB),
__BLOCK_ID(GC_UTCL1),
__BLOCK_ID(GC_UTCL2),
__BLOCK_ID(GC_VML2),
__BLOCK_ID(GCEA_SE),
// New SDMA Perfmon interface, as opposed to the original SDMA PerfCnt. gfx12
// supports both, and they should provide the same counter events. We might
// remove SDMA PerfCnt support from aqlprofile in the future, since Perfmon is
// easier to program.
__BLOCK_ID(SDMA_PM),
// Counters retrieved by KFD
IommuV2CounterBlockId,
IommuV2CounterBlockId = AQLPROFILE_BLOCKS_NUMBER,
KernelDriverCounterBlockId,
CpPipeStatsCounterBlockId,
@@ -130,155 +101,100 @@ namespace gfx1201 {
// IP versions for Radeon RX 9070
// ip_block : gc_12_0_1
// ip_block : athub_4_1_0
// ip_block : umc_8_14_0
// ip_block : df_4_15_1
// ip_block : pcie_6_1_0
// Number of block instances
// Reference: global_features.h (from gfxip header file package)
// rspm_config.pm (from design configuration files)
// The following default values are generated from Radeon RX 9070, the first product of the
// RDNA 4 lineup. It could change for other products, and the change will be made in
// [PRODUCT_NAME]_factory.h
//
static const uint32_t GrbmCounterBlockNumInstances = 1;
static const uint32_t RlcCounterBlockNumInstances = 1;
static const uint32_t CpgCounterBlockNumInstances = 1;
static const uint32_t CpcCounterBlockNumInstances = 1;
static const uint32_t CpfCounterBlockNumInstances = 1;
static const uint32_t GcrCounterBlockNumInstances = 1;
static const uint32_t Ge1CounterBlockNumInstances = 1;
static const uint32_t Gl2aCounterBlockNumInstances = 4; // GFX_CPWD__NUM_GL2A_PER_CPWD
static const uint32_t Gl2cCounterBlockNumInstances = 32; // GFX_CPWD__NUM_GL2C_PER_CPWD
static const uint32_t GceaCounterBlockNumInstances = 36; // GFX_CPWD__NUM_EA_PER_CPWD
static const uint32_t ChaCounterBlockNumInstances = 1;
static const uint32_t ChcCounterBlockNumInstances = 4; // GFX_CPWD__NUM_CHC
static const uint32_t Ge2DistCounterBlockNumInstances = 1;
static const uint32_t SdmaCounterBlockNumInstances = 2; // GFX_CPWD__NUM_SDMA_PER_CPWD
static const uint32_t GcVml2CounterBlockNumInstances = 1;
static const uint32_t GcMcVml2CounterBlockNumInstances = 1;
static const uint32_t GcUtcl2CounterBlockNumInstances = 1;
static const uint32_t GrbmhCounterBlockNumInstances = 1;
static const uint32_t CbCounterBlockNumInstances = 2; // GFX_SE__NUM_RB_PER_SA
static const uint32_t DbCounterBlockNumInstances = 2; // GFX_SE__NUM_RB_PER_SA
static const uint32_t SuCounterBlockNumInstances = 1; // GFX_SE__NUM_PA_PER_SE
static const uint32_t SxCounterBlockNumInstances = 1;
static const uint32_t ScCounterBlockNumInstances = 2; // GFX_SE__NUM_PACKER_PER_SA
static const uint32_t TaCounterBlockNumInstances = 2; // GFX_SE__NUM_ROWS_PER_WGP
static const uint32_t TdCounterBlockNumInstances = 2; // GFX_SE__NUM_ROWS_PER_WGP
static const uint32_t TcpCounterBlockNumInstances = 2; // GFX_SE__NUM_ROWS_PER_WGP
static const uint32_t SpiCounterBlockNumInstances = 1;
static const uint32_t SqgCounterBlockNumInstances = 1;
static const uint32_t Gl1aCounterBlockNumInstances = 1;
static const uint32_t RmiCounterBlockNumInstances = 2; // GFX_SE__NUM_RMI_PER_SA
static const uint32_t Gl1cCounterBlockNumInstances = 4; // GFX_SE__NUM_GL1C_PER_SA
static const uint32_t SqcCounterBlockNumInstances = 1;
static const uint32_t PcCounterBlockNumInstances = 1;
static const uint32_t GceaSeCounterBlockNumInstances = 4;
static const uint32_t GeCounterBlockNumInstances = 1;
static const uint32_t WgsCounterBlockNumInstances = 1;
static const uint32_t Gl1xaCounterBlockNumInstances = 1;
static const uint32_t Gl1xcCounterBlockNumInstances = 4; // GFX_SE__NUM_GL1C_PER_SA
static const uint32_t GcUtcl1CounterBlockNumInstances = 2;
static const uint32_t ChaCounterBlockNumInstances = 1;
static const uint32_t ChcCounterBlockNumInstances = 4;
static const uint32_t CpcCounterBlockNumInstances = 1;
static const uint32_t CpfCounterBlockNumInstances = 1;
static const uint32_t CpgCounterBlockNumInstances = 1;
static const uint32_t GcmcVmL2CounterBlockNumInstances = 1;
static const uint32_t GcrCounterBlockNumInstances = 1;
static const uint32_t Gcutcl2CounterBlockNumInstances = 1;
static const uint32_t Gcvml2CounterBlockNumInstances = 1;
static const uint32_t GcEaCpwdCounterBlockNumInstances = 36;
static const uint32_t GcEaSeCounterBlockNumInstances = 4;
static const uint32_t Gl1aCounterBlockNumInstances = 1;
static const uint32_t Gl1cCounterBlockNumInstances = 4;
static const uint32_t Gl2aCounterBlockNumInstances = 4;
static const uint32_t Gl2cCounterBlockNumInstances = 32;
static const uint32_t GrbmCounterBlockNumInstances = 1;
static const uint32_t GrbmhCounterBlockNumInstances = 1;
static const uint32_t RlcCounterBlockNumInstances = 1;
static const uint32_t RpbCounterBlockNumInstances = 1;
static const uint32_t SdmaCounterBlockNumInstances = 2;
static const uint32_t SpiCounterBlockNumInstances = 1;
static const uint32_t SqcCounterBlockNumInstances = 1;
static const uint32_t SqgCounterBlockNumInstances = 1;
static const uint32_t TaCounterBlockNumInstances = 2;
static const uint32_t TcpCounterBlockNumInstances = 2;
static const uint32_t TdCounterBlockNumInstances = 2;
static const uint32_t Utcl1CounterBlockNumInstances = 2;
static const uint32_t SdmaCounterBlockMaxInstances = 8;
static const uint32_t UmcCounterBlockMaxInstances = 32;
// Number of block counter registers - Auto-generated from chip_offset_byte.h, edit with extra
// caution. Reference: chip_offset_byte.h (from gfxip header file package). The following default
// values are generated from Radeon RX 9070, the first product of the RDNA 4 lineup. They could
// change for other products, and the change will be made in [PRODUCT_NAME]_factory.h
//
static const uint32_t GrbmCounterBlockNumCounters = 2;
static const uint32_t RlcCounterBlockNumCounters = 2;
static const uint32_t CpgCounterBlockNumCounters = 2;
static const uint32_t CpcCounterBlockNumCounters = 2;
static const uint32_t CpfCounterBlockNumCounters = 2;
static const uint32_t GcrCounterBlockNumCounters = 2;
static const uint32_t PhCounterBlockNumCounters = 8;
static const uint32_t Ge1CounterBlockNumCounters = 4;
static const uint32_t Gl2aCounterBlockNumCounters = 4;
static const uint32_t Gl2cCounterBlockNumCounters = 4;
static const uint32_t GceaCounterBlockNumCounters = 2;
static const uint32_t ChaCounterBlockNumCounters = 4;
static const uint32_t ChcCounterBlockNumCounters = 4;
static const uint32_t Ge2DistCounterBlockNumCounters = 4;
static const uint32_t SdmaCounterBlockNumCounters = 2;
static const uint32_t GcVml2CounterBlockNumCounters = 2;
static const uint32_t GcMcVml2CounterBlockNumCounters = 1;
static const uint32_t GcUtcl2CounterBlockNumCounters = 1;
static const uint32_t GrbmhCounterBlockNumCounters = 2;
static const uint32_t CbCounterBlockNumCounters = 4;
static const uint32_t DbCounterBlockNumCounters = 4;
static const uint32_t SuCounterBlockNumCounters = 4;
static const uint32_t SxCounterBlockNumCounters = 4;
static const uint32_t PaScCounterBlockNumCounters = 8;
static const uint32_t TaCounterBlockNumCounters = 2;
static const uint32_t TdCounterBlockNumCounters = 2;
static const uint32_t TcpCounterBlockNumCounters = 4;
static const uint32_t SpiCounterBlockNumCounters = 6;
static const uint32_t SqgCounterBlockNumCounters = 8;
static const uint32_t Gl1aCounterBlockNumCounters = 4;
static const uint32_t RmiCounterBlockNumCounters = 4;
static const uint32_t Gl1cCounterBlockNumCounters = 4;
static const uint32_t SqcCounterBlockNumCounters = 16;
static const uint32_t PcCounterBlockNumCounters = 4;
static const uint32_t GceaSeCounterBlockNumCounters = 2;
static const uint32_t GeCounterBlockNumCounters = 4;
static const uint32_t WgsCounterBlockNumCounters = 2;
static const uint32_t Gl1xaCounterBlockNumCounters = 4;
static const uint32_t Gl1xcCounterBlockNumCounters = 4;
static const uint32_t GcUtcl1CounterBlockNumCounters = 4;
// Number of block counter registers - Auto-generated from chip_offset_byte.h, edit with extra caution
// Reference: chip_offset_byte.h (from gfxip header file package)
static const uint32_t ChaCounterBlockNumCounters = 4;
static const uint32_t ChcCounterBlockNumCounters = 4;
static const uint32_t CpcCounterBlockNumCounters = 2;
static const uint32_t CpfCounterBlockNumCounters = 2;
static const uint32_t CpgCounterBlockNumCounters = 2;
static const uint32_t GcmcVmL2CounterBlockNumCounters = 8;
static const uint32_t GcrCounterBlockNumCounters = 2;
static const uint32_t Gcutcl2CounterBlockNumCounters = 4;
static const uint32_t Gcvml2CounterBlockNumCounters = 2;
static const uint32_t GcEaCpwdCounterBlockNumCounters = 2;
static const uint32_t GcEaSeCounterBlockNumCounters = 2;
static const uint32_t Gl1aCounterBlockNumCounters = 4;
static const uint32_t Gl1cCounterBlockNumCounters = 4;
static const uint32_t Gl2aCounterBlockNumCounters = 4;
static const uint32_t Gl2cCounterBlockNumCounters = 4;
static const uint32_t GrbmCounterBlockNumCounters = 2;
static const uint32_t GrbmhCounterBlockNumCounters = 2;
static const uint32_t RlcCounterBlockNumCounters = 2;
static const uint32_t RpbCounterBlockNumCounters = 4;
static const uint32_t SdmaCounterBlockNumCounters = 2;
static const uint32_t SpiCounterBlockNumCounters = 6;
static const uint32_t SqcCounterBlockNumCounters = 16;
static const uint32_t SqgCounterBlockNumCounters = 8;
static const uint32_t TaCounterBlockNumCounters = 2;
static const uint32_t TcpCounterBlockNumCounters = 4;
static const uint32_t TdCounterBlockNumCounters = 2;
static const uint32_t Utcl1CounterBlockNumCounters = 4;
// Block counters max event value - Auto-generated from chip_enum.h, edit with extra caution
// Reference: chip_enum.h (from gfxip header file package)
// The following default values are generated from Radeon RX 9070, the first product of the
// RDNA 4 lineup. It could change for other products, and the change will be made in
// [PRODUCT_NAME]_factory.h
//
static const uint32_t GrbmCounterBlockMaxEvent = 51;
static const uint32_t RlcCounterBlockMaxEvent = 6;
static const uint32_t CpgCounterBlockMaxEvent = 30;
static const uint32_t CpcCounterBlockMaxEvent = 55;
static const uint32_t CpfCounterBlockMaxEvent = 4;
static const uint32_t GcrCounterBlockMaxEvent = 151;
static const uint32_t PhCounterBlockMaxEvent = 1023;
static const uint32_t Ge1CounterBlockMaxEvent = 54;
static const uint32_t Gl2aCounterBlockMaxEvent = 114;
static const uint32_t Gl2cCounterBlockMaxEvent = 249;
static const uint32_t GceaCounterBlockMaxEvent = 32;
static const uint32_t ChaCounterBlockMaxEvent = 25;
static const uint32_t ChcCounterBlockMaxEvent = 94;
static const uint32_t Ge2DistCounterBlockMaxEvent = 188;
static const uint32_t SdmaCounterBlockMaxEvent = 125;
static const uint32_t GcVml2CounterBlockMaxEvent = 90;
static const uint32_t GcMcVml2CounterBlockMaxEvent =
1; // This is handled by GCMC_VM_L2_PERFCOUNTER0_CFG
static const uint32_t GcUtcl2CounterBlockMaxEvent = 36;
static const uint32_t GrbmhCounterBlockMaxEvent = 25;
static const uint32_t CbCounterBlockMaxEvent = 315;
static const uint32_t DbCounterBlockMaxEvent = 441;
static const uint32_t PaSuCounterBlockMaxEvent = 828;
static const uint32_t SxCounterBlockMaxEvent = 81;
static const uint32_t ScCounterBlockMaxEvent = 821;
static const uint32_t TaCounterBlockMaxEvent = 254;
static const uint32_t TdCounterBlockMaxEvent = 271;
static const uint32_t TcpCounterBlockMaxEvent = 99;
static const uint32_t SpiCounterBlockMaxEvent = 318;
static const uint32_t SqgCounterBlockMaxEvent = 45;
static const uint32_t Gl1aCounterBlockMaxEvent = 21;
static const uint32_t RmiCounterBlockMaxEvent = 138;
static const uint32_t Gl1cCounterBlockMaxEvent = 121;
static const uint32_t SqcCounterBlockMaxEvent = 511;
static const uint32_t PcCounterBlockMaxEvent = 164;
static const uint32_t GceaSeCounterBlockMaxEvent = 32;
static const uint32_t GeCounterBlockMaxEvent = 103;
static const uint32_t WgsCounterBlockMaxEvent = 4;
static const uint32_t Gl1xaCounterBlockMaxEvent = 21;
static const uint32_t Gl1xcCounterBlockMaxEvent = 109;
static const uint32_t GcUtcl1CounterBlockMaxEvent = 71;
static const uint32_t ChaCounterBlockMaxEvent = 25;
static const uint32_t ChcCounterBlockMaxEvent = 94;
static const uint32_t CpcCounterBlockMaxEvent = 55;
static const uint32_t CpfCounterBlockMaxEvent = 4;
static const uint32_t CpgCounterBlockMaxEvent = 30;
static const uint32_t GcmcVmL2CounterBlockMaxEvent = 90;
static const uint32_t GcrCounterBlockMaxEvent = 151;
static const uint32_t Gcutcl2CounterBlockMaxEvent = 36;
static const uint32_t Gcvml2CounterBlockMaxEvent = 90;
static const uint32_t GcEaCpwdCounterBlockMaxEvent = 32;
static const uint32_t GcEaSeCounterBlockMaxEvent = 32;
static const uint32_t Gl1aCounterBlockMaxEvent = 21;
static const uint32_t Gl1cCounterBlockMaxEvent = 121;
static const uint32_t Gl2aCounterBlockMaxEvent = 114;
static const uint32_t Gl2cCounterBlockMaxEvent = 249;
static const uint32_t GrbmCounterBlockMaxEvent = 51;
static const uint32_t GrbmhCounterBlockMaxEvent = 25;
static const uint32_t RlcCounterBlockMaxEvent = 6;
static const uint32_t SdmaCounterBlockMaxEvent = 125;
static const uint32_t SpiCounterBlockMaxEvent = 318;
static const uint32_t SqcCounterBlockMaxEvent = 511;
static const uint32_t SqgCounterBlockMaxEvent = 45;
static const uint32_t TaCounterBlockMaxEvent = 254;
static const uint32_t TcpCounterBlockMaxEvent = 99;
static const uint32_t TdCounterBlockMaxEvent = 271;
static const uint32_t Utcl1CounterBlockMaxEvent = 71;
} // namespace gfx1201
static const uint32_t SdmaCounterBlockMaxInstances = 8;
static const uint32_t UmcCounterBlockMaxInstances = 32;
} // namespace gfx12
} // namespace gfxip
+65 -74
Bestand weergeven
@@ -43,54 +43,52 @@
#define REG_INFO_7(BLOCK) REG_INFO_WITH_CTRL_7(BLOCK, REG_32B_NULL)
#define REG_INFO_8(BLOCK) REG_INFO_WITH_CTRL_8(BLOCK, REG_32B_NULL)
// REG_INFO_WITH_CFG(BLOCK, INDEX) expands to one brace-enclosed register entry made of
// four GC register addresses, in this order:
//   reg<BLOCK>_PERFCOUNTER<INDEX>_CFG, reg<BLOCK>_PERFCOUNTER_RSLT_CNTL,
//   reg<BLOCK>_PERFCOUNTER_LO, reg<BLOCK>_PERFCOUNTER_HI
// Only the CFG register name depends on INDEX; the RSLT_CNTL/LO/HI names are the same
// for every entry of a given BLOCK.
//
// REG_INFO_WITH_CFG_<N>(BLOCK) chains the previous variant, so it expands to a
// comma-separated list of N such entries for INDEX 0 through N-1, suitable as the
// initializer list of a register-info array.
#define REG_INFO_WITH_CFG(BLOCK, INDEX) \
{REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER##INDEX##_CFG), REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER_HI)}
#define REG_INFO_WITH_CFG_1(BLOCK) REG_INFO_WITH_CFG(BLOCK, 0)
#define REG_INFO_WITH_CFG_2(BLOCK) REG_INFO_WITH_CFG_1(BLOCK), REG_INFO_WITH_CFG(BLOCK, 1)
#define REG_INFO_WITH_CFG_3(BLOCK) REG_INFO_WITH_CFG_2(BLOCK), REG_INFO_WITH_CFG(BLOCK, 2)
#define REG_INFO_WITH_CFG_4(BLOCK) REG_INFO_WITH_CFG_3(BLOCK), REG_INFO_WITH_CFG(BLOCK, 3)
#define REG_INFO_WITH_CFG_5(BLOCK) REG_INFO_WITH_CFG_4(BLOCK), REG_INFO_WITH_CFG(BLOCK, 4)
#define REG_INFO_WITH_CFG_6(BLOCK) REG_INFO_WITH_CFG_5(BLOCK), REG_INFO_WITH_CFG(BLOCK, 5)
#define REG_INFO_WITH_CFG_7(BLOCK) REG_INFO_WITH_CFG_6(BLOCK), REG_INFO_WITH_CFG(BLOCK, 6)
#define REG_INFO_WITH_CFG_8(BLOCK) REG_INFO_WITH_CFG_7(BLOCK), REG_INFO_WITH_CFG(BLOCK, 7)
namespace gfxip {
namespace gfx12 {
namespace gfx1201 {
// Counter register info - Auto-generated from chip_offset_byte.h, edit with extra caution
static const CounterRegInfo GrbmCounterRegAddr[] = {REG_INFO_2(GRBM)};
static const CounterRegInfo RlcCounterRegAddr[] = {REG_INFO_2(RLC)};
static const CounterRegInfo CpgCounterRegAddr[] = {REG_INFO_2(CPG)};
static const CounterRegInfo CpcCounterRegAddr[] = {REG_INFO_2(CPC)};
static const CounterRegInfo CpfCounterRegAddr[] = {REG_INFO_2(CPF)};
static const CounterRegInfo GcrCounterRegAddr[] = {REG_INFO_WITH_CTRL_2(GCR, REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL))};
static const CounterRegInfo PaPhCounterRegAddr[] = {REG_INFO_8(PA_PH)};
static const CounterRegInfo Ge1CounterRegAddr[] = {REG_INFO_4(GE1)};
static const CounterRegInfo Gl2aCounterRegAddr[] = {REG_INFO_4(GL2A)};
static const CounterRegInfo Gl2cCounterRegAddr[] = {REG_INFO_4(GL2C)};
static const CounterRegInfo GceaCounterRegAddr[] = {REG_INFO_2(GC_EA_CPWD)};
static const CounterRegInfo ChaCounterRegAddr[] = {REG_INFO_4(CHA)};
static const CounterRegInfo ChcCounterRegAddr[] = {REG_INFO_4(CHC)};
static const CounterRegInfo Ge2CounterRegAddr[] = {REG_INFO_4(GE2_DIST)};
static const CounterRegInfo SdmaCounterRegAddr[] = {REG_INFO_2(SDMA0), REG_INFO_2(SDMA1)};
//static const CounterRegInfo GcVml2CounterRegAddr[] = {REG_INFO_2(GCVML2)};
//static const CounterRegInfo GcMcVml2CounterRegAddr[] = {REG_INFO_1(GCMC_VM_L2)};
//static const CounterRegInfo GcUtcl2CounterRegAddr[] = {REG_INFO_1(GCUTCL2)};
static const CounterRegInfo GrbmhCounterRegAddr[] = {REG_INFO_2(GRBMH)};
static const CounterRegInfo CbCounterRegAddr[] = {REG_INFO_4(CB)};
static const CounterRegInfo DbCounterRegAddr[] = {REG_INFO_4(DB)};
static const CounterRegInfo PaSuCounterRegAddr[] = {REG_INFO_4(PA_SU)};
static const CounterRegInfo SxCounterRegAddr[] = {REG_INFO_4(SX)};
static const CounterRegInfo PaScCounterRegAddr[] = {REG_INFO_8(PA_SC)};
static const CounterRegInfo TaCounterRegAddr[] = {REG_INFO_2(TA)};
static const CounterRegInfo TdCounterRegAddr[] = {REG_INFO_2(TD)};
static const CounterRegInfo TcpCounterRegAddr[] = {REG_INFO_4(TCP)};
static const CounterRegInfo SpiCounterRegAddr[] = {REG_INFO_6(SPI)};
static const CounterRegInfo SqgCounterRegAddr[] = {REG_INFO_WITH_CTRL_8(SQG, REG_32B_ADDR(GC, 0, regSQG_PERFCOUNTER_CTRL))};
static const CounterRegInfo CpcCounterRegAddr[] = {REG_INFO_2(CPC)};
static const CounterRegInfo CpfCounterRegAddr[] = {REG_INFO_2(CPF)};
static const CounterRegInfo CpgCounterRegAddr[] = {REG_INFO_2(CPG)};
static const CounterRegInfo GcmcVmL2CounterRegAddr[] = {REG_INFO_WITH_CFG_8(GCMC_VM_L2)};
static const CounterRegInfo GcrCounterRegAddr[] = {REG_INFO_WITH_CTRL_2(GCR, REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL))};
static const CounterRegInfo Gcutcl2CounterRegAddr[] = {REG_INFO_WITH_CFG_4(GCUTCL2)};
// static const CounterRegInfo Gcvml2CounterRegAddr[] = {REG_INFO_2(GCVML2)};
static const CounterRegInfo GcEaCpwdCounterRegAddr[] = {REG_INFO_2(GC_EA_CPWD)};
static const CounterRegInfo GcEaSeCounterRegAddr[] = {REG_INFO_2(GC_EA_SE)};
static const CounterRegInfo Gl1aCounterRegAddr[] = {REG_INFO_4(GL1A)};
static const CounterRegInfo RmiCounterRegAddr[] = {REG_INFO_4(RMI)};
static const CounterRegInfo Gl1cCounterRegAddr[] = {REG_INFO_4(GL1C)};
//static const CounterRegInfo SqcCounterRegAddr[] = {REG_INFO_WITH_CTRL_16(SQ, regSQ_PERFCOUNTER_CTRL)};
static const CounterRegInfo PcCounterRegAddr[] = {REG_INFO_4(PC)};
static const CounterRegInfo GeCounterRegAddr[] = {REG_INFO_4(GE2_SE)};
static const CounterRegInfo GceaSeCounterRegAddr[] = {REG_INFO_2(GC_EA_SE)};
// static const CounterRegInfo WgsCounterRegAddr[] = {REG_INFO_2(WGS)};
static const CounterRegInfo Gl1xaCounterRegAddr[] = {REG_INFO_4(GL1XA)};
static const CounterRegInfo Gl1xcCounterRegAddr[] = {REG_INFO_4(GL1XC)};
static const CounterRegInfo GcUtcl1CounterRegAddr[] = {REG_INFO_4(UTCL1)};
static const CounterRegInfo Gl2aCounterRegAddr[] = {REG_INFO_4(GL2A)};
static const CounterRegInfo Gl2cCounterRegAddr[] = {REG_INFO_4(GL2C)};
static const CounterRegInfo GrbmCounterRegAddr[] = {REG_INFO_2(GRBM)};
static const CounterRegInfo GrbmhCounterRegAddr[] = {REG_INFO_2(GRBMH)};
static const CounterRegInfo RlcCounterRegAddr[] = {REG_INFO_2(RLC)};
static const CounterRegInfo SdmaCounterRegAddr[] = {REG_INFO_2(SDMA0), REG_INFO_2(SDMA1)};
static const CounterRegInfo SpiCounterRegAddr[] = {REG_INFO_6(SPI)};
//static const CounterRegInfo SqcCounterRegAddr[] = {REG_INFO_WITH_CTRL_16(SQ, REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL))};
static const CounterRegInfo SqgCounterRegAddr[] = {REG_INFO_WITH_CTRL_8(SQG, REG_32B_ADDR(GC, 0, regSQG_PERFCOUNTER_CTRL))};
static const CounterRegInfo TaCounterRegAddr[] = {REG_INFO_2(TA)};
static const CounterRegInfo TcpCounterRegAddr[] = {REG_INFO_4(TCP)};
static const CounterRegInfo TdCounterRegAddr[] = {REG_INFO_2(TD)};
static const CounterRegInfo Utcl1CounterRegAddr[] = {REG_INFO_4(UTCL1)};
// Special handling of SQC:
// SQC only supports 32bit PMC, only regSQ_PERFCOUNTER#even_number#_SELECT is
// used by PMC. regSQ_PERFCOUNTER#odd_number#_SELECT is used only by SPM
// SQC only supports 32bit PMC.
// regSQ_PERFCOUNTER#even_number#_SELECT is used by PMC and SPM
// regSQ_PERFCOUNTER#odd_number#_SELECT is used by SPM only
static const CounterRegInfo SqcCounterRegAddr[] = {
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_LO), REG_32B_NULL},
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER2_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER1_LO), REG_32B_NULL},
@@ -101,57 +99,50 @@ static const CounterRegInfo SqcCounterRegAddr[] = {
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER12_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER6_LO), REG_32B_NULL},
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER14_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER7_LO), REG_32B_NULL}};
// Special handling of GCVML2:
static const CounterRegInfo GcVml2CounterRegAddr[] = {
// Special handling of GCVML2 (SPM only):
static const CounterRegInfo Gcvml2CounterRegAddr[] = {
{REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_SELECT), REG_32B_NULL, REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_LO), REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_HI)},
{REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_SELECT), REG_32B_NULL, REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_LO), REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_HI)}};
// Special handling of GCMC_VM_L2:
static const CounterRegInfo GcMcVml2CounterRegAddr[] = {
{REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER0_CFG), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_HI)}};
// Special handling of GCUTCL2: Not sure if this is SPM-only
static const CounterRegInfo GcUtcl2CounterRegAddr[] = {
{REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER0_CFG), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_HI)}};
// Global blocks: ATCL2 CHA CHC CPC CPF CPG EA FFBM GCR GL2A GL2C GRBM RLC SDMA VML2 UTCL2
// (Graphics only - not supported in ROCm): GE1 GE2_DIST PH
// (Graphics only): CPG is for graphics, but it is not physically removed for compute products
// (Not enabled for gfx12): CHCG GDS GUS
static const GpuBlockInfo GcAtcl2CounterBlockInfo = {"ATCL2", __BLOCK_ID(ATCL2)}; // Placeholder now
static const GpuBlockInfo ChaCounterBlockInfo = {"CHA", __BLOCK_ID(CHA), ChaCounterBlockNumInstances, ChaCounterBlockMaxEvent, ChaCounterBlockNumCounters, ChaCounterRegAddr, gfx12_cntx_prim::select_value_Cha, CounterBlockTcAttr};
static const GpuBlockInfo ChcCounterBlockInfo = {"CHC", __BLOCK_ID(CHC), ChcCounterBlockNumInstances, ChcCounterBlockMaxEvent, ChcCounterBlockNumCounters, ChcCounterRegAddr, gfx12_cntx_prim::select_value_Chc, CounterBlockTcAttr};
static const GpuBlockInfo CpcCounterBlockInfo = {"CPC", __BLOCK_ID(CPC), CpcCounterBlockNumInstances, CpcCounterBlockMaxEvent, CpcCounterBlockNumCounters, CpcCounterRegAddr, gfx12_cntx_prim::select_value_Cpc, CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPC};
static const GpuBlockInfo CpfCounterBlockInfo = {"CPF", __BLOCK_ID(CPF), CpfCounterBlockNumInstances, CpfCounterBlockMaxEvent, CpfCounterBlockNumCounters, CpfCounterRegAddr, gfx12_cntx_prim::select_value_Cpf, CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPF};
static const GpuBlockInfo CpgCounterBlockInfo = {"CPG", __BLOCK_ID(CPG), CpgCounterBlockNumInstances, CpgCounterBlockMaxEvent, CpgCounterBlockNumCounters, CpgCounterRegAddr, gfx12_cntx_prim::select_value_Cpg, CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPG};
static const GpuBlockInfo GceaCounterBlockInfo = {"GCEA", __BLOCK_ID(GCEA), GceaCounterBlockNumInstances, GceaCounterBlockMaxEvent, GceaCounterBlockNumCounters, GceaCounterRegAddr, gfx12_cntx_prim::select_value_Gcea, 0};
static const GpuBlockInfo Gl2aCounterBlockInfo = {"GL2A", __BLOCK_ID_HSA(GL2A), Gl2aCounterBlockNumInstances, Gl2aCounterBlockMaxEvent, Gl2aCounterBlockNumCounters, Gl2aCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
static const GpuBlockInfo Gl2cCounterBlockInfo = {"GL2C", __BLOCK_ID_HSA(GL2C), Gl2cCounterBlockNumInstances, Gl2cCounterBlockMaxEvent, Gl2cCounterBlockNumCounters, Gl2cCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
static const GpuBlockInfo Atcl2CounterBlockInfo = {"ATCL2", __BLOCK_ID_HSA(ATCL2)}; // Placeholder now
static const GpuBlockInfo GcFfbmCounterBlockInfo = {"GC_FFBM", __BLOCK_ID(GC_FFBM)}; // Placeholder now
static const GpuBlockInfo GcrCounterBlockInfo = {"GCR", __BLOCK_ID(GCR), GcrCounterBlockNumInstances, GcrCounterBlockMaxEvent, GcrCounterBlockNumCounters, GcrCounterRegAddr, gfx12_cntx_prim::select_value_Gcr, CounterBlockTcAttr};
static const GpuBlockInfo Gl2aCounterBlockInfo = {"GL2A", __BLOCK_ID(GL2A), Gl2aCounterBlockNumInstances, Gl2aCounterBlockMaxEvent, Gl2aCounterBlockNumCounters, Gl2aCounterRegAddr, gfx12_cntx_prim::select_value_Gl2a, CounterBlockTcAttr};
static const GpuBlockInfo Gl2cCounterBlockInfo = {"GL2C", __BLOCK_ID(GL2C), Gl2cCounterBlockNumInstances, Gl2cCounterBlockMaxEvent, Gl2cCounterBlockNumCounters, Gl2cCounterRegAddr, gfx12_cntx_prim::select_value_Gl2c, CounterBlockTcAttr};
static const GpuBlockInfo GrbmCounterBlockInfo = {"GRBM", __BLOCK_ID(GRBM), GrbmCounterBlockNumInstances, GrbmCounterBlockMaxEvent, GrbmCounterBlockNumCounters, GrbmCounterRegAddr, gfx12_cntx_prim::select_value_Grbm, CounterBlockGRBMAttr};
static const GpuBlockInfo RlcCounterBlockInfo = {"RLC", __BLOCK_ID(RLC), RlcCounterBlockNumInstances, RlcCounterBlockMaxEvent, RlcCounterBlockNumCounters, RlcCounterRegAddr, gfx12_cntx_prim::select_value_Rlc, 0};
static const GpuBlockInfo SdmaPmCounterBlockInfo = {"SDMA_PM", __BLOCK_ID(SDMA_PM), SdmaCounterBlockNumInstances, SdmaCounterBlockMaxEvent, SdmaCounterBlockNumCounters, SdmaCounterRegAddr, gfx12_cntx_prim::select_value_SdmaPm, CounterBlockExplInstAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_SDMA};
static const GpuBlockInfo GcVml2CounterBlockInfo = {"GC_VML2", __BLOCK_ID(GC_VML2)}; // Placeholder now
static const GpuBlockInfo GcUtcl2CounterBlockInfo = {"GC_UTCL2", __BLOCK_ID(GC_UTCL2)}; // Placeholder now
static const GpuBlockInfo GcUtcl2CounterBlockInfo = {"GC_UTCL2", __BLOCK_ID(GC_UTCL2), 1, Gcutcl2CounterBlockMaxEvent, Gcutcl2CounterBlockNumCounters, Gcutcl2CounterRegAddr, gfx12_cntx_prim::mc_select_value, CounterBlockRpbAttr|CounterBlockAidAttr};
static const GpuBlockInfo GcVml2CounterBlockInfo = {"GC_VML2", __BLOCK_ID(GC_VML2), 1, GcmcVmL2CounterBlockMaxEvent, GcmcVmL2CounterBlockNumCounters, GcmcVmL2CounterRegAddr, gfx12_cntx_prim::mc_select_value, CounterBlockRpbAttr|CounterBlockAidAttr};
static const GpuBlockInfo GcVml2SpmCounterBlockInfo = {"GC_VML2_SPM", __BLOCK_ID(GC_VML2_SPM), 1, Gcvml2CounterBlockMaxEvent, Gcvml2CounterBlockNumCounters, Gcvml2CounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr};
static const GpuBlockInfo ChaCounterBlockInfo = {"CHA", __BLOCK_ID(CHA), ChaCounterBlockNumInstances, ChaCounterBlockMaxEvent, ChaCounterBlockNumCounters, ChaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
static const GpuBlockInfo ChcCounterBlockInfo = {"CHC", __BLOCK_ID(CHC), ChcCounterBlockNumInstances, ChcCounterBlockMaxEvent, ChcCounterBlockNumCounters, ChcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
static const GpuBlockInfo CpcCounterBlockInfo = {"CPC", __BLOCK_ID_HSA(CPC), CpcCounterBlockNumInstances, CpcCounterBlockMaxEvent, CpcCounterBlockNumCounters, CpcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPC};
static const GpuBlockInfo CpfCounterBlockInfo = {"CPF", __BLOCK_ID_HSA(CPF), CpfCounterBlockNumInstances, CpfCounterBlockMaxEvent, CpfCounterBlockNumCounters, CpfCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPF};
static const GpuBlockInfo CpgCounterBlockInfo = {"CPG", __BLOCK_ID(CPG), CpgCounterBlockNumInstances, CpgCounterBlockMaxEvent, CpgCounterBlockNumCounters, CpgCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPG};
static const GpuBlockInfo GcrCounterBlockInfo = {"GCR", __BLOCK_ID_HSA(GCR), GcrCounterBlockNumInstances, GcrCounterBlockMaxEvent, GcrCounterBlockNumCounters, GcrCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
static const GpuBlockInfo GceaCounterBlockInfo = {"GCEA", __BLOCK_ID_HSA(GCEA), GcEaCpwdCounterBlockNumInstances, GcEaCpwdCounterBlockMaxEvent, GcEaCpwdCounterBlockNumCounters, GcEaCpwdCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr};
static const GpuBlockInfo GrbmCounterBlockInfo = {"GRBM", __BLOCK_ID_HSA(GRBM), GrbmCounterBlockNumInstances, GrbmCounterBlockMaxEvent, GrbmCounterBlockNumCounters, GrbmCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockGRBMAttr};
static const GpuBlockInfo RlcCounterBlockInfo = {"RLC", __BLOCK_ID(RLC), RlcCounterBlockNumInstances, RlcCounterBlockMaxEvent, RlcCounterBlockNumCounters, RlcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr};
static const GpuBlockInfo SdmaCounterBlockInfo = {"SDMA", __BLOCK_ID_HSA(SDMA), SdmaCounterBlockNumInstances, SdmaCounterBlockMaxEvent, SdmaCounterBlockNumCounters, SdmaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockExplInstAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_SDMA};
// SE blocks: EA_SE GL2A GL2C GRBMH SPI SQG UTCL1
// (Graphics only - not supported in ROCm): GE GL1XA GL1XC PA PC WGS
static const GpuBlockInfo GceaSeCounterBlockInfo = {"GCEA_SE", __BLOCK_ID(GCEA_SE), GceaSeCounterBlockNumInstances, GceaSeCounterBlockMaxEvent, GceaSeCounterBlockNumCounters, GceaSeCounterRegAddr, gfx12_cntx_prim::select_value_GceaSe, CounterBlockSeAttr};
static const GpuBlockInfo GrbmhCounterBlockInfo = {"GRBMH", __BLOCK_ID(GRBMH), GrbmhCounterBlockNumInstances, GrbmhCounterBlockMaxEvent, GrbmhCounterBlockNumCounters, GrbmhCounterRegAddr, gfx12_cntx_prim::select_value_Grbmh, CounterBlockSeAttr};
static const GpuBlockInfo SpiCounterBlockInfo = {"SPI", __BLOCK_ID(SPI), SpiCounterBlockNumInstances, SpiCounterBlockMaxEvent, SpiCounterBlockNumCounters, SpiCounterRegAddr, gfx12_cntx_prim::select_value_Spi, CounterBlockSeAttr|CounterBlockSPIAttr, NULL, SPM_SE_BLOCK_NAME_SPI};
static const GpuBlockInfo GceaSeCounterBlockInfo = {"GCEA_SE", __BLOCK_ID(GCEA_SE), GcEaSeCounterBlockNumInstances, GcEaSeCounterBlockMaxEvent, GcEaSeCounterBlockNumCounters, GcEaSeCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr};
static const GpuBlockInfo GrbmhCounterBlockInfo = {"GRBMH", __BLOCK_ID(GRBMH), GrbmhCounterBlockNumInstances, GrbmhCounterBlockMaxEvent, GrbmhCounterBlockNumCounters, GrbmhCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr};
static const GpuBlockInfo SpiCounterBlockInfo = {"SPI", __BLOCK_ID_HSA(SPI), SpiCounterBlockNumInstances, SpiCounterBlockMaxEvent, SpiCounterBlockNumCounters, SpiCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSPIAttr, NULL, SPM_SE_BLOCK_NAME_SPI};
static const GpuBlockInfo SqgCounterBlockInfo = {"SQG", __BLOCK_ID(SQG), SqgCounterBlockNumInstances, SqgCounterBlockMaxEvent, SqgCounterBlockNumCounters, SqgCounterRegAddr, gfx12_cntx_prim::sq_select_value, CounterBlockSeAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQG};
static const GpuBlockInfo GcUtcl1CounterBlockInfo = {"GC_UTCL1", __BLOCK_ID(GC_UTCL1), GcUtcl1CounterBlockNumInstances, GcUtcl1CounterBlockMaxEvent, GcUtcl1CounterBlockNumCounters, GcUtcl1CounterRegAddr, gfx12_cntx_prim::select_value_GcUtcl1, CounterBlockSeAttr, NULL, SPM_SE_BLOCK_NAME_UTCL1};
static const GpuBlockInfo GcUtcl1CounterBlockInfo = {"GC_UTCL1", __BLOCK_ID(GC_UTCL1), Utcl1CounterBlockNumInstances, Utcl1CounterBlockMaxEvent, Utcl1CounterBlockNumCounters, Utcl1CounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr, NULL, SPM_SE_BLOCK_NAME_UTCL1};
// SA blocks: GL1A GL1C
// (Graphics only - not supported in ROCm): CB DB SC SX
// (Not enabled for gfx12): GL1CG
static const GpuBlockInfo Gl1aCounterBlockInfo = {"GL1A", __BLOCK_ID(GL1A), Gl1aCounterBlockNumInstances, Gl1aCounterBlockMaxEvent, Gl1aCounterBlockNumCounters, Gl1aCounterRegAddr, gfx12_cntx_prim::select_value_Gl1a, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
static const GpuBlockInfo Gl1cCounterBlockInfo = {"GL1C", __BLOCK_ID(GL1C), Gl1cCounterBlockNumInstances, Gl1cCounterBlockMaxEvent, Gl1cCounterBlockNumCounters, Gl1cCounterRegAddr, gfx12_cntx_prim::select_value_Gl1c, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
static const GpuBlockInfo Gl1aCounterBlockInfo = {"GL1A", __BLOCK_ID_HSA(GL1A), Gl1aCounterBlockNumInstances, Gl1aCounterBlockMaxEvent, Gl1aCounterBlockNumCounters, Gl1aCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
static const GpuBlockInfo Gl1cCounterBlockInfo = {"GL1C", __BLOCK_ID_HSA(GL1C), Gl1cCounterBlockNumInstances, Gl1cCounterBlockMaxEvent, Gl1cCounterBlockNumCounters, Gl1cCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
// WGP blocks: SQC TA TCP TD
static const GpuBlockInfo SqcCounterBlockInfo = {"SQ", __BLOCK_ID(SQ), SqcCounterBlockNumInstances, SqcCounterBlockMaxEvent, SqcCounterBlockNumCounters, SqcCounterRegAddr, gfx12_cntx_prim::sq_select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQC};
static const GpuBlockInfo TaCounterBlockInfo = {"TA", __BLOCK_ID(TA), TaCounterBlockNumInstances, TaCounterBlockMaxEvent, TaCounterBlockNumCounters, TaCounterRegAddr, gfx12_cntx_prim::select_value_Ta, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TaBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TA};
static const GpuBlockInfo TdCounterBlockInfo = {"TD", __BLOCK_ID(TD), TdCounterBlockNumInstances, TdCounterBlockMaxEvent, TdCounterBlockNumCounters, TdCounterRegAddr, gfx12_cntx_prim::select_value_Td, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TD};
static const GpuBlockInfo TcpCounterBlockInfo = {"TCP", __BLOCK_ID(TCP), TcpCounterBlockNumInstances, TcpCounterBlockMaxEvent, TcpCounterBlockNumCounters, TcpCounterRegAddr, gfx12_cntx_prim::select_value_Tcp, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TCP};
} // namespace gfx1201
static const GpuBlockInfo SqcCounterBlockInfo = {"SQ", __BLOCK_ID_HSA(SQ), SqcCounterBlockNumInstances, SqcCounterBlockMaxEvent, SqcCounterBlockNumCounters, SqcCounterRegAddr, gfx12_cntx_prim::sq_select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQC};
static const GpuBlockInfo TaCounterBlockInfo = {"TA", __BLOCK_ID_HSA(TA), TaCounterBlockNumInstances, TaCounterBlockMaxEvent, TaCounterBlockNumCounters, TaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TaBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TA};
static const GpuBlockInfo TdCounterBlockInfo = {"TD", __BLOCK_ID_HSA(TD), TdCounterBlockNumInstances, TdCounterBlockMaxEvent, TdCounterBlockNumCounters, TdCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TD};
static const GpuBlockInfo TcpCounterBlockInfo = {"TCP", __BLOCK_ID_HSA(TCP), TcpCounterBlockNumInstances, TcpCounterBlockMaxEvent, TcpCounterBlockNumCounters, TcpCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TCP};
} // namespace gfx12xx
} // namespace gfx12
} // namespace gfxip
+15 -44
Bestand weergeven
@@ -32,23 +32,6 @@
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4 ///< Privileged memory performance counter
#define COPY_DATA_SEL_COUNT_1DW 0 ///< Copy 1 word (32 bits)
// Counter Select Register value lambdas
// Expands to a capture-less lambda taking a `const counter_des_t&` and returning the
// uint32_t produced by SET_REG_FIELD_BITS for the PERF_SEL field of `reg_name`, fed with
// counter_des.id.
#define select_value(reg_name) \
[](const counter_des_t& counter_des) { \
uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id); \
return select; \
}
// Same shape as the PERF_SEL variant, but for registers whose event field is named
// PERFCOUNTER_SELECT: the lambda returns SET_REG_FIELD_BITS(reg_name, PERFCOUNTER_SELECT, id).
#define select_value_t2(reg_name) \
[](const counter_des_t& counter_des) { \
uint32_t select = SET_REG_FIELD_BITS(reg_name, PERFCOUNTER_SELECT, counter_des.id); \
return select; \
}
// Expands to a lambda with the same signature as the other select-value lambdas that
// ignores its argument and always returns 0.
#define select_value_blank() \
[](const counter_des_t& counter_des) { \
uint32_t select = 0; \
return select; \
}
namespace gfxip {
namespace gfx12 {
@@ -156,7 +139,7 @@ class gfx12_cntx_prim {
} gfx;
};
static const uint32_t SQ_BLOCK_ID = __BLOCK_ID(SQ);
static const uint32_t SQ_BLOCK_ID = __BLOCK_ID_HSA(SQ);
static const uint32_t SQ_BLOCK_SPM_ID = SPM_SE_BLOCK_NAME_SQG;
static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG;
@@ -254,7 +237,7 @@ class gfx12_cntx_prim {
uint32_t grbm_gfx_index =
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) |
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_side<<6) | (wgp_index << 2) | (instance_index << 1)));
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_side<<6) | (wgp_index << 2) | instance_index));
return grbm_gfx_index;
}
@@ -365,34 +348,22 @@ class gfx12_cntx_prim {
// MC config register value: passes the descriptor's `index` field through unchanged.
static uint32_t mc_config_value(const counter_des_t& counter_des) {
  const uint32_t config = counter_des.index;
  return config;
}
// MC registers values
// MC select register value, built against GCUTCL2_PERFCOUNTER0_CFG by OR-ing three fields:
// PERF_SEL from counter_des.id, PERF_MODE = PERFMON_COUNTER_MODE_ACCUM, and ENABLE = 1.
static uint32_t mc_select_value(const counter_des_t& counter_des) {
  const uint32_t event_bits =
      SET_REG_FIELD_BITS(GCUTCL2_PERFCOUNTER0_CFG, PERF_SEL, counter_des.id);
  const uint32_t mode_bits =
      SET_REG_FIELD_BITS(GCUTCL2_PERFCOUNTER0_CFG, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM);
  const uint32_t enable_bit = SET_REG_FIELD_BITS(GCUTCL2_PERFCOUNTER0_CFG, ENABLE, 1);
  return event_bits | mode_bits | enable_bit;
}
// Value written to reset the MC counters: the CLEAR_ALL mask constant.
static uint32_t mc_reset_value() {
  const uint32_t clear_all = MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM;
  return clear_all;
}
// Value written to start the MC counters: the ENABLE_ANY mask constant.
static uint32_t mc_start_value() {
  const uint32_t enable_any = MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM;
  return enable_any;
}
static auto constexpr select_value_Cha= select_value(CHA_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Chc= select_value(CHC_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Cpc= select_value(CPC_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Cpf= select_value(CPF_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Cpg= select_value(CPG_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Gcea= select_value_blank(); // register not present
static auto constexpr select_value_Gcr= select_value(GCR_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Gl2a= select_value(GL2A_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Gl2c= select_value(GL2C_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Grbm= select_value(GRBM_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Rlc= select_value_t2(RLC_PERFCOUNTER0_SELECT);
static auto constexpr select_value_SdmaPm= select_value_blank(); // register not present
static auto constexpr select_value_GcVml2= select_value_blank(); // register not present
static auto constexpr select_value_GcUtcl2= select_value_blank(); // register not present
static auto constexpr select_value_GceaSe= select_value_blank(); // register not present
static auto constexpr select_value_Grbmh= select_value(GRBMH_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Spi= select_value(SPI_PERFCOUNTER0_SELECT);
static auto constexpr select_value_GcUtcl1= select_value_blank(); // register not present
static auto constexpr select_value_Gl1a= select_value(GL1A_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Gl1c= select_value(GL1C_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Ta= select_value(TA_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Td= select_value(TD_PERFCOUNTER0_SELECT);
static auto constexpr select_value_Tcp= select_value(TCP_PERFCOUNTER0_SELECT);
static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = select_value_blank();
// Counter select register value: encodes counter_des.id through the PERF_SEL field as
// laid out for CPC_PERFCOUNTER0_SELECT.
// NOTE(review): the CPC register layout is used regardless of which block the caller is
// programming — presumably PERF_SEL has the same layout in the other blocks' select
// registers; confirm.
static uint32_t select_value(const counter_des_t& counter_des) {
  return SET_REG_FIELD_BITS(CPC_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
}
static uint32_t spm_select_value(const counter_des_t& counter_des) {
uint32_t tcp_perfcounter0_select =
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
+2 -2
Bestand weergeven
@@ -48,7 +48,7 @@ class Gfx10Factory : public Pm4Factory {
// void ConstructTable(const AgentInfo* agent_info);
void Init(const AgentInfo* agent_info);
// void ConstructBuilders(const AgentInfo* agent_info);
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
};
// Gfx builders init
@@ -81,7 +81,7 @@ void Gfx10Factory::Init(const AgentInfo* agent_info) {
}
// GFX10 block table
const GpuBlockInfo* Gfx10Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
const GpuBlockInfo* Gfx10Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {
&CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo,
NULL /*&GrbmSeCounterBlockInfo*/, &SpiCounterBlockInfo, &SqCounterBlockInfo,
NULL /*&SqCsCounterBlockInfo*/, NULL /*GFX8 SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo,
+2 -2
Bestand weergeven
@@ -48,7 +48,7 @@ class Gfx11Factory : public Pm4Factory {
// void ConstructTable(const AgentInfo* agent_info);
void Init(const AgentInfo* agent_info);
// void ConstructBuilders(const AgentInfo* agent_info);
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
};
// Gfx builders init
@@ -81,7 +81,7 @@ void Gfx11Factory::Init(const AgentInfo* agent_info) {
}
// GFX11 block table
const GpuBlockInfo* Gfx11Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
const GpuBlockInfo* Gfx11Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {
&CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo,
NULL /*&GrbmSeCounterBlockInfo*/, &SpiCounterBlockInfo, &SqCounterBlockInfo,
NULL /*&SqCsCounterBlockInfo*/, NULL /*GFX8 SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo,
+26 -22
Bestand weergeven
@@ -79,31 +79,35 @@ void Gfx12Factory::ConstructBuilders(const AgentInfo* agent_info) {
// Fills block_table_ so that each supported counter block id maps to the address of its
// GpuBlockInfo descriptor. Entries are grouped by where the block lives: global, SE, SA
// and WGP. `agent_info` is not read anywhere in this body.
// NOTE(review): some slots are assigned more than once, keyed through both __BLOCK_ID(...)
// and __BLOCK_ID_HSA(...); confirm which macro each block id is meant to use.
void Gfx12Factory::ConstructTable(const AgentInfo* agent_info) {
// Global blocks
block_table_[__BLOCK_ID(CHA)] = &ChaCounterBlockInfo;
block_table_[__BLOCK_ID(CHC)] = &ChcCounterBlockInfo;
block_table_[__BLOCK_ID(CPC)] = &CpcCounterBlockInfo;
block_table_[__BLOCK_ID(CPF)] = &CpfCounterBlockInfo;
block_table_[__BLOCK_ID(CPG)] = &CpgCounterBlockInfo;
block_table_[__BLOCK_ID(GCEA)] = &GceaCounterBlockInfo;
block_table_[__BLOCK_ID(GCR)] = &GcrCounterBlockInfo;
block_table_[__BLOCK_ID(GL2A)] = &Gl2aCounterBlockInfo;
block_table_[__BLOCK_ID(GL2C)] = &Gl2cCounterBlockInfo;
block_table_[__BLOCK_ID(GRBM)] = &GrbmCounterBlockInfo;
block_table_[__BLOCK_ID(RLC)] = &RlcCounterBlockInfo;
block_table_[__BLOCK_ID(SDMA_PM)] = &SdmaPmCounterBlockInfo;
block_table_[__BLOCK_ID(CHA)] = &ChaCounterBlockInfo;
block_table_[__BLOCK_ID(CHC)] = &ChcCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(CPC)] = &CpcCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(CPF)] = &CpfCounterBlockInfo;
block_table_[__BLOCK_ID(CPG)] = &CpgCounterBlockInfo;
block_table_[__BLOCK_ID(GC_UTCL2)] = &GcUtcl2CounterBlockInfo;
block_table_[__BLOCK_ID(GC_VML2)] = &GcVml2CounterBlockInfo;
block_table_[__BLOCK_ID(GC_VML2_SPM)] = &GcVml2SpmCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(GCEA)] = &GceaCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(GCR)] = &GcrCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(GL2A)] = &Gl2aCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(GL2C)] = &Gl2cCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(GRBM)] = &GrbmCounterBlockInfo;
block_table_[__BLOCK_ID(RLC)] = &RlcCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(SDMA)] = &SdmaCounterBlockInfo;
// SE blocks
block_table_[__BLOCK_ID(GCEA_SE)] = &GceaSeCounterBlockInfo;
block_table_[__BLOCK_ID(GRBMH)] = &GrbmhCounterBlockInfo;
block_table_[__BLOCK_ID(SPI)] = &SpiCounterBlockInfo;
block_table_[__BLOCK_ID(SQ)] = &SqcCounterBlockInfo;
block_table_[__BLOCK_ID(GC_UTCL1)] = &GcUtcl1CounterBlockInfo;
block_table_[__BLOCK_ID(GC_UTCL1)] = &GcUtcl1CounterBlockInfo;
block_table_[__BLOCK_ID(GCEA_SE)] = &GceaSeCounterBlockInfo;
block_table_[__BLOCK_ID(GRBMH)] = &GrbmhCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(SPI)] = &SpiCounterBlockInfo;
block_table_[__BLOCK_ID(SQG)] = &SqgCounterBlockInfo;
// SA blocks
block_table_[__BLOCK_ID(GL1A)] = &Gl1aCounterBlockInfo;
block_table_[__BLOCK_ID(GL1C)] = &Gl1cCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(GL1A)] = &Gl1aCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(GL1C)] = &Gl1cCounterBlockInfo;
// WGP blocks
block_table_[__BLOCK_ID(TA)] = &TaCounterBlockInfo;
block_table_[__BLOCK_ID(TCP)] = &TcpCounterBlockInfo;
block_table_[__BLOCK_ID(TD)] = &TdCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(SQ)] = &SqcCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(TA)] = &TaCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(TCP)] = &TcpCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(TD)] = &TdCounterBlockInfo;
}
// Pm4Factory create methods
+2 -2
Bestand weergeven
@@ -28,11 +28,11 @@
namespace aql_profile {
const GpuBlockInfo* Mi100Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {};
const GpuBlockInfo* Mi100Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {};
Mi100Factory::Mi100Factory(const AgentInfo* agent_info)
: Gfx9Factory(block_table_, sizeof(block_table_), agent_info) {
for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
for (unsigned i = 0; i < AQLPROFILE_BLOCKS_NUMBER; ++i) {
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[i];
if (base_table_ptr == NULL) continue;
GpuBlockInfo* block_info = nullptr;
+3 -3
Bestand weergeven
@@ -37,14 +37,14 @@ class Mi200Factory : public Gfx9Factory {
virtual int GetAccumHiID() const override { return 185; };
protected:
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
};
const GpuBlockInfo* Mi200Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {};
const GpuBlockInfo* Mi200Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {};
Mi200Factory::Mi200Factory(const AgentInfo* agent_info)
: Gfx9Factory(block_table_, sizeof(block_table_), agent_info) {
for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
for (unsigned i = 0; i < AQLPROFILE_BLOCKS_NUMBER; ++i) {
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[i];
if (base_table_ptr == NULL) continue;
GpuBlockInfo* block_info = nullptr;
+1 -1
Bestand weergeven
@@ -31,7 +31,7 @@ namespace aql_profile {
class Mi300Factory : public Mi100Factory {
public:
explicit Mi300Factory(const AgentInfo* agent_info) : Mi100Factory(agent_info) {
for (unsigned blockname_id = 0; blockname_id < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER;
for (unsigned blockname_id = 0; blockname_id < AQLPROFILE_BLOCKS_NUMBER;
++blockname_id) {
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[blockname_id];
if (base_table_ptr == NULL) continue;
+1 -1
Bestand weergeven
@@ -76,7 +76,7 @@ void Gfx9Factory::Print(const GpuBlockInfo* block_info) {
}
// GFX9 block table
const GpuBlockInfo* Gfx9Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
const GpuBlockInfo* Gfx9Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {
&CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo,
&GrbmSeCounterBlockInfo, &SpiCounterBlockInfo, &SqCounterBlockInfo, &SqCsCounterBlockInfo,
NULL /*GFX? SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo, &TcaCounterBlockInfo,
+2 -2
Bestand weergeven
@@ -42,7 +42,7 @@ class Gfx9Factory : public Pm4Factory {
protected:
void Init(const AgentInfo* agent_info);
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
static void Print(const GpuBlockInfo* block_info);
};
@@ -53,7 +53,7 @@ class Mi100Factory : public Gfx9Factory {
explicit Mi100Factory(const AgentInfo* agent_info);
protected:
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
};
} // namespace aql_profile
+43
Bestand weergeven
@@ -50,6 +50,49 @@ typedef enum {
AQLPROFILE_AGENT_VERSION_LAST
} aqlprofile_agent_version_t;
/**
* @brief Enums for counter blocks.
* AQLPROFILE_BLOCK_NAME_RESERVED_X are blocks reserved for npi. Reserving them here can maintain
* enum consistency between mainline and npi.
* The first enumerator is set to HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, so the values declared
* here continue numbering from that constant, and AQLPROFILE_BLOCKS_NUMBER ends up as
* that constant plus the number of enumerators listed before it.
* TODO: Move all counter blocks here from hsa_ven_amd_aqlprofile.h
*/
typedef enum {
// Blocks reserved for NPI support
// Numbering starts at HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER.
AQLPROFILE_BLOCK_NAME_RESERVED_0 = HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER,
AQLPROFILE_BLOCK_NAME_RESERVED_1,
AQLPROFILE_BLOCK_NAME_RESERVED_2,
AQLPROFILE_BLOCK_NAME_RESERVED_3,
AQLPROFILE_BLOCK_NAME_RESERVED_4,
AQLPROFILE_BLOCK_NAME_RESERVED_5,
// Blocks available for most ASICs, but not currently in use
AQLPROFILE_BLOCK_NAME_CPG,
AQLPROFILE_BLOCK_NAME_RLC,
// New blocks for gc_12_0_x
AQLPROFILE_BLOCK_NAME_CHA,
AQLPROFILE_BLOCK_NAME_CHC,
AQLPROFILE_BLOCK_NAME_GC_CANE,
AQLPROFILE_BLOCK_NAME_GC_FFBM,
AQLPROFILE_BLOCK_NAME_GC_L2TLB,
AQLPROFILE_BLOCK_NAME_GC_UTCL1,
AQLPROFILE_BLOCK_NAME_GC_UTCL2,
AQLPROFILE_BLOCK_NAME_GC_VML2,
AQLPROFILE_BLOCK_NAME_GC_VML2_SPM,
AQLPROFILE_BLOCK_NAME_GCEA_SE,
AQLPROFILE_BLOCK_NAME_GRBMH,
AQLPROFILE_BLOCK_NAME_SQG,
// Blocks reserved for NPI support
AQLPROFILE_BLOCK_NAME_RESERVED_6,
AQLPROFILE_BLOCK_NAME_RESERVED_7,
AQLPROFILE_BLOCK_NAME_RESERVED_8,
AQLPROFILE_BLOCK_NAME_RESERVED_9,
// Add new blocks above
// Sentinel: must stay last, since its value depends on every enumerator before it.
AQLPROFILE_BLOCKS_NUMBER
} aqlprofile_block_name_t;
/**
* @brief Flags to describe which agents can access given buffer.
*/
+10 -2
Bestand weergeven
@@ -285,9 +285,14 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
// std::endl;
// Set GRBM index to access proper block instance
const uint32_t grbm_value = (block_info->instance_count > 1)
//
// TODO: To collect a different event on each instance of a WGP counter block, we need to
// loop over the WGPs instead of blindly broadcasting to all instances. Fortunately,
// this is not a common practice
const uint32_t grbm_value = (block_info->instance_count > 1 && !(block_info->attr & CounterBlockWgpAttr))
? Primitives::grbm_inst_index_value(block_des.index)
: Primitives::grbm_broadcast_value();
builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::GRBM_GFX_INDEX_ADDR, grbm_value);
// Reset counters
if (block_info->attr & CounterBlockMcAttr) {
@@ -602,9 +607,12 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
else
grbm_value = Primitives::grbm_se_sh_wgp_index_value(se_index, sarray, wgp);
builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::GRBM_GFX_INDEX_ADDR, grbm_value);
uint32_t dw_mask = reg_info.register_addr_hi.offset ? 3 : 1;
builder.BuildCopyCounterDataPacket(
cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi,
reinterpret_cast<uint32_t*>(data_buffer) + read_counter, 1);
reinterpret_cast<uint32_t*>(data_buffer) + read_counter, dw_mask);
if (data_buffer && (dw_mask == 1))
*(reinterpret_cast<uint32_t*>(data_buffer) + read_counter + 1) = 0;
read_counter += 2;
}
} else {
+1 -1
Bestand weergeven
@@ -31,7 +31,7 @@ find_package(Clang REQUIRED CONFIG
## Building test executable
add_executable ( ${EXE_NAME} ${KERN_SRC} ${CTRL_SRC} ${UTIL_SRC} )
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${API_PATH} ${ROCM_ROOT_DIR}/include ${TEST_DIR}/parser/ )
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${API_PATH} ${ROCM_ROOT_DIR}/include ${TEST_DIR}/parser/ ${TEST_DIR}/../src/core/include)
target_link_libraries( ${EXE_NAME} PRIVATE pthread hsa-runtime64::hsa-runtime64 dl )
install(TARGETS ${EXE_NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME} COMPONENT tests)
+61 -1
Bestand weergeven
@@ -22,6 +22,7 @@
#include "hsa/hsa_ext_amd.h"
#include "aql_profile_v2.h"
#include <stdlib.h>
#include <string>
@@ -275,6 +276,65 @@ int main(int argc, char* argv[]) {
};
events_count = sizeof(events_arr1) / sizeof(hsa_ven_amd_aqlprofile_event_t);
events_arr = events_arr1;
} else if (TestHsa::HsaAgentName() == "gfx12") {
const hsa_ven_amd_aqlprofile_event_t events_arr1[] = {
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHA, 0, 25 /*ALWAYS*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHA, 0, 0 /*BUSY*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHC, 0, 0 /*ALWAYS*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHC, 0, 1 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 0 /*ALWAYS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 25 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0, 0 /*ALWAYS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0, 24 /*BUSY*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CPG, 0, 0 /*ALWAYS*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CPG, 0, 51 /*BUSY*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_UTCL2, 0, 1},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_VML2, 0, 5},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA, 0, 3},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA, 0, 4},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR, 0, 6},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR, 0, 22},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A, 0, 1 /*ALWAYS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A, 0, 2 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C, 0, 1 /*ALWAYS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C, 0, 2 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0, 0 /*ALWAYS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0, 2 /*GUI_ACTIVE*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_RLC, 0, 2},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_RLC, 0, 5},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 0, 0 /*ALWAYS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 0, 2 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 1, 0 /*ALWAYS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 1, 2 /*BUSY*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_UTCL1, 0, 1},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_UTCL1, 0, 2},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GCEA_SE, 0, 3},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GCEA_SE, 0, 4},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GRBMH, 0, 0 /*ALWAYS*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GRBMH, 0, 19},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0, 46 /*CSN_BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0, 47 /*CSN_NUM_THREADGROUPS*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_SQG, 0,14 /*ALWAYS*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_SQG, 0, 15 /*BUSY*/},
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_SQG, 0, 19 /*WAVES*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A, 0, 21 /*ALWAYS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A, 0, 0 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C, 0, 0 /*ALWAYS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C, 0, 1 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 2 /*ALWAYS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 3 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 4 /*WAVES*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0, 15 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0, 1 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0, 96 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0, 10 /*REQ_READ*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0, 14 /*REQ_WRITE*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1, 96 /*BUSY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1, 10 /*REQ_READ*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1, 14 /*REQ_WRITE*/},
};
events_count = sizeof(events_arr1) / sizeof(hsa_ven_amd_aqlprofile_event_t);
events_arr = events_arr1;
} else {
const hsa_ven_amd_aqlprofile_event_t events_arr1[] = {
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 4 /*WAVES*/},
@@ -294,7 +354,7 @@ int main(int argc, char* argv[]) {
} else {
const int block_index_max = 16;
const int event_id_max = 128;
for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
for (unsigned i = 0; i < AQLPROFILE_BLOCKS_NUMBER; ++i) {
for (unsigned j = 0; j < block_index_max; ++j) {
for (unsigned k = 0; k <= event_id_max; k += scan_step) {
fflush(stdout);