[gfx12] Finalize support for gfx12 new counter blocks (#81)
1. Move all new gfx12 block enums to aql_profile_v2.h; hsa_ven_amd_aqlprofile.h is left untouched. 2. Reorganize the counter info in gfx12_block_info.h into purely alphabetical order for easy comparison between different IP versions, and use the auto-generated block names from the IP header files whenever possible to reduce manual editing. 3. Remove unused counter info for graphics blocks. 4. Add UTCL2 and VML2 support. 5. Add all gfx12 blocks to the ctrl test.
This commit is contained in:
@@ -20,45 +20,16 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#ifndef _GFX12_BLOCKINFO_H_
|
||||
#define _GFX12_BLOCKINFO_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx12 {
|
||||
#define __BLOCK_ID(block) HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_##block
|
||||
// Private PMC Counter BlockId is defined here
|
||||
// Public PMC Counter BlockId is defined in hsa_ven_amd_aqlprofile.h
|
||||
#define __BLOCK_ID_HSA(block) HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_##block
|
||||
#define __BLOCK_ID(block) AQLPROFILE_BLOCK_NAME_##block
|
||||
enum CounterBlockId {
|
||||
__BLOCK_ID(RLC) = HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER,
|
||||
__BLOCK_ID(CPG),
|
||||
__BLOCK_ID(GRBMH),
|
||||
__BLOCK_ID(GRBMA),
|
||||
__BLOCK_ID(SQG),
|
||||
|
||||
// mem blocks
|
||||
__BLOCK_ID(CHA),
|
||||
__BLOCK_ID(CHC),
|
||||
__BLOCK_ID(GLARBA),
|
||||
__BLOCK_ID(GLARBC),
|
||||
__BLOCK_ID(GC_CANE),
|
||||
__BLOCK_ID(GC_FFBM),
|
||||
__BLOCK_ID(GC_NHTTLB),
|
||||
__BLOCK_ID(GC_L2TLB),
|
||||
__BLOCK_ID(GC_UTCL1),
|
||||
__BLOCK_ID(GC_UTCL2),
|
||||
__BLOCK_ID(GC_VML2),
|
||||
|
||||
__BLOCK_ID(GCEA_SE),
|
||||
|
||||
// New SDMA Perfmon interface, as opposed to the original SDMA PerfCnt. gfx12
|
||||
// supports both and they should provide the same counter events. We might
|
||||
// remove SDMA PerfCnt support in aqlprofile in the future since it is easier
|
||||
// to program Perfmon
|
||||
__BLOCK_ID(SDMA_PM),
|
||||
|
||||
// Counters retrieved by KFD
|
||||
IommuV2CounterBlockId,
|
||||
IommuV2CounterBlockId = AQLPROFILE_BLOCKS_NUMBER,
|
||||
KernelDriverCounterBlockId,
|
||||
|
||||
CpPipeStatsCounterBlockId,
|
||||
@@ -130,155 +101,100 @@ namespace gfx1201 {
|
||||
// IP versions for Radeon RX 9070
|
||||
// ip_block : gc_12_0_1
|
||||
// ip_block : athub_4_1_0
|
||||
// ip_block : umc_8_14_0
|
||||
// ip_block : df_4_15_1
|
||||
// ip_block : pcie_6_1_0
|
||||
|
||||
// Number of block instances
|
||||
// Reference: global_features.h (from gfxip header file package)
|
||||
// rspm_config.pm (from design configuration files)
|
||||
// The following default values are generated from Radeon RX 9070, the first product of the
|
||||
// RDNA 4 lineup. It could change for other products, and the change will be made in
|
||||
// [PRODUCT_NAME]_factory.h
|
||||
//
|
||||
static const uint32_t GrbmCounterBlockNumInstances = 1;
|
||||
static const uint32_t RlcCounterBlockNumInstances = 1;
|
||||
static const uint32_t CpgCounterBlockNumInstances = 1;
|
||||
static const uint32_t CpcCounterBlockNumInstances = 1;
|
||||
static const uint32_t CpfCounterBlockNumInstances = 1;
|
||||
static const uint32_t GcrCounterBlockNumInstances = 1;
|
||||
static const uint32_t Ge1CounterBlockNumInstances = 1;
|
||||
static const uint32_t Gl2aCounterBlockNumInstances = 4; // GFX_CPWD__NUM_GL2A_PER_CPWD
|
||||
static const uint32_t Gl2cCounterBlockNumInstances = 32; // GFX_CPWD__NUM_GL2C_PER_CPWD
|
||||
static const uint32_t GceaCounterBlockNumInstances = 36; // GFX_CPWD__NUM_EA_PER_CPWD
|
||||
static const uint32_t ChaCounterBlockNumInstances = 1;
|
||||
static const uint32_t ChcCounterBlockNumInstances = 4; // GFX_CPWD__NUM_CHC
|
||||
static const uint32_t Ge2DistCounterBlockNumInstances = 1;
|
||||
static const uint32_t SdmaCounterBlockNumInstances = 2; // GFX_CPWD__NUM_SDMA_PER_CPWD
|
||||
static const uint32_t GcVml2CounterBlockNumInstances = 1;
|
||||
static const uint32_t GcMcVml2CounterBlockNumInstances = 1;
|
||||
static const uint32_t GcUtcl2CounterBlockNumInstances = 1;
|
||||
static const uint32_t GrbmhCounterBlockNumInstances = 1;
|
||||
static const uint32_t CbCounterBlockNumInstances = 2; // GFX_SE__NUM_RB_PER_SA
|
||||
static const uint32_t DbCounterBlockNumInstances = 2; // GFX_SE__NUM_RB_PER_SA
|
||||
static const uint32_t SuCounterBlockNumInstances = 1; // GFX_SE__NUM_PA_PER_SE
|
||||
static const uint32_t SxCounterBlockNumInstances = 1;
|
||||
static const uint32_t ScCounterBlockNumInstances = 2; // GFX_SE__NUM_PACKER_PER_SA
|
||||
static const uint32_t TaCounterBlockNumInstances = 2; // GFX_SE__NUM_ROWS_PER_WGP
|
||||
static const uint32_t TdCounterBlockNumInstances = 2; // GFX_SE__NUM_ROWS_PER_WGP
|
||||
static const uint32_t TcpCounterBlockNumInstances = 2; // GFX_SE__NUM_ROWS_PER_WGP
|
||||
static const uint32_t SpiCounterBlockNumInstances = 1;
|
||||
static const uint32_t SqgCounterBlockNumInstances = 1;
|
||||
static const uint32_t Gl1aCounterBlockNumInstances = 1;
|
||||
static const uint32_t RmiCounterBlockNumInstances = 2; // GFX_SE__NUM_RMI_PER_SA
|
||||
static const uint32_t Gl1cCounterBlockNumInstances = 4; // GFX_SE__NUM_GL1C_PER_SA
|
||||
static const uint32_t SqcCounterBlockNumInstances = 1;
|
||||
static const uint32_t PcCounterBlockNumInstances = 1;
|
||||
static const uint32_t GceaSeCounterBlockNumInstances = 4;
|
||||
static const uint32_t GeCounterBlockNumInstances = 1;
|
||||
static const uint32_t WgsCounterBlockNumInstances = 1;
|
||||
static const uint32_t Gl1xaCounterBlockNumInstances = 1;
|
||||
static const uint32_t Gl1xcCounterBlockNumInstances = 4; // GFX_SE__NUM_GL1C_PER_SA
|
||||
static const uint32_t GcUtcl1CounterBlockNumInstances = 2;
|
||||
static const uint32_t ChaCounterBlockNumInstances = 1;
|
||||
static const uint32_t ChcCounterBlockNumInstances = 4;
|
||||
static const uint32_t CpcCounterBlockNumInstances = 1;
|
||||
static const uint32_t CpfCounterBlockNumInstances = 1;
|
||||
static const uint32_t CpgCounterBlockNumInstances = 1;
|
||||
static const uint32_t GcmcVmL2CounterBlockNumInstances = 1;
|
||||
static const uint32_t GcrCounterBlockNumInstances = 1;
|
||||
static const uint32_t Gcutcl2CounterBlockNumInstances = 1;
|
||||
static const uint32_t Gcvml2CounterBlockNumInstances = 1;
|
||||
static const uint32_t GcEaCpwdCounterBlockNumInstances = 36;
|
||||
static const uint32_t GcEaSeCounterBlockNumInstances = 4;
|
||||
static const uint32_t Gl1aCounterBlockNumInstances = 1;
|
||||
static const uint32_t Gl1cCounterBlockNumInstances = 4;
|
||||
static const uint32_t Gl2aCounterBlockNumInstances = 4;
|
||||
static const uint32_t Gl2cCounterBlockNumInstances = 32;
|
||||
static const uint32_t GrbmCounterBlockNumInstances = 1;
|
||||
static const uint32_t GrbmhCounterBlockNumInstances = 1;
|
||||
static const uint32_t RlcCounterBlockNumInstances = 1;
|
||||
static const uint32_t RpbCounterBlockNumInstances = 1;
|
||||
static const uint32_t SdmaCounterBlockNumInstances = 2;
|
||||
static const uint32_t SpiCounterBlockNumInstances = 1;
|
||||
static const uint32_t SqcCounterBlockNumInstances = 1;
|
||||
static const uint32_t SqgCounterBlockNumInstances = 1;
|
||||
static const uint32_t TaCounterBlockNumInstances = 2;
|
||||
static const uint32_t TcpCounterBlockNumInstances = 2;
|
||||
static const uint32_t TdCounterBlockNumInstances = 2;
|
||||
static const uint32_t Utcl1CounterBlockNumInstances = 2;
|
||||
|
||||
static const uint32_t SdmaCounterBlockMaxInstances = 8;
|
||||
static const uint32_t UmcCounterBlockMaxInstances = 32;
|
||||
|
||||
// Number of block counter registers - Auto-generated from chip_offset_byte.h, edit with extra
|
||||
// caution. Reference: chip_offset_byte.h (from gfxip header file package). The following default
|
||||
// values are generated from Radeon RX 9070, the first product of the RDNA 4 lineup. It could change
|
||||
// for other products, and the change will be made in [PRODUCT_NAME]_factory.h
|
||||
//
|
||||
static const uint32_t GrbmCounterBlockNumCounters = 2;
|
||||
static const uint32_t RlcCounterBlockNumCounters = 2;
|
||||
static const uint32_t CpgCounterBlockNumCounters = 2;
|
||||
static const uint32_t CpcCounterBlockNumCounters = 2;
|
||||
static const uint32_t CpfCounterBlockNumCounters = 2;
|
||||
static const uint32_t GcrCounterBlockNumCounters = 2;
|
||||
static const uint32_t PhCounterBlockNumCounters = 8;
|
||||
static const uint32_t Ge1CounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl2aCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl2cCounterBlockNumCounters = 4;
|
||||
static const uint32_t GceaCounterBlockNumCounters = 2;
|
||||
static const uint32_t ChaCounterBlockNumCounters = 4;
|
||||
static const uint32_t ChcCounterBlockNumCounters = 4;
|
||||
static const uint32_t Ge2DistCounterBlockNumCounters = 4;
|
||||
static const uint32_t SdmaCounterBlockNumCounters = 2;
|
||||
static const uint32_t GcVml2CounterBlockNumCounters = 2;
|
||||
static const uint32_t GcMcVml2CounterBlockNumCounters = 1;
|
||||
static const uint32_t GcUtcl2CounterBlockNumCounters = 1;
|
||||
static const uint32_t GrbmhCounterBlockNumCounters = 2;
|
||||
static const uint32_t CbCounterBlockNumCounters = 4;
|
||||
static const uint32_t DbCounterBlockNumCounters = 4;
|
||||
static const uint32_t SuCounterBlockNumCounters = 4;
|
||||
static const uint32_t SxCounterBlockNumCounters = 4;
|
||||
static const uint32_t PaScCounterBlockNumCounters = 8;
|
||||
static const uint32_t TaCounterBlockNumCounters = 2;
|
||||
static const uint32_t TdCounterBlockNumCounters = 2;
|
||||
static const uint32_t TcpCounterBlockNumCounters = 4;
|
||||
static const uint32_t SpiCounterBlockNumCounters = 6;
|
||||
static const uint32_t SqgCounterBlockNumCounters = 8;
|
||||
static const uint32_t Gl1aCounterBlockNumCounters = 4;
|
||||
static const uint32_t RmiCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl1cCounterBlockNumCounters = 4;
|
||||
static const uint32_t SqcCounterBlockNumCounters = 16;
|
||||
static const uint32_t PcCounterBlockNumCounters = 4;
|
||||
static const uint32_t GceaSeCounterBlockNumCounters = 2;
|
||||
static const uint32_t GeCounterBlockNumCounters = 4;
|
||||
static const uint32_t WgsCounterBlockNumCounters = 2;
|
||||
static const uint32_t Gl1xaCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl1xcCounterBlockNumCounters = 4;
|
||||
static const uint32_t GcUtcl1CounterBlockNumCounters = 4;
|
||||
// Number of block counter registers - Auto-generated from chip_offset_byte.h, edit with extra caution
|
||||
// Reference: chip_offset_byte.h (from gfxip header file package)
|
||||
static const uint32_t ChaCounterBlockNumCounters = 4;
|
||||
static const uint32_t ChcCounterBlockNumCounters = 4;
|
||||
static const uint32_t CpcCounterBlockNumCounters = 2;
|
||||
static const uint32_t CpfCounterBlockNumCounters = 2;
|
||||
static const uint32_t CpgCounterBlockNumCounters = 2;
|
||||
static const uint32_t GcmcVmL2CounterBlockNumCounters = 8;
|
||||
static const uint32_t GcrCounterBlockNumCounters = 2;
|
||||
static const uint32_t Gcutcl2CounterBlockNumCounters = 4;
|
||||
static const uint32_t Gcvml2CounterBlockNumCounters = 2;
|
||||
static const uint32_t GcEaCpwdCounterBlockNumCounters = 2;
|
||||
static const uint32_t GcEaSeCounterBlockNumCounters = 2;
|
||||
static const uint32_t Gl1aCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl1cCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl2aCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl2cCounterBlockNumCounters = 4;
|
||||
static const uint32_t GrbmCounterBlockNumCounters = 2;
|
||||
static const uint32_t GrbmhCounterBlockNumCounters = 2;
|
||||
static const uint32_t RlcCounterBlockNumCounters = 2;
|
||||
static const uint32_t RpbCounterBlockNumCounters = 4;
|
||||
static const uint32_t SdmaCounterBlockNumCounters = 2;
|
||||
static const uint32_t SpiCounterBlockNumCounters = 6;
|
||||
static const uint32_t SqcCounterBlockNumCounters = 16;
|
||||
static const uint32_t SqgCounterBlockNumCounters = 8;
|
||||
static const uint32_t TaCounterBlockNumCounters = 2;
|
||||
static const uint32_t TcpCounterBlockNumCounters = 4;
|
||||
static const uint32_t TdCounterBlockNumCounters = 2;
|
||||
static const uint32_t Utcl1CounterBlockNumCounters = 4;
|
||||
|
||||
// Block counters max event value - Auto-generated from chip_enum.h, edit with extra caution
|
||||
// Reference: chip_enum.h (from gfxip header file package)
|
||||
// The following default values are generated from Radeon RX 9070, the first product of the
|
||||
// RDNA 4 lineup. It could change for other products, and the change will be made in
|
||||
// [PRODUCT_NAME]_factory.h
|
||||
//
|
||||
static const uint32_t GrbmCounterBlockMaxEvent = 51;
|
||||
static const uint32_t RlcCounterBlockMaxEvent = 6;
|
||||
static const uint32_t CpgCounterBlockMaxEvent = 30;
|
||||
static const uint32_t CpcCounterBlockMaxEvent = 55;
|
||||
static const uint32_t CpfCounterBlockMaxEvent = 4;
|
||||
static const uint32_t GcrCounterBlockMaxEvent = 151;
|
||||
static const uint32_t PhCounterBlockMaxEvent = 1023;
|
||||
static const uint32_t Ge1CounterBlockMaxEvent = 54;
|
||||
static const uint32_t Gl2aCounterBlockMaxEvent = 114;
|
||||
static const uint32_t Gl2cCounterBlockMaxEvent = 249;
|
||||
static const uint32_t GceaCounterBlockMaxEvent = 32;
|
||||
static const uint32_t ChaCounterBlockMaxEvent = 25;
|
||||
static const uint32_t ChcCounterBlockMaxEvent = 94;
|
||||
static const uint32_t Ge2DistCounterBlockMaxEvent = 188;
|
||||
static const uint32_t SdmaCounterBlockMaxEvent = 125;
|
||||
static const uint32_t GcVml2CounterBlockMaxEvent = 90;
|
||||
static const uint32_t GcMcVml2CounterBlockMaxEvent =
|
||||
1; // This is handled by GCMC_VM_L2_PERFCOUNTER0_CFG
|
||||
static const uint32_t GcUtcl2CounterBlockMaxEvent = 36;
|
||||
static const uint32_t GrbmhCounterBlockMaxEvent = 25;
|
||||
static const uint32_t CbCounterBlockMaxEvent = 315;
|
||||
static const uint32_t DbCounterBlockMaxEvent = 441;
|
||||
static const uint32_t PaSuCounterBlockMaxEvent = 828;
|
||||
static const uint32_t SxCounterBlockMaxEvent = 81;
|
||||
static const uint32_t ScCounterBlockMaxEvent = 821;
|
||||
static const uint32_t TaCounterBlockMaxEvent = 254;
|
||||
static const uint32_t TdCounterBlockMaxEvent = 271;
|
||||
static const uint32_t TcpCounterBlockMaxEvent = 99;
|
||||
static const uint32_t SpiCounterBlockMaxEvent = 318;
|
||||
static const uint32_t SqgCounterBlockMaxEvent = 45;
|
||||
static const uint32_t Gl1aCounterBlockMaxEvent = 21;
|
||||
static const uint32_t RmiCounterBlockMaxEvent = 138;
|
||||
static const uint32_t Gl1cCounterBlockMaxEvent = 121;
|
||||
static const uint32_t SqcCounterBlockMaxEvent = 511;
|
||||
static const uint32_t PcCounterBlockMaxEvent = 164;
|
||||
static const uint32_t GceaSeCounterBlockMaxEvent = 32;
|
||||
static const uint32_t GeCounterBlockMaxEvent = 103;
|
||||
static const uint32_t WgsCounterBlockMaxEvent = 4;
|
||||
static const uint32_t Gl1xaCounterBlockMaxEvent = 21;
|
||||
static const uint32_t Gl1xcCounterBlockMaxEvent = 109;
|
||||
static const uint32_t GcUtcl1CounterBlockMaxEvent = 71;
|
||||
static const uint32_t ChaCounterBlockMaxEvent = 25;
|
||||
static const uint32_t ChcCounterBlockMaxEvent = 94;
|
||||
static const uint32_t CpcCounterBlockMaxEvent = 55;
|
||||
static const uint32_t CpfCounterBlockMaxEvent = 4;
|
||||
static const uint32_t CpgCounterBlockMaxEvent = 30;
|
||||
static const uint32_t GcmcVmL2CounterBlockMaxEvent = 90;
|
||||
static const uint32_t GcrCounterBlockMaxEvent = 151;
|
||||
static const uint32_t Gcutcl2CounterBlockMaxEvent = 36;
|
||||
static const uint32_t Gcvml2CounterBlockMaxEvent = 90;
|
||||
static const uint32_t GcEaCpwdCounterBlockMaxEvent = 32;
|
||||
static const uint32_t GcEaSeCounterBlockMaxEvent = 32;
|
||||
static const uint32_t Gl1aCounterBlockMaxEvent = 21;
|
||||
static const uint32_t Gl1cCounterBlockMaxEvent = 121;
|
||||
static const uint32_t Gl2aCounterBlockMaxEvent = 114;
|
||||
static const uint32_t Gl2cCounterBlockMaxEvent = 249;
|
||||
static const uint32_t GrbmCounterBlockMaxEvent = 51;
|
||||
static const uint32_t GrbmhCounterBlockMaxEvent = 25;
|
||||
static const uint32_t RlcCounterBlockMaxEvent = 6;
|
||||
static const uint32_t SdmaCounterBlockMaxEvent = 125;
|
||||
static const uint32_t SpiCounterBlockMaxEvent = 318;
|
||||
static const uint32_t SqcCounterBlockMaxEvent = 511;
|
||||
static const uint32_t SqgCounterBlockMaxEvent = 45;
|
||||
static const uint32_t TaCounterBlockMaxEvent = 254;
|
||||
static const uint32_t TcpCounterBlockMaxEvent = 99;
|
||||
static const uint32_t TdCounterBlockMaxEvent = 271;
|
||||
static const uint32_t Utcl1CounterBlockMaxEvent = 71;
|
||||
} // namespace gfx1201
|
||||
|
||||
static const uint32_t SdmaCounterBlockMaxInstances = 8;
|
||||
static const uint32_t UmcCounterBlockMaxInstances = 32;
|
||||
|
||||
} // namespace gfx12
|
||||
} // namespace gfxip
|
||||
|
||||
|
||||
@@ -43,54 +43,52 @@
|
||||
#define REG_INFO_7(BLOCK) REG_INFO_WITH_CTRL_7(BLOCK, REG_32B_NULL)
|
||||
#define REG_INFO_8(BLOCK) REG_INFO_WITH_CTRL_8(BLOCK, REG_32B_NULL)
|
||||
|
||||
// Expands to one brace-enclosed register-info initializer for counter INDEX of
// BLOCK, listing four GC register addresses in this order:
//   reg<BLOCK>_PERFCOUNTER<INDEX>_CFG   (the only name that depends on INDEX)
//   reg<BLOCK>_PERFCOUNTER_RSLT_CNTL
//   reg<BLOCK>_PERFCOUNTER_LO
//   reg<BLOCK>_PERFCOUNTER_HI
// The RSLT_CNTL/LO/HI names are the same for every INDEX of a given BLOCK.
#define REG_INFO_WITH_CFG(BLOCK, INDEX) \
|
||||
{REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER##INDEX##_CFG), REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER_HI)}
|
||||
#define REG_INFO_WITH_CFG_1(BLOCK) REG_INFO_WITH_CFG(BLOCK, 0)
|
||||
#define REG_INFO_WITH_CFG_2(BLOCK) REG_INFO_WITH_CFG_1(BLOCK), REG_INFO_WITH_CFG(BLOCK, 1)
|
||||
#define REG_INFO_WITH_CFG_3(BLOCK) REG_INFO_WITH_CFG_2(BLOCK), REG_INFO_WITH_CFG(BLOCK, 2)
|
||||
#define REG_INFO_WITH_CFG_4(BLOCK) REG_INFO_WITH_CFG_3(BLOCK), REG_INFO_WITH_CFG(BLOCK, 3)
|
||||
#define REG_INFO_WITH_CFG_5(BLOCK) REG_INFO_WITH_CFG_4(BLOCK), REG_INFO_WITH_CFG(BLOCK, 4)
|
||||
#define REG_INFO_WITH_CFG_6(BLOCK) REG_INFO_WITH_CFG_5(BLOCK), REG_INFO_WITH_CFG(BLOCK, 5)
|
||||
#define REG_INFO_WITH_CFG_7(BLOCK) REG_INFO_WITH_CFG_6(BLOCK), REG_INFO_WITH_CFG(BLOCK, 6)
|
||||
#define REG_INFO_WITH_CFG_8(BLOCK) REG_INFO_WITH_CFG_7(BLOCK), REG_INFO_WITH_CFG(BLOCK, 7)
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx12 {
|
||||
namespace gfx1201 {
|
||||
// Counter register info - Auto-generated from chip_offset_byte.h, edit with extra caution
|
||||
static const CounterRegInfo GrbmCounterRegAddr[] = {REG_INFO_2(GRBM)};
|
||||
static const CounterRegInfo RlcCounterRegAddr[] = {REG_INFO_2(RLC)};
|
||||
static const CounterRegInfo CpgCounterRegAddr[] = {REG_INFO_2(CPG)};
|
||||
static const CounterRegInfo CpcCounterRegAddr[] = {REG_INFO_2(CPC)};
|
||||
static const CounterRegInfo CpfCounterRegAddr[] = {REG_INFO_2(CPF)};
|
||||
static const CounterRegInfo GcrCounterRegAddr[] = {REG_INFO_WITH_CTRL_2(GCR, REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL))};
|
||||
static const CounterRegInfo PaPhCounterRegAddr[] = {REG_INFO_8(PA_PH)};
|
||||
static const CounterRegInfo Ge1CounterRegAddr[] = {REG_INFO_4(GE1)};
|
||||
static const CounterRegInfo Gl2aCounterRegAddr[] = {REG_INFO_4(GL2A)};
|
||||
static const CounterRegInfo Gl2cCounterRegAddr[] = {REG_INFO_4(GL2C)};
|
||||
static const CounterRegInfo GceaCounterRegAddr[] = {REG_INFO_2(GC_EA_CPWD)};
|
||||
static const CounterRegInfo ChaCounterRegAddr[] = {REG_INFO_4(CHA)};
|
||||
static const CounterRegInfo ChcCounterRegAddr[] = {REG_INFO_4(CHC)};
|
||||
static const CounterRegInfo Ge2CounterRegAddr[] = {REG_INFO_4(GE2_DIST)};
|
||||
static const CounterRegInfo SdmaCounterRegAddr[] = {REG_INFO_2(SDMA0), REG_INFO_2(SDMA1)};
|
||||
//static const CounterRegInfo GcVml2CounterRegAddr[] = {REG_INFO_2(GCVML2)};
|
||||
//static const CounterRegInfo GcMcVml2CounterRegAddr[] = {REG_INFO_1(GCMC_VM_L2)};
|
||||
//static const CounterRegInfo GcUtcl2CounterRegAddr[] = {REG_INFO_1(GCUTCL2)};
|
||||
static const CounterRegInfo GrbmhCounterRegAddr[] = {REG_INFO_2(GRBMH)};
|
||||
static const CounterRegInfo CbCounterRegAddr[] = {REG_INFO_4(CB)};
|
||||
static const CounterRegInfo DbCounterRegAddr[] = {REG_INFO_4(DB)};
|
||||
static const CounterRegInfo PaSuCounterRegAddr[] = {REG_INFO_4(PA_SU)};
|
||||
static const CounterRegInfo SxCounterRegAddr[] = {REG_INFO_4(SX)};
|
||||
static const CounterRegInfo PaScCounterRegAddr[] = {REG_INFO_8(PA_SC)};
|
||||
static const CounterRegInfo TaCounterRegAddr[] = {REG_INFO_2(TA)};
|
||||
static const CounterRegInfo TdCounterRegAddr[] = {REG_INFO_2(TD)};
|
||||
static const CounterRegInfo TcpCounterRegAddr[] = {REG_INFO_4(TCP)};
|
||||
static const CounterRegInfo SpiCounterRegAddr[] = {REG_INFO_6(SPI)};
|
||||
static const CounterRegInfo SqgCounterRegAddr[] = {REG_INFO_WITH_CTRL_8(SQG, REG_32B_ADDR(GC, 0, regSQG_PERFCOUNTER_CTRL))};
|
||||
static const CounterRegInfo CpcCounterRegAddr[] = {REG_INFO_2(CPC)};
|
||||
static const CounterRegInfo CpfCounterRegAddr[] = {REG_INFO_2(CPF)};
|
||||
static const CounterRegInfo CpgCounterRegAddr[] = {REG_INFO_2(CPG)};
|
||||
static const CounterRegInfo GcmcVmL2CounterRegAddr[] = {REG_INFO_WITH_CFG_8(GCMC_VM_L2)};
|
||||
static const CounterRegInfo GcrCounterRegAddr[] = {REG_INFO_WITH_CTRL_2(GCR, REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL))};
|
||||
static const CounterRegInfo Gcutcl2CounterRegAddr[] = {REG_INFO_WITH_CFG_4(GCUTCL2)};
|
||||
// static const CounterRegInfo Gcvml2CounterRegAddr[] = {REG_INFO_2(GCVML2)};
|
||||
static const CounterRegInfo GcEaCpwdCounterRegAddr[] = {REG_INFO_2(GC_EA_CPWD)};
|
||||
static const CounterRegInfo GcEaSeCounterRegAddr[] = {REG_INFO_2(GC_EA_SE)};
|
||||
static const CounterRegInfo Gl1aCounterRegAddr[] = {REG_INFO_4(GL1A)};
|
||||
static const CounterRegInfo RmiCounterRegAddr[] = {REG_INFO_4(RMI)};
|
||||
static const CounterRegInfo Gl1cCounterRegAddr[] = {REG_INFO_4(GL1C)};
|
||||
//static const CounterRegInfo SqcCounterRegAddr[] = {REG_INFO_WITH_CTRL_16(SQ, regSQ_PERFCOUNTER_CTRL)};
|
||||
static const CounterRegInfo PcCounterRegAddr[] = {REG_INFO_4(PC)};
|
||||
static const CounterRegInfo GeCounterRegAddr[] = {REG_INFO_4(GE2_SE)};
|
||||
static const CounterRegInfo GceaSeCounterRegAddr[] = {REG_INFO_2(GC_EA_SE)};
|
||||
// static const CounterRegInfo WgsCounterRegAddr[] = {REG_INFO_2(WGS)};
|
||||
static const CounterRegInfo Gl1xaCounterRegAddr[] = {REG_INFO_4(GL1XA)};
|
||||
static const CounterRegInfo Gl1xcCounterRegAddr[] = {REG_INFO_4(GL1XC)};
|
||||
static const CounterRegInfo GcUtcl1CounterRegAddr[] = {REG_INFO_4(UTCL1)};
|
||||
static const CounterRegInfo Gl2aCounterRegAddr[] = {REG_INFO_4(GL2A)};
|
||||
static const CounterRegInfo Gl2cCounterRegAddr[] = {REG_INFO_4(GL2C)};
|
||||
static const CounterRegInfo GrbmCounterRegAddr[] = {REG_INFO_2(GRBM)};
|
||||
static const CounterRegInfo GrbmhCounterRegAddr[] = {REG_INFO_2(GRBMH)};
|
||||
static const CounterRegInfo RlcCounterRegAddr[] = {REG_INFO_2(RLC)};
|
||||
static const CounterRegInfo SdmaCounterRegAddr[] = {REG_INFO_2(SDMA0), REG_INFO_2(SDMA1)};
|
||||
static const CounterRegInfo SpiCounterRegAddr[] = {REG_INFO_6(SPI)};
|
||||
//static const CounterRegInfo SqcCounterRegAddr[] = {REG_INFO_WITH_CTRL_16(SQ, REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL))};
|
||||
static const CounterRegInfo SqgCounterRegAddr[] = {REG_INFO_WITH_CTRL_8(SQG, REG_32B_ADDR(GC, 0, regSQG_PERFCOUNTER_CTRL))};
|
||||
static const CounterRegInfo TaCounterRegAddr[] = {REG_INFO_2(TA)};
|
||||
static const CounterRegInfo TcpCounterRegAddr[] = {REG_INFO_4(TCP)};
|
||||
static const CounterRegInfo TdCounterRegAddr[] = {REG_INFO_2(TD)};
|
||||
static const CounterRegInfo Utcl1CounterRegAddr[] = {REG_INFO_4(UTCL1)};
|
||||
|
||||
// Special handling of SQC:
|
||||
// SQC only supports 32bit PMC, only regSQ_PERFCOUNTER#even_number#_SELECT is
|
||||
// used by PMC. regSQ_PERFCOUNTER#odd_number#_SELECT is used only by SPM
|
||||
// SQC only supports 32bit PMC.
|
||||
// regSQ_PERFCOUNTER#even_number#_SELECT is used by PMC and SPM
|
||||
// regSQ_PERFCOUNTER#odd_number#_SELECT is used by SPM only
|
||||
static const CounterRegInfo SqcCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_LO), REG_32B_NULL},
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER2_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER1_LO), REG_32B_NULL},
|
||||
@@ -101,57 +99,50 @@ static const CounterRegInfo SqcCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER12_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER6_LO), REG_32B_NULL},
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER14_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER7_LO), REG_32B_NULL}};
|
||||
|
||||
// Special handling of GCVML2:
|
||||
static const CounterRegInfo GcVml2CounterRegAddr[] = {
|
||||
// Special handling of GCVML2 (SPM only):
|
||||
static const CounterRegInfo Gcvml2CounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_SELECT), REG_32B_NULL, REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_LO), REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_SELECT), REG_32B_NULL, REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_LO), REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_HI)}};
|
||||
|
||||
// Special handling of GCMC_VM_L2:
|
||||
static const CounterRegInfo GcMcVml2CounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER0_CFG), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_HI)}};
|
||||
|
||||
// Special handling of GCUTCL2: Not sure if this is SPM-only
|
||||
static const CounterRegInfo GcUtcl2CounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER0_CFG), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_HI)}};
|
||||
|
||||
// Global blocks: ATCL2 CHA CHC CPC CPF CPG EA FFBM GCR GL2A GL2C GRBM RLC SDMA VML2 UTCL2
|
||||
// (Graphics only - not supported in ROCm): GE1 GE2_DIST PH
|
||||
// (Graphics only): CPG is for graphics, but it is not physically removed for compute products
|
||||
// (Not enabled for gfx12): CHCG GDS GUS
|
||||
static const GpuBlockInfo GcAtcl2CounterBlockInfo = {"ATCL2", __BLOCK_ID(ATCL2)}; // Placeholder now
|
||||
static const GpuBlockInfo ChaCounterBlockInfo = {"CHA", __BLOCK_ID(CHA), ChaCounterBlockNumInstances, ChaCounterBlockMaxEvent, ChaCounterBlockNumCounters, ChaCounterRegAddr, gfx12_cntx_prim::select_value_Cha, CounterBlockTcAttr};
|
||||
static const GpuBlockInfo ChcCounterBlockInfo = {"CHC", __BLOCK_ID(CHC), ChcCounterBlockNumInstances, ChcCounterBlockMaxEvent, ChcCounterBlockNumCounters, ChcCounterRegAddr, gfx12_cntx_prim::select_value_Chc, CounterBlockTcAttr};
|
||||
static const GpuBlockInfo CpcCounterBlockInfo = {"CPC", __BLOCK_ID(CPC), CpcCounterBlockNumInstances, CpcCounterBlockMaxEvent, CpcCounterBlockNumCounters, CpcCounterRegAddr, gfx12_cntx_prim::select_value_Cpc, CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPC};
|
||||
static const GpuBlockInfo CpfCounterBlockInfo = {"CPF", __BLOCK_ID(CPF), CpfCounterBlockNumInstances, CpfCounterBlockMaxEvent, CpfCounterBlockNumCounters, CpfCounterRegAddr, gfx12_cntx_prim::select_value_Cpf, CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPF};
|
||||
static const GpuBlockInfo CpgCounterBlockInfo = {"CPG", __BLOCK_ID(CPG), CpgCounterBlockNumInstances, CpgCounterBlockMaxEvent, CpgCounterBlockNumCounters, CpgCounterRegAddr, gfx12_cntx_prim::select_value_Cpg, CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPG};
|
||||
static const GpuBlockInfo GceaCounterBlockInfo = {"GCEA", __BLOCK_ID(GCEA), GceaCounterBlockNumInstances, GceaCounterBlockMaxEvent, GceaCounterBlockNumCounters, GceaCounterRegAddr, gfx12_cntx_prim::select_value_Gcea, 0};
|
||||
static const GpuBlockInfo Gl2aCounterBlockInfo = {"GL2A", __BLOCK_ID_HSA(GL2A), Gl2aCounterBlockNumInstances, Gl2aCounterBlockMaxEvent, Gl2aCounterBlockNumCounters, Gl2aCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
|
||||
static const GpuBlockInfo Gl2cCounterBlockInfo = {"GL2C", __BLOCK_ID_HSA(GL2C), Gl2cCounterBlockNumInstances, Gl2cCounterBlockMaxEvent, Gl2cCounterBlockNumCounters, Gl2cCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
|
||||
static const GpuBlockInfo Atcl2CounterBlockInfo = {"ATCL2", __BLOCK_ID_HSA(ATCL2)}; // Placeholder now
|
||||
static const GpuBlockInfo GcFfbmCounterBlockInfo = {"GC_FFBM", __BLOCK_ID(GC_FFBM)}; // Placeholder now
|
||||
static const GpuBlockInfo GcrCounterBlockInfo = {"GCR", __BLOCK_ID(GCR), GcrCounterBlockNumInstances, GcrCounterBlockMaxEvent, GcrCounterBlockNumCounters, GcrCounterRegAddr, gfx12_cntx_prim::select_value_Gcr, CounterBlockTcAttr};
|
||||
static const GpuBlockInfo Gl2aCounterBlockInfo = {"GL2A", __BLOCK_ID(GL2A), Gl2aCounterBlockNumInstances, Gl2aCounterBlockMaxEvent, Gl2aCounterBlockNumCounters, Gl2aCounterRegAddr, gfx12_cntx_prim::select_value_Gl2a, CounterBlockTcAttr};
|
||||
static const GpuBlockInfo Gl2cCounterBlockInfo = {"GL2C", __BLOCK_ID(GL2C), Gl2cCounterBlockNumInstances, Gl2cCounterBlockMaxEvent, Gl2cCounterBlockNumCounters, Gl2cCounterRegAddr, gfx12_cntx_prim::select_value_Gl2c, CounterBlockTcAttr};
|
||||
static const GpuBlockInfo GrbmCounterBlockInfo = {"GRBM", __BLOCK_ID(GRBM), GrbmCounterBlockNumInstances, GrbmCounterBlockMaxEvent, GrbmCounterBlockNumCounters, GrbmCounterRegAddr, gfx12_cntx_prim::select_value_Grbm, CounterBlockGRBMAttr};
|
||||
static const GpuBlockInfo RlcCounterBlockInfo = {"RLC", __BLOCK_ID(RLC), RlcCounterBlockNumInstances, RlcCounterBlockMaxEvent, RlcCounterBlockNumCounters, RlcCounterRegAddr, gfx12_cntx_prim::select_value_Rlc, 0};
|
||||
static const GpuBlockInfo SdmaPmCounterBlockInfo = {"SDMA_PM", __BLOCK_ID(SDMA_PM), SdmaCounterBlockNumInstances, SdmaCounterBlockMaxEvent, SdmaCounterBlockNumCounters, SdmaCounterRegAddr, gfx12_cntx_prim::select_value_SdmaPm, CounterBlockExplInstAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_SDMA};
|
||||
static const GpuBlockInfo GcVml2CounterBlockInfo = {"GC_VML2", __BLOCK_ID(GC_VML2)}; // Placeholder now
|
||||
static const GpuBlockInfo GcUtcl2CounterBlockInfo = {"GC_UTCL2", __BLOCK_ID(GC_UTCL2)}; // Placeholder now
|
||||
static const GpuBlockInfo GcUtcl2CounterBlockInfo = {"GC_UTCL2", __BLOCK_ID(GC_UTCL2), 1, Gcutcl2CounterBlockMaxEvent, Gcutcl2CounterBlockNumCounters, Gcutcl2CounterRegAddr, gfx12_cntx_prim::mc_select_value, CounterBlockRpbAttr|CounterBlockAidAttr};
|
||||
static const GpuBlockInfo GcVml2CounterBlockInfo = {"GC_VML2", __BLOCK_ID(GC_VML2), 1, GcmcVmL2CounterBlockMaxEvent, GcmcVmL2CounterBlockNumCounters, GcmcVmL2CounterRegAddr, gfx12_cntx_prim::mc_select_value, CounterBlockRpbAttr|CounterBlockAidAttr};
|
||||
static const GpuBlockInfo GcVml2SpmCounterBlockInfo = {"GC_VML2_SPM", __BLOCK_ID(GC_VML2_SPM), 1, Gcvml2CounterBlockMaxEvent, Gcvml2CounterBlockNumCounters, Gcvml2CounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr};
|
||||
static const GpuBlockInfo ChaCounterBlockInfo = {"CHA", __BLOCK_ID(CHA), ChaCounterBlockNumInstances, ChaCounterBlockMaxEvent, ChaCounterBlockNumCounters, ChaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
|
||||
static const GpuBlockInfo ChcCounterBlockInfo = {"CHC", __BLOCK_ID(CHC), ChcCounterBlockNumInstances, ChcCounterBlockMaxEvent, ChcCounterBlockNumCounters, ChcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
|
||||
static const GpuBlockInfo CpcCounterBlockInfo = {"CPC", __BLOCK_ID_HSA(CPC), CpcCounterBlockNumInstances, CpcCounterBlockMaxEvent, CpcCounterBlockNumCounters, CpcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPC};
|
||||
static const GpuBlockInfo CpfCounterBlockInfo = {"CPF", __BLOCK_ID_HSA(CPF), CpfCounterBlockNumInstances, CpfCounterBlockMaxEvent, CpfCounterBlockNumCounters, CpfCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPF};
|
||||
static const GpuBlockInfo CpgCounterBlockInfo = {"CPG", __BLOCK_ID(CPG), CpgCounterBlockNumInstances, CpgCounterBlockMaxEvent, CpgCounterBlockNumCounters, CpgCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPG};
|
||||
static const GpuBlockInfo GcrCounterBlockInfo = {"GCR", __BLOCK_ID_HSA(GCR), GcrCounterBlockNumInstances, GcrCounterBlockMaxEvent, GcrCounterBlockNumCounters, GcrCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
|
||||
static const GpuBlockInfo GceaCounterBlockInfo = {"GCEA", __BLOCK_ID_HSA(GCEA), GcEaCpwdCounterBlockNumInstances, GcEaCpwdCounterBlockMaxEvent, GcEaCpwdCounterBlockNumCounters, GcEaCpwdCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr};
|
||||
static const GpuBlockInfo GrbmCounterBlockInfo = {"GRBM", __BLOCK_ID_HSA(GRBM), GrbmCounterBlockNumInstances, GrbmCounterBlockMaxEvent, GrbmCounterBlockNumCounters, GrbmCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockGRBMAttr};
|
||||
static const GpuBlockInfo RlcCounterBlockInfo = {"RLC", __BLOCK_ID(RLC), RlcCounterBlockNumInstances, RlcCounterBlockMaxEvent, RlcCounterBlockNumCounters, RlcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr};
|
||||
static const GpuBlockInfo SdmaCounterBlockInfo = {"SDMA", __BLOCK_ID_HSA(SDMA), SdmaCounterBlockNumInstances, SdmaCounterBlockMaxEvent, SdmaCounterBlockNumCounters, SdmaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockExplInstAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_SDMA};
|
||||
// SE blocks: EA_SE GL2A GL2C GRBMH SPI SQG UTCL1
|
||||
// (Graphics only - not supported in ROCm): GE GL1XA GL1XC PA PC WGS
|
||||
static const GpuBlockInfo GceaSeCounterBlockInfo = {"GCEA_SE", __BLOCK_ID(GCEA_SE), GceaSeCounterBlockNumInstances, GceaSeCounterBlockMaxEvent, GceaSeCounterBlockNumCounters, GceaSeCounterRegAddr, gfx12_cntx_prim::select_value_GceaSe, CounterBlockSeAttr};
|
||||
static const GpuBlockInfo GrbmhCounterBlockInfo = {"GRBMH", __BLOCK_ID(GRBMH), GrbmhCounterBlockNumInstances, GrbmhCounterBlockMaxEvent, GrbmhCounterBlockNumCounters, GrbmhCounterRegAddr, gfx12_cntx_prim::select_value_Grbmh, CounterBlockSeAttr};
|
||||
static const GpuBlockInfo SpiCounterBlockInfo = {"SPI", __BLOCK_ID(SPI), SpiCounterBlockNumInstances, SpiCounterBlockMaxEvent, SpiCounterBlockNumCounters, SpiCounterRegAddr, gfx12_cntx_prim::select_value_Spi, CounterBlockSeAttr|CounterBlockSPIAttr, NULL, SPM_SE_BLOCK_NAME_SPI};
|
||||
static const GpuBlockInfo GceaSeCounterBlockInfo = {"GCEA_SE", __BLOCK_ID(GCEA_SE), GcEaSeCounterBlockNumInstances, GcEaSeCounterBlockMaxEvent, GcEaSeCounterBlockNumCounters, GcEaSeCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr};
|
||||
static const GpuBlockInfo GrbmhCounterBlockInfo = {"GRBMH", __BLOCK_ID(GRBMH), GrbmhCounterBlockNumInstances, GrbmhCounterBlockMaxEvent, GrbmhCounterBlockNumCounters, GrbmhCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr};
|
||||
static const GpuBlockInfo SpiCounterBlockInfo = {"SPI", __BLOCK_ID_HSA(SPI), SpiCounterBlockNumInstances, SpiCounterBlockMaxEvent, SpiCounterBlockNumCounters, SpiCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSPIAttr, NULL, SPM_SE_BLOCK_NAME_SPI};
|
||||
static const GpuBlockInfo SqgCounterBlockInfo = {"SQG", __BLOCK_ID(SQG), SqgCounterBlockNumInstances, SqgCounterBlockMaxEvent, SqgCounterBlockNumCounters, SqgCounterRegAddr, gfx12_cntx_prim::sq_select_value, CounterBlockSeAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQG};
|
||||
static const GpuBlockInfo GcUtcl1CounterBlockInfo = {"GC_UTCL1", __BLOCK_ID(GC_UTCL1), GcUtcl1CounterBlockNumInstances, GcUtcl1CounterBlockMaxEvent, GcUtcl1CounterBlockNumCounters, GcUtcl1CounterRegAddr, gfx12_cntx_prim::select_value_GcUtcl1, CounterBlockSeAttr, NULL, SPM_SE_BLOCK_NAME_UTCL1};
|
||||
static const GpuBlockInfo GcUtcl1CounterBlockInfo = {"GC_UTCL1", __BLOCK_ID(GC_UTCL1), Utcl1CounterBlockNumInstances, Utcl1CounterBlockMaxEvent, Utcl1CounterBlockNumCounters, Utcl1CounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr, NULL, SPM_SE_BLOCK_NAME_UTCL1};
|
||||
// SA blocks: GL1A GL1C
|
||||
// (Graphics only - not supported in ROCm): CB DB SC SX
|
||||
// (Not enabled for gfx12): GL1CG
|
||||
static const GpuBlockInfo Gl1aCounterBlockInfo = {"GL1A", __BLOCK_ID(GL1A), Gl1aCounterBlockNumInstances, Gl1aCounterBlockMaxEvent, Gl1aCounterBlockNumCounters, Gl1aCounterRegAddr, gfx12_cntx_prim::select_value_Gl1a, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
|
||||
static const GpuBlockInfo Gl1cCounterBlockInfo = {"GL1C", __BLOCK_ID(GL1C), Gl1cCounterBlockNumInstances, Gl1cCounterBlockMaxEvent, Gl1cCounterBlockNumCounters, Gl1cCounterRegAddr, gfx12_cntx_prim::select_value_Gl1c, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
|
||||
static const GpuBlockInfo Gl1aCounterBlockInfo = {"GL1A", __BLOCK_ID_HSA(GL1A), Gl1aCounterBlockNumInstances, Gl1aCounterBlockMaxEvent, Gl1aCounterBlockNumCounters, Gl1aCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
|
||||
static const GpuBlockInfo Gl1cCounterBlockInfo = {"GL1C", __BLOCK_ID_HSA(GL1C), Gl1cCounterBlockNumInstances, Gl1cCounterBlockMaxEvent, Gl1cCounterBlockNumCounters, Gl1cCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
|
||||
// WGP blocks: SQC TA TCP TD
|
||||
static const GpuBlockInfo SqcCounterBlockInfo = {"SQ", __BLOCK_ID(SQ), SqcCounterBlockNumInstances, SqcCounterBlockMaxEvent, SqcCounterBlockNumCounters, SqcCounterRegAddr, gfx12_cntx_prim::sq_select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQC};
|
||||
static const GpuBlockInfo TaCounterBlockInfo = {"TA", __BLOCK_ID(TA), TaCounterBlockNumInstances, TaCounterBlockMaxEvent, TaCounterBlockNumCounters, TaCounterRegAddr, gfx12_cntx_prim::select_value_Ta, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TaBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TA};
|
||||
static const GpuBlockInfo TdCounterBlockInfo = {"TD", __BLOCK_ID(TD), TdCounterBlockNumInstances, TdCounterBlockMaxEvent, TdCounterBlockNumCounters, TdCounterRegAddr, gfx12_cntx_prim::select_value_Td, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TD};
|
||||
static const GpuBlockInfo TcpCounterBlockInfo = {"TCP", __BLOCK_ID(TCP), TcpCounterBlockNumInstances, TcpCounterBlockMaxEvent, TcpCounterBlockNumCounters, TcpCounterRegAddr, gfx12_cntx_prim::select_value_Tcp, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TCP};
|
||||
} // namespace gfx1201
|
||||
static const GpuBlockInfo SqcCounterBlockInfo = {"SQ", __BLOCK_ID_HSA(SQ), SqcCounterBlockNumInstances, SqcCounterBlockMaxEvent, SqcCounterBlockNumCounters, SqcCounterRegAddr, gfx12_cntx_prim::sq_select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQC};
|
||||
static const GpuBlockInfo TaCounterBlockInfo = {"TA", __BLOCK_ID_HSA(TA), TaCounterBlockNumInstances, TaCounterBlockMaxEvent, TaCounterBlockNumCounters, TaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TaBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TA};
|
||||
static const GpuBlockInfo TdCounterBlockInfo = {"TD", __BLOCK_ID_HSA(TD), TdCounterBlockNumInstances, TdCounterBlockMaxEvent, TdCounterBlockNumCounters, TdCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TD};
|
||||
static const GpuBlockInfo TcpCounterBlockInfo = {"TCP", __BLOCK_ID_HSA(TCP), TcpCounterBlockNumInstances, TcpCounterBlockMaxEvent, TcpCounterBlockNumCounters, TcpCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TCP};
|
||||
} // namespace gfx12xx
|
||||
} // namespace gfx12
|
||||
} // namespace gfxip
|
||||
|
||||
|
||||
@@ -32,23 +32,6 @@
|
||||
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4 ///< Privileged memory performance counter
|
||||
#define COPY_DATA_SEL_COUNT_1DW 0 ///< Copy 1 word (32 bits)
|
||||
|
||||
// Counter Select Register value lambdas
|
||||
#define select_value(reg_name) \
|
||||
[](const counter_des_t& counter_des) { \
|
||||
uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id); \
|
||||
return select; \
|
||||
}
|
||||
#define select_value_t2(reg_name) \
|
||||
[](const counter_des_t& counter_des) { \
|
||||
uint32_t select = SET_REG_FIELD_BITS(reg_name, PERFCOUNTER_SELECT, counter_des.id); \
|
||||
return select; \
|
||||
}
|
||||
#define select_value_blank() \
|
||||
[](const counter_des_t& counter_des) { \
|
||||
uint32_t select = 0; \
|
||||
return select; \
|
||||
}
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx12 {
|
||||
|
||||
@@ -156,7 +139,7 @@ class gfx12_cntx_prim {
|
||||
} gfx;
|
||||
};
|
||||
|
||||
static const uint32_t SQ_BLOCK_ID = __BLOCK_ID(SQ);
|
||||
static const uint32_t SQ_BLOCK_ID = __BLOCK_ID_HSA(SQ);
|
||||
static const uint32_t SQ_BLOCK_SPM_ID = SPM_SE_BLOCK_NAME_SQG;
|
||||
|
||||
static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG;
|
||||
@@ -254,7 +237,7 @@ class gfx12_cntx_prim {
|
||||
uint32_t grbm_gfx_index =
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_side<<6) | (wgp_index << 2) | (instance_index << 1)));
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_side<<6) | (wgp_index << 2) | instance_index));
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
@@ -365,34 +348,22 @@ class gfx12_cntx_prim {
|
||||
static uint32_t mc_config_value(const counter_des_t& counter_des) { return counter_des.index; }
|
||||
|
||||
// MC registers values
|
||||
static uint32_t mc_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t perfcounter0_cfg =
|
||||
SET_REG_FIELD_BITS(GCUTCL2_PERFCOUNTER0_CFG, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(GCUTCL2_PERFCOUNTER0_CFG, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM) |
|
||||
SET_REG_FIELD_BITS(GCUTCL2_PERFCOUNTER0_CFG, ENABLE, 1);
|
||||
return perfcounter0_cfg;
|
||||
}
|
||||
static uint32_t mc_reset_value() { return MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM; }
|
||||
static uint32_t mc_start_value() { return MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM; }
|
||||
|
||||
static auto constexpr select_value_Cha= select_value(CHA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Chc= select_value(CHC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Cpc= select_value(CPC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Cpf= select_value(CPF_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Cpg= select_value(CPG_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gcea= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_Gcr= select_value(GCR_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gl2a= select_value(GL2A_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gl2c= select_value(GL2C_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Grbm= select_value(GRBM_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Rlc= select_value_t2(RLC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SdmaPm= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_GcVml2= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_GcUtcl2= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_GceaSe= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_Grbmh= select_value(GRBMH_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Spi= select_value(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GcUtcl1= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_Gl1a= select_value(GL1A_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gl1c= select_value(GL1C_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Ta= select_value(TA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Td= select_value(TD_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Tcp= select_value(TCP_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = select_value_blank();
|
||||
|
||||
static uint32_t select_value(const counter_des_t& counter_des) {
|
||||
uint32_t perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(CPC_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
|
||||
return perfcounter0_select;
|
||||
}
|
||||
|
||||
static uint32_t spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcp_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
|
||||
@@ -48,7 +48,7 @@ class Gfx10Factory : public Pm4Factory {
|
||||
// void ConstructTable(const AgentInfo* agent_info);
|
||||
void Init(const AgentInfo* agent_info);
|
||||
// void ConstructBuilders(const AgentInfo* agent_info);
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
// Gfx builders init
|
||||
@@ -81,7 +81,7 @@ void Gfx10Factory::Init(const AgentInfo* agent_info) {
|
||||
}
|
||||
|
||||
// GFX10 block table
|
||||
const GpuBlockInfo* Gfx10Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
const GpuBlockInfo* Gfx10Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
&CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo,
|
||||
NULL /*&GrbmSeCounterBlockInfo*/, &SpiCounterBlockInfo, &SqCounterBlockInfo,
|
||||
NULL /*&SqCsCounterBlockInfo*/, NULL /*GFX8 SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo,
|
||||
|
||||
@@ -48,7 +48,7 @@ class Gfx11Factory : public Pm4Factory {
|
||||
// void ConstructTable(const AgentInfo* agent_info);
|
||||
void Init(const AgentInfo* agent_info);
|
||||
// void ConstructBuilders(const AgentInfo* agent_info);
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
// Gfx builders init
|
||||
@@ -81,7 +81,7 @@ void Gfx11Factory::Init(const AgentInfo* agent_info) {
|
||||
}
|
||||
|
||||
// GFX11 block table
|
||||
const GpuBlockInfo* Gfx11Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
const GpuBlockInfo* Gfx11Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
&CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo,
|
||||
NULL /*&GrbmSeCounterBlockInfo*/, &SpiCounterBlockInfo, &SqCounterBlockInfo,
|
||||
NULL /*&SqCsCounterBlockInfo*/, NULL /*GFX8 SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo,
|
||||
|
||||
@@ -79,31 +79,35 @@ void Gfx12Factory::ConstructBuilders(const AgentInfo* agent_info) {
|
||||
|
||||
void Gfx12Factory::ConstructTable(const AgentInfo* agent_info) {
|
||||
// Global blocks
|
||||
block_table_[__BLOCK_ID(CHA)] = &ChaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CHC)] = &ChcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CPC)] = &CpcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CPF)] = &CpfCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CPG)] = &CpgCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GCEA)] = &GceaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GCR)] = &GcrCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GL2A)] = &Gl2aCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GL2C)] = &Gl2cCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GRBM)] = &GrbmCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(RLC)] = &RlcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(SDMA_PM)] = &SdmaPmCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CHA)] = &ChaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CHC)] = &ChcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(CPC)] = &CpcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(CPF)] = &CpfCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CPG)] = &CpgCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GC_UTCL2)] = &GcUtcl2CounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GC_VML2)] = &GcVml2CounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GC_VML2_SPM)] = &GcVml2SpmCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(GCEA)] = &GceaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(GCR)] = &GcrCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(GL2A)] = &Gl2aCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(GL2C)] = &Gl2cCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(GRBM)] = &GrbmCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(RLC)] = &RlcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(SDMA)] = &SdmaCounterBlockInfo;
|
||||
// SE blocks
|
||||
block_table_[__BLOCK_ID(GCEA_SE)] = &GceaSeCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GRBMH)] = &GrbmhCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(SPI)] = &SpiCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(SQ)] = &SqcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GC_UTCL1)] = &GcUtcl1CounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GC_UTCL1)] = &GcUtcl1CounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GCEA_SE)] = &GceaSeCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GRBMH)] = &GrbmhCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(SPI)] = &SpiCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(SQG)] = &SqgCounterBlockInfo;
|
||||
// SA blocks
|
||||
block_table_[__BLOCK_ID(GL1A)] = &Gl1aCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GL1C)] = &Gl1cCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(GL1A)] = &Gl1aCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(GL1C)] = &Gl1cCounterBlockInfo;
|
||||
// WGP blocks
|
||||
block_table_[__BLOCK_ID(TA)] = &TaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(TCP)] = &TcpCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(TD)] = &TdCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(SQ)] = &SqcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(TA)] = &TaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(TCP)] = &TcpCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID_HSA(TD)] = &TdCounterBlockInfo;
|
||||
}
|
||||
|
||||
// Pm4Factory create methods
|
||||
|
||||
@@ -28,11 +28,11 @@
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
const GpuBlockInfo* Mi100Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {};
|
||||
const GpuBlockInfo* Mi100Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {};
|
||||
|
||||
Mi100Factory::Mi100Factory(const AgentInfo* agent_info)
|
||||
: Gfx9Factory(block_table_, sizeof(block_table_), agent_info) {
|
||||
for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
for (unsigned i = 0; i < AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[i];
|
||||
if (base_table_ptr == NULL) continue;
|
||||
GpuBlockInfo* block_info = nullptr;
|
||||
|
||||
@@ -37,14 +37,14 @@ class Mi200Factory : public Gfx9Factory {
|
||||
virtual int GetAccumHiID() const override { return 185; };
|
||||
|
||||
protected:
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
const GpuBlockInfo* Mi200Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {};
|
||||
const GpuBlockInfo* Mi200Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {};
|
||||
|
||||
Mi200Factory::Mi200Factory(const AgentInfo* agent_info)
|
||||
: Gfx9Factory(block_table_, sizeof(block_table_), agent_info) {
|
||||
for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
for (unsigned i = 0; i < AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[i];
|
||||
if (base_table_ptr == NULL) continue;
|
||||
GpuBlockInfo* block_info = nullptr;
|
||||
|
||||
@@ -31,7 +31,7 @@ namespace aql_profile {
|
||||
class Mi300Factory : public Mi100Factory {
|
||||
public:
|
||||
explicit Mi300Factory(const AgentInfo* agent_info) : Mi100Factory(agent_info) {
|
||||
for (unsigned blockname_id = 0; blockname_id < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER;
|
||||
for (unsigned blockname_id = 0; blockname_id < AQLPROFILE_BLOCKS_NUMBER;
|
||||
++blockname_id) {
|
||||
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[blockname_id];
|
||||
if (base_table_ptr == NULL) continue;
|
||||
|
||||
@@ -76,7 +76,7 @@ void Gfx9Factory::Print(const GpuBlockInfo* block_info) {
|
||||
}
|
||||
|
||||
// GFX9 block table
|
||||
const GpuBlockInfo* Gfx9Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
const GpuBlockInfo* Gfx9Factory::block_table_[AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
&CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo,
|
||||
&GrbmSeCounterBlockInfo, &SpiCounterBlockInfo, &SqCounterBlockInfo, &SqCsCounterBlockInfo,
|
||||
NULL /*GFX? SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo, &TcaCounterBlockInfo,
|
||||
|
||||
@@ -42,7 +42,7 @@ class Gfx9Factory : public Pm4Factory {
|
||||
|
||||
protected:
|
||||
void Init(const AgentInfo* agent_info);
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
|
||||
|
||||
static void Print(const GpuBlockInfo* block_info);
|
||||
};
|
||||
@@ -53,7 +53,7 @@ class Mi100Factory : public Gfx9Factory {
|
||||
explicit Mi100Factory(const AgentInfo* agent_info);
|
||||
|
||||
protected:
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
static const GpuBlockInfo* block_table_[AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
@@ -50,6 +50,49 @@ typedef enum {
|
||||
AQLPROFILE_AGENT_VERSION_LAST
|
||||
} aqlprofile_agent_version_t;
|
||||
|
||||
/**
|
||||
* @brief Enums for counter blocks.
|
||||
* AQLPROFILE_BLOCK_NAME_RESERVED_X are blocks reserved for npi. Reserving them here can maintain
|
||||
* enum consistency between mainline and npi.
|
||||
* TODO: Move all counter blocks here from hsa_ven_amd_aqlprofile.h
|
||||
*/
|
||||
typedef enum {
|
||||
// Blocks reserved for NPI support
|
||||
AQLPROFILE_BLOCK_NAME_RESERVED_0 = HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER,
|
||||
AQLPROFILE_BLOCK_NAME_RESERVED_1,
|
||||
AQLPROFILE_BLOCK_NAME_RESERVED_2,
|
||||
AQLPROFILE_BLOCK_NAME_RESERVED_3,
|
||||
AQLPROFILE_BLOCK_NAME_RESERVED_4,
|
||||
AQLPROFILE_BLOCK_NAME_RESERVED_5,
|
||||
|
||||
// Blocks available for most ASICs, but not currently in use
|
||||
AQLPROFILE_BLOCK_NAME_CPG,
|
||||
AQLPROFILE_BLOCK_NAME_RLC,
|
||||
|
||||
// New blocks for gc_12_0_x
|
||||
AQLPROFILE_BLOCK_NAME_CHA,
|
||||
AQLPROFILE_BLOCK_NAME_CHC,
|
||||
AQLPROFILE_BLOCK_NAME_GC_CANE,
|
||||
AQLPROFILE_BLOCK_NAME_GC_FFBM,
|
||||
AQLPROFILE_BLOCK_NAME_GC_L2TLB,
|
||||
AQLPROFILE_BLOCK_NAME_GC_UTCL1,
|
||||
AQLPROFILE_BLOCK_NAME_GC_UTCL2,
|
||||
AQLPROFILE_BLOCK_NAME_GC_VML2,
|
||||
AQLPROFILE_BLOCK_NAME_GC_VML2_SPM,
|
||||
AQLPROFILE_BLOCK_NAME_GCEA_SE,
|
||||
AQLPROFILE_BLOCK_NAME_GRBMH,
|
||||
AQLPROFILE_BLOCK_NAME_SQG,
|
||||
|
||||
// Blocks reserved for NPI support
|
||||
AQLPROFILE_BLOCK_NAME_RESERVED_6,
|
||||
AQLPROFILE_BLOCK_NAME_RESERVED_7,
|
||||
AQLPROFILE_BLOCK_NAME_RESERVED_8,
|
||||
AQLPROFILE_BLOCK_NAME_RESERVED_9,
|
||||
|
||||
// Add new blocks above
|
||||
AQLPROFILE_BLOCKS_NUMBER
|
||||
} aqlprofile_block_name_t;
|
||||
|
||||
/**
|
||||
* @brief Flags to describe which agents can access given buffer.
|
||||
*/
|
||||
|
||||
@@ -285,9 +285,14 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
|
||||
// std::endl;
|
||||
|
||||
// Set GRBM index to access proper block instance
|
||||
const uint32_t grbm_value = (block_info->instance_count > 1)
|
||||
//
|
||||
// TODO: In order to get a different event for each instance of a WGP counter block, we
|
||||
// need to loop through the WGPs instead of blindly broadcasting to all instances. Fortunately,
|
||||
// this is not a common practice.
|
||||
const uint32_t grbm_value = (block_info->instance_count > 1 && !(block_info->attr & CounterBlockWgpAttr))
|
||||
? Primitives::grbm_inst_index_value(block_des.index)
|
||||
: Primitives::grbm_broadcast_value();
|
||||
|
||||
builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::GRBM_GFX_INDEX_ADDR, grbm_value);
|
||||
// Reset counters
|
||||
if (block_info->attr & CounterBlockMcAttr) {
|
||||
@@ -602,9 +607,12 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
|
||||
else
|
||||
grbm_value = Primitives::grbm_se_sh_wgp_index_value(se_index, sarray, wgp);
|
||||
builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::GRBM_GFX_INDEX_ADDR, grbm_value);
|
||||
uint32_t dw_mask = reg_info.register_addr_hi.offset ? 3 : 1;
|
||||
builder.BuildCopyCounterDataPacket(
|
||||
cmd_buffer, reg_info.register_addr_lo, reg_info.register_addr_hi,
|
||||
reinterpret_cast<uint32_t*>(data_buffer) + read_counter, 1);
|
||||
reinterpret_cast<uint32_t*>(data_buffer) + read_counter, dw_mask);
|
||||
if (data_buffer && (dw_mask == 1))
|
||||
*(reinterpret_cast<uint32_t*>(data_buffer) + read_counter + 1) = 0;
|
||||
read_counter += 2;
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -31,7 +31,7 @@ find_package(Clang REQUIRED CONFIG
|
||||
|
||||
## Building test executable
|
||||
add_executable ( ${EXE_NAME} ${KERN_SRC} ${CTRL_SRC} ${UTIL_SRC} )
|
||||
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${API_PATH} ${ROCM_ROOT_DIR}/include ${TEST_DIR}/parser/ )
|
||||
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${API_PATH} ${ROCM_ROOT_DIR}/include ${TEST_DIR}/parser/ ${TEST_DIR}/../src/core/include)
|
||||
target_link_libraries( ${EXE_NAME} PRIVATE pthread hsa-runtime64::hsa-runtime64 dl )
|
||||
install(TARGETS ${EXE_NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME} COMPONENT tests)
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
#include "aql_profile_v2.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
@@ -275,6 +276,65 @@ int main(int argc, char* argv[]) {
|
||||
};
|
||||
events_count = sizeof(events_arr1) / sizeof(hsa_ven_amd_aqlprofile_event_t);
|
||||
events_arr = events_arr1;
|
||||
} else if (TestHsa::HsaAgentName() == "gfx12") {
|
||||
const hsa_ven_amd_aqlprofile_event_t events_arr1[] = {
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHA, 0, 25 /*ALWAYS*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHA, 0, 0 /*BUSY*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHC, 0, 0 /*ALWAYS*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CHC, 0, 1 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 0 /*ALWAYS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 25 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0, 0 /*ALWAYS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0, 24 /*BUSY*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CPG, 0, 0 /*ALWAYS*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_CPG, 0, 51 /*BUSY*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_UTCL2, 0, 1},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_VML2, 0, 5},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA, 0, 3},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA, 0, 4},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR, 0, 6},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR, 0, 22},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A, 0, 1 /*ALWAYS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A, 0, 2 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C, 0, 1 /*ALWAYS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C, 0, 2 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0, 0 /*ALWAYS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0, 2 /*GUI_ACTIVE*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_RLC, 0, 2},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_RLC, 0, 5},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 0, 0 /*ALWAYS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 0, 2 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 1, 0 /*ALWAYS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA, 1, 2 /*BUSY*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_UTCL1, 0, 1},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GC_UTCL1, 0, 2},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GCEA_SE, 0, 3},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GCEA_SE, 0, 4},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GRBMH, 0, 0 /*ALWAYS*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_GRBMH, 0, 19},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0, 46 /*CSN_BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0, 47 /*CSN_NUM_THREADGROUPS*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_SQG, 0,14 /*ALWAYS*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_SQG, 0, 15 /*BUSY*/},
|
||||
{(hsa_ven_amd_aqlprofile_block_name_t)AQLPROFILE_BLOCK_NAME_SQG, 0, 19 /*WAVES*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A, 0, 21 /*ALWAYS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A, 0, 0 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C, 0, 0 /*ALWAYS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C, 0, 1 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 2 /*ALWAYS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 3 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 4 /*WAVES*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0, 15 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0, 1 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0, 96 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0, 10 /*REQ_READ*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0, 14 /*REQ_WRITE*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1, 96 /*BUSY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1, 10 /*REQ_READ*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1, 14 /*REQ_WRITE*/},
|
||||
};
|
||||
events_count = sizeof(events_arr1) / sizeof(hsa_ven_amd_aqlprofile_event_t);
|
||||
events_arr = events_arr1;
|
||||
} else {
|
||||
const hsa_ven_amd_aqlprofile_event_t events_arr1[] = {
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 4 /*WAVES*/},
|
||||
@@ -294,7 +354,7 @@ int main(int argc, char* argv[]) {
|
||||
} else {
|
||||
const int block_index_max = 16;
|
||||
const int event_id_max = 128;
|
||||
for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
for (unsigned i = 0; i < AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
for (unsigned j = 0; j < block_index_max; ++j) {
|
||||
for (unsigned k = 0; k <= event_id_max; k += scan_step) {
|
||||
fflush(stdout);
|
||||
|
||||
Verwijs in nieuw issue
Block a user