<metric name="GRBM_COUNT" block=GRBM event=0 descr="Tie High - Count Number of Clocks"></metric>
<metric name="GRBM_GUI_ACTIVE" block=GRBM event=2 descr="The GUI is Active"></metric>
<metric name="SQ_WAVES" block=SQ event=4 descr="Count number of waves sent to SQs. (per-simd, emulated, global)"></metric>
<metric name="SQ_INSTS_VALU" block=SQ event=26 descr="Number of VALU instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VMEM_WR" block=SQ event=27 descr="Number of VMEM write instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VMEM_RD" block=SQ event=28 descr="Number of VMEM read instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_SALU" block=SQ event=30 descr="Number of SALU instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_SMEM" block=SQ event=31 descr="Number of SMEM instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_FLAT" block=SQ event=32 descr="Number of FLAT instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_FLAT_LDS_ONLY" block=SQ event=33 descr="Number of FLAT instructions issued that read/wrote only from/to LDS (only works if EARLY_TA_DONE is enabled). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_LDS" block=SQ event=34 descr="Number of LDS instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_GDS" block=SQ event=35 descr="Number of GDS instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_WAIT_INST_LDS" block=SQ event=61 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
<metric name="SQ_ACTIVE_INST_VALU" block=SQ event=69 descr="Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, nondeterministic). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_INST_CYCLES_SALU" block=SQ event=86 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated)"></metric>
<metric name="SQ_THREAD_CYCLES_VALU" block=SQ event=89 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"></metric>
<metric name="SQ_LDS_BANK_CONFLICT" block=SQ event=97 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"></metric>
<metric name="TA_TA_BUSY" block=TA event=15 descr="TA block is busy. Perf_Windowing not supported for this counter."></metric>
<metric name="TA_FLAT_READ_WAVEFRONTS" block=TA event=101 descr="Number of flat opcode reads processed by the TA."></metric>
<metric name="TA_FLAT_WRITE_WAVEFRONTS" block=TA event=102 descr="Number of flat opcode writes processed by the TA."></metric>
<metric name="TCC_HIT" block=TCC event=18 descr="Number of cache hits."></metric>
<metric name="TCC_MISS" block=TCC event=19 descr="Number of cache misses. UC reads count as misses."></metric>
<metric name="TCC_MC_RDREQ" block=TCC event=35 descr="Number of 32-byte reads. The hardware actually does 64-byte reads but the number is adjusted to provide uniformity."></metric>
<metric name="TCC_MC_WRREQ" block=TCC event=26 descr="Number of 32-byte transactions going over the TC_MC_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests."></metric>
<metric name="TCC_MC_WRREQ_STALL" block=TCC event=28 descr="Number of cycles a write request was stalled."></metric>
<metric name="TCP_TCP_TA_DATA_STALL_CYCLES" block=TCP event=3 descr="TCP stalls TA data interface. Now Windowed."></metric>
<metric name="GRBM_COUNT" block=GRBM event=0 descr="Tie High - Count Number of Clocks"></metric>
<metric name="GRBM_GUI_ACTIVE" block=GRBM event=2 descr="The GUI is Active"></metric>
<metric name="SQ_WAVES" block=SQ event=4 descr="Count number of waves sent to SQs. (per-simd, emulated, global)"></metric>
<metric name="SQ_INSTS_VALU" block=SQ event=26 descr="Number of VALU instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VMEM_WR" block=SQ event=27 descr="Number of VMEM write instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VMEM_RD" block=SQ event=28 descr="Number of VMEM read instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_SALU" block=SQ event=30 descr="Number of SALU instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_SMEM" block=SQ event=31 descr="Number of SMEM instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_FLAT" block=SQ event=32 descr="Number of FLAT instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_FLAT_LDS_ONLY" block=SQ event=33 descr="Number of FLAT instructions issued that read/wrote only from/to LDS (only works if EARLY_TA_DONE is enabled). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_LDS" block=SQ event=34 descr="Number of LDS instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_GDS" block=SQ event=35 descr="Number of GDS instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_WAIT_INST_LDS" block=SQ event=63 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
<metric name="SQ_ACTIVE_INST_VALU" block=SQ event=71 descr="regspec 71? Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, nondeterministic). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_INST_CYCLES_SALU" block=SQ event=84 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated)"></metric>
<metric name="SQ_THREAD_CYCLES_VALU" block=SQ event=85 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"></metric>
<metric name="SQ_LDS_BANK_CONFLICT" block=SQ event=93 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"></metric>
<metric name="TA_TA_BUSY" block=TA event=15 descr="TA block is busy. Perf_Windowing not supported for this counter."></metric>
<metric name="TA_FLAT_READ_WAVEFRONTS" block=TA event=101 descr="Number of flat opcode reads processed by the TA."></metric>
<metric name="TA_FLAT_WRITE_WAVEFRONTS" block=TA event=102 descr="Number of flat opcode writes processed by the TA."></metric>
<metric name="TCC_HIT" block=TCC event=20 descr="Number of cache hits."></metric>
<metric name="TCC_MISS" block=TCC event=22 descr="Number of cache misses. UC reads count as misses."></metric>
<metric name="TCC_EA_WRREQ" block=TCC event=29 descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands."></metric>
<metric name="TCC_EA_WRREQ_64B" block=TCC event=30 descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface."></metric>
<metric name="TCC_EA_WRREQ_STALL" block=TCC event=33 descr="Number of cycles a write request was stalled."></metric>
<metric name="TCC_EA_RDREQ" block=TCC event=41 descr="Number of TCC/EA read requests (either 32-byte or 64-byte)"></metric>
<metric name="TCC_EA_RDREQ_32B" block=TCC event=42 descr="Number of 32-byte TCC/EA read requests"></metric>
<metric name="TCP_TCP_TA_DATA_STALL_CYCLES" block=TCP event=6 descr="TCP stalls TA data interface. Now Windowed."></metric>
<metric name="TCC_EA1_WRREQ" block=TCC event=256 descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands."></metric>
<metric name="TCC_EA1_WRREQ_64B" block=TCC event=257 descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface."></metric>
<metric name="TCC_EA1_WRREQ_STALL" block=TCC event=260 descr="Number of cycles a write request was stalled."></metric>
<metric name="TCC_EA1_RDREQ" block=TCC event=267 descr="Number of TCC/EA read requests (either 32-byte or 64-byte)"></metric>
<metric name="TCC_EA1_RDREQ_32B" block=TCC event=268 descr="Number of 32-byte TCC/EA read requests"></metric>
</gfx906>
<gfx908 base="gfx9">
<metric name="SQ_INSTS_VMEM_WR" block=SQ event=28 descr="Number of VMEM write instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VMEM_RD" block=SQ event=29 descr="Number of VMEM read instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_SALU" block=SQ event=31 descr="Number of SALU instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_SMEM" block=SQ event=32 descr="Number of SMEM instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_FLAT" block=SQ event=33 descr="Number of FLAT instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_FLAT_LDS_ONLY" block=SQ event=34 descr="Number of FLAT instructions issued that read/wrote only from/to LDS (only works if EARLY_TA_DONE is enabled). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_LDS" block=SQ event=35 descr="Number of LDS instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_GDS" block=SQ event=36 descr="Number of GDS instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_WAIT_INST_LDS" block=SQ event=64 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
<metric name="SQ_ACTIVE_INST_VALU" block=SQ event=72 descr="regspec 71? Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, nondeterministic). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_INST_CYCLES_SALU" block=SQ event=85 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated)"></metric>
<metric name="SQ_THREAD_CYCLES_VALU" block=SQ event=86 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"></metric>
<metric name="SQ_LDS_BANK_CONFLICT" block=SQ event=94 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"></metric>
<metric name="TCC_HIT" block=TCC event=17 descr="Number of cache hits."></metric>
<metric name="TCC_MISS" block=TCC event=19 descr="Number of cache misses. UC reads count as misses."></metric>
<metric name="TCC_EA_WRREQ" block=TCC event=26 descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands."></metric>
<metric name="TCC_EA_WRREQ_64B" block=TCC event=27 descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface."></metric>
<metric name="TCC_EA_WRREQ_STALL" block=TCC event=30 descr="Number of cycles a write request was stalled."></metric>
<metric name="TCC_EA_RDREQ" block=TCC event=38 descr="Number of TCC/EA read requests (either 32-byte or 64-byte)"></metric>
<metric name="TCC_EA_RDREQ_32B" block=TCC event=39 descr="Number of 32-byte TCC/EA read requests"></metric>
<metric name="SQ_WAIT_INST_LDS" block=SQ event=91 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
<metric name="TCP_TCP_TA_DATA_STALL_CYCLES" block=TCP event=6 descr="TCP stalls TA data interface. Now Windowed."></metric>
<metric name="GRBM_COUNT" block=GRBM event=0 descr="Tie High - Count Number of Clocks"></metric>
<metric name="GRBM_GUI_ACTIVE" block=GRBM event=2 descr="The GUI is Active"></metric>
<metric name="GRBM_CP_BUSY" block=GRBM event=3 descr="Any of the Command Processor (CPG/CPC/CPF) blocks are busy."></metric>
<metric name="GRBM_SPI_BUSY" block=GRBM event=11 descr="Any of the Shader Pipe Interpolators (SPI) are busy in the shader engine(s)."></metric>
<metric name="GRBM_TA_BUSY" block=GRBM event=13 descr="Any of the Texture Pipes (TA) are busy in the shader engine(s)."></metric>
<metric name="GRBM_TC_BUSY" block=GRBM event=28 descr="Any of the Texture Cache Blocks (TCP/TCI/TCA/TCC) are busy."></metric>
<metric name="GRBM_CPC_BUSY" block=GRBM event=30 descr="The Command Processor Compute (CPC) is busy."></metric>
<metric name="GRBM_CPF_BUSY" block=GRBM event=31 descr="The Command Processor Fetchers (CPF) is busy."></metric>
<metric name="GRBM_UTCL2_BUSY" block=GRBM event=34 descr="The Unified Translation Cache Level-2 (UTCL2) block is busy."></metric>
<metric name="GRBM_EA_BUSY" block=GRBM event=35 descr="The Efficiency Arbiter (EA) block is busy."></metric>
<metric name="CPC_ME1_BUSY_FOR_PACKET_DECODE" block=CPC event=13 descr="Me1 busy for packet decode."></metric>
<metric name="CPC_UTCL1_STALL_ON_TRANSLATION" block=CPC event=24 descr="One of the UTCL1s is stalled waiting on translation, XNACK or PENDING response."></metric>
<metric name="CPF_CMP_UTCL1_STALL_ON_TRANSLATION" block=CPF event=20 descr="One of the Compute UTCL1s is stalled waiting on translation, XNACK or PENDING response."></metric>
<metric name="CPF_CPF_TCIU_STALL" block=CPF event=28 descr="CPF TCIU interface Stalled waiting on Free, Tags."></metric>
<metric name="SPI_CSN_WINDOW_VALID" block=SPI event=47 descr="Clock count enabled by perfcounter_start event. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metric name="SPI_CSN_BUSY" block=SPI event=48 descr="Number of clocks with outstanding waves (SPI or SH). Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metric name="SPI_CSN_NUM_THREADGROUPS" block=SPI event=49 descr="Number of threadgroups launched. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metric name="SPI_CSN_WAVE" block=SPI event=52 descr="Number of waves. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metric name="SPI_RA_REQ_NO_ALLOC" block=SPI event=79 descr="Arb cycles with requests but no allocation. Source is RA0"></metric>
<metric name="SPI_RA_REQ_NO_ALLOC_CSN" block=SPI event=85 descr="Arb cycles with CSn req and no CSn alloc. Source is RA0"></metric>
<metric name="SPI_RA_RES_STALL_CSN" block=SPI event=91 descr="Arb cycles with CSn req and no CSn fits. Source is RA0"></metric>
<metric name="SPI_RA_TMP_STALL_CSN" block=SPI event=97 descr="Cycles where csn wants to req but does not fit in temp space."></metric>
<metric name="SPI_RA_WAVE_SIMD_FULL_CSN" block=SPI event=103 descr="Sum of SIMD where WAVE can't take csn wave when !fits. Source is RA0"></metric>
<metric name="SPI_RA_VGPR_SIMD_FULL_CSN" block=SPI event=109 descr="Sum of SIMD where VGPR can't take csn wave when !fits. Source is RA0"></metric>
<metric name="SPI_RA_SGPR_SIMD_FULL_CSN" block=SPI event=115 descr="Sum of SIMD where SGPR can't take csn wave when !fits. Source is RA0"></metric>
<metric name="SPI_RA_LDS_CU_FULL_CSN" block=SPI event=120 descr="Sum of CU where LDS can't take csn wave when !fits. Source is RA0"></metric>
<metric name="SPI_RA_BAR_CU_FULL_CSN" block=SPI event=123 descr="Sum of CU where BARRIER can't take csn wave when !fits. Source is RA0"></metric>
<metric name="SPI_RA_BULKY_CU_FULL_CSN" block=SPI event=125 descr="Sum of CU where BULKY can't take csn wave when !fits. Source is RA0"></metric>
<metric name="SPI_RA_TGLIM_CU_FULL_CSN" block=SPI event=127 descr="Cycles where csn wants to req but all CU are at tg_limit"></metric>
<metric name="SPI_RA_WVLIM_STALL_CSN" block=SPI event=133 descr="Number of clocks csn is stalled due to WAVE LIMIT."></metric>
<metric name="SPI_SWC_CSC_WR" block=SPI event=189 descr="Number of clocks to write CSC waves to SGPRs (need to multiply this value by 4) Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metric name="SPI_VWC_CSC_WR" block=SPI event=195 descr="Number of clocks to write CSC waves to VGPRs (need to multiply this value by 4) Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metric name="SQ_ACCUM_PREV" block=SQ event=1 descr="For counter N, increment by the value of counter N-1. Only accumulates once every 4 cycles."></metric>
<metric name="SQ_BUSY_CYCLES" block=SQ event=3 descr="Clock cycles while SQ is reporting that it is busy. (nondeterministic, per-simd, global)"></metric>
<metric name="SQ_WAVES" block=SQ event=4 descr="Count number of waves sent to SQs. (per-simd, emulated, global)"></metric>
<metric name="SQ_LEVEL_WAVES" block=SQ event=5 descr="Track the number of waves. Set ACCUM_PREV for the next counter to use this. (level, per-simd, global)"></metric>
<metric name="SQ_WAVES_EQ_64" block=SQ event=6 descr="Count number of waves with exactly 64 active threads sent to SQs. (per-simd, emulated, global)"></metric>
<metric name="SQ_WAVES_LT_64" block=SQ event=7 descr="Count number of waves with <64 active threads sent to SQs. (per-simd, emulated, global)"></metric>
<metric name="SQ_WAVES_LT_48" block=SQ event=8 descr="Count number of waves with <48 active threads sent to SQs. (per-simd, emulated, global)"></metric>
<metric name="SQ_WAVES_LT_32" block=SQ event=9 descr="Count number of waves with <32 active threads sent to SQs. (per-simd, emulated, global)"></metric>
<metric name="SQ_WAVES_LT_16" block=SQ event=10 descr="Count number of waves with <16 active threads sent to SQs. (per-simd, emulated, global)"></metric>
<metric name="SQ_BUSY_CU_CYCLES" block=SQ event=13 descr="Count quad-cycles each CU is busy. (nondeterministic, per-simd)"></metric>
<metric name="SQ_ITEMS" block=SQ event=14 descr="Number of valid items per wave. (per-simd, global)"></metric>
<metric name="SQ_INSTS" block=SQ event=25 descr="Number of instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU" block=SQ event=26 descr="Number of VALU instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_ADD_F16" block=SQ event=27 descr="Number of VALU ADD/SUB instructions on float16. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MUL_F16" block=SQ event=28 descr="Number of VALU MUL instructions on float16. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_FMA_F16" block=SQ event=29 descr="Number of VALU FMA/MAD instructions on float16. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_TRANS_F16" block=SQ event=30 descr="Number of VALU transcendental instructions on float16. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_ADD_F32" block=SQ event=31 descr="Number of VALU ADD/SUB instructions on float32. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MUL_F32" block=SQ event=32 descr="Number of VALU MUL instructions on float32. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_FMA_F32" block=SQ event=33 descr="Number of VALU FMA/MAD instructions on float32. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_TRANS_F32" block=SQ event=34 descr="Number of VALU transcendental instructions on float32. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_ADD_F64" block=SQ event=35 descr="Number of VALU ADD/SUB instructions on float64. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MUL_F64" block=SQ event=36 descr="Number of VALU MUL instructions on float64. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_FMA_F64" block=SQ event=37 descr="Number of VALU FMA/MAD instructions on float64. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_TRANS_F64" block=SQ event=38 descr="Number of VALU transcendental instructions on float64. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_INT32" block=SQ event=39 descr="Number of VALU 32-bit integer (signed or unsigned) instructions. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_INT64" block=SQ event=40 descr="Number of VALU 64-bit integer (signed or unsigned) instructions. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_CVT" block=SQ event=41 descr="Number of VALU data conversion instructions. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MFMA_I8" block=SQ event=42 descr="Number of VALU V_MFMA_*_I8 instructions. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MFMA_F16" block=SQ event=43 descr="Number of VALU V_MFMA_*_F16 instructions. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MFMA_BF16" block=SQ event=44 descr="Number of VALU V_MFMA_*_BF16 instructions. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MFMA_F32" block=SQ event=45 descr="Number of VALU V_MFMA_*_F32 instructions. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MFMA_F64" block=SQ event=46 descr="Number of VALU V_MFMA_*_F64 instructions. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MFMA_MOPS_I8" block=SQ event=47 descr="Number of VALU matrix math operations (add or mul) performed divided by 512, assuming a full EXEC mask, of data type I8. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MFMA_MOPS_F16" block=SQ event=48 descr="Number of VALU matrix math operations (add or mul) performed divided by 512, assuming a full EXEC mask, of data type F16. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MFMA_MOPS_BF16" block=SQ event=49 descr="Number of VALU matrix math operations (add or mul) performed divided by 512, assuming a full EXEC mask, of data type BF16. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MFMA_MOPS_F32" block=SQ event=50 descr="Number of VALU matrix math operations (add or mul) performed divided by 512, assuming a full EXEC mask, of data type F32. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VALU_MFMA_MOPS_F64" block=SQ event=51 descr="Number of VALU matrix math operations (add or mul) performed divided by 512, assuming a full EXEC mask, of data type F64. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_MFMA" block=SQ event=52 descr="Number of MFMA instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VMEM_WR" block=SQ event=53 descr="Number of VMEM write instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VMEM_RD" block=SQ event=54 descr="Number of VMEM read instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VMEM" block=SQ event=55 descr="Number of VMEM instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_SALU" block=SQ event=56 descr="Number of SALU instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_SMEM" block=SQ event=57 descr="Number of SMEM instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_FLAT" block=SQ event=58 descr="Number of FLAT instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_FLAT_LDS_ONLY" block=SQ event=59 descr="Number of FLAT instructions issued that read/wrote only from/to LDS (only works if EARLY_TA_DONE is enabled). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_LDS" block=SQ event=60 descr="Number of LDS instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_GDS" block=SQ event=61 descr="Number of GDS instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_EXP_GDS" block=SQ event=63 descr="Number of EXP and GDS instructions issued, excluding skipped export instructions. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_BRANCH" block=SQ event=64 descr="Number of Branch instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_SENDMSG" block=SQ event=65 descr="Number of Sendmsg instructions issued. (per-simd, emulated)"></metric>
<metric name="SQ_INSTS_VSKIPPED" block=SQ event=66 descr="Number of vector instructions skipped. (per-simd, emulated)"></metric>
<metric name="SQ_INST_LEVEL_VMEM" block=SQ event=67 descr="Number of in-flight VMEM instructions. Set next counter to ACCUM_PREV and divide by INSTS_VMEM for average latency. Includes FLAT instructions. (per-simd, level, nondeterministic)"></metric>
<metric name="SQ_INST_LEVEL_SMEM" block=SQ event=68 descr="Number of in-flight SMEM instructions (*2 load/store; *2 atomic; *2 memtime; *4 wb/inv). Set next counter to ACCUM_PREV and divide by INSTS_SMEM for average latency per smem request. Falls slightly short of total request latency because some fetches are divided into two requests that may finish at different times and this counter collects the average latency of the two. (per-simd, level, nondeterministic)"></metric>
<metric name="SQ_INST_LEVEL_LDS" block=SQ event=69 descr="Number of in-flight LDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_LDS for average latency. Includes FLAT instructions. (per-simd, level, nondeterministic)"></metric>
<metric name="SQ_VALU_MFMA_BUSY_CYCLES" block=SQ event=72 descr="Number of cycles the MFMA ALU is busy (per-simd, emulated)"></metric>
<metric name="SQ_WAVE_CYCLES" block=SQ event=74 descr="Number of wave-cycles spent by waves in the CUs (per-simd, nondeterministic). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_WAIT_ANY" block=SQ event=85 descr="Number of wave-cycles spent waiting for anything (per-simd, nondeterministic). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_WAIT_INST_ANY" block=SQ event=88 descr="Number of wave-cycles spent waiting for any instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
<metric name="SQ_ACTIVE_INST_ANY" block=SQ event=96 descr="Number of cycles each wave is working on an instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_ACTIVE_INST_VMEM" block=SQ event=97 descr="Number of cycles the SQ instruction arbiter is working on a VMEM instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_ACTIVE_INST_LDS" block=SQ event=98 descr="Number of cycles the SQ instruction arbiter is working on a LDS instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_ACTIVE_INST_VALU" block=SQ event=99 descr="Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_ACTIVE_INST_SCA" block=SQ event=100 descr="Number of cycles the SQ instruction arbiter is working on a SALU or SMEM instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_ACTIVE_INST_EXP_GDS" block=SQ event=101 descr="Number of cycles the SQ instruction arbiter is working on an EXPORT or GDS instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_ACTIVE_INST_MISC" block=SQ event=102 descr="Number of cycles the SQ instruction arbiter is working on a BRANCH or SENDMSG instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_ACTIVE_INST_FLAT" block=SQ event=103 descr="Number of cycles the SQ instruction arbiter is working on a FLAT instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_INST_CYCLES_VMEM_WR" block=SQ event=104 descr="Number of cycles needed to send addr and cmd data for VMEM write instructions. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_INST_CYCLES_VMEM_RD" block=SQ event=105 descr="Number of cycles needed to send addr and cmd data for VMEM read instructions. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_INST_CYCLES_SMEM" block=SQ event=111 descr="Number of cycles needed to execute scalar memory reads. (per-simd, emulated)"></metric>
<metric name="SQ_INST_CYCLES_SALU" block=SQ event=112 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metric name="SQ_THREAD_CYCLES_VALU" block=SQ event=113 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"></metric>
<metric name="SQ_IFETCH" block=SQ event=115 descr="Number of instruction fetch requests from cache. (per-simd, emulated)"></metric>
<metric name="SQ_IFETCH_LEVEL" block=SQ event=116 descr="Number of instruction fetch requests from cache. (per-simd, level)"></metric>
<metric name="SQ_LDS_BANK_CONFLICT" block=SQ event=121 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"></metric>
<metric name="SQ_LDS_ADDR_CONFLICT" block=SQ event=122 descr="Number of cycles LDS is stalled by address conflicts. (emulated,nondeterministic)"></metric>
<metric name="SQ_LDS_UNALIGNED_STALL" block=SQ event=123 descr="Number of cycles LDS is stalled processing flat unaligned load/store ops. (emulated)"></metric>
<metric name="SQ_LDS_MEM_VIOLATIONS" block=SQ event=124 descr="Number of threads that have a memory violation in the LDS.(emulated)"></metric>
<metric name="SQ_LDS_ATOMIC_RETURN" block=SQ event=125 descr="Number of atomic return cycles in LDS. (per-simd, emulated)"></metric>
<metric name="SQ_LDS_IDX_ACTIVE" block=SQ event=126 descr="Number of cycles LDS is used for indexed (non-direct,non-interpolation) operations. (per-simd, emulated)"></metric>
<metric name="SQ_ACCUM_PREV_HIRES" block=SQ event=185 descr="For counter N, increment by the value of counter N-1."></metric>
<metric name="SQ_WAVES_RESTORED" block=SQ event=186 descr="Count number of context-restored waves sent to SQs. (per-simd, emulated, global)"></metric>
<metric name="SQ_WAVES_SAVED" block=SQ event=187 descr="Count number of context-saved waves. (per-simd, emulated, global)"></metric>
<metric name="SQ_INSTS_SMEM_NORM" block=SQ event=188 descr="Number of SMEM instructions issued normalized to match smem_level (*2 load/store; *2 atomic; *2 memtime; *4 wb/inv). (per-simd, emulated)"></metric>
<metric name="SQC_DCACHE_INPUT_VALID_READYB" block=SQ event=260 descr="Input stalled by SQC (per-SQ, nondeterministic, unwindowed)"></metric>
<metric name="SQC_TC_REQ" block=SQ event=262 descr="Total number of TC requests that were issued by instruction and constant caches. (No-Masking, nondeterministic)"></metric>
<metric name="SQC_TC_INST_REQ" block=SQ event=263 descr="Number of instruction requests to the TC (No-Masking, nondeterministic)"></metric>
<metric name="SQC_TC_DATA_READ_REQ" block=SQ event=264 descr="Number of data read requests to the TC (No-Masking, nondeterministic)"></metric>
<metric name="SQC_TC_DATA_WRITE_REQ" block=SQ event=265 descr="Number of data write requests to the TC (No-Masking, nondeterministic)"></metric>
<metric name="SQC_TC_DATA_ATOMIC_REQ" block=SQ event=266 descr="Number of data atomic requests to the TC (No-Masking, nondeterministic)"></metric>
<metric name="SQC_ICACHE_REQ" block=SQ event=270 descr="Number of requests. (per-SQ, per-Bank)"></metric>
<metric name="SQC_ICACHE_HITS" block=SQ event=271 descr="Number of cache hits. (per-SQ, per-Bank, nondeterministic)"></metric>
<metric name="SQC_ICACHE_MISSES" block=SQ event=272 descr="Number of cache misses, includes uncached requests. (per-SQ, per-Bank, nondeterministic)"></metric>
<metric name="SQC_ICACHE_MISSES_DUPLICATE" block=SQ event=273 descr="Number of misses that were duplicates (access to a non-resident, miss pending CL). (per-SQ, per-Bank, nondeterministic)"></metric>
<metric name="SQC_DCACHE_REQ" block=SQ event=290 descr="Number of requests (post-bank-serialization). (per-SQ, per-Bank)"></metric>
<metric name="SQC_DCACHE_HITS" block=SQ event=291 descr="Number of cache hits. (per-SQ, per-Bank, nondeterministic)"></metric>
<metric name="SQC_DCACHE_MISSES" block=SQ event=292 descr="Number of cache misses, includes uncached requests. (per-SQ, per-Bank, nondeterministic)"></metric>
<metric name="SQC_DCACHE_MISSES_DUPLICATE" block=SQ event=293 descr="Number of misses that were duplicates (access to a non-resident, miss pending CL). (per-SQ, per-Bank, nondeterministic)"></metric>
<metric name="SQC_DCACHE_ATOMIC" block=SQ event=298 descr="Number of atomic requests. (per-SQ, per-Bank)"></metric>
<metric name="SQC_DCACHE_REQ_READ_1" block=SQ event=323 descr="Number of constant cache 1 dw read requests. (per-SQ)"></metric>
<metric name="SQC_DCACHE_REQ_READ_2" block=SQ event=324 descr="Number of constant cache 2 dw read requests. (per-SQ)"></metric>
<metric name="SQC_DCACHE_REQ_READ_4" block=SQ event=325 descr="Number of constant cache 4 dw read requests. (per-SQ)"></metric>
<metric name="SQC_DCACHE_REQ_READ_8" block=SQ event=326 descr="Number of constant cache 8 dw read requests. (per-SQ)"></metric>
<metric name="SQC_DCACHE_REQ_READ_16" block=SQ event=327 descr="Number of constant cache 16 dw read requests. (per-SQ)"></metric>
<metricname="TA_TA_BUSY"block=TAevent=15descr="TA block is busy. Perf_Windowing not supported for this counter."></metric>
<metricname="TA_TOTAL_WAVEFRONTS"block=TAevent=32descr="Total number of wavefronts processed by TA."></metric>
<metricname="TA_BUFFER_WAVEFRONTS"block=TAevent=44descr="Number of buffer wavefronts processed by TA."></metric>
<metricname="TA_BUFFER_READ_WAVEFRONTS"block=TAevent=45descr="Number of buffer read wavefronts processed by TA."></metric>
<metricname="TA_BUFFER_WRITE_WAVEFRONTS"block=TAevent=46descr="Number of buffer write wavefronts processed by TA."></metric>
<metricname="TA_BUFFER_ATOMIC_WAVEFRONTS"block=TAevent=47descr="Number of buffer atomic wavefronts processed by TA."></metric>
<metricname="TA_BUFFER_TOTAL_CYCLES"block=TAevent=49descr="Number of buffer cycles issued to TC."></metric>
<metricname="TA_BUFFER_COALESCED_READ_CYCLES"block=TAevent=52descr="Number of buffer coalesced read cycles issued to TC."></metric>
<metricname="TA_BUFFER_COALESCED_WRITE_CYCLES"block=TAevent=53descr="Number of buffer coalesced write cycles issued to TC."></metric>
<metricname="TA_ADDR_STALLED_BY_TC_CYCLES"block=TAevent=54descr="Number of cycles addr path stalled by TC. Perf_Windowing not supported for this counter."></metric>
<metricname="TA_ADDR_STALLED_BY_TD_CYCLES"block=TAevent=55descr="Number of cycles addr path stalled by TD. Perf_Windowing not supported for this counter."></metric>
<metricname="TA_DATA_STALLED_BY_TC_CYCLES"block=TAevent=56descr="Number of cycles data path stalled by TC. Perf_Windowing not supported for this counter."></metric>
<metricname="TA_FLAT_WAVEFRONTS"block=TAevent=100descr="Number of flat opcode wavefronts processed by the TA."></metric>
<metricname="TA_FLAT_READ_WAVEFRONTS"block=TAevent=101descr="Number of flat opcode reads processed by the TA."></metric>
<metricname="TA_FLAT_WRITE_WAVEFRONTS"block=TAevent=102descr="Number of flat opcode writes processed by the TA."></metric>
<metricname="TA_FLAT_ATOMIC_WAVEFRONTS"block=TAevent=103descr="Number of flat opcode atomics processed by the TA."></metric>
<metricname="TD_TD_BUSY"block=TDevent=1descr="TD is processing or waiting for data. Perf_Windowing not supported for this counter."></metric>
<metricname="TD_TC_STALL"block=TDevent=15descr="TD is stalled waiting for TC data."></metric>
<metricname="TD_SPI_STALL"block=TDevent=18descr="TD is stalled SPI vinit"></metric>
<metricname="TD_LOAD_WAVEFRONT"block=TDevent=25descr="Count the wavefronts with opcode = load, include atomics and store."></metric>
<metricname="TD_ATOMIC_WAVEFRONT"block=TDevent=26descr="Count the wavefronts with opcode = atomic."></metric>
<metricname="TD_STORE_WAVEFRONT"block=TDevent=27descr="Count the wavefronts with opcode = store."></metric>
<metricname="TD_COALESCABLE_WAVEFRONT"block=TDevent=32descr="Count wavefronts that TA finds coalescable."></metric>
<metricname="TCP_GATE_EN1"block=TCPevent=0descr="TCP interface clocks are turned on. Not Windowed."></metric>
<metricname="TCP_GATE_EN2"block=TCPevent=1descr="TCP core clocks are turned on. Not Windowed."></metric>
<metricname="TCP_READ_TAGCONFLICT_STALL_CYCLES"block=TCPevent=11descr="Tagram conflict stall on a read"></metric>
<metricname="TCP_WRITE_TAGCONFLICT_STALL_CYCLES"block=TCPevent=12descr="Tagram conflict stall on a write"></metric>
<metricname="TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES"block=TCPevent=13descr="Tagram conflict stall on an atomic"></metric>
<metricname="TCP_PENDING_STALL_CYCLES"block=TCPevent=22descr="Stall due to data pending from L2"></metric>
<metricname="TCP_TA_TCP_STATE_READ"block=TCPevent=27descr="Number of state reads"></metric>
<metricname="TCP_VOLATILE"block=TCPevent=28descr="Total number of L1 volatile pixels/buffers from TA"></metric>
<metricname="TCP_TOTAL_ACCESSES"block=TCPevent=29descr="Total number of pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_READ+TCP_PERF_SEL_TOTAL_NONREAD"></metric>
<metricname="TCP_TOTAL_READ"block=TCPevent=30descr="Total number of read pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_HIT_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_EVICT_READ"></metric>
<metricname="TCP_TOTAL_WRITE"block=TCPevent=32descr="Total number of local write pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE+ TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE"></metric>
<metricname="TCP_TOTAL_ATOMIC_WITH_RET"block=TCPevent=38descr="Total number of atomic with return pixels/buffers from TA"></metric>
<metricname="TCP_TOTAL_ATOMIC_WITHOUT_RET"block=TCPevent=39descr="Total number of atomic without return pixels/buffers from TA"></metric>
<metricname="TCP_TOTAL_WRITEBACK_INVALIDATES"block=TCPevent=45descr="Total number of cache invalidates. Equals TCP_PERF_SEL_TOTAL_WBINVL1+ TCP_PERF_SEL_TOTAL_WBINVL1_VOL+ TCP_PERF_SEL_CP_TCP_INVALIDATE+ TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL. Not Windowed."></metric>
<metricname="TCP_UTCL1_REQUEST"block=TCPevent=47descr="Total CLIENT_UTCL1 NORMAL requests"></metric>
<metricname="TCP_TOTAL_CACHE_ACCESSES"block=TCPevent=60descr="Count of total cache line (tag) accesses (includes hits and misses)."></metric>
<metricname="TCP_TCP_LATENCY"block=TCPevent=65descr="Total TCP wave latency (from first clock of wave entering to first clock of wave leaving), divide by TA_TCP_STATE_READ to get the average wave latency"></metric>
<metricname="TCP_TCC_READ_REQ_LATENCY"block=TCPevent=66descr="Total TCP->TCC request latency for reads and atomics with return. Not Windowed."></metric>
<metricname="TCP_TCC_WRITE_REQ_LATENCY"block=TCPevent=67descr="Total TCP->TCC request latency for writes and atomics without return. Not Windowed."></metric>
<metricname="TCP_TCC_READ_REQ"block=TCPevent=69descr="Total read requests from TCP to all TCCs"></metric>
<metricname="TCP_TCC_WRITE_REQ"block=TCPevent=70descr="Total write requests from TCP to all TCCs"></metric>
<metricname="TCP_TCC_ATOMIC_WITH_RET_REQ"block=TCPevent=71descr="Total atomic with return requests from TCP to all TCCs"></metric>
<metricname="TCP_TCC_ATOMIC_WITHOUT_RET_REQ"block=TCPevent=72descr="Total atomic without return requests from TCP to all TCCs"></metric>
<metricname="TCP_TCC_NC_READ_REQ"block=TCPevent=75descr="Total read requests with NC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_NC_WRITE_REQ"block=TCPevent=76descr="Total write requests with NC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_NC_ATOMIC_REQ"block=TCPevent=77descr="Total atomic requests with NC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_UC_READ_REQ"block=TCPevent=78descr="Total read requests with UC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_UC_WRITE_REQ"block=TCPevent=79descr="Total write requests with UC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_UC_ATOMIC_REQ"block=TCPevent=80descr="Total atomic requests with UC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_CC_READ_REQ"block=TCPevent=81descr="Total read requests with CC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_CC_WRITE_REQ"block=TCPevent=82descr="Total write requests with CC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_CC_ATOMIC_REQ"block=TCPevent=83descr="Total atomic requests with CC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_RW_READ_REQ"block=TCPevent=85descr="Total read requests with RW mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_RW_WRITE_REQ"block=TCPevent=86descr="Total write requests with RW mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_RW_ATOMIC_REQ"block=TCPevent=87descr="Total atomic requests with RW mtype from this TCP to all TCCs"></metric>
<metricname="TCA_CYCLE"block=TCAevent=1descr="Number of cycles. Not windowable."></metric>
<metricname="TCA_BUSY"block=TCAevent=2descr="Number of cycles we have a request pending. Not windowable."></metric>
<metricname="TCC_CYCLE"block=TCCevent=1descr="Number of cycles. Not windowable."></metric>
<metricname="TCC_BUSY"block=TCCevent=2descr="Number of cycles we have a request pending. Not windowable."></metric>
<metricname="TCC_REQ"block=TCCevent=3descr="Number of requests of all types. This is measured at the tag block. This may be more than the number of requests arriving at the TCC, but it is a good indication of the total amount of work that needs to be performed."></metric>
<metricname="TCC_STREAMING_REQ"block=TCCevent=4descr="Number of streaming requests. This is measured at the tag block."></metric>
<metricname="TCC_NC_REQ"block=TCCevent=5descr="The number of noncoherently cached requests. This is measured at the tag block."></metric>
<metricname="TCC_UC_REQ"block=TCCevent=6descr="The number of uncached requests. This is measured at the tag block."></metric>
<metricname="TCC_CC_REQ"block=TCCevent=7descr="The number of coherently cached requests. This is measured at the tag block."></metric>
<metricname="TCC_RW_REQ"block=TCCevent=8descr="The number of RW requests. This is measured at the tag block."></metric>
<metricname="TCC_PROBE"block=TCCevent=9descr="Number of probe requests. Not windowable."></metric>
<metricname="TCC_PROBE_ALL"block=TCCevent=10descr="Number of external probe requests with EA_TCC_preq_all == 1. Not windowable."></metric>
<metricname="TCC_READ"block=TCCevent=12descr="Number of read requests. Compressed reads are included in this, but metadata reads are not included."></metric>
<metricname="TCC_WRITE"block=TCCevent=13descr="Number of write requests."></metric>
<metricname="TCC_ATOMIC"block=TCCevent=14descr="Number of atomic requests of all types."></metric>
<metricname="TCC_HIT"block=TCCevent=17descr="Number of cache hits."></metric>
<metricname="TCC_MISS"block=TCCevent=19descr="Number of cache misses. UC reads count as misses."></metric>
<metricname="TCC_WRITEBACK"block=TCCevent=22descr="Number of lines written back to main memory. This includes writebacks of dirty lines and uncached write/atomic requests."></metric>
<metricname="TCC_EA_WRREQ"block=TCCevent=26descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands."></metric>
<metricname="TCC_EA_WRREQ_64B"block=TCCevent=27descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface."></metric>
<metricname="TCC_EA_WR_UNCACHED_32B"block=TCCevent=29descr="Number of 32-byte write/atomic going over the TC_EA_wrreq interface due to uncached traffic. Note that CC mtypes can produce uncached requests, and those are included in this. A 64-byte request will be counted as 2"></metric>
<metricname="TCC_EA_WRREQ_STALL"block=TCCevent=30descr="Number of cycles a write request was stalled."></metric>
<metricname="TCC_EA_WRREQ_IO_CREDIT_STALL"block=TCCevent=31descr="Number of cycles an EA write request was stalled because the interface was out of IO credits."></metric>
<metricname="TCC_EA_WRREQ_GMI_CREDIT_STALL"block=TCCevent=32descr="Number of cycles an EA write request was stalled because the interface was out of GMI credits."></metric>
<metricname="TCC_EA_WRREQ_DRAM_CREDIT_STALL"block=TCCevent=33descr="Number of cycles an EA write request was stalled because the interface was out of DRAM credits."></metric>
<metricname="TCC_TOO_MANY_EA_WRREQS_STALL"block=TCCevent=34descr="Number of cycles the TCC could not send an EA write request because it already reached its maximum number of pending EA write requests."></metric>
<metricname="TCC_EA_WRREQ_LEVEL"block=TCCevent=35descr="The sum of the number of EA write requests in flight. This is primarily meant for measuring average EA write latency. Average write latency = TCC_PERF_SEL_EA_WRREQ_LEVEL/TCC_PERF_SEL_EA_WRREQ."></metric>
<metricname="TCC_EA_ATOMIC"block=TCCevent=36descr="Number of transactions going over the TC_EA_wrreq interface that are actually atomic requests."></metric>
<metricname="TCC_EA_ATOMIC_LEVEL"block=TCCevent=37descr="The sum of the number of EA atomics in flight. This is primarily meant for measuring average EA atomic latency. Average atomic latency = TCC_PERF_SEL_EA_WRREQ_ATOMIC_LEVEL/TCC_PERF_SEL_EA_WRREQ_ATOMIC."></metric>
<metricname="TCC_EA_RDREQ"block=TCCevent=38descr="Number of TCC/EA read requests (either 32-byte or 64-byte)"></metric>
<metricname="TCC_EA_RDREQ_32B"block=TCCevent=39descr="Number of 32-byte TCC/EA read requests"></metric>
<metricname="TCC_EA_RD_UNCACHED_32B"block=TCCevent=40descr="Number of 32-byte TCC/EA read due to uncached traffic. A 64-byte request will be counted as 2"></metric>
<metricname="TCC_EA_RDREQ_IO_CREDIT_STALL"block=TCCevent=41descr="Number of cycles there was a stall because the read request interface was out of IO credits. Stalls occur regardless of whether a read needed to be performed or not."></metric>
<metricname="TCC_EA_RDREQ_GMI_CREDIT_STALL"block=TCCevent=42descr="Number of cycles there was a stall because the read request interface was out of GMI credits. Stalls occur regardless of whether a read needed to be performed or not."></metric>
<metricname="TCC_EA_RDREQ_DRAM_CREDIT_STALL"block=TCCevent=43descr="Number of cycles there was a stall because the read request interface was out of DRAM credits. Stalls occur regardless of whether a read needed to be performed or not."></metric>
<metricname="TCC_EA_RDREQ_LEVEL"block=TCCevent=44descr="The sum of the number of TCC/EA read requests in flight. This is primarily meant for measuring average EA read latency. Average read latency = TCC_PERF_SEL_EA_RDREQ_LEVEL/TCC_PERF_SEL_EA_RDREQ."></metric>
<metricname="TCC_TAG_STALL"block=TCCevent=45descr="Number of cycles the normal request pipeline in the tag was stalled for any reason. Normally, stalls of this nature are measured exactly from one point in the pipeline, but that is not the case for this counter. Probes can stall the pipeline at a variety of places, and there is no single point that can reasonably measure the total stalls accurately."></metric>
<metricname="TCC_NORMAL_WRITEBACK"block=TCCevent=68descr="Number of writebacks due to requests that are not writeback requests."></metric>
<metricname="TCC_ALL_TC_OP_WB_WRITEBACK"block=TCCevent=73descr="Number of writebacks due to all TC_OP writeback requests."></metric>
<metricname="TCC_NORMAL_EVICT"block=TCCevent=74descr="Number of evictions due to requests that are not invalidate or probe requests."></metric>
<metricname="TCC_ALL_TC_OP_INV_EVICT"block=TCCevent=80descr="Number of evictions due to all TC_OP invalidate requests."></metric>
<metricname="TCC_EA_RDREQ_DRAM"block=TCCevent=102descr="Number of TCC/EA read requests (either 32-byte or 64-byte) destined for DRAM (MC)."></metric>
<metricname="TCC_EA_WRREQ_DRAM"block=TCCevent=103descr="Number of TCC/EA write requests (either 32-byte or 64-byte) destined for DRAM (MC)."></metric>
<metricname="SQ_WAIT_INST_LDS"block=SQevent=96descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
<metricname="TCP_TCP_TA_DATA_STALL_CYCLES"block=TCPevent=6descr="TCP stalls TA data interface. Now Windowed."></metric>
<metricname="GRBM_COUNT"block=GRBMevent=0descr="Tie High - Count Number of Clocks"></metric>
<metricname="GRBM_GUI_ACTIVE"block=GRBMevent=2descr="The GUI is Active"></metric>
<metricname="GRBM_CP_BUSY"block=GRBMevent=3descr="Any of the Command Processor (CPG/CPC/CPF) blocks are busy."></metric>
<metricname="GRBM_SPI_BUSY"block=GRBMevent=11descr="Any of the Shader Pipe Interpolators (SPI) are busy in the shader engine(s)."></metric>
<metricname="GRBM_TA_BUSY"block=GRBMevent=13descr="Any of the Texture Pipes (TA) are busy in the shader engine(s)."></metric>
<metricname="GRBM_TC_BUSY"block=GRBMevent=28descr="Any of the Texture Cache Blocks (TCP/TCI/TCA/TCC) are busy."></metric>
<metricname="GRBM_CPC_BUSY"block=GRBMevent=30descr="The Command Processor Compute (CPC) is busy."></metric>
<metricname="GRBM_CPF_BUSY"block=GRBMevent=31descr="The Command Processor Fetchers (CPF) is busy."></metric>
<metricname="GRBM_UTCL2_BUSY"block=GRBMevent=34descr="The Unified Translation Cache Level-2 (UTCL2) block is busy."></metric>
<metricname="GRBM_EA_BUSY"block=GRBMevent=35descr="The Efficiency Arbiter (EA) block is busy."></metric>
<metricname="CPC_ME1_BUSY_FOR_PACKET_DECODE"block=CPCevent=13descr="Me1 busy for packet decode."></metric>
<metricname="CPC_UTCL1_STALL_ON_TRANSLATION"block=CPCevent=24descr="One of the UTCL1s is stalled waiting on translation, XNACK or PENDING response."></metric>
<metricname="CPF_CMP_UTCL1_STALL_ON_TRANSLATION"block=CPFevent=20descr="One of the Compute UTCL1s is stalled waiting on translation, XNACK or PENDING response."></metric>
<metricname="CPF_CPF_TCIU_STALL"block=CPFevent=28descr="CPF TCIU interface Stalled waiting on Free, Tags."></metric>
<metricname="SPI_CSN_WINDOW_VALID"block=SPIevent=47descr="Clock count enabled by perfcounter_start event. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metricname="SPI_CSN_BUSY"block=SPIevent=48descr="Number of clocks with outstanding waves (SPI or SH). Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metricname="SPI_CSN_NUM_THREADGROUPS"block=SPIevent=49descr="Number of threadgroups launched. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metricname="SPI_CSN_WAVE"block=SPIevent=52descr="Number of waves. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metricname="SPI_RA_REQ_NO_ALLOC"block=SPIevent=79descr="Arb cycles with requests but no allocation. Source is RA0"></metric>
<metricname="SPI_RA_REQ_NO_ALLOC_CSN"block=SPIevent=85descr="Arb cycles with CSn req and no CSn alloc. Source is RA0"></metric>
<metricname="SPI_RA_RES_STALL_CSN"block=SPIevent=91descr="Arb cycles with CSn req and no CSn fits. Source is RA0"></metric>
<metricname="SPI_RA_TMP_STALL_CSN"block=SPIevent=97descr="Cycles where csn wants to req but does not fit in temp space."></metric>
<metricname="SPI_RA_WAVE_SIMD_FULL_CSN"block=SPIevent=103descr="Sum of SIMD where WAVE can't take csn wave when !fits. Source is RA0"></metric>
<metricname="SPI_RA_VGPR_SIMD_FULL_CSN"block=SPIevent=109descr="Sum of SIMD where VGPR can't take csn wave when !fits. Source is RA0"></metric>
<metricname="SPI_RA_SGPR_SIMD_FULL_CSN"block=SPIevent=115descr="Sum of SIMD where SGPR can't take csn wave when !fits. Source is RA0"></metric>
<metricname="SPI_RA_LDS_CU_FULL_CSN"block=SPIevent=120descr="Sum of CU where LDS can't take csn wave when !fits. Source is RA0"></metric>
<metricname="SPI_RA_BAR_CU_FULL_CSN"block=SPIevent=123descr="Sum of CU where BARRIER can't take csn wave when !fits. Source is RA0"></metric>
<metricname="SPI_RA_BULKY_CU_FULL_CSN"block=SPIevent=125descr="Sum of CU where BULKY can't take csn wave when !fits. Source is RA0"></metric>
<metricname="SPI_RA_TGLIM_CU_FULL_CSN"block=SPIevent=127descr="Cycles where csn wants to req but all CU are at tg_limit"></metric>
<metricname="SPI_RA_WVLIM_STALL_CSN"block=SPIevent=133descr="Number of clocks csn is stalled due to WAVE LIMIT."></metric>
<metricname="SPI_SWC_CSC_WR"block=SPIevent=189descr="Number of clocks to write CSC waves to SGPRs (need to multiply this value by 4) Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metricname="SPI_VWC_CSC_WR"block=SPIevent=195descr="Number of clocks to write CSC waves to VGPRs (need to multiply this value by 4) Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"></metric>
<metricname="SQ_ACCUM_PREV"block=SQevent=1descr="For counter N, increment by the value of counter N-1. Only accumulates once every 4 cycles."></metric>
<metricname="SQ_BUSY_CYCLES"block=SQevent=3descr="Clock cycles while SQ is reporting that it is busy. (nondeterministic, per-simd, global)"></metric>
<metricname="SQ_WAVES"block=SQevent=4descr="Count number of waves sent to SQs. (per-simd, emulated, global)"></metric>
<metricname="SQ_LEVEL_WAVES"block=SQevent=5descr="Track the number of waves. Set ACCUM_PREV for the next counter to use this. (level, per-simd, global)"></metric>
<metricname="SQ_WAVES_EQ_64"block=SQevent=6descr="Count number of waves with exactly 64 active threads sent to SQs. (per-simd, emulated, global)"></metric>
<metricname="SQ_WAVES_LT_64"block=SQevent=7descr="Count number of waves with <64 active threads sent to SQs. (per-simd, emulated, global)"></metric>
<metricname="SQ_WAVES_LT_48"block=SQevent=8descr="Count number of waves with <48 active threads sent to SQs. (per-simd, emulated, global)"></metric>
<metricname="SQ_WAVES_LT_32"block=SQevent=9descr="Count number of waves with <32 active threads sent to SQs. (per-simd, emulated, global)"></metric>
<metricname="SQ_WAVES_LT_16"block=SQevent=10descr="Count number of waves with <16 active threads sent to SQs. (per-simd, emulated, global)"></metric>
<metricname="SQ_BUSY_CU_CYCLES"block=SQevent=13descr="Count quad-cycles each CU is busy. (nondeterministic, per-simd)"></metric>
<metricname="SQ_ITEMS"block=SQevent=14descr="Number of valid items per wave. (per-simd, global)"></metric>
<metricname="SQ_INSTS"block=SQevent=25descr="Number of instructions issued. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU"block=SQevent=26descr="Number of VALU instructions issued. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_ADD_F16"block=SQevent=27descr="Number of VALU ADD/SUB instructions on float16. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MUL_F16"block=SQevent=28descr="Number of VALU MUL instructions on float16. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_FMA_F16"block=SQevent=29descr="Number of VALU FMA/MAD instructions on float16. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_TRANS_F16"block=SQevent=30descr="Number of VALU transcendental instructions on float16. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_ADD_F32"block=SQevent=31descr="Number of VALU ADD/SUB instructions on float32. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MUL_F32"block=SQevent=32descr="Number of VALU MUL instructions on float32. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_FMA_F32"block=SQevent=33descr="Number of VALU FMA/MAD instructions on float32. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_TRANS_F32"block=SQevent=34descr="Number of VALU transcendental instructions on float32. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_ADD_F64"block=SQevent=35descr="Number of VALU ADD/SUB instructions on float64. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MUL_F64"block=SQevent=36descr="Number of VALU MUL instructions on float64. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_FMA_F64"block=SQevent=37descr="Number of VALU FMA/MAD instructions on float64. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_TRANS_F64"block=SQevent=38descr="Number of VALU transcendental instructions on float64. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_INT32"block=SQevent=39descr="Number of VALU 32-bit integer (signed or unsigned) instructions. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_INT64"block=SQevent=40descr="Number of VALU 64-bit integer (signed or unsigned) instructions. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_CVT"block=SQevent=41descr="Number of VALU data conversion instructions. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MFMA_I8"block=SQevent=42descr="Number of VALU V_MFMA_*_I8 instructions. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MFMA_F16"block=SQevent=43descr="Number of VALU V_MFMA_*_F16 instructions. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MFMA_BF16"block=SQevent=44descr="Number of VALU V_MFMA_*_BF16 instructions. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MFMA_F32"block=SQevent=45descr="Number of VALU V_MFMA_*_F32 instructions. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MFMA_F64"block=SQevent=46descr="Number of VALU V_MFMA_*_F64 instructions. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MFMA_MOPS_I8"block=SQevent=49descr="Number of VALU matrix math operations (add or mul) performed divided by 512, assuming a full EXEC mask, of data type I8. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MFMA_MOPS_F16"block=SQevent=50descr="Number of VALU matrix math operations (add or mul) performed divided by 512, assuming a full EXEC mask, of data type F16. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MFMA_MOPS_BF16"block=SQevent=51descr="Number of VALU matrix math operations (add or mul) performed divided by 512, assuming a full EXEC mask, of data type BF16. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MFMA_MOPS_F32"block=SQevent=52descr="Number of VALU matrix math operations (add or mul) performed divided by 512, assuming a full EXEC mask, of data type F32. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VALU_MFMA_MOPS_F64"block=SQevent=53descr="Number of VALU matrix math operations (add or mul) performed divided by 512, assuming a full EXEC mask, of data type F64. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_MFMA"block=SQevent=56descr="Number of MFMA instructions issued. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VMEM_WR"block=SQevent=57descr="Number of VMEM write instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VMEM_RD"block=SQevent=58descr="Number of VMEM read instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VMEM"block=SQevent=59descr="Number of VMEM instructions issued. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_SALU"block=SQevent=60descr="Number of SALU instructions issued. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_SMEM"block=SQevent=61descr="Number of SMEM instructions issued. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_FLAT"block=SQevent=62descr="Number of FLAT instructions issued. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_LDS"block=SQevent=65descr="Number of LDS instructions issued (including FLAT). (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_GDS"block=SQevent=66descr="Number of GDS instructions issued. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_EXP_GDS"block=SQevent=68descr="Number of EXP and GDS instructions issued, excluding skipped export instructions. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_BRANCH"block=SQevent=69descr="Number of Branch instructions issued. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_SENDMSG"block=SQevent=70descr="Number of Sendmsg instructions issued. (per-simd, emulated)"></metric>
<metricname="SQ_INSTS_VSKIPPED"block=SQevent=71descr="Number of vector instructions skipped. (per-simd, emulated)"></metric>
<metricname="SQ_INST_LEVEL_VMEM"block=SQevent=72descr="Number of in-flight VMEM instructions. Set next counter to ACCUM_PREV and divide by INSTS_VMEM for average latency. Includes FLAT instructions. (per-simd, level, nondeterministic)"></metric>
<metricname="SQ_INST_LEVEL_SMEM"block=SQevent=73descr="Number of in-flight SMEM instructions (*2 load/store; *2 atomic; *2 memtime; *4 wb/inv). Set next counter to ACCUM_PREV and divide by INSTS_SMEM for average latency per smem request. Falls slightly short of total request latency because some fetches are divided into two requests that may finish at different times and this counter collects the average latency of the two. (per-simd, level, nondeterministic)"></metric>
<metricname="SQ_INST_LEVEL_LDS"block=SQevent=74descr="Number of in-flight LDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_LDS for average latency. Includes FLAT instructions. (per-simd, level, nondeterministic)"></metric>
<metricname="SQ_VALU_MFMA_BUSY_CYCLES"block=SQevent=77descr="Number of cycles the MFMA ALU is busy (per-simd, emulated)"></metric>
<metricname="SQ_WAVE_CYCLES"block=SQevent=79descr="Number of wave-cycles spent by waves in the CUs (per-simd, nondeterministic). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_WAIT_ANY"block=SQevent=90descr="Number of wave-cycles spent waiting for anything (per-simd, nondeterministic). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_WAIT_INST_ANY"block=SQevent=93descr="Number of wave-cycles spent waiting for any instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
<metricname="SQ_ACTIVE_INST_ANY"block=SQevent=101descr="Number of cycles each wave is working on an instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_ACTIVE_INST_VMEM"block=SQevent=102descr="Number of cycles the SQ instruction arbiter is working on a VMEM instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_ACTIVE_INST_LDS"block=SQevent=103descr="Number of cycles the SQ instruction arbiter is working on a LDS instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_ACTIVE_INST_VALU"block=SQevent=104descr="Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_ACTIVE_INST_SCA"block=SQevent=105descr="Number of cycles the SQ instruction arbiter is working on a SALU or SMEM instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_ACTIVE_INST_EXP_GDS"block=SQevent=106descr="Number of cycles the SQ instruction arbiter is working on an EXPORT or GDS instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_ACTIVE_INST_MISC"block=SQevent=107descr="Number of cycles the SQ instruction arbiter is working on a BRANCH or SENDMSG instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_ACTIVE_INST_FLAT"block=SQevent=108descr="Number of cycles the SQ instruction arbiter is working on a FLAT instruction. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_INST_CYCLES_VMEM_WR"block=SQevent=109descr="Number of cycles needed to send addr and cmd data for VMEM write instructions. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_INST_CYCLES_VMEM_RD"block=SQevent=110descr="Number of cycles needed to send addr and cmd data for VMEM read instructions. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_INST_CYCLES_SMEM"block=SQevent=116descr="Number of cycles needed to execute scalar memory reads. (per-simd, emulated)"></metric>
<metricname="SQ_INST_CYCLES_SALU"block=SQevent=117descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated). Units in quad-cycles(4 cycles)"></metric>
<metricname="SQ_THREAD_CYCLES_VALU"block=SQevent=118descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"></metric>
<metricname="SQ_IFETCH"block=SQevent=120descr="Number of instruction fetch requests from cache. (per-simd, emulated)"></metric>
<metricname="SQ_IFETCH_LEVEL"block=SQevent=121descr="Number of instruction fetch requests from cache. (per-simd, level)"></metric>
<metricname="SQ_LDS_BANK_CONFLICT"block=SQevent=126descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"></metric>
<metricname="SQ_LDS_ADDR_CONFLICT"block=SQevent=127descr="Number of cycles LDS is stalled by address conflicts. (emulated,nondeterministic)"></metric>
<metricname="SQ_LDS_UNALIGNED_STALL"block=SQevent=128descr="Number of cycles LDS is stalled processing flat unaligned load/store ops. (emulated)"></metric>
<metricname="SQ_LDS_MEM_VIOLATIONS"block=SQevent=129descr="Number of threads that have a memory violation in the LDS. (emulated)"></metric>
<metricname="SQ_LDS_ATOMIC_RETURN"block=SQevent=130descr="Number of atomic return cycles in LDS. (per-simd, emulated)"></metric>
<metricname="SQ_LDS_IDX_ACTIVE"block=SQevent=131descr="Number of cycles LDS is used for indexed (non-direct,non-interpolation) operations. (per-simd, emulated)"></metric>
<metricname="SQ_ACCUM_PREV_HIRES"block=SQevent=184descr="For counter N, increment by the value of counter N-1."></metric>
<metricname="SQ_WAVES_RESTORED"block=SQevent=185descr="Count number of context-restored waves sent to SQs. (per-simd, emulated, global)"></metric>
<metricname="SQ_WAVES_SAVED"block=SQevent=186descr="Count number of context-saved waves. (per-simd, emulated, global)"></metric>
<metricname="SQ_INSTS_SMEM_NORM"block=SQevent=187descr="Number of SMEM instructions issued normalized to match smem_level (*2 load/store; *2 atomic; *2 memtime; *4 wb/inv). (per-simd, emulated)"></metric>
<metricname="SQC_ICACHE_INPUT_VALID_READYB"block=SQevent=257descr="Input stalled by SQC (per-SQ, nondeterministic, unwindowed)"></metric>
<metricname="SQC_DCACHE_INPUT_VALID_READYB"block=SQevent=260descr="Input stalled by SQC (per-SQ, nondeterministic, unwindowed)"></metric>
<metricname="SQC_TC_REQ"block=SQevent=262descr="Total number of TC requests that were issued by instruction and constant caches. (No-Masking, nondeterministic)"></metric>
<metricname="SQC_TC_INST_REQ"block=SQevent=263descr="Number of instruction requests to the TC (No-Masking, nondeterministic)"></metric>
<metricname="SQC_TC_DATA_READ_REQ"block=SQevent=264descr="Number of data read requests to the TC (No-Masking, nondeterministic)"></metric>
<metricname="SQC_TC_DATA_WRITE_REQ"block=SQevent=265descr="Number of data write requests to the TC (No-Masking, nondeterministic)"></metric>
<metricname="SQC_TC_DATA_ATOMIC_REQ"block=SQevent=266descr="Number of data atomic requests to the TC (No-Masking, nondeterministic)"></metric>
<metricname="SQC_ICACHE_BUSY_CYCLES"block=SQevent=269descr="Clock cycles while cache is reporting that it is busy. (No-Masking, nondeterministic, unwindowed)"></metric>
<metricname="SQC_ICACHE_REQ"block=SQevent=270descr="Number of requests. (per-SQ, per-Bank)"></metric>
<metricname="SQC_ICACHE_HITS"block=SQevent=271descr="Number of cache hits. (per-SQ, per-Bank, nondeterministic)"></metric>
<metricname="SQC_ICACHE_MISSES"block=SQevent=272descr="Number of cache misses, includes uncached requests. (per-SQ, per-Bank, nondeterministic)"></metric>
<metricname="SQC_ICACHE_MISSES_DUPLICATE"block=SQevent=273descr="Number of misses that were duplicates (access to a non-resident, miss pending CL). (per-SQ, per-Bank, nondeterministic)"></metric>
<metricname="SQC_DCACHE_BUSY_CYCLES"block=SQevent=289descr="Clock cycles while cache is reporting that it is busy. (No-Masking, nondeterministic, unwindowed)"></metric>
<metricname="SQC_DCACHE_REQ"block=SQevent=290descr="Number of requests (post-bank-serialization). (per-SQ, per-Bank)"></metric>
<metricname="SQC_DCACHE_HITS"block=SQevent=291descr="Number of cache hits. (per-SQ, per-Bank, nondeterministic)"></metric>
<metricname="SQC_DCACHE_MISSES"block=SQevent=292descr="Number of cache misses, includes uncached requests. (per-SQ, per-Bank, nondeterministic)"></metric>
<metricname="SQC_DCACHE_MISSES_DUPLICATE"block=SQevent=293descr="Number of misses that were duplicates (access to a non-resident, miss pending CL). (per-SQ, per-Bank, nondeterministic)"></metric>
<metricname="SQC_DCACHE_ATOMIC"block=SQevent=298descr="Number of atomic requests. (per-SQ, per-Bank)"></metric>
<metricname="SQC_DCACHE_REQ_READ_1"block=SQevent=323descr="Number of constant cache 1 dw read requests. (per-SQ)"></metric>
<metricname="SQC_DCACHE_REQ_READ_2"block=SQevent=324descr="Number of constant cache 2 dw read requests. (per-SQ)"></metric>
<metricname="SQC_DCACHE_REQ_READ_4"block=SQevent=325descr="Number of constant cache 4 dw read requests. (per-SQ)"></metric>
<metricname="SQC_DCACHE_REQ_READ_8"block=SQevent=326descr="Number of constant cache 8 dw read requests. (per-SQ)"></metric>
<metricname="SQC_DCACHE_REQ_READ_16"block=SQevent=327descr="Number of constant cache 16 dw read requests. (per-SQ)"></metric>
<metricname="TA_TA_BUSY"block=TAevent=13descr="TA block is busy. Perf_Windowing not supported for this counter."></metric>
<metricname="TA_TOTAL_WAVEFRONTS"block=TAevent=29descr="Total number of wavefronts processed by TA."></metric>
<metricname="TA_BUFFER_WAVEFRONTS"block=TAevent=32descr="Number of buffer wavefronts processed by TA."></metric>
<metricname="TA_BUFFER_READ_WAVEFRONTS"block=TAevent=33descr="Number of buffer read wavefronts processed by TA."></metric>
<metricname="TA_BUFFER_WRITE_WAVEFRONTS"block=TAevent=34descr="Number of buffer write wavefronts processed by TA."></metric>
<metricname="TA_BUFFER_ATOMIC_WAVEFRONTS"block=TAevent=35descr="Number of buffer atomic wavefronts processed by TA."></metric>
<metricname="TA_BUFFER_TOTAL_CYCLES"block=TAevent=37descr="Number of buffer cycles issued to TC."></metric>
<metricname="TA_BUFFER_COALESCED_READ_CYCLES"block=TAevent=40descr="Number of buffer coalesced read cycles issued to TC."></metric>
<metricname="TA_BUFFER_COALESCED_WRITE_CYCLES"block=TAevent=41descr="Number of buffer coalesced write cycles issued to TC."></metric>
<metricname="TA_ADDR_STALLED_BY_TC_CYCLES"block=TAevent=42descr="Number of cycles addr path stalled by TC. Perf_Windowing not supported for this counter."></metric>
<metricname="TA_ADDR_STALLED_BY_TD_CYCLES"block=TAevent=43descr="Number of cycles addr path stalled by TD. Perf_Windowing not supported for this counter."></metric>
<metricname="TA_DATA_STALLED_BY_TC_CYCLES"block=TAevent=44descr="Number of cycles data path stalled by TC. Perf_Windowing not supported for this counter."></metric>
<metricname="TA_FLAT_WAVEFRONTS"block=TAevent=51descr="Number of flat opcode wavefronts processed by the TA."></metric>
<metricname="TA_FLAT_READ_WAVEFRONTS"block=TAevent=52descr="Number of flat opcode reads processed by the TA."></metric>
<metricname="TA_FLAT_WRITE_WAVEFRONTS"block=TAevent=53descr="Number of flat opcode writes processed by the TA."></metric>
<metricname="TA_FLAT_ATOMIC_WAVEFRONTS"block=TAevent=54descr="Number of flat opcode atomics processed by the TA."></metric>
<metricname="TD_TD_BUSY"block=TDevent=1descr="TD is processing or waiting for data. Perf_Windowing not supported for this counter."></metric>
<metricname="TD_TC_STALL"block=TDevent=12descr="TD is stalled waiting for TC data."></metric>
<metricname="TD_SPI_STALL"block=TDevent=15descr="TD is stalled SPI vinit"></metric>
<metricname="TD_LOAD_WAVEFRONT"block=TDevent=16descr="Count the wavefronts with opcode = load, include atomics and store."></metric>
<metricname="TD_ATOMIC_WAVEFRONT"block=TDevent=17descr="Count the wavefronts with opcode = atomic."></metric>
<metricname="TD_STORE_WAVEFRONT"block=TDevent=18descr="Count the wavefronts with opcode = store."></metric>
<metricname="TD_COALESCABLE_WAVEFRONT"block=TDevent=21descr="Count wavefronts that TA finds coalescable."></metric>
<metricname="TCP_GATE_EN1"block=TCPevent=0descr="TCP interface clocks are turned on. Not Windowed."></metric>
<metricname="TCP_GATE_EN2"block=TCPevent=1descr="TCP core clocks are turned on. Not Windowed."></metric>
<metricname="TCP_READ_TAGCONFLICT_STALL_CYCLES"block=TCPevent=10descr="Tagram conflict stall on a read"></metric>
<metricname="TCP_WRITE_TAGCONFLICT_STALL_CYCLES"block=TCPevent=11descr="Tagram conflict stall on a write"></metric>
<metricname="TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES"block=TCPevent=12descr="Tagram conflict stall on an atomic"></metric>
<metricname="TCP_PENDING_STALL_CYCLES"block=TCPevent=21descr="Stall due to data pending from L2"></metric>
<metricname="TCP_TA_TCP_STATE_READ"block=TCPevent=25descr="Number of state reads"></metric>
<metricname="TCP_VOLATILE"block=TCPevent=26descr="Total number of L1 volatile pixels/buffers from TA"></metric>
<metricname="TCP_TOTAL_ACCESSES"block=TCPevent=27descr="Total number of pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_READ+TCP_PERF_SEL_TOTAL_NONREAD"></metric>
<metricname="TCP_TOTAL_READ"block=TCPevent=28descr="Total number of read pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_HIT_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_EVICT_READ"></metric>
<metricname="TCP_TOTAL_WRITE"block=TCPevent=30descr="Total number of local write pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE+ TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE"></metric>
<metricname="TCP_TOTAL_ATOMIC_WITH_RET"block=TCPevent=36descr="Total number of atomic with return pixels/buffers from TA"></metric>
<metricname="TCP_TOTAL_ATOMIC_WITHOUT_RET"block=TCPevent=37descr="Total number of atomic without return pixels/buffers from TA"></metric>
<metricname="TCP_TOTAL_WRITEBACK_INVALIDATES"block=TCPevent=43descr="Total number of cache invalidates. Equals TCP_PERF_SEL_TOTAL_WBINVL1+ TCP_PERF_SEL_TOTAL_WBINVL1_VOL+ TCP_PERF_SEL_CP_TCP_INVALIDATE+ TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL. Not Windowed."></metric>
<metricname="TCP_UTCL1_REQUEST"block=TCPevent=45descr="Total CLIENT_UTCL1 NORMAL requests"></metric>
<metricname="TCP_TOTAL_CACHE_ACCESSES"block=TCPevent=60descr="Count of total cache line (tag) accesses (includes hits and misses)."></metric>
<metricname="TCP_TCC_READ_REQ"block=TCPevent=65descr="Total read requests from TCP to all TCCs"></metric>
<metricname="TCP_TCC_WRITE_REQ"block=TCPevent=66descr="Total write requests from TCP to all TCCs"></metric>
<metricname="TCP_TCC_ATOMIC_WITH_RET_REQ"block=TCPevent=67descr="Total atomic with return requests from TCP to all TCCs"></metric>
<metricname="TCP_TCC_ATOMIC_WITHOUT_RET_REQ"block=TCPevent=68descr="Total atomic without return requests from TCP to all TCCs"></metric>
<metricname="TCP_TCC_NC_READ_REQ"block=TCPevent=71descr="Total read requests with NC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_NC_WRITE_REQ"block=TCPevent=72descr="Total write requests with NC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_NC_ATOMIC_REQ"block=TCPevent=73descr="Total atomic requests with NC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_UC_READ_REQ"block=TCPevent=74descr="Total read requests with UC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_UC_WRITE_REQ"block=TCPevent=75descr="Total write requests with UC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_UC_ATOMIC_REQ"block=TCPevent=76descr="Total atomic requests with UC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_CC_READ_REQ"block=TCPevent=77descr="Total read requests with CC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_CC_WRITE_REQ"block=TCPevent=78descr="Total write requests with CC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_CC_ATOMIC_REQ"block=TCPevent=79descr="Total atomic requests with CC mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_RW_READ_REQ"block=TCPevent=80descr="Total read requests with RW mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_RW_WRITE_REQ"block=TCPevent=81descr="Total write requests with RW mtype from this TCP to all TCCs"></metric>
<metricname="TCP_TCC_RW_ATOMIC_REQ"block=TCPevent=82descr="Total atomic requests with RW mtype from this TCP to all TCCs"></metric>
<metricname="TCA_CYCLE"block=TCAevent=1descr="Number of cycles. Not windowable."></metric>
<metricname="TCA_BUSY"block=TCAevent=2descr="Number of cycles we have a request pending. Not windowable."></metric>
<metricname="TCC_CYCLE"block=TCCevent=1descr="Number of cycles. Not windowable."></metric>
<metricname="TCC_BUSY"block=TCCevent=2descr="Number of cycles we have a request pending. Not windowable."></metric>
<metricname="TCC_REQ"block=TCCevent=3descr="Number of requests of all types. This is measured at the tag block. This may be more than the number of requests arriving at the TCC, but it is a good indication of the total amount of work that needs to be performed."></metric>
<metricname="TCC_STREAMING_REQ"block=TCCevent=4descr="Number of streaming requests. This is measured at the tag block."></metric>
<metricname="TCC_NC_REQ"block=TCCevent=5descr="The number of noncoherently cached requests. This is measured at the tag block."></metric>
<metricname="TCC_UC_REQ"block=TCCevent=6descr="The number of uncached requests. This is measured at the tag block."></metric>
<metricname="TCC_CC_REQ"block=TCCevent=7descr="The number of coherently cached requests. This is measured at the tag block."></metric>
<metricname="TCC_RW_REQ"block=TCCevent=8descr="The number of RW requests. This is measured at the tag block."></metric>
<metricname="TCC_PROBE"block=TCCevent=9descr="Number of probe requests. Not windowable."></metric>
<metricname="TCC_PROBE_ALL"block=TCCevent=10descr="Number of external probe requests with EA_TCC_preq_all == 1. Not windowable."></metric>
<metricname="TCC_INTERNAL_PROBE"block=TCCevent=11descr="Number of self-probes spawned by TCC for CC writes/atomic operations. Not windowable."></metric>
<metricname="TCC_READ"block=TCCevent=12descr="Number of read requests. Compressed reads are included in this, but metadata reads are not included."></metric>
<metricname="TCC_WRITE"block=TCCevent=13descr="Number of write requests."></metric>
<metricname="TCC_ATOMIC"block=TCCevent=14descr="Number of atomic requests of all types."></metric>
<metricname="TCC_HIT"block=TCCevent=17descr="Number of cache hits."></metric>
<metricname="TCC_MISS"block=TCCevent=19descr="Number of cache misses. UC reads count as misses."></metric>
<metricname="TCC_WRITEBACK"block=TCCevent=22descr="Number of lines written back to main memory. This includes writebacks of dirty lines and uncached write/atomic requests."></metric>
<metricname="TCC_EA0_WRREQ"block=TCCevent=26descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands."></metric>
<metricname="TCC_EA0_WRREQ_64B"block=TCCevent=27descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface."></metric>
<metricname="TCC_EA0_WRREQ_PROBE_COMMAND"block=TCCevent=28descr="Number of probe commands going over the TC_EA_wrreq interface."></metric>
<metricname="TCC_EA0_WR_UNCACHED_32B"block=TCCevent=29descr="Number of 32-byte write/atomic going over the TC_EA_wrreq interface due to uncached traffic. Note that CC mtypes can produce uncached requests, and those are included in this. A 64-byte request will be counted as 2"></metric>
<metricname="TCC_EA0_WRREQ_STALL"block=TCCevent=30descr="Number of cycles a write request was stalled."></metric>
<metricname="TCC_EA0_WRREQ_IO_CREDIT_STALL"block=TCCevent=31descr="Number of cycles a EA write request was stalled because the interface was out of IO credits."></metric>
<metricname="TCC_EA0_WRREQ_GMI_CREDIT_STALL"block=TCCevent=32descr="Number of cycles a EA write request was stalled because the interface was out of GMI credits."></metric>
<metricname="TCC_EA0_WRREQ_DRAM_CREDIT_STALL"block=TCCevent=33descr="Number of cycles a EA write request was stalled because the interface was out of DRAM credits."></metric>
<metricname="TCC_TOO_MANY_EA_WRREQS_STALL"block=TCCevent=34descr="Number of cycles the TCC could not send a EA write request because it already reached its maximum number of pending EA write requests."></metric>
<metricname="TCC_EA0_WRREQ_LEVEL"block=TCCevent=35descr="The sum of the number of EA write requests in flight. This is primarily meant for measuring average EA write latency. Average write latency = TCC_PERF_SEL_EA_WRREQ_LEVEL/TCC_PERF_SEL_EA_WRREQ."></metric>
<metricname="TCC_EA0_ATOMIC"block=TCCevent=36descr="Number of transactions going over the TC_EA_wrreq interface that are actually atomic requests."></metric>
<metricname="TCC_EA0_ATOMIC_LEVEL"block=TCCevent=37descr="The sum of the number of EA atomics in flight. This is primarily meant for measuring average EA atomic latency. Average atomic latency = TCC_PERF_SEL_EA_WRREQ_ATOMIC_LEVEL/TCC_PERF_SEL_EA_WRREQ_ATOMIC."></metric>
<metricname="TCC_EA0_RDREQ"block=TCCevent=38descr="Number of TCC/EA read requests (either 32-byte or 64-byte)"></metric>
<metricname="TCC_EA0_RDREQ_32B"block=TCCevent=39descr="Number of 32-byte TCC/EA read requests"></metric>
<metricname="TCC_EA0_RD_UNCACHED_32B"block=TCCevent=40descr="Number of 32-byte TCC/EA read due to uncached traffic. A 64-byte request will be counted as 2"></metric>
<metricname="TCC_EA0_RDREQ_IO_CREDIT_STALL"block=TCCevent=41descr="Number of cycles there was a stall because the read request interface was out of IO credits. Stalls occur regardless of whether a read needed to be performed or not."></metric>
<metricname="TCC_EA0_RDREQ_GMI_CREDIT_STALL"block=TCCevent=42descr="Number of cycles there was a stall because the read request interface was out of GMI credits. Stalls occur regardless of whether a read needed to be performed or not."></metric>
<metricname="TCC_EA0_RDREQ_DRAM_CREDIT_STALL"block=TCCevent=43descr="Number of cycles there was a stall because the read request interface was out of DRAM credits. Stalls occur regardless of whether a read needed to be performed or not."></metric>
<metricname="TCC_EA0_RDREQ_LEVEL"block=TCCevent=44descr="The sum of the number of TCC/EA read requests in flight. This is primarily meant for measuring average EA read latency. Average read latency = TCC_PERF_SEL_EA_RDREQ_LEVEL/TCC_PERF_SEL_EA_RDREQ."></metric>
<metricname="TCC_TAG_STALL"block=TCCevent=45descr="Number of cycles the normal request pipeline in the tag was stalled for any reason. Normally, stalls of this nature are measured exactly from one point the pipeline, but that is not the case for this counter. Probes can stall the pipeline at a variety of places, and there is no single point that can reasonably measure the total stalls accurately."></metric>
<metricname="TCC_NORMAL_WRITEBACK"block=TCCevent=68descr="Number of writebacks due to requests that are not writeback requests."></metric>
<metricname="TCC_ALL_TC_OP_WB_WRITEBACK"block=TCCevent=73descr="Number of writebacks due to all TC_OP writeback requests."></metric>
<metricname="TCC_NORMAL_EVICT"block=TCCevent=74descr="Number of evictions due to requests that are not invalidate or probe requests."></metric>
<metricname="TCC_ALL_TC_OP_INV_EVICT"block=TCCevent=80descr="Number of evictions due to all TC_OP invalidate requests."></metric>
<metricname="TCC_PROBE_EVICT"block=TCCevent=81descr="Number of evictions/invalidations due to probes. Not windowable."></metric>
<metricname="TCC_EA0_RDREQ_DRAM"block=TCCevent=102descr="Number of TCC/EA read requests (either 32-byte or 64-byte) destined for DRAM (MC)."></metric>
<metricname="TCC_EA0_WRREQ_DRAM"block=TCCevent=103descr="Number of TCC/EA write requests (either 32-byte or 64-byte) destined for DRAM (MC)."></metric>
<metricname="GRBM_COUNT"block=GRBMevent=0descr="Tie High - Count Number of Clocks"></metric>
<metricname="GRBM_GUI_ACTIVE"block=GRBMevent=2descr="The GUI is Active"></metric>
<metricname="GRBM_CP_BUSY"block=GRBMevent=3descr="Any of the Command Processor (CPG/CPC/CPF) blocks are busy."></metric>
<metricname="GRBM_SPI_BUSY"block=GRBMevent=11descr="Any of the Shader Pipe Interpolators (SPI) are busy in the shader engine(s)."></metric>
<metricname="GRBM_TA_BUSY"block=GRBMevent=13descr="Any of the Texture Pipes (TA) are busy in the shader engine(s)."></metric>
<metricname="GRBM_GDS_BUSY"block=GRBMevent=25descr="The Global Data Share (GDS) is busy."></metric>
<metricname="GRBM_EA_BUSY"block=GRBMevent=35descr="The Efficiency Arbiter (EA) block is busy."></metric>
<metricname="GRBM_GL2CC_BUSY"block=GRBMevent=40descr="The GL2CC block is busy."></metric>
<metricname="GL2C_HIT"block=GL2Cevent=42descr="Number of cache hits"></metric>
<metricname="GL2C_MISS"block=GL2Cevent=43descr="Number of cache misses. UC reads count as misses."></metric>
<metricname="GL2C_MC_WRREQ"block=GL2Cevent=83descr="Number of transactions (either 32-byte or 64-byte) going over the GL2C_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands"></metric>
<metricname="GL2C_EA_WRREQ_64B"block=GL2Cevent=85descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface."></metric>
<metricname="GL2C_MC_WRREQ_STALL"block=GL2Cevent=88descr="Number of cycles a write request was stalled."></metric>
<metricname="GL2C_MC_RDREQ"block=GL2Cevent=96descr="Number of GL2C/EA read requests (either 32-byte or 64-byte or 128-byte)."></metric>
<metricname="GL2C_EA_RDREQ_32B"block=GL2Cevent=99descr="Number of 32-byte GL2C/EA read requests"></metric>
<metricname="GL2C_EA_RDREQ_64B"block=GL2Cevent=100descr="Number of 64-byte GL2C/EA read requests"></metric>
<metricname="GL2C_EA_RDREQ_96B"block=GL2Cevent=101descr="Number of 96-byte GL2C/EA read requests"></metric>
<metricname="GL2C_EA_RDREQ_128B"block=GL2Cevent=102descr="Number of 128-byte GL2C/EA read requests"></metric>
<metricname="SQ_ACCUM_PREV"block=SQevent=1descr="For counter N, increment by the value of counter N-1."></metric>
<metricname="SQ_BUSY_CYCLES"block=SQevent=3descr="Clock cycles while SQ is reporting that it is busy. {nondeterministic, global, C2}"></metric>
<metricname="SQ_WAVES"block=SQevent=4descr="Count number of waves sent to SQs. {emulated, global, C1}"></metric>
<metricname="SQ_LEVEL_WAVES"block=SQevent=7descr="Track the aggregated number of waves over a certain period of time. Set next counter to ACCUM_PREV and divide by SQ_PERF_SEL_WAVES for average wave life."></metric>
<metricname="SQ_WAVE_CYCLES"block=SQevent=26descr="Number of clock cycles spent by waves in the SQs. Incremented by # of living (valid) waves each cycle. {nondeterministic, C1}"></metric>
<metricname="SQ_WAIT_INST_ANY"block=SQevent=28descr="Number of clock cycles spent waiting for any instruction issue. In units of cycles. {nondeterministic}"></metric>
<metricname="SQ_WAIT_ANY"block=SQevent=37descr="Number of clock cycles spent waiting for anything. {nondeterministic, C1}"></metric>
<metricname="SQ_INSTS_WAVE32"block=SQevent=71descr="Number of wave32 instructions issued, for flat, lds, valu, tex. {emulated, C1}"></metric>
<metricname="SQ_INSTS_WAVE32_LDS"block=SQevent=74descr="Number of wave32 LDS indexed instructions issued. Wave64 may count 1 or 2, depending on what gets issued. {emulated, C1}"></metric>
<metricname="SQ_INSTS_WAVE32_VALU"block=SQevent=75descr="Number of wave32 valu instructions issued. Wave64 may count 1 or 2, depending on what gets issued. {emulated, C1}"></metric>
<metricname="SQ_WAVE32_INSTS"block=SQevent=84descr="Number of instructions issued by wave32 waves. Skipped instructions are not counted. {emulated}"></metric>
<metricname="SQ_WAVE64_INSTS"block=SQevent=85descr="Number of instructions issued by wave64 waves. Skipped instructions are not counted. {emulated}"></metric>
<metricname="SQ_INST_LEVEL_GDS"block=SQevent=98descr="Number of in-flight GDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_GDS for average latency. {level, nondeterministic, C1}"></metric>
<metricname="SQ_INST_LEVEL_LDS"block=SQevent=99descr="Number of in-flight LDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_LDS for average latency. Includes FLAT instructions. {level, nondeterministic, C1}"></metric>
<metricname="SQ_INST_CYCLES_VMEM"block=SQevent=120descr="Number of cycles needed to send addr and data for VMEM (lds, buffer, image, flat, scratch, global) instructions, windowed by perf_en. {emulated, C1}"></metric>
<metricname="SQC_LDS_BANK_CONFLICT"block=SQevent=285descr="Number of cycles LDS is stalled by bank conflicts. (emulated, C1)"></metric>
<metricname="SQC_LDS_IDX_ACTIVE"block=SQevent=290descr="Number of cycles LDS is used for indexed (non-direct,non-interpolation) operations. {per-simd, emulated, C1}"></metric>
<metricname="SQ_INSTS_VALU"block=SQevent=64descr="Number of VALU instructions issued excluding skipped instructions. {emulated, C1}"></metric>
<metricname="SQ_INSTS_SALU"block=SQevent=60descr="Number of SALU instructions issued. {emulated, C1}"></metric>
<metricname="SQ_INSTS_SMEM"block=SQevent=61descr="Number of SMEM instructions issued. {emulated, C1}"></metric>
<metricname="SQ_INSTS_FLAT"block=SQevent=57descr="Number of FLAT instructions issued. {emulated, C2}"></metric>
<metricname="SQ_INSTS_LDS"block=SQevent=59descr="Number of LDS indexed instructions issued. {emulated, C1}"></metric>
<metricname="SQ_INSTS_GDS"block=SQevent=55descr="Number of GDS instructions issued. {emulated, C1}"></metric>
<metricname="SQ_WAIT_INST_LDS"block=SQevent=31descr="Number of clock cycles spent waiting for LDS (indexed) instruction issue. In units of cycles. {nondeterministic, C1}"></metric>
<metricname="TA_TA_BUSY"block=TAevent=15descr="TA block is busy. Perf_Windowing not supported for this counter."></metric>
<metricname="TA_FLAT_LOAD_WAVEFRONTS"block=TAevent=101descr="Number of flat load vec32 packets processed by TA, same as flat_read_wavefronts in earlier IP"></metric>
<metricname="TA_FLAT_STORE_WAVEFRONTS"block=TAevent=102descr="Number of flat store vec32 packets processed by TA, same as flat_write_wavefronts in earlier IP"></metric>
<metricname="GRBM_COUNT"block=GRBMevent=0descr="Tie High - Count Number of Clocks"></metric>
<metricname="GRBM_GUI_ACTIVE"block=GRBMevent=2descr="The GUI is Active"></metric>
<metricname="GL2C_HIT"block=GL2Cevent=42descr="Number of cache hits"></metric>
<metricname="GL2C_MISS"block=GL2Cevent=43descr="Number of cache misses. UC reads count as misses."></metric>
<metricname="GL2C_MC_WRREQ"block=GL2Cevent=83descr="Number of transactions (either 32-byte or 64-byte) going over the GL2C_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands"></metric>
<metricname="GL2C_EA_WRREQ_64B"block=GL2Cevent=85descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface."></metric>
<metricname="GL2C_MC_WRREQ_STALL"block=GL2Cevent=88descr="Number of cycles a write request was stalled."></metric>
<metricname="GL2C_MC_RDREQ"block=GL2Cevent=96descr="Number of GL2C/EA read requests (either 32-byte or 64-byte or 128-byte)."></metric>
<metricname="GL2C_EA_RDREQ_32B"block=GL2Cevent=99descr="Number of 32-byte GL2C/EA read requests"></metric>
<metricname="GL2C_EA_RDREQ_64B"block=GL2Cevent=100descr="Number of 64-byte GL2C/EA read requests"></metric>
<metricname="GL2C_EA_RDREQ_96B"block=GL2Cevent=101descr="Number of 96-byte GL2C/EA read requests"></metric>
<metricname="GL2C_EA_RDREQ_128B"block=GL2Cevent=102descr="Number of 128-byte GL2C/EA read requests"></metric>
<metricname="SQ_ACCUM_PREV"block=SQevent=1descr="For counter N, increment by the value of counter N-1."></metric>
<metricname="SQ_BUSY_CYCLES"block=SQevent=3descr="Clock cycles while SQ is reporting that it is busy. {nondeterministic, global, C2}"></metric>
<metricname="SQ_WAVES"block=SQevent=4descr="Count number of waves sent to SQs. {emulated, global, C1}"></metric>
<metricname="SQ_WAVE_CYCLES"block=SQevent=24descr="Number of clock cycles spent by waves in the SQs. Incremented by number of living (valid) waves each cycle. {nondeterministic, C1}"></metric>
<metricname="SQ_WAIT_INST_ANY"block=SQevent=26descr="Number of clock-cycles spent waiting for any instruction issue. In units of cycles. (nondeterministic)"></metric>
<metricname="SQ_WAIT_ANY"block=SQevent=35descr="Number of wave-cycles spent waiting for anything (nondeterministic, C1)"></metric>
<metricname="SQ_INSTS_WAVE32"block=SQevent=70descr="Number of wave32 instructions issued, for flat, lds, valu, tex. {emulated, C1}"></metric>
<metricname="SQ_INSTS_WAVE32_LDS"block=SQevent=72descr="Number of wave32 LDS indexed instructions issued. Wave64 may count 1 or 2, depending on what gets issued. {emulated, C1}"></metric>
<metricname="SQ_INSTS_WAVE32_VALU"block=SQevent=73descr="Number of wave32 valu instructions issued. Wave64 may count 1 or 2, depending on what gets issued. {emulated, C1}"></metric>
<metricname="SQ_WAVE32_INSTS"block=SQevent=82descr="Number of instructions issued by wave32 waves. Skipped instructions are not counted. {emulated}"></metric>
<metricname="SQ_WAVE64_INSTS"block=SQevent=83descr="Number of instructions issued by wave64 waves. Skipped instructions are not counted. {emulated}"></metric>
<metricname="SQ_INST_LEVEL_GDS"block=SQevent=87descr="Number of in-flight GDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_GDS for average latency. {level, nondeterministic, C1}"></metric>
<metricname="SQ_INST_LEVEL_LDS"block=SQevent=88descr="Number of in-flight LDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_LDS for average latency. Includes FLAT instructions. {level, nondeterministic, C1}"></metric>
<metricname="SQ_INST_CYCLES_VMEM"block=SQevent=106descr="Number of cycles needed to send addr and data for VMEM (lds, buffer, image, flat, scratch, global) instructions, windowed by perf_en. {emulated, C1}"></metric>
<metricname="SQC_LDS_BANK_CONFLICT"block=SQevent=256descr="Number of cycles LDS is stalled by bank conflicts. (emulated, C1)"></metric>
<metricname="SQC_LDS_IDX_ACTIVE"block=SQevent=261descr="Number of cycles LDS is used for indexed (non-direct,non-interpolation) operations. {per-simd, emulated, C1}"></metric>
<metricname="SQ_INSTS_VALU"block=SQevent=62descr="Number of VALU instructions issued excluding skipped instructions. {emulated, C1}"></metric>
<metricname="SQ_INSTS_SALU"block=SQevent=58descr="Number of SALU instructions issued. {emulated, C1}"></metric>
<metricname="SQ_INSTS_SMEM"block=SQevent=59descr="Number of SMEM instructions issued. {emulated, C1}"></metric>
<metricname="SQ_INSTS_FLAT"block=SQevent=56descr="Number of FLAT instructions issued. {emulated, C2}"></metric>
<metricname="SQ_INSTS_LDS"block=SQevent=57descr="Number of LDS indexed instructions issued. {emulated, C1}"></metric>
<metricname="SQ_INSTS_GDS"block=SQevent=54descr="Number of GDS instructions issued. {emulated, C1}"></metric>
<metricname="SQ_INSTS_TEX_LOAD"block=SQevent=66descr="Number of buffer load, image load, sample, or atomic (with return) instructions issued. {emulated, C1}"></metric>
<metricname="SQ_INSTS_TEX_STORE"block=SQevent=67descr="Number of buffer store, image store, or atomic (without return) instructions issued. {emulated, C1}"></metric>
<metricname="SQ_WAIT_INST_LDS"block=SQevent=29descr="Number of clock cycles spent waiting for LDS (indexed) instruction issue. In units of cycles. {nondeterministic, C1}"></metric>
<metricname="TA_TA_BUSY"block=TAevent=15descr="TA block is busy. Perf_Windowing not supported for this counter."></metric>
<metricname="TA_BUFFER_LOAD_WAVEFRONTS"block=TAevent=45descr="Number of buffer load vec32 packets processed by TA"></metric>
<metricname="TA_BUFFER_STORE_WAVEFRONTS"block=TAevent=46descr="Number of buffer store vec32 packets processed by TA"></metric>