SWDEV-387039: Modified gfx90a section to inherit from gfx9 base and removed derived counters that are defined in the gfx9 base from gfx90a section to avoid duplication

Change-Id: I653e116bc47fe11b57e663c2827d177149b00c5b
Tá an tiomantas seo le fáil i:
Kiumars Sabeti
2023-03-08 18:22:30 -05:00
tiomanta ag Ammar ELWazir
tuismitheoir ef31a96b55
tiomantas a9f1237c53
+1 -22
Féach ar an gComhad
@@ -97,10 +97,7 @@
<metric name="TCC_WRREQ_STALL_max" expr=max(TCC_EA_WRREQ_STALL,32) descr="Number of cycles a write request was stalled. Max over TCC instances."></metric>
</gfx908_expr>
<gfx90a_expr>
<metric name="TA_BUSY_avr" expr=avr(TA_TA_BUSY,16) descr="TA block is busy. Average over TA instances."></metric>
<metric name="TA_BUSY_max" expr=max(TA_TA_BUSY,16) descr="TA block is busy. Max over TA instances."></metric>
<metric name="TA_BUSY_min" expr=min(TA_TA_BUSY,16) descr="TA block is busy. Min over TA instances."></metric>
<gfx90a_expr base="gfx9_expr">
<metric name="TA_TA_BUSY_sum" expr=sum(TA_TA_BUSY,16) descr="TA block is busy. Perf_Windowing not supported for this counter. Sum over TA instances."></metric>
<metric name="TA_TOTAL_WAVEFRONTS_sum" expr=sum(TA_TOTAL_WAVEFRONTS,16) descr="Total number of wavefronts processed by TA. Sum over TA instances."></metric>
<metric name="TA_ADDR_STALLED_BY_TC_CYCLES_sum" expr=sum(TA_ADDR_STALLED_BY_TC_CYCLES,16) descr="Number of cycles addr path stalled by TC. Perf_Windowing not supported for this counter. Sum over TA instances."></metric>
@@ -108,7 +105,6 @@
<metric name="TA_DATA_STALLED_BY_TC_CYCLES_sum" expr=sum(TA_DATA_STALLED_BY_TC_CYCLES,16) descr="Number of cycles data path stalled by TC. Perf_Windowing not supported for this counter. Sum over TA instances."></metric>
<metric name="TA_FLAT_WAVEFRONTS_sum" expr=sum(TA_FLAT_WAVEFRONTS,16) descr="Number of flat opcode wavfronts processed by the TA. Sum over TA instances."></metric>
<metric name="TA_FLAT_READ_WAVEFRONTS_sum" expr=sum(TA_FLAT_READ_WAVEFRONTS,16) descr="Number of flat opcode reads processed by the TA. Sum over TA instances."></metric>
<metric name="TA_FLAT_WRITE_WAVEFRONTS_sum" expr=sum(TA_FLAT_WRITE_WAVEFRONTS,16) descr="Number of flat opcode writes processed by the TA. Sum over TA instances."></metric>
<metric name="TA_FLAT_ATOMIC_WAVEFRONTS_sum" expr=sum(TA_FLAT_ATOMIC_WAVEFRONTS,16) descr="Number of flat opcode atomics processed by the TA. Sum over TA instances."></metric>
<metric name="TA_BUFFER_WAVEFRONTS_sum" expr=sum(TA_BUFFER_WAVEFRONTS,16) descr="Number of buffer wavefronts processed by TA. Sum over TA instances."></metric>
<metric name="TA_BUFFER_READ_WAVEFRONTS_sum" expr=sum(TA_BUFFER_READ_WAVEFRONTS,16) descr="Number of buffer read wavefronts processed by TA. Sum over TA instances."></metric>
@@ -209,23 +205,6 @@
<metric name="TCC_ALL_TC_OP_INV_EVICT_sum" expr=sum(TCC_ALL_TC_OP_INV_EVICT,32) descr="Number of evictions due to all TC_OP invalidate requests. Sum over TCC instances."></metric>
<metric name="TCC_EA_RDREQ_DRAM_sum" expr=sum(TCC_EA_RDREQ_DRAM,32) descr="Number of TCC/EA read requests (either 32-byte or 64-byte) destined for DRAM (MC). Sum over TCC instances."></metric>
<metric name="TCC_EA_WRREQ_DRAM_sum" expr=sum(TCC_EA_WRREQ_DRAM,32) descr="Number of TCC/EA write requests (either 32-byte of 64-byte) destined for DRAM (MC). Sum over TCC instances."></metric>
<metric name="FETCH_SIZE" expr=(TCC_EA_RDREQ_32B_sum*32+(TCC_EA_RDREQ_sum-TCC_EA_RDREQ_32B_sum)*64)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_SIZE" expr=((TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum)*32+TCC_EA_WRREQ_64B_sum*64)/1024 descr="The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_REQ_32B" expr=TCC_EA_WRREQ_64B_sum*2+(TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum) descr="The total number of 32-byte effective memory writes."></metric>
<metric name="VFetchInsts" expr=(SQ_INSTS_VMEM_RD-TA_FLAT_READ_WAVEFRONTS_sum)/SQ_WAVES descr="The average number of vector fetch instructions from the video memory executed per work-item (affected by flow control). Excludes FLAT instructions that fetch from video memory."></metric>
<metric name="VWriteInsts" expr=(SQ_INSTS_VMEM_WR-TA_FLAT_WRITE_WAVEFRONTS_sum)/SQ_WAVES descr="The average number of vector write instructions to the video memory executed per work-item (affected by flow control). Excludes FLAT instructions that write to video memory."></metric>
<metric name="FlatVMemInsts" expr=(SQ_INSTS_FLAT-SQ_INSTS_FLAT_LDS_ONLY)/SQ_WAVES descr="The average number of FLAT instructions that read from or write to the video memory executed per work item (affected by flow control). Includes FLAT instructions that read from or write to scratch."></metric>
<metric name="LDSInsts" expr=(SQ_INSTS_LDS-SQ_INSTS_FLAT_LDS_ONLY)/SQ_WAVES descr="The average number of LDS read or LDS write instructions executed per work item (affected by flow control). Excludes FLAT instructions that read from or write to LDS."></metric>
<metric name="FlatLDSInsts" expr=SQ_INSTS_FLAT_LDS_ONLY/SQ_WAVES descr="The average number of FLAT instructions that read or write to LDS executed per work item (affected by flow control)."></metric>
<metric name="VALUUtilization" expr=100*SQ_THREAD_CYCLES_VALU/(SQ_ACTIVE_INST_VALU*MAX_WAVE_SIZE) descr="The percentage of active vector ALU threads in a wave. A lower number can mean either more thread divergence in a wave or that the work-group size is not a multiple of 64. Value range: 0% (bad), 100% (ideal - no thread divergence)."></metric>
<metric name="VALUBusy" expr=100*SQ_ACTIVE_INST_VALU*4/SIMD_NUM/GRBM_GUI_ACTIVE descr="The percentage of GPUTime vector ALU instructions are processed. Value range: 0% (bad) to 100% (optimal)."></metric>
<metric name="SALUBusy" expr=100*SQ_INST_CYCLES_SALU*4/SIMD_NUM/GRBM_GUI_ACTIVE descr="The percentage of GPUTime scalar ALU instructions are processed. Value range: 0% (bad) to 100% (optimal)."></metric>
<metric name="MemWrites32B" expr=WRITE_REQ_32B descr="The total number of effective 32B write transactions to the memory"></metric>
<metric name="L2CacheHit" expr=100*sum(TCC_HIT,32)/(sum(TCC_HIT,32)+sum(TCC_MISS,32)) descr="The percentage of fetch, write, atomic, and other instructions that hit the data in L2 cache. Value range: 0% (no hit) to 100% (optimal)."></metric>
<metric name="MemUnitStalled" expr=100*max(TCP_TCP_TA_DATA_STALL_CYCLES,16)/GRBM_GUI_ACTIVE/SE_NUM descr="The percentage of GPUTime the memory unit is stalled. Try reducing the number or size of fetches and writes if possible. Value range: 0% (optimal) to 100% (bad)."></metric>
<metric name="WriteUnitStalled" expr=100*TCC_WRREQ_STALL_max/GRBM_GUI_ACTIVE descr="The percentage of GPUTime the Write unit is stalled. Value range: 0% to 100% (bad)."></metric>
<metric name="LDSBankConflict" expr=100*SQ_LDS_BANK_CONFLICT/GRBM_GUI_ACTIVE/CU_NUM descr="The percentage of GPUTime LDS is stalled by bank conflicts. Value range: 0% (optimal) to 100% (bad)."></metric>
</gfx90a_expr>
<gfx10_expr>