Files
rocm-systems/test/tool/metrics.xml
T
Evgeny add56e26ae license annotations
Change-Id: I96b8f625a03b707b45aee6b8e7dbfabe073294c6
2018-06-25 19:52:28 -05:00

206 wiersze
12 KiB
XML

#include "gfx_metrics.xml"
<gfx8>
# average for 16 instances
<metric name="TA_BUSY_avr" expr=avr(TA_TA_BUSY,16) ></metric>
<metric name="TA_BUSY_max" expr=max(TA_TA_BUSY,16) ></metric>
<metric name="TA_BUSY_min" expr=min(TA_TA_BUSY,16) ></metric>
# sum for 16 instances
<metric name="TA_FLAT_READ_WAVEFRONTS_sum" expr=sum(TA_FLAT_READ_WAVEFRONTS,16) ></metric>
<metric name="TA_FLAT_WRITE_WAVEFRONTS_sum" expr=sum(TA_FLAT_WRITE_WAVEFRONTS,16) ></metric>
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,16) ></metric>
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,16) ></metric>
<metric name="TCC_MC_RDREQ_sum" expr=sum(TCC_MC_RDREQ,16) ></metric>
<metric name="TCC_MC_WRREQ_sum" expr=sum(TCC_MC_WRREQ,16) ></metric>
<metric name="TCC_WRREQ_STALL_max" expr=max(TCC_MC_WRREQ_STALL,16) ></metric>
# FETCH_SIZE, kilobytes
# The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.
<metric name="FETCH_SIZE" expr=(TCC_MC_RDREQ_sum*32)/1024 ></metric>
# WRITE_SIZE, kilobytes
# The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.
<metric name="WRITE_SIZE" expr=(TCC_MC_WRREQ_sum*32)/1024 ></metric>
</gfx8>
<gfx9>
# average for 16 instances
<metric name="TA_BUSY_avr" expr=avr(TA_TA_BUSY,16) ></metric>
<metric name="TA_BUSY_max" expr=max(TA_TA_BUSY,16) ></metric>
<metric name="TA_BUSY_min" expr=min(TA_TA_BUSY,16) ></metric>
# sum for 16 instances
<metric name="TA_FLAT_READ_WAVEFRONTS_sum" expr=sum(TA_FLAT_READ_WAVEFRONTS,16) ></metric>
<metric name="TA_FLAT_WRITE_WAVEFRONTS_sum" expr=sum(TA_FLAT_WRITE_WAVEFRONTS,16) ></metric>
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,16) ></metric>
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,16) ></metric>
<metric name="TCC_EA_RDREQ_32B_sum" expr=sum(TCC_EA_RDREQ_32B,16) ></metric>
<metric name="TCC_EA_RDREQ_sum" expr=sum(TCC_EA_RDREQ,16) ></metric>
<metric name="TCC_EA_WRREQ_sum" expr=sum(TCC_EA_WRREQ,16) ></metric>
<metric name="TCC_EA_WRREQ_64B_sum" expr=sum(TCC_EA_WRREQ_64B,16) ></metric>
<metric name="TCC_WRREQ_STALL_max" expr=max(TCC_EA_WRREQ_STALL,16) ></metric>
# FETCH_SIZE, kilobytes
# The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.
<metric name="FETCH_SIZE" expr=(TCC_EA_RDREQ_32B_sum*32+(TCC_EA_RDREQ_sum-TCC_EA_RDREQ_32B_sum)*64)/1024 ></metric>
# WRITE_SIZE, kilobytes
# The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.
<metric name="WRITE_SIZE" expr=((TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum)*32+TCC_EA_WRREQ_64B_sum*64)/1024 ></metric>
</gfx9>
<global>
# GPUBusy, percentage
# The percentage of time GPU was busy.
<metric
name="GPUBusy"
descr="The percentage of time GPU was busy."
expr=100*GRBM_GUI_ACTIVE/GRBM_COUNT
></metric>
# Wavefronts Total wavefronts.,
<metric
name="Wavefronts"
descr="Total wavefronts."
expr=SQ_WAVES
></metric>
# VALUInsts The average number of vector ALU instructions executed per work-item (affected by flow control).
<metric
name="VALUInsts"
descr="The average number of vector ALU instructions executed per work-item (affected by flow control)."
expr=SQ_INSTS_VALU/SQ_WAVES
></metric>
# SALUInsts The average number of scalar ALU instructions executed per work-item (affected by flow control).
<metric
name="SALUInsts"
descr="The average number of scalar ALU instructions executed per work-item (affected by flow control)."
expr=SQ_INSTS_SALU/SQ_WAVES
></metric>
# VFetchInsts The average number of vector fetch instructions from the video memory executed per work-item (affected by flow control). Excludes FLAT instructions that fetch from video memory.
<metric
name="VFetchInsts"
descr="The average number of vector fetch instructions from the video memory executed per work-item (affected by flow control). Excludes FLAT instructions that fetch from video memory."
expr=(SQ_INSTS_VMEM_RD-TA_FLAT_READ_WAVEFRONTS_sum)/SQ_WAVES
></metric>
# SFetchInsts The average number of scalar fetch instructions from the video memory executed per work-item (affected by flow control).
<metric
name="SFetchInsts"
descr="The average number of scalar fetch instructions from the video memory executed per work-item (affected by flow control)."
expr=SQ_INSTS_SMEM/SQ_WAVES
></metric>
# VWriteInsts The average number of vector write instructions to the video memory executed per work-item (affected by flow control). Excludes FLAT instructions that write to video memory.
<metric
name="VWriteInsts"
descr="The average number of vector write instructions to the video memory executed per work-item (affected by flow control). Excludes FLAT instructions that write to video memory."
expr=(SQ_INSTS_VMEM_WR-TA_FLAT_WRITE_WAVEFRONTS_sum)/SQ_WAVES
></metric>
# FlatVMemInsts The average number of FLAT instructions that read from or write to the video memory executed per work item (affected by flow control). Includes FLAT instructions that read from or write to scratch.
<metric
name="FlatVMemInsts"
descr="The average number of FLAT instructions that read from or write to the video memory executed per work item (affected by flow control). Includes FLAT instructions that read from or write to scratch."
expr=(SQ_INSTS_FLAT-SQ_INSTS_FLAT_LDS_ONLY)/SQ_WAVES
></metric>
# LDSInsts The average number of LDS read or LDS write instructions executed per work item (affected by flow control). Excludes FLAT instructions that read from or write to LDS.
<metric
name="LDSInsts"
descr="The average number of LDS read or LDS write instructions executed per work item (affected by flow control). Excludes FLAT instructions that read from or write to LDS."
expr=(SQ_INSTS_LDS-SQ_INSTS_FLAT_LDS_ONLY)/SQ_WAVES
></metric>
# FlatLDSInsts The average number of FLAT instructions that read or write to LDS executed per work item (affected by flow control).
<metric
name="FlatLDSInsts"
descr="The average number of FLAT instructions that read or write to LDS executed per work item (affected by flow control)."
expr=SQ_INSTS_FLAT_LDS_ONLY/SQ_WAVES
></metric>
# GDSInsts The average number of GDS read or GDS write instructions executed per work item (affected by flow control).
<metric
name="GDSInsts"
descr="The average number of GDS read or GDS write instructions executed per work item (affected by flow control)."
expr=SQ_INSTS_GDS/SQ_WAVES
></metric>
# VALUUtilization The percentage of active vector ALU threads in a wave. A lower number can mean either more thread divergence in a wave or that the work-group size is not a multiple of 64. Value range: 0% (bad), 100% (ideal - no thread divergence).
<metric
name="VALUUtilization"
descr="The percentage of active vector ALU threads in a wave. A lower number can mean either more thread divergence in a wave or that the work-group size is not a multiple of 64. Value range: 0% (bad), 100% (ideal - no thread divergence)."
expr=100*SQ_THREAD_CYCLES_VALU/(SQ_ACTIVE_INST_VALU*MAX_WAVE_SIZE)
></metric>
# VALUBusy The percentage of GPUTime vector ALU instructions are processed. Value range: 0% (bad) to 100% (optimal).
<metric
name="VALUBusy"
descr="The percentage of GPUTime vector ALU instructions are processed. Value range: 0% (bad) to 100% (optimal)."
expr=100*SQ_ACTIVE_INST_VALU*4/SIMD_NUM/GRBM_GUI_ACTIVE
></metric>
# SALUBusy The percentage of GPUTime scalar ALU instructions are processed. Value range: 0% (bad) to 100% (optimal).
<metric
name="SALUBusy"
descr="The percentage of GPUTime scalar ALU instructions are processed. Value range: 0% (bad) to 100% (optimal)."
expr=100*SQ_INST_CYCLES_SALU*4/SIMD_NUM/GRBM_GUI_ACTIVE
></metric>
# FetchSize The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.
<metric
name="FetchSize"
descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."
expr=FETCH_SIZE
></metric>
# WriteSize The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.
<metric
name="WriteSize"
descr="The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."
expr=WRITE_SIZE
></metric>
# L2CacheHit The percentage of fetch, write, atomic, and other instructions that hit the data in L2 cache. Value range: 0% (no hit) to 100% (optimal).
<metric
name="L2CacheHit"
descr="The percentage of fetch, write, atomic, and other instructions that hit the data in L2 cache. Value range: 0% (no hit) to 100% (optimal)."
expr=100*sum(TCC_HIT,16)/(sum(TCC_HIT,16)+sum(TCC_MISS,16))
></metric>
# MemUnitBusy The percentage of GPUTime the memory unit is active. The result includes the stall time (MemUnitStalled). This is measured with all extra fetches and writes and any cache or memory effects taken into account. Value range: 0% to 100% (fetch-bound).
<metric
name="MemUnitBusy"
descr="The percentage of GPUTime the memory unit is active. The result includes the stall time (MemUnitStalled). This is measured with all extra fetches and writes and any cache or memory effects taken into account. Value range: 0% to 100% (fetch-bound)."
expr=100*max(TA_TA_BUSY,16)/GRBM_GUI_ACTIVE/SE_NUM
></metric>
# MemUnitStalled The percentage of GPUTime the memory unit is stalled. Try reducing the number or size of fetches and writes if possible. Value range: 0% (optimal) to 100% (bad).
<metric
name="MemUnitStalled"
descr="The percentage of GPUTime the memory unit is stalled. Try reducing the number or size of fetches and writes if possible. Value range: 0% (optimal) to 100% (bad)."
expr=100*max(TCP_TA_DATA_STALL_CYCLES,16)/GRBM_GUI_ACTIVE/SE_NUM
></metric>
# WriteUnitStalled The percentage of GPUTime the Write unit is stalled. Value range: 0% to 100% (bad).
<metric
name="WriteUnitStalled"
descr="The percentage of GPUTime the Write unit is stalled. Value range: 0% to 100% (bad)."
expr=100*TCC_WRREQ_STALL_max/GRBM_GUI_ACTIVE
></metric>
# The percentage of GPUTime ALU units are stalled by the LDS input queue being full or the output queue being not ready. If there are LDS bank conflicts, reduce them. Otherwise, try reducing the number of LDS accesses if possible. Value range: 0% (optimal) to 100% (bad).
<metric
name="ALUStalledByLDS"
descr="The percentage of GPUTime ALU units are stalled by the LDS input queue being full or the output queue being not ready. If there are LDS bank conflicts, reduce them. Otherwise, try reducing the number of LDS accesses if possible. Value range: 0% (optimal) to 100% (bad)."
expr=100*SQ_WAIT_INST_LDS*4/SQ_WAVES/GRBM_GUI_ACTIVE
></metric>
# LDSBankConflict The percentage of GPUTime LDS is stalled by bank conflicts. Value range: 0% (optimal) to 100% (bad).
<metric
name="LDSBankConflict"
descr="The percentage of GPUTime LDS is stalled by bank conflicts. Value range: 0% (optimal) to 100% (bad)."
expr=100*SQ_LDS_BANK_CONFLICT/GRBM_GUI_ACTIVE/CU_NUM
></metric>
</global>