Add support for MI300X-A0

Signed-off-by: colramos-amd <colramos@amd.com>


[ROCm/rocprofiler-compute commit: f229b36277]
Este commit está contenido en:
colramos-amd
2024-01-22 11:11:04 -06:00
cometido por Cole Ramos
padre e1f82cb8a5
commit d9c5cd355a
Se han modificado 19 ficheros con 3246 adiciones y 0 borrados
@@ -51,6 +51,7 @@ class Omniperf:
"gfx906": {"mi50": ["MI50", "MI60"]},
"gfx908": {"mi100": ["MI100"]},
"gfx90a": {"mi200": ["MI210", "MI250", "MI250X"]},
"gfx941": {"mi300": ["MI300X_A0"]},
"gfx942": {"mi300": ["MI300A_A1", "MI300X_A1"]},
}
@@ -0,0 +1,8 @@
---
Panel Config:
id: 000
title: Top Stat
data source:
- raw_csv_table:
id: 001
source: pmc_kernel_top.csv
@@ -0,0 +1,9 @@
---
Panel Config:
id: 100
title: System Info
data source:
- raw_csv_table:
id: 101
source: sysinfo.csv
columnwise: True
@@ -0,0 +1,247 @@
---
# Add a description/tip for each metric in this section
# so that it can be shown on hover.
Metric Description:
SALU: &SALU_anchor Scalar Arithmetic Logic Unit
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 200
title: System Speed-of-Light
data source:
- metric_table:
id: 201
title: Speed-of-Light
header:
metric: Metric
value: Value
unit: Unit
peak: Peak
pop: PoP
tips: Tips
metric:
VALU FLOPs:
value: AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16)
+ (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
+ SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64
+ SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64))))
/ (EndNs - BeginNs)))
unit: GFLOP
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
* $numCU) * 64) * 2) / 1000))
tips:
VALU IOPs:
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs)))
unit: GIOP
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs
- BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
tips:
MFMA FLOPs (BF16):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
unit: GFLOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
MFMA FLOPs (F16):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))
unit: GFLOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
MFMA FLOPs (F32):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))
unit: GFLOP
peak: ((($sclk * $numCU) * 256) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
/ ((($sclk * $numCU) * 256) / 1000))
tips:
MFMA FLOPs (F64):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))
unit: GFLOP
peak: ((($sclk * $numCU) * 256) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
/ ((($sclk * $numCU) * 256) / 1000))
tips:
MFMA IOPs (Int8):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))
unit: GIOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
Active CUs:
value: $numActiveCUs
unit: CUs
peak: $numCU
pop: ((100 * $numActiveCUs) / $numCU)
tips:
SALU Util:
value: AVG(((100 * SQ_ACTIVE_INST_SCA) / (GRBM_GUI_ACTIVE * $numCU)))
unit: pct
peak: 100
pop: AVG(((100 * SQ_ACTIVE_INST_SCA) / (GRBM_GUI_ACTIVE * $numCU)))
tips:
VALU Util:
value: AVG(((100 * SQ_ACTIVE_INST_VALU) / (GRBM_GUI_ACTIVE * $numCU)))
unit: pct
peak: 100
pop: AVG(((100 * SQ_ACTIVE_INST_VALU) / (GRBM_GUI_ACTIVE * $numCU)))
tips:
MFMA Util:
value: AVG(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((GRBM_GUI_ACTIVE * $numCU)
* 4)))
unit: pct
peak: 100
pop: AVG(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((GRBM_GUI_ACTIVE * $numCU)
* 4)))
tips:
VALU Active Threads/Wave:
value: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
unit: Threads
peak: 64
pop: (AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
!= 0) else None)) * 1.5625)
tips:
IPC - Issue:
value: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)
+ SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED)
/ SQ_ACTIVE_INST_ANY))
unit: Instr/cycle
peak: 5
pop: ((100 * AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)
+ SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED)
/ SQ_ACTIVE_INST_ANY))) / 5)
tips:
LDS BW:
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)))
unit: GB/sec
peak: (($sclk * $numCU) * 0.128)
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
tips:
LDS Bank Conflict:
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
unit: Conflicts/access
peak: 32
pop: ((100 * AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))) / 32)
tips:
Instr Cache Hit Rate:
value: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
unit: pct
peak: 100
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
tips:
Instr Cache BW:
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Scalar L1D Cache Hit Rate:
value: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES))
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
unit: pct
peak: 100
pop: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES))
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
tips:
Scalar L1D Cache BW:
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Vector L1D Cache Hit Rate:
value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None))
unit: pct
peak: 100
pop: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None))
tips:
Vector L1D Cache BW:
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numCU)
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
L2 Cache Hit Rate:
value: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else None))
unit: pct
peak: 100
pop: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else None))
tips:
L2-Fabric Read BW:
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
tips:
L2-Fabric Write BW:
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
tips:
L2-Fabric Read Latency:
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
unit: Cycles
peak: ''
pop: ''
tips:
L2-Fabric Write Latency:
value: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: Cycles
peak: ''
pop: ''
tips:
Wave Occupancy:
value: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
unit: Wavefronts
peak: ($maxWavesPerCU * $numCU)
pop: (100 * AVG(((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / ($maxWavesPerCU
* $numCU))))
coll_level: SQ_LEVEL_WAVES
tips:
Instr Fetch BW:
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
unit: GB/s
peak: ((($sclk / 1000) * 32) * $numSQC)
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
* (($sclk / 1000) * 32)))
coll_level: SQ_IFETCH_LEVEL
tips:
Instr Fetch Latency:
value: AVG((SQ_ACCUM_PREV_HIRES / SQ_IFETCH))
unit: Cycles
peak: ''
pop: ''
coll_level: SQ_IFETCH_LEVEL
tips:
@@ -0,0 +1,315 @@
---
# Add a description/tip for each metric in this section
# so that it can be shown on hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 300
title: Memory Chart
data source:
- metric_table:
id: 301
title: Memory Chart
header:
metric: Metric
#alias: #alias
value: Value
tips: Tips
metric:
# ----------------------------------------
# Instr Buff Block
#TODO: double check wave_occupancy
Wavefront Occupancy:
#alias: wave_occ_
value: ROUND(AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / $numActiveCUs), 0)
coll_level: SQ_LEVEL_WAVES
tips:
Wave Life:
#alias: wave_life_
value: ROUND(AVG(((4 * (SQ_WAVE_CYCLES / SQ_WAVES)) if (SQ_WAVES != 0) else 0)), 0)
tips:
# ----------------------------------------
# Instr Dispatch Block
SALU:
#alias: salu_
value: ROUND(AVG((SQ_INSTS_SALU / $denom)), 0)
tips:
SMEM:
#alias: smem_
value: ROUND(AVG((SQ_INSTS_SMEM / $denom)), 0)
tips:
VALU:
#alias: valu_
value: ROUND(AVG((SQ_INSTS_VALU / $denom)), 0)
tips:
MFMA:
#alias: mfma_
value: ROUND(AVG((SQ_INSTS_MFMA / $denom)), 0)
tips:
VMEM:
#alias: vmem_
value: ROUND(AVG((SQ_INSTS_VMEM / $denom)), 0)
tips:
LDS:
#alias: lds_
value: ROUND(AVG((SQ_INSTS_LDS / $denom)), 0)
tips:
GWS:
#alias: gws_
value: ROUND(AVG((SQ_INSTS_GDS / $denom)), 0)
tips:
BR:
#alias: br_
value: ROUND(AVG((SQ_INSTS_BRANCH / $denom)), 0)
tips:
# ----------------------------------------
# Exec Block
Active CUs:
#alias: active_cu_
value: $numActiveCUs
tips:
Num CUs:
#alias: num_cu_
value: $numCU
tips:
VGPR:
#alias: vgpr_
value: ROUND(AVG(Arch_VGPR), 0)
tips:
# Todo: add AGPRs
SGPR:
#alias: sgpr_
value: ROUND(AVG(SGPR), 0)
tips:
LDS Allocation:
#alias: lds_alloc_
value: ROUND(AVG(LDS_Per_Workgroup), 0)
tips:
Scratch Allocation:
#alias: scratch_alloc_
value: ROUND(AVG(Scratch_Per_Workitem), 0)
tips:
Wavefronts:
#alias: wavefronts_
value: ROUND(AVG(SPI_CSN_WAVE), 0)
tips:
Workgroups:
#alias: workgroups_
value: ROUND(AVG(SPI_CSN_NUM_THREADGROUPS), 0)
tips:
# ----------------------------------------
# LDS Block
LDS Req:
#alias: lds_req_
value: ROUND(AVG((SQ_INSTS_LDS / $denom)), 0)
tips:
LDS Util:
#alias: lds_util_
value:
ROUND(AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU))),
0)
tips:
LDS Latency:
#alias: lds_lat
value: ROUND(AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)),0)
coll_level: SQ_INST_LEVEL_LDS
tips:
# ----------------------------------------
# Vector L1 Cache Block
VL1 Rd:
#alias: vl1_rd_
value: ROUND(AVG((TCP_TOTAL_READ_sum / $denom)), 0)
tips:
VL1 Wr:
#alias: vl1_wr_
value: ROUND(AVG((TCP_TOTAL_WRITE_sum / $denom)), 0)
tips:
VL1 Atomic:
#alias: vl1_atom_
value:
ROUND(AVG(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
/ $denom)), 0)
tips:
VL1 Hit:
#alias: vl1_hit_
value:
ROUND(AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None )), 0)
tips:
VL1 Lat:
#alias: vl1_lat_
value:
ROUND(AVG(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum
!= 0) else None)), 0)
tips:
# VL1 Coalesce: percentage of ideal coalescing, computed as
# (TA_TOTAL_WAVEFRONTS_sum * 64 * 100) / (TCP_TOTAL_ACCESSES_sum * 4).
# The guard compares the divisor against 0 (not None) so that kernels with no
# TCP accesses fall through to the else-branch instead of dividing by zero.
VL1 Coalesce:
#alias: vl1_coales_
value:
ROUND(AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
* 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else 0)), 0)
tips:
VL1 Stall:
#alias: vl1_stall_
value:
ROUND(AVG((((100 * TCP_TCR_TCP_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
if (TCP_GATE_EN1_sum != 0) else None)), 0)
tips:
VL1_L2 Rd:
#alias: vl1_l2_rd_
value: ROUND(AVG((TCP_TCC_READ_REQ_sum / $denom)), 0)
tips:
VL1_L2 Wr:
#alias: vl1_l2_wr_
value: ROUND(AVG((TCP_TCC_WRITE_REQ_sum / $denom)), 0)
tips:
VL1_L2 Atomic:
#alias: vl1_l2_atom_
value:
ROUND(AVG(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
/ $denom)), 0)
tips:
# ----------------------------------------
# Scalar L1D Cache Block
VL1D Rd:
#alias: sl1_rd_
value: ROUND(AVG((SQC_DCACHE_REQ / $denom)), 0)
tips:
VL1D Hit:
#alias: sl1_hit_
value:
ROUND((AVG(((SQC_DCACHE_HITS / SQC_DCACHE_REQ) if (SQC_DCACHE_REQ !=
0) else None)) * 100), 0)
tips:
# VL1D Lat: SQ_ACCUM_PREV_HIRES divided by SQC_DCACHE_REQ, guarded against a
# zero request count, collected with coll_level SQC_DCACHE_INFLIGHT_LEVEL.
# NOTE(review): the result is multiplied by 100 exactly like the hit-rate
# metric above, whereas the other latency metrics in this file (LDS Latency,
# Fabric Rd/Wr Lat) are not scaled — confirm the "* 100" is intended.
VL1D Lat:
#alias: sl1_lat_
value:
ROUND((AVG(((SQ_ACCUM_PREV_HIRES / SQC_DCACHE_REQ) if (SQC_DCACHE_REQ !=
0) else None)) * 100), 0)
coll_level: SQC_DCACHE_INFLIGHT_LEVEL
tips:
VL1D_L2 Rd:
#alias: sl1_l2_rd_
value: ROUND(AVG((SQC_TC_DATA_READ_REQ / $denom)), 0)
tips:
VL1D_L2 Wr:
#alias: sl1_l2_wr_
value: ROUND(AVG((SQC_TC_DATA_WRITE_REQ / $denom)), 0)
tips:
VL1D_L2 Atomic:
#alias: sl1_l2_atom_
value: ROUND(AVG((SQC_TC_DATA_ATOMIC_REQ / $denom)), 0)
tips:
# ----------------------------------------
# Instr L1 Cache Block
IL1 Fetch:
#alias: il1_fetch_
value: ROUND(AVG((SQC_ICACHE_REQ / $denom)), 0)
tips:
IL1 Hit:
#alias: il1_hit_
value: ROUND((AVG((SQC_ICACHE_HITS / SQC_ICACHE_REQ)) * 100), 0)
tips:
# IL1 Lat: SQ_ACCUM_PREV_HIRES divided by SQC_ICACHE_REQ, guarded against a
# zero request count.
# NOTE(review): the result is multiplied by 100 like the hit-rate metric
# above, unlike the other latency metrics in this file — confirm intended.
# NOTE(review): no coll_level is set for this entry; the trailing comment on
# "tips" suggests SQ_IFETCH_LEVEL was being considered — confirm.
IL1 Lat:
#alias: il1_lat_
value:
ROUND((AVG(((SQ_ACCUM_PREV_HIRES / SQC_ICACHE_REQ) if (SQC_ICACHE_REQ !=
0) else None)) * 100), 0)
tips: # ??? coll_level: SQ_IFETCH_LEVEL
IL1_L2 Rd:
#alias: il1_l2_req_
value: ROUND(AVG((SQC_TC_INST_REQ / $denom)), 0)
tips:
# ----------------------------------------
# L2 Cache Block(inside)
L2 Rd:
#alias: l2_rd_
value: ROUND(AVG((TCC_READ_sum / $denom)), 0)
tips:
L2 Wr:
#alias: l2_wr_
value: ROUND(AVG((TCC_WRITE_sum / $denom)), 0)
tips:
L2 Atomic:
#alias: l2_atom_
value: ROUND(AVG((TCC_ATOMIC_sum / $denom)), 0)
tips:
L2 Hit:
#alias: l2_hit_
value:
ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else 0)), 0)
tips:
L2 Rd Lat:
#alias: l2_rd_lat_
value:
# ROUND(AVG(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum))
# if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None)),
# 0)
tips:
L2 Wr Lat:
#alias: l2_wr_lat_
value:
# ROUND(AVG(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum +
# TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
# != 0) else None)), 0)
tips:
# ----------------------------------------
# Fabric Block
Fabric_L2 Rd:
#alias: l2_fabric_rd_
value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0)
tips:
Fabric_L2 Wr:
#alias: l2_fabric_wr_
value: ROUND(AVG((TCC_EA0_WRREQ_sum / $denom)), 0)
tips:
Fabric_L2 Atomic:
#alias: l2_fabric_atom_
value: ROUND(AVG((TCC_EA0_ATOMIC_sum / $denom)), 0)
tips:
Fabric Rd Lat:
#alias: fabric_rd_lat_
value:
ROUND(AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else 0)), 0)
tips:
Fabric Wr Lat:
#alias: fabric_wr_lat_
value:
ROUND(AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else 0)), 0)
tips:
Fabric Atomic Lat:
#alias: fabric_atom_lat_
value:
ROUND(AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else 0)), 0)
tips:
HBM Rd:
#alias: hbm_rd_
value: ROUND(AVG((TCC_EA0_RDREQ_DRAM_sum / $denom)), 0)
tips:
HBM Wr:
#alias: hbm_wr_
value: ROUND(AVG((TCC_EA0_WRREQ_DRAM_sum / $denom)), 0)
tips:
comparable: false # for now
cli_style: mem_chart
@@ -0,0 +1,180 @@
---
# Add a description/tip for each metric in this section
# so that it can be shown on hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 500
title: Command Processor (CPC/CPF)
data source:
- metric_table:
id: 501
title: Command Processor Fetcher
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
GPU Busy Cycles:
avg: AVG(GRBM_GUI_ACTIVE)
min: MIN(GRBM_GUI_ACTIVE)
max: MAX(GRBM_GUI_ACTIVE)
unit: Cycles/Kernel
tips:
CPF Busy:
avg: AVG(CPF_CPF_STAT_BUSY)
min: MIN(CPF_CPF_STAT_BUSY)
max: MAX(CPF_CPF_STAT_BUSY)
unit: Cycles/Kernel
tips:
CPF Util:
avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE))
if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None))
min: MIN((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE))
if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None))
max: MAX((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE))
if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None))
unit: pct
tips:
# CPF Stall: CPF stall cycles expressed as a percentage of CPF busy cycles
# (100 * CPF_CPF_STAT_STALL / CPF_CPF_STAT_BUSY), None when the CPF was never
# busy. Because the value is a ratio scaled by 100, the unit is pct, matching
# the "L2Cache Intf Stall" and "CPC Stall Rate" metrics.
CPF Stall:
avg: AVG((((100 * CPF_CPF_STAT_STALL) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY
!= 0) else None))
min: MIN((((100 * CPF_CPF_STAT_STALL) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY
!= 0) else None))
max: MAX((((100 * CPF_CPF_STAT_STALL) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY
!= 0) else None))
unit: pct
tips:
L2Cache Intf Busy:
avg: AVG(CPF_CPF_TCIU_BUSY)
min: MIN(CPF_CPF_TCIU_BUSY)
max: MAX(CPF_CPF_TCIU_BUSY)
unit: Cycles/Kernel
tips:
L2Cache Intf Util:
avg: AVG((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE))
if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None))
min: MIN((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE))
if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None))
max: MAX((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE))
if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None))
unit: pct
tips:
L2Cache Intf Stall:
avg: AVG((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY
!= 0) else None))
min: MIN((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY
!= 0) else None))
max: MAX((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY
!= 0) else None))
unit: pct
tips:
UTCL1 Stall:
avg: AVG(CPF_CMP_UTCL1_STALL_ON_TRANSLATION)
min: MIN(CPF_CMP_UTCL1_STALL_ON_TRANSLATION)
max: MAX(CPF_CMP_UTCL1_STALL_ON_TRANSLATION)
unit: Cycles/Kernel
tips:
- metric_table:
id: 502
title: Command Processor Compute
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
GPU Busy Cycles:
avg: AVG(GRBM_GUI_ACTIVE)
min: MIN(GRBM_GUI_ACTIVE)
max: MAX(GRBM_GUI_ACTIVE)
unit: Cycles
tips:
CPC Busy Cycles:
avg: AVG(CPC_CPC_STAT_BUSY)
min: MIN(CPC_CPC_STAT_BUSY)
max: MAX(CPC_CPC_STAT_BUSY)
unit: Cycles
tips:
CPC Util:
avg: AVG((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE))
if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None))
min: MIN((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE))
if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None))
max: MAX((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE))
if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None))
unit: pct
tips:
CPC Stall Cycles:
avg: AVG(CPC_CPC_STAT_STALL)
min: MIN(CPC_CPC_STAT_STALL)
max: MAX(CPC_CPC_STAT_STALL)
unit: Cycles
tips:
CPC Stall Rate:
avg: AVG((((100 * CPC_CPC_STAT_STALL) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
!= 0) else None))
min: MIN((((100 * CPC_CPC_STAT_STALL) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
!= 0) else None))
max: MAX((((100 * CPC_CPC_STAT_STALL) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
!= 0) else None))
unit: pct
tips:
CPC Packet Decoding:
avg: AVG(CPC_ME1_BUSY_FOR_PACKET_DECODE)
min: MIN(CPC_ME1_BUSY_FOR_PACKET_DECODE)
max: MAX(CPC_ME1_BUSY_FOR_PACKET_DECODE)
unit: Cycles
tips:
SPI Intf Busy Cycles:
avg: AVG(CPC_ME1_DC0_SPI_BUSY)
min: MIN(CPC_ME1_DC0_SPI_BUSY)
max: MAX(CPC_ME1_DC0_SPI_BUSY)
unit: Cycles
tips:
SPI Intf Util:
avg: AVG((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
!= 0) else None))
min: MIN((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
!= 0) else None))
max: MAX((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
!= 0) else None))
unit: pct
tips:
L2Cache Intf Util:
avg: AVG((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE))
if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None))
min: MIN((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE))
if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None))
max: MAX((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE))
if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None))
unit: pct
tips:
UTCL1 Stall Cycles:
avg: AVG(CPC_UTCL1_STALL_ON_TRANSLATION)
min: MIN(CPC_UTCL1_STALL_ON_TRANSLATION)
max: MAX(CPC_UTCL1_STALL_ON_TRANSLATION)
unit: Cycles
tips:
UTCL2 Intf Busy Cycles:
avg: AVG(CPC_CPC_UTCL2IU_BUSY)
min: MIN(CPC_CPC_UTCL2IU_BUSY)
max: MAX(CPC_CPC_UTCL2IU_BUSY)
unit: Cycles
tips:
UTCL2 Intf Util:
avg: AVG((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE))
if ((CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE) != 0) else None))
min: MIN((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE))
if ((CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE) != 0) else None))
max: MAX((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE))
if ((CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE) != 0) else None))
unit: pct
tips:
@@ -0,0 +1,174 @@
---
# Add a description/tip for each metric in this section
# so that it can be shown on hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 600
title: Shader Processor Input (SPI)
data source:
- metric_table:
id: 601
title: SPI Stats
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
GPU Busy:
avg: AVG(GRBM_GUI_ACTIVE)
min: MIN(GRBM_GUI_ACTIVE)
max: MAX(GRBM_GUI_ACTIVE)
unit: Cycles
tips:
CS Busy:
avg: AVG(SPI_CSN_BUSY)
min: MIN(SPI_CSN_BUSY)
max: MAX(SPI_CSN_BUSY)
unit: Cycles
tips:
SPI Busy:
avg: AVG(GRBM_SPI_BUSY)
min: MIN(GRBM_SPI_BUSY)
max: MAX(GRBM_SPI_BUSY)
unit: Cycles
tips:
SQ Busy:
avg: AVG(SQ_BUSY_CYCLES)
min: MIN(SQ_BUSY_CYCLES)
max: MAX(SQ_BUSY_CYCLES)
unit: Cycles
tips:
Dispatched Workgroups:
avg: AVG(SPI_CSN_NUM_THREADGROUPS)
min: MIN(SPI_CSN_NUM_THREADGROUPS)
max: MAX(SPI_CSN_NUM_THREADGROUPS)
unit: Workgroups
tips:
Dispatched Wavefronts:
avg: AVG(SPI_CSN_WAVE)
min: MIN(SPI_CSN_WAVE)
max: MAX(SPI_CSN_WAVE)
unit: Wavefronts
tips:
Wave Alloc Failed:
avg: AVG(SPI_RA_REQ_NO_ALLOC)
min: MIN(SPI_RA_REQ_NO_ALLOC)
max: MAX(SPI_RA_REQ_NO_ALLOC)
unit: Cycles
tips:
Wave Alloc Failed - CS:
avg: AVG(SPI_RA_REQ_NO_ALLOC_CSN)
min: MIN(SPI_RA_REQ_NO_ALLOC_CSN)
max: MAX(SPI_RA_REQ_NO_ALLOC_CSN)
unit: Cycles
tips:
- metric_table:
id: 602
title: SPI Resource Allocation
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Wave request Failed (CS):
avg: AVG(SPI_RA_REQ_NO_ALLOC_CSN)
min: MIN(SPI_RA_REQ_NO_ALLOC_CSN)
max: MAX(SPI_RA_REQ_NO_ALLOC_CSN)
unit: Cycles
tips:
CS Stall:
avg: AVG(SPI_RA_RES_STALL_CSN)
min: MIN(SPI_RA_RES_STALL_CSN)
max: MAX(SPI_RA_RES_STALL_CSN)
unit: Cycles
tips:
CS Stall Rate:
avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY !=
0) else None))
min: MIN((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY !=
0) else None))
max: MAX((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY !=
0) else None))
unit: pct
tips:
Scratch Stall:
avg: AVG(SPI_RA_TMP_STALL_CSN)
min: MIN(SPI_RA_TMP_STALL_CSN)
max: MAX(SPI_RA_TMP_STALL_CSN)
unit: Cycles
tips:
Insufficient SIMD Waveslots:
avg: AVG(SPI_RA_WAVE_SIMD_FULL_CSN)
min: MIN(SPI_RA_WAVE_SIMD_FULL_CSN)
max: MAX(SPI_RA_WAVE_SIMD_FULL_CSN)
unit: SIMD
tips:
Insufficient SIMD VGPRs:
avg: AVG(SPI_RA_VGPR_SIMD_FULL_CSN)
min: MIN(SPI_RA_VGPR_SIMD_FULL_CSN)
max: MAX(SPI_RA_VGPR_SIMD_FULL_CSN)
unit: SIMD
tips:
Insufficient SIMD SGPRs:
avg: AVG(SPI_RA_SGPR_SIMD_FULL_CSN)
min: MIN(SPI_RA_SGPR_SIMD_FULL_CSN)
max: MAX(SPI_RA_SGPR_SIMD_FULL_CSN)
unit: SIMD
tips:
Insufficient CU LDS:
avg: AVG(SPI_RA_LDS_CU_FULL_CSN)
min: MIN(SPI_RA_LDS_CU_FULL_CSN)
max: MAX(SPI_RA_LDS_CU_FULL_CSN)
unit: CU
tips:
Insufficient CU Barries:
avg: AVG(SPI_RA_BAR_CU_FULL_CSN)
min: MIN(SPI_RA_BAR_CU_FULL_CSN)
max: MAX(SPI_RA_BAR_CU_FULL_CSN)
unit: CU
tips:
Insufficient Bulky Resource:
avg: AVG(SPI_RA_BULKY_CU_FULL_CSN)
min: MIN(SPI_RA_BULKY_CU_FULL_CSN)
max: MAX(SPI_RA_BULKY_CU_FULL_CSN)
unit: CU
tips:
Reach CU Threadgroups Limit:
avg: AVG(SPI_RA_TGLIM_CU_FULL_CSN)
min: MIN(SPI_RA_TGLIM_CU_FULL_CSN)
max: MAX(SPI_RA_TGLIM_CU_FULL_CSN)
unit: Cycles
tips:
Reach CU Wave Limit:
avg: AVG(SPI_RA_WVLIM_STALL_CSN)
min: MIN(SPI_RA_WVLIM_STALL_CSN)
max: MAX(SPI_RA_WVLIM_STALL_CSN)
unit: Cycles
tips:
VGPR Writes:
avg: AVG((((4 * SPI_VWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
None))
min: MIN((((4 * SPI_VWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
None))
max: MAX((((4 * SPI_VWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
None))
unit: Cycles/wave
tips:
SGPR Writes:
avg: AVG((((1 * SPI_SWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
None))
min: MIN((((1 * SPI_SWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
None))
max: MAX((((1 * SPI_SWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
None))
unit: Cycles/wave
tips:
@@ -0,0 +1,142 @@
---
# Add a description/tip for each metric in this section
# so that it can be shown on hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 700
title: Wavefront
data source:
- metric_table:
id: 701
title: Wavefront Launch Stats
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Grid Size:
avg: AVG(Grid_Size)
min: MIN(Grid_Size)
max: MAX(Grid_Size)
unit: Work Items
tips:
# Workgroup Size: avg/min/max number of work-items per workgroup.
# Reads the per-dispatch Workgroup_Size column; LDS_Per_Workgroup is a byte
# count and is already reported by the "LDS Allocation" metric in this table.
# NOTE(review): assumes the dispatch data exposes a Workgroup_Size column
# alongside Grid_Size — confirm against the profiler output schema.
Workgroup Size:
avg: AVG(Workgroup_Size)
min: MIN(Workgroup_Size)
max: MAX(Workgroup_Size)
unit: Work Items
tips:
Total Wavefronts:
avg: AVG(SPI_CSN_WAVE)
min: MIN(SPI_CSN_WAVE)
max: MAX(SPI_CSN_WAVE)
unit: Wavefronts
tips:
Saved Wavefronts:
avg: AVG(SQ_WAVES_SAVED)
min: MIN(SQ_WAVES_SAVED)
max: MAX(SQ_WAVES_SAVED)
unit: Wavefronts
tips:
Restored Wavefronts:
avg: AVG(SQ_WAVES_RESTORED)
min: MIN(SQ_WAVES_RESTORED)
max: MAX(SQ_WAVES_RESTORED)
unit: Wavefronts
tips:
VGPRs:
avg: AVG(Arch_VGPR)
min: MIN(Arch_VGPR)
max: MAX(Arch_VGPR)
unit: Registers
tips:
AGPRs:
avg: AVG(Accum_VGPR)
min: MIN(Accum_VGPR)
max: MAX(Accum_VGPR)
unit: Registers
tips:
SGPRs:
avg: AVG(SGPR)
min: MIN(SGPR)
max: MAX(SGPR)
unit: Registers
tips:
LDS Allocation:
avg: AVG(LDS_Per_Workgroup)
min: MIN(LDS_Per_Workgroup)
max: MAX(LDS_Per_Workgroup)
unit: Bytes
tips:
Scratch Allocation:
avg: AVG(Scratch_Per_Workitem)
min: MIN(Scratch_Per_Workitem)
max: MAX(Scratch_Per_Workitem)
unit: Bytes
tips:
- metric_table:
id: 702
title: Wavefront Runtime Stats
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Kernel Time (Nanosec):
avg: AVG((EndNs - BeginNs))
min: MIN((EndNs - BeginNs))
max: MAX((EndNs - BeginNs))
unit: ns
tips:
Kernel Time (Cycles):
avg: AVG(GRBM_GUI_ACTIVE)
min: MIN(GRBM_GUI_ACTIVE)
max: MAX(GRBM_GUI_ACTIVE)
unit: Cycle
tips:
Instr/wavefront:
avg: AVG((SQ_INSTS / SQ_WAVES))
min: MIN((SQ_INSTS / SQ_WAVES))
max: MAX((SQ_INSTS / SQ_WAVES))
unit: Instr/wavefront
tips:
Wave Cycles:
avg: AVG(((4 * SQ_WAVE_CYCLES) / $denom))
min: MIN(((4 * SQ_WAVE_CYCLES) / $denom))
max: MAX(((4 * SQ_WAVE_CYCLES) / $denom))
unit: (Cycles + $normUnit)
tips:
Dependency Wait Cycles:
avg: AVG(((4 * SQ_WAIT_ANY) / $denom))
min: MIN(((4 * SQ_WAIT_ANY) / $denom))
max: MAX(((4 * SQ_WAIT_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Issue Wait Cycles:
avg: AVG(((4 * SQ_WAIT_INST_ANY) / $denom))
min: MIN(((4 * SQ_WAIT_INST_ANY) / $denom))
max: MAX(((4 * SQ_WAIT_INST_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Active Cycles:
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / $denom))
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / $denom))
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Wavefront Occupancy:
avg: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
min: MIN((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
max: MAX((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
unit: Wavefronts
coll_level: SQ_LEVEL_WAVES
tips:
@@ -0,0 +1,228 @@
---
# Add a description/tip for each metric in this section
# so that it can be shown on hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1000
title: Compute Units - Instruction Mix
data source:
- metric_table:
id: 1001
title: Instruction Mix
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
VALU - Vector:
avg: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
min: MIN(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
max: MAX(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
unit: (instr + $normUnit)
tips:
VMEM:
avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
unit: (instr + $normUnit)
tips:
LDS:
avg: AVG((SQ_INSTS_LDS / $denom))
min: MIN((SQ_INSTS_LDS / $denom))
max: MAX((SQ_INSTS_LDS / $denom))
unit: (instr + $normUnit)
tips:
VALU - MFMA:
avg: AVG((SQ_INSTS_MFMA / $denom))
min: MIN((SQ_INSTS_MFMA / $denom))
max: MAX((SQ_INSTS_MFMA / $denom))
unit: (instr + $normUnit)
tips:
SALU:
avg: AVG((SQ_INSTS_SALU / $denom))
min: MIN((SQ_INSTS_SALU / $denom))
max: MAX((SQ_INSTS_SALU / $denom))
unit: (instr + $normUnit)
tips:
SMEM:
avg: AVG((SQ_INSTS_SMEM / $denom))
min: MIN((SQ_INSTS_SMEM / $denom))
max: MAX((SQ_INSTS_SMEM / $denom))
unit: (instr + $normUnit)
tips:
Branch:
avg: AVG((SQ_INSTS_BRANCH / $denom))
min: MIN((SQ_INSTS_BRANCH / $denom))
max: MAX((SQ_INSTS_BRANCH / $denom))
unit: (instr + $normUnit)
tips:
GDS:
avg: AVG((SQ_INSTS_GDS / $denom))
min: MIN((SQ_INSTS_GDS / $denom))
max: MAX((SQ_INSTS_GDS / $denom))
unit: (instr + $normUnit)
tips:
- metric_table:
id: 1002
title: VALU Arithmetic Instr Mix
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
INT32:
avg: AVG((SQ_INSTS_VALU_INT32 / $denom))
min: MIN((SQ_INSTS_VALU_INT32 / $denom))
max: MAX((SQ_INSTS_VALU_INT32 / $denom))
unit: (instr + $normUnit)
tips:
INT64:
avg: AVG((SQ_INSTS_VALU_INT64 / $denom))
min: MIN((SQ_INSTS_VALU_INT64 / $denom))
max: MAX((SQ_INSTS_VALU_INT64 / $denom))
unit: (instr + $normUnit)
tips:
F16-ADD:
avg: AVG((SQ_INSTS_VALU_ADD_F16 / $denom))
min: MIN((SQ_INSTS_VALU_ADD_F16 / $denom))
max: MAX((SQ_INSTS_VALU_ADD_F16 / $denom))
unit: (instr + $normUnit)
tips:
F16-MUL:
avg: AVG((SQ_INSTS_VALU_MUL_F16 / $denom))
min: MIN((SQ_INSTS_VALU_MUL_F16 / $denom))
max: MAX((SQ_INSTS_VALU_MUL_F16 / $denom))
unit: (instr + $normUnit)
tips:
F16-FMA:
avg: AVG((SQ_INSTS_VALU_FMA_F16 / $denom))
min: MIN((SQ_INSTS_VALU_FMA_F16 / $denom))
max: MAX((SQ_INSTS_VALU_FMA_F16 / $denom))
unit: (instr + $normUnit)
tips:
F16-Trans:
avg: AVG((SQ_INSTS_VALU_TRANS_F16 / $denom))
min: MIN((SQ_INSTS_VALU_TRANS_F16 / $denom))
max: MAX((SQ_INSTS_VALU_TRANS_F16 / $denom))
unit: (instr + $normUnit)
tips:
F32-ADD:
avg: AVG((SQ_INSTS_VALU_ADD_F32 / $denom))
min: MIN((SQ_INSTS_VALU_ADD_F32 / $denom))
max: MAX((SQ_INSTS_VALU_ADD_F32 / $denom))
unit: (instr + $normUnit)
tips:
F32-MUL:
avg: AVG((SQ_INSTS_VALU_MUL_F32 / $denom))
min: MIN((SQ_INSTS_VALU_MUL_F32 / $denom))
max: MAX((SQ_INSTS_VALU_MUL_F32 / $denom))
unit: (instr + $normUnit)
tips:
F32-FMA:
avg: AVG((SQ_INSTS_VALU_FMA_F32 / $denom))
min: MIN((SQ_INSTS_VALU_FMA_F32 / $denom))
max: MAX((SQ_INSTS_VALU_FMA_F32 / $denom))
unit: (instr + $normUnit)
tips:
F32-Trans:
avg: AVG((SQ_INSTS_VALU_TRANS_F32 / $denom))
min: MIN((SQ_INSTS_VALU_TRANS_F32 / $denom))
max: MAX((SQ_INSTS_VALU_TRANS_F32 / $denom))
unit: (instr + $normUnit)
tips:
F64-ADD:
avg: AVG((SQ_INSTS_VALU_ADD_F64 / $denom))
min: MIN((SQ_INSTS_VALU_ADD_F64 / $denom))
max: MAX((SQ_INSTS_VALU_ADD_F64 / $denom))
unit: (instr + $normUnit)
tips:
F64-MUL:
avg: AVG((SQ_INSTS_VALU_MUL_F64 / $denom))
min: MIN((SQ_INSTS_VALU_MUL_F64 / $denom))
max: MAX((SQ_INSTS_VALU_MUL_F64 / $denom))
unit: (instr + $normUnit)
tips:
F64-FMA:
avg: AVG((SQ_INSTS_VALU_FMA_F64 / $denom))
min: MIN((SQ_INSTS_VALU_FMA_F64 / $denom))
max: MAX((SQ_INSTS_VALU_FMA_F64 / $denom))
unit: (instr + $normUnit)
tips:
F64-Trans:
avg: AVG((SQ_INSTS_VALU_TRANS_F64 / $denom))
min: MIN((SQ_INSTS_VALU_TRANS_F64 / $denom))
max: MAX((SQ_INSTS_VALU_TRANS_F64 / $denom))
unit: (instr + $normUnit)
tips:
Conversion:
avg: AVG((SQ_INSTS_VALU_CVT / $denom))
min: MIN((SQ_INSTS_VALU_CVT / $denom))
max: MAX((SQ_INSTS_VALU_CVT / $denom))
unit: (instr + $normUnit)
tips:
- metric_table:
id: 1003
title: VMEM Instr Mix
header:
type: type
count: Count
tips: Tips
metric:
Buffer Instr:
count: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom))
tips:
Buffer Read:
count: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
tips:
Buffer Write:
count: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
tips:
Buffer Atomic:
count: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
tips:
Flat Instr:
count: AVG((TA_FLAT_WAVEFRONTS_sum / $denom))
tips:
Flat Read:
count: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
tips:
Flat Write:
count: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
tips:
Flat Atomic:
count: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
tips:
- metric_table:
id: 1004
title: MFMA Arithmetic Instr Mix
header:
type: type
count: Count
tips: Tips
metric:
MFMA-I8:
count: AVG((SQ_INSTS_VALU_MFMA_I8 / $denom))
tips:
MFMA-F16:
count: AVG((SQ_INSTS_VALU_MFMA_F16 / $denom))
tips:
MFMA-BF16:
count: AVG((SQ_INSTS_VALU_MFMA_BF16 / $denom))
tips:
MFMA-F32:
count: AVG((SQ_INSTS_VALU_MFMA_F32 / $denom))
tips:
MFMA-F64:
count: AVG((SQ_INSTS_VALU_MFMA_F64 / $denom))
tips:
@@ -0,0 +1,198 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1100
title: Compute Units - Compute Pipeline
data source:
- metric_table:
id: 1101
title: Speed-of-Light
header:
metric: Metric
value: Value
unit: Unit
tips: Tips
metric:
valu_flops_pop:
value: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
* $numCU) * 64) * 2) / 1000))
unit: Pct of Peak
tips:
mfma_flops_bf16_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
/ ((($sclk * $numCU) * 512) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f16_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
/ ((($sclk * $numCU) * 1024) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f32_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
/ ((($sclk * $numCU) * 256) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f64_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
/ ((($sclk * $numCU) * 256) / 1000))
unit: Pct of Peak
tips:
mfma_flops_i8_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
/ ((($sclk * $numCU) * 1024) / 1000))
unit: Pct of Peak
tips:
- metric_table:
id: 1102
title: Pipeline Stats
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
IPC (Avg):
avg: AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES))
min: MIN((SQ_INSTS / SQ_BUSY_CU_CYCLES))
max: MAX((SQ_INSTS / SQ_BUSY_CU_CYCLES))
unit: Instr/cycle
tips:
IPC (Issue):
avg: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)
+ SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED)
/ SQ_ACTIVE_INST_ANY))
min: MIN(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)
+ SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED)
/ SQ_ACTIVE_INST_ANY))
max: MAX(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)
+ SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED)
/ SQ_ACTIVE_INST_ANY))
unit: Instr/cycle
tips:
SALU Util:
avg: AVG((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU))
min: MIN((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU))
max: MAX((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU))
unit: pct
tips:
VALU Util:
avg: AVG((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU))
min: MIN((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU))
max: MAX((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU))
unit: pct
tips:
VALU Active Threads:
avg: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
min: MIN(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
max: MAX(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
unit: Threads
tips:
MFMA Util:
avg: AVG(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((4 * $numCU) * GRBM_GUI_ACTIVE)))
min: MIN(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((4 * $numCU) * GRBM_GUI_ACTIVE)))
max: MAX(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((4 * $numCU) * GRBM_GUI_ACTIVE)))
unit: pct
tips:
MFMA Instr Cycles:
avg: AVG(((SQ_VALU_MFMA_BUSY_CYCLES / SQ_INSTS_MFMA) if (SQ_INSTS_MFMA != 0)
else None))
min: MIN(((SQ_VALU_MFMA_BUSY_CYCLES / SQ_INSTS_MFMA) if (SQ_INSTS_MFMA != 0)
else None))
max: MAX(((SQ_VALU_MFMA_BUSY_CYCLES / SQ_INSTS_MFMA) if (SQ_INSTS_MFMA != 0)
else None))
unit: cycles/instr
tips:
- metric_table:
id: 1103
title: Arithmetic Operations
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
FLOPs (Total):
avg: AVG((((((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16)
+ (SQ_INSTS_VALU_FMA_F16 * 2))) + ((512 * SQ_INSTS_VALU_MFMA_MOPS_F16) + (512
* SQ_INSTS_VALU_MFMA_MOPS_BF16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
+ SQ_INSTS_VALU_TRANS_F32) + (SQ_INSTS_VALU_FMA_F32 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (SQ_INSTS_VALU_FMA_F64 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) /
$denom))
min: MIN((((((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16)
+ (SQ_INSTS_VALU_FMA_F16 * 2))) + ((512 * SQ_INSTS_VALU_MFMA_MOPS_F16) + (512
* SQ_INSTS_VALU_MFMA_MOPS_BF16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
+ SQ_INSTS_VALU_TRANS_F32) + (SQ_INSTS_VALU_FMA_F32 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (SQ_INSTS_VALU_FMA_F64 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) /
$denom))
max: MAX((((((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16)
+ (SQ_INSTS_VALU_FMA_F16 * 2))) + ((512 * SQ_INSTS_VALU_MFMA_MOPS_F16) + (512
* SQ_INSTS_VALU_MFMA_MOPS_BF16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
+ SQ_INSTS_VALU_TRANS_F32) + (SQ_INSTS_VALU_FMA_F32 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (SQ_INSTS_VALU_FMA_F64 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) /
$denom))
unit: (OPs + $normUnit)
tips:
INT8 OPs:
avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
max: MAX(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
unit: (OPs + $normUnit)
tips:
F16 OPs:
avg: AVG(((((((64 * SQ_INSTS_VALU_ADD_F16) + (64 * SQ_INSTS_VALU_MUL_F16)) +
(64 * SQ_INSTS_VALU_TRANS_F16)) + (128 * SQ_INSTS_VALU_FMA_F16)) + (512 *
SQ_INSTS_VALU_MFMA_MOPS_F16)) / $denom))
min: MIN(((((((64 * SQ_INSTS_VALU_ADD_F16) + (64 * SQ_INSTS_VALU_MUL_F16)) +
(64 * SQ_INSTS_VALU_TRANS_F16)) + (128 * SQ_INSTS_VALU_FMA_F16)) + (512 *
SQ_INSTS_VALU_MFMA_MOPS_F16)) / $denom))
max: MAX(((((((64 * SQ_INSTS_VALU_ADD_F16) + (64 * SQ_INSTS_VALU_MUL_F16)) +
(64 * SQ_INSTS_VALU_TRANS_F16)) + (128 * SQ_INSTS_VALU_FMA_F16)) + (512 *
SQ_INSTS_VALU_MFMA_MOPS_F16)) / $denom))
unit: (OPs + $normUnit)
tips:
BF16 OPs:
avg: AVG(((512 * SQ_INSTS_VALU_MFMA_MOPS_BF16) / $denom))
min: MIN(((512 * SQ_INSTS_VALU_MFMA_MOPS_BF16) / $denom))
max: MAX(((512 * SQ_INSTS_VALU_MFMA_MOPS_BF16) / $denom))
unit: (OPs + $normUnit)
tips:
F32 OPs:
avg: AVG((((64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32)
+ (SQ_INSTS_VALU_FMA_F32 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32)) / $denom))
min: MIN((((64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32)
+ (SQ_INSTS_VALU_FMA_F32 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32)) / $denom))
max: MAX((((64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32)
+ (SQ_INSTS_VALU_FMA_F32 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32)) / $denom))
unit: (OPs + $normUnit)
tips:
F64 OPs:
avg: AVG((((64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
min: MIN((((64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
max: MAX((((64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
unit: (OPs + $normUnit)
tips:
@@ -0,0 +1,121 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1200
title: Local Data Share (LDS)
data source:
- metric_table:
id: 1201
title: Speed-of-Light
header:
metric: Metric
value: Value
tips: Tips
metric:
Utilization:
value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
tips:
Access Rate:
value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU)))
tips:
Bandwidth (Pct-of-Peak):
value:
AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
tips:
Bank Conflict Rate:
value:
AVG((((SQ_LDS_BANK_CONFLICT * 3.125) / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
tips:
comparable: false # for now
cli_style: simple_bar
- metric_table:
id: 1202
title: LDS Stats
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
LDS Instrs:
avg: AVG((SQ_INSTS_LDS / $denom))
min: MIN((SQ_INSTS_LDS / $denom))
max: MAX((SQ_INSTS_LDS / $denom))
unit: (Instr + $normUnit)
tips:
Bandwidth:
avg:
AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ $denom))
min:
MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ $denom))
max:
MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ $denom))
unit: (Bytes + $normUnit)
tips:
# SQ_LDS_BANK_CONFLICT divided by (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT);
# evaluates to None when that difference is zero.
Bank Conflicts/Access:
avg:
AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
min:
MIN(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
max:
MAX(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
unit: Conflicts/Access
tips:
Index Accesses:
avg: AVG((SQ_LDS_IDX_ACTIVE / $denom))
min: MIN((SQ_LDS_IDX_ACTIVE / $denom))
max: MAX((SQ_LDS_IDX_ACTIVE / $denom))
unit: (Cycles + $normUnit)
tips:
Atomic Cycles:
avg: AVG((SQ_LDS_ATOMIC_RETURN / $denom))
min: MIN((SQ_LDS_ATOMIC_RETURN / $denom))
max: MAX((SQ_LDS_ATOMIC_RETURN / $denom))
unit: (Cycles + $normUnit)
tips:
Bank Conflict:
avg: AVG((SQ_LDS_BANK_CONFLICT / $denom))
min: MIN((SQ_LDS_BANK_CONFLICT / $denom))
max: MAX((SQ_LDS_BANK_CONFLICT / $denom))
unit: (Cycles + $normUnit)
tips:
Addr Conflict:
avg: AVG((SQ_LDS_ADDR_CONFLICT / $denom))
min: MIN((SQ_LDS_ADDR_CONFLICT / $denom))
max: MAX((SQ_LDS_ADDR_CONFLICT / $denom))
unit: (Cycles + $normUnit)
tips:
Unaligned Stall:
avg: AVG((SQ_LDS_UNALIGNED_STALL / $denom))
min: MIN((SQ_LDS_UNALIGNED_STALL / $denom))
max: MAX((SQ_LDS_UNALIGNED_STALL / $denom))
unit: (Cycles + $normUnit)
tips:
Mem Violations:
avg: AVG((SQ_LDS_MEM_VIOLATIONS / $denom))
min: MIN((SQ_LDS_MEM_VIOLATIONS / $denom))
max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom))
unit: ( + $normUnit)
tips:
LDS Latency:
avg: AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None))
min: MIN(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None))
max: MAX(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None))
unit: Cycles
coll_level: SQ_INST_LEVEL_LDS
tips:
@@ -0,0 +1,77 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1300
title: Instruction Cache
data source:
- metric_table:
id: 1301
title: Speed-of-Light
header:
metric: Metric
value: Value
tips: Tips
metric:
Bandwidth:
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
tips:
Cache Hit:
value:
AVG(((SQC_ICACHE_HITS * 100) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES)
+ SQC_ICACHE_MISSES_DUPLICATE)))
tips:
comparable: false # for now
cli_style: simple_bar
- metric_table:
id: 1302
title: Instruction Cache Accesses
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Req:
avg: AVG((SQC_ICACHE_REQ / $denom))
min: MIN((SQC_ICACHE_REQ / $denom))
max: MAX((SQC_ICACHE_REQ / $denom))
unit: (Req + $normUnit)
tips:
Hits:
avg: AVG((SQC_ICACHE_HITS / $denom))
min: MIN((SQC_ICACHE_HITS / $denom))
max: MAX((SQC_ICACHE_HITS / $denom))
unit: (Hits + $normUnit)
tips:
Misses - Non Duplicated:
avg: AVG((SQC_ICACHE_MISSES / $denom))
min: MIN((SQC_ICACHE_MISSES / $denom))
max: MAX((SQC_ICACHE_MISSES / $denom))
unit: (Misses + $normUnit)
tips:
Misses - Duplicated:
avg: AVG((SQC_ICACHE_MISSES_DUPLICATE / $denom))
min: MIN((SQC_ICACHE_MISSES_DUPLICATE / $denom))
max: MAX((SQC_ICACHE_MISSES_DUPLICATE / $denom))
unit: (Misses + $normUnit)
tips:
Cache Hit:
avg:
AVG(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES)
+ SQC_ICACHE_MISSES_DUPLICATE)))
min:
MIN(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) +
SQC_ICACHE_MISSES_DUPLICATE)))
max:
MAX(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) +
SQC_ICACHE_MISSES_DUPLICATE)))
unit: pct
tips:
@@ -0,0 +1,164 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1400
title: Scalar L1 Data Cache
data source:
- metric_table:
id: 1401
title: Speed-of-Light
# Column keys mapped to their display titles for the Speed-of-Light table.
header:
metric: Metric
value: Value
tips: Tips
metric:
Bandwidth:
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
tips:
Cache Hit:
value:
AVG((((SQC_DCACHE_HITS * 100) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE))
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
tips:
comparable: false # for now
cli_style: simple_bar
- metric_table:
id: 1402
title: Scalar L1D Cache Accesses
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Req:
avg: AVG((SQC_DCACHE_REQ / $denom))
min: MIN((SQC_DCACHE_REQ / $denom))
max: MAX((SQC_DCACHE_REQ / $denom))
unit: (Req + $normUnit)
tips:
Hits:
avg: AVG((SQC_DCACHE_HITS / $denom))
min: MIN((SQC_DCACHE_HITS / $denom))
max: MAX((SQC_DCACHE_HITS / $denom))
unit: (Req + $normUnit)
tips:
Misses - Non Duplicated:
avg: AVG((SQC_DCACHE_MISSES / $denom))
min: MIN((SQC_DCACHE_MISSES / $denom))
max: MAX((SQC_DCACHE_MISSES / $denom))
unit: (Req + $normUnit)
tips:
# Per-$denom count of SQC_DCACHE_MISSES_DUPLICATE.
Misses - Duplicated:
avg: AVG((SQC_DCACHE_MISSES_DUPLICATE / $denom))
min: MIN((SQC_DCACHE_MISSES_DUPLICATE / $denom))
max: MAX((SQC_DCACHE_MISSES_DUPLICATE / $denom))
unit: (Req + $normUnit)
tips:
Cache Hit:
avg:
AVG((((100 * SQC_DCACHE_HITS) / ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
+ SQC_DCACHE_MISSES_DUPLICATE)) if (((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
+ SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
min:
MIN((((100 * SQC_DCACHE_HITS) / ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
+ SQC_DCACHE_MISSES_DUPLICATE)) if (((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
+ SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
max:
MAX((((100 * SQC_DCACHE_HITS) / ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
+ SQC_DCACHE_MISSES_DUPLICATE)) if (((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
+ SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
unit: pct
tips:
Read Req (Total):
avg:
AVG((((((SQC_DCACHE_REQ_READ_1 + SQC_DCACHE_REQ_READ_2) + SQC_DCACHE_REQ_READ_4)
+ SQC_DCACHE_REQ_READ_8) + SQC_DCACHE_REQ_READ_16) / $denom))
min:
MIN((((((SQC_DCACHE_REQ_READ_1 + SQC_DCACHE_REQ_READ_2) + SQC_DCACHE_REQ_READ_4)
+ SQC_DCACHE_REQ_READ_8) + SQC_DCACHE_REQ_READ_16) / $denom))
max:
MAX((((((SQC_DCACHE_REQ_READ_1 + SQC_DCACHE_REQ_READ_2) + SQC_DCACHE_REQ_READ_4)
+ SQC_DCACHE_REQ_READ_8) + SQC_DCACHE_REQ_READ_16) / $denom))
unit: (Req + $normUnit)
tips:
Atomic Req:
avg: AVG((SQC_DCACHE_ATOMIC / $denom))
min: MIN((SQC_DCACHE_ATOMIC / $denom))
max: MAX((SQC_DCACHE_ATOMIC / $denom))
unit: (Req + $normUnit)
tips:
Read Req (1 DWord):
avg: AVG((SQC_DCACHE_REQ_READ_1 / $denom))
min: MIN((SQC_DCACHE_REQ_READ_1 / $denom))
max: MAX((SQC_DCACHE_REQ_READ_1 / $denom))
unit: (Req + $normUnit)
tips:
Read Req (2 DWord):
avg: AVG((SQC_DCACHE_REQ_READ_2 / $denom))
min: MIN((SQC_DCACHE_REQ_READ_2 / $denom))
max: MAX((SQC_DCACHE_REQ_READ_2 / $denom))
unit: (Req + $normUnit)
tips:
Read Req (4 DWord):
avg: AVG((SQC_DCACHE_REQ_READ_4 / $denom))
min: MIN((SQC_DCACHE_REQ_READ_4 / $denom))
max: MAX((SQC_DCACHE_REQ_READ_4 / $denom))
unit: (Req + $normUnit)
tips:
Read Req (8 DWord):
avg: AVG((SQC_DCACHE_REQ_READ_8 / $denom))
min: MIN((SQC_DCACHE_REQ_READ_8 / $denom))
max: MAX((SQC_DCACHE_REQ_READ_8 / $denom))
unit: (Req + $normUnit)
tips:
Read Req (16 DWord):
avg: AVG((SQC_DCACHE_REQ_READ_16 / $denom))
min: MIN((SQC_DCACHE_REQ_READ_16 / $denom))
max: MAX((SQC_DCACHE_REQ_READ_16 / $denom))
unit: (Req + $normUnit)
tips:
- metric_table:
id: 1403
title: Scalar L1D Cache - L2 Interface
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Read Req:
avg: AVG((SQC_TC_DATA_READ_REQ / $denom))
min: MIN((SQC_TC_DATA_READ_REQ / $denom))
max: MAX((SQC_TC_DATA_READ_REQ / $denom))
unit: (Req + $normUnit)
tips:
Write Req:
avg: AVG((SQC_TC_DATA_WRITE_REQ / $denom))
min: MIN((SQC_TC_DATA_WRITE_REQ / $denom))
max: MAX((SQC_TC_DATA_WRITE_REQ / $denom))
unit: (Req + $normUnit)
tips:
Atomic Req:
avg: AVG((SQC_TC_DATA_ATOMIC_REQ / $denom))
min: MIN((SQC_TC_DATA_ATOMIC_REQ / $denom))
max: MAX((SQC_TC_DATA_ATOMIC_REQ / $denom))
unit: (Req + $normUnit)
tips:
Stall:
avg: AVG((SQC_TC_STALL / $denom))
min: MIN((SQC_TC_STALL / $denom))
max: MAX((SQC_TC_STALL / $denom))
unit: (Cycles + $normUnit)
tips:
@@ -0,0 +1,174 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1500
title: Texture Addresser and Texture Data (TA/TD)
data source:
- metric_table:
id: 1501
title: TA
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
TA Busy:
avg: AVG(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
min: MIN(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
max: MAX(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
unit: pct
tips:
TC2TA Addr Stall:
avg: AVG(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
min: MIN(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
max: MAX(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
unit: pct
tips:
TC2TA Data Stall:
avg: AVG(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
min: MIN(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
max: MAX(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
unit: pct
tips:
TD2TA Addr Stall:
avg: AVG(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
min: MIN(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
unit: pct
tips:
Total Instructions:
avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom))
min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom))
max: MAX((TA_TOTAL_WAVEFRONTS_sum / $denom))
unit: (Instr + $normUnit)
tips:
Flat Instr:
avg: AVG((TA_FLAT_WAVEFRONTS_sum / $denom))
min: MIN((TA_FLAT_WAVEFRONTS_sum / $denom))
max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom))
unit: (Instr + $normUnit)
tips:
Flat Read Instr:
avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
unit: (Instr + $normUnit)
tips:
Flat Write Instr:
avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
max: MAX((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
unit: (Instr + $normUnit)
tips:
Flat Atomic Instr:
avg: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
min: MIN((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
max: MAX((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
unit: (Instr + $normUnit)
tips:
Buffer Instr:
avg: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom))
min: MIN((TA_BUFFER_WAVEFRONTS_sum / $denom))
max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom))
unit: (Instr + $normUnit)
tips:
Buffer Read Instr:
avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
unit: (Instr + $normUnit)
tips:
Buffer Write Instr:
avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
max: MAX((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
unit: (Instr + $normUnit)
tips:
Buffer Atomic Instr:
avg: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
min: MIN((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
max: MAX((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
unit: (Instr + $normUnit)
tips:
# Per-$denom value of TA_BUFFER_TOTAL_CYCLES_sum.
Buffer Total Cycles:
avg: AVG((TA_BUFFER_TOTAL_CYCLES_sum / $denom))
min: MIN((TA_BUFFER_TOTAL_CYCLES_sum / $denom))
max: MAX((TA_BUFFER_TOTAL_CYCLES_sum / $denom))
unit: (Cycles + $normUnit)
tips:
Buffer Coalesced Read:
avg: AVG((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom))
min: MIN((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom))
max: MAX((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom))
unit: (Cycles + $normUnit)
tips:
Buffer Coalesced Write:
avg: AVG((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom))
min: MIN((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom))
max: MAX((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom))
unit: (Cycles + $normUnit)
tips:
- metric_table:
id: 1502
title: TD
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
TD Busy:
avg: AVG(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
min: MIN(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
max: MAX(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
unit: pct
tips:
TC2TD Stall:
avg: AVG(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
min: MIN(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
max: MAX(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
unit: pct
tips:
SPI2TD Stall:
avg: AVG(((100 * TD_SPI_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
min: MIN(((100 * TD_SPI_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
max: MAX(((100 * TD_SPI_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
unit: pct
tips:
Coalescable Instr:
avg: AVG((TD_COALESCABLE_WAVEFRONT_sum / $denom))
min: MIN((TD_COALESCABLE_WAVEFRONT_sum / $denom))
max: MAX((TD_COALESCABLE_WAVEFRONT_sum / $denom))
unit: (Instr + $normUnit)
tips:
Load Instr:
avg: AVG((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum)
/ $denom))
min: MIN((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum)
/ $denom))
max: MAX((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum)
/ $denom))
unit: (Instr + $normUnit)
tips:
Store Instr:
avg: AVG((TD_STORE_WAVEFRONT_sum / $denom))
min: MIN((TD_STORE_WAVEFRONT_sum / $denom))
max: MAX((TD_STORE_WAVEFRONT_sum / $denom))
unit: (Instr + $normUnit)
tips:
Atomic Instr:
avg: AVG((TD_ATOMIC_WAVEFRONT_sum / $denom))
min: MIN((TD_ATOMIC_WAVEFRONT_sum / $denom))
max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom))
unit: (Instr + $normUnit)
tips:
@@ -0,0 +1,403 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1600
title: Vector L1 Data Cache
data source:
- metric_table:
id: 1601
title: Speed-of-Light
header:
metric: Metric
value: Value
tips: Tips
metric:
Buffer Coalescing:
value:
AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
* 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None))
tips:
Cache Util:
value:
AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
!= 0) else None))
tips:
Cache BW:
value:
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
Cache Hit:
value:
AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None))
tips:
comparable: false # for now
cli_style: simple_bar
- metric_table:
id: 1602
title: L1D Cache Stalls (%)
header:
metric: Metric
expr: Expression
tips: Tips
metric:
Stalled on L2 Data:
expr:
(((100 * TCP_PENDING_STALL_CYCLES_sum) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
!= 0) else None)
tips:
Stalled on L2 Req:
expr:
(((100 * TCP_TCR_TCP_STALL_CYCLES_sum) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
!= 0) else None)
tips:
Tag RAM Stall (Read):
expr:
(((100 * TCP_READ_TAGCONFLICT_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
if (TCP_GATE_EN1_sum != 0) else None)
tips:
Tag RAM Stall (Write):
expr:
(((100 * TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
if (TCP_GATE_EN1_sum != 0) else None)
tips:
Tag RAM Stall (Atomic):
expr:
(((100 * TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
if (TCP_GATE_EN1_sum != 0) else None)
tips:
cli_style: simple_box
- metric_table:
id: 1603
title: L1D Cache Accesses
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Total Req:
avg: AVG((TCP_TOTAL_ACCESSES_sum / $denom))
min: MIN((TCP_TOTAL_ACCESSES_sum / $denom))
max: MAX((TCP_TOTAL_ACCESSES_sum / $denom))
unit: (Req + $normUnit)
tips:
Read Req:
avg: AVG((TCP_TOTAL_READ_sum / $denom))
min: MIN((TCP_TOTAL_READ_sum / $denom))
max: MAX((TCP_TOTAL_READ_sum / $denom))
unit: (Req + $normUnit)
tips:
Write Req:
avg: AVG((TCP_TOTAL_WRITE_sum / $denom))
min: MIN((TCP_TOTAL_WRITE_sum / $denom))
max: MAX((TCP_TOTAL_WRITE_sum / $denom))
unit: (Req + $normUnit)
tips:
Atomic Req:
avg:
AVG(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
/ $denom))
min:
MIN(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
/ $denom))
max:
MAX(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
/ $denom))
unit: (Req + $normUnit)
tips:
Cache BW:
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
unit: GB/s
tips:
Cache Accesses:
avg: AVG((TCP_TOTAL_CACHE_ACCESSES_sum / $denom))
min: MIN((TCP_TOTAL_CACHE_ACCESSES_sum / $denom))
max: MAX((TCP_TOTAL_CACHE_ACCESSES_sum / $denom))
unit: (Req + $normUnit)
tips:
Cache Hits:
avg:
AVG(((TCP_TOTAL_CACHE_ACCESSES_sum - (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ $denom))
min:
MIN(((TCP_TOTAL_CACHE_ACCESSES_sum - (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ $denom))
max:
MAX(((TCP_TOTAL_CACHE_ACCESSES_sum - (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ $denom))
unit: (Req + $normUnit)
tips:
Cache Hit Rate:
avg:
AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None))
min:
MIN(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None))
max:
MAX(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None))
unit: pct
tips:
Invalidate:
avg: AVG((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom))
min: MIN((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom))
max: MAX((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom))
unit: (Req + $normUnit)
tips:
# 64 bytes times the sum of L1->L2 read, write and atomic (with/without return)
# requests, normalized by $denom. min/max use the MIN/MAX aggregators so the
# three columns are not all the average.
L1-L2 BW:
avg:
AVG(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum)
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
min:
MIN(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum)
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
max:
MAX(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum)
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
unit: (Bytes + $normUnit)
tips:
L1-L2 Read:
avg: AVG((TCP_TCC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
L1-L2 Write:
avg: AVG((TCP_TCC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
L1-L2 Atomic:
avg:
AVG(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
/ $denom))
min:
MIN(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
/ $denom))
max:
MAX(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
/ $denom))
unit: (Req + $normUnit)
tips:
L1 Access Latency:
# NOTE(review): the avg/min/max expressions below are commented out, so each
# key has an empty (null) value and this row carries no computed result.
# Presumably the TCP latency counters are not available on this target;
# confirm before re-enabling.
avg:
# AVG(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum
# != 0) else None))
min:
# MIN(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum
# != 0) else None))
max:
# MAX(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum
# != 0) else None))
unit: Cycles
tips:
L1-L2 Read Latency:
# NOTE(review): the avg/min/max expressions below are commented out, so each
# key has an empty (null) value and this row carries no computed result.
# Presumably TCP_TCC_READ_REQ_LATENCY_sum is not available on this target;
# confirm before re-enabling.
avg:
# AVG(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum))
# if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None))
min:
# MIN(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum))
# if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None))
max:
# MAX(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum))
# if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None))
unit: Cycles
tips:
L1-L2 Write Latency:
# NOTE(review): the avg/min/max expressions below are commented out, so each
# key has an empty (null) value and this row carries no computed result.
# Presumably TCP_TCC_WRITE_REQ_LATENCY_sum is not available on this target;
# confirm before re-enabling.
avg:
# AVG(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
# if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) != 0) else
# None))
min:
# MIN(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
# if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) != 0) else
# None))
max:
# MAX(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
# if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) != 0) else
# None))
unit: Cycles
tips:
- metric_table:
id: 1604
title: L1D - L2 Transactions
header:
metric: Metric
xfer: Xfer
coherency: Coherency
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
NC - Read:
xfer: Read
coherency: NC
avg: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Read:
xfer: Read
coherency: UC
avg: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Read:
xfer: Read
coherency: CC
avg: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Read:
xfer: Read
coherency: RW
avg: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Write:
xfer: Write
coherency: RW
avg: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
NC - Write:
xfer: Write
coherency: NC
avg: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Write:
xfer: Write
coherency: UC
avg: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Write:
xfer: Write
coherency: CC
avg: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
NC - Atomic:
xfer: Atomic
coherency: NC
avg: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Atomic:
xfer: Atomic
coherency: UC
avg: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Atomic:
xfer: Atomic
coherency: CC
avg: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Atomic:
xfer: Atomic
coherency: RW
avg: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
- metric_table:
id: 1605
title: L1D Addr Translation
header:
metric: Metric
avg: Avg
min: Min
max: Max
units: Units
tips: Tips
metric:
Req:
avg: AVG((TCP_UTCL1_REQUEST_sum / $denom))
min: MIN((TCP_UTCL1_REQUEST_sum / $denom))
max: MAX((TCP_UTCL1_REQUEST_sum / $denom))
units: ( + $normUnit)
tips:
Hit Ratio:
avg:
AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
(TCP_UTCL1_REQUEST_sum != 0) else None))
min:
MIN((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
(TCP_UTCL1_REQUEST_sum != 0) else None))
max:
MAX((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
(TCP_UTCL1_REQUEST_sum != 0) else None))
units: pct
tips:
Hits:
avg: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
units: ( + $normUnit)
tips:
Misses (Translation):
avg: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
units: ( + $normUnit)
tips:
Misses (Permission):
avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
units: ( + $normUnit)
tips:
@@ -0,0 +1,387 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1700
title: L2 Cache
data source:
- metric_table:
id: 1701
title: Speed-of-Light
header:
metric: Metric
value: Value
unit: Unit
tips: Tips
metric:
L2 Util:
value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE)))
unit: pct
tips:
Cache Hit:
value:
AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else 0))
unit: pct
tips:
L2-EA Rd BW:
value:
AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
unit: GB/s
tips:
L2-EA Wr BW:
value:
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
unit: GB/s
tips:
- metric_table:
id: 1702
title: L2 - Fabric Transactions
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Read BW:
avg:
AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
min:
MIN((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
max:
MAX((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
unit: (Bytes + $normUnit)
tips:
Write BW:
avg:
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
min:
MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
max:
MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
unit: (Bytes + $normUnit)
tips:
Read (32B):
avg: AVG((TCC_EA0_RDREQ_32B_sum / $denom))
min: MIN((TCC_EA0_RDREQ_32B_sum / $denom))
max: MAX((TCC_EA0_RDREQ_32B_sum / $denom))
unit: (Req + $normUnit)
tips:
Read (Uncached 32B):
avg: AVG((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
unit: (Req + $normUnit)
tips:
Read (64B):
avg: AVG(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
min: MIN(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
max: MAX(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
unit: (Req + $normUnit)
tips:
HBM Read:
avg: AVG((TCC_EA0_RDREQ_DRAM_sum / $denom))
min: MIN((TCC_EA0_RDREQ_DRAM_sum / $denom))
max: MAX((TCC_EA0_RDREQ_DRAM_sum / $denom))
unit: (Req + $normUnit)
tips:
Write (32B):
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
max: MAX(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
unit: (Req + $normUnit)
tips:
Write (Uncached 32B):
avg: AVG((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
unit: (Req + $normUnit)
tips:
Write (64B):
avg: AVG((TCC_EA0_WRREQ_64B_sum / $denom))
min: MIN((TCC_EA0_WRREQ_64B_sum / $denom))
max: MAX((TCC_EA0_WRREQ_64B_sum / $denom))
unit: (Req + $normUnit)
tips:
HBM Write:
avg: AVG((TCC_EA0_WRREQ_DRAM_sum / $denom))
min: MIN((TCC_EA0_WRREQ_DRAM_sum / $denom))
max: MAX((TCC_EA0_WRREQ_DRAM_sum / $denom))
unit: (Req + $normUnit)
tips:
Read Latency:
avg:
AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum !=
0) else None))
min:
MIN(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum !=
0) else None))
max:
MAX(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum !=
0) else None))
unit: Cycles
tips:
Write Latency:
avg:
AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum !=
0) else None))
min:
MIN(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum !=
0) else None))
max:
MAX(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum !=
0) else None))
unit: Cycles
tips:
Atomic Latency:
avg:
AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
min:
MIN(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
max:
MAX(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
unit: Cycles
tips:
# Read Stall (pct): the avg/min/max expressions below are commented out, so all
# three keys are empty (YAML null) and this row carries no computed value.
# The disabled formula is 100 * (IO + GMI + DRAM read credit stalls) / TCC_BUSY_sum,
# guarded against TCC_BUSY_sum == 0.
# NOTE(review): the reason for disabling is not recorded in this file - confirm the
# TCC_EA0_RDREQ_*_CREDIT_STALL counters are available on this target before re-enabling.
Read Stall:
avg:
# AVG((((100 * ((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum + TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum)
# + TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
# 0) else None))
min:
# MIN((((100 * ((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum + TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum)
# + TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
# 0) else None))
max:
# MAX((((100 * ((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum + TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum)
# + TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
# 0) else None))
unit: pct
tips:
# Write Stall (pct): the avg/min/max expressions below are commented out, so all
# three keys are empty (YAML null) and this row carries no computed value.
# The disabled formula is 100 * (IO + GMI + DRAM write credit stalls) / TCC_BUSY_sum,
# guarded against TCC_BUSY_sum == 0.
# NOTE(review): the reason for disabling is not recorded in this file - confirm the
# TCC_EA0_WRREQ_*_CREDIT_STALL counters are available on this target before re-enabling.
Write Stall:
avg:
# AVG((((100 * ((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum + TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum)
# + TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
# 0) else None))
min:
# MIN((((100 * ((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum + TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum)
# + TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
# 0) else None))
max:
# MAX((((100 * ((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum + TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum)
# + TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
# 0) else None))
unit: pct
tips:
- metric_table:
id: 1703
title: L2 Cache Accesses
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Req:
avg: AVG((TCC_REQ_sum / $denom))
min: MIN((TCC_REQ_sum / $denom))
max: MAX((TCC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
Streaming Req:
avg: AVG((TCC_STREAMING_REQ_sum / $denom))
min: MIN((TCC_STREAMING_REQ_sum / $denom))
max: MAX((TCC_STREAMING_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
Read Req:
avg: AVG((TCC_READ_sum / $denom))
min: MIN((TCC_READ_sum / $denom))
max: MAX((TCC_READ_sum / $denom))
unit: (Req + $normUnit)
tips:
Write Req:
avg: AVG((TCC_WRITE_sum / $denom))
min: MIN((TCC_WRITE_sum / $denom))
max: MAX((TCC_WRITE_sum / $denom))
unit: (Req + $normUnit)
tips:
Atomic Req:
avg: AVG((TCC_ATOMIC_sum / $denom))
min: MIN((TCC_ATOMIC_sum / $denom))
max: MAX((TCC_ATOMIC_sum / $denom))
unit: (Req + $normUnit)
tips:
Probe Req:
avg: AVG((TCC_PROBE_sum / $denom))
min: MIN((TCC_PROBE_sum / $denom))
max: MAX((TCC_PROBE_sum / $denom))
unit: (Req + $normUnit)
tips:
Hits:
avg: AVG((TCC_HIT_sum / $denom))
min: MIN((TCC_HIT_sum / $denom))
max: MAX((TCC_HIT_sum / $denom))
unit: (Hits + $normUnit)
tips:
Misses:
avg: AVG((TCC_MISS_sum / $denom))
min: MIN((TCC_MISS_sum / $denom))
max: MAX((TCC_MISS_sum / $denom))
unit: (Misses + $normUnit)
tips:
Cache Hit:
avg:
AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else None))
min:
MIN((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else None))
max:
MAX((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else None))
unit: pct
tips:
Writeback:
avg: AVG((TCC_WRITEBACK_sum / $denom))
min: MIN((TCC_WRITEBACK_sum / $denom))
max: MAX((TCC_WRITEBACK_sum / $denom))
unit: ( + $normUnit)
tips:
NC Req:
avg: AVG((TCC_NC_REQ_sum / $denom))
min: MIN((TCC_NC_REQ_sum / $denom))
max: MAX((TCC_NC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC Req:
avg: AVG((TCC_UC_REQ_sum / $denom))
min: MIN((TCC_UC_REQ_sum / $denom))
max: MAX((TCC_UC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC Req:
avg: AVG((TCC_CC_REQ_sum / $denom))
min: MIN((TCC_CC_REQ_sum / $denom))
max: MAX((TCC_CC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW Req:
avg: AVG((TCC_RW_REQ_sum / $denom))
min: MIN((TCC_RW_REQ_sum / $denom))
max: MAX((TCC_RW_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
Writeback (Normal):
avg: AVG((TCC_NORMAL_WRITEBACK_sum / $denom))
min: MIN((TCC_NORMAL_WRITEBACK_sum / $denom))
max: MAX((TCC_NORMAL_WRITEBACK_sum / $denom))
unit: ( + $normUnit)
tips:
Writeback (TC Req):
avg: AVG((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom))
min: MIN((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom))
max: MAX((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom))
unit: ( + $normUnit)
tips:
Evict (Normal):
avg: AVG((TCC_NORMAL_EVICT_sum / $denom))
min: MIN((TCC_NORMAL_EVICT_sum / $denom))
max: MAX((TCC_NORMAL_EVICT_sum / $denom))
unit: ( + $normUnit)
tips:
Evict (TC Req):
avg: AVG((TCC_ALL_TC_OP_INV_EVICT_sum / $denom))
min: MIN((TCC_ALL_TC_OP_INV_EVICT_sum / $denom))
max: MAX((TCC_ALL_TC_OP_INV_EVICT_sum / $denom))
unit: ( + $normUnit)
tips:
# Table 1704 - L2 - Fabric Interface Stalls: one row per transaction / stall-type pair,
# each reported as avg/min/max of a stall counter divided by $denom.
# For the six *_CREDIT_STALL rows the avg/min/max expressions are commented out, so
# those keys are empty (YAML null). Only "Write - Credit Starvation" still has live
# expressions (TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom).
# NOTE(review): the reason the credit-stall expressions are disabled is not stated in
# this file - confirm counter availability for this target before re-enabling them.
- metric_table:
id: 1704
title: L2 - Fabric Interface Stalls
header:
metric: Metric
type: Type
transaction: Transaction
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
# Disabled: read IO credit stalls.
Read - Remote Socket Stall:
type: Remote Socket Stall
transaction: Read
avg: # AVG((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum / $denom))
min: # MIN((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum / $denom))
max: # MAX((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
# Disabled: read GMI credit stalls.
Read - Peer GCD Stall:
type: Peer GCD Stall
transaction: Read
avg: # AVG((TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum / $denom))
min: # MIN((TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum / $denom))
max: # MAX((TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
# Disabled: read DRAM credit stalls.
Read - HBM Stall:
type: HBM Stall
transaction: Read
avg: # AVG((TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
min: # MIN((TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
max: # MAX((TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
# Disabled: write IO credit stalls.
Write - Remote Socket Stall:
type: Remote Socket Stall
transaction: Write
avg: # AVG((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum / $denom))
min: # MIN((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum / $denom))
max: # MAX((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
# Disabled: write GMI credit stalls.
Write - Peer GCD Stall:
type: Peer GCD Stall
transaction: Write
avg: # AVG((TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum / $denom))
min: # MIN((TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum / $denom))
max: # MAX((TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
# Disabled: write DRAM credit stalls.
Write - HBM Stall:
type: HBM Stall
transaction: Write
avg: # AVG((TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
min: # MIN((TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
max: # MAX((TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
# Live row: the only metric in this table that still evaluates an expression.
Write - Credit Starvation:
type: Credit Starvation
transaction: Write
avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
min: MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
@@ -0,0 +1,298 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1800
title: L2 Cache (per Channel)
data source:
- metric_table:
id: 1801
title: Aggregate Stats (All 32 channels)
header:
metric: Metric
avg: Avg
std dev: Std Dev
min: Min
max: Max
unit: Unit
tips: Tips
metric:
L2 Cache Hit Rate:
avg: AVG(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
/ (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15])) != 0) else None)
std dev: STD(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
/ (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15])) != 0) else None)
min: MIN(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
/ (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15])) != 0) else None)
max: MAX(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
/ (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15])) != 0) else None)
unit: pct
tips:
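# FIXME: add the remaining aggregate metrics.
# NOTE(review): the table title says "All 32 channels", but the hit-rate expressions
# only sum TCC_HIT / TCC_MISS for channels 0-15 - confirm which one is intended.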
- metric_table:
id: 1802
title: L2 Cache Hit Rate (%)
header:
metric: Metric
expr: Expression
metric:
"::_1":
expr:
(((100 * TCC_HIT[::_1]) / (TCC_HIT[::_1] + TCC_MISS[::_1])) if ((TCC_HIT[::_1]
+ TCC_MISS[::_1]) != 0) else None)
placeholder_range:
"::_1": $totalL2Banks
cli_style: simple_box
- metric_table:
id: 1803
title: Requests (Requests)
header:
metric: Metric
expr: Expression
metric:
"::_1":
expr: (TO_INT(TCC_REQ[::_1]) / $denom)
placeholder_range:
"::_1": $totalL2Banks
cli_style: simple_box
- metric_table:
id: 1804
title: L1-L2 Access (Requests)
header:
metric: Metric
read req: L1-L2 Read
write req: L1-L2 Write
atomic req: L1-L2 Atomic
metric:
"::_1":
read req: AVG((TO_INT(TCC_READ[::_1]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[::_1]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[::_1]) / $denom))
placeholder_range:
"::_1": $totalL2Banks
cli_style: simple_multiple_bar
- metric_table:
id: 1805
title: L2-EA Access (Requests)
header:
metric: Metric
read req: L2-EA Read
write req: L2-EA Write
atomic req: L2-EA Atomic
metric:
"::_1":
read req: AVG((TO_INT(TCC_EA0_RDREQ[::_1]) / $denom))
write req: AVG((TO_INT(TCC_EA0_WRREQ[::_1]) / $denom))
atomic req: AVG((TO_INT(TCC_EA0_ATOMIC[::_1]) / $denom))
placeholder_range:
"::_1": $totalL2Banks
cli_style: simple_multiple_bar
# - metric_table:
# id: 1806
# title: L2-EA Latency (Cycles)
# header:
# metric: Metric
# read lat: L2-EA Read
# write lat: L2-EA Write
# atomic lat: L2-EA Atomic
# metric:
# "::_1":
# read lat:
# AVG(((TCC_EA0_RDREQ_LEVEL[::_1] / TCC_EA0_RDREQ[::_1]) if (TCC_EA0_RDREQ[::_1]
# != 0) else None))
# write lat:
# AVG(((TCC_EA0_WRREQ_LEVEL[::_1] / TCC_EA0_WRREQ[::_1]) if (TCC_EA0_WRREQ[::_1]
# != 0) else None))
# atomic lat:
# AVG(((TCC_EA0_ATOMIC_LEVEL[::_1] / TCC_EA0_ATOMIC[::_1]) if
# (TCC_EA0_ATOMIC[::_1] != 0) else 0))
# placeholder_range:
# "::_1": $totalL2Banks
# cli_style: simple_multiple_bar
- metric_table:
id: 1806
title: L2-EA Read Latency (Cycles)
header:
metric: Metric
expr: Expression
metric:
"::_1":
expr:
((TCC_EA0_RDREQ_LEVEL[::_1] / TCC_EA0_RDREQ[::_1]) if (TCC_EA0_RDREQ[::_1]
!= 0) else None)
placeholder_range:
"::_1": $totalL2Banks
cli_style: simple_box
- metric_table:
id: 1807
title: L2-EA Write Latency (Cycles)
header:
metric: Metric
expr: Expression
metric:
"::_1":
expr:
((TCC_EA0_WRREQ_LEVEL[::_1] / TCC_EA0_WRREQ[::_1]) if (TCC_EA0_WRREQ[::_1]
!= 0) else None)
placeholder_range:
"::_1": $totalL2Banks
cli_style: simple_box
# Table 1808 - L2-EA Atomic Latency (Cycles): TCC_EA0_ATOMIC_LEVEL / TCC_EA0_ATOMIC
# for the channel index written as "::_1" (presumably expanded once per value of
# placeholder_range - verify against the config parser).
# NOTE(review): a channel with no atomics yields 0 here, whereas the read and write
# latency tables (ids 1806, 1807) yield None in the same situation - confirm the
# difference is intentional.
- metric_table:
id: 1808
title: L2-EA Atomic Latency (Cycles)
header:
metric: Metric
expr: Expression
metric:
"::_1":
expr: ((TCC_EA0_ATOMIC_LEVEL[::_1] / TCC_EA0_ATOMIC[::_1]) if
(TCC_EA0_ATOMIC[::_1] != 0) else 0)
placeholder_range:
"::_1": $totalL2Banks
cli_style: simple_box
# Table 1809 - L2-EA Read Stall: per channel index "::_1", the average of the IO, GMI
# and DRAM read credit-stall counters, each divided by $denom.
# NOTE(review): the "_sum" forms of these same counters are commented out in the
# "L2 - Fabric Interface Stalls" table (id 1704) - confirm the per-channel counters
# are actually collected for this target.
- metric_table:
id: 1809
title: L2-EA Read Stall (Cycles per normUnit)
header:
metric: Metric
ea read stall - io: L2-EA Read Stall - IO
ea read stall - gmi: L2-EA Read Stall - GMI
ea read stall - dram: L2-EA Read Stall - DRAM
metric:
"::_1":
ea read stall - io: AVG((TO_INT(TCC_EA0_RDREQ_IO_CREDIT_STALL[::_1]) / $denom))
ea read stall - gmi: AVG((TO_INT(TCC_EA0_RDREQ_GMI_CREDIT_STALL[::_1]) / $denom))
ea read stall - dram: AVG((TO_INT(TCC_EA0_RDREQ_DRAM_CREDIT_STALL[::_1]) / $denom))
placeholder_range:
"::_1": $totalL2Banks
cli_style: simple_multiple_bar
# Table 1810 - L2-EA Write Stall: per channel index "::_1", the average of the IO, GMI
# and DRAM write credit-stall counters plus the write-request starvation counter, each
# divided by $denom.
# The starvation counter is TCC_TOO_MANY_EA_WRREQS_STALL (no "EA0" infix), the same
# name the "Write - Credit Starvation" row of table 1704 uses with the _sum suffix.
# NOTE(review): the "_sum" forms of the three *_CREDIT_STALL counters are commented out
# in table 1704 - confirm the per-channel counters are actually collected for this target.
- metric_table:
id: 1810
title: L2-EA Write Stall (Cycles per normUnit)
header:
metric: Metric
ea write stall - io: L2-EA Write Stall - IO
ea write stall - gmi: L2-EA Write Stall - GMI
ea write stall - dram: L2-EA Write Stall - DRAM
ea write stall - starve: L2-EA Write Stall - Starve
metric:
"::_1":
ea write stall - io: AVG((TO_INT(TCC_EA0_WRREQ_IO_CREDIT_STALL[::_1]) / $denom))
ea write stall - gmi: AVG((TO_INT(TCC_EA0_WRREQ_GMI_CREDIT_STALL[::_1]) / $denom))
ea write stall - dram: AVG((TO_INT(TCC_EA0_WRREQ_DRAM_CREDIT_STALL[::_1]) / $denom))
ea write stall - starve: AVG((TO_INT(TCC_TOO_MANY_EA_WRREQS_STALL[::_1]) / $denom))
placeholder_range:
"::_1": $totalL2Banks
cli_style: simple_multiple_bar
- metric_table:
id: 1811
title: L2 Tag Stall (cycles)
header:
metric: Metric
expr: Expression
metric:
"::_1":
expr: TCC_TAG_STALL[::_1]
placeholder_range:
"::_1": $totalL2Banks
cli_style: simple_box
- metric_table:
id: 1812
title: L2 Bubble (128B request)
header:
metric: Metric
expr: Expression
metric:
"::_1":
expr: TCC_BUBBLE[::_1]
placeholder_range:
"::_1": $totalL2Banks
# tips: Number of 128-byte read requests sent to EA
cli_style: simple_box
@@ -0,0 +1,8 @@
---
Panel Config:
id: 2000
title: Kernels
data source:
- raw_csv_table:
id: 2001
source: pmc_dispatch_info.csv
@@ -0,0 +1,112 @@
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import os
import config
from omniperf_soc.soc_base import OmniSoC_Base
from utils.utils import demarcate, mibench
from roofline import Roofline
import logging
class gfx941_soc(OmniSoC_Base):
    """SoC definition registered under the name "gfx941".

    The constructor hands the SoC name, perfmon config directory, compatible
    profiler list, per-IP-block counter limits and SoC parameters to the
    setters inherited from OmniSoC_Base. The three hook methods defer to the
    base class first; the roofline steps are currently logged as disabled.
    """

    def __init__(self, args):
        """Configure the SoC from the parsed command-line ``args``."""
        super().__init__(args)
        self.set_soc_name("gfx941")

        # Roofline-only runs use the "roofline" counter configs; every other
        # run uses the generalized Mi300 perfmon configs stored under "gfx940".
        # The hasattr() guard covers argument sets without a roof_only field.
        roofline_only = (
            hasattr(self.get_args(), "roof_only") and self.get_args().roof_only
        )
        config_subdir = "roofline" if roofline_only else "gfx940"
        self.set_perfmon_dir(
            os.path.join(
                str(config.omniperf_home),
                "omniperf_soc",
                "profile_configs",
                config_subdir,
            )
        )

        self.set_compatible_profilers(["rocprofv2"])

        # Per IP block max number of simultaneous counters (GFX IP blocks).
        # NOTE(review): "TCC_channels" is 32 here while "L2Banks" in the SoC
        # parameters below is 16 - confirm both values are intended.
        counter_limits = {
            "SQ": 8,
            "TA": 2,
            "TD": 2,
            "TCP": 4,
            "TCC": 4,
            "CPC": 2,
            "CPF": 2,
            "SPI": 2,
            "GRBM": 2,
            "GDS": 4,
            "TCC_channels": 32
        }
        self.set_perfmon_config(counter_limits)

        soc_parameters = {
            "numSE": 8,
            "numCU": 38,
            "numSIMD": 4,
            "numWavesPerCU": 32,
            "numSQC": 56,
            "L2Banks": 16,
            "LDSBanks": 32,
            "Freq": 1950,
            "mclk": 1300
        }
        self.set_soc_param(soc_parameters)

        self.roofline_obj = Roofline(args)

    # -----------------------
    # Required child methods
    # -----------------------
    @demarcate
    def profiling_setup(self):
        """Run the base-class profiling setup, then filter perfmon counters.

        The ``roof_only`` argument value is forwarded to ``perfmon_filter``.
        """
        super().profiling_setup()
        roofline_only = self.get_args().roof_only
        self.perfmon_filter(roofline_only)

    @demarcate
    def post_profiling(self):
        """Run the base-class post-profiling step and log that roofline is off."""
        super().post_profiling()
        logging.info("[roofline] Roofline temporarily disabled in Mi300")
        # Disabled roofline post-processing, kept for reference:
        # if not self.get_args().no_roof:
        #     logging.info("[roofline] Checking for roofline.csv in " + str(self.get_args().path))
        #     if not os.path.isfile(os.path.join(self.get_args().path, "roofline.csv")):
        #         mibench(self.get_args())
        #     self.roofline_obj.post_processing()
        # else:
        #     logging.info("[roofline] Skipping roofline")

    @demarcate
    def analysis_setup(self, roofline_parameters=None):
        """Run the base-class analysis setup and log that roofline is off.

        ``roofline_parameters`` is accepted but unused while the roofline
        configuration below stays commented out.
        """
        super().analysis_setup()
        logging.info("[roofline] Roofline temporarily disabled in Mi300")
        # Disabled roofline configuration, kept for reference:
        # if roofline_parameters:
        #     self.roofline_obj = Roofline(self.get_args(), roofline_parameters)