Adding support for Mi300X-A0
Signed-off-by: colramos-amd <colramos@amd.com>
[ROCm/rocprofiler-compute commit: f229b36277]
Este commit está contenido en:
@@ -51,6 +51,7 @@ class Omniperf:
|
||||
"gfx906": {"mi50": ["MI50", "MI60"]},
|
||||
"gfx908": {"mi100": ["MI100"]},
|
||||
"gfx90a": {"mi200": ["MI210", "MI250", "MI250X"]},
|
||||
"gfx941": {"mi300": ["MI300X_A0"]},
|
||||
"gfx942": {"mi300": ["MI300A_A1", "MI300X_A1"]},
|
||||
}
|
||||
|
||||
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
---
|
||||
Panel Config:
|
||||
id: 000
|
||||
title: Top Stat
|
||||
data source:
|
||||
- raw_csv_table:
|
||||
id: 001
|
||||
source: pmc_kernel_top.csv
|
||||
+9
@@ -0,0 +1,9 @@
|
||||
---
|
||||
Panel Config:
|
||||
id: 100
|
||||
title: System Info
|
||||
data source:
|
||||
- raw_csv_table:
|
||||
id: 101
|
||||
source: sysinfo.csv
|
||||
columnwise: True
|
||||
+247
@@ -0,0 +1,247 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
SALU: &SALU_anchor Scalar Arithmetic Logic Unit
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 200
|
||||
title: System Speed-of-Light
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 201
|
||||
title: Speed-of-Light
|
||||
header:
|
||||
metric: Metric
|
||||
value: Value
|
||||
unit: Unit
|
||||
peak: Peak
|
||||
pop: PoP
|
||||
tips: Tips
|
||||
metric:
|
||||
VALU FLOPs:
|
||||
value: AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
|
||||
+ SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64
|
||||
+ SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64))))
|
||||
/ (EndNs - BeginNs)))
|
||||
unit: GFLOP
|
||||
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
|
||||
pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
|
||||
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
|
||||
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
|
||||
* $numCU) * 64) * 2) / 1000))
|
||||
tips:
|
||||
VALU IOPs:
|
||||
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs)))
|
||||
unit: GIOP
|
||||
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs
|
||||
- BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 256) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F64):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 256) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
tips:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))
|
||||
unit: GIOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
Active CUs:
|
||||
value: $numActiveCUs
|
||||
unit: CUs
|
||||
peak: $numCU
|
||||
pop: ((100 * $numActiveCUs) / $numCU)
|
||||
tips:
|
||||
SALU Util:
|
||||
value: AVG(((100 * SQ_ACTIVE_INST_SCA) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
unit: pct
|
||||
peak: 100
|
||||
pop: AVG(((100 * SQ_ACTIVE_INST_SCA) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
tips:
|
||||
VALU Util:
|
||||
value: AVG(((100 * SQ_ACTIVE_INST_VALU) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
unit: pct
|
||||
peak: 100
|
||||
pop: AVG(((100 * SQ_ACTIVE_INST_VALU) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
tips:
|
||||
MFMA Util:
|
||||
value: AVG(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((GRBM_GUI_ACTIVE * $numCU)
|
||||
* 4)))
|
||||
unit: pct
|
||||
peak: 100
|
||||
pop: AVG(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((GRBM_GUI_ACTIVE * $numCU)
|
||||
* 4)))
|
||||
tips:
|
||||
VALU Active Threads/Wave:
|
||||
value: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
|
||||
!= 0) else None))
|
||||
unit: Threads
|
||||
peak: 64
|
||||
pop: (AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
|
||||
!= 0) else None)) * 1.5625)
|
||||
tips:
|
||||
IPC - Issue:
|
||||
value: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)
|
||||
+ SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED)
|
||||
/ SQ_ACTIVE_INST_ANY))
|
||||
unit: Instr/cycle
|
||||
peak: 5
|
||||
pop: ((100 * AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)
|
||||
+ SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED)
|
||||
/ SQ_ACTIVE_INST_ANY))) / 5)
|
||||
tips:
|
||||
LDS BW:
|
||||
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)))
|
||||
unit: GB/s
|
||||
peak: (($sclk * $numCU) * 0.128)
|
||||
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
LDS Bank Conflict:
|
||||
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
|
||||
unit: Conflicts/access
|
||||
peak: 32
|
||||
pop: ((100 * AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))) / 32)
|
||||
tips:
|
||||
Instr Cache Hit Rate:
|
||||
value: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
|
||||
unit: pct
|
||||
peak: 100
|
||||
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
|
||||
tips:
|
||||
Instr Cache BW:
|
||||
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Scalar L1D Cache Hit Rate:
|
||||
value: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES))
|
||||
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
|
||||
unit: pct
|
||||
peak: 100
|
||||
pop: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES))
|
||||
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
|
||||
tips:
|
||||
Scalar L1D Cache BW:
|
||||
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Vector L1D Cache Hit Rate:
|
||||
value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
|
||||
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
|
||||
/ TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
|
||||
None))
|
||||
unit: pct
|
||||
peak: 100
|
||||
pop: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
|
||||
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
|
||||
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
|
||||
None))
|
||||
tips:
|
||||
Vector L1D Cache BW:
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numCU)
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
L2 Cache Hit Rate:
|
||||
value: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else None))
|
||||
unit: pct
|
||||
peak: 100
|
||||
pop: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else None))
|
||||
tips:
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Write BW:
|
||||
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Read Latency:
|
||||
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
peak: ''
|
||||
pop: ''
|
||||
tips:
|
||||
L2-Fabric Write Latency:
|
||||
value: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
peak: ''
|
||||
pop: ''
|
||||
tips:
|
||||
Wave Occupancy:
|
||||
value: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
|
||||
unit: Wavefronts
|
||||
peak: ($maxWavesPerCU * $numCU)
|
||||
pop: (100 * AVG(((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / ($maxWavesPerCU
|
||||
* $numCU))))
|
||||
coll_level: SQ_LEVEL_WAVES
|
||||
tips:
|
||||
Instr Fetch BW:
|
||||
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 32) * $numSQC)
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
|
||||
* (($sclk / 1000) * 32)))
|
||||
coll_level: SQ_IFETCH_LEVEL
|
||||
tips:
|
||||
Instr Fetch Latency:
|
||||
value: AVG((SQ_ACCUM_PREV_HIRES / SQ_IFETCH))
|
||||
unit: Cycles
|
||||
peak: ''
|
||||
pop: ''
|
||||
coll_level: SQ_IFETCH_LEVEL
|
||||
tips:
|
||||
+315
@@ -0,0 +1,315 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 300
|
||||
title: Memory Chart
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 301
|
||||
title: Memory Chart
|
||||
header:
|
||||
metric: Metric
|
||||
#alias: #alias
|
||||
value: Value
|
||||
tips: Tips
|
||||
metric:
|
||||
# ----------------------------------------
|
||||
# Instr Buff Block
|
||||
|
||||
#TODO: double check wave_occupancy
|
||||
Wavefront Occupancy:
|
||||
#alias: wave_occ_
|
||||
value: ROUND(AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / $numActiveCUs), 0)
|
||||
coll_level: SQ_LEVEL_WAVES
|
||||
tips:
|
||||
Wave Life:
|
||||
#alias: wave_life_
|
||||
value: ROUND(AVG(((4 * (SQ_WAVE_CYCLES / SQ_WAVES)) if (SQ_WAVES != 0) else 0)), 0)
|
||||
tips:
|
||||
|
||||
# ----------------------------------------
|
||||
# Instr Dispatch Block
|
||||
SALU:
|
||||
#alias: salu_
|
||||
value: ROUND(AVG((SQ_INSTS_SALU / $denom)), 0)
|
||||
tips:
|
||||
SMEM:
|
||||
#alias: smem_
|
||||
value: ROUND(AVG((SQ_INSTS_SMEM / $denom)), 0)
|
||||
tips:
|
||||
VALU:
|
||||
#alias: valu_
|
||||
value: ROUND(AVG((SQ_INSTS_VALU / $denom)), 0)
|
||||
tips:
|
||||
MFMA:
|
||||
#alias: mfma_
|
||||
value: ROUND(AVG((SQ_INSTS_MFMA / $denom)), 0)
|
||||
tips:
|
||||
VMEM:
|
||||
#alias: vmem_
|
||||
value: ROUND(AVG((SQ_INSTS_VMEM / $denom)), 0)
|
||||
tips:
|
||||
LDS:
|
||||
#alias: lds_
|
||||
value: ROUND(AVG((SQ_INSTS_LDS / $denom)), 0)
|
||||
tips:
|
||||
GWS:
|
||||
#alias: gws_
|
||||
value: ROUND(AVG((SQ_INSTS_GDS / $denom)), 0)
|
||||
tips:
|
||||
BR:
|
||||
#alias: br_
|
||||
value: ROUND(AVG((SQ_INSTS_BRANCH / $denom)), 0)
|
||||
tips:
|
||||
|
||||
# ----------------------------------------
|
||||
# Exec Block
|
||||
Active CUs:
|
||||
#alias: active_cu_
|
||||
value: $numActiveCUs
|
||||
tips:
|
||||
Num CUs:
|
||||
#alias: num_cu_
|
||||
value: $numCU
|
||||
tips:
|
||||
VGPR:
|
||||
#alias: vgpr_
|
||||
value: ROUND(AVG(Arch_VGPR), 0)
|
||||
tips:
|
||||
# Todo: add AGPRs
|
||||
SGPR:
|
||||
#alias: sgpr_
|
||||
value: ROUND(AVG(SGPR), 0)
|
||||
tips:
|
||||
LDS Allocation:
|
||||
#alias: lds_alloc_
|
||||
value: ROUND(AVG(LDS_Per_Workgroup), 0)
|
||||
tips:
|
||||
Scratch Allocation:
|
||||
#alias: scratch_alloc_
|
||||
value: ROUND(AVG(Scratch_Per_Workitem), 0)
|
||||
tips:
|
||||
Wavefronts:
|
||||
#alias: wavefronts_
|
||||
value: ROUND(AVG(SPI_CSN_WAVE), 0)
|
||||
tips:
|
||||
Workgroups:
|
||||
#alias: workgroups_
|
||||
value: ROUND(AVG(SPI_CSN_NUM_THREADGROUPS), 0)
|
||||
tips:
|
||||
|
||||
# ----------------------------------------
|
||||
# LDS Block
|
||||
LDS Req:
|
||||
#alias: lds_req_
|
||||
value: ROUND(AVG((SQ_INSTS_LDS / $denom)), 0)
|
||||
tips:
|
||||
LDS Util:
|
||||
#alias: lds_util_
|
||||
value:
|
||||
ROUND(AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU))),
|
||||
0)
|
||||
tips:
|
||||
LDS Latency:
|
||||
#alias: lds_lat
|
||||
value: ROUND(AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)),0)
|
||||
coll_level: SQ_INST_LEVEL_LDS
|
||||
tips:
|
||||
|
||||
# ----------------------------------------
|
||||
# Vector L1 Cache Block
|
||||
VL1 Rd:
|
||||
#alias: vl1_rd_
|
||||
value: ROUND(AVG((TCP_TOTAL_READ_sum / $denom)), 0)
|
||||
tips:
|
||||
VL1 Wr:
|
||||
#alias: vl1_wr_
|
||||
value: ROUND(AVG((TCP_TOTAL_WRITE_sum / $denom)), 0)
|
||||
tips:
|
||||
VL1 Atomic:
|
||||
#alias: vl1_atom_
|
||||
value:
|
||||
ROUND(AVG(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
|
||||
/ $denom)), 0)
|
||||
tips:
|
||||
|
||||
VL1 Hit:
|
||||
#alias: vl1_hit_
|
||||
value:
|
||||
ROUND(AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
|
||||
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
|
||||
/ TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
|
||||
None )), 0)
|
||||
tips:
|
||||
VL1 Lat:
|
||||
#alias: vl1_lat_
|
||||
value:
|
||||
ROUND(AVG(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum
|
||||
!= 0) else None)), 0)
|
||||
tips:
|
||||
VL1 Coalesce:
|
||||
#alias: vl1_coales_
|
||||
value:
|
||||
ROUND(AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
|
||||
* 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None)), 0)
|
||||
tips:
|
||||
VL1 Stall:
|
||||
#alias: vl1_stall_
|
||||
value:
|
||||
ROUND(AVG((((100 * TCP_TCR_TCP_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
|
||||
if (TCP_GATE_EN1_sum != 0) else None)), 0)
|
||||
tips:
|
||||
|
||||
VL1_L2 Rd:
|
||||
#alias: vl1_l2_rd_
|
||||
value: ROUND(AVG((TCP_TCC_READ_REQ_sum / $denom)), 0)
|
||||
tips:
|
||||
VL1_L2 Wr:
|
||||
#alias: vl1_l2_wr_
|
||||
value: ROUND(AVG((TCP_TCC_WRITE_REQ_sum / $denom)), 0)
|
||||
tips:
|
||||
VL1_L2 Atomic:
|
||||
#alias: vl1_l2_atom_
|
||||
value:
|
||||
ROUND(AVG(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
|
||||
/ $denom)), 0)
|
||||
tips:
|
||||
|
||||
# ----------------------------------------
|
||||
# Scalar L1D Cache Block
|
||||
VL1D Rd:
|
||||
#alias: sl1_rd_
|
||||
value: ROUND(AVG((SQC_DCACHE_REQ / $denom)), 0)
|
||||
tips:
|
||||
VL1D Hit:
|
||||
#alias: sl1_hit_
|
||||
value:
|
||||
ROUND((AVG(((SQC_DCACHE_HITS / SQC_DCACHE_REQ) if (SQC_DCACHE_REQ !=
|
||||
0) else None)) * 100), 0)
|
||||
tips:
|
||||
VL1D Lat:
|
||||
#alias: sl1_lat_
|
||||
value:
|
||||
ROUND((AVG(((SQ_ACCUM_PREV_HIRES / SQC_DCACHE_REQ) if (SQC_DCACHE_REQ !=
|
||||
0) else None)) * 100), 0)
|
||||
coll_level: SQC_DCACHE_INFLIGHT_LEVEL
|
||||
tips:
|
||||
|
||||
VL1D_L2 Rd:
|
||||
#alias: sl1_l2_rd_
|
||||
value: ROUND(AVG((SQC_TC_DATA_READ_REQ / $denom)), 0)
|
||||
tips:
|
||||
VL1D_L2 Wr:
|
||||
#alias: sl1_l2_wr_
|
||||
value: ROUND(AVG((SQC_TC_DATA_WRITE_REQ / $denom)), 0)
|
||||
tips:
|
||||
VL1D_L2 Atomic:
|
||||
#alias: sl1_l2_atom_
|
||||
value: ROUND(AVG((SQC_TC_DATA_ATOMIC_REQ / $denom)), 0)
|
||||
tips:
|
||||
|
||||
# ----------------------------------------
|
||||
# Instr L1 Cache Block
|
||||
IL1 Fetch:
|
||||
#alias: il1_fetch_
|
||||
value: ROUND(AVG((SQC_ICACHE_REQ / $denom)), 0)
|
||||
tips:
|
||||
IL1 Hit:
|
||||
#alias: il1_hit_
|
||||
value: ROUND((AVG((SQC_ICACHE_HITS / SQC_ICACHE_REQ)) * 100), 0)
|
||||
tips:
|
||||
IL1 Lat:
|
||||
#alias: il1_lat_
|
||||
value:
|
||||
ROUND((AVG(((SQ_ACCUM_PREV_HIRES / SQC_ICACHE_REQ) if (SQC_ICACHE_REQ !=
|
||||
0) else None)) * 100), 0)
|
||||
tips: # ??? coll_level: SQ_IFETCH_LEVEL
|
||||
IL1_L2 Rd:
|
||||
#alias: il1_l2_req_
|
||||
value: ROUND(AVG((SQC_TC_INST_REQ / $denom)), 0)
|
||||
tips:
|
||||
|
||||
# ----------------------------------------
|
||||
# L2 Cache Block(inside)
|
||||
L2 Rd:
|
||||
#alias: l2_rd_
|
||||
value: ROUND(AVG((TCC_READ_sum / $denom)), 0)
|
||||
tips:
|
||||
L2 Wr:
|
||||
#alias: l2_wr_
|
||||
value: ROUND(AVG((TCC_WRITE_sum / $denom)), 0)
|
||||
tips:
|
||||
L2 Atomic:
|
||||
#alias: l2_atom_
|
||||
value: ROUND(AVG((TCC_ATOMIC_sum / $denom)), 0)
|
||||
tips:
|
||||
L2 Hit:
|
||||
#alias: l2_hit_
|
||||
value:
|
||||
ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else 0)), 0)
|
||||
tips:
|
||||
L2 Rd Lat:
|
||||
#alias: l2_rd_lat_
|
||||
value:
|
||||
# ROUND(AVG(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum))
|
||||
# if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None)),
|
||||
# 0)
|
||||
tips:
|
||||
L2 Wr Lat:
|
||||
#alias: l2_wr_lat_
|
||||
value:
|
||||
# ROUND(AVG(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum +
|
||||
# TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
|
||||
# != 0) else None)), 0)
|
||||
tips:
|
||||
|
||||
# ----------------------------------------
|
||||
# Fabric Block
|
||||
Fabric_L2 Rd:
|
||||
#alias: l2_fabric_rd_
|
||||
value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0)
|
||||
tips:
|
||||
Fabric_L2 Wr:
|
||||
#alias: l2_fabric_wr_
|
||||
value: ROUND(AVG((TCC_EA0_WRREQ_sum / $denom)), 0)
|
||||
tips:
|
||||
Fabric_L2 Atomic:
|
||||
#alias: l2_fabric_atom_
|
||||
value: ROUND(AVG((TCC_EA0_ATOMIC_sum / $denom)), 0)
|
||||
tips:
|
||||
|
||||
Fabric Rd Lat:
|
||||
#alias: fabric_rd_lat_
|
||||
value:
|
||||
ROUND(AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else 0)), 0)
|
||||
tips:
|
||||
Fabric Wr Lat:
|
||||
#alias: fabric_wr_lat_
|
||||
value:
|
||||
ROUND(AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else 0)), 0)
|
||||
tips:
|
||||
Fabric Atomic Lat:
|
||||
#alias: fabric_atom_lat_
|
||||
value:
|
||||
ROUND(AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else 0)), 0)
|
||||
tips:
|
||||
|
||||
HBM Rd:
|
||||
#alias: hbm_rd_
|
||||
value: ROUND(AVG((TCC_EA0_RDREQ_DRAM_sum / $denom)), 0)
|
||||
tips:
|
||||
HBM Wr:
|
||||
#alias: hbm_wr_
|
||||
value: ROUND(AVG((TCC_EA0_WRREQ_DRAM_sum / $denom)), 0)
|
||||
tips:
|
||||
|
||||
comparable: false # for now
|
||||
cli_style: mem_chart
|
||||
+180
@@ -0,0 +1,180 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 500
|
||||
title: Command Processor (CPC/CPF)
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 501
|
||||
title: Command Processor Fetcher
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
GPU Busy Cycles:
|
||||
avg: AVG(GRBM_GUI_ACTIVE)
|
||||
min: MIN(GRBM_GUI_ACTIVE)
|
||||
max: MAX(GRBM_GUI_ACTIVE)
|
||||
unit: Cycles/Kernel
|
||||
tips:
|
||||
CPF Busy:
|
||||
avg: AVG(CPF_CPF_STAT_BUSY)
|
||||
min: MIN(CPF_CPF_STAT_BUSY)
|
||||
max: MAX(CPF_CPF_STAT_BUSY)
|
||||
unit: Cycles/Kernel
|
||||
tips:
|
||||
CPF Util:
|
||||
avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE))
|
||||
if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None))
|
||||
min: MIN((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE))
|
||||
if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None))
|
||||
max: MAX((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE))
|
||||
if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
CPF Stall:
|
||||
avg: AVG((((100 * CPF_CPF_STAT_STALL) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY
|
||||
!= 0) else None))
|
||||
min: MIN((((100 * CPF_CPF_STAT_STALL) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY
|
||||
!= 0) else None))
|
||||
max: MAX((((100 * CPF_CPF_STAT_STALL) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
L2Cache Intf Busy:
|
||||
avg: AVG(CPF_CPF_TCIU_BUSY)
|
||||
min: MIN(CPF_CPF_TCIU_BUSY)
|
||||
max: MAX(CPF_CPF_TCIU_BUSY)
|
||||
unit: Cycles/Kernel
|
||||
tips:
|
||||
L2Cache Intf Util:
|
||||
avg: AVG((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE))
|
||||
if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None))
|
||||
min: MIN((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE))
|
||||
if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None))
|
||||
max: MAX((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE))
|
||||
if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
L2Cache Intf Stall:
|
||||
avg: AVG((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY
|
||||
!= 0) else None))
|
||||
min: MIN((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY
|
||||
!= 0) else None))
|
||||
max: MAX((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
UTCL1 Stall:
|
||||
avg: AVG(CPF_CMP_UTCL1_STALL_ON_TRANSLATION)
|
||||
min: MIN(CPF_CMP_UTCL1_STALL_ON_TRANSLATION)
|
||||
max: MAX(CPF_CMP_UTCL1_STALL_ON_TRANSLATION)
|
||||
unit: Cycles/Kernel
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 502
|
||||
title: Command Processor Compute
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
GPU Busy Cycles:
|
||||
avg: AVG(GRBM_GUI_ACTIVE)
|
||||
min: MIN(GRBM_GUI_ACTIVE)
|
||||
max: MAX(GRBM_GUI_ACTIVE)
|
||||
unit: Cycles
|
||||
tips:
|
||||
CPC Busy Cycles:
|
||||
avg: AVG(CPC_CPC_STAT_BUSY)
|
||||
min: MIN(CPC_CPC_STAT_BUSY)
|
||||
max: MAX(CPC_CPC_STAT_BUSY)
|
||||
unit: Cycles
|
||||
tips:
|
||||
CPC Util:
|
||||
avg: AVG((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE))
|
||||
if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None))
|
||||
min: MIN((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE))
|
||||
if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None))
|
||||
max: MAX((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE))
|
||||
if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
CPC Stall Cycles:
|
||||
avg: AVG(CPC_CPC_STAT_STALL)
|
||||
min: MIN(CPC_CPC_STAT_STALL)
|
||||
max: MAX(CPC_CPC_STAT_STALL)
|
||||
unit: Cycles
|
||||
tips:
|
||||
CPC Stall Rate:
|
||||
avg: AVG((((100 * CPC_CPC_STAT_STALL) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
|
||||
!= 0) else None))
|
||||
min: MIN((((100 * CPC_CPC_STAT_STALL) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
|
||||
!= 0) else None))
|
||||
max: MAX((((100 * CPC_CPC_STAT_STALL) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
CPC Packet Decoding:
|
||||
avg: AVG(CPC_ME1_BUSY_FOR_PACKET_DECODE)
|
||||
min: MIN(CPC_ME1_BUSY_FOR_PACKET_DECODE)
|
||||
max: MAX(CPC_ME1_BUSY_FOR_PACKET_DECODE)
|
||||
unit: Cycles
|
||||
tips:
|
||||
SPI Intf Busy Cycles:
|
||||
avg: AVG(CPC_ME1_DC0_SPI_BUSY)
|
||||
min: MIN(CPC_ME1_DC0_SPI_BUSY)
|
||||
max: MAX(CPC_ME1_DC0_SPI_BUSY)
|
||||
unit: Cycles
|
||||
tips:
|
||||
SPI Intf Util:
|
||||
avg: AVG((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
|
||||
!= 0) else None))
|
||||
min: MIN((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
|
||||
!= 0) else None))
|
||||
max: MAX((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
L2Cache Intf Util:
|
||||
avg: AVG((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE))
|
||||
if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None))
|
||||
min: MIN((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE))
|
||||
if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None))
|
||||
max: MAX((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE))
|
||||
if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
UTCL1 Stall Cycles:
|
||||
avg: AVG(CPC_UTCL1_STALL_ON_TRANSLATION)
|
||||
min: MIN(CPC_UTCL1_STALL_ON_TRANSLATION)
|
||||
max: MAX(CPC_UTCL1_STALL_ON_TRANSLATION)
|
||||
unit: Cycles
|
||||
tips:
|
||||
UTCL2 Intf Busy Cycles:
|
||||
avg: AVG(CPC_CPC_UTCL2IU_BUSY)
|
||||
min: MIN(CPC_CPC_UTCL2IU_BUSY)
|
||||
max: MAX(CPC_CPC_UTCL2IU_BUSY)
|
||||
unit: Cycles
|
||||
tips:
|
||||
UTCL2 Intf Util:
|
||||
avg: AVG((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE))
|
||||
if ((CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE) != 0) else None))
|
||||
min: MIN((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE))
|
||||
if ((CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE) != 0) else None))
|
||||
max: MAX((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE))
|
||||
if ((CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE) != 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
+174
@@ -0,0 +1,174 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 600
|
||||
title: Shader Processor Input (SPI)
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 601
|
||||
title: SPI Stats
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
GPU Busy:
|
||||
avg: AVG(GRBM_GUI_ACTIVE)
|
||||
min: MIN(GRBM_GUI_ACTIVE)
|
||||
max: MAX(GRBM_GUI_ACTIVE)
|
||||
unit: Cycles
|
||||
tips:
|
||||
CS Busy:
|
||||
avg: AVG(SPI_CSN_BUSY)
|
||||
min: MIN(SPI_CSN_BUSY)
|
||||
max: MAX(SPI_CSN_BUSY)
|
||||
unit: Cycles
|
||||
tips:
|
||||
SPI Busy:
|
||||
avg: AVG(GRBM_SPI_BUSY)
|
||||
min: MIN(GRBM_SPI_BUSY)
|
||||
max: MAX(GRBM_SPI_BUSY)
|
||||
unit: Cycles
|
||||
tips:
|
||||
SQ Busy:
|
||||
avg: AVG(SQ_BUSY_CYCLES)
|
||||
min: MIN(SQ_BUSY_CYCLES)
|
||||
max: MAX(SQ_BUSY_CYCLES)
|
||||
unit: Cycles
|
||||
tips:
|
||||
Dispatched Workgroups:
|
||||
avg: AVG(SPI_CSN_NUM_THREADGROUPS)
|
||||
min: MIN(SPI_CSN_NUM_THREADGROUPS)
|
||||
max: MAX(SPI_CSN_NUM_THREADGROUPS)
|
||||
unit: Workgroups
|
||||
tips:
|
||||
Dispatched Wavefronts:
|
||||
avg: AVG(SPI_CSN_WAVE)
|
||||
min: MIN(SPI_CSN_WAVE)
|
||||
max: MAX(SPI_CSN_WAVE)
|
||||
unit: Wavefronts
|
||||
tips:
|
||||
Wave Alloc Failed:
|
||||
avg: AVG(SPI_RA_REQ_NO_ALLOC)
|
||||
min: MIN(SPI_RA_REQ_NO_ALLOC)
|
||||
max: MAX(SPI_RA_REQ_NO_ALLOC)
|
||||
unit: Cycles
|
||||
tips:
|
||||
Wave Alloc Failed - CS:
|
||||
avg: AVG(SPI_RA_REQ_NO_ALLOC_CSN)
|
||||
min: MIN(SPI_RA_REQ_NO_ALLOC_CSN)
|
||||
max: MAX(SPI_RA_REQ_NO_ALLOC_CSN)
|
||||
unit: Cycles
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 602
|
||||
title: SPI Resource Allocation
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Wave request Failed (CS):
|
||||
avg: AVG(SPI_RA_REQ_NO_ALLOC_CSN)
|
||||
min: MIN(SPI_RA_REQ_NO_ALLOC_CSN)
|
||||
max: MAX(SPI_RA_REQ_NO_ALLOC_CSN)
|
||||
unit: Cycles
|
||||
tips:
|
||||
CS Stall:
|
||||
avg: AVG(SPI_RA_RES_STALL_CSN)
|
||||
min: MIN(SPI_RA_RES_STALL_CSN)
|
||||
max: MAX(SPI_RA_RES_STALL_CSN)
|
||||
unit: Cycles
|
||||
tips:
|
||||
CS Stall Rate:
|
||||
avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY !=
|
||||
0) else None))
|
||||
min: MIN((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY !=
|
||||
0) else None))
|
||||
max: MAX((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY !=
|
||||
0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
Scratch Stall:
|
||||
avg: AVG(SPI_RA_TMP_STALL_CSN)
|
||||
min: MIN(SPI_RA_TMP_STALL_CSN)
|
||||
max: MAX(SPI_RA_TMP_STALL_CSN)
|
||||
unit: Cycles
|
||||
tips:
|
||||
Insufficient SIMD Waveslots:
|
||||
avg: AVG(SPI_RA_WAVE_SIMD_FULL_CSN)
|
||||
min: MIN(SPI_RA_WAVE_SIMD_FULL_CSN)
|
||||
max: MAX(SPI_RA_WAVE_SIMD_FULL_CSN)
|
||||
unit: SIMD
|
||||
tips:
|
||||
Insufficient SIMD VGPRs:
|
||||
avg: AVG(SPI_RA_VGPR_SIMD_FULL_CSN)
|
||||
min: MIN(SPI_RA_VGPR_SIMD_FULL_CSN)
|
||||
max: MAX(SPI_RA_VGPR_SIMD_FULL_CSN)
|
||||
unit: SIMD
|
||||
tips:
|
||||
Insufficient SIMD SGPRs:
|
||||
avg: AVG(SPI_RA_SGPR_SIMD_FULL_CSN)
|
||||
min: MIN(SPI_RA_SGPR_SIMD_FULL_CSN)
|
||||
max: MAX(SPI_RA_SGPR_SIMD_FULL_CSN)
|
||||
unit: SIMD
|
||||
tips:
|
||||
Insufficient CU LDS:
|
||||
avg: AVG(SPI_RA_LDS_CU_FULL_CSN)
|
||||
min: MIN(SPI_RA_LDS_CU_FULL_CSN)
|
||||
max: MAX(SPI_RA_LDS_CU_FULL_CSN)
|
||||
unit: CU
|
||||
tips:
|
||||
Insufficient CU Barriers:
|
||||
avg: AVG(SPI_RA_BAR_CU_FULL_CSN)
|
||||
min: MIN(SPI_RA_BAR_CU_FULL_CSN)
|
||||
max: MAX(SPI_RA_BAR_CU_FULL_CSN)
|
||||
unit: CU
|
||||
tips:
|
||||
Insufficient Bulky Resource:
|
||||
avg: AVG(SPI_RA_BULKY_CU_FULL_CSN)
|
||||
min: MIN(SPI_RA_BULKY_CU_FULL_CSN)
|
||||
max: MAX(SPI_RA_BULKY_CU_FULL_CSN)
|
||||
unit: CU
|
||||
tips:
|
||||
Reach CU Threadgroups Limit:
|
||||
avg: AVG(SPI_RA_TGLIM_CU_FULL_CSN)
|
||||
min: MIN(SPI_RA_TGLIM_CU_FULL_CSN)
|
||||
max: MAX(SPI_RA_TGLIM_CU_FULL_CSN)
|
||||
unit: Cycles
|
||||
tips:
|
||||
Reach CU Wave Limit:
|
||||
avg: AVG(SPI_RA_WVLIM_STALL_CSN)
|
||||
min: MIN(SPI_RA_WVLIM_STALL_CSN)
|
||||
max: MAX(SPI_RA_WVLIM_STALL_CSN)
|
||||
unit: Cycles
|
||||
tips:
|
||||
VGPR Writes:
|
||||
avg: AVG((((4 * SPI_VWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
|
||||
None))
|
||||
min: MIN((((4 * SPI_VWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
|
||||
None))
|
||||
max: MAX((((4 * SPI_VWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
|
||||
None))
|
||||
unit: Cycles/wave
|
||||
tips:
|
||||
SGPR Writes:
|
||||
avg: AVG((((1 * SPI_SWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
|
||||
None))
|
||||
min: MIN((((1 * SPI_SWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
|
||||
None))
|
||||
max: MAX((((1 * SPI_SWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else
|
||||
None))
|
||||
unit: Cycles/wave
|
||||
tips:
|
||||
+142
@@ -0,0 +1,142 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 700
|
||||
title: Wavefront
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 701
|
||||
title: Wavefront Launch Stats
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Grid Size:
|
||||
avg: AVG(Grid_Size)
|
||||
min: MIN(Grid_Size)
|
||||
max: MAX(Grid_Size)
|
||||
unit: Work Items
|
||||
tips:
|
||||
Workgroup Size:
|
||||
avg: AVG(Workgroup_Size)
|
||||
min: MIN(Workgroup_Size)
|
||||
max: MAX(Workgroup_Size)
|
||||
unit: Work Items
|
||||
tips:
|
||||
Total Wavefronts:
|
||||
avg: AVG(SPI_CSN_WAVE)
|
||||
min: MIN(SPI_CSN_WAVE)
|
||||
max: MAX(SPI_CSN_WAVE)
|
||||
unit: Wavefronts
|
||||
tips:
|
||||
Saved Wavefronts:
|
||||
avg: AVG(SQ_WAVES_SAVED)
|
||||
min: MIN(SQ_WAVES_SAVED)
|
||||
max: MAX(SQ_WAVES_SAVED)
|
||||
unit: Wavefronts
|
||||
tips:
|
||||
Restored Wavefronts:
|
||||
avg: AVG(SQ_WAVES_RESTORED)
|
||||
min: MIN(SQ_WAVES_RESTORED)
|
||||
max: MAX(SQ_WAVES_RESTORED)
|
||||
unit: Wavefronts
|
||||
tips:
|
||||
VGPRs:
|
||||
avg: AVG(Arch_VGPR)
|
||||
min: MIN(Arch_VGPR)
|
||||
max: MAX(Arch_VGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
AGPRs:
|
||||
avg: AVG(Accum_VGPR)
|
||||
min: MIN(Accum_VGPR)
|
||||
max: MAX(Accum_VGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
SGPRs:
|
||||
avg: AVG(SGPR)
|
||||
min: MIN(SGPR)
|
||||
max: MAX(SGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
LDS Allocation:
|
||||
avg: AVG(LDS_Per_Workgroup)
|
||||
min: MIN(LDS_Per_Workgroup)
|
||||
max: MAX(LDS_Per_Workgroup)
|
||||
unit: Bytes
|
||||
tips:
|
||||
Scratch Allocation:
|
||||
avg: AVG(Scratch_Per_Workitem)
|
||||
min: MIN(Scratch_Per_Workitem)
|
||||
max: MAX(Scratch_Per_Workitem)
|
||||
unit: Bytes
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 702
|
||||
title: Wavefront Runtime Stats
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
avg: AVG((EndNs - BeginNs))
|
||||
min: MIN((EndNs - BeginNs))
|
||||
max: MAX((EndNs - BeginNs))
|
||||
unit: ns
|
||||
tips:
|
||||
Kernel Time (Cycles):
|
||||
avg: AVG(GRBM_GUI_ACTIVE)
|
||||
min: MIN(GRBM_GUI_ACTIVE)
|
||||
max: MAX(GRBM_GUI_ACTIVE)
|
||||
unit: Cycles
|
||||
tips:
|
||||
Instr/wavefront:
|
||||
avg: AVG((SQ_INSTS / SQ_WAVES))
|
||||
min: MIN((SQ_INSTS / SQ_WAVES))
|
||||
max: MAX((SQ_INSTS / SQ_WAVES))
|
||||
unit: Instr/wavefront
|
||||
tips:
|
||||
Wave Cycles:
|
||||
avg: AVG(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
min: MIN(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
max: MAX(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Dependency Wait Cycles:
|
||||
avg: AVG(((4 * SQ_WAIT_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_WAIT_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_WAIT_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Issue Wait Cycles:
|
||||
avg: AVG(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Active Cycles:
|
||||
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Wavefront Occupancy:
|
||||
avg: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
|
||||
min: MIN((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
|
||||
max: MAX((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
|
||||
unit: Wavefronts
|
||||
coll_level: SQ_LEVEL_WAVES
|
||||
tips:
|
||||
+228
@@ -0,0 +1,228 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 1000
|
||||
title: Compute Units - Instruction Mix
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1001
|
||||
title: Instruction Mix
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
VALU - Vector:
|
||||
avg: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
|
||||
min: MIN(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
|
||||
max: MAX(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
VMEM:
|
||||
avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
|
||||
min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
|
||||
max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
LDS:
|
||||
avg: AVG((SQ_INSTS_LDS / $denom))
|
||||
min: MIN((SQ_INSTS_LDS / $denom))
|
||||
max: MAX((SQ_INSTS_LDS / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
VALU - MFMA:
|
||||
avg: AVG((SQ_INSTS_MFMA / $denom))
|
||||
min: MIN((SQ_INSTS_MFMA / $denom))
|
||||
max: MAX((SQ_INSTS_MFMA / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
SALU:
|
||||
avg: AVG((SQ_INSTS_SALU / $denom))
|
||||
min: MIN((SQ_INSTS_SALU / $denom))
|
||||
max: MAX((SQ_INSTS_SALU / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
SMEM:
|
||||
avg: AVG((SQ_INSTS_SMEM / $denom))
|
||||
min: MIN((SQ_INSTS_SMEM / $denom))
|
||||
max: MAX((SQ_INSTS_SMEM / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
Branch:
|
||||
avg: AVG((SQ_INSTS_BRANCH / $denom))
|
||||
min: MIN((SQ_INSTS_BRANCH / $denom))
|
||||
max: MAX((SQ_INSTS_BRANCH / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
GDS:
|
||||
avg: AVG((SQ_INSTS_GDS / $denom))
|
||||
min: MIN((SQ_INSTS_GDS / $denom))
|
||||
max: MAX((SQ_INSTS_GDS / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1002
|
||||
title: VALU Arithmetic Instr Mix
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
INT32:
|
||||
avg: AVG((SQ_INSTS_VALU_INT32 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_INT32 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_INT32 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
INT64:
|
||||
avg: AVG((SQ_INSTS_VALU_INT64 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_INT64 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_INT64 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F16-ADD:
|
||||
avg: AVG((SQ_INSTS_VALU_ADD_F16 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_ADD_F16 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_ADD_F16 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F16-MUL:
|
||||
avg: AVG((SQ_INSTS_VALU_MUL_F16 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_MUL_F16 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_MUL_F16 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F16-FMA:
|
||||
avg: AVG((SQ_INSTS_VALU_FMA_F16 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_FMA_F16 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_FMA_F16 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F16-Trans:
|
||||
avg: AVG((SQ_INSTS_VALU_TRANS_F16 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_TRANS_F16 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_TRANS_F16 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F32-ADD:
|
||||
avg: AVG((SQ_INSTS_VALU_ADD_F32 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_ADD_F32 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_ADD_F32 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F32-MUL:
|
||||
avg: AVG((SQ_INSTS_VALU_MUL_F32 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_MUL_F32 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_MUL_F32 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F32-FMA:
|
||||
avg: AVG((SQ_INSTS_VALU_FMA_F32 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_FMA_F32 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_FMA_F32 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F32-Trans:
|
||||
avg: AVG((SQ_INSTS_VALU_TRANS_F32 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_TRANS_F32 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_TRANS_F32 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F64-ADD:
|
||||
avg: AVG((SQ_INSTS_VALU_ADD_F64 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_ADD_F64 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_ADD_F64 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F64-MUL:
|
||||
avg: AVG((SQ_INSTS_VALU_MUL_F64 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_MUL_F64 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_MUL_F64 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F64-FMA:
|
||||
avg: AVG((SQ_INSTS_VALU_FMA_F64 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_FMA_F64 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_FMA_F64 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F64-Trans:
|
||||
avg: AVG((SQ_INSTS_VALU_TRANS_F64 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_TRANS_F64 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_TRANS_F64 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
Conversion:
|
||||
avg: AVG((SQ_INSTS_VALU_CVT / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_CVT / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_CVT / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1003
|
||||
title: VMEM Instr Mix
|
||||
header:
|
||||
type: type
|
||||
count: Count
|
||||
tips: Tips
|
||||
metric:
|
||||
Buffer Instr:
|
||||
count: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom))
|
||||
tips:
|
||||
Buffer Read:
|
||||
count: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
tips:
|
||||
Buffer Write:
|
||||
count: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
tips:
|
||||
Buffer Atomic:
|
||||
count: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
|
||||
tips:
|
||||
Flat Instr:
|
||||
count: AVG((TA_FLAT_WAVEFRONTS_sum / $denom))
|
||||
tips:
|
||||
Flat Read:
|
||||
count: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
tips:
|
||||
Flat Write:
|
||||
count: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
tips:
|
||||
Flat Atomic:
|
||||
count: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1004
|
||||
title: MFMA Arithmetic Instr Mix
|
||||
header:
|
||||
type: type
|
||||
count: Count
|
||||
tips: Tips
|
||||
metric:
|
||||
MFMA-I8:
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_I8 / $denom))
|
||||
tips:
|
||||
MFMA-F16:
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_F16 / $denom))
|
||||
tips:
|
||||
MFMA-BF16:
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_BF16 / $denom))
|
||||
tips:
|
||||
MFMA-F32:
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_F32 / $denom))
|
||||
tips:
|
||||
MFMA-F64:
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_F64 / $denom))
|
||||
tips:
|
||||
+198
@@ -0,0 +1,198 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 1100
|
||||
title: Compute Units - Compute Pipeline
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1101
|
||||
title: Speed-of-Light
|
||||
header:
|
||||
metric: Metric
|
||||
value: Value
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
valu_flops_pop:
|
||||
value: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
|
||||
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
|
||||
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
|
||||
* $numCU) * 64) * 2) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_bf16_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
|
||||
/ ((($sclk * $numCU) * 512) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f16_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f32_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f64_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_i8_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1102
|
||||
title: Pipeline Stats
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
IPC (Avg):
|
||||
avg: AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES))
|
||||
min: MIN((SQ_INSTS / SQ_BUSY_CU_CYCLES))
|
||||
max: MAX((SQ_INSTS / SQ_BUSY_CU_CYCLES))
|
||||
unit: Instr/cycle
|
||||
tips:
|
||||
IPC (Issue):
|
||||
avg: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)
|
||||
+ SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED)
|
||||
/ SQ_ACTIVE_INST_ANY))
|
||||
min: MIN(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)
|
||||
+ SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED)
|
||||
/ SQ_ACTIVE_INST_ANY))
|
||||
max: MAX(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)
|
||||
+ SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED)
|
||||
/ SQ_ACTIVE_INST_ANY))
|
||||
unit: Instr/cycle
|
||||
tips:
|
||||
SALU Util:
|
||||
avg: AVG((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU))
|
||||
min: MIN((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU))
|
||||
max: MAX((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU))
|
||||
unit: pct
|
||||
tips:
|
||||
VALU Util:
|
||||
avg: AVG((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU))
|
||||
min: MIN((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU))
|
||||
max: MAX((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU))
|
||||
unit: pct
|
||||
tips:
|
||||
VALU Active Threads:
|
||||
avg: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
|
||||
!= 0) else None))
|
||||
min: MIN(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
|
||||
!= 0) else None))
|
||||
max: MAX(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
|
||||
!= 0) else None))
|
||||
unit: Threads
|
||||
tips:
|
||||
MFMA Util:
|
||||
avg: AVG(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((4 * $numCU) * GRBM_GUI_ACTIVE)))
|
||||
min: MIN(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((4 * $numCU) * GRBM_GUI_ACTIVE)))
|
||||
max: MAX(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((4 * $numCU) * GRBM_GUI_ACTIVE)))
|
||||
unit: pct
|
||||
tips:
|
||||
MFMA Instr Cycles:
|
||||
avg: AVG(((SQ_VALU_MFMA_BUSY_CYCLES / SQ_INSTS_MFMA) if (SQ_INSTS_MFMA != 0)
|
||||
else None))
|
||||
min: MIN(((SQ_VALU_MFMA_BUSY_CYCLES / SQ_INSTS_MFMA) if (SQ_INSTS_MFMA != 0)
|
||||
else None))
|
||||
max: MAX(((SQ_VALU_MFMA_BUSY_CYCLES / SQ_INSTS_MFMA) if (SQ_INSTS_MFMA != 0)
|
||||
else None))
|
||||
unit: cycles/instr
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1103
|
||||
title: Arithmetic Operations
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
FLOPs (Total):
|
||||
avg: AVG((((((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16)
|
||||
+ (SQ_INSTS_VALU_FMA_F16 * 2))) + ((512 * SQ_INSTS_VALU_MFMA_MOPS_F16) + (512
|
||||
* SQ_INSTS_VALU_MFMA_MOPS_BF16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
|
||||
+ SQ_INSTS_VALU_TRANS_F32) + (SQ_INSTS_VALU_FMA_F32 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (SQ_INSTS_VALU_FMA_F64 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) /
|
||||
$denom))
|
||||
min: MIN((((((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16)
|
||||
+ (SQ_INSTS_VALU_FMA_F16 * 2))) + ((512 * SQ_INSTS_VALU_MFMA_MOPS_F16) + (512
|
||||
* SQ_INSTS_VALU_MFMA_MOPS_BF16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
|
||||
+ SQ_INSTS_VALU_TRANS_F32) + (SQ_INSTS_VALU_FMA_F32 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (SQ_INSTS_VALU_FMA_F64 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) /
|
||||
$denom))
|
||||
max: MAX((((((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16)
|
||||
+ (SQ_INSTS_VALU_FMA_F16 * 2))) + ((512 * SQ_INSTS_VALU_MFMA_MOPS_F16) + (512
|
||||
* SQ_INSTS_VALU_MFMA_MOPS_BF16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
|
||||
+ SQ_INSTS_VALU_TRANS_F32) + (SQ_INSTS_VALU_FMA_F32 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (SQ_INSTS_VALU_FMA_F64 * 2)))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) /
|
||||
$denom))
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
INT8 OPs:
|
||||
avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
max: MAX(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
F16 OPs:
|
||||
avg: AVG(((((((64 * SQ_INSTS_VALU_ADD_F16) + (64 * SQ_INSTS_VALU_MUL_F16)) +
|
||||
(64 * SQ_INSTS_VALU_TRANS_F16)) + (128 * SQ_INSTS_VALU_FMA_F16)) + (512 *
|
||||
SQ_INSTS_VALU_MFMA_MOPS_F16)) / $denom))
|
||||
min: MIN(((((((64 * SQ_INSTS_VALU_ADD_F16) + (64 * SQ_INSTS_VALU_MUL_F16)) +
|
||||
(64 * SQ_INSTS_VALU_TRANS_F16)) + (128 * SQ_INSTS_VALU_FMA_F16)) + (512 *
|
||||
SQ_INSTS_VALU_MFMA_MOPS_F16)) / $denom))
|
||||
max: MAX(((((((64 * SQ_INSTS_VALU_ADD_F16) + (64 * SQ_INSTS_VALU_MUL_F16)) +
|
||||
(64 * SQ_INSTS_VALU_TRANS_F16)) + (128 * SQ_INSTS_VALU_FMA_F16)) + (512 *
|
||||
SQ_INSTS_VALU_MFMA_MOPS_F16)) / $denom))
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
BF16 OPs:
|
||||
avg: AVG(((512 * SQ_INSTS_VALU_MFMA_MOPS_BF16) / $denom))
|
||||
min: MIN(((512 * SQ_INSTS_VALU_MFMA_MOPS_BF16) / $denom))
|
||||
max: MAX(((512 * SQ_INSTS_VALU_MFMA_MOPS_BF16) / $denom))
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
F32 OPs:
|
||||
avg: AVG((((64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32)
|
||||
+ (SQ_INSTS_VALU_FMA_F32 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32)) / $denom))
|
||||
min: MIN((((64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32)
|
||||
+ (SQ_INSTS_VALU_FMA_F32 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32)) / $denom))
|
||||
max: MAX((((64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32)
|
||||
+ (SQ_INSTS_VALU_FMA_F32 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F32)) / $denom))
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
F64 OPs:
|
||||
avg: AVG((((64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
|
||||
min: MIN((((64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
|
||||
max: MAX((((64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
|
||||
+121
@@ -0,0 +1,121 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 1200
|
||||
title: Local Data Share (LDS)
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1201
|
||||
title: Speed-of-Light
|
||||
header:
|
||||
metric: Metric
|
||||
value: Value
|
||||
tips: Tips
|
||||
metric:
|
||||
Utilization:
|
||||
value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
tips:
|
||||
Access Rate:
|
||||
value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
tips:
|
||||
Bandwidth (Pct-of-Peak):
|
||||
value:
|
||||
AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
Bank Conflict Rate:
|
||||
value:
|
||||
AVG((((SQ_LDS_BANK_CONFLICT * 3.125) / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
|
||||
tips:
|
||||
comparable: false # for now
|
||||
cli_style: simple_bar
|
||||
|
||||
- metric_table:
|
||||
id: 1202
|
||||
title: LDS Stats
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
LDS Instrs:
|
||||
avg: AVG((SQ_INSTS_LDS / $denom))
|
||||
min: MIN((SQ_INSTS_LDS / $denom))
|
||||
max: MAX((SQ_INSTS_LDS / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Bandwidth:
|
||||
avg:
|
||||
AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ $denom))
|
||||
min:
|
||||
MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ $denom))
|
||||
max:
|
||||
MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Bank Conflicts/Access:
|
||||
avg:
|
||||
AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
|
||||
min:
|
||||
MIN(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
|
||||
max:
|
||||
MAX(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
|
||||
unit: Conflicts/Access
|
||||
tips:
|
||||
Index Accesses:
|
||||
avg: AVG((SQ_LDS_IDX_ACTIVE / $denom))
|
||||
min: MIN((SQ_LDS_IDX_ACTIVE / $denom))
|
||||
max: MAX((SQ_LDS_IDX_ACTIVE / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Atomic Cycles:
|
||||
avg: AVG((SQ_LDS_ATOMIC_RETURN / $denom))
|
||||
min: MIN((SQ_LDS_ATOMIC_RETURN / $denom))
|
||||
max: MAX((SQ_LDS_ATOMIC_RETURN / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Bank Conflict:
|
||||
avg: AVG((SQ_LDS_BANK_CONFLICT / $denom))
|
||||
min: MIN((SQ_LDS_BANK_CONFLICT / $denom))
|
||||
max: MAX((SQ_LDS_BANK_CONFLICT / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Addr Conflict:
|
||||
avg: AVG((SQ_LDS_ADDR_CONFLICT / $denom))
|
||||
min: MIN((SQ_LDS_ADDR_CONFLICT / $denom))
|
||||
max: MAX((SQ_LDS_ADDR_CONFLICT / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Unaligned Stall:
|
||||
avg: AVG((SQ_LDS_UNALIGNED_STALL / $denom))
|
||||
min: MIN((SQ_LDS_UNALIGNED_STALL / $denom))
|
||||
max: MAX((SQ_LDS_UNALIGNED_STALL / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Mem Violations:
|
||||
avg: AVG((SQ_LDS_MEM_VIOLATIONS / $denom))
|
||||
min: MIN((SQ_LDS_MEM_VIOLATIONS / $denom))
|
||||
max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom))
|
||||
unit: ( + $normUnit)
|
||||
tips:
|
||||
LDS Latency:
|
||||
avg: AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None))
|
||||
min: MIN(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None))
|
||||
max: MAX(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None))
|
||||
unit: Cycles
|
||||
coll_level: SQ_INST_LEVEL_LDS
|
||||
tips:
|
||||
+77
@@ -0,0 +1,77 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 1300
|
||||
title: Instruction Cache
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1301
|
||||
title: Speed-of-Light
|
||||
header:
|
||||
metric: Metric
|
||||
value: Value
|
||||
tips: Tips
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
tips:
|
||||
Cache Hit:
|
||||
value:
|
||||
AVG(((SQC_ICACHE_HITS * 100) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES)
|
||||
+ SQC_ICACHE_MISSES_DUPLICATE)))
|
||||
tips:
|
||||
comparable: false # for now
|
||||
cli_style: simple_bar
|
||||
|
||||
- metric_table:
|
||||
id: 1302
|
||||
title: Instruction Cache Accesses
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Req:
|
||||
avg: AVG((SQC_ICACHE_REQ / $denom))
|
||||
min: MIN((SQC_ICACHE_REQ / $denom))
|
||||
max: MAX((SQC_ICACHE_REQ / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Hits:
|
||||
avg: AVG((SQC_ICACHE_HITS / $denom))
|
||||
min: MIN((SQC_ICACHE_HITS / $denom))
|
||||
max: MAX((SQC_ICACHE_HITS / $denom))
|
||||
unit: (Hits + $normUnit)
|
||||
tips:
|
||||
Misses - Non Duplicated:
|
||||
avg: AVG((SQC_ICACHE_MISSES / $denom))
|
||||
min: MIN((SQC_ICACHE_MISSES / $denom))
|
||||
max: MAX((SQC_ICACHE_MISSES / $denom))
|
||||
unit: (Misses + $normUnit)
|
||||
tips:
|
||||
Misses - Duplicated:
|
||||
avg: AVG((SQC_ICACHE_MISSES_DUPLICATE / $denom))
|
||||
min: MIN((SQC_ICACHE_MISSES_DUPLICATE / $denom))
|
||||
max: MAX((SQC_ICACHE_MISSES_DUPLICATE / $denom))
|
||||
unit: (Misses + $normUnit)
|
||||
tips:
|
||||
Cache Hit:
|
||||
avg:
|
||||
AVG(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES)
|
||||
+ SQC_ICACHE_MISSES_DUPLICATE)))
|
||||
min:
|
||||
MIN(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) +
|
||||
SQC_ICACHE_MISSES_DUPLICATE)))
|
||||
max:
|
||||
MAX(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) +
|
||||
SQC_ICACHE_MISSES_DUPLICATE)))
|
||||
unit: pct
|
||||
tips:
|
||||
+164
@@ -0,0 +1,164 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 1400
|
||||
title: Scalar L1 Data Cache
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1401
|
||||
title: Speed-of-Light
|
||||
header:
|
||||
metric: Metric
|
||||
value: Value
|
||||
tips: Tips
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
tips:
|
||||
Cache Hit:
|
||||
value:
|
||||
AVG((((SQC_DCACHE_HITS * 100) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE))
|
||||
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
|
||||
tips:
|
||||
comparable: false # for now
|
||||
cli_style: simple_bar
|
||||
|
||||
- metric_table:
|
||||
id: 1402
|
||||
title: Scalar L1D Cache Accesses
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Req:
|
||||
avg: AVG((SQC_DCACHE_REQ / $denom))
|
||||
min: MIN((SQC_DCACHE_REQ / $denom))
|
||||
max: MAX((SQC_DCACHE_REQ / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Hits:
|
||||
avg: AVG((SQC_DCACHE_HITS / $denom))
|
||||
min: MIN((SQC_DCACHE_HITS / $denom))
|
||||
max: MAX((SQC_DCACHE_HITS / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Misses - Non Duplicated:
|
||||
avg: AVG((SQC_DCACHE_MISSES / $denom))
|
||||
min: MIN((SQC_DCACHE_MISSES / $denom))
|
||||
max: MAX((SQC_DCACHE_MISSES / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Misses - Duplicated:
|
||||
avg: AVG((SQC_DCACHE_MISSES_DUPLICATE / $denom))
|
||||
min: MIN((SQC_DCACHE_MISSES_DUPLICATE / $denom))
|
||||
max: MAX((SQC_DCACHE_MISSES_DUPLICATE / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache Hit:
|
||||
avg:
|
||||
AVG((((100 * SQC_DCACHE_HITS) / ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
|
||||
+ SQC_DCACHE_MISSES_DUPLICATE)) if (((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
|
||||
+ SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
|
||||
min:
|
||||
MIN((((100 * SQC_DCACHE_HITS) / ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
|
||||
+ SQC_DCACHE_MISSES_DUPLICATE)) if (((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
|
||||
+ SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
|
||||
max:
|
||||
MAX((((100 * SQC_DCACHE_HITS) / ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
|
||||
+ SQC_DCACHE_MISSES_DUPLICATE)) if (((SQC_DCACHE_HITS + SQC_DCACHE_MISSES)
|
||||
+ SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
Read Req (Total):
|
||||
avg:
|
||||
AVG((((((SQC_DCACHE_REQ_READ_1 + SQC_DCACHE_REQ_READ_2) + SQC_DCACHE_REQ_READ_4)
|
||||
+ SQC_DCACHE_REQ_READ_8) + SQC_DCACHE_REQ_READ_16) / $denom))
|
||||
min:
|
||||
MIN((((((SQC_DCACHE_REQ_READ_1 + SQC_DCACHE_REQ_READ_2) + SQC_DCACHE_REQ_READ_4)
|
||||
+ SQC_DCACHE_REQ_READ_8) + SQC_DCACHE_REQ_READ_16) / $denom))
|
||||
max:
|
||||
MAX((((((SQC_DCACHE_REQ_READ_1 + SQC_DCACHE_REQ_READ_2) + SQC_DCACHE_REQ_READ_4)
|
||||
+ SQC_DCACHE_REQ_READ_8) + SQC_DCACHE_REQ_READ_16) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic Req:
|
||||
avg: AVG((SQC_DCACHE_ATOMIC / $denom))
|
||||
min: MIN((SQC_DCACHE_ATOMIC / $denom))
|
||||
max: MAX((SQC_DCACHE_ATOMIC / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Req (1 DWord):
|
||||
avg: AVG((SQC_DCACHE_REQ_READ_1 / $denom))
|
||||
min: MIN((SQC_DCACHE_REQ_READ_1 / $denom))
|
||||
max: MAX((SQC_DCACHE_REQ_READ_1 / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Req (2 DWord):
|
||||
avg: AVG((SQC_DCACHE_REQ_READ_2 / $denom))
|
||||
min: MIN((SQC_DCACHE_REQ_READ_2 / $denom))
|
||||
max: MAX((SQC_DCACHE_REQ_READ_2 / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Req (4 DWord):
|
||||
avg: AVG((SQC_DCACHE_REQ_READ_4 / $denom))
|
||||
min: MIN((SQC_DCACHE_REQ_READ_4 / $denom))
|
||||
max: MAX((SQC_DCACHE_REQ_READ_4 / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Req (8 DWord):
|
||||
avg: AVG((SQC_DCACHE_REQ_READ_8 / $denom))
|
||||
min: MIN((SQC_DCACHE_REQ_READ_8 / $denom))
|
||||
max: MAX((SQC_DCACHE_REQ_READ_8 / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Req (16 DWord):
|
||||
avg: AVG((SQC_DCACHE_REQ_READ_16 / $denom))
|
||||
min: MIN((SQC_DCACHE_REQ_READ_16 / $denom))
|
||||
max: MAX((SQC_DCACHE_REQ_READ_16 / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1403
|
||||
title: Scalar L1D Cache - L2 Interface
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Read Req:
|
||||
avg: AVG((SQC_TC_DATA_READ_REQ / $denom))
|
||||
min: MIN((SQC_TC_DATA_READ_REQ / $denom))
|
||||
max: MAX((SQC_TC_DATA_READ_REQ / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write Req:
|
||||
avg: AVG((SQC_TC_DATA_WRITE_REQ / $denom))
|
||||
min: MIN((SQC_TC_DATA_WRITE_REQ / $denom))
|
||||
max: MAX((SQC_TC_DATA_WRITE_REQ / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic Req:
|
||||
avg: AVG((SQC_TC_DATA_ATOMIC_REQ / $denom))
|
||||
min: MIN((SQC_TC_DATA_ATOMIC_REQ / $denom))
|
||||
max: MAX((SQC_TC_DATA_ATOMIC_REQ / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Stall:
|
||||
avg: AVG((SQC_TC_STALL / $denom))
|
||||
min: MIN((SQC_TC_STALL / $denom))
|
||||
max: MAX((SQC_TC_STALL / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
+174
@@ -0,0 +1,174 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# They can then be shown on hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 1500
|
||||
title: Texture Addresser and Texture Data (TA/TD)
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1501
|
||||
title: TA
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
TA Busy:
|
||||
avg: AVG(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
min: MIN(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
max: MAX(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
unit: pct
|
||||
tips:
|
||||
TC2TA Addr Stall:
|
||||
avg: AVG(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
min: MIN(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
max: MAX(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
unit: pct
|
||||
tips:
|
||||
TC2TA Data Stall:
|
||||
avg: AVG(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
min: MIN(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
max: MAX(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
unit: pct
|
||||
tips:
|
||||
TD2TA Addr Stall:
|
||||
avg: AVG(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
min: MIN(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
unit: pct
|
||||
tips:
|
||||
Total Instructions:
|
||||
avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
max: MAX((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Flat Instr:
|
||||
avg: AVG((TA_FLAT_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_WAVEFRONTS_sum / $denom))
|
||||
max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Flat Read Instr:
|
||||
avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Flat Write Instr:
|
||||
avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
max: MAX((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Flat Atomic Instr:
|
||||
avg: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
|
||||
max: MAX((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Buffer Instr:
|
||||
avg: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_WAVEFRONTS_sum / $denom))
|
||||
max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Buffer Read Instr:
|
||||
avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Buffer Write Instr:
|
||||
avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
max: MAX((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Buffer Atomic Instr:
|
||||
avg: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
|
||||
max: MAX((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Buffer Total Cycles:
|
||||
avg: AVG((TA_BUFFER_TOTAL_CYCLES_sum / $denom))
|
||||
min: MIN((TA_BUFFER_TOTAL_CYCLES_sum / $denom))
|
||||
max: MAX((TA_BUFFER_TOTAL_CYCLES_sum / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Buffer Coalesced Read:
|
||||
avg: AVG((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom))
|
||||
min: MIN((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom))
|
||||
max: MAX((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Buffer Coalesced Write:
|
||||
avg: AVG((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom))
|
||||
min: MIN((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom))
|
||||
max: MAX((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1502
|
||||
title: TD
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
TD Busy:
|
||||
avg: AVG(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
min: MIN(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
max: MAX(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
unit: pct
|
||||
tips:
|
||||
TC2TD Stall:
|
||||
avg: AVG(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
min: MIN(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
max: MAX(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
unit: pct
|
||||
tips:
|
||||
SPI2TD Stall:
|
||||
avg: AVG(((100 * TD_SPI_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
min: MIN(((100 * TD_SPI_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
max: MAX(((100 * TD_SPI_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU)))
|
||||
unit: pct
|
||||
tips:
|
||||
Coalescable Instr:
|
||||
avg: AVG((TD_COALESCABLE_WAVEFRONT_sum / $denom))
|
||||
min: MIN((TD_COALESCABLE_WAVEFRONT_sum / $denom))
|
||||
max: MAX((TD_COALESCABLE_WAVEFRONT_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Load Instr:
|
||||
avg: AVG((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum)
|
||||
/ $denom))
|
||||
min: MIN((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum)
|
||||
/ $denom))
|
||||
max: MAX((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum)
|
||||
/ $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Store Instr:
|
||||
avg: AVG((TD_STORE_WAVEFRONT_sum / $denom))
|
||||
min: MIN((TD_STORE_WAVEFRONT_sum / $denom))
|
||||
max: MAX((TD_STORE_WAVEFRONT_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
Atomic Instr:
|
||||
avg: AVG((TD_ATOMIC_WAVEFRONT_sum / $denom))
|
||||
min: MIN((TD_ATOMIC_WAVEFRONT_sum / $denom))
|
||||
max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
+403
@@ -0,0 +1,403 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# They can then be shown on hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 1600
|
||||
title: Vector L1 Data Cache
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1601
|
||||
title: Speed-of-Light
|
||||
header:
|
||||
metric: Metric
|
||||
value: Value
|
||||
tips: Tips
|
||||
metric:
|
||||
Buffer Coalescing:
|
||||
value:
|
||||
AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
|
||||
* 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None))
|
||||
tips:
|
||||
Cache Util:
|
||||
value:
|
||||
AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
|
||||
!= 0) else None))
|
||||
tips:
|
||||
Cache BW:
|
||||
value:
|
||||
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
Cache Hit:
|
||||
value:
|
||||
AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
|
||||
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
|
||||
/ TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
|
||||
None))
|
||||
tips:
|
||||
comparable: false # for now
|
||||
cli_style: simple_bar
|
||||
|
||||
- metric_table:
|
||||
id: 1602
|
||||
title: L1D Cache Stalls (%)
|
||||
header:
|
||||
metric: Metric
|
||||
expr: Expression
|
||||
tips: Tips
|
||||
metric:
|
||||
Stalled on L2 Data:
|
||||
expr:
|
||||
(((100 * TCP_PENDING_STALL_CYCLES_sum) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
|
||||
!= 0) else None)
|
||||
tips:
|
||||
Stalled on L2 Req:
|
||||
expr:
|
||||
(((100 * TCP_TCR_TCP_STALL_CYCLES_sum) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
|
||||
!= 0) else None)
|
||||
tips:
|
||||
Tag RAM Stall (Read):
|
||||
expr:
|
||||
(((100 * TCP_READ_TAGCONFLICT_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
|
||||
if (TCP_GATE_EN1_sum != 0) else None)
|
||||
tips:
|
||||
Tag RAM Stall (Write):
|
||||
expr:
|
||||
(((100 * TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
|
||||
if (TCP_GATE_EN1_sum != 0) else None)
|
||||
tips:
|
||||
Tag RAM Stall (Atomic):
|
||||
expr:
|
||||
(((100 * TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
|
||||
if (TCP_GATE_EN1_sum != 0) else None)
|
||||
tips:
|
||||
cli_style: simple_box
|
||||
|
||||
- metric_table:
|
||||
id: 1603
|
||||
title: L1D Cache Accesses
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Total Req:
|
||||
avg: AVG((TCP_TOTAL_ACCESSES_sum / $denom))
|
||||
min: MIN((TCP_TOTAL_ACCESSES_sum / $denom))
|
||||
max: MAX((TCP_TOTAL_ACCESSES_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Req:
|
||||
avg: AVG((TCP_TOTAL_READ_sum / $denom))
|
||||
min: MIN((TCP_TOTAL_READ_sum / $denom))
|
||||
max: MAX((TCP_TOTAL_READ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write Req:
|
||||
avg: AVG((TCP_TOTAL_WRITE_sum / $denom))
|
||||
min: MIN((TCP_TOTAL_WRITE_sum / $denom))
|
||||
max: MAX((TCP_TOTAL_WRITE_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic Req:
|
||||
avg:
|
||||
AVG(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
|
||||
/ $denom))
|
||||
min:
|
||||
MIN(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
|
||||
/ $denom))
|
||||
max:
|
||||
MAX(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
|
||||
/ $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache BW:
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
Cache Accesses:
|
||||
avg: AVG((TCP_TOTAL_CACHE_ACCESSES_sum / $denom))
|
||||
min: MIN((TCP_TOTAL_CACHE_ACCESSES_sum / $denom))
|
||||
max: MAX((TCP_TOTAL_CACHE_ACCESSES_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache Hits:
|
||||
avg:
|
||||
AVG(((TCP_TOTAL_CACHE_ACCESSES_sum - (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
|
||||
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
|
||||
/ $denom))
|
||||
min:
|
||||
MIN(((TCP_TOTAL_CACHE_ACCESSES_sum - (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
|
||||
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
|
||||
/ $denom))
|
||||
max:
|
||||
MAX(((TCP_TOTAL_CACHE_ACCESSES_sum - (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
|
||||
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
|
||||
/ $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache Hit Rate:
|
||||
avg:
|
||||
AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
|
||||
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
|
||||
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
|
||||
None))
|
||||
min:
|
||||
MIN(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
|
||||
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
|
||||
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
|
||||
None))
|
||||
max:
|
||||
MAX(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
|
||||
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
|
||||
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
|
||||
None))
|
||||
unit: pct
|
||||
tips:
|
||||
Invalidate:
|
||||
avg: AVG((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom))
|
||||
min: MIN((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom))
|
||||
max: MAX((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1-L2 BW:
|
||||
avg:
|
||||
AVG(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum)
|
||||
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
|
||||
min:
|
||||
MIN(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum)
|
||||
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
|
||||
max:
|
||||
MAX(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum)
|
||||
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
L1-L2 Read:
|
||||
avg: AVG((TCP_TCC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1-L2 Write:
|
||||
avg: AVG((TCP_TCC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1-L2 Atomic:
|
||||
avg:
|
||||
AVG(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
|
||||
/ $denom))
|
||||
min:
|
||||
MIN(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
|
||||
/ $denom))
|
||||
max:
|
||||
MAX(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
|
||||
/ $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1 Access Latency:
|
||||
avg:
|
||||
# AVG(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum
|
||||
# != 0) else None))
|
||||
min:
|
||||
# MIN(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum
|
||||
# != 0) else None))
|
||||
max:
|
||||
# MAX(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum
|
||||
# != 0) else None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
L1-L2 Read Latency:
|
||||
avg:
|
||||
# AVG(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum))
|
||||
# if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None))
|
||||
min:
|
||||
# MIN(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum))
|
||||
# if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None))
|
||||
max:
|
||||
# MAX(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum))
|
||||
# if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
L1-L2 Write Latency:
|
||||
avg:
|
||||
# AVG(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
|
||||
# if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) != 0) else
|
||||
# None))
|
||||
min:
|
||||
# MIN(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
|
||||
# if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) != 0) else
|
||||
# None))
|
||||
max:
|
||||
# MAX(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
|
||||
# if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) != 0) else
|
||||
# None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1604
|
||||
title: L1D - L2 Transactions
|
||||
header:
|
||||
metric: Metric
|
||||
xfer: Xfer
|
||||
coherency: Coherency
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
NC - Read:
|
||||
xfer: Read
|
||||
coherency: NC
|
||||
avg: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Read:
|
||||
xfer: Read
|
||||
coherency: UC
|
||||
avg: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Read:
|
||||
xfer: Read
|
||||
coherency: CC
|
||||
avg: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Read:
|
||||
xfer: Read
|
||||
coherency: RW
|
||||
avg: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Write:
|
||||
xfer: Write
|
||||
coherency: RW
|
||||
avg: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
NC - Write:
|
||||
xfer: Write
|
||||
coherency: NC
|
||||
avg: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Write:
|
||||
xfer: Write
|
||||
coherency: UC
|
||||
avg: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Write:
|
||||
xfer: Write
|
||||
coherency: CC
|
||||
avg: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
NC - Atomic:
|
||||
xfer: Atomic
|
||||
coherency: NC
|
||||
avg: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Atomic:
|
||||
xfer: Atomic
|
||||
coherency: UC
|
||||
avg: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Atomic:
|
||||
xfer: Atomic
|
||||
coherency: CC
|
||||
avg: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Atomic:
|
||||
xfer: Atomic
|
||||
coherency: RW
|
||||
avg: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1605
|
||||
title: L1D Addr Translation
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
units: Units
|
||||
tips: Tips
|
||||
metric:
|
||||
Req:
|
||||
avg: AVG((TCP_UTCL1_REQUEST_sum / $denom))
|
||||
min: MIN((TCP_UTCL1_REQUEST_sum / $denom))
|
||||
max: MAX((TCP_UTCL1_REQUEST_sum / $denom))
|
||||
units: ( + $normUnit)
|
||||
tips:
|
||||
Hit Ratio:
|
||||
avg:
|
||||
AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
|
||||
(TCP_UTCL1_REQUEST_sum != 0) else None))
|
||||
min:
|
||||
MIN((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
|
||||
(TCP_UTCL1_REQUEST_sum != 0) else None))
|
||||
max:
|
||||
MAX((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
|
||||
(TCP_UTCL1_REQUEST_sum != 0) else None))
|
||||
units: pct
|
||||
tips:
|
||||
Hits:
|
||||
avg: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
|
||||
min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
|
||||
max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
|
||||
units: ( + $normUnit)
|
||||
tips:
|
||||
Misses (Translation):
|
||||
avg: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
|
||||
min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
|
||||
max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
|
||||
units: ( + $normUnit)
|
||||
tips:
|
||||
Misses (Permission):
|
||||
avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
units: ( + $normUnit)
|
||||
tips:
|
||||
+387
@@ -0,0 +1,387 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# They can then be shown on hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 1700
|
||||
title: L2 Cache
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1701
|
||||
title: Speed-of-Light
|
||||
header:
|
||||
metric: Metric
|
||||
value: Value
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
L2 Util:
|
||||
value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE)))
|
||||
unit: pct
|
||||
tips:
|
||||
Cache Hit:
|
||||
value:
|
||||
AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else 0))
|
||||
unit: pct
|
||||
tips:
|
||||
L2-EA Rd BW:
|
||||
value:
|
||||
AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
L2-EA Wr BW:
|
||||
value:
|
||||
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1702
|
||||
title: L2 - Fabric Transactions
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Read BW:
|
||||
avg:
|
||||
AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / $denom))
|
||||
min:
|
||||
MIN((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / $denom))
|
||||
max:
|
||||
MAX((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write BW:
|
||||
avg:
|
||||
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
min:
|
||||
MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
max:
|
||||
MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read (32B):
|
||||
avg: AVG((TCC_EA0_RDREQ_32B_sum / $denom))
|
||||
min: MIN((TCC_EA0_RDREQ_32B_sum / $denom))
|
||||
max: MAX((TCC_EA0_RDREQ_32B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read (Uncached 32B):
|
||||
avg: AVG((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
|
||||
min: MIN((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
|
||||
max: MAX((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read (64B):
|
||||
avg: AVG(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
|
||||
min: MIN(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
|
||||
max: MAX(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
HBM Read:
|
||||
avg: AVG((TCC_EA0_RDREQ_DRAM_sum / $denom))
|
||||
min: MIN((TCC_EA0_RDREQ_DRAM_sum / $denom))
|
||||
max: MAX((TCC_EA0_RDREQ_DRAM_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write (32B):
|
||||
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
max: MAX(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write (Uncached 32B):
|
||||
avg: AVG((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
|
||||
min: MIN((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
|
||||
max: MAX((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write (64B):
|
||||
avg: AVG((TCC_EA0_WRREQ_64B_sum / $denom))
|
||||
min: MIN((TCC_EA0_WRREQ_64B_sum / $denom))
|
||||
max: MAX((TCC_EA0_WRREQ_64B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
HBM Write:
|
||||
avg: AVG((TCC_EA0_WRREQ_DRAM_sum / $denom))
|
||||
min: MIN((TCC_EA0_WRREQ_DRAM_sum / $denom))
|
||||
max: MAX((TCC_EA0_WRREQ_DRAM_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Latency:
|
||||
avg:
|
||||
AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum !=
|
||||
0) else None))
|
||||
min:
|
||||
MIN(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum !=
|
||||
0) else None))
|
||||
max:
|
||||
MAX(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum !=
|
||||
0) else None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
Write Latency:
|
||||
avg:
|
||||
AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum !=
|
||||
0) else None))
|
||||
min:
|
||||
MIN(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum !=
|
||||
0) else None))
|
||||
max:
|
||||
MAX(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum !=
|
||||
0) else None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
Atomic Latency:
|
||||
avg:
|
||||
AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else None))
|
||||
min:
|
||||
MIN(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else None))
|
||||
max:
|
||||
MAX(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
Read Stall:
|
||||
avg:
|
||||
# AVG((((100 * ((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum + TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum)
|
||||
# + TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
|
||||
# 0) else None))
|
||||
min:
|
||||
# MIN((((100 * ((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum + TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum)
|
||||
# + TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
|
||||
# 0) else None))
|
||||
max:
|
||||
# MAX((((100 * ((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum + TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum)
|
||||
# + TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
|
||||
# 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
Write Stall:
|
||||
avg:
|
||||
# AVG((((100 * ((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum + TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum)
|
||||
# + TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
|
||||
# 0) else None))
|
||||
min:
|
||||
# MIN((((100 * ((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum + TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum)
|
||||
# + TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
|
||||
# 0) else None))
|
||||
max:
|
||||
# MAX((((100 * ((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum + TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum)
|
||||
# + TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
|
||||
# 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1703
|
||||
title: L2 Cache Accesses
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Req:
|
||||
avg: AVG((TCC_REQ_sum / $denom))
|
||||
min: MIN((TCC_REQ_sum / $denom))
|
||||
max: MAX((TCC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Streaming Req:
|
||||
avg: AVG((TCC_STREAMING_REQ_sum / $denom))
|
||||
min: MIN((TCC_STREAMING_REQ_sum / $denom))
|
||||
max: MAX((TCC_STREAMING_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Req:
|
||||
avg: AVG((TCC_READ_sum / $denom))
|
||||
min: MIN((TCC_READ_sum / $denom))
|
||||
max: MAX((TCC_READ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write Req:
|
||||
avg: AVG((TCC_WRITE_sum / $denom))
|
||||
min: MIN((TCC_WRITE_sum / $denom))
|
||||
max: MAX((TCC_WRITE_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic Req:
|
||||
avg: AVG((TCC_ATOMIC_sum / $denom))
|
||||
min: MIN((TCC_ATOMIC_sum / $denom))
|
||||
max: MAX((TCC_ATOMIC_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Probe Req:
|
||||
avg: AVG((TCC_PROBE_sum / $denom))
|
||||
min: MIN((TCC_PROBE_sum / $denom))
|
||||
max: MAX((TCC_PROBE_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Hits:
|
||||
avg: AVG((TCC_HIT_sum / $denom))
|
||||
min: MIN((TCC_HIT_sum / $denom))
|
||||
max: MAX((TCC_HIT_sum / $denom))
|
||||
unit: (Hits + $normUnit)
|
||||
tips:
|
||||
Misses:
|
||||
avg: AVG((TCC_MISS_sum / $denom))
|
||||
min: MIN((TCC_MISS_sum / $denom))
|
||||
max: MAX((TCC_MISS_sum / $denom))
|
||||
unit: (Misses + $normUnit)
|
||||
tips:
|
||||
Cache Hit:
|
||||
avg:
|
||||
AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else None))
|
||||
min:
|
||||
MIN((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else None))
|
||||
max:
|
||||
MAX((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else None))
|
||||
unit: pct
|
||||
tips:
|
||||
Writeback:
|
||||
avg: AVG((TCC_WRITEBACK_sum / $denom))
|
||||
min: MIN((TCC_WRITEBACK_sum / $denom))
|
||||
max: MAX((TCC_WRITEBACK_sum / $denom))
|
||||
unit: ( + $normUnit)
|
||||
tips:
|
||||
NC Req:
|
||||
avg: AVG((TCC_NC_REQ_sum / $denom))
|
||||
min: MIN((TCC_NC_REQ_sum / $denom))
|
||||
max: MAX((TCC_NC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC Req:
|
||||
avg: AVG((TCC_UC_REQ_sum / $denom))
|
||||
min: MIN((TCC_UC_REQ_sum / $denom))
|
||||
max: MAX((TCC_UC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC Req:
|
||||
avg: AVG((TCC_CC_REQ_sum / $denom))
|
||||
min: MIN((TCC_CC_REQ_sum / $denom))
|
||||
max: MAX((TCC_CC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW Req:
|
||||
avg: AVG((TCC_RW_REQ_sum / $denom))
|
||||
min: MIN((TCC_RW_REQ_sum / $denom))
|
||||
max: MAX((TCC_RW_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Writeback (Normal):
|
||||
avg: AVG((TCC_NORMAL_WRITEBACK_sum / $denom))
|
||||
min: MIN((TCC_NORMAL_WRITEBACK_sum / $denom))
|
||||
max: MAX((TCC_NORMAL_WRITEBACK_sum / $denom))
|
||||
unit: ( + $normUnit)
|
||||
tips:
|
||||
Writeback (TC Req):
|
||||
avg: AVG((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom))
|
||||
min: MIN((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom))
|
||||
max: MAX((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom))
|
||||
unit: ( + $normUnit)
|
||||
tips:
|
||||
Evict (Normal):
|
||||
avg: AVG((TCC_NORMAL_EVICT_sum / $denom))
|
||||
min: MIN((TCC_NORMAL_EVICT_sum / $denom))
|
||||
max: MAX((TCC_NORMAL_EVICT_sum / $denom))
|
||||
unit: ( + $normUnit)
|
||||
tips:
|
||||
Evict (TC Req):
|
||||
avg: AVG((TCC_ALL_TC_OP_INV_EVICT_sum / $denom))
|
||||
min: MIN((TCC_ALL_TC_OP_INV_EVICT_sum / $denom))
|
||||
max: MAX((TCC_ALL_TC_OP_INV_EVICT_sum / $denom))
|
||||
unit: ( + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1704
|
||||
title: L2 - Fabric Interface Stalls
|
||||
header:
|
||||
metric: Metric
|
||||
type: Type
|
||||
transaction: Transaction
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Read - Remote Socket Stall:
|
||||
type: Remote Socket Stall
|
||||
transaction: Read
|
||||
avg: # AVG((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
min: # MIN((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
max: # MAX((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read - Peer GCD Stall:
|
||||
type: Peer GCD Stall
|
||||
transaction: Read
|
||||
avg: # AVG((TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
min: # MIN((TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
max: # MAX((TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Read
|
||||
avg: # AVG((TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
min: # MIN((TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
max: # MAX((TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Remote Socket Stall:
|
||||
type: Remote Socket Stall
|
||||
transaction: Write
|
||||
avg: # AVG((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
min: # MIN((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
max: # MAX((TCC_EA0_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Peer GCD Stall:
|
||||
type: Peer GCD Stall
|
||||
transaction: Write
|
||||
avg: # AVG((TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
min: # MIN((TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
max: # MAX((TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Write
|
||||
avg: # AVG((TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
min: # MIN((TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
max: # MAX((TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Credit Starvation:
|
||||
type: Credit Starvation
|
||||
transaction: Write
|
||||
avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
min: MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
+298
@@ -0,0 +1,298 @@
|
||||
---
|
||||
# Add description/tips for each metric in this section.
|
||||
# So it could be shown in hover.
|
||||
Metric Description:
|
||||
|
||||
# Define the panel properties and properties of each metric in the panel.
|
||||
Panel Config:
|
||||
id: 1800
|
||||
title: L2 Cache (per Channel)
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1801
|
||||
title: Aggregate Stats (All 32 channels)
|
||||
header:
|
||||
metric: Metric
|
||||
avg: Avg
|
||||
std dev: Std Dev
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
L2 Cache Hit Rate:
|
||||
avg: AVG(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
|
||||
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
|
||||
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
|
||||
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
|
||||
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
|
||||
/ (((((((((((((((((TCC_MISS[0]
|
||||
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
|
||||
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
|
||||
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
|
||||
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
|
||||
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
|
||||
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
|
||||
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
|
||||
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
|
||||
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
|
||||
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
|
||||
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
|
||||
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
|
||||
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
|
||||
+ TCC_HIT[15])) != 0) else None)
|
||||
std dev: STD(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
|
||||
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
|
||||
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
|
||||
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
|
||||
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
|
||||
/ (((((((((((((((((TCC_MISS[0]
|
||||
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
|
||||
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
|
||||
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
|
||||
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
|
||||
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
|
||||
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
|
||||
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
|
||||
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
|
||||
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
|
||||
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
|
||||
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
|
||||
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
|
||||
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
|
||||
+ TCC_HIT[15])) != 0) else None)
|
||||
min: MIN(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
|
||||
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
|
||||
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
|
||||
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
|
||||
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
|
||||
/ (((((((((((((((((TCC_MISS[0]
|
||||
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
|
||||
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
|
||||
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
|
||||
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
|
||||
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
|
||||
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
|
||||
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
|
||||
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
|
||||
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
|
||||
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
|
||||
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
|
||||
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
|
||||
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
|
||||
+ TCC_HIT[15])) != 0) else None)
|
||||
max: MAX(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
|
||||
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
|
||||
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
|
||||
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
|
||||
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
|
||||
/ (((((((((((((((((TCC_MISS[0]
|
||||
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
|
||||
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
|
||||
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
|
||||
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
|
||||
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
|
||||
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
|
||||
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
|
||||
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
|
||||
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
|
||||
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
|
||||
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
|
||||
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
|
||||
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
|
||||
+ TCC_HIT[15])) != 0) else None)
|
||||
unit: pct
|
||||
tips:
|
||||
# FIXME: add the other aggregate metrics!!
|
||||
|
||||
- metric_table:
|
||||
id: 1802
|
||||
title: L2 Cache Hit Rate (%)
|
||||
header:
|
||||
metric: Metric
|
||||
expr: Expression
|
||||
metric:
|
||||
"::_1":
|
||||
expr:
|
||||
(((100 * TCC_HIT[::_1]) / (TCC_HIT[::_1] + TCC_MISS[::_1])) if ((TCC_HIT[::_1]
|
||||
+ TCC_MISS[::_1]) != 0) else None)
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
cli_style: simple_box
|
||||
|
||||
- metric_table:
|
||||
id: 1803
|
||||
title: Requests (Requests)
|
||||
header:
|
||||
metric: Metric
|
||||
expr: Expression
|
||||
metric:
|
||||
"::_1":
|
||||
expr: (TO_INT(TCC_REQ[::_1]) / $denom)
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
cli_style: simple_box
|
||||
|
||||
- metric_table:
|
||||
id: 1804
|
||||
title: L1-L2 Access (Requests)
|
||||
header:
|
||||
metric: Metric
|
||||
read req: L1-L2 Read
|
||||
write req: L1-L2 Write
|
||||
atomic req: L1-L2 Atomic
|
||||
metric:
|
||||
"::_1":
|
||||
read req: AVG((TO_INT(TCC_READ[::_1]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[::_1]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[::_1]) / $denom))
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
cli_style: simple_multiple_bar
|
||||
|
||||
- metric_table:
|
||||
id: 1805
|
||||
title: L2-EA Access (Requests)
|
||||
header:
|
||||
metric: Metric
|
||||
read req: L2-EA Read
|
||||
write req: L2-EA Write
|
||||
atomic req: L2-EA Atomic
|
||||
metric:
|
||||
"::_1":
|
||||
read req: AVG((TO_INT(TCC_EA0_RDREQ[::_1]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_EA0_WRREQ[::_1]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_EA0_ATOMIC[::_1]) / $denom))
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
cli_style: simple_multiple_bar
|
||||
|
||||
# - metric_table:
|
||||
# id: 1806
|
||||
# title: L2-EA Latency (Cycles)
|
||||
# header:
|
||||
# metric: Metric
|
||||
# read lat: L2-EA Read
|
||||
# write lat: L2-EA Write
|
||||
# atomic lat: L2-EA Atomic
|
||||
# metric:
|
||||
# "::_1":
|
||||
# read lat:
|
||||
# AVG(((TCC_EA0_RDREQ_LEVEL[::_1] / TCC_EA0_RDREQ[::_1]) if (TCC_EA0_RDREQ[::_1]
|
||||
# != 0) else None))
|
||||
# write lat:
|
||||
# AVG(((TCC_EA0_WRREQ_LEVEL[::_1] / TCC_EA0_WRREQ[::_1]) if (TCC_EA0_WRREQ[::_1]
|
||||
# != 0) else None))
|
||||
# atomic lat:
|
||||
# AVG(((TCC_EA0_ATOMIC_LEVEL[::_1] / TCC_EA0_ATOMIC[::_1]) if
|
||||
# (TCC_EA0_ATOMIC[::_1] != 0) else 0))
|
||||
# placeholder_range:
|
||||
# "::_1": $totalL2Banks
|
||||
# cli_style: simple_multiple_bar
|
||||
|
||||
- metric_table:
|
||||
id: 1806
|
||||
title: L2-EA Read Latency (Cycles)
|
||||
header:
|
||||
metric: Metric
|
||||
expr: Expression
|
||||
metric:
|
||||
"::_1":
|
||||
expr:
|
||||
((TCC_EA0_RDREQ_LEVEL[::_1] / TCC_EA0_RDREQ[::_1]) if (TCC_EA0_RDREQ[::_1]
|
||||
!= 0) else None)
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
cli_style: simple_box
|
||||
|
||||
- metric_table:
|
||||
id: 1807
|
||||
title: L2-EA Write Latency (Cycles)
|
||||
header:
|
||||
metric: Metric
|
||||
expr: Expression
|
||||
metric:
|
||||
"::_1":
|
||||
expr:
|
||||
((TCC_EA0_WRREQ_LEVEL[::_1] / TCC_EA0_WRREQ[::_1]) if (TCC_EA0_WRREQ[::_1]
|
||||
!= 0) else None)
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
cli_style: simple_box
|
||||
|
||||
- metric_table:
|
||||
id: 1808
|
||||
title: L2-EA Atomic Latency (Cycles)
|
||||
header:
|
||||
metric: Metric
|
||||
expr: Expression
|
||||
metric:
|
||||
"::_1":
|
||||
expr: ((TCC_EA0_ATOMIC_LEVEL[::_1] / TCC_EA0_ATOMIC[::_1]) if
|
||||
(TCC_EA0_ATOMIC[::_1] != 0) else 0)
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
cli_style: simple_box
|
||||
|
||||
- metric_table:
|
||||
id: 1809
|
||||
title: L2-EA Read Stall (Cycles per normUnit)
|
||||
header:
|
||||
metric: Metric
|
||||
ea read stall - io: L2-EA Read Stall - IO
|
||||
ea read stall - gmi: L2-EA Read Stall - GMI
|
||||
ea read stall - dram: L2-EA Read Stall - DRAM
|
||||
metric:
|
||||
"::_1":
|
||||
ea read stall - io: AVG((TO_INT(TCC_EA0_RDREQ_IO_CREDIT_STALL[::_1]) / $denom))
|
||||
ea read stall - gmi: AVG((TO_INT(TCC_EA0_RDREQ_GMI_CREDIT_STALL[::_1]) / $denom))
|
||||
ea read stall - dram: AVG((TO_INT(TCC_EA0_RDREQ_DRAM_CREDIT_STALL[::_1]) / $denom))
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
cli_style: simple_multiple_bar
|
||||
|
||||
- metric_table:
|
||||
id: 1810
|
||||
title: L2-EA Write Stall (Cycles per normUnit)
|
||||
header:
|
||||
metric: Metric
|
||||
ea write stall - io: L2-EA Write Stall - IO
|
||||
ea write stall - gmi: L2-EA Write Stall - GMI
|
||||
ea write stall - dram: L2-EA Write Stall - DRAM
|
||||
ea write stall - starve: L2-EA Write Stall - Starve
|
||||
metric:
|
||||
"::_1":
|
||||
ea write stall - io: AVG((TO_INT(TCC_EA0_WRREQ_IO_CREDIT_STALL[::_1]) / $denom))
|
||||
ea write stall - gmi: AVG((TO_INT(TCC_EA0_WRREQ_GMI_CREDIT_STALL[::_1]) / $denom))
|
||||
ea write stall - dram: AVG((TO_INT(TCC_EA0_WRREQ_DRAM_CREDIT_STALL[::_1]) / $denom))
|
||||
ea write stall - starve: AVG((TO_INT(TCC_TOO_MANY_EA_WRREQS_STALL[::_1]) / $denom))
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
cli_style: simple_multiple_bar
|
||||
|
||||
- metric_table:
|
||||
id: 1811
|
||||
title: L2 Tag Stall (cycles)
|
||||
header:
|
||||
metric: Metric
|
||||
expr: Expression
|
||||
metric:
|
||||
"::_1":
|
||||
expr: TCC_TAG_STALL[::_1]
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
cli_style: simple_box
|
||||
|
||||
- metric_table:
|
||||
id: 1812
|
||||
title: L2 Bubble (128B request)
|
||||
header:
|
||||
metric: Metric
|
||||
expr: Expression
|
||||
metric:
|
||||
"::_1":
|
||||
expr: TCC_BUBBLE[::_1]
|
||||
placeholder_range:
|
||||
"::_1": $totalL2Banks
|
||||
# tips: Number of 128-byte read requests sent to EA
|
||||
cli_style: simple_box
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
---
|
||||
Panel Config:
|
||||
id: 2000
|
||||
title: Kernels
|
||||
data source:
|
||||
- raw_csv_table:
|
||||
id: 2001
|
||||
source: pmc_dispatch_info.csv
|
||||
@@ -0,0 +1,112 @@
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import os
|
||||
import config
|
||||
from omniperf_soc.soc_base import OmniSoC_Base
|
||||
from utils.utils import demarcate, mibench
|
||||
from roofline import Roofline
|
||||
import logging
|
||||
|
||||
class gfx941_soc (OmniSoC_Base):
    """SoC description for the gfx941 architecture.

    Registers the SoC name, the perfmon config directory, the compatible
    profilers, the per-IP-block counter limits and the SoC parameters with
    the base class, and creates a ``Roofline`` helper object.
    """

    def __init__(self, args):
        """Configure the base class for gfx941 from the parsed arguments.

        Args:
            args: argument object forwarded to ``OmniSoC_Base`` and
                ``Roofline``. A ``roof_only`` attribute is optional here.
        """
        super().__init__(args)
        self.set_soc_name("gfx941")

        # Roofline-only runs use the dedicated roofline counter set; every
        # other run uses the generalized Mi300 perfmon configs ("gfx940").
        roofline_only = hasattr(self.get_args(), 'roof_only') and self.get_args().roof_only
        perfmon_subdir = "roofline" if roofline_only else "gfx940"
        self.set_perfmon_dir(
            os.path.join(
                str(config.omniperf_home),
                "omniperf_soc",
                "profile_configs",
                perfmon_subdir,
            )
        )
        self.set_compatible_profilers(["rocprofv2"])

        # Maximum number of simultaneous counters per GFX IP block.
        counters_per_block = {
            "SQ": 8,
            "TA": 2,
            "TD": 2,
            "TCP": 4,
            "TCC": 4,
            "CPC": 2,
            "CPF": 2,
            "SPI": 2,
            "GRBM": 2,
            "GDS": 4,
            "TCC_channels": 32,
        }
        self.set_perfmon_config(counters_per_block)

        soc_parameters = {
            "numSE": 8,
            "numCU": 38,
            "numSIMD": 4,
            "numWavesPerCU": 32,
            "numSQC": 56,
            "L2Banks": 16,
            "LDSBanks": 32,
            "Freq": 1950,
            "mclk": 1300,
        }
        self.set_soc_param(soc_parameters)

        self.roofline_obj = Roofline(args)

    #-----------------------
    # Required child methods
    #-----------------------
    @demarcate
    def profiling_setup(self):
        """Run the base-class profiling setup, then filter performance counters."""
        super().profiling_setup()
        # Performance counter filtering, driven by the roof_only argument.
        roofline_only = self.get_args().roof_only
        self.perfmon_filter(roofline_only)

    @demarcate
    def post_profiling(self):
        """Run the base-class post-profiling step; roofline is currently skipped."""
        super().post_profiling()

        logging.info("[roofline] Roofline temporarily disabled in Mi300")
        # Roofline post-processing, kept for when it is re-enabled:
        # if not self.get_args().no_roof:
        #     logging.info("[roofline] Checking for roofline.csv in " + str(self.get_args().path))
        #     if not os.path.isfile(os.path.join(self.get_args().path, "roofline.csv")):
        #         mibench(self.get_args())
        #     self.roofline_obj.post_processing()
        # else:
        #     logging.info("[roofline] Skipping roofline")

    @demarcate
    def analysis_setup(self, roofline_parameters=None):
        """Run the base-class analysis setup; roofline is currently skipped.

        Args:
            roofline_parameters: currently unused while roofline is disabled.
        """
        super().analysis_setup()
        logging.info("[roofline] Roofline temporarily disabled in Mi300")
        # Roofline configuration for analysis, kept for when it is re-enabled:
        # if roofline_parameters:
        #     self.roofline_obj = Roofline(self.get_args(), roofline_parameters)
|
||||
|
||||
|
||||
|
||||
Referencia en una nueva incidencia
Block a user