Improve baseline comparison (#817)
* Do not force unsupported metrics to be specified as None on older GPU
architectures
* Remove metrics which are explicitly set to None
* Update CHANGELOG
* Fix analysis configuration so baseline comparisons work across all GPU
architectures
* Add missing 1812 section for gfx908
* Add missing 1812 section for gfx90a
* Baseline comparison will only show common metrics
* First workload will be used to set Metric ID index column
This commit is contained in:
@@ -83,6 +83,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
* OS distribution support minimum for roofline feature is now Ubuntu22.04, RHEL9, and SLES15SP6
|
||||
* Improve analysis block based filtering to accept metric id level filtering
|
||||
* This can be used to collect individual metrics from various sections of analysis config
|
||||
* CLI analysis mode baseline comparison will now only compare common metrics across workloads and will not show Metric ID
|
||||
* Remove metrics from analysis configuration files which are explicitly marked as empty or None
|
||||
|
||||
### Optimized
|
||||
|
||||
|
||||
@@ -32,12 +32,6 @@ Panel Config:
|
||||
peak: (((($max_sclk * $cu_per_gpu) * 64) * 2) / 1000)
|
||||
pop: None # No perf counter
|
||||
tips:
|
||||
MFMA FLOPs (F8):
|
||||
value: None # No HW module
|
||||
unit: GFLOP/s
|
||||
peak: None # No HW module
|
||||
pop: None # No HW module
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: None # No perf counter
|
||||
unit: GFLOP/s
|
||||
@@ -62,13 +56,6 @@ Panel Config:
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 256) / 1000)
|
||||
pop: None # No perf counter
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then remove this
|
||||
MFMA FLOPs (F6F4):
|
||||
value: None
|
||||
unit: GFLOP/s
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA IOPs (Int8):
|
||||
value: None # No perf counter
|
||||
unit: GIOP/s
|
||||
|
||||
@@ -45,10 +45,6 @@ Panel Config:
|
||||
#alias: valu_
|
||||
value: ROUND(AVG((SQ_INSTS_VALU / $denom)), 0)
|
||||
tips:
|
||||
MFMA:
|
||||
#alias: mfma_
|
||||
value: None # No perf counter
|
||||
tips:
|
||||
VMEM:
|
||||
#alias: vmem_
|
||||
value: ROUND(AVG((SQ_INSTS_VMEM / $denom)), 0)
|
||||
|
||||
@@ -19,27 +19,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then remove this
|
||||
CPC SYNC FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then remove this
|
||||
CPC CANE Stall Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then remove this
|
||||
CPC ADC Utilization:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
CPF Utilization:
|
||||
avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE))
|
||||
if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None))
|
||||
|
||||
@@ -19,13 +19,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
Schedule-Pipe Wave Occupancy:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Wave
|
||||
tips:
|
||||
Accelerator Utilization:
|
||||
avg: AVG(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD)
|
||||
min: MIN(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD)
|
||||
@@ -38,13 +31,6 @@ Panel Config:
|
||||
max: MAX(100 * SPI_CSN_BUSY / ($GRBM_GUI_ACTIVE_PER_XCD * $pipes_per_gpu * $se_per_gpu))
|
||||
unit: Pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
Scheduler-Pipe Wave Utilization:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Pct
|
||||
tips:
|
||||
Workgroup Manager Utilization:
|
||||
avg: AVG(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD)
|
||||
min: MIN(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD)
|
||||
@@ -122,13 +108,6 @@ Panel Config:
|
||||
0) else None)
|
||||
unit: Pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
Scheduler-Pipe FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Pct
|
||||
tips:
|
||||
Scheduler-Pipe Stall Rate:
|
||||
avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / ($GRBM_SPI_BUSY_PER_XCD * $se_per_gpu)) if ($GRBM_SPI_BUSY_PER_XCD !=
|
||||
0) else None))
|
||||
|
||||
-158
@@ -19,30 +19,12 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
VALU:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
VMEM:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
LDS:
|
||||
avg: AVG((SQ_INSTS_LDS / $denom))
|
||||
min: MIN((SQ_INSTS_LDS / $denom))
|
||||
max: MAX((SQ_INSTS_LDS / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
MFMA:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
SALU:
|
||||
avg: AVG((SQ_INSTS_SALU / $denom))
|
||||
min: MIN((SQ_INSTS_SALU / $denom))
|
||||
@@ -73,96 +55,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
INT32:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
INT64:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F16-ADD:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F16-MUL:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F16-FMA:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F16-Trans:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F32-ADD:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F32-MUL:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F32-FMA:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F32-Trans:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F64-ADD:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F64-MUL:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F64-FMA:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
F64-Trans:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
Conversion:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1003
|
||||
@@ -181,13 +73,6 @@ Panel Config:
|
||||
max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then remove this
|
||||
Spill/Stack Coalesceable Instr:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
Global/Generic Read:
|
||||
avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
@@ -242,46 +127,3 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
MFMA-I8:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
MFMA-F8:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module None # No HW module
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
MFMA-F16:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
MFMA-BF16:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
MFMA-F32:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
MFMA-F64:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
MFMA-F6F4:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
|
||||
-155
@@ -19,61 +19,6 @@ Panel Config:
|
||||
pop: Pct of Peak
|
||||
tips: Tips
|
||||
metric:
|
||||
VALU FLOPs:
|
||||
value: None # No perf counter
|
||||
unit: None
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
VALU IOPs:
|
||||
value: None # No perf counter
|
||||
unit: None
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA FLOPs (F8):
|
||||
value: None # No perf counter
|
||||
unit: GFLOP
|
||||
peak: None # No perf counter
|
||||
pop: None # No perf counter
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: None # No perf counter
|
||||
Unit: None
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: None # No perf counter
|
||||
unit: None
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: None # No perf counter
|
||||
unit: None
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA FLOPs (F64):
|
||||
value: None # No perf counter
|
||||
unit: None
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
MFMA FLOPs (F6F4):
|
||||
value: None
|
||||
unit: GFLOP
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA IOPs (INT8):
|
||||
value: None # No perf counter
|
||||
unit: None
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1102
|
||||
@@ -116,25 +61,6 @@ Panel Config:
|
||||
max: MAX((((100 * SQ_ACTIVE_INST_VALU) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
VALU Co-Issue Efficiency:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
VMEM Utilization:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: pct
|
||||
tips:
|
||||
Branch Utilization:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: pct
|
||||
tips:
|
||||
VALU Active Threads:
|
||||
avg: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU
|
||||
!= 0) else None))
|
||||
@@ -144,32 +70,6 @@ Panel Config:
|
||||
!= 0) else None))
|
||||
unit: Threads
|
||||
tips:
|
||||
MFMA Utilization:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: pct
|
||||
tips:
|
||||
MFMA Instr Cycles:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: cycles/instr
|
||||
tips:
|
||||
VMEM Latency:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: Cycles
|
||||
coll_level: SQ_INST_LEVEL_VMEM
|
||||
tips:
|
||||
SMEM Latency:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: Cycles
|
||||
coll_level: SQ_INST_LEVEL_SMEM
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1103
|
||||
@@ -182,58 +82,3 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
FLOPs (Total):
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
IOPs (Total):
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
F8 OPs:
|
||||
avg: None # No HW module
|
||||
min: None # No HW module
|
||||
max: None # No HW module
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
F16 OPs:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
BF16 OPs:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
F32 OPs:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
F64 OPs:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
F6F4 OPs:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
INT8 OPs:
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -55,48 +55,6 @@ Panel Config:
|
||||
max: MAX((SQ_INSTS_LDS / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
LDS LOAD:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
LDS STORE:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
LDS ATOMIC:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
LDS LOAD Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
LDS STORE Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
LDS ATOMIC Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
Theoretical Bandwidth:
|
||||
avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu))
|
||||
/ $denom))
|
||||
@@ -158,17 +116,3 @@ Panel Config:
|
||||
max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom))
|
||||
unit: (Accesses + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
LDS Command FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
LDS Data FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -43,27 +43,6 @@ Panel Config:
|
||||
max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu)))
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
Sequencer → TA Address Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
Sequencer → TA Command Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
Sequencer → TA Data Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Total Instructions:
|
||||
avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
@@ -82,12 +61,6 @@ Panel Config:
|
||||
max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Global/Generic Read Instructions for LDS:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Global/Generic Write Instructions:
|
||||
avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
@@ -112,12 +85,6 @@ Panel Config:
|
||||
max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Read Instructions for LDS:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Write Instructions:
|
||||
avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
@@ -172,12 +139,6 @@ Panel Config:
|
||||
max: MAX(((100 * TD_TC_STALL_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu)))
|
||||
unit: pct
|
||||
tips:
|
||||
Workgroup manager → Data-Return Stall:
|
||||
avg: # No perf counter
|
||||
min: # No perf counter
|
||||
max: # No perf counter
|
||||
unit: pct
|
||||
tips:
|
||||
Coalescable Instructions:
|
||||
avg: AVG((TD_COALESCABLE_WAVEFRONT_sum / $denom))
|
||||
min: MIN((TD_COALESCABLE_WAVEFRONT_sum / $denom))
|
||||
@@ -205,9 +166,3 @@ Panel Config:
|
||||
max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Write Ack Instructions:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -189,30 +189,6 @@ Panel Config:
|
||||
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Tag RAM 0 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 1 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 2 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 3 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1-L2 Read:
|
||||
avg: AVG((TCP_TCC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_READ_REQ_sum / $denom))
|
||||
@@ -419,12 +395,6 @@ Panel Config:
|
||||
max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
|
||||
units: (Req + $normUnit)
|
||||
tips:
|
||||
Misses under Translation Miss:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Req + $normUnit)
|
||||
tips:
|
||||
Permission Misses:
|
||||
avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
@@ -442,45 +412,3 @@ Panel Config:
|
||||
units: Units
|
||||
tips: Tips
|
||||
metric:
|
||||
Cache Full Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Cache Miss Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Serialization Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Thrashing Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Latency FIFO Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Resident Page Full Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
UTCL2 Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -143,18 +143,6 @@ Panel Config:
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
Read Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1703
|
||||
@@ -173,24 +161,6 @@ Panel Config:
|
||||
max: MAX((TCC_REQ_sum * 64) / $denom)
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Req:
|
||||
avg: AVG((TCC_REQ_sum / $denom))
|
||||
min: MIN((TCC_REQ_sum / $denom))
|
||||
@@ -221,24 +191,12 @@ Panel Config:
|
||||
max: MAX((TCC_STREAMING_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Bypasss Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Probe Req:
|
||||
avg: AVG((TCC_PROBE_sum / $denom))
|
||||
min: MIN((TCC_PROBE_sum / $denom))
|
||||
max: MAX((TCC_PROBE_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Input Buffer Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache Hit:
|
||||
avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else None))
|
||||
@@ -326,24 +284,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Stalled on Latency FIFO:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Stalled on Write Data FIFO:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Input Buffer Stalled on L2:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1705
|
||||
@@ -360,54 +300,6 @@ Panel Config:
|
||||
style:
|
||||
type: simple_multi_bar
|
||||
metric:
|
||||
Read - PCIe Stall:
|
||||
type: PCIe Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Read - Infinity Fabric™ Stall:
|
||||
type: Infinity Fabric™ Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Read - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - PCIe Stall:
|
||||
type: PCIe Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - Infinity Fabric™ Stall:
|
||||
type: Infinity Fabric™ Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - Credit Starvation:
|
||||
type: Credit Starvation
|
||||
transaction: Write
|
||||
@@ -458,24 +350,6 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write and Atomic (32B):
|
||||
avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
|
||||
@@ -506,51 +380,9 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic:
|
||||
avg: AVG((TCC_EA_ATOMIC_sum / $denom))
|
||||
min: MIN((TCC_EA_ATOMIC_sum / $denom))
|
||||
max: MAX((TCC_EA_ATOMIC_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -333,3 +333,18 @@ Panel Config:
|
||||
placeholder_range:
|
||||
"::_1": $total_l2_chan
|
||||
cli_style: simple_multiple_bar
|
||||
|
||||
- metric_table:
|
||||
id: 1812
|
||||
title: L2-Fabric (128B read requests per normUnit)
|
||||
header:
|
||||
metric: Channel
|
||||
expr: Expression
|
||||
metric:
|
||||
"::_1":
|
||||
expr: (TO_INT(TCC_BUBBLE[::_1]) / $denom)
|
||||
placeholder_range:
|
||||
"::_1": $total_l2_chan
|
||||
# tips: Number of 128-byte read requests sent to EA
|
||||
cli_style: simple_box
|
||||
tui_style: simple_box
|
||||
|
||||
@@ -42,12 +42,6 @@ Panel Config:
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp
|
||||
- Start_Timestamp)))) / (((($max_sclk * $cu_per_gpu) * 64) * 2) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F8):
|
||||
value: None
|
||||
unit: GFLOP/s
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP/s
|
||||
@@ -76,13 +70,6 @@ Panel Config:
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 256) / 1000))
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then remove this
|
||||
MFMA FLOPs (F6F4):
|
||||
value: None
|
||||
unit: GFLOP/s
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP/s
|
||||
|
||||
@@ -19,24 +19,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
CPC SYNC FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
CPC CANE Stall Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
CPC ADC Utilization:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
CPF Utilization:
|
||||
avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE))
|
||||
if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None))
|
||||
|
||||
@@ -19,13 +19,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
Schedule-Pipe Wave Occupancy:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Wave
|
||||
tips:
|
||||
Accelerator Utilization:
|
||||
avg: AVG(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD)
|
||||
min: MIN(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD)
|
||||
@@ -38,13 +31,6 @@ Panel Config:
|
||||
max: MAX(100 * SPI_CSN_BUSY / ($GRBM_GUI_ACTIVE_PER_XCD * $pipes_per_gpu * $se_per_gpu))
|
||||
unit: Pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparision logic to handle non existent metrics, then
|
||||
Scheduler-Pipe Wave Utilization:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Pct
|
||||
tips:
|
||||
Workgroup Manager Utilization:
|
||||
avg: AVG(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD)
|
||||
min: MIN(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD)
|
||||
@@ -122,13 +108,6 @@ Panel Config:
|
||||
0) else None)
|
||||
unit: Pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Scheduler-Pipe FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Pct
|
||||
tips:
|
||||
Scheduler-Pipe Stall Rate:
|
||||
avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / ($GRBM_SPI_BUSY_PER_XCD * $se_per_gpu)) if ($GRBM_SPI_BUSY_PER_XCD !=
|
||||
0) else None))
|
||||
|
||||
-20
@@ -181,13 +181,6 @@ Panel Config:
|
||||
max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Spill/Stack Coalesceable Instr:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
Global/Generic Read:
|
||||
avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
@@ -248,12 +241,6 @@ Panel Config:
|
||||
max: MAX((SQ_INSTS_VALU_MFMA_I8 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
MFMA-F8:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
MFMA-F16:
|
||||
avg: AVG((SQ_INSTS_VALU_MFMA_F16 / $denom))
|
||||
min: MIN((SQ_INSTS_VALU_MFMA_F16 / $denom))
|
||||
@@ -278,10 +265,3 @@ Panel Config:
|
||||
max: MAX((SQ_INSTS_VALU_MFMA_F64 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
MFMA-F6F4:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
|
||||
-33
@@ -41,12 +41,6 @@ Panel Config:
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp
|
||||
- Start_Timestamp)))) / (((($max_sclk * $cu_per_gpu) * 64) * 2) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F8):
|
||||
value: None
|
||||
unit: GFLOP
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
@@ -75,13 +69,6 @@ Panel Config:
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 256) / 1000))
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
MFMA FLOPs (F6F4):
|
||||
value: None
|
||||
unit: GFLOP
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA IOPs (INT8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
@@ -131,13 +118,6 @@ Panel Config:
|
||||
max: MAX((((100 * SQ_ACTIVE_INST_VALU) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
VALU Co-Issue Efficiency:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
VMEM Utilization:
|
||||
avg: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
min: MIN((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
@@ -236,12 +216,6 @@ Panel Config:
|
||||
max: MAX(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) + (SQ_INSTS_VALU_MFMA_MOPS_I8 * 512)) / $denom)
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
F8 OPs:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
F16 OPs:
|
||||
avg: AVG(((((((64 * SQ_INSTS_VALU_ADD_F16) + (64 * SQ_INSTS_VALU_MUL_F16)) +
|
||||
(64 * SQ_INSTS_VALU_TRANS_F16)) + (128 * SQ_INSTS_VALU_FMA_F16)) + (512 *
|
||||
@@ -278,13 +252,6 @@ Panel Config:
|
||||
+ (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
F6F4 OPs:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
INT8 OPs:
|
||||
avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
|
||||
@@ -55,48 +55,6 @@ Panel Config:
|
||||
max: MAX((SQ_INSTS_LDS / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS LOAD:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS STORE:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS ATOMIC:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS LOAD Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS STORE Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS ATOMIC Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
Theoretical Bandwidth:
|
||||
avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu))
|
||||
/ $denom))
|
||||
@@ -158,17 +116,3 @@ Panel Config:
|
||||
max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom))
|
||||
unit: (Accesses + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS Command FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS Data FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -43,27 +43,6 @@ Panel Config:
|
||||
max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu)))
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Sequencer → TA Address Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Sequencer → TA Command Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Sequencer → TA Data Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Total Instructions:
|
||||
avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
@@ -82,12 +61,6 @@ Panel Config:
|
||||
max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Global/Generic Read Instructions for LDS:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Global/Generic Write Instructions:
|
||||
avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
@@ -112,12 +85,6 @@ Panel Config:
|
||||
max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Read Instructions for LDS:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Write Instructions:
|
||||
avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
@@ -205,9 +172,3 @@ Panel Config:
|
||||
max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Write Ack Instructions:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -189,30 +189,6 @@ Panel Config:
|
||||
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Tag RAM 0 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 1 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 2 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 3 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1-L2 Read:
|
||||
avg: AVG((TCP_TCC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_READ_REQ_sum / $denom))
|
||||
@@ -419,12 +395,6 @@ Panel Config:
|
||||
max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
|
||||
units: (Req + $normUnit)
|
||||
tips:
|
||||
Misses under Translation Miss:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Req + $normUnit)
|
||||
tips:
|
||||
Permission Misses:
|
||||
avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
@@ -442,45 +412,3 @@ Panel Config:
|
||||
units: Units
|
||||
tips: Tips
|
||||
metric:
|
||||
Cache Full Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Cache Miss Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Serialization Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Thrashing Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Latency FIFO Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Resident Page Full Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
UTCL2 Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -143,18 +143,6 @@ Panel Config:
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
Read Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1703
|
||||
@@ -173,24 +161,6 @@ Panel Config:
|
||||
max: MAX((TCC_REQ_sum * 128) / $denom)
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Req:
|
||||
avg: AVG((TCC_REQ_sum / $denom))
|
||||
min: MIN((TCC_REQ_sum / $denom))
|
||||
@@ -221,24 +191,12 @@ Panel Config:
|
||||
max: MAX((TCC_STREAMING_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Bypasss Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Probe Req:
|
||||
avg: AVG((TCC_PROBE_sum / $denom))
|
||||
min: MIN((TCC_PROBE_sum / $denom))
|
||||
max: MAX((TCC_PROBE_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Input Buffer Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache Hit:
|
||||
avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else None))
|
||||
@@ -326,24 +284,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Stalled on Latency FIFO:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Stalled on Write Data FIFO:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Input Buffer Stalled on L2:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1705
|
||||
@@ -360,54 +300,6 @@ Panel Config:
|
||||
style:
|
||||
type: simple_multi_bar
|
||||
metric:
|
||||
Read - PCIe Stall:
|
||||
type: PCIe Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Read - Infinity Fabric™ Stall:
|
||||
type: Infinity Fabric™ Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Read - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - PCIe Stall:
|
||||
type: PCIe Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - Infinity Fabric™ Stall:
|
||||
type: Infinity Fabric™ Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - Credit Starvation:
|
||||
type: Credit Starvation
|
||||
transaction: Write
|
||||
@@ -458,24 +350,6 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write and Atomic (32B):
|
||||
avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
|
||||
@@ -506,51 +380,9 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic:
|
||||
avg: AVG((TCC_EA_ATOMIC_sum / $denom))
|
||||
min: MIN((TCC_EA_ATOMIC_sum / $denom))
|
||||
max: MAX((TCC_EA_ATOMIC_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -333,3 +333,18 @@ Panel Config:
|
||||
placeholder_range:
|
||||
"::_1": $total_l2_chan
|
||||
cli_style: simple_multiple_bar
|
||||
|
||||
- metric_table:
|
||||
id: 1812
|
||||
title: L2-Fabric (128B read requests per normUnit)
|
||||
header:
|
||||
metric: Channel
|
||||
expr: Expression
|
||||
metric:
|
||||
"::_1":
|
||||
expr: (TO_INT(TCC_BUBBLE[::_1]) / $denom)
|
||||
placeholder_range:
|
||||
"::_1": $total_l2_chan
|
||||
# tips: Number of 128-byte read requests sent to EA
|
||||
cli_style: simple_box
|
||||
tui_style: simple_box
|
||||
|
||||
@@ -77,13 +77,6 @@ Panel Config:
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 256) / 1000))
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
MFMA FLOPs (F6F4):
|
||||
value: None
|
||||
unit: GFLOP/s
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP/s
|
||||
|
||||
@@ -19,27 +19,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
CPC SYNC FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
CPC CANE Stall Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
CPC ADC Utilization:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
CPF Utilization:
|
||||
avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE))
|
||||
if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None))
|
||||
|
||||
@@ -19,13 +19,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Schedule-Pipe Wave Occupancy:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Wave
|
||||
tips:
|
||||
Accelerator Utilization:
|
||||
avg: AVG(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD)
|
||||
min: MIN(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD)
|
||||
@@ -38,13 +31,6 @@ Panel Config:
|
||||
max: MAX(100 * SPI_CSN_BUSY / ($GRBM_GUI_ACTIVE_PER_XCD * $pipes_per_gpu * $se_per_gpu))
|
||||
unit: Pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Scheduler-Pipe Wave Utilization:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Pct
|
||||
tips:
|
||||
Workgroup Manager Utilization:
|
||||
avg: AVG(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD)
|
||||
min: MIN(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD)
|
||||
@@ -122,13 +108,6 @@ Panel Config:
|
||||
0) else None)
|
||||
unit: Pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Scheduler-Pipe FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Pct
|
||||
tips:
|
||||
Scheduler-Pipe Stall Rate:
|
||||
avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / ($GRBM_SPI_BUSY_PER_XCD * $se_per_gpu)) if ($GRBM_SPI_BUSY_PER_XCD !=
|
||||
0) else None))
|
||||
|
||||
-12
@@ -209,13 +209,6 @@ Panel Config:
|
||||
max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Spill/Stack Coalesceable Instr:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Read:
|
||||
avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
@@ -281,9 +274,4 @@ Panel Config:
|
||||
min: MIN((SQ_INSTS_VALU_MFMA_F64 / $denom))
|
||||
max: MAX((SQ_INSTS_VALU_MFMA_F64 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
MFMA-F6F4:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
|
||||
-21
@@ -76,13 +76,6 @@ Panel Config:
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 256) / 1000))
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
MFMA FLOPs (F6F4):
|
||||
value: None
|
||||
unit: GFLOP
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA IOPs (INT8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
@@ -132,13 +125,6 @@ Panel Config:
|
||||
max: MAX((((100 * SQ_ACTIVE_INST_VALU) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
VALU Co-Issue Efficiency:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
VMEM Utilization:
|
||||
avg: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
min: MIN((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
@@ -279,13 +265,6 @@ Panel Config:
|
||||
+ (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
F6F4 OPs:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
INT8 OPs:
|
||||
avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
|
||||
@@ -55,48 +55,6 @@ Panel Config:
|
||||
max: MAX((SQ_INSTS_LDS / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS LOAD:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS STORE:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS ATOMIC:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS LOAD Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS STORE Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS ATOMIC Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
Theoretical Bandwidth:
|
||||
avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu))
|
||||
/ $denom))
|
||||
@@ -158,17 +116,3 @@ Panel Config:
|
||||
max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom))
|
||||
unit: (Accesses + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS Command FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS Data FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -43,27 +43,6 @@ Panel Config:
|
||||
max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu)))
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Sequencer → TA Address Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Sequencer → TA Command Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Sequencer → TA Data Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Total Instructions:
|
||||
avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
@@ -82,12 +61,6 @@ Panel Config:
|
||||
max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Global/Generic Read Instructions for LDS:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Global/Generic Write Instructions:
|
||||
avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
@@ -112,12 +85,6 @@ Panel Config:
|
||||
max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Read Instructions for LDS:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Write Instructions:
|
||||
avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
@@ -205,9 +172,3 @@ Panel Config:
|
||||
max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Write Ack Instructions:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -192,30 +192,6 @@ Panel Config:
|
||||
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Tag RAM 0 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 1 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 2 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 3 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1-L2 Read:
|
||||
avg: AVG((TCP_TCC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_READ_REQ_sum / $denom))
|
||||
@@ -237,24 +213,6 @@ Panel Config:
|
||||
/ $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1 Access Latency:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: Cycles
|
||||
tips:
|
||||
L1-L2 Read Latency:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: Cycles
|
||||
tips:
|
||||
L1-L2 Write Latency:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: Cycles
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1604
|
||||
@@ -410,12 +368,6 @@ Panel Config:
|
||||
max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
|
||||
units: (Req + $normUnit)
|
||||
tips:
|
||||
Misses under Translation Miss:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Req + $normUnit)
|
||||
tips:
|
||||
Permission Misses:
|
||||
avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
@@ -433,45 +385,3 @@ Panel Config:
|
||||
units: Units
|
||||
tips: Tips
|
||||
metric:
|
||||
Cache Full Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Cache Miss Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Serialization Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Thrashing Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Latency FIFO Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Resident Page Full Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
UTCL2 Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -146,18 +146,6 @@ Panel Config:
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
Read Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1703
|
||||
@@ -176,24 +164,6 @@ Panel Config:
|
||||
max: MAX((TCC_REQ_sum * 128) / $denom)
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Req:
|
||||
avg: AVG((TCC_REQ_sum / $denom))
|
||||
min: MIN((TCC_REQ_sum / $denom))
|
||||
@@ -224,24 +194,12 @@ Panel Config:
|
||||
max: MAX((TCC_STREAMING_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Bypasss Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Probe Req:
|
||||
avg: AVG((TCC_PROBE_sum / $denom))
|
||||
min: MIN((TCC_PROBE_sum / $denom))
|
||||
max: MAX((TCC_PROBE_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Input Buffer Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache Hit:
|
||||
avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else None))
|
||||
@@ -329,24 +287,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Stalled on Latency FIFO:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Stalled on Write Data FIFO:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Input Buffer Stalled on L2:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1705
|
||||
@@ -363,54 +303,6 @@ Panel Config:
|
||||
style:
|
||||
type: simple_multi_bar
|
||||
metric:
|
||||
Read - PCIe Stall:
|
||||
type: PCIe Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Read - Infinity Fabric™ Stall:
|
||||
type: Infinity Fabric™ Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Read - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - PCIe Stall:
|
||||
type: PCIe Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - Infinity Fabric™ Stall:
|
||||
type: Infinity Fabric™ Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - Credit Starvation:
|
||||
type: Credit Starvation
|
||||
transaction: Write
|
||||
@@ -461,24 +353,6 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write and Atomic (32B):
|
||||
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
@@ -509,51 +383,9 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic:
|
||||
avg: AVG((TCC_EA0_ATOMIC_sum / $denom))
|
||||
min: MIN((TCC_EA0_ATOMIC_sum / $denom))
|
||||
max: MAX((TCC_EA0_ATOMIC_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -77,13 +77,6 @@ Panel Config:
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 256) / 1000))
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
MFMA FLOPs (F6F4):
|
||||
value: None
|
||||
unit: GFLOP/s
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP/s
|
||||
|
||||
@@ -19,27 +19,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
CPC SYNC FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
CPC CANE Stall Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
CPC ADC Utilization:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
CPF Utilization:
|
||||
avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE))
|
||||
if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None))
|
||||
|
||||
@@ -19,13 +19,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Schedule-Pipe Wave Occupancy:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Wave
|
||||
tips:
|
||||
Accelerator Utilization:
|
||||
avg: AVG(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD)
|
||||
min: MIN(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD)
|
||||
@@ -38,13 +31,6 @@ Panel Config:
|
||||
max: MAX(100 * SPI_CSN_BUSY / ($GRBM_GUI_ACTIVE_PER_XCD * $pipes_per_gpu * $se_per_gpu))
|
||||
unit: Pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Scheduler-Pipe Wave Utilization:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Pct
|
||||
tips:
|
||||
Workgroup Manager Utilization:
|
||||
avg: AVG(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD)
|
||||
min: MIN(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD)
|
||||
@@ -122,13 +108,6 @@ Panel Config:
|
||||
0) else None)
|
||||
unit: Pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Scheduler-Pipe FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Pct
|
||||
tips:
|
||||
Scheduler-Pipe Stall Rate:
|
||||
avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / ($GRBM_SPI_BUSY_PER_XCD * $se_per_gpu)) if ($GRBM_SPI_BUSY_PER_XCD !=
|
||||
0) else None))
|
||||
|
||||
-14
@@ -209,13 +209,6 @@ Panel Config:
|
||||
max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Spill/Stack Coalesceable Instr:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Read:
|
||||
avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
@@ -282,10 +275,3 @@ Panel Config:
|
||||
max: MAX((SQ_INSTS_VALU_MFMA_F64 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
MFMA-F6F4:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
|
||||
-21
@@ -76,13 +76,6 @@ Panel Config:
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 256) / 1000))
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
MFMA FLOPs (F6F4):
|
||||
value: None
|
||||
unit: GFLOP
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA IOPs (INT8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
@@ -132,13 +125,6 @@ Panel Config:
|
||||
max: MAX((((100 * SQ_ACTIVE_INST_VALU) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
VALU Co-Issue Efficiency:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
VMEM Utilization:
|
||||
avg: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
min: MIN((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
@@ -279,13 +265,6 @@ Panel Config:
|
||||
+ (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
F6F4 OPs:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
INT8 OPs:
|
||||
avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
|
||||
@@ -55,48 +55,6 @@ Panel Config:
|
||||
max: MAX((SQ_INSTS_LDS / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS LOAD:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS STORE:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS ATOMIC:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS LOAD Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS STORE Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS ATOMIC Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
Theoretical Bandwidth:
|
||||
avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu))
|
||||
/ $denom))
|
||||
@@ -158,17 +116,3 @@ Panel Config:
|
||||
max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom))
|
||||
unit: (Accesses + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS Command FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
LDS Data FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -43,27 +43,6 @@ Panel Config:
|
||||
max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu)))
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Sequencer → TA Address Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Sequencer → TA Command Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Sequencer → TA Data Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Total Instructions:
|
||||
avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
@@ -82,12 +61,6 @@ Panel Config:
|
||||
max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Global/Generic Read Instructions for LDS:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Global/Generic Write Instructions:
|
||||
avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
@@ -112,12 +85,6 @@ Panel Config:
|
||||
max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Read Instructions for LDS:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Write Instructions:
|
||||
avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
@@ -205,9 +172,3 @@ Panel Config:
|
||||
max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Write Ack Instructions:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -192,30 +192,6 @@ Panel Config:
|
||||
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Tag RAM 0 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 1 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 2 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 3 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1-L2 Read:
|
||||
avg: AVG((TCP_TCC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_READ_REQ_sum / $denom))
|
||||
@@ -237,24 +213,6 @@ Panel Config:
|
||||
/ $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1 Access Latency:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: Cycles
|
||||
tips:
|
||||
L1-L2 Read Latency:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: Cycles
|
||||
tips:
|
||||
L1-L2 Write Latency:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: Cycles
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1604
|
||||
@@ -410,12 +368,6 @@ Panel Config:
|
||||
max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
|
||||
units: (Req + $normUnit)
|
||||
tips:
|
||||
Misses under Translation Miss:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Req + $normUnit)
|
||||
tips:
|
||||
Permission Misses:
|
||||
avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
@@ -433,45 +385,3 @@ Panel Config:
|
||||
units: Units
|
||||
tips: Tips
|
||||
metric:
|
||||
Cache Full Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Cache Miss Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Serialization Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Thrashing Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Latency FIFO Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Resident Page Full Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
UTCL2 Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -146,18 +146,6 @@ Panel Config:
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
Read Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1703
|
||||
@@ -176,24 +164,6 @@ Panel Config:
|
||||
max: MAX((TCC_REQ_sum * 128) / $denom)
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Req:
|
||||
avg: AVG((TCC_REQ_sum / $denom))
|
||||
min: MIN((TCC_REQ_sum / $denom))
|
||||
@@ -224,24 +194,12 @@ Panel Config:
|
||||
max: MAX((TCC_STREAMING_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Bypasss Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Probe Req:
|
||||
avg: AVG((TCC_PROBE_sum / $denom))
|
||||
min: MIN((TCC_PROBE_sum / $denom))
|
||||
max: MAX((TCC_PROBE_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Input Buffer Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache Hit:
|
||||
avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else None))
|
||||
@@ -329,24 +287,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Stalled on Latency FIFO:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Stalled on Write Data FIFO:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Input Buffer Stalled on L2:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1705
|
||||
@@ -363,54 +303,6 @@ Panel Config:
|
||||
style:
|
||||
type: simple_multi_bar
|
||||
metric:
|
||||
Read - PCIe Stall:
|
||||
type: PCIe Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Read - Infinity Fabric™ Stall:
|
||||
type: Infinity Fabric™ Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Read - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - PCIe Stall:
|
||||
type: PCIe Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - Infinity Fabric™ Stall:
|
||||
type: Infinity Fabric™ Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - Credit Starvation:
|
||||
type: Credit Starvation
|
||||
transaction: Write
|
||||
@@ -461,24 +353,6 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write and Atomic (32B):
|
||||
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
@@ -509,51 +383,9 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic:
|
||||
avg: AVG((TCC_EA0_ATOMIC_sum / $denom))
|
||||
min: MIN((TCC_EA0_ATOMIC_sum / $denom))
|
||||
max: MAX((TCC_EA0_ATOMIC_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -77,13 +77,6 @@ Panel Config:
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 256) / 1000))
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
MFMA FLOPs (F6F4):
|
||||
value: None
|
||||
unit: GFLOP/s
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP/s
|
||||
|
||||
@@ -76,27 +76,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
CPC SYNC FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
CPC CANE Stall Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
CPC ADC Utilization:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
CPC Utilization:
|
||||
avg: AVG((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE))
|
||||
if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None))
|
||||
|
||||
@@ -19,13 +19,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Schedule-Pipe Wave Occupancy:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Wave
|
||||
tips:
|
||||
Accelerator Utilization:
|
||||
avg: AVG(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD)
|
||||
min: MIN(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD)
|
||||
@@ -38,13 +31,6 @@ Panel Config:
|
||||
max: MAX(100 * SPI_CSN_BUSY / ($GRBM_GUI_ACTIVE_PER_XCD * $pipes_per_gpu * $se_per_gpu))
|
||||
unit: Pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Scheduler-Pipe Wave Utilization:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Pct
|
||||
tips:
|
||||
Workgroup Manager Utilization:
|
||||
avg: AVG(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD)
|
||||
min: MIN(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD)
|
||||
@@ -122,13 +108,6 @@ Panel Config:
|
||||
0) else None)
|
||||
unit: Pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Scheduler-Pipe FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: Pct
|
||||
tips:
|
||||
Scheduler-Pipe Stall Rate:
|
||||
avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / ($GRBM_SPI_BUSY_PER_XCD * $se_per_gpu)) if ($GRBM_SPI_BUSY_PER_XCD !=
|
||||
0) else None))
|
||||
|
||||
-14
@@ -209,13 +209,6 @@ Panel Config:
|
||||
max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
Spill/Stack Coalesceable Instr:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Read:
|
||||
avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
@@ -282,10 +275,3 @@ Panel Config:
|
||||
max: MAX((SQ_INSTS_VALU_MFMA_F64 / $denom))
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
MFMA-F6F4:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
|
||||
-21
@@ -76,13 +76,6 @@ Panel Config:
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 256) / 1000))
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
MFMA FLOPs (F6F4):
|
||||
value: None
|
||||
unit: GFLOP
|
||||
peak: None
|
||||
pop: None
|
||||
tips:
|
||||
MFMA IOPs (INT8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
@@ -132,13 +125,6 @@ Panel Config:
|
||||
max: MAX((((100 * SQ_ACTIVE_INST_VALU) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle non-existent metrics, then remove this
|
||||
VALU Co-Issue Efficiency:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: pct
|
||||
tips:
|
||||
VMEM Utilization:
|
||||
avg: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
min: MIN((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu))
|
||||
@@ -279,13 +265,6 @@ Panel Config:
|
||||
+ (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
F6F4 OPs:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (OPs + $normUnit)
|
||||
tips:
|
||||
INT8 OPs:
|
||||
avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom))
|
||||
|
||||
@@ -56,48 +56,6 @@ Panel Config:
|
||||
max: MAX((SQ_INSTS_LDS / $denom))
|
||||
unit: (Instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
LDS LOAD:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
LDS STORE:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
LDS ATOMIC:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (instr + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
LDS LOAD Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
LDS STORE Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
LDS ATOMIC Bandwidth:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
units: Gbps
|
||||
tips:
|
||||
Theoretical Bandwidth:
|
||||
avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu))
|
||||
/ $denom))
|
||||
@@ -159,17 +117,3 @@ Panel Config:
|
||||
max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom))
|
||||
unit: (Accesses + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
LDS Command FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
LDS Data FIFO Full Rate:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -43,27 +43,6 @@ Panel Config:
|
||||
max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu)))
|
||||
unit: pct
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
Sequencer → TA Address Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
Sequencer → TA Command Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
# TODO: Fix baseline comparison logic to handle nonexistent metrics, then remove this
|
||||
Sequencer → TA Data Stall:
|
||||
avg: None
|
||||
min: None
|
||||
max: None
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Total Instructions:
|
||||
avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom))
|
||||
@@ -82,12 +61,6 @@ Panel Config:
|
||||
max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Global/Generic Read Instructions for LDS:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Global/Generic Write Instructions:
|
||||
avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
|
||||
@@ -112,12 +85,6 @@ Panel Config:
|
||||
max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Read Instructions for LDS:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Spill/Stack Write Instructions:
|
||||
avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
|
||||
@@ -205,9 +172,3 @@ Panel Config:
|
||||
max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom))
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
Write Ack Instructions:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Instructions + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -194,30 +194,6 @@ Panel Config:
|
||||
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Tag RAM 0 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 1 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 2 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Tag RAM 3 Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1-L2 Read:
|
||||
avg: AVG((TCP_TCC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_READ_REQ_sum / $denom))
|
||||
@@ -239,24 +215,6 @@ Panel Config:
|
||||
/ $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
L1 Access Latency:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: Cycles
|
||||
tips:
|
||||
L1-L2 Read Latency:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: Cycles
|
||||
tips:
|
||||
L1-L2 Write Latency:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: Cycles
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1604
|
||||
@@ -412,12 +370,6 @@ Panel Config:
|
||||
max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
|
||||
units: (Req + $normUnit)
|
||||
tips:
|
||||
Misses under Translation Miss:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Req + $normUnit)
|
||||
tips:
|
||||
Permission Misses:
|
||||
avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
|
||||
@@ -435,45 +387,3 @@ Panel Config:
|
||||
units: Units
|
||||
tips: Tips
|
||||
metric:
|
||||
Cache Full Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Cache Miss Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Serialization Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Thrashing Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Latency FIFO Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
Resident Page Full Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
UTCL2 Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
units: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -150,18 +150,6 @@ Panel Config:
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
tips:
|
||||
Read Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write Stall:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1703
|
||||
@@ -180,24 +168,6 @@ Panel Config:
|
||||
max: MAX((TCC_REQ_sum * 128) / $denom)
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Req:
|
||||
avg: AVG((TCC_REQ_sum / $denom))
|
||||
min: MIN((TCC_REQ_sum / $denom))
|
||||
@@ -228,24 +198,12 @@ Panel Config:
|
||||
max: MAX((TCC_STREAMING_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Bypasss Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Probe Req:
|
||||
avg: AVG((TCC_PROBE_sum / $denom))
|
||||
min: MIN((TCC_PROBE_sum / $denom))
|
||||
max: MAX((TCC_PROBE_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Input Buffer Req:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache Hit:
|
||||
avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
|
||||
+ TCC_MISS_sum) != 0) else None))
|
||||
@@ -333,24 +291,6 @@ Panel Config:
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
Stalled on Latency FIFO:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Stalled on Write Data FIFO:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Input Buffer Stalled on L2:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
id: 1705
|
||||
@@ -367,54 +307,6 @@ Panel Config:
|
||||
style:
|
||||
type: simple_multi_bar
|
||||
metric:
|
||||
Read - PCIe Stall:
|
||||
type: PCIe Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Read - Infinity Fabric™ Stall:
|
||||
type: Infinity Fabric™ Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Read - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Read
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - PCIe Stall:
|
||||
type: PCIe Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - Infinity Fabric™ Stall:
|
||||
type: Infinity Fabric™ Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Write
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: pct
|
||||
tips:
|
||||
Write - Credit Starvation:
|
||||
type: Credit Starvation
|
||||
transaction: Write
|
||||
@@ -471,24 +363,6 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write and Atomic (32B):
|
||||
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
@@ -519,51 +393,9 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic:
|
||||
avg: AVG((TCC_EA0_ATOMIC_sum / $denom))
|
||||
min: MIN((TCC_EA0_ATOMIC_sum / $denom))
|
||||
max: MAX((TCC_EA0_ATOMIC_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - PCIe:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - Infinity Fabric™:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Atomic Bandwidth - HBM:
|
||||
avg: None # Missing perfmon
|
||||
min: None # Missing perfmon
|
||||
max: None # Missing perfmon
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
|
||||
@@ -473,24 +473,6 @@ Panel Config:
|
||||
max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - PCIe:
|
||||
avg: None # AVG(TCC_EA0_RDREQ_IO_32B_sum / $denom)
|
||||
min: None # MIN(TCC_EA0_RDREQ_IO_32B_sum / $denom)
|
||||
max: None # MAX(TCC_EA0_RDREQ_IO_32B_sum / $denom)
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - Infinity Fabric™:
|
||||
avg: None # AVG(TCC_EA0_RDREQ_GMI_32B_sum / $denom)
|
||||
min: None # MIN(TCC_EA0_RDREQ_GMI_32B_sum / $denom)
|
||||
max: None # MAX(TCC_EA0_RDREQ_GMI_32B_sum / $denom)
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Read Bandwidth - HBM:
|
||||
avg: None # AVG(TCC_EA0_RDREQ_DRAM_32B_sum / $denom)
|
||||
min: None # MIN(TCC_EA0_RDREQ_DRAM_32B_sum / $denom)
|
||||
max: None # MAX(TCC_EA0_RDREQ_DRAM_32B_sum / $denom)
|
||||
unit: (Bytes + $normUnit)
|
||||
tips:
|
||||
Write and Atomic (32B):
|
||||
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
|
||||
@@ -75,6 +75,12 @@ def load_panel_configs(dir):
|
||||
if f.endswith(".yaml"):
|
||||
with open(str(Path(root).joinpath(f))) as file:
|
||||
config = yaml.safe_load(file)
|
||||
# metric key can be None due to some metric tables not having any metrics
|
||||
# metric key should be empty dict instead of None
|
||||
for data_source in config["Panel Config"]["data source"]:
|
||||
metric_table = data_source.get("metric_table")
|
||||
if metric_table and metric_table["metric"] is None:
|
||||
metric_table["metric"] = {}
|
||||
d[config["Panel Config"]["id"]] = config["Panel Config"]
|
||||
|
||||
# TODO: sort metrics to match the header order in case they are not defined in that order
|
||||
|
||||
@@ -114,6 +114,34 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
show_roof_plot(roof_plot)
|
||||
continue
|
||||
|
||||
# Metrics baseline comparison mode
|
||||
# We cannot guarantee that all runs have the same metrics. Only show common metrics.
|
||||
if (
|
||||
type == "metric_table"
|
||||
and "Metric" in table_config["header"].values()
|
||||
and len(runs) > 1
|
||||
):
|
||||
# Common metrics across all runs
|
||||
common_metrics = set()
|
||||
for _, data in runs.items():
|
||||
if not common_metrics:
|
||||
common_metrics = set(data.dfs[table_config["id"]]["Metric"])
|
||||
else:
|
||||
common_metrics &= set(data.dfs[table_config["id"]]["Metric"])
|
||||
# Apply common metrics across all runs
|
||||
# Reindex all runs based on first run
|
||||
initial_index = None
|
||||
for key in runs.keys():
|
||||
runs[key].dfs[table_config["id"]] = (
|
||||
runs[key]
|
||||
.dfs[table_config["id"]]
|
||||
.loc[lambda d: d["Metric"].isin(common_metrics)]
|
||||
)
|
||||
if initial_index is None:
|
||||
initial_index= runs[key].dfs[table_config["id"]].index
|
||||
else:
|
||||
runs[key].dfs[table_config["id"]].index = initial_index
|
||||
|
||||
# take the 1st run as baseline
|
||||
base_run, base_data = next(iter(runs.items()))
|
||||
base_df = base_data.dfs[table_config["id"]]
|
||||
|
||||
مرجع در شماره جدید
Block a user