Enable tuned performance counters for gfx950 (#652)

* Enable non-functional performance counters for gfx950.

* Update changelog.

* Add None-valued metrics for non-gfx950 SoCs

* Remove rocprofv3 missing metrics.

[ROCm/rocprofiler-compute commit: dce75f4afa]
Этот коммит содержится в:
xuchen-amd
2025-04-02 14:43:12 -04:00
коммит произвёл GitHub
родитель 7bfc49e9f8
Коммит b21384ca60
8 изменённых файлов: 1738 добавлений и 1 удалений
+6
Просмотреть файл
@@ -22,6 +22,12 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Support host-trap PC Sampling on CLI (beta version)
* Add support for tuned performance counters for gfx950 GPUs
* Add L1 latencies
* Add L2 latencies
* Add L2 to EA stalls
* Add L2 to EA stalls per channel
### Changed
* Change normal_unit default to per_kernel
+3 -1
Просмотреть файл
@@ -26,9 +26,9 @@
# SOFTWARE.
##############################################################################
import os
import re
import sys
import os
# import logging
from pathlib import Path
@@ -48,12 +48,14 @@ except ImportError as e:
sys.path.append(os.path.abspath(additional_path))
from importlib import metadata
from rocprof_compute_base import RocProfCompute
from utils.utils import console_error
except ImportError as e:
# print("Failed to import required modules: " + str(e))
pass
def verify_deps_version(localVer, desiredVer, operator):
"""Check package version strings with simple operators used in companion
requirements.txt file"""
@@ -181,6 +181,12 @@ Panel Config:
max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Global/Generic Coalesceable Instr:
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (instr + $normUnit)
tips:
Global/Generic Read:
avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
@@ -205,6 +211,12 @@ Panel Config:
max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Spill/Stack Coalesceable Instr:
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (instr + $normUnit)
tips:
Spill/Stack Read:
avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
@@ -0,0 +1,315 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 300
title: Memory Chart
data source:
- metric_table:
id: 301
title: Memory Chart
header:
metric: Metric
#alias: #alias
value: Value
tips: Tips
metric:
# ----------------------------------------
# Instr Buff Block
#TODO: double check wave_occupancy
Wavefront Occupancy:
#alias: wave_occ_
value: ROUND(AVG((SQ_ACCUM_PREV_HIRES / $GRBM_GUI_ACTIVE_PER_XCD) / $numActiveCUs), 0)
coll_level: SQ_LEVEL_WAVES
tips:
Wave Life:
#alias: wave_life_
value: ROUND(AVG(((4 * (SQ_WAVE_CYCLES / SQ_WAVES)) if (SQ_WAVES != 0) else 0)), 0)
tips:
# ----------------------------------------
# Instr Dispatch Block
SALU:
#alias: salu_
value: ROUND(AVG((SQ_INSTS_SALU / $denom)), 0)
tips:
SMEM:
#alias: smem_
value: ROUND(AVG((SQ_INSTS_SMEM / $denom)), 0)
tips:
VALU:
#alias: valu_
value: ROUND(AVG((SQ_INSTS_VALU / $denom)), 0)
tips:
MFMA:
#alias: mfma_
value: ROUND(AVG((SQ_INSTS_MFMA / $denom)), 0)
tips:
VMEM:
#alias: vmem_
value: ROUND(AVG((SQ_INSTS_VMEM / $denom)), 0)
tips:
LDS:
#alias: lds_
value: ROUND(AVG((SQ_INSTS_LDS / $denom)), 0)
tips:
GWS:
#alias: gws_
value: ROUND(AVG((SQ_INSTS_GDS / $denom)), 0)
tips:
BR:
#alias: br_
value: ROUND(AVG((SQ_INSTS_BRANCH / $denom)), 0)
tips:
# ----------------------------------------
# Exec Block
Active CUs:
#alias: active_cu_
value: $numActiveCUs
tips:
Num CUs:
#alias: num_cu_
value: $cu_per_gpu
tips:
VGPR:
#alias: vgpr_
value: ROUND(AVG(Arch_VGPR), 0)
tips:
# Todo: add AGPRs
SGPR:
#alias: sgpr_
value: ROUND(AVG(SGPR), 0)
tips:
LDS Allocation:
#alias: lds_alloc_
value: ROUND(AVG(LDS_Per_Workgroup), 0)
tips:
Scratch Allocation:
#alias: scratch_alloc_
value: ROUND(AVG(Scratch_Per_Workitem), 0)
tips:
Wavefronts:
#alias: wavefronts_
value: ROUND(AVG(SPI_CS0_WAVE + SPI_CS1_WAVE + SPI_CS2_WAVE + SPI_CS3_WAVE), 0)
tips:
Workgroups:
#alias: workgroups_
value: ROUND(AVG(SPI_CS0_NUM_THREADGROUPS + SPI_CS1_NUM_THREADGROUPS + SPI_CS2_NUM_THREADGROUPS + SPI_CS3_NUM_THREADGROUPS), 0)
tips:
# ----------------------------------------
# LDS Block
LDS Req:
#alias: lds_req_
value: ROUND(AVG((SQ_INSTS_LDS / $denom)), 0)
tips:
LDS Util:
#alias: lds_util_
value:
ROUND(AVG(((100 * SQ_LDS_IDX_ACTIVE) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))),
0)
tips:
LDS Latency:
#alias: lds_lat
value: ROUND(AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)),0)
coll_level: SQ_INST_LEVEL_LDS
tips:
# ----------------------------------------
# Vector L1 Cache Block
VL1 Rd:
#alias: vl1_rd_
value: ROUND(AVG((TCP_TOTAL_READ_sum / $denom)), 0)
tips:
VL1 Wr:
#alias: vl1_wr_
value: ROUND(AVG((TCP_TOTAL_WRITE_sum / $denom)), 0)
tips:
VL1 Atomic:
#alias: vl1_atom_
value:
ROUND(AVG(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
/ $denom)), 0)
tips:
VL1 Hit:
#alias: vl1_hit_
value:
ROUND(AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None )), 0)
tips:
VL1 Lat:
#alias: vl1_lat_
value:
ROUND(AVG(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum
!= 0) else None)), 0)
tips:
VL1 Coalesce:
#alias: vl1_coales_
# Coalescing percentage: (TA_TOTAL_WAVEFRONTS_sum * 64 * 100) / (TCP_TOTAL_ACCESSES_sum * 4).
# The division is guarded on a zero access count and yields None in that case,
# the same guard form used by "VL1 Stall" and "VL1 Hit" in this table.
# (The previous guard compared the counter against None, which never protects
# the division from a zero denominator.)
value:
ROUND(AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
* 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None)), 0)
tips:
VL1 Stall:
#alias: vl1_stall_
value:
ROUND(AVG((((100 * TCP_TCR_TCP_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
if (TCP_GATE_EN1_sum != 0) else None)), 0)
tips:
VL1_L2 Rd:
#alias: vl1_l2_rd_
value: ROUND(AVG((TCP_TCC_READ_REQ_sum / $denom)), 0)
tips:
VL1_L2 Wr:
#alias: vl1_l2_wr_
value: ROUND(AVG((TCP_TCC_WRITE_REQ_sum / $denom)), 0)
tips:
VL1_L2 Atomic:
#alias: vl1_l2_atom_
value:
ROUND(AVG(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
/ $denom)), 0)
tips:
# ----------------------------------------
# Scalar L1D Cache Block
VL1D Rd:
#alias: sl1_rd_
value: ROUND(AVG((SQC_DCACHE_REQ / $denom)), 0)
tips:
VL1D Hit:
#alias: sl1_hit_
value:
ROUND((AVG(((SQC_DCACHE_HITS / SQC_DCACHE_REQ) if (SQC_DCACHE_REQ !=
0) else None)) * 100), 0)
tips:
VL1D Lat:
#alias: sl1_lat_
# Averages SQ_ACCUM_PREV_HIRES / SQC_DCACHE_REQ (None when there are no
# requests), then multiplies by 100 and rounds.
# NOTE(review): the other latency metrics in this table ("LDS Latency",
# "VL1 Lat") are not scaled by 100 -- confirm the factor is intended here
# and was not carried over from the "VL1D Hit" percentage expression.
value:
ROUND((AVG(((SQ_ACCUM_PREV_HIRES / SQC_DCACHE_REQ) if (SQC_DCACHE_REQ !=
0) else None)) * 100), 0)
coll_level: SQC_DCACHE_INFLIGHT_LEVEL
tips:
VL1D_L2 Rd:
#alias: sl1_l2_rd_
value: ROUND(AVG((SQC_TC_DATA_READ_REQ / $denom)), 0)
tips:
VL1D_L2 Wr:
#alias: sl1_l2_wr_
value: ROUND(AVG((SQC_TC_DATA_WRITE_REQ / $denom)), 0)
tips:
VL1D_L2 Atomic:
#alias: sl1_l2_atom_
value: ROUND(AVG((SQC_TC_DATA_ATOMIC_REQ / $denom)), 0)
tips:
# ----------------------------------------
# Instr L1 Cache Block
IL1 Fetch:
#alias: il1_fetch_
value: ROUND(AVG((SQC_ICACHE_REQ / $denom)), 0)
tips:
IL1 Hit:
#alias: il1_hit_
# Instruction-cache hit percentage: SQC_ICACHE_HITS / SQC_ICACHE_REQ * 100.
# Guarded against zero requests (yields None), matching the guard used by
# "VL1D Hit" and "IL1 Lat" in this table.
value:
ROUND((AVG(((SQC_ICACHE_HITS / SQC_ICACHE_REQ) if (SQC_ICACHE_REQ !=
0) else None)) * 100), 0)
tips:
IL1 Lat:
#alias: il1_lat_
# Averages SQ_ACCUM_PREV_HIRES / SQC_ICACHE_REQ (None when there are no
# requests), then multiplies by 100 and rounds.
# NOTE(review): the other latency metrics in this table ("LDS Latency",
# "VL1 Lat") are not scaled by 100 -- confirm the factor is intended here.
# NOTE(review): no coll_level key is set for this metric, although the other
# SQ_ACCUM_PREV_HIRES metrics in this table ("Wavefront Occupancy",
# "LDS Latency", "VL1D Lat") each set one; SQ_IFETCH_LEVEL appears only as a
# candidate in the comment on the tips line below -- confirm.
value:
ROUND((AVG(((SQ_ACCUM_PREV_HIRES / SQC_ICACHE_REQ) if (SQC_ICACHE_REQ !=
0) else None)) * 100), 0)
tips: # ??? coll_level: SQ_IFETCH_LEVEL
IL1_L2 Rd:
#alias: il1_l2_req_
value: ROUND(AVG((SQC_TC_INST_REQ / $denom)), 0)
tips:
# ----------------------------------------
# L2 Cache Block(inside)
L2 Rd:
#alias: l2_rd_
value: ROUND(AVG((TCC_READ_sum / $denom)), 0)
tips:
L2 Wr:
#alias: l2_wr_
value: ROUND(AVG((TCC_WRITE_sum / $denom)), 0)
tips:
L2 Atomic:
#alias: l2_atom_
value: ROUND(AVG((TCC_ATOMIC_sum / $denom)), 0)
tips:
L2 Hit:
#alias: l2_hit_
value:
ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else 0)), 0)
tips:
L2 Rd Lat:
#alias: l2_rd_lat_
value:
ROUND(AVG(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum))
if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None)),
0)
tips:
L2 Wr Lat:
#alias: l2_wr_lat_
value:
ROUND(AVG(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum +
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
!= 0) else None)), 0)
tips:
# ----------------------------------------
# Fabric Block
Fabric_L2 Rd:
#alias: l2_fabric_rd_
value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0)
tips:
Fabric_L2 Wr:
#alias: l2_fabric_wr_
value: ROUND(AVG((TCC_EA0_WRREQ_sum / $denom)), 0)
tips:
Fabric_L2 Atomic:
#alias: l2_fabric_atom_
value: ROUND(AVG((TCC_EA0_ATOMIC_sum / $denom)), 0)
tips:
Fabric Rd Lat:
#alias: fabric_rd_lat_
value:
ROUND(AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else 0)), 0)
tips:
Fabric Wr Lat:
#alias: fabric_wr_lat_
value:
ROUND(AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else 0)), 0)
tips:
Fabric Atomic Lat:
#alias: fabric_atom_lat_
value:
ROUND(AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else 0)), 0)
tips:
HBM Rd:
#alias: hbm_rd_
value: ROUND(AVG((TCC_EA0_RDREQ_DRAM_sum / $denom)), 0)
tips:
HBM Wr:
#alias: hbm_wr_
value: ROUND(AVG((TCC_EA0_WRREQ_DRAM_sum / $denom)), 0)
tips:
comparable: false # for now
cli_style: mem_chart
@@ -0,0 +1,292 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1000
title: Compute Units - Instruction Mix
data source:
- metric_table:
id: 1001
title: Overall Instruction Mix
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
VALU:
avg: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
min: MIN(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
max: MAX(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
unit: (instr + $normUnit)
tips:
VMEM:
# TODO: need to fix this when the new FLAT/LDS counts
# are present in ROCm
avg: AVG(((SQ_INSTS_VMEM) / $denom))
min: MIN(((SQ_INSTS_VMEM) / $denom))
max: MAX(((SQ_INSTS_VMEM) / $denom))
unit: (instr + $normUnit)
tips:
LDS:
# TODO: need to fix this when the new FLAT/LDS counts
# are present in ROCm
avg: AVG((SQ_INSTS_LDS / $denom))
min: MIN((SQ_INSTS_LDS / $denom))
max: MAX((SQ_INSTS_LDS / $denom))
unit: (instr + $normUnit)
tips:
MFMA:
avg: AVG((SQ_INSTS_MFMA / $denom))
min: MIN((SQ_INSTS_MFMA / $denom))
max: MAX((SQ_INSTS_MFMA / $denom))
unit: (instr + $normUnit)
tips:
SALU:
avg: AVG((SQ_INSTS_SALU / $denom))
min: MIN((SQ_INSTS_SALU / $denom))
max: MAX((SQ_INSTS_SALU / $denom))
unit: (instr + $normUnit)
tips:
SMEM:
avg: AVG((SQ_INSTS_SMEM / $denom))
min: MIN((SQ_INSTS_SMEM / $denom))
max: MAX((SQ_INSTS_SMEM / $denom))
unit: (instr + $normUnit)
tips:
Branch:
avg: AVG((SQ_INSTS_BRANCH / $denom))
min: MIN((SQ_INSTS_BRANCH / $denom))
max: MAX((SQ_INSTS_BRANCH / $denom))
unit: (instr + $normUnit)
tips:
- metric_table:
id: 1002
title: VALU Arithmetic Instr Mix
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
INT32:
avg: AVG((SQ_INSTS_VALU_INT32 / $denom))
min: MIN((SQ_INSTS_VALU_INT32 / $denom))
max: MAX((SQ_INSTS_VALU_INT32 / $denom))
unit: (instr + $normUnit)
tips:
INT64:
avg: AVG((SQ_INSTS_VALU_INT64 / $denom))
min: MIN((SQ_INSTS_VALU_INT64 / $denom))
max: MAX((SQ_INSTS_VALU_INT64 / $denom))
unit: (instr + $normUnit)
tips:
F16-ADD:
avg: AVG((SQ_INSTS_VALU_ADD_F16 / $denom))
min: MIN((SQ_INSTS_VALU_ADD_F16 / $denom))
max: MAX((SQ_INSTS_VALU_ADD_F16 / $denom))
unit: (instr + $normUnit)
tips:
F16-MUL:
avg: AVG((SQ_INSTS_VALU_MUL_F16 / $denom))
min: MIN((SQ_INSTS_VALU_MUL_F16 / $denom))
max: MAX((SQ_INSTS_VALU_MUL_F16 / $denom))
unit: (instr + $normUnit)
tips:
F16-FMA:
avg: AVG((SQ_INSTS_VALU_FMA_F16 / $denom))
min: MIN((SQ_INSTS_VALU_FMA_F16 / $denom))
max: MAX((SQ_INSTS_VALU_FMA_F16 / $denom))
unit: (instr + $normUnit)
tips:
F16-Trans:
avg: AVG((SQ_INSTS_VALU_TRANS_F16 / $denom))
min: MIN((SQ_INSTS_VALU_TRANS_F16 / $denom))
max: MAX((SQ_INSTS_VALU_TRANS_F16 / $denom))
unit: (instr + $normUnit)
tips:
F32-ADD:
avg: AVG((SQ_INSTS_VALU_ADD_F32 / $denom))
min: MIN((SQ_INSTS_VALU_ADD_F32 / $denom))
max: MAX((SQ_INSTS_VALU_ADD_F32 / $denom))
unit: (instr + $normUnit)
tips:
F32-MUL:
avg: AVG((SQ_INSTS_VALU_MUL_F32 / $denom))
min: MIN((SQ_INSTS_VALU_MUL_F32 / $denom))
max: MAX((SQ_INSTS_VALU_MUL_F32 / $denom))
unit: (instr + $normUnit)
tips:
F32-FMA:
avg: AVG((SQ_INSTS_VALU_FMA_F32 / $denom))
min: MIN((SQ_INSTS_VALU_FMA_F32 / $denom))
max: MAX((SQ_INSTS_VALU_FMA_F32 / $denom))
unit: (instr + $normUnit)
tips:
F32-Trans:
avg: AVG((SQ_INSTS_VALU_TRANS_F32 / $denom))
min: MIN((SQ_INSTS_VALU_TRANS_F32 / $denom))
max: MAX((SQ_INSTS_VALU_TRANS_F32 / $denom))
unit: (instr + $normUnit)
tips:
F64-ADD:
avg: AVG((SQ_INSTS_VALU_ADD_F64 / $denom))
min: MIN((SQ_INSTS_VALU_ADD_F64 / $denom))
max: MAX((SQ_INSTS_VALU_ADD_F64 / $denom))
unit: (instr + $normUnit)
tips:
F64-MUL:
avg: AVG((SQ_INSTS_VALU_MUL_F64 / $denom))
min: MIN((SQ_INSTS_VALU_MUL_F64 / $denom))
max: MAX((SQ_INSTS_VALU_MUL_F64 / $denom))
unit: (instr + $normUnit)
tips:
F64-FMA:
avg: AVG((SQ_INSTS_VALU_FMA_F64 / $denom))
min: MIN((SQ_INSTS_VALU_FMA_F64 / $denom))
max: MAX((SQ_INSTS_VALU_FMA_F64 / $denom))
unit: (instr + $normUnit)
tips:
F64-Trans:
avg: AVG((SQ_INSTS_VALU_TRANS_F64 / $denom))
min: MIN((SQ_INSTS_VALU_TRANS_F64 / $denom))
max: MAX((SQ_INSTS_VALU_TRANS_F64 / $denom))
unit: (instr + $normUnit)
tips:
Conversion:
avg: AVG((SQ_INSTS_VALU_CVT / $denom))
min: MIN((SQ_INSTS_VALU_CVT / $denom))
max: MAX((SQ_INSTS_VALU_CVT / $denom))
unit: (instr + $normUnit)
tips:
- metric_table:
id: 1003
title: VMEM Instr Mix
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Global/Generic Instr:
avg: AVG((TA_FLAT_WAVEFRONTS_sum / $denom))
min: MIN((TA_FLAT_WAVEFRONTS_sum / $denom))
max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Global/Generic Coalesceable Instr:
avg: None
# AVG((TA_FLAT_COALESCEABLE_WAVEFRONTS_sum / $denom))
min: None
# MIN((TA_FLAT_COALESCEABLE_WAVEFRONTS_sum / $denom))
max: None
# MAX((TA_FLAT_COALESCEABLE_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Global/Generic Read:
avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Global/Generic Write:
avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
max: MAX((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Global/Generic Atomic:
avg: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
min: MIN((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
max: MAX((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Spill/Stack Instr:
avg: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom))
min: MIN((TA_BUFFER_WAVEFRONTS_sum / $denom))
max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Spill/Stack Coalesceable Instr:
avg: AVG((TA_BUFFER_COALESCEABLE_WAVEFRONTS_sum / $denom))
min: MIN((TA_BUFFER_COALESCEABLE_WAVEFRONTS_sum / $denom))
max: MAX((TA_BUFFER_COALESCEABLE_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Spill/Stack Read:
avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Spill/Stack Write:
avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
max: MAX((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
Spill/Stack Atomic:
avg: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
min: MIN((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
max: MAX((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom))
unit: (instr + $normUnit)
tips:
- metric_table:
id: 1004
title: MFMA Arithmetic Instr Mix
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
MFMA-I8:
avg: AVG((SQ_INSTS_VALU_MFMA_I8 / $denom))
min: MIN((SQ_INSTS_VALU_MFMA_I8 / $denom))
max: MAX((SQ_INSTS_VALU_MFMA_I8 / $denom))
unit: (instr + $normUnit)
tips:
MFMA-F8:
avg: AVG((SQ_INSTS_VALU_MFMA_F8 / $denom))
min: MIN((SQ_INSTS_VALU_MFMA_F8 / $denom))
max: MAX((SQ_INSTS_VALU_MFMA_F8 / $denom))
unit: (instr + $normUnit)
tips:
MFMA-F16:
avg: AVG((SQ_INSTS_VALU_MFMA_F16 / $denom))
min: MIN((SQ_INSTS_VALU_MFMA_F16 / $denom))
max: MAX((SQ_INSTS_VALU_MFMA_F16 / $denom))
unit: (instr + $normUnit)
tips:
MFMA-BF16:
avg: AVG((SQ_INSTS_VALU_MFMA_BF16 / $denom))
min: MIN((SQ_INSTS_VALU_MFMA_BF16 / $denom))
max: MAX((SQ_INSTS_VALU_MFMA_BF16 / $denom))
unit: (instr + $normUnit)
tips:
MFMA-F32:
avg: AVG((SQ_INSTS_VALU_MFMA_F32 / $denom))
min: MIN((SQ_INSTS_VALU_MFMA_F32 / $denom))
max: MAX((SQ_INSTS_VALU_MFMA_F32 / $denom))
unit: (instr + $normUnit)
tips:
MFMA-F64:
avg: AVG((SQ_INSTS_VALU_MFMA_F64 / $denom))
min: MIN((SQ_INSTS_VALU_MFMA_F64 / $denom))
max: MAX((SQ_INSTS_VALU_MFMA_F64 / $denom))
unit: (instr + $normUnit)
tips:
@@ -0,0 +1,368 @@
---
# Add description/tips for each metric in this section.
# So it could be shown in hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1600
title: Vector L1 Data Cache
data source:
- metric_table:
id: 1601
title: Speed-of-Light
header:
metric: Metric
value: Avg
unit: Unit
tips: Tips
metric:
Hit rate:
value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None))
unit: Pct of Peak
tips:
Bandwidth:
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 128) / (End_Timestamp - Start_Timestamp))))
/ ((($max_sclk / 1000) * 128) * $cu_per_gpu))
unit: Pct of Peak
tips:
Utilization:
value: AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
!= 0) else None))
unit: Pct of Peak
tips:
Coalescing:
value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
* 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None))
unit: Pct of Peak
tips:
comparable: false # for now
cli_style: simple_bar
- metric_table:
id: 1602
title: L1D Cache Stalls (%)
header:
metric: Metric
expr: Expression
tips: Tips
metric:
Stalled on L2 Data:
expr:
(((100 * TCP_PENDING_STALL_CYCLES_sum) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
!= 0) else None)
tips:
Stalled on L2 Req:
expr:
(((100 * TCP_TCR_TCP_STALL_CYCLES_sum) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
!= 0) else None)
tips:
Tag RAM Stall (Read):
expr:
(((100 * TCP_READ_TAGCONFLICT_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
if (TCP_GATE_EN1_sum != 0) else None)
tips:
Tag RAM Stall (Write):
expr:
(((100 * TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
if (TCP_GATE_EN1_sum != 0) else None)
tips:
Tag RAM Stall (Atomic):
expr:
(((100 * TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum) / TCP_GATE_EN1_sum)
if (TCP_GATE_EN1_sum != 0) else None)
tips:
cli_style: simple_box
- metric_table:
id: 1603
title: L1D Cache Accesses
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Total Req:
avg: AVG((TCP_TOTAL_ACCESSES_sum / $denom))
min: MIN((TCP_TOTAL_ACCESSES_sum / $denom))
max: MAX((TCP_TOTAL_ACCESSES_sum / $denom))
unit: (Req + $normUnit)
tips:
Read Req:
avg: AVG((TCP_TOTAL_READ_sum / $denom))
min: MIN((TCP_TOTAL_READ_sum / $denom))
max: MAX((TCP_TOTAL_READ_sum / $denom))
unit: (Req + $normUnit)
tips:
Write Req:
avg: AVG((TCP_TOTAL_WRITE_sum / $denom))
min: MIN((TCP_TOTAL_WRITE_sum / $denom))
max: MAX((TCP_TOTAL_WRITE_sum / $denom))
unit: (Req + $normUnit)
tips:
Atomic Req:
avg: AVG(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
/ $denom))
min: MIN(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
/ $denom))
max: MAX(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum)
/ $denom))
unit: (Req + $normUnit)
tips:
Cache BW:
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 128) / $denom))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 128) / $denom))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 128) / $denom))
unit: (Bytes + $normUnit)
tips:
Cache Hit Rate:
avg: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None))
min: MIN(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None))
max: MAX(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) +
TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) /
TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
None))
unit: pct
tips:
Cache Accesses:
avg: AVG((TCP_TOTAL_CACHE_ACCESSES_sum / $denom))
min: MIN((TCP_TOTAL_CACHE_ACCESSES_sum / $denom))
max: MAX((TCP_TOTAL_CACHE_ACCESSES_sum / $denom))
unit: (Req + $normUnit)
tips:
Cache Hits:
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum - (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ $denom))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum - (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ $denom))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum - (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
+ TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
/ $denom))
unit: (Req + $normUnit)
tips:
Invalidations:
avg: AVG((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom))
min: MIN((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom))
max: MAX((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom))
unit: (Req + $normUnit)
tips:
L1-L2 BW:
avg: AVG(((128 * TCP_TCC_READ_REQ_sum + 64 *
(TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum +
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
min: MIN(((128 * TCP_TCC_READ_REQ_sum + 64 *
(TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum +
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
max: MAX(((128 * TCP_TCC_READ_REQ_sum + 64 *
(TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum +
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom))
unit: (Bytes + $normUnit)
tips:
L1-L2 Read:
avg: AVG((TCP_TCC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
L1-L2 Write:
avg: AVG((TCP_TCC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
L1-L2 Atomic:
avg: AVG(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
/ $denom))
min: MIN(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
/ $denom))
max: MAX(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
/ $denom))
unit: (Req + $normUnit)
tips:
L1 Access Latency:
avg: AVG((TCP_TCP_LATENCY_sum / $denom))
min: MIN((TCP_TCP_LATENCY_sum / $denom))
max: MAX((TCP_TCP_LATENCY_sum / $denom))
unit: (Cycles + $normUnit)
tips:
L1-L2 Read Latency:
avg: AVG((TCP_TCC_READ_REQ_LATENCY_sum / $denom))
min: MIN((TCP_TCC_READ_REQ_LATENCY_sum / $denom))
max: MAX((TCP_TCC_READ_REQ_LATENCY_sum / $denom))
unit: (Cycles + $normUnit)
tips:
L1-L2 Write Latency:
avg: AVG((TCP_TCC_WRITE_REQ_LATENCY_sum / $denom))
min: MIN((TCP_TCC_WRITE_REQ_LATENCY_sum / $denom))
max: MAX((TCP_TCC_WRITE_REQ_LATENCY_sum / $denom))
unit: (Cycles + $normUnit)
tips:
- metric_table:
id: 1604
title: L1D - L2 Transactions
header:
metric: Metric
xfer: Xfer
coherency: Coherency
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
NC - Read:
xfer: Read
coherency: NC
avg: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Read:
xfer: Read
coherency: UC
avg: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Read:
xfer: Read
coherency: CC
avg: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Read:
xfer: Read
coherency: RW
avg: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Write:
xfer: Write
coherency: RW
avg: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
NC - Write:
xfer: Write
coherency: NC
avg: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Write:
xfer: Write
coherency: UC
avg: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Write:
xfer: Write
coherency: CC
avg: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
NC - Atomic:
xfer: Atomic
coherency: NC
avg: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Atomic:
xfer: Atomic
coherency: UC
avg: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Atomic:
xfer: Atomic
coherency: CC
avg: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Atomic:
xfer: Atomic
coherency: RW
avg: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
- metric_table:
id: 1605
title: L1D Addr Translation
header:
metric: Metric
avg: Avg
min: Min
max: Max
units: Units
tips: Tips
metric:
Req:
avg: AVG((TCP_UTCL1_REQUEST_sum / $denom))
min: MIN((TCP_UTCL1_REQUEST_sum / $denom))
max: MAX((TCP_UTCL1_REQUEST_sum / $denom))
units: (Req + $normUnit)
tips:
Hit Ratio:
avg: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
(TCP_UTCL1_REQUEST_sum != 0) else None))
min: MIN((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
(TCP_UTCL1_REQUEST_sum != 0) else None))
max: MAX((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
(TCP_UTCL1_REQUEST_sum != 0) else None))
units: pct
tips:
Hits:
avg: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
units: (Req + $normUnit)
tips:
Translation Misses:
avg: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
units: (Req + $normUnit)
tips:
Permission Misses:
avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
units: (Req + $normUnit)
tips:
@@ -0,0 +1,444 @@
---
# Add a description/tip for each metric in this section
# so that it can be shown on hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1700
title: L2 Cache
data source:
# Table 1701: headline L2 figures; each metric has a single averaged `value` column.
- metric_table:
id: 1701
title: Speed-of-Light
header:
metric: Metric
value: Avg
unit: Unit
tips: Tips
metric:
# TCC_BUSY_sum as a percentage of (L2 channel count * $GRBM_GUI_ACTIVE_PER_XCD).
Utilization:
value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($total_l2_chan) * $GRBM_GUI_ACTIVE_PER_XCD)))
unit: pct
tips:
# Averaged (TCC_REQ_sum * 128) per timestamp span, as a percentage of
# (($max_sclk / 1000) * 128) * L2 channel count.
Bandwidth:
value: ((100 * AVG(((TCC_REQ_sum * 128) / (End_Timestamp - Start_Timestamp)))) / ((($max_sclk / 1000) * 128) * TO_INT($total_l2_chan)))
unit: pct
tips:
# NOTE(review): falls back to 0 when hits + misses == 0, whereas `Cache Hit` in
# table 1703 falls back to None for the same condition — confirm this is intended.
Hit Rate:
value: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else 0))
unit: pct
tips:
# 32B read requests weighted by 32, all other read requests by 64, per timestamp span.
L2-Fabric Read BW:
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
# 64B write requests weighted by 64, all other write requests by 32, per timestamp span.
L2-Fabric Write and Atomic BW:
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
# Table 1702: L2-to-fabric traffic derived from the TCC_EA0_* counters:
# byte volumes, traffic-mix percentages, latencies and stall percentages (avg/min/max each).
# Every ratio is guarded and yields None when its divisor counter is 0.
- metric_table:
id: 1702
title: L2 - Fabric Transactions
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
# 32B read requests weighted by 32, all remaining read requests by 64, divided by $denom.
Read BW:
avg: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
min: MIN((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
max: MAX((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
unit: (Bytes + $normUnit)
tips:
HBM Read Traffic:
avg: AVG((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum != 0) else None))
min: MIN((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum != 0) else None))
max: MAX((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum != 0) else None))
unit: pct
tips:
# Read requests minus DRAM read requests, clamped at 0, as a percentage of all read requests.
Remote Read Traffic:
avg: AVG((100 * (MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum != 0) else None))
min: MIN((100 * (MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum != 0) else None))
max: MAX((100 * (MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum != 0) else None))
unit: pct
tips:
Uncached Read Traffic:
avg: AVG((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum != 0) else None))
min: MIN((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum != 0) else None))
max: MAX((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum != 0) else None))
unit: pct
tips:
# 64B write requests weighted by 64, all remaining write requests by 32, divided by $denom.
Write and Atomic BW:
avg:
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
min:
MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
max:
MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
unit: (Bytes + $normUnit)
tips:
HBM Write and Atomic Traffic:
avg: AVG((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
min: MIN((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
max: MAX((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
unit: pct
tips:
# Write requests minus DRAM write requests, clamped at 0, as a percentage of all write requests.
Remote Write and Atomic Traffic:
avg: AVG((100 * (MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum),0) / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
min: MIN((100 * (MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum),0) / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
max: MAX((100 * (MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum),0) / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
unit: pct
tips:
Atomic Traffic:
avg: AVG((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
min: MIN((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
max: MAX((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
unit: pct
tips:
Uncached Write and Atomic Traffic:
avg: AVG((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
min: MIN((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
max: MAX((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum != 0) else None))
unit: pct
tips:
# Latency metrics: the *_LEVEL_sum counter divided by the matching request counter.
Read Latency:
avg: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum !=
0) else None))
min: MIN(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum !=
0) else None))
max: MAX(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum !=
0) else None))
unit: Cycles
tips:
Write and Atomic Latency:
avg: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum !=
0) else None))
min: MIN(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum !=
0) else None))
max: MAX(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum !=
0) else None))
unit: Cycles
tips:
Atomic Latency:
avg: AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
min: MIN(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
max: MAX(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
unit: Cycles
tips:
# Sum of the IO, GMI and DRAM read credit-stall counters as a percentage of TCC_BUSY_sum.
Read Stall:
avg: AVG((((100 * ((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum + TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum)
+ TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
0) else None))
min: MIN((((100 * ((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum + TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum)
+ TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
0) else None))
max: MAX((((100 * ((TCC_EA0_RDREQ_IO_CREDIT_STALL_sum + TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum)
+ TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
0) else None))
unit: pct
tips:
# TCC_EA0_WRREQ_STALL_sum as a percentage of TCC_BUSY_sum.
Write Stall:
avg: AVG(((100 * (TCC_EA0_WRREQ_STALL_sum) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
0) else None))
min: MIN(((100 * (TCC_EA0_WRREQ_STALL_sum) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
0) else None))
max: MAX(((100 * (TCC_EA0_WRREQ_STALL_sum) / TCC_BUSY_sum) if (TCC_BUSY_sum !=
0) else None))
unit: pct
tips:
# Table 1703: L2 access metrics from the TCC_* counters: request counts by kind,
# hit/miss figures, writeback and evict counts, and NC/UC/CC/RW request counts.
# All count metrics are divided by $denom and reported as avg/min/max.
- metric_table:
id: 1703
title: L2 Cache Accesses
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
# Request count weighted by 128, divided by $denom.
Bandwidth:
avg: AVG((TCC_REQ_sum * 128) / $denom)
min: MIN((TCC_REQ_sum * 128) / $denom)
max: MAX((TCC_REQ_sum * 128) / $denom)
unit: (Bytes + $normUnit)
tips:
Req:
avg: AVG((TCC_REQ_sum / $denom))
min: MIN((TCC_REQ_sum / $denom))
max: MAX((TCC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
Read Req:
avg: AVG((TCC_READ_sum / $denom))
min: MIN((TCC_READ_sum / $denom))
max: MAX((TCC_READ_sum / $denom))
unit: (Req + $normUnit)
tips:
Write Req:
avg: AVG((TCC_WRITE_sum / $denom))
min: MIN((TCC_WRITE_sum / $denom))
max: MAX((TCC_WRITE_sum / $denom))
unit: (Req + $normUnit)
tips:
Atomic Req:
avg: AVG((TCC_ATOMIC_sum / $denom))
min: MIN((TCC_ATOMIC_sum / $denom))
max: MAX((TCC_ATOMIC_sum / $denom))
unit: (Req + $normUnit)
tips:
Streaming Req:
avg: AVG((TCC_STREAMING_REQ_sum / $denom))
min: MIN((TCC_STREAMING_REQ_sum / $denom))
max: MAX((TCC_STREAMING_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
Probe Req:
avg: AVG((TCC_PROBE_sum / $denom))
min: MIN((TCC_PROBE_sum / $denom))
max: MAX((TCC_PROBE_sum / $denom))
unit: (Req + $normUnit)
tips:
# Hits as a percentage of hits + misses; None when both counters are 0.
Cache Hit:
avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else None))
min: MIN((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else None))
max: MAX((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum
+ TCC_MISS_sum) != 0) else None))
unit: pct
tips:
Hits:
avg: AVG((TCC_HIT_sum / $denom))
min: MIN((TCC_HIT_sum / $denom))
max: MAX((TCC_HIT_sum / $denom))
unit: (Hits + $normUnit)
tips:
Misses:
avg: AVG((TCC_MISS_sum / $denom))
min: MIN((TCC_MISS_sum / $denom))
max: MAX((TCC_MISS_sum / $denom))
unit: (Misses + $normUnit)
tips:
Writeback:
avg: AVG((TCC_WRITEBACK_sum / $denom))
min: MIN((TCC_WRITEBACK_sum / $denom))
max: MAX((TCC_WRITEBACK_sum / $denom))
unit: (Cachelines + $normUnit)
tips:
Writeback (Internal):
avg: AVG((TCC_NORMAL_WRITEBACK_sum / $denom))
min: MIN((TCC_NORMAL_WRITEBACK_sum / $denom))
max: MAX((TCC_NORMAL_WRITEBACK_sum / $denom))
unit: (Cachelines + $normUnit)
tips:
Writeback (vL1D Req):
avg: AVG((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom))
min: MIN((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom))
max: MAX((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom))
unit: (Cachelines + $normUnit)
tips:
Evict (Internal):
avg: AVG((TCC_NORMAL_EVICT_sum / $denom))
min: MIN((TCC_NORMAL_EVICT_sum / $denom))
max: MAX((TCC_NORMAL_EVICT_sum / $denom))
unit: (Cachelines + $normUnit)
tips:
Evict (vL1D Req):
avg: AVG((TCC_ALL_TC_OP_INV_EVICT_sum / $denom))
min: MIN((TCC_ALL_TC_OP_INV_EVICT_sum / $denom))
max: MAX((TCC_ALL_TC_OP_INV_EVICT_sum / $denom))
unit: (Cachelines + $normUnit)
tips:
NC Req:
avg: AVG((TCC_NC_REQ_sum / $denom))
min: MIN((TCC_NC_REQ_sum / $denom))
max: MAX((TCC_NC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC Req:
avg: AVG((TCC_UC_REQ_sum / $denom))
min: MIN((TCC_UC_REQ_sum / $denom))
max: MAX((TCC_UC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC Req:
avg: AVG((TCC_CC_REQ_sum / $denom))
min: MIN((TCC_CC_REQ_sum / $denom))
max: MAX((TCC_CC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW Req:
avg: AVG((TCC_RW_REQ_sum / $denom))
min: MIN((TCC_RW_REQ_sum / $denom))
max: MAX((TCC_RW_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
# Table 1704: each L2-fabric stall counter as a percentage of TCC_BUSY_sum.
# Every row carries a `type` (stall kind) and a `transaction` (Read/Write) column in
# addition to avg/min/max; all rows yield None when TCC_BUSY_sum is 0.
- metric_table:
id: 1704
title: L2 - Fabric Interface Stalls
header:
metric: Metric
type: Type
transaction: Transaction
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
style:
type: simple_multi_bar
metric:
Read - PCIe Stall:
type: PCIe Stall
transaction: Read
avg: AVG(((100 * (TCC_EA0_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
min: MIN(((100 * (TCC_EA0_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
max: MAX(((100 * (TCC_EA0_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
unit: pct
tips:
Read - Infinity Fabric™ Stall:
type: Infinity Fabric™ Stall
transaction: Read
avg: AVG(((100 * (TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
min: MIN(((100 * (TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
max: MAX(((100 * (TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
unit: pct
tips:
Read - HBM Stall:
type: HBM Stall
transaction: Read
avg: AVG(((100 * (TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
min: MIN(((100 * (TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
max: MAX(((100 * (TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
unit: pct
tips:
Write - PCIe Stall:
type: PCIe Stall
transaction: Write
avg: AVG(((100 * (TCC_EA0_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
min: MIN(((100 * (TCC_EA0_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
max: MAX(((100 * (TCC_EA0_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
unit: pct
tips:
Write - Infinity Fabric™ Stall:
type: Infinity Fabric™ Stall
transaction: Write
avg: AVG(((100 * (TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
min: MIN(((100 * (TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
max: MAX(((100 * (TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
unit: pct
tips:
Write - HBM Stall:
type: HBM Stall
transaction: Write
avg: AVG(((100 * (TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
min: MIN(((100 * (TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
max: MAX(((100 * (TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
unit: pct
tips:
Write - Credit Starvation:
type: Credit Starvation
transaction: Write
avg: AVG(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
min: MIN(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
max: MAX(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None))
unit: pct
tips:
# Table 1705: L2-fabric request counts split by size, destination and kind.
# Every metric is a TCC_EA0_* counter (or a difference of two) divided by $denom.
- metric_table:
id: 1705
title: L2 - Fabric Detailed Transaction Breakdown
header:
metric: Metric
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
Read (32B):
avg: AVG((TCC_EA0_RDREQ_32B_sum / $denom))
min: MIN((TCC_EA0_RDREQ_32B_sum / $denom))
max: MAX((TCC_EA0_RDREQ_32B_sum / $denom))
unit: (Req + $normUnit)
tips:
Read (Uncached):
avg: AVG((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
unit: (Req + $normUnit)
tips:
# Derived: all read requests minus the 32B read requests.
Read (64B):
avg: AVG(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
min: MIN(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
max: MAX(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
unit: (Req + $normUnit)
tips:
HBM Read:
avg: AVG((TCC_EA0_RDREQ_DRAM_sum / $denom))
min: MIN((TCC_EA0_RDREQ_DRAM_sum / $denom))
max: MAX((TCC_EA0_RDREQ_DRAM_sum / $denom))
unit: (Req + $normUnit)
tips:
# Derived: all read requests minus DRAM read requests, clamped at 0.
Remote Read:
avg: AVG((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
min: MIN((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
unit: (Req + $normUnit)
tips:
# Derived: all write requests minus the 64B write requests.
Write and Atomic (32B):
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
max: MAX(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
unit: (Req + $normUnit)
tips:
Write and Atomic (Uncached):
avg: AVG((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
unit: (Req + $normUnit)
tips:
Write and Atomic (64B):
avg: AVG((TCC_EA0_WRREQ_64B_sum / $denom))
min: MIN((TCC_EA0_WRREQ_64B_sum / $denom))
max: MAX((TCC_EA0_WRREQ_64B_sum / $denom))
unit: (Req + $normUnit)
tips:
HBM Write and Atomic:
avg: AVG((TCC_EA0_WRREQ_DRAM_sum / $denom))
min: MIN((TCC_EA0_WRREQ_DRAM_sum / $denom))
max: MAX((TCC_EA0_WRREQ_DRAM_sum / $denom))
unit: (Req + $normUnit)
tips:
# Derived: all write requests minus DRAM write requests, clamped at 0.
Remote Write and Atomic:
avg: AVG((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
min: MIN((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
unit: (Req + $normUnit)
tips:
Atomic:
avg: AVG((TCC_EA0_ATOMIC_sum / $denom))
min: MIN((TCC_EA0_ATOMIC_sum / $denom))
max: MAX((TCC_EA0_ATOMIC_sum / $denom))
unit: (Req + $normUnit)
tips:
@@ -0,0 +1,298 @@
---
# Add a description/tip for each metric in this section
# so that it can be shown on hover.
Metric Description:
# Define the panel properties and properties of each metric in the panel.
Panel Config:
id: 1800
title: L2 Cache (per Channel)
data source:
# Table 1801: L2 hit rate aggregated over channels, as avg / std dev / min / max.
# Each statistic divides the sum of (100 * TCC_HIT[i]) by the sum of
# (TCC_MISS[i] + TCC_HIT[i]) for i = 0..15, and yields None when that second sum is 0.
# NOTE(review): channel indices 0-15 are written out explicitly here, while the
# per-channel tables in this file range over $total_l2_chan — confirm that 16 matches
# the channel count on every target this file is used for.
- metric_table:
id: 1801
title: Aggregate Stats (All channels)
header:
metric: Metric
avg: Avg
std dev: Std Dev
min: Min
max: Max
unit: Unit
tips: Tips
metric:
L2 Cache Hit Rate:
avg: AVG(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
/ (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15])) != 0) else None)
std dev: STD(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
/ (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15])) != 0) else None)
min: MIN(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
/ (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15])) != 0) else None)
max: MAX(((((((((((((((((100 * TCC_HIT[0]) + (100 * TCC_HIT[1]))
+ (100 * TCC_HIT[2])) + (100 * TCC_HIT[3])) + (100 * TCC_HIT[4])) + (100 *
TCC_HIT[5])) + (100 * TCC_HIT[6])) + (100 * TCC_HIT[7])) + (100 * TCC_HIT[8]))
+ (100 * TCC_HIT[9])) + (100 * TCC_HIT[10])) + (100 * TCC_HIT[11])) + (100
* TCC_HIT[12])) + (100 * TCC_HIT[13])) + (100 * TCC_HIT[14])) + (100 * TCC_HIT[15]))
/ (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15]))) if (((((((((((((((((TCC_MISS[0]
+ TCC_HIT[0]) + (TCC_MISS[1] + TCC_HIT[1])) + (TCC_MISS[2] + TCC_HIT[2]))
+ (TCC_MISS[3] + TCC_HIT[3])) + (TCC_MISS[4] + TCC_HIT[4])) + (TCC_MISS[5]
+ TCC_HIT[5])) + (TCC_MISS[6] + TCC_HIT[6])) + (TCC_MISS[7] + TCC_HIT[7]))
+ (TCC_MISS[8] + TCC_HIT[8])) + (TCC_MISS[9] + TCC_HIT[9])) + (TCC_MISS[10]
+ TCC_HIT[10])) + (TCC_MISS[11] + TCC_HIT[11])) + (TCC_MISS[12] + TCC_HIT[12]))
+ (TCC_MISS[13] + TCC_HIT[13])) + (TCC_MISS[14] + TCC_HIT[14])) + (TCC_MISS[15]
+ TCC_HIT[15])) != 0) else None)
unit: pct
tips:
# FIXME: add the other aggregate metrics.
# Table 1802: per-channel L2 hit rate, 100 * hits / (hits + misses); None when both are 0.
# "::_1" is a placeholder whose range is given by placeholder_range ($total_l2_chan);
# presumably it is expanded once per channel index — confirm against the config loader.
- metric_table:
id: 1802
title: L2 Cache Hit Rate (pct)
header:
metric: Channel
expr: Expression
metric:
"::_1":
expr:
(((100 * TCC_HIT[::_1]) / (TCC_HIT[::_1] + TCC_MISS[::_1])) if ((TCC_HIT[::_1]
+ TCC_MISS[::_1]) != 0) else None)
placeholder_range:
"::_1": $total_l2_chan
cli_style: simple_box
# Table 1803: per-channel L2 request count, TCC_REQ[channel] cast with TO_INT and
# divided by $denom. The "::_1" placeholder ranges over $total_l2_chan.
- metric_table:
id: 1803
title: L2 Requests (per normUnit)
header:
metric: Channel
expr: Expression
metric:
"::_1":
expr: (TO_INT(TCC_REQ[::_1]) / $denom)
placeholder_range:
"::_1": $total_l2_chan
cli_style: simple_box
# Table 1804: per-channel L2 requests split into read, write and atomic columns,
# each averaged after dividing by $denom. The "::_1" placeholder ranges over $total_l2_chan.
# NOTE(review): this table's title is identical to table 1803's
# ("L2 Requests (per normUnit)") — confirm whether a distinct title is wanted.
- metric_table:
id: 1804
title: L2 Requests (per normUnit)
header:
metric: Channel
read req: L2 Read
write req: L2 Write
atomic req: L2 Atomic
metric:
"::_1":
read req: AVG((TO_INT(TCC_READ[::_1]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[::_1]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[::_1]) / $denom))
placeholder_range:
"::_1": $total_l2_chan
cli_style: simple_multiple_bar
# Table 1805: per-channel L2-fabric requests (TCC_EA0_RDREQ / WRREQ / ATOMIC), each
# averaged after dividing by $denom. The "::_1" placeholder ranges over $total_l2_chan.
- metric_table:
id: 1805
title: L2-Fabric Requests (per normUnit)
header:
metric: Channel
read req: L2-Fabric Read
write req: L2-Fabric Write and Atomic
atomic req: L2-Fabric Atomic
metric:
"::_1":
read req: AVG((TO_INT(TCC_EA0_RDREQ[::_1]) / $denom))
write req: AVG((TO_INT(TCC_EA0_WRREQ[::_1]) / $denom))
atomic req: AVG((TO_INT(TCC_EA0_ATOMIC[::_1]) / $denom))
placeholder_range:
"::_1": $total_l2_chan
cli_style: simple_multiple_bar
# - metric_table:
# id: 1806
# title: L2-Fabric Latency (Cycles)
# header:
# metric: Metric
# read lat: L2-Fabric Read
# write lat: L2-Fabric Write
# atomic lat: L2-Fabric Atomic
# metric:
# "::_1":
# read lat:
# AVG(((TCC_EA0_RDREQ_LEVEL[::_1] / TCC_EA0_RDREQ[::_1]) if (TCC_EA0_RDREQ[::_1]
# != 0) else None))
# write lat:
# AVG(((TCC_EA0_WRREQ_LEVEL[::_1] / TCC_EA0_WRREQ[::_1]) if (TCC_EA0_WRREQ[::_1]
# != 0) else None))
# atomic lat:
# AVG(((TCC_EA0_ATOMIC_LEVEL[::_1] / TCC_EA0_ATOMIC[::_1]) if
# (TCC_EA0_ATOMIC[::_1] != 0) else 0))
# placeholder_range:
# "::_1": $total_l2_chan
# cli_style: simple_multiple_bar
# Table 1806: per-channel L2-fabric read latency, TCC_EA0_RDREQ_LEVEL / TCC_EA0_RDREQ;
# None for a channel with no read requests. "::_1" ranges over $total_l2_chan.
- metric_table:
id: 1806
title: L2-Fabric Read Latency (Cycles)
header:
metric: Channel
expr: Expression
metric:
"::_1":
expr:
((TCC_EA0_RDREQ_LEVEL[::_1] / TCC_EA0_RDREQ[::_1]) if (TCC_EA0_RDREQ[::_1]
!= 0) else None)
placeholder_range:
"::_1": $total_l2_chan
cli_style: simple_box
# Table 1807: per-channel L2-fabric write/atomic latency, TCC_EA0_WRREQ_LEVEL / TCC_EA0_WRREQ;
# None for a channel with no write requests. "::_1" ranges over $total_l2_chan.
- metric_table:
id: 1807
title: L2-Fabric Write and Atomic Latency (Cycles)
header:
metric: Channel
expr: Expression
metric:
"::_1":
expr:
((TCC_EA0_WRREQ_LEVEL[::_1] / TCC_EA0_WRREQ[::_1]) if (TCC_EA0_WRREQ[::_1]
!= 0) else None)
placeholder_range:
"::_1": $total_l2_chan
cli_style: simple_box
# Table 1808: per-channel L2-fabric atomic latency, TCC_EA0_ATOMIC_LEVEL / TCC_EA0_ATOMIC.
# A channel with no atomic requests yields None rather than 0, matching the fallback used
# by the per-channel read and write latency tables (1806, 1807) and by the aggregate
# Atomic Latency metric in table 1702, so an idle channel is not shown as a 0-cycle latency.
# "::_1" ranges over $total_l2_chan.
- metric_table:
id: 1808
title: L2-Fabric Atomic Latency (Cycles)
header:
metric: Channel
expr: Expression
metric:
"::_1":
expr: ((TCC_EA0_ATOMIC_LEVEL[::_1] / TCC_EA0_ATOMIC[::_1]) if
(TCC_EA0_ATOMIC[::_1] != 0) else None)
placeholder_range:
"::_1": $total_l2_chan
cli_style: simple_box
# Table 1809: per-channel L2-fabric read stall counters (IO, GMI and DRAM credit stalls,
# labelled PCIe, Infinity Fabric and HBM), each averaged after dividing by $denom.
# "::_1" ranges over $total_l2_chan.
- metric_table:
id: 1809
title: L2-Fabric Read Stall (Cycles per normUnit)
header:
metric: Channel
ea read stall - pcie: L2-Fabric Read Stall (PCIe)
ea read stall - if: L2-Fabric Read Stall (Infinity Fabric™)
ea read stall - hbm: L2-Fabric Read Stall (HBM)
metric:
"::_1":
ea read stall - pcie: AVG((TO_INT(TCC_EA0_RDREQ_IO_CREDIT_STALL[::_1]) / $denom))
ea read stall - if: AVG((TO_INT(TCC_EA0_RDREQ_GMI_CREDIT_STALL[::_1]) / $denom))
ea read stall - hbm: AVG((TO_INT(TCC_EA0_RDREQ_DRAM_CREDIT_STALL[::_1]) / $denom))
placeholder_range:
"::_1": $total_l2_chan
cli_style: simple_multiple_bar
# Table 1810: per-channel L2-fabric write stall counters (IO, GMI and DRAM credit stalls,
# labelled PCIe, Infinity Fabric and HBM) plus TCC_TOO_MANY_EA_WRREQS_STALL as the
# "starve" column, each averaged after dividing by $denom. "::_1" ranges over $total_l2_chan.
- metric_table:
id: 1810
title: L2-Fabric Write and Atomic Stall (Cycles per normUnit)
header:
metric: Channel
ea write stall - pcie: L2-Fabric Write Stall (PCIe)
ea write stall - if: L2-Fabric Write Stall (Infinity Fabric™)
ea write stall - hbm: L2-Fabric Write Stall (HBM)
ea write stall - starve: L2-Fabric Write Starve
metric:
"::_1":
ea write stall - pcie: AVG((TO_INT(TCC_EA0_WRREQ_IO_CREDIT_STALL[::_1]) / $denom))
ea write stall - if: AVG((TO_INT(TCC_EA0_WRREQ_GMI_CREDIT_STALL[::_1]) / $denom))
ea write stall - hbm: AVG((TO_INT(TCC_EA0_WRREQ_DRAM_CREDIT_STALL[::_1]) / $denom))
ea write stall - starve: AVG((TO_INT(TCC_TOO_MANY_EA_WRREQS_STALL[::_1]) / $denom))
placeholder_range:
"::_1": $total_l2_chan
cli_style: simple_multiple_bar
# - metric_table:
# id: 1811
# title: L2 Tag Stall (cycles)
# header:
# metric: Metric
# expr: Expression
# metric:
# "::_1":
# expr: TCC_TAG_STALL[::_1]
# placeholder_range:
# "::_1": $total_l2_chan
# cli_style: simple_box
# Table 1812: per-channel TCC_BUBBLE count cast with TO_INT and divided by $denom.
# The title and the commented-out tip below describe this as 128-byte read requests.
# "::_1" ranges over $total_l2_chan.
- metric_table:
id: 1812
title: L2-Fabric (128B read requests per normUnit)
header:
metric: Channel
expr: Expression
metric:
"::_1":
expr: (TO_INT(TCC_BUBBLE[::_1]) / $denom)
placeholder_range:
"::_1": $total_l2_chan
# tips: Number of 128-byte read requests sent to EA
cli_style: simple_box