Use own counter definition (#91)

* Use own counter definition
  * Do not depend on rocprofiler-sdk counter definition

* Add missing counter definitions for MI100, MI200, MI300, MI350 series
  * Counters added based on register specification
  * This prevents some missing metrics

* Enable SQC_DCACHE_INFLIGHT_LEVEL counter and associated metrics

* Enable TCP_TCP_LATENCY counter and associated counter for all GPUs
  except MI300

* Update TCC_EA_* counters for MI100 to TCC_EA0_*
  * Update MI100 metrics which depend on TCC_EA0_* counters

* Enable accumulation counters for MI100

* Improve rocprof list avail usage to get a better idea of supported
  counters

* Update CHANGELOG

* Move accumulation counters to counter definition

---------

Co-authored-by: Vignesh Edithal <Vignesh.Edithal@amd.com>
Cette révision appartient à :
systems-assistant[bot]
2025-08-08 14:39:10 -04:00
révisé par GitHub
Parent 97d9f35033
révision d3f9ab25eb
12 fichiers modifiés avec 10942 ajouts et 3214 suppressions
+9
Voir le fichier
@@ -7,11 +7,17 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
### Added
* Add `rocpd` choice for `--format-rocprof-output` option in profile mode
* Add `--retain-rocpd-output` option in profile mode to save large raw rocpd databases in workload directory
* Show description of metrics during analysis
* Use `--include-cols Description` to show the Description column, which is excluded by default from the
ROCm Compute Profiler CLI output.
* Add missing counters based on register specification which enables missing metrics
* Enable SQC_DCACHE_INFLIGHT_LEVEL counter and associated metrics
* Enable TCP_TCP_LATENCY counter and associated counter for all GPUs except MI300
### Changed
* Add notice for change in default output format to `rocpd` in a future release
@@ -53,6 +59,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Fixed standalone GUI crashing
* Fixed L2 read/write/atomic bandwidths on MI350
* Update metric names for better alignment between analysis configuration and documentation
* Fixed an issue where accumulation counters could not be collected on AMD Instinct MI100
### Known issues
@@ -60,6 +67,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Improved `--time-unit` option in analyze mode to apply time unit conversion across all analysis sections, not just kernel top stats.
* Improve logic to obtain rocprof supported counters which prevents unnecessary warnings
### Removed
* Usage of rocm-smi
@@ -260,27 +260,29 @@ Panel Config:
pop: ((100 * AVG(((TCC_REQ_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($max_sclk / 1000) * 64) * TO_INT($total_l2_chan)))
L2-Fabric Read BW:
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBandwidth
pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBandwidth)
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum -
TCC_EA0_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp)))) /
$hbmBandwidth)
L2-Fabric Write BW:
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBandwidth
pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBandwidth)
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum -
TCC_EA0_WRREQ_64B_sum) * 32)) / (End_Timestamp - Start_Timestamp)))) /
$hbmBandwidth)
L2-Fabric Read Latency:
value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
unit: Cycles
peak: None
pop: None
L2-Fabric Write Latency:
value: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
value: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: Cycles
peak: None
@@ -244,24 +244,24 @@ Panel Config:
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
!= 0) else None)), 0)
Fabric_L2 Rd:
value: ROUND(AVG((TCC_EA_RDREQ_sum / $denom)), 0)
value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0)
Fabric_L2 Wr:
value: ROUND(AVG((TCC_EA_WRREQ_sum / $denom)), 0)
value: ROUND(AVG((TCC_EA0_WRREQ_sum / $denom)), 0)
Fabric_L2 Atomic:
value: ROUND(AVG((TCC_EA_ATOMIC_sum / $denom)), 0)
value: ROUND(AVG((TCC_EA0_ATOMIC_sum / $denom)), 0)
Fabric Rd Lat:
value: ROUND(AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
value: ROUND(AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else 0)), 0)
Fabric Wr Lat:
value: ROUND(AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
value: ROUND(AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else 0)), 0)
Fabric Atomic Lat:
value: ROUND(AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
value: ROUND(AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else 0)), 0)
HBM Rd:
value: ROUND(AVG((TCC_EA_RDREQ_DRAM_sum / $denom)), 0)
value: ROUND(AVG((TCC_EA0_RDREQ_DRAM_sum / $denom)), 0)
HBM Wr:
value: ROUND(AVG((TCC_EA_WRREQ_DRAM_sum / $denom)), 0)
value: ROUND(AVG((TCC_EA0_WRREQ_DRAM_sum / $denom)), 0)
comparable: false
cli_style: mem_chart
tui_style: mem_chart
@@ -235,11 +235,11 @@ Panel Config:
+ TCC_MISS_sum) != 0) else 0))
unit: pct
L2-Fabric Read BW:
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
L2-Fabric Write and Atomic BW:
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
HBM Bandwidth:
@@ -256,99 +256,99 @@ Panel Config:
unit: Unit
metric:
Read BW:
avg: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
min: MIN((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
max: MAX((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: Gbps
avg: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
min: MIN((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
max: MAX((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
unit: (Bytes + $normUnit)
HBM Read Traffic:
avg: AVG((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
avg: AVG((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
min: MIN((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
min: MIN((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
max: MAX((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
max: MAX((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
unit: pct
Remote Read Traffic:
avg: AVG((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
if (TCC_EA_RDREQ_sum != 0) else None))
min: MIN((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
if (TCC_EA_RDREQ_sum != 0) else None))
max: MAX((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
if (TCC_EA_RDREQ_sum != 0) else None))
avg: AVG((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
if (TCC_EA0_RDREQ_sum != 0) else None))
min: MIN((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
if (TCC_EA0_RDREQ_sum != 0) else None))
max: MAX((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
if (TCC_EA0_RDREQ_sum != 0) else None))
unit: pct
Uncached Read Traffic:
avg: AVG((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
avg: AVG((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
min: MIN((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
min: MIN((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
max: MAX((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
max: MAX((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
unit: pct
Write and Atomic BW:
avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: Gbps
avg: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
min: MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
max: MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
unit: (Bytes + $normUnit)
HBM Write and Atomic Traffic:
avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
avg: AVG((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
min: MIN((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
min: MIN((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
max: MAX((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
max: MAX((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: pct
Remote Write and Atomic Traffic:
avg: AVG((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
if (TCC_EA_WRREQ_sum != 0) else None))
min: MIN((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
if (TCC_EA_WRREQ_sum != 0) else None))
max: MAX((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
if (TCC_EA_WRREQ_sum != 0) else None))
avg: AVG((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
if (TCC_EA0_WRREQ_sum != 0) else None))
min: MIN((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
if (TCC_EA0_WRREQ_sum != 0) else None))
max: MAX((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
if (TCC_EA0_WRREQ_sum != 0) else None))
unit: pct
Atomic Traffic:
avg: AVG((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
avg: AVG((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
min: MIN((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
min: MIN((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
max: MAX((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
max: MAX((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: pct
Uncached Write and Atomic Traffic:
avg: AVG((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
avg: AVG((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
min: MIN((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
min: MIN((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
max: MAX((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
max: MAX((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: pct
Read Latency:
avg: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
avg: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
min: MIN(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
min: MIN(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
max: MAX(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
max: MAX(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
unit: Cycles
Write and Atomic Latency:
avg: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
avg: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
min: MIN(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
min: MIN(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
max: MAX(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
max: MAX(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: Cycles
Atomic Latency:
avg: AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
avg: AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
min: MIN(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
min: MIN(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
max: MAX(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
max: MAX(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
unit: Cycles
- metric_table:
@@ -504,57 +504,57 @@ Panel Config:
unit: Unit
metric:
Read (32B):
avg: AVG((TCC_EA_RDREQ_32B_sum / $denom))
min: MIN((TCC_EA_RDREQ_32B_sum / $denom))
max: MAX((TCC_EA_RDREQ_32B_sum / $denom))
avg: AVG((TCC_EA0_RDREQ_32B_sum / $denom))
min: MIN((TCC_EA0_RDREQ_32B_sum / $denom))
max: MAX((TCC_EA0_RDREQ_32B_sum / $denom))
unit: (Req + $normUnit)
Read (64B):
avg: AVG(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
min: MIN(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
max: MAX(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
avg: AVG(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
min: MIN(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
max: MAX(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
unit: (Req + $normUnit)
Read (Uncached):
avg: AVG((TCC_EA_RD_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA_RD_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA_RD_UNCACHED_32B_sum / $denom))
avg: AVG((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
unit: (Req + $normUnit)
HBM Read:
avg: AVG((TCC_EA_RDREQ_DRAM_sum / $denom))
min: MIN((TCC_EA_RDREQ_DRAM_sum / $denom))
max: MAX((TCC_EA_RDREQ_DRAM_sum / $denom))
avg: AVG((TCC_EA0_RDREQ_DRAM_sum / $denom))
min: MIN((TCC_EA0_RDREQ_DRAM_sum / $denom))
max: MAX((TCC_EA0_RDREQ_DRAM_sum / $denom))
unit: (Req + $normUnit)
Remote Read:
avg: AVG((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
min: MIN((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
avg: AVG((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
min: MIN((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
unit: (Req + $normUnit)
Write and Atomic (32B):
avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
max: MAX(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
max: MAX(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
unit: (Req + $normUnit)
Write and Atomic (Uncached):
avg: AVG((TCC_EA_WR_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA_WR_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA_WR_UNCACHED_32B_sum / $denom))
avg: AVG((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
unit: (Req + $normUnit)
Write and Atomic (64B):
avg: AVG((TCC_EA_WRREQ_64B_sum / $denom))
min: MIN((TCC_EA_WRREQ_64B_sum / $denom))
max: MAX((TCC_EA_WRREQ_64B_sum / $denom))
avg: AVG((TCC_EA0_WRREQ_64B_sum / $denom))
min: MIN((TCC_EA0_WRREQ_64B_sum / $denom))
max: MAX((TCC_EA0_WRREQ_64B_sum / $denom))
unit: (Req + $normUnit)
HBM Write and Atomic:
avg: AVG((TCC_EA_WRREQ_DRAM_sum / $denom))
min: MIN((TCC_EA_WRREQ_DRAM_sum / $denom))
max: MAX((TCC_EA_WRREQ_DRAM_sum / $denom))
avg: AVG((TCC_EA0_WRREQ_DRAM_sum / $denom))
min: MIN((TCC_EA0_WRREQ_DRAM_sum / $denom))
max: MAX((TCC_EA0_WRREQ_DRAM_sum / $denom))
unit: (Req + $normUnit)
Remote Write and Atomic:
avg: AVG((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
min: MIN((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
avg: AVG((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
min: MIN((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
unit: (Req + $normUnit)
Atomic:
avg: AVG((TCC_EA_ATOMIC_sum / $denom))
min: MIN((TCC_EA_ATOMIC_sum / $denom))
max: MAX((TCC_EA_ATOMIC_sum / $denom))
avg: AVG((TCC_EA0_ATOMIC_sum / $denom))
min: MIN((TCC_EA0_ATOMIC_sum / $denom))
max: MAX((TCC_EA0_ATOMIC_sum / $denom))
unit: (Req + $normUnit)
@@ -222,9 +222,9 @@ Panel Config:
atomic req: L2-Fabric Atomic
metric:
::_1:
read req: AVG((TO_INT(TCC_EA_RDREQ[::_1]) / $denom))
write req: AVG((TO_INT(TCC_EA_WRREQ[::_1]) / $denom))
atomic req: AVG((TO_INT(TCC_EA_ATOMIC[::_1]) / $denom))
read req: AVG((TO_INT(TCC_EA0_RDREQ[::_1]) / $denom))
write req: AVG((TO_INT(TCC_EA0_WRREQ[::_1]) / $denom))
atomic req: AVG((TO_INT(TCC_EA0_ATOMIC[::_1]) / $denom))
placeholder_range:
::_1: $total_l2_chan
cli_style: simple_multiple_bar
@@ -237,7 +237,7 @@ Panel Config:
expr: Expression
metric:
::_1:
expr: ((TCC_EA_RDREQ_LEVEL[::_1] / TCC_EA_RDREQ[::_1]) if (TCC_EA_RDREQ[::_1]
expr: ((TCC_EA0_RDREQ_LEVEL[::_1] / TCC_EA0_RDREQ[::_1]) if (TCC_EA0_RDREQ[::_1]
!= 0) else None)
placeholder_range:
::_1: $total_l2_chan
@@ -251,7 +251,7 @@ Panel Config:
expr: Expression
metric:
::_1:
expr: ((TCC_EA_WRREQ_LEVEL[::_1] / TCC_EA_WRREQ[::_1]) if (TCC_EA_WRREQ[::_1]
expr: ((TCC_EA0_WRREQ_LEVEL[::_1] / TCC_EA0_WRREQ[::_1]) if (TCC_EA0_WRREQ[::_1]
!= 0) else None)
placeholder_range:
::_1: $total_l2_chan
@@ -265,7 +265,7 @@ Panel Config:
expr: Expression
metric:
::_1:
expr: ((TCC_EA_ATOMIC_LEVEL[::_1] / TCC_EA_ATOMIC[::_1]) if (TCC_EA_ATOMIC[::_1]
expr: ((TCC_EA0_ATOMIC_LEVEL[::_1] / TCC_EA0_ATOMIC[::_1]) if (TCC_EA0_ATOMIC[::_1]
!= 0) else 0)
placeholder_range:
::_1: $total_l2_chan
@@ -288,13 +288,13 @@ Panel Config:
!= 0) else None))
unit: pct
Write and Atomic BW:
avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: Gbps
avg: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
min: MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
max: MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
unit: (Bytes + $normUnit)
HBM Write and Atomic Traffic:
avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
!= 0) else None))
Fichier diff supprimé car celui-ci est trop grand Voir la Diff
Fichier diff supprimé car celui-ci est trop grand Voir la Diff
+8 -92
Voir le fichier
@@ -396,11 +396,8 @@ class OmniSoC_Base:
# Counters not supported in rocprof v1 / v2
counters = counters - {"SQ_INSTS_VALU_MFMA_F8", "SQ_INSTS_VALU_MFMA_MOPS_F8"}
# Following counters are not supported
# TCP_TCP_LATENCY_sum (except for gfx950)
# SQC_DCACHE_INFLIGHT_LEVEL
counters = counters - {"SQC_DCACHE_INFLIGHT_LEVEL"}
if self.__arch != "gfx950":
# TCP_TCP_LATENCY_sum not supported for MI300 (gfx940, gfx941, gfx942)
if self.__arch in ("gfx940", "gfx941", "gfx942"):
counters = counters - {"TCP_TCP_LATENCY_sum"}
# SQ_ACCUM_PREV_HIRES will be injected for level counters later on
@@ -508,40 +505,15 @@ class OmniSoC_Base:
counters, _ = self.parse_counters_text(line.split(":")[2].strip())
rocprof_counters.update(counters)
elif str(rocprof_cmd).endswith("rocprofv3"):
command = [rocprof_cmd, "--list-avail"]
success, output = capture_subprocess_output(command, enable_logging=False)
# return code should be 0 so success should be True
if not success:
console_error(
f"Failed to list rocprof supported counters using command: {command}"
)
for line in output.splitlines():
if "counter_name" in line:
counters, _ = self.parse_counters_text(line.split(":")[1].strip())
rocprof_counters.update(counters)
# Custom counter support for mi100 for rocprofv3
if self._mspec.gpu_model.lower() == "mi100":
counter_defs_path = (
config.rocprof_compute_home
/ "rocprof_compute_soc"
/ "profile_configs"
/ "gfx908_counter_defs.yaml"
)
with open(counter_defs_path, "r") as fp:
counter_defs_contents = fp.read()
counters, _ = self.parse_counters_text(counter_defs_contents)
rocprof_counters.update(counters)
elif str(rocprof_cmd) == "rocprofiler-sdk":
# Point to rocprofiler sdk counter definition
elif (
str(rocprof_cmd).endswith("rocprofv3")
or str(rocprof_cmd) == "rocprofiler-sdk"
):
# Point to counter definition
old_rocprofiler_metrics_path = os.environ.get("ROCPROFILER_METRICS_PATH")
os.environ["ROCPROFILER_METRICS_PATH"] = str(
Path(self.get_args().rocprofiler_sdk_library_path)
.resolve()
.parent.parent.joinpath("share", "rocprofiler-sdk")
config.rocprof_compute_home / "rocprof_compute_soc" / "profile_configs"
)
sys.path.append(
str(
Path(self.get_args().rocprofiler_sdk_library_path).parent.parent
@@ -562,19 +534,6 @@ class OmniSoC_Base:
for counter in counters[list(counters.keys())[0]]
if hasattr(counter, "block") or hasattr(counter, "expression")
}
# Custom counter support for mi100 for rocprofiler-sdk
if self._mspec.gpu_model.lower() == "mi100":
counter_defs_path = (
config.rocprof_compute_home
/ "rocprof_compute_soc"
/ "profile_configs"
/ "gfx908_counter_defs.yaml"
)
with open(counter_defs_path, "r") as fp:
counter_defs_contents = fp.read()
counters, _ = self.parse_counters_text(counter_defs_contents)
rocprof_counters.update(counters)
# Reset env. var.
if old_rocprofiler_metrics_path is None:
del os.environ["ROCPROFILER_METRICS_PATH"]
@@ -774,49 +733,6 @@ class OmniSoC_Base:
]:
pmc.append(ctr)
if using_v3():
# MI 100 accumulate counters dont work with rocprofiler sdk
if self._mspec.gpu_model.lower() != "mi100":
# Add accumulation counters definitions
if ctr == "SQ_IFETCH_LEVEL":
counter_def = add_counter_extra_config_input_yaml(
counter_def,
"SQ_IFETCH_LEVEL_ACCUM",
"SQ_IFETCH_LEVEL accumulation",
"accumulate(SQ_IFETCH_LEVEL, HIGH_RES)",
[self.__arch],
)
elif ctr == "SQ_INST_LEVEL_LDS":
counter_def = add_counter_extra_config_input_yaml(
counter_def,
"SQ_INST_LEVEL_LDS_ACCUM",
"SQ_INST_LEVEL_LDS accumulation",
"accumulate(SQ_INST_LEVEL_LDS, HIGH_RES)",
[self.__arch],
)
elif ctr == "SQ_INST_LEVEL_SMEM":
counter_def = add_counter_extra_config_input_yaml(
counter_def,
"SQ_INST_LEVEL_SMEM_ACCUM",
"SQ_INST_LEVEL_SMEM accumulation",
"accumulate(SQ_INST_LEVEL_SMEM, HIGH_RES)",
[self.__arch],
)
elif ctr == "SQ_INST_LEVEL_VMEM":
counter_def = add_counter_extra_config_input_yaml(
counter_def,
"SQ_INST_LEVEL_VMEM_ACCUM",
"SQ_INST_LEVEL_VMEM accumulation",
"accumulate(SQ_INST_LEVEL_VMEM, HIGH_RES)",
[self.__arch],
)
elif ctr == "SQ_LEVEL_WAVES":
counter_def = add_counter_extra_config_input_yaml(
counter_def,
"SQ_LEVEL_WAVES_ACCUM",
"SQ_LEVEL_WAVES accumulation",
"accumulate(SQ_LEVEL_WAVES, HIGH_RES)",
[self.__arch],
)
# Add TCC channel counters definitions
if is_tcc_channel_counter(ctr):
counter_name = ctr.split("[")[0]
+15 -35
Voir le fichier
@@ -737,41 +737,21 @@ def run_prof(
new_env = os.environ.copy()
if using_v3():
# Default counter definitions
if rocprof_cmd == "rocprofiler-sdk":
counter_defs_path = (
path(options["ROCP_TOOL_LIBRARIES"])
.resolve()
.parent.parent.parent.joinpath(
"share", "rocprofiler-sdk", "counter_defs.yaml"
)
)
else:
counter_defs_path = (
path(shutil.which(rocprof_cmd))
.resolve()
.parent.parent.joinpath("share", "rocprofiler-sdk", "counter_defs.yaml")
)
# Custom counter definitions for MI 100
if mspec.gpu_model.lower() == "mi100":
counter_defs_path = (
config.rocprof_compute_home
/ "rocprof_compute_soc"
/ "profile_configs"
/ "gfx908_counter_defs.yaml"
)
# Read counter definitions
with open(counter_defs_path, "r") as file:
# Counter definitions
with open(
config.rocprof_compute_home
/ "rocprof_compute_soc"
/ "profile_configs"
/ f"counter_defs.yaml",
"r",
) as file:
counter_defs = yaml.safe_load(file)
# Get extra counter definitions
path_counter_config_yaml = path(fname).with_suffix(".yaml")
if path_counter_config_yaml.exists():
with open(path_counter_config_yaml, "r") as file:
extra_counter_defs = yaml.safe_load(file)
# Merge extra counter definitions
counter_defs["rocprofiler-sdk"]["counters"].extend(
extra_counter_defs["rocprofiler-sdk"]["counters"]
)
# Extra counter definitions
if path(fname).with_suffix(".yaml").exists():
with open(path(fname).with_suffix(".yaml"), "r") as file:
counter_defs["rocprofiler-sdk"]["counters"].extend(
yaml.safe_load(file)["rocprofiler-sdk"]["counters"]
)
# Write counter definitions to a temporary file
tmpfile_path = (
path(tempfile.mkdtemp(prefix="rocprof_counter_defs_", dir="/tmp"))
@@ -779,7 +759,7 @@ def run_prof(
)
with open(tmpfile_path, "w") as tmpfile:
yaml.dump(counter_defs, tmpfile, default_flow_style=False, sort_keys=False)
# Set rocprofiler sdk counter definitions
# Set counter definitions
new_env["ROCPROFILER_METRICS_PATH"] = str(tmpfile_path.parent)
console_debug(
f"Adding env var for counter definitions: ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
+5 -5
Voir le fichier
@@ -11,13 +11,13 @@ src/rocprof_compute_soc/analysis_configs/gfx940/0100_system_info.yaml: 739e39e69
src/rocprof_compute_soc/analysis_configs/gfx941/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef
src/rocprof_compute_soc/analysis_configs/gfx942/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef
src/rocprof_compute_soc/analysis_configs/gfx950/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef
src/rocprof_compute_soc/analysis_configs/gfx908/0200_system_speed_of_light.yaml: 383f51bf243980df626dacd34c26844b397e4093988524f91e3c7a9a3b8bf063
src/rocprof_compute_soc/analysis_configs/gfx908/0200_system_speed_of_light.yaml: 2103e9d6123f473f1cb18b71c046f197b5d1d873563c4aad4933d7361255f0c1
src/rocprof_compute_soc/analysis_configs/gfx90a/0200_system_speed_of_light.yaml: e9f552ee72849dc9c4ab14fee77ecc2681f4bcf610a8649c55365ab7eea7aafc
src/rocprof_compute_soc/analysis_configs/gfx940/0200_system_speed_of_light.yaml: 70716745e727d3a7e6fa706d34c346f796c241c485516da52e0c694386b3cf57
src/rocprof_compute_soc/analysis_configs/gfx941/0200_system_speed_of_light.yaml: a1d4f1f712755f6369d3a350eadcd5b0fcd90b5c0cab8be691c24bb860d90ba5
src/rocprof_compute_soc/analysis_configs/gfx942/0200_system_speed_of_light.yaml: 70716745e727d3a7e6fa706d34c346f796c241c485516da52e0c694386b3cf57
src/rocprof_compute_soc/analysis_configs/gfx950/0200_system_speed_of_light.yaml: a2cb003c74c0a75b9fe690da4e21b46e78fdb2f3233fc4753bca9276e93d60b0
src/rocprof_compute_soc/analysis_configs/gfx908/0300_memory_chart.yaml: 6e008d397d9f364d6cb5fdd5a7974e4d372654a583d3e30d8bb8796f97b9b211
src/rocprof_compute_soc/analysis_configs/gfx908/0300_memory_chart.yaml: c2ce64cc7406df29b444ea8e1d494b19dbbd15ac6d17a9f5452dada215fb5671
src/rocprof_compute_soc/analysis_configs/gfx90a/0300_memory_chart.yaml: cbb3c841b1ad8cbb23a071fcc145dedabb5341d36054c188c9f61878632fd664
src/rocprof_compute_soc/analysis_configs/gfx940/0300_memory_chart.yaml: f3c235b5c9ef06c837c04689fc1f413d1137360795ffccfc0256b40769c926c6
src/rocprof_compute_soc/analysis_configs/gfx941/0300_memory_chart.yaml: f3c235b5c9ef06c837c04689fc1f413d1137360795ffccfc0256b40769c926c6
@@ -89,13 +89,13 @@ src/rocprof_compute_soc/analysis_configs/gfx940/1600_vector_l1_data_cache.yaml:
src/rocprof_compute_soc/analysis_configs/gfx941/1600_vector_l1_data_cache.yaml: 6100b218f24de9f1433b39a093ed04b9bb9dfe656c5df77583c9db332c447230
src/rocprof_compute_soc/analysis_configs/gfx942/1600_vector_l1_data_cache.yaml: 6100b218f24de9f1433b39a093ed04b9bb9dfe656c5df77583c9db332c447230
src/rocprof_compute_soc/analysis_configs/gfx950/1600_vector_l1_data_cache.yaml: 67054ec0a4c6ca147a5dd40cc91f0e8e81378e1affe7d479274747579ecc524a
src/rocprof_compute_soc/analysis_configs/gfx908/1700_l2_cache.yaml: b1baa76f9dbfcc52d5e12cc1834102a0011ddf8bdece5be5fabc2945ab8971f4
src/rocprof_compute_soc/analysis_configs/gfx90a/1700_l2_cache.yaml: 4d834a2066d7f2cb655a8e41fc17531282150b6fe64bbc9c5ff3a10acddee5af
src/rocprof_compute_soc/analysis_configs/gfx908/1700_l2_cache.yaml: 54ff1df4ee08206d0aa4ff9cd9f0b20cbaa3866aecb9b40a0ac5969e9e25ed20
src/rocprof_compute_soc/analysis_configs/gfx90a/1700_l2_cache.yaml: ee87b5b6cdaca98de6e5cb0d06e2e092470e0e25aac1498f8abcfc8421932ae6
src/rocprof_compute_soc/analysis_configs/gfx940/1700_l2_cache.yaml: 78f9fee5dafc83d311da1c801200c1820e16a0678dd0548fafa8a966ec6a94d5
src/rocprof_compute_soc/analysis_configs/gfx941/1700_l2_cache.yaml: 51fe6e3888975b805594c2ab2b3147e717ae5e015468ee592cbcddc389c689bc
src/rocprof_compute_soc/analysis_configs/gfx942/1700_l2_cache.yaml: dc2dc9ff61b1747e492c28ef5ac76764fd75c18fd0827834130bc583f2afc619
src/rocprof_compute_soc/analysis_configs/gfx950/1700_l2_cache.yaml: d181f753c3fff608c72b8015d1af30bfd8cf8cdfbc0a17c505f717ddaa3b1efc
src/rocprof_compute_soc/analysis_configs/gfx908/1800_l2_cache_per_channel.yaml: a0c53202fe9f68d5e1fa689ce0643c471ced7d47e007d8ccc68fba294f7f6a05
src/rocprof_compute_soc/analysis_configs/gfx908/1800_l2_cache_per_channel.yaml: f5db15673a4be8b92f05a380738c5a10f68ca78ca2b1a9c31c19acae13d17f7b
src/rocprof_compute_soc/analysis_configs/gfx90a/1800_l2_cache_per_channel.yaml: a0c53202fe9f68d5e1fa689ce0643c471ced7d47e007d8ccc68fba294f7f6a05
src/rocprof_compute_soc/analysis_configs/gfx940/1800_l2_cache_per_channel.yaml: e184e3692eb0d641fb2e37fada0e58a6c4958553931d7c038b884e1e6986093f
src/rocprof_compute_soc/analysis_configs/gfx941/1800_l2_cache_per_channel.yaml: e184e3692eb0d641fb2e37fada0e58a6c4958553931d7c038b884e1e6986093f
+114 -114
Voir le fichier
@@ -1258,29 +1258,29 @@ panels:
pop: ((100 * AVG(((TCC_REQ_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($max_sclk / 1000) * 64) * TO_INT($total_l2_chan)))
L2-Fabric Read BW:
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBandwidth
pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum -
TCC_EA_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp))))
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum -
TCC_EA0_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp))))
/ $hbmBandwidth)
L2-Fabric Write BW:
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBandwidth
pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum -
TCC_EA_WRREQ_64B_sum) * 32)) / (End_Timestamp - Start_Timestamp))))
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum -
TCC_EA0_WRREQ_64B_sum) * 32)) / (End_Timestamp - Start_Timestamp))))
/ $hbmBandwidth)
L2-Fabric Read Latency:
value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
unit: Cycles
peak: None
pop: None
L2-Fabric Write Latency:
value: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
value: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: Cycles
peak: None
@@ -2423,24 +2423,24 @@ panels:
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) if ((TCP_TCC_WRITE_REQ_sum +
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) != 0) else None)), 0)
Fabric_L2 Rd:
value: ROUND(AVG((TCC_EA_RDREQ_sum / $denom)), 0)
value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0)
Fabric_L2 Wr:
value: ROUND(AVG((TCC_EA_WRREQ_sum / $denom)), 0)
value: ROUND(AVG((TCC_EA0_WRREQ_sum / $denom)), 0)
Fabric_L2 Atomic:
value: ROUND(AVG((TCC_EA_ATOMIC_sum / $denom)), 0)
value: ROUND(AVG((TCC_EA0_ATOMIC_sum / $denom)), 0)
Fabric Rd Lat:
value: ROUND(AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
value: ROUND(AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else 0)), 0)
Fabric Wr Lat:
value: ROUND(AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
value: ROUND(AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else 0)), 0)
Fabric Atomic Lat:
value: ROUND(AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
value: ROUND(AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else 0)), 0)
HBM Rd:
value: ROUND(AVG((TCC_EA_RDREQ_DRAM_sum / $denom)), 0)
value: ROUND(AVG((TCC_EA0_RDREQ_DRAM_sum / $denom)), 0)
HBM Wr:
value: ROUND(AVG((TCC_EA_WRREQ_DRAM_sum / $denom)), 0)
value: ROUND(AVG((TCC_EA0_WRREQ_DRAM_sum / $denom)), 0)
comparable: false
cli_style: mem_chart
tui_style: mem_chart
@@ -13064,11 +13064,11 @@ panels:
+ TCC_MISS_sum) != 0) else 0))
unit: pct
L2-Fabric Read BW:
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
L2-Fabric Write and Atomic BW:
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
HBM Bandwidth:
@@ -13118,13 +13118,13 @@ panels:
!= 0) else None))
unit: pct
Write and Atomic BW:
avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: Gbps
avg: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
min: MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
max: MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
unit: (Bytes + $normUnit)
HBM Write and Atomic Traffic:
avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
!= 0) else None))
@@ -13590,99 +13590,99 @@ panels:
unit: pct
gfx908:
Read BW:
avg: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
min: MIN((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
max: MAX((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: Gbps
avg: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
min: MIN((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
max: MAX((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / $denom))
unit: (Bytes + $normUnit)
HBM Read Traffic:
avg: AVG((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
avg: AVG((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
min: MIN((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
min: MIN((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
max: MAX((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
max: MAX((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
unit: pct
Remote Read Traffic:
avg: AVG((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
if (TCC_EA_RDREQ_sum != 0) else None))
min: MIN((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
if (TCC_EA_RDREQ_sum != 0) else None))
max: MAX((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
if (TCC_EA_RDREQ_sum != 0) else None))
avg: AVG((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
if (TCC_EA0_RDREQ_sum != 0) else None))
min: MIN((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
if (TCC_EA0_RDREQ_sum != 0) else None))
max: MAX((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
if (TCC_EA0_RDREQ_sum != 0) else None))
unit: pct
Uncached Read Traffic:
avg: AVG((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
avg: AVG((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
min: MIN((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
min: MIN((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
max: MAX((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
max: MAX((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
unit: pct
Write and Atomic BW:
avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: Gbps
avg: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
min: MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
max: MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / $denom))
unit: (Bytes + $normUnit)
HBM Write and Atomic Traffic:
avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
avg: AVG((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
min: MIN((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
min: MIN((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
max: MAX((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
max: MAX((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: pct
Remote Write and Atomic Traffic:
avg: AVG((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
if (TCC_EA_WRREQ_sum != 0) else None))
min: MIN((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
if (TCC_EA_WRREQ_sum != 0) else None))
max: MAX((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
if (TCC_EA_WRREQ_sum != 0) else None))
avg: AVG((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
if (TCC_EA0_WRREQ_sum != 0) else None))
min: MIN((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
if (TCC_EA0_WRREQ_sum != 0) else None))
max: MAX((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
if (TCC_EA0_WRREQ_sum != 0) else None))
unit: pct
Atomic Traffic:
avg: AVG((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
avg: AVG((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
min: MIN((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
min: MIN((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
max: MAX((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
max: MAX((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: pct
Uncached Write and Atomic Traffic:
avg: AVG((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
avg: AVG((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
min: MIN((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
min: MIN((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
max: MAX((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
max: MAX((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: pct
Read Latency:
avg: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
avg: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
min: MIN(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
min: MIN(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
max: MAX(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
max: MAX(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
!= 0) else None))
unit: Cycles
Write and Atomic Latency:
avg: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
avg: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
min: MIN(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
min: MIN(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
max: MAX(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
max: MAX(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
!= 0) else None))
unit: Cycles
Atomic Latency:
avg: AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
avg: AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
min: MIN(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
min: MIN(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
max: MAX(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
max: MAX(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
!= 0) else None))
unit: Cycles
- metric_table:
@@ -14840,59 +14840,59 @@ panels:
unit: Gbps
gfx908:
Read (32B):
avg: AVG((TCC_EA_RDREQ_32B_sum / $denom))
min: MIN((TCC_EA_RDREQ_32B_sum / $denom))
max: MAX((TCC_EA_RDREQ_32B_sum / $denom))
avg: AVG((TCC_EA0_RDREQ_32B_sum / $denom))
min: MIN((TCC_EA0_RDREQ_32B_sum / $denom))
max: MAX((TCC_EA0_RDREQ_32B_sum / $denom))
unit: (Req + $normUnit)
Read (64B):
avg: AVG(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
min: MIN(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
max: MAX(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
avg: AVG(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
min: MIN(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
max: MAX(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
unit: (Req + $normUnit)
Read (Uncached):
avg: AVG((TCC_EA_RD_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA_RD_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA_RD_UNCACHED_32B_sum / $denom))
avg: AVG((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
unit: (Req + $normUnit)
HBM Read:
avg: AVG((TCC_EA_RDREQ_DRAM_sum / $denom))
min: MIN((TCC_EA_RDREQ_DRAM_sum / $denom))
max: MAX((TCC_EA_RDREQ_DRAM_sum / $denom))
avg: AVG((TCC_EA0_RDREQ_DRAM_sum / $denom))
min: MIN((TCC_EA0_RDREQ_DRAM_sum / $denom))
max: MAX((TCC_EA0_RDREQ_DRAM_sum / $denom))
unit: (Req + $normUnit)
Remote Read:
avg: AVG((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
min: MIN((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
avg: AVG((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
min: MIN((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
unit: (Req + $normUnit)
Write and Atomic (32B):
avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
max: MAX(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
max: MAX(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
unit: (Req + $normUnit)
Write and Atomic (Uncached):
avg: AVG((TCC_EA_WR_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA_WR_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA_WR_UNCACHED_32B_sum / $denom))
avg: AVG((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
min: MIN((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
max: MAX((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
unit: (Req + $normUnit)
Write and Atomic (64B):
avg: AVG((TCC_EA_WRREQ_64B_sum / $denom))
min: MIN((TCC_EA_WRREQ_64B_sum / $denom))
max: MAX((TCC_EA_WRREQ_64B_sum / $denom))
avg: AVG((TCC_EA0_WRREQ_64B_sum / $denom))
min: MIN((TCC_EA0_WRREQ_64B_sum / $denom))
max: MAX((TCC_EA0_WRREQ_64B_sum / $denom))
unit: (Req + $normUnit)
HBM Write and Atomic:
avg: AVG((TCC_EA_WRREQ_DRAM_sum / $denom))
min: MIN((TCC_EA_WRREQ_DRAM_sum / $denom))
max: MAX((TCC_EA_WRREQ_DRAM_sum / $denom))
avg: AVG((TCC_EA0_WRREQ_DRAM_sum / $denom))
min: MIN((TCC_EA0_WRREQ_DRAM_sum / $denom))
max: MAX((TCC_EA0_WRREQ_DRAM_sum / $denom))
unit: (Req + $normUnit)
Remote Write and Atomic:
avg: AVG((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
min: MIN((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
avg: AVG((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
min: MIN((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
unit: (Req + $normUnit)
Atomic:
avg: AVG((TCC_EA_ATOMIC_sum / $denom))
min: MIN((TCC_EA_ATOMIC_sum / $denom))
max: MAX((TCC_EA_ATOMIC_sum / $denom))
avg: AVG((TCC_EA0_ATOMIC_sum / $denom))
min: MIN((TCC_EA0_ATOMIC_sum / $denom))
max: MAX((TCC_EA0_ATOMIC_sum / $denom))
unit: (Req + $normUnit)
metrics_description:
Utilization:
@@ -16268,9 +16268,9 @@ panels:
::_1: $total_l2_chan
gfx908:
::_1:
read req: AVG((TO_INT(TCC_EA_RDREQ[::_1]) / $denom))
write req: AVG((TO_INT(TCC_EA_WRREQ[::_1]) / $denom))
atomic req: AVG((TO_INT(TCC_EA_ATOMIC[::_1]) / $denom))
read req: AVG((TO_INT(TCC_EA0_RDREQ[::_1]) / $denom))
write req: AVG((TO_INT(TCC_EA0_WRREQ[::_1]) / $denom))
atomic req: AVG((TO_INT(TCC_EA0_ATOMIC[::_1]) / $denom))
placeholder_range:
::_1: $total_l2_chan
cli_style: simple_multiple_bar
@@ -16314,7 +16314,7 @@ panels:
::_1: $total_l2_chan
gfx908:
::_1:
expr: ((TCC_EA_RDREQ_LEVEL[::_1] / TCC_EA_RDREQ[::_1]) if (TCC_EA_RDREQ[::_1]
expr: ((TCC_EA0_RDREQ_LEVEL[::_1] / TCC_EA0_RDREQ[::_1]) if (TCC_EA0_RDREQ[::_1]
!= 0) else None)
placeholder_range:
::_1: $total_l2_chan
@@ -16359,7 +16359,7 @@ panels:
::_1: $total_l2_chan
gfx908:
::_1:
expr: ((TCC_EA_WRREQ_LEVEL[::_1] / TCC_EA_WRREQ[::_1]) if (TCC_EA_WRREQ[::_1]
expr: ((TCC_EA0_WRREQ_LEVEL[::_1] / TCC_EA0_WRREQ[::_1]) if (TCC_EA0_WRREQ[::_1]
!= 0) else None)
placeholder_range:
::_1: $total_l2_chan
@@ -16404,7 +16404,7 @@ panels:
::_1: $total_l2_chan
gfx908:
::_1:
expr: ((TCC_EA_ATOMIC_LEVEL[::_1] / TCC_EA_ATOMIC[::_1]) if (TCC_EA_ATOMIC[::_1]
expr: ((TCC_EA0_ATOMIC_LEVEL[::_1] / TCC_EA0_ATOMIC[::_1]) if (TCC_EA0_ATOMIC[::_1]
!= 0) else 0)
placeholder_range:
::_1: $total_l2_chan