Use own counter definition (#91)
* Use own counter definition * Do not depend on rocprofiler-sdk counter definition * Add missing counter definitions for MI100, MI200, MI300, MI350 series * Counters added based on register specification * This prevents some missing metrics * Enable SQC_DCACHE_INFLIGHT_LEVEL counter and associated metrics * Enable TCP_TCP_LATENCY counter and associated counter for all GPUs except MI300 * Update TCC_EA_* counters for MI100 to TCC_EA0_* * Update MI100 metrics which depend on TCC_EA0_* counters * Enable accumulation counters for MI100 * Improve rocprof list avail usage to get a better idea of supported counters * Update CHANGELOG * Move accumulation counters to counter definition --------- Co-authored-by: Vignesh Edithal <Vignesh.Edithal@amd.com>
Cette révision appartient à :
révisé par
GitHub
Parent
97d9f35033
révision
d3f9ab25eb
@@ -7,11 +7,17 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
### Added
|
||||
|
||||
* Add `rocpd` choice for `--format-rocprof-output` option in profile mode
|
||||
|
||||
* Add `--retain-rocpd-output` option in profile mode to save large raw rocpd databases in workload directory
|
||||
|
||||
* Show description of metrics during analysis
|
||||
* Use `--include-cols Description` to show the Description column, which is excluded by default from the
|
||||
ROCm Compute Profiler CLI output.
|
||||
|
||||
* Add missing counters based on the register specification, which enables previously unavailable metrics
|
||||
* Enable SQC_DCACHE_INFLIGHT_LEVEL counter and associated metrics
|
||||
* Enable TCP_TCP_LATENCY counter and associated metrics for all GPUs except MI300
|
||||
|
||||
### Changed
|
||||
|
||||
* Add notice for change in default output format to `rocpd` in a future release
|
||||
@@ -53,6 +59,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
* Fixed standalone GUI crashing
|
||||
* Fixed L2 read/write/atomic bandwidths on MI350
|
||||
* Update metric names for better alignment between analysis configuration and documentation
|
||||
* Fixed an issue where accumulation counters could not be collected on AMD Instinct MI100
|
||||
|
||||
### Known issues
|
||||
|
||||
@@ -60,6 +67,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
|
||||
* Improved `--time-unit` option in analyze mode to apply time unit conversion across all analysis sections, not just kernel top stats.
|
||||
|
||||
* Improve logic to obtain rocprof-supported counters, which prevents unnecessary warnings
|
||||
|
||||
### Removed
|
||||
|
||||
* Usage of rocm-smi
|
||||
|
||||
+10
-8
@@ -260,27 +260,29 @@ Panel Config:
|
||||
pop: ((100 * AVG(((TCC_REQ_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk / 1000) * 64) * TO_INT($total_l2_chan)))
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBandwidth
|
||||
pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBandwidth)
|
||||
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum -
|
||||
TCC_EA0_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp)))) /
|
||||
$hbmBandwidth)
|
||||
L2-Fabric Write BW:
|
||||
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBandwidth
|
||||
pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBandwidth)
|
||||
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum -
|
||||
TCC_EA0_WRREQ_64B_sum) * 32)) / (End_Timestamp - Start_Timestamp)))) /
|
||||
$hbmBandwidth)
|
||||
L2-Fabric Read Latency:
|
||||
value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
peak: None
|
||||
pop: None
|
||||
L2-Fabric Write Latency:
|
||||
value: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
value: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
peak: None
|
||||
|
||||
+8
-8
@@ -244,24 +244,24 @@ Panel Config:
|
||||
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)
|
||||
!= 0) else None)), 0)
|
||||
Fabric_L2 Rd:
|
||||
value: ROUND(AVG((TCC_EA_RDREQ_sum / $denom)), 0)
|
||||
value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0)
|
||||
Fabric_L2 Wr:
|
||||
value: ROUND(AVG((TCC_EA_WRREQ_sum / $denom)), 0)
|
||||
value: ROUND(AVG((TCC_EA0_WRREQ_sum / $denom)), 0)
|
||||
Fabric_L2 Atomic:
|
||||
value: ROUND(AVG((TCC_EA_ATOMIC_sum / $denom)), 0)
|
||||
value: ROUND(AVG((TCC_EA0_ATOMIC_sum / $denom)), 0)
|
||||
Fabric Rd Lat:
|
||||
value: ROUND(AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
value: ROUND(AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else 0)), 0)
|
||||
Fabric Wr Lat:
|
||||
value: ROUND(AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
value: ROUND(AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else 0)), 0)
|
||||
Fabric Atomic Lat:
|
||||
value: ROUND(AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
|
||||
value: ROUND(AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else 0)), 0)
|
||||
HBM Rd:
|
||||
value: ROUND(AVG((TCC_EA_RDREQ_DRAM_sum / $denom)), 0)
|
||||
value: ROUND(AVG((TCC_EA0_RDREQ_DRAM_sum / $denom)), 0)
|
||||
HBM Wr:
|
||||
value: ROUND(AVG((TCC_EA_WRREQ_DRAM_sum / $denom)), 0)
|
||||
value: ROUND(AVG((TCC_EA0_WRREQ_DRAM_sum / $denom)), 0)
|
||||
comparable: false
|
||||
cli_style: mem_chart
|
||||
tui_style: mem_chart
|
||||
|
||||
+85
-85
@@ -235,11 +235,11 @@ Panel Config:
|
||||
+ TCC_MISS_sum) != 0) else 0))
|
||||
unit: pct
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
L2-Fabric Write and Atomic BW:
|
||||
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
HBM Bandwidth:
|
||||
@@ -256,99 +256,99 @@ Panel Config:
|
||||
unit: Unit
|
||||
metric:
|
||||
Read BW:
|
||||
avg: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: Gbps
|
||||
avg: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / $denom))
|
||||
min: MIN((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / $denom))
|
||||
max: MAX((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
HBM Read Traffic:
|
||||
avg: AVG((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
avg: AVG((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
min: MIN((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
max: MAX((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Remote Read Traffic:
|
||||
avg: AVG((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
|
||||
if (TCC_EA_RDREQ_sum != 0) else None))
|
||||
min: MIN((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
|
||||
if (TCC_EA_RDREQ_sum != 0) else None))
|
||||
max: MAX((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
|
||||
if (TCC_EA_RDREQ_sum != 0) else None))
|
||||
avg: AVG((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
|
||||
if (TCC_EA0_RDREQ_sum != 0) else None))
|
||||
min: MIN((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
|
||||
if (TCC_EA0_RDREQ_sum != 0) else None))
|
||||
max: MAX((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
|
||||
if (TCC_EA0_RDREQ_sum != 0) else None))
|
||||
unit: pct
|
||||
Uncached Read Traffic:
|
||||
avg: AVG((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
avg: AVG((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
min: MIN((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
max: MAX((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Write and Atomic BW:
|
||||
avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: Gbps
|
||||
avg: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
min: MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
max: MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
HBM Write and Atomic Traffic:
|
||||
avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
avg: AVG((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
min: MIN((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
max: MAX((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Remote Write and Atomic Traffic:
|
||||
avg: AVG((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
|
||||
if (TCC_EA_WRREQ_sum != 0) else None))
|
||||
min: MIN((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
|
||||
if (TCC_EA_WRREQ_sum != 0) else None))
|
||||
max: MAX((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
|
||||
if (TCC_EA_WRREQ_sum != 0) else None))
|
||||
avg: AVG((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
|
||||
if (TCC_EA0_WRREQ_sum != 0) else None))
|
||||
min: MIN((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
|
||||
if (TCC_EA0_WRREQ_sum != 0) else None))
|
||||
max: MAX((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
|
||||
if (TCC_EA0_WRREQ_sum != 0) else None))
|
||||
unit: pct
|
||||
Atomic Traffic:
|
||||
avg: AVG((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
avg: AVG((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
min: MIN((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
max: MAX((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Uncached Write and Atomic Traffic:
|
||||
avg: AVG((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
avg: AVG((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
min: MIN((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
max: MAX((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Read Latency:
|
||||
avg: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
avg: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
min: MIN(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
max: MAX(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
Write and Atomic Latency:
|
||||
avg: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
avg: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
min: MIN(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
max: MAX(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
Atomic Latency:
|
||||
avg: AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
|
||||
avg: AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else None))
|
||||
min: MIN(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
|
||||
min: MIN(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else None))
|
||||
max: MAX(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
|
||||
max: MAX(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
- metric_table:
|
||||
@@ -504,57 +504,57 @@ Panel Config:
|
||||
unit: Unit
|
||||
metric:
|
||||
Read (32B):
|
||||
avg: AVG((TCC_EA_RDREQ_32B_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_32B_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_32B_sum / $denom))
|
||||
avg: AVG((TCC_EA0_RDREQ_32B_sum / $denom))
|
||||
min: MIN((TCC_EA0_RDREQ_32B_sum / $denom))
|
||||
max: MAX((TCC_EA0_RDREQ_32B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Read (64B):
|
||||
avg: AVG(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
|
||||
min: MIN(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
|
||||
max: MAX(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
|
||||
avg: AVG(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
|
||||
min: MIN(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
|
||||
max: MAX(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Read (Uncached):
|
||||
avg: AVG((TCC_EA_RD_UNCACHED_32B_sum / $denom))
|
||||
min: MIN((TCC_EA_RD_UNCACHED_32B_sum / $denom))
|
||||
max: MAX((TCC_EA_RD_UNCACHED_32B_sum / $denom))
|
||||
avg: AVG((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
|
||||
min: MIN((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
|
||||
max: MAX((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
HBM Read:
|
||||
avg: AVG((TCC_EA_RDREQ_DRAM_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_DRAM_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_DRAM_sum / $denom))
|
||||
avg: AVG((TCC_EA0_RDREQ_DRAM_sum / $denom))
|
||||
min: MIN((TCC_EA0_RDREQ_DRAM_sum / $denom))
|
||||
max: MAX((TCC_EA0_RDREQ_DRAM_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Remote Read:
|
||||
avg: AVG((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
|
||||
min: MIN((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
|
||||
max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
|
||||
avg: AVG((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
|
||||
min: MIN((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
|
||||
max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Write and Atomic (32B):
|
||||
avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
|
||||
max: MAX(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
|
||||
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
max: MAX(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Write and Atomic (Uncached):
|
||||
avg: AVG((TCC_EA_WR_UNCACHED_32B_sum / $denom))
|
||||
min: MIN((TCC_EA_WR_UNCACHED_32B_sum / $denom))
|
||||
max: MAX((TCC_EA_WR_UNCACHED_32B_sum / $denom))
|
||||
avg: AVG((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
|
||||
min: MIN((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
|
||||
max: MAX((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Write and Atomic (64B):
|
||||
avg: AVG((TCC_EA_WRREQ_64B_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_64B_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_64B_sum / $denom))
|
||||
avg: AVG((TCC_EA0_WRREQ_64B_sum / $denom))
|
||||
min: MIN((TCC_EA0_WRREQ_64B_sum / $denom))
|
||||
max: MAX((TCC_EA0_WRREQ_64B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
HBM Write and Atomic:
|
||||
avg: AVG((TCC_EA_WRREQ_DRAM_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_DRAM_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_DRAM_sum / $denom))
|
||||
avg: AVG((TCC_EA0_WRREQ_DRAM_sum / $denom))
|
||||
min: MIN((TCC_EA0_WRREQ_DRAM_sum / $denom))
|
||||
max: MAX((TCC_EA0_WRREQ_DRAM_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Remote Write and Atomic:
|
||||
avg: AVG((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
|
||||
min: MIN((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
|
||||
max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
|
||||
avg: AVG((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
|
||||
min: MIN((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
|
||||
max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Atomic:
|
||||
avg: AVG((TCC_EA_ATOMIC_sum / $denom))
|
||||
min: MIN((TCC_EA_ATOMIC_sum / $denom))
|
||||
max: MAX((TCC_EA_ATOMIC_sum / $denom))
|
||||
avg: AVG((TCC_EA0_ATOMIC_sum / $denom))
|
||||
min: MIN((TCC_EA0_ATOMIC_sum / $denom))
|
||||
max: MAX((TCC_EA0_ATOMIC_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
|
||||
+6
-6
@@ -222,9 +222,9 @@ Panel Config:
|
||||
atomic req: L2-Fabric Atomic
|
||||
metric:
|
||||
::_1:
|
||||
read req: AVG((TO_INT(TCC_EA_RDREQ[::_1]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_EA_WRREQ[::_1]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_EA_ATOMIC[::_1]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_EA0_RDREQ[::_1]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_EA0_WRREQ[::_1]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_EA0_ATOMIC[::_1]) / $denom))
|
||||
placeholder_range:
|
||||
::_1: $total_l2_chan
|
||||
cli_style: simple_multiple_bar
|
||||
@@ -237,7 +237,7 @@ Panel Config:
|
||||
expr: Expression
|
||||
metric:
|
||||
::_1:
|
||||
expr: ((TCC_EA_RDREQ_LEVEL[::_1] / TCC_EA_RDREQ[::_1]) if (TCC_EA_RDREQ[::_1]
|
||||
expr: ((TCC_EA0_RDREQ_LEVEL[::_1] / TCC_EA0_RDREQ[::_1]) if (TCC_EA0_RDREQ[::_1]
|
||||
!= 0) else None)
|
||||
placeholder_range:
|
||||
::_1: $total_l2_chan
|
||||
@@ -251,7 +251,7 @@ Panel Config:
|
||||
expr: Expression
|
||||
metric:
|
||||
::_1:
|
||||
expr: ((TCC_EA_WRREQ_LEVEL[::_1] / TCC_EA_WRREQ[::_1]) if (TCC_EA_WRREQ[::_1]
|
||||
expr: ((TCC_EA0_WRREQ_LEVEL[::_1] / TCC_EA0_WRREQ[::_1]) if (TCC_EA0_WRREQ[::_1]
|
||||
!= 0) else None)
|
||||
placeholder_range:
|
||||
::_1: $total_l2_chan
|
||||
@@ -265,7 +265,7 @@ Panel Config:
|
||||
expr: Expression
|
||||
metric:
|
||||
::_1:
|
||||
expr: ((TCC_EA_ATOMIC_LEVEL[::_1] / TCC_EA_ATOMIC[::_1]) if (TCC_EA_ATOMIC[::_1]
|
||||
expr: ((TCC_EA0_ATOMIC_LEVEL[::_1] / TCC_EA0_ATOMIC[::_1]) if (TCC_EA0_ATOMIC[::_1]
|
||||
!= 0) else 0)
|
||||
placeholder_range:
|
||||
::_1: $total_l2_chan
|
||||
|
||||
+7
-7
@@ -288,13 +288,13 @@ Panel Config:
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Write and Atomic BW:
|
||||
avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: Gbps
|
||||
avg: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
min: MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
max: MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
HBM Write and Atomic Traffic:
|
||||
avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
!= 0) else None))
|
||||
|
||||
+10675
Fichier diff supprimé car celui-ci est trop grand
Voir la Diff
-2854
Fichier diff supprimé car celui-ci est trop grand
Voir la Diff
@@ -396,11 +396,8 @@ class OmniSoC_Base:
|
||||
# Counters not supported in rocprof v1 / v2
|
||||
counters = counters - {"SQ_INSTS_VALU_MFMA_F8", "SQ_INSTS_VALU_MFMA_MOPS_F8"}
|
||||
|
||||
# Following counters are not supported
|
||||
# TCP_TCP_LATENCY_sum (except for gfx950)
|
||||
# SQC_DCACHE_INFLIGHT_LEVEL
|
||||
counters = counters - {"SQC_DCACHE_INFLIGHT_LEVEL"}
|
||||
if self.__arch != "gfx950":
|
||||
# TCP_TCP_LATENCY_sum not supported for MI300 (gfx940, gfx941, gfx942)
|
||||
if self.__arch in ("gfx940", "gfx941", "gfx942"):
|
||||
counters = counters - {"TCP_TCP_LATENCY_sum"}
|
||||
|
||||
# SQ_ACCUM_PREV_HIRES will be injected for level counters later on
|
||||
@@ -508,40 +505,15 @@ class OmniSoC_Base:
|
||||
counters, _ = self.parse_counters_text(line.split(":")[2].strip())
|
||||
rocprof_counters.update(counters)
|
||||
|
||||
elif str(rocprof_cmd).endswith("rocprofv3"):
|
||||
command = [rocprof_cmd, "--list-avail"]
|
||||
success, output = capture_subprocess_output(command, enable_logging=False)
|
||||
# return code should be 0 so success should be True
|
||||
if not success:
|
||||
console_error(
|
||||
f"Failed to list rocprof supported counters using command: {command}"
|
||||
)
|
||||
for line in output.splitlines():
|
||||
if "counter_name" in line:
|
||||
counters, _ = self.parse_counters_text(line.split(":")[1].strip())
|
||||
rocprof_counters.update(counters)
|
||||
# Custom counter support for mi100 for rocprofv3
|
||||
if self._mspec.gpu_model.lower() == "mi100":
|
||||
counter_defs_path = (
|
||||
config.rocprof_compute_home
|
||||
/ "rocprof_compute_soc"
|
||||
/ "profile_configs"
|
||||
/ "gfx908_counter_defs.yaml"
|
||||
)
|
||||
with open(counter_defs_path, "r") as fp:
|
||||
counter_defs_contents = fp.read()
|
||||
counters, _ = self.parse_counters_text(counter_defs_contents)
|
||||
rocprof_counters.update(counters)
|
||||
|
||||
elif str(rocprof_cmd) == "rocprofiler-sdk":
|
||||
# Point to rocprofiler sdk counter definition
|
||||
elif (
|
||||
str(rocprof_cmd).endswith("rocprofv3")
|
||||
or str(rocprof_cmd) == "rocprofiler-sdk"
|
||||
):
|
||||
# Point to counter definition
|
||||
old_rocprofiler_metrics_path = os.environ.get("ROCPROFILER_METRICS_PATH")
|
||||
os.environ["ROCPROFILER_METRICS_PATH"] = str(
|
||||
Path(self.get_args().rocprofiler_sdk_library_path)
|
||||
.resolve()
|
||||
.parent.parent.joinpath("share", "rocprofiler-sdk")
|
||||
config.rocprof_compute_home / "rocprof_compute_soc" / "profile_configs"
|
||||
)
|
||||
|
||||
sys.path.append(
|
||||
str(
|
||||
Path(self.get_args().rocprofiler_sdk_library_path).parent.parent
|
||||
@@ -562,19 +534,6 @@ class OmniSoC_Base:
|
||||
for counter in counters[list(counters.keys())[0]]
|
||||
if hasattr(counter, "block") or hasattr(counter, "expression")
|
||||
}
|
||||
# Custom counter support for mi100 for rocprofiler-sdk
|
||||
if self._mspec.gpu_model.lower() == "mi100":
|
||||
counter_defs_path = (
|
||||
config.rocprof_compute_home
|
||||
/ "rocprof_compute_soc"
|
||||
/ "profile_configs"
|
||||
/ "gfx908_counter_defs.yaml"
|
||||
)
|
||||
with open(counter_defs_path, "r") as fp:
|
||||
counter_defs_contents = fp.read()
|
||||
counters, _ = self.parse_counters_text(counter_defs_contents)
|
||||
rocprof_counters.update(counters)
|
||||
|
||||
# Reset env. var.
|
||||
if old_rocprofiler_metrics_path is None:
|
||||
del os.environ["ROCPROFILER_METRICS_PATH"]
|
||||
@@ -774,49 +733,6 @@ class OmniSoC_Base:
|
||||
]:
|
||||
pmc.append(ctr)
|
||||
if using_v3():
|
||||
# MI 100 accumulate counters dont work with rocprofiler sdk
|
||||
if self._mspec.gpu_model.lower() != "mi100":
|
||||
# Add accumulation counters definitions
|
||||
if ctr == "SQ_IFETCH_LEVEL":
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
"SQ_IFETCH_LEVEL_ACCUM",
|
||||
"SQ_IFETCH_LEVEL accumulation",
|
||||
"accumulate(SQ_IFETCH_LEVEL, HIGH_RES)",
|
||||
[self.__arch],
|
||||
)
|
||||
elif ctr == "SQ_INST_LEVEL_LDS":
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
"SQ_INST_LEVEL_LDS_ACCUM",
|
||||
"SQ_INST_LEVEL_LDS accumulation",
|
||||
"accumulate(SQ_INST_LEVEL_LDS, HIGH_RES)",
|
||||
[self.__arch],
|
||||
)
|
||||
elif ctr == "SQ_INST_LEVEL_SMEM":
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
"SQ_INST_LEVEL_SMEM_ACCUM",
|
||||
"SQ_INST_LEVEL_SMEM accumulation",
|
||||
"accumulate(SQ_INST_LEVEL_SMEM, HIGH_RES)",
|
||||
[self.__arch],
|
||||
)
|
||||
elif ctr == "SQ_INST_LEVEL_VMEM":
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
"SQ_INST_LEVEL_VMEM_ACCUM",
|
||||
"SQ_INST_LEVEL_VMEM accumulation",
|
||||
"accumulate(SQ_INST_LEVEL_VMEM, HIGH_RES)",
|
||||
[self.__arch],
|
||||
)
|
||||
elif ctr == "SQ_LEVEL_WAVES":
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
"SQ_LEVEL_WAVES_ACCUM",
|
||||
"SQ_LEVEL_WAVES accumulation",
|
||||
"accumulate(SQ_LEVEL_WAVES, HIGH_RES)",
|
||||
[self.__arch],
|
||||
)
|
||||
# Add TCC channel counters definitions
|
||||
if is_tcc_channel_counter(ctr):
|
||||
counter_name = ctr.split("[")[0]
|
||||
|
||||
@@ -737,41 +737,21 @@ def run_prof(
|
||||
new_env = os.environ.copy()
|
||||
|
||||
if using_v3():
|
||||
# Default counter definitions
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
counter_defs_path = (
|
||||
path(options["ROCP_TOOL_LIBRARIES"])
|
||||
.resolve()
|
||||
.parent.parent.parent.joinpath(
|
||||
"share", "rocprofiler-sdk", "counter_defs.yaml"
|
||||
)
|
||||
)
|
||||
else:
|
||||
counter_defs_path = (
|
||||
path(shutil.which(rocprof_cmd))
|
||||
.resolve()
|
||||
.parent.parent.joinpath("share", "rocprofiler-sdk", "counter_defs.yaml")
|
||||
)
|
||||
# Custom counter definitions for MI 100
|
||||
if mspec.gpu_model.lower() == "mi100":
|
||||
counter_defs_path = (
|
||||
config.rocprof_compute_home
|
||||
/ "rocprof_compute_soc"
|
||||
/ "profile_configs"
|
||||
/ "gfx908_counter_defs.yaml"
|
||||
)
|
||||
# Read counter definitions
|
||||
with open(counter_defs_path, "r") as file:
|
||||
# Counter definitions
|
||||
with open(
|
||||
config.rocprof_compute_home
|
||||
/ "rocprof_compute_soc"
|
||||
/ "profile_configs"
|
||||
/ f"counter_defs.yaml",
|
||||
"r",
|
||||
) as file:
|
||||
counter_defs = yaml.safe_load(file)
|
||||
# Get extra counter definitions
|
||||
path_counter_config_yaml = path(fname).with_suffix(".yaml")
|
||||
if path_counter_config_yaml.exists():
|
||||
with open(path_counter_config_yaml, "r") as file:
|
||||
extra_counter_defs = yaml.safe_load(file)
|
||||
# Merge extra counter definitions
|
||||
counter_defs["rocprofiler-sdk"]["counters"].extend(
|
||||
extra_counter_defs["rocprofiler-sdk"]["counters"]
|
||||
)
|
||||
# Extra counter definitions
|
||||
if path(fname).with_suffix(".yaml").exists():
|
||||
with open(path(fname).with_suffix(".yaml"), "r") as file:
|
||||
counter_defs["rocprofiler-sdk"]["counters"].extend(
|
||||
yaml.safe_load(file)["rocprofiler-sdk"]["counters"]
|
||||
)
|
||||
# Write counter definitions to a temporary file
|
||||
tmpfile_path = (
|
||||
path(tempfile.mkdtemp(prefix="rocprof_counter_defs_", dir="/tmp"))
|
||||
@@ -779,7 +759,7 @@ def run_prof(
|
||||
)
|
||||
with open(tmpfile_path, "w") as tmpfile:
|
||||
yaml.dump(counter_defs, tmpfile, default_flow_style=False, sort_keys=False)
|
||||
# Set rocprofiler sdk counter definitions
|
||||
# Set counter definitions
|
||||
new_env["ROCPROFILER_METRICS_PATH"] = str(tmpfile_path.parent)
|
||||
console_debug(
|
||||
f"Adding env var for counter definitions: ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
|
||||
|
||||
@@ -11,13 +11,13 @@ src/rocprof_compute_soc/analysis_configs/gfx940/0100_system_info.yaml: 739e39e69
|
||||
src/rocprof_compute_soc/analysis_configs/gfx941/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef
|
||||
src/rocprof_compute_soc/analysis_configs/gfx942/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef
|
||||
src/rocprof_compute_soc/analysis_configs/gfx950/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef
|
||||
src/rocprof_compute_soc/analysis_configs/gfx908/0200_system_speed_of_light.yaml: 383f51bf243980df626dacd34c26844b397e4093988524f91e3c7a9a3b8bf063
|
||||
src/rocprof_compute_soc/analysis_configs/gfx908/0200_system_speed_of_light.yaml: 2103e9d6123f473f1cb18b71c046f197b5d1d873563c4aad4933d7361255f0c1
|
||||
src/rocprof_compute_soc/analysis_configs/gfx90a/0200_system_speed_of_light.yaml: e9f552ee72849dc9c4ab14fee77ecc2681f4bcf610a8649c55365ab7eea7aafc
|
||||
src/rocprof_compute_soc/analysis_configs/gfx940/0200_system_speed_of_light.yaml: 70716745e727d3a7e6fa706d34c346f796c241c485516da52e0c694386b3cf57
|
||||
src/rocprof_compute_soc/analysis_configs/gfx941/0200_system_speed_of_light.yaml: a1d4f1f712755f6369d3a350eadcd5b0fcd90b5c0cab8be691c24bb860d90ba5
|
||||
src/rocprof_compute_soc/analysis_configs/gfx942/0200_system_speed_of_light.yaml: 70716745e727d3a7e6fa706d34c346f796c241c485516da52e0c694386b3cf57
|
||||
src/rocprof_compute_soc/analysis_configs/gfx950/0200_system_speed_of_light.yaml: a2cb003c74c0a75b9fe690da4e21b46e78fdb2f3233fc4753bca9276e93d60b0
|
||||
src/rocprof_compute_soc/analysis_configs/gfx908/0300_memory_chart.yaml: 6e008d397d9f364d6cb5fdd5a7974e4d372654a583d3e30d8bb8796f97b9b211
|
||||
src/rocprof_compute_soc/analysis_configs/gfx908/0300_memory_chart.yaml: c2ce64cc7406df29b444ea8e1d494b19dbbd15ac6d17a9f5452dada215fb5671
|
||||
src/rocprof_compute_soc/analysis_configs/gfx90a/0300_memory_chart.yaml: cbb3c841b1ad8cbb23a071fcc145dedabb5341d36054c188c9f61878632fd664
|
||||
src/rocprof_compute_soc/analysis_configs/gfx940/0300_memory_chart.yaml: f3c235b5c9ef06c837c04689fc1f413d1137360795ffccfc0256b40769c926c6
|
||||
src/rocprof_compute_soc/analysis_configs/gfx941/0300_memory_chart.yaml: f3c235b5c9ef06c837c04689fc1f413d1137360795ffccfc0256b40769c926c6
|
||||
@@ -89,13 +89,13 @@ src/rocprof_compute_soc/analysis_configs/gfx940/1600_vector_l1_data_cache.yaml:
|
||||
src/rocprof_compute_soc/analysis_configs/gfx941/1600_vector_l1_data_cache.yaml: 6100b218f24de9f1433b39a093ed04b9bb9dfe656c5df77583c9db332c447230
|
||||
src/rocprof_compute_soc/analysis_configs/gfx942/1600_vector_l1_data_cache.yaml: 6100b218f24de9f1433b39a093ed04b9bb9dfe656c5df77583c9db332c447230
|
||||
src/rocprof_compute_soc/analysis_configs/gfx950/1600_vector_l1_data_cache.yaml: 67054ec0a4c6ca147a5dd40cc91f0e8e81378e1affe7d479274747579ecc524a
|
||||
src/rocprof_compute_soc/analysis_configs/gfx908/1700_l2_cache.yaml: b1baa76f9dbfcc52d5e12cc1834102a0011ddf8bdece5be5fabc2945ab8971f4
|
||||
src/rocprof_compute_soc/analysis_configs/gfx90a/1700_l2_cache.yaml: 4d834a2066d7f2cb655a8e41fc17531282150b6fe64bbc9c5ff3a10acddee5af
|
||||
src/rocprof_compute_soc/analysis_configs/gfx908/1700_l2_cache.yaml: 54ff1df4ee08206d0aa4ff9cd9f0b20cbaa3866aecb9b40a0ac5969e9e25ed20
|
||||
src/rocprof_compute_soc/analysis_configs/gfx90a/1700_l2_cache.yaml: ee87b5b6cdaca98de6e5cb0d06e2e092470e0e25aac1498f8abcfc8421932ae6
|
||||
src/rocprof_compute_soc/analysis_configs/gfx940/1700_l2_cache.yaml: 78f9fee5dafc83d311da1c801200c1820e16a0678dd0548fafa8a966ec6a94d5
|
||||
src/rocprof_compute_soc/analysis_configs/gfx941/1700_l2_cache.yaml: 51fe6e3888975b805594c2ab2b3147e717ae5e015468ee592cbcddc389c689bc
|
||||
src/rocprof_compute_soc/analysis_configs/gfx942/1700_l2_cache.yaml: dc2dc9ff61b1747e492c28ef5ac76764fd75c18fd0827834130bc583f2afc619
|
||||
src/rocprof_compute_soc/analysis_configs/gfx950/1700_l2_cache.yaml: d181f753c3fff608c72b8015d1af30bfd8cf8cdfbc0a17c505f717ddaa3b1efc
|
||||
src/rocprof_compute_soc/analysis_configs/gfx908/1800_l2_cache_per_channel.yaml: a0c53202fe9f68d5e1fa689ce0643c471ced7d47e007d8ccc68fba294f7f6a05
|
||||
src/rocprof_compute_soc/analysis_configs/gfx908/1800_l2_cache_per_channel.yaml: f5db15673a4be8b92f05a380738c5a10f68ca78ca2b1a9c31c19acae13d17f7b
|
||||
src/rocprof_compute_soc/analysis_configs/gfx90a/1800_l2_cache_per_channel.yaml: a0c53202fe9f68d5e1fa689ce0643c471ced7d47e007d8ccc68fba294f7f6a05
|
||||
src/rocprof_compute_soc/analysis_configs/gfx940/1800_l2_cache_per_channel.yaml: e184e3692eb0d641fb2e37fada0e58a6c4958553931d7c038b884e1e6986093f
|
||||
src/rocprof_compute_soc/analysis_configs/gfx941/1800_l2_cache_per_channel.yaml: e184e3692eb0d641fb2e37fada0e58a6c4958553931d7c038b884e1e6986093f
|
||||
|
||||
@@ -1258,29 +1258,29 @@ panels:
|
||||
pop: ((100 * AVG(((TCC_REQ_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk / 1000) * 64) * TO_INT($total_l2_chan)))
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBandwidth
|
||||
pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum -
|
||||
TCC_EA_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp))))
|
||||
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum -
|
||||
TCC_EA0_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp))))
|
||||
/ $hbmBandwidth)
|
||||
L2-Fabric Write BW:
|
||||
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBandwidth
|
||||
pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum -
|
||||
TCC_EA_WRREQ_64B_sum) * 32)) / (End_Timestamp - Start_Timestamp))))
|
||||
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum -
|
||||
TCC_EA0_WRREQ_64B_sum) * 32)) / (End_Timestamp - Start_Timestamp))))
|
||||
/ $hbmBandwidth)
|
||||
L2-Fabric Read Latency:
|
||||
value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
peak: None
|
||||
pop: None
|
||||
L2-Fabric Write Latency:
|
||||
value: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
value: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
peak: None
|
||||
@@ -2423,24 +2423,24 @@ panels:
|
||||
+ TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) if ((TCP_TCC_WRITE_REQ_sum +
|
||||
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) != 0) else None)), 0)
|
||||
Fabric_L2 Rd:
|
||||
value: ROUND(AVG((TCC_EA_RDREQ_sum / $denom)), 0)
|
||||
value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0)
|
||||
Fabric_L2 Wr:
|
||||
value: ROUND(AVG((TCC_EA_WRREQ_sum / $denom)), 0)
|
||||
value: ROUND(AVG((TCC_EA0_WRREQ_sum / $denom)), 0)
|
||||
Fabric_L2 Atomic:
|
||||
value: ROUND(AVG((TCC_EA_ATOMIC_sum / $denom)), 0)
|
||||
value: ROUND(AVG((TCC_EA0_ATOMIC_sum / $denom)), 0)
|
||||
Fabric Rd Lat:
|
||||
value: ROUND(AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
value: ROUND(AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else 0)), 0)
|
||||
Fabric Wr Lat:
|
||||
value: ROUND(AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
value: ROUND(AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else 0)), 0)
|
||||
Fabric Atomic Lat:
|
||||
value: ROUND(AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
|
||||
value: ROUND(AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else 0)), 0)
|
||||
HBM Rd:
|
||||
value: ROUND(AVG((TCC_EA_RDREQ_DRAM_sum / $denom)), 0)
|
||||
value: ROUND(AVG((TCC_EA0_RDREQ_DRAM_sum / $denom)), 0)
|
||||
HBM Wr:
|
||||
value: ROUND(AVG((TCC_EA_WRREQ_DRAM_sum / $denom)), 0)
|
||||
value: ROUND(AVG((TCC_EA0_WRREQ_DRAM_sum / $denom)), 0)
|
||||
comparable: false
|
||||
cli_style: mem_chart
|
||||
tui_style: mem_chart
|
||||
@@ -13064,11 +13064,11 @@ panels:
|
||||
+ TCC_MISS_sum) != 0) else 0))
|
||||
unit: pct
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
L2-Fabric Write and Atomic BW:
|
||||
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
HBM Bandwidth:
|
||||
@@ -13118,13 +13118,13 @@ panels:
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Write and Atomic BW:
|
||||
avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: Gbps
|
||||
avg: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
min: MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
max: MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
HBM Write and Atomic Traffic:
|
||||
avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
!= 0) else None))
|
||||
@@ -13590,99 +13590,99 @@ panels:
|
||||
unit: pct
|
||||
gfx908:
|
||||
Read BW:
|
||||
avg: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: Gbps
|
||||
avg: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / $denom))
|
||||
min: MIN((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / $denom))
|
||||
max: MAX((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
HBM Read Traffic:
|
||||
avg: AVG((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
avg: AVG((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
min: MIN((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
max: MAX((100 * (TCC_EA0_RDREQ_DRAM_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Remote Read Traffic:
|
||||
avg: AVG((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
|
||||
if (TCC_EA_RDREQ_sum != 0) else None))
|
||||
min: MIN((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
|
||||
if (TCC_EA_RDREQ_sum != 0) else None))
|
||||
max: MAX((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum)
|
||||
if (TCC_EA_RDREQ_sum != 0) else None))
|
||||
avg: AVG((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
|
||||
if (TCC_EA0_RDREQ_sum != 0) else None))
|
||||
min: MIN((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
|
||||
if (TCC_EA0_RDREQ_sum != 0) else None))
|
||||
max: MAX((100 * ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum) / TCC_EA0_RDREQ_sum)
|
||||
if (TCC_EA0_RDREQ_sum != 0) else None))
|
||||
unit: pct
|
||||
Uncached Read Traffic:
|
||||
avg: AVG((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
avg: AVG((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
min: MIN((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
max: MAX((100 * (TCC_EA0_RD_UNCACHED_32B_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Write and Atomic BW:
|
||||
avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: Gbps
|
||||
avg: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
min: MIN((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
max: MAX((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / $denom))
|
||||
unit: (Bytes + $normUnit)
|
||||
HBM Write and Atomic Traffic:
|
||||
avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
avg: AVG((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
min: MIN((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
max: MAX((100 * (TCC_EA0_WRREQ_DRAM_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Remote Write and Atomic Traffic:
|
||||
avg: AVG((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
|
||||
if (TCC_EA_WRREQ_sum != 0) else None))
|
||||
min: MIN((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
|
||||
if (TCC_EA_WRREQ_sum != 0) else None))
|
||||
max: MAX((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum)
|
||||
if (TCC_EA_WRREQ_sum != 0) else None))
|
||||
avg: AVG((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
|
||||
if (TCC_EA0_WRREQ_sum != 0) else None))
|
||||
min: MIN((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
|
||||
if (TCC_EA0_WRREQ_sum != 0) else None))
|
||||
max: MAX((100 * ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum) / TCC_EA0_WRREQ_sum)
|
||||
if (TCC_EA0_WRREQ_sum != 0) else None))
|
||||
unit: pct
|
||||
Atomic Traffic:
|
||||
avg: AVG((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
avg: AVG((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
min: MIN((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
max: MAX((100 * (TCC_EA0_ATOMIC_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Uncached Write and Atomic Traffic:
|
||||
avg: AVG((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
avg: AVG((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
min: MIN((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
max: MAX((100 * (TCC_EA0_WR_UNCACHED_32B_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: pct
|
||||
Read Latency:
|
||||
avg: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
avg: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
min: MIN(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
max: MAX(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
Write and Atomic Latency:
|
||||
avg: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
avg: AVG(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
min: MIN(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
min: MIN(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
max: MAX(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum
|
||||
max: MAX(((TCC_EA0_WRREQ_LEVEL_sum / TCC_EA0_WRREQ_sum) if (TCC_EA0_WRREQ_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
Atomic Latency:
|
||||
avg: AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
|
||||
avg: AVG(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else None))
|
||||
min: MIN(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
|
||||
min: MIN(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else None))
|
||||
max: MAX(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum
|
||||
max: MAX(((TCC_EA0_ATOMIC_LEVEL_sum / TCC_EA0_ATOMIC_sum) if (TCC_EA0_ATOMIC_sum
|
||||
!= 0) else None))
|
||||
unit: Cycles
|
||||
- metric_table:
|
||||
@@ -14840,59 +14840,59 @@ panels:
|
||||
unit: Gbps
|
||||
gfx908:
|
||||
Read (32B):
|
||||
avg: AVG((TCC_EA_RDREQ_32B_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_32B_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_32B_sum / $denom))
|
||||
avg: AVG((TCC_EA0_RDREQ_32B_sum / $denom))
|
||||
min: MIN((TCC_EA0_RDREQ_32B_sum / $denom))
|
||||
max: MAX((TCC_EA0_RDREQ_32B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Read (64B):
|
||||
avg: AVG(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
|
||||
min: MIN(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
|
||||
max: MAX(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom))
|
||||
avg: AVG(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
|
||||
min: MIN(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
|
||||
max: MAX(((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Read (Uncached):
|
||||
avg: AVG((TCC_EA_RD_UNCACHED_32B_sum / $denom))
|
||||
min: MIN((TCC_EA_RD_UNCACHED_32B_sum / $denom))
|
||||
max: MAX((TCC_EA_RD_UNCACHED_32B_sum / $denom))
|
||||
avg: AVG((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
|
||||
min: MIN((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
|
||||
max: MAX((TCC_EA0_RD_UNCACHED_32B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
HBM Read:
|
||||
avg: AVG((TCC_EA_RDREQ_DRAM_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_DRAM_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_DRAM_sum / $denom))
|
||||
avg: AVG((TCC_EA0_RDREQ_DRAM_sum / $denom))
|
||||
min: MIN((TCC_EA0_RDREQ_DRAM_sum / $denom))
|
||||
max: MAX((TCC_EA0_RDREQ_DRAM_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Remote Read:
|
||||
avg: AVG((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
|
||||
min: MIN((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
|
||||
max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom))
|
||||
avg: AVG((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
|
||||
min: MIN((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
|
||||
max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Write and Atomic (32B):
|
||||
avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
|
||||
max: MAX(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom))
|
||||
avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
max: MAX(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Write and Atomic (Uncached):
|
||||
avg: AVG((TCC_EA_WR_UNCACHED_32B_sum / $denom))
|
||||
min: MIN((TCC_EA_WR_UNCACHED_32B_sum / $denom))
|
||||
max: MAX((TCC_EA_WR_UNCACHED_32B_sum / $denom))
|
||||
avg: AVG((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
|
||||
min: MIN((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
|
||||
max: MAX((TCC_EA0_WR_UNCACHED_32B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Write and Atomic (64B):
|
||||
avg: AVG((TCC_EA_WRREQ_64B_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_64B_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_64B_sum / $denom))
|
||||
avg: AVG((TCC_EA0_WRREQ_64B_sum / $denom))
|
||||
min: MIN((TCC_EA0_WRREQ_64B_sum / $denom))
|
||||
max: MAX((TCC_EA0_WRREQ_64B_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
HBM Write and Atomic:
|
||||
avg: AVG((TCC_EA_WRREQ_DRAM_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_DRAM_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_DRAM_sum / $denom))
|
||||
avg: AVG((TCC_EA0_WRREQ_DRAM_sum / $denom))
|
||||
min: MIN((TCC_EA0_WRREQ_DRAM_sum / $denom))
|
||||
max: MAX((TCC_EA0_WRREQ_DRAM_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Remote Write and Atomic:
|
||||
avg: AVG((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
|
||||
min: MIN((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
|
||||
max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom))
|
||||
avg: AVG((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
|
||||
min: MIN((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
|
||||
max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
Atomic:
|
||||
avg: AVG((TCC_EA_ATOMIC_sum / $denom))
|
||||
min: MIN((TCC_EA_ATOMIC_sum / $denom))
|
||||
max: MAX((TCC_EA_ATOMIC_sum / $denom))
|
||||
avg: AVG((TCC_EA0_ATOMIC_sum / $denom))
|
||||
min: MIN((TCC_EA0_ATOMIC_sum / $denom))
|
||||
max: MAX((TCC_EA0_ATOMIC_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
metrics_description:
|
||||
Utilization:
|
||||
@@ -16268,9 +16268,9 @@ panels:
|
||||
::_1: $total_l2_chan
|
||||
gfx908:
|
||||
::_1:
|
||||
read req: AVG((TO_INT(TCC_EA_RDREQ[::_1]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_EA_WRREQ[::_1]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_EA_ATOMIC[::_1]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_EA0_RDREQ[::_1]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_EA0_WRREQ[::_1]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_EA0_ATOMIC[::_1]) / $denom))
|
||||
placeholder_range:
|
||||
::_1: $total_l2_chan
|
||||
cli_style: simple_multiple_bar
|
||||
@@ -16314,7 +16314,7 @@ panels:
|
||||
::_1: $total_l2_chan
|
||||
gfx908:
|
||||
::_1:
|
||||
expr: ((TCC_EA_RDREQ_LEVEL[::_1] / TCC_EA_RDREQ[::_1]) if (TCC_EA_RDREQ[::_1]
|
||||
expr: ((TCC_EA0_RDREQ_LEVEL[::_1] / TCC_EA0_RDREQ[::_1]) if (TCC_EA0_RDREQ[::_1]
|
||||
!= 0) else None)
|
||||
placeholder_range:
|
||||
::_1: $total_l2_chan
|
||||
@@ -16359,7 +16359,7 @@ panels:
|
||||
::_1: $total_l2_chan
|
||||
gfx908:
|
||||
::_1:
|
||||
expr: ((TCC_EA_WRREQ_LEVEL[::_1] / TCC_EA_WRREQ[::_1]) if (TCC_EA_WRREQ[::_1]
|
||||
expr: ((TCC_EA0_WRREQ_LEVEL[::_1] / TCC_EA0_WRREQ[::_1]) if (TCC_EA0_WRREQ[::_1]
|
||||
!= 0) else None)
|
||||
placeholder_range:
|
||||
::_1: $total_l2_chan
|
||||
@@ -16404,7 +16404,7 @@ panels:
|
||||
::_1: $total_l2_chan
|
||||
gfx908:
|
||||
::_1:
|
||||
expr: ((TCC_EA_ATOMIC_LEVEL[::_1] / TCC_EA_ATOMIC[::_1]) if (TCC_EA_ATOMIC[::_1]
|
||||
expr: ((TCC_EA0_ATOMIC_LEVEL[::_1] / TCC_EA0_ATOMIC[::_1]) if (TCC_EA0_ATOMIC[::_1]
|
||||
!= 0) else 0)
|
||||
placeholder_range:
|
||||
::_1: $total_l2_chan
|
||||
|
||||
Référencer dans un nouveau ticket
Bloquer un utilisateur