diff --git a/CHANGELOG.md b/CHANGELOG.md index e4bea1e960..e96c1d3144 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -83,6 +83,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * OS distribution support minimum for roofline feature is now Ubuntu22.04, RHEL9, and SLES15SP6 * Improve analysis block based filtering to accept metric id level filtering * This can be used to collect individual metrics from various sections of analysis config +* CLI analysis mode baseline comparison will now only compare common metrics across workloads and will not show Metric ID + * Remove metrics from analysis configuration files which are explicitly marked as empty or None ### Optimized diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml index 3fb76df8c3..2586d5bab1 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml @@ -32,12 +32,6 @@ Panel Config: peak: (((($max_sclk * $cu_per_gpu) * 64) * 2) / 1000) pop: None # No perf counter tips: - MFMA FLOPs (F8): - value: None # No HW module - unit: GFLOP/s - peak: None # No HW module - pop: None # No HW module - tips: MFMA FLOPs (BF16): value: None # No perf counter unit: GFLOP/s @@ -62,13 +56,6 @@ Panel Config: peak: ((($max_sclk * $cu_per_gpu) * 256) / 1000) pop: None # No perf counter tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - MFMA FLOPs (F6F4): - value: None - unit: GFLOP/s - peak: None - pop: None - tips: MFMA IOPs (Int8): value: None # No perf counter unit: GIOP/s diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/0300_mem_chart.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/0300_mem_chart.yaml index c1b3f01695..8ff885f13f 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/0300_mem_chart.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/0300_mem_chart.yaml @@ -45,10 +45,6 @@ Panel Config: #alias: valu_ value: ROUND(AVG((SQ_INSTS_VALU / $denom)), 0) tips: - MFMA: - #alias: mfma_ - value: None # No perf counter - tips: VMEM: #alias: vmem_ value: ROUND(AVG((SQ_INSTS_VMEM / $denom)), 0) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/0500_command-processor.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/0500_command-processor.yaml index 22213e355e..164b3552bf 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/0500_command-processor.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/0500_command-processor.yaml @@ -19,27 +19,6 @@ Panel Config: unit: Unit tips: Tips metric: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - CPC SYNC FIFO Full Rate: - avg: None - min: None - max: None - unit: pct - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - CPC CANE Stall Rate: - avg: None - min: None - max: None - unit: pct - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - CPC ADC Utilization: - avg: None - min: None - max: None - unit: pct - tips: CPF Utilization: avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE)) if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/0600_shader-processor-input.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/0600_shader-processor-input.yaml index 2459e41db9..c78c3645a0 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/0600_shader-processor-input.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/0600_shader-processor-input.yaml @@ -19,13 +19,6 @@ Panel Config: unit: Unit tips: Tips metric: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Schedule-Pipe Wave Occupancy: - avg: None - min: None - max: None - unit: Wave - tips: Accelerator Utilization: avg: AVG(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD) min: MIN(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD) @@ -38,13 +31,6 @@ Panel Config: max: MAX(100 * SPI_CSN_BUSY / ($GRBM_GUI_ACTIVE_PER_XCD * $pipes_per_gpu * $se_per_gpu)) unit: Pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Scheduler-Pipe Wave Utilization: - avg: None - min: None - max: None - unit: Pct - tips: Workgroup Manager Utilization: avg: AVG(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD) min: MIN(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD) @@ -122,13 +108,6 @@ Panel Config: 0) else None) unit: Pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Scheduler-Pipe FIFO Full Rate: - avg: None - min: None - max: None - unit: Pct - tips: Scheduler-Pipe Stall Rate: avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / ($GRBM_SPI_BUSY_PER_XCD * $se_per_gpu)) if ($GRBM_SPI_BUSY_PER_XCD != 0) else None)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/1000_compute-unit-instruction-mix.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/1000_compute-unit-instruction-mix.yaml index c4e282ac5c..d980e784a4 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/1000_compute-unit-instruction-mix.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/1000_compute-unit-instruction-mix.yaml @@ -19,30 +19,12 @@ Panel Config: unit: Unit tips: Tips metric: - VALU: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: (instr + $normUnit) - tips: - VMEM: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: (instr + $normUnit) - tips: LDS: avg: AVG((SQ_INSTS_LDS / $denom)) min: MIN((SQ_INSTS_LDS / $denom)) max: MAX((SQ_INSTS_LDS / $denom)) unit: (instr + $normUnit) tips: - MFMA: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: (instr + $normUnit) - tips: SALU: avg: AVG((SQ_INSTS_SALU / $denom)) min: MIN((SQ_INSTS_SALU / $denom)) @@ -73,96 +55,6 @@ Panel Config: unit: Unit tips: Tips metric: - INT32: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - INT64: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F16-ADD: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F16-MUL: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F16-FMA: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F16-Trans: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F32-ADD: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F32-MUL: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F32-FMA: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F32-Trans: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F64-ADD: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F64-MUL: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F64-FMA: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - F64-Trans: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - Conversion: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (instr + $normUnit) - tips: - metric_table: id: 1003 @@ -181,13 +73,6 @@ Panel Config: max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom)) unit: (instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - Spill/Stack Coalesceable Instr: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: Global/Generic Read: avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) @@ -242,46 +127,3 @@ Panel Config: unit: Unit tips: Tips metric: - MFMA-I8: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: (instr + $normUnit) - tips: - MFMA-F8: - avg: None # No HW module - min: None # No HW module - max: None # No HW module None # No HW module - unit: (instr + $normUnit) - tips: - MFMA-F16: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: (instr + $normUnit) - tips: - MFMA-BF16: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: (instr + $normUnit) - tips: - MFMA-F32: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: (instr + $normUnit) - tips: - MFMA-F64: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - MFMA-F6F4: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/1100_compute-unit-compute-pipeline.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/1100_compute-unit-compute-pipeline.yaml index 41bb9563f5..2021ff08ea 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/1100_compute-unit-compute-pipeline.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/1100_compute-unit-compute-pipeline.yaml @@ -19,61 +19,6 @@ Panel Config: pop: Pct of Peak tips: Tips metric: - VALU FLOPs: - value: None # No perf counter - unit: None - peak: None - pop: None - tips: - VALU IOPs: - value: None # No perf counter - unit: None - peak: None - pop: None - tips: - MFMA FLOPs (F8): - value: None # No perf counter - unit: GFLOP - peak: None # No perf counter - pop: None # No perf counter - tips: - MFMA FLOPs (BF16): - value: None # No perf counter - Unit: None - peak: None - pop: None - tips: - MFMA FLOPs (F16): - value: None # No perf counter - unit: None - peak: None - pop: None - tips: - MFMA FLOPs (F32): - value: None # No perf counter - unit: None - peak: None - pop: None - tips: - MFMA FLOPs (F64): - value: None # No perf counter - unit: None - peak: None - pop: None - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - MFMA FLOPs (F6F4): - value: None - unit: GFLOP - peak: None - pop: None - tips: - MFMA IOPs (INT8): - value: None # No perf counter - unit: None - peak: None - pop: None - tips: - metric_table: id: 1102 @@ -116,25 +61,6 @@ Panel Config: max: MAX((((100 * SQ_ACTIVE_INST_VALU) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) unit: pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - VALU Co-Issue Efficiency: - avg: None - min: None - max: None - unit: pct - tips: - VMEM Utilization: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: pct - tips: - Branch Utilization: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: pct - tips: VALU Active Threads: avg: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU != 0) else None)) @@ -144,32 +70,6 @@ Panel Config: != 0) else None)) unit: Threads tips: - MFMA Utilization: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: pct - tips: - MFMA Instr Cycles: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: cycles/instr - tips: - VMEM Latency: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: Cycles - coll_level: SQ_INST_LEVEL_VMEM - tips: - SMEM Latency: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: Cycles - coll_level: SQ_INST_LEVEL_SMEM - tips: - metric_table: id: 1103 @@ -182,58 +82,3 @@ Panel Config: unit: Unit tips: Tips metric: - FLOPs (Total): - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (OPs + $normUnit) - tips: - IOPs (Total): - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (OPs + $normUnit) - tips: - F8 OPs: - avg: None # No HW module - min: None # No HW module - max: None # No HW module - unit: (OPs + $normUnit) - tips: - F16 OPs: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (OPs + $normUnit) - tips: - BF16 OPs: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (OPs + $normUnit) - tips: - F32 OPs: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (OPs + $normUnit) - tips: - F64 OPs: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (OPs + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - F6F4 OPs: - avg: None - min: None - max: None - unit: (OPs + $normUnit) - tips: - INT8 OPs: - avg: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (OPs + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/1200_lds.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/1200_lds.yaml index 9ee74470a8..2c3fc34b2a 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/1200_lds.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/1200_lds.yaml @@ -55,48 +55,6 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (Instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS LOAD: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS STORE: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS ATOMIC: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS LOAD Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS STORE Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS ATOMIC Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: Theoretical Bandwidth: avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu)) / $denom)) @@ -158,17 +116,3 @@ Panel Config: max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom)) unit: (Accesses + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS Command FIFO Full Rate: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS Data FIFO Full Rate: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/1500_TA_and_TD.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/1500_TA_and_TD.yaml index c7d1a59133..a59975bf17 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/1500_TA_and_TD.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/1500_TA_and_TD.yaml @@ -43,27 +43,6 @@ Panel Config: max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))) unit: pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Address Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Command Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Data Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: Total Instructions: avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom)) min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom)) @@ -82,12 +61,6 @@ Panel Config: max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) unit: (Instructions + $normUnit) tips: - Global/Generic Read Instructions for LDS: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: Global/Generic Write Instructions: avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) @@ -112,12 +85,6 @@ Panel Config: max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) unit: (Instructions + $normUnit) tips: - Spill/Stack Read Instructions for LDS: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: Spill/Stack Write Instructions: avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) @@ -172,12 +139,6 @@ Panel Config: max: MAX(((100 * TD_TC_STALL_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))) unit: pct tips: - Workgroup manager → Data-Return Stall: - avg: # No perf counter - min: # No perf counter - max: # No perf counter - unit: pct - tips: Coalescable Instructions: avg: AVG((TD_COALESCABLE_WAVEFRONT_sum / $denom)) min: MIN((TD_COALESCABLE_WAVEFRONT_sum / $denom)) @@ -205,9 +166,3 @@ Panel Config: max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom)) unit: (Instructions + $normUnit) tips: - Write Ack Instructions: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/1600_L1_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/1600_L1_cache.yaml index ba19d17f2b..452fa277ab 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/1600_L1_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/1600_L1_cache.yaml @@ -189,30 +189,6 @@ Panel Config: + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) unit: (Bytes + $normUnit) tips: - Tag RAM 0 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 1 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 2 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 3 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: L1-L2 Read: avg: AVG((TCP_TCC_READ_REQ_sum / $denom)) min: MIN((TCP_TCC_READ_REQ_sum / $denom)) @@ -419,12 +395,6 @@ Panel Config: max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) units: (Req + $normUnit) tips: - Misses under Translation Miss: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Req + $normUnit) - tips: Permission Misses: avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) @@ -442,45 +412,3 @@ Panel Config: units: Units tips: Tips metric: - Cache Full Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Cache Miss Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Serialization Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Thrashing Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Latency FIFO Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Resident Page Full Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - UTCL2 Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/1700_L2_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/1700_L2_cache.yaml index 07c39cb7a6..e1c1bffd0f 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/1700_L2_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/1700_L2_cache.yaml @@ -143,18 +143,6 @@ Panel Config: != 0) else None)) unit: Cycles tips: - Read Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - metric_table: id: 1703 @@ -173,24 +161,6 @@ Panel Config: max: MAX((TCC_REQ_sum * 64) / $denom) unit: (Bytes + $normUnit) tips: - Read Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Req: avg: AVG((TCC_REQ_sum / $denom)) min: MIN((TCC_REQ_sum / $denom)) @@ -221,24 +191,12 @@ Panel Config: max: MAX((TCC_STREAMING_REQ_sum / $denom)) unit: (Req + $normUnit) tips: - Bypasss Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: Probe Req: avg: AVG((TCC_PROBE_sum / $denom)) min: MIN((TCC_PROBE_sum / $denom)) max: MAX((TCC_PROBE_sum / $denom)) unit: (Req + $normUnit) tips: - Input Buffer Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: Cache Hit: avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) @@ -326,24 +284,6 @@ Panel Config: unit: Unit tips: Tips metric: - Stalled on Latency FIFO: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - Stalled on Write Data FIFO: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - Input Buffer Stalled on L2: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - metric_table: id: 1705 @@ -360,54 +300,6 @@ Panel Config: style: type: simple_multi_bar metric: - Read - PCIe Stall: - type: PCIe Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Read - Infinity Fabric™ Stall: - type: Infinity Fabric™ Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Read - HBM Stall: - type: HBM Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - PCIe Stall: - type: PCIe Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - Infinity Fabric™ Stall: - type: Infinity Fabric™ Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - HBM Stall: - type: HBM Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: Write - Credit Starvation: type: Credit Starvation transaction: Write @@ -458,24 +350,6 @@ Panel Config: max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Read Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Write and Atomic (32B): avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) @@ -506,51 +380,9 @@ Panel Config: max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Write Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Atomic: avg: AVG((TCC_EA_ATOMIC_sum / $denom)) min: MIN((TCC_EA_ATOMIC_sum / $denom)) max: MAX((TCC_EA_ATOMIC_sum / $denom)) unit: (Req + $normUnit) tips: - Atomic - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Atomic Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx908/1800_L2_cache_per_channel.yaml b/src/rocprof_compute_soc/analysis_configs/gfx908/1800_L2_cache_per_channel.yaml index cbfe0ea38e..a787f360cf 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx908/1800_L2_cache_per_channel.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx908/1800_L2_cache_per_channel.yaml @@ -333,3 +333,18 @@ Panel Config: placeholder_range: "::_1": $total_l2_chan cli_style: simple_multiple_bar + + - metric_table: + id: 1812 + title: L2-Fabric (128B read requests per normUnit) + header: + metric: Channel + expr: Expression + metric: + "::_1": + expr: (TO_INT(TCC_BUBBLE[::_1]) / $denom) + placeholder_range: + "::_1": $total_l2_chan + # tips: Number of 128-byte read requests sent to EA + cli_style: simple_box + tui_style: simple_box diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml index b37a329903..8943b5a65e 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml @@ -42,12 +42,6 @@ Panel Config: pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp)))) / (((($max_sclk * $cu_per_gpu) * 64) * 2) / 1000)) tips: - MFMA FLOPs (F8): - value: None - unit: GFLOP/s - peak: None - pop: None - tips: MFMA FLOPs (BF16): value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP/s @@ -76,13 +70,6 @@ Panel Config: pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($max_sclk * $cu_per_gpu) * 256) / 1000)) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - MFMA FLOPs (F6F4): - value: None - unit: GFLOP/s - peak: None - pop: None - tips: MFMA IOPs (Int8): value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP/s diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/0500_command-processor.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/0500_command-processor.yaml index 834ca2de6b..164b3552bf 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx90a/0500_command-processor.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx90a/0500_command-processor.yaml @@ -19,24 +19,6 @@ Panel Config: unit: Unit tips: Tips metric: - CPC SYNC FIFO Full Rate: - avg: None - min: None - max: None - unit: pct - tips: - CPC CANE Stall Rate: - avg: None - min: None - max: None - unit: pct - tips: - CPC ADC Utilization: - avg: None - min: None - max: None - unit: pct - tips: CPF Utilization: avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE)) if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/0600_shader-processor-input.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/0600_shader-processor-input.yaml index 2459e41db9..c78c3645a0 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx90a/0600_shader-processor-input.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx90a/0600_shader-processor-input.yaml @@ -19,13 +19,6 @@ Panel Config: unit: Unit tips: Tips metric: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Schedule-Pipe Wave Occupancy: - avg: None - min: None - max: None - unit: Wave - tips: Accelerator Utilization: avg: AVG(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD) min: MIN(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD) @@ -38,13 +31,6 @@ Panel Config: max: MAX(100 * SPI_CSN_BUSY / ($GRBM_GUI_ACTIVE_PER_XCD * $pipes_per_gpu * $se_per_gpu)) unit: Pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Scheduler-Pipe Wave Utilization: - avg: None - min: None - max: None - unit: Pct - tips: Workgroup Manager Utilization: avg: AVG(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD) min: MIN(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD) @@ -122,13 +108,6 @@ Panel Config: 0) else None) unit: Pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Scheduler-Pipe FIFO Full Rate: - avg: None - min: None - max: None - unit: Pct - tips: Scheduler-Pipe Stall Rate: avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / ($GRBM_SPI_BUSY_PER_XCD * $se_per_gpu)) if ($GRBM_SPI_BUSY_PER_XCD != 0) else None)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/1000_compute-unit-instruction-mix.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/1000_compute-unit-instruction-mix.yaml index 93f303deb6..045f217ad8 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx90a/1000_compute-unit-instruction-mix.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx90a/1000_compute-unit-instruction-mix.yaml @@ -181,13 +181,6 @@ Panel Config: max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom)) unit: (instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - Spill/Stack Coalesceable Instr: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: Global/Generic Read: avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) @@ -248,12 +241,6 @@ Panel Config: max: MAX((SQ_INSTS_VALU_MFMA_I8 / $denom)) unit: (instr + $normUnit) tips: - MFMA-F8: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: MFMA-F16: avg: AVG((SQ_INSTS_VALU_MFMA_F16 / $denom)) min: MIN((SQ_INSTS_VALU_MFMA_F16 / $denom)) @@ -278,10 +265,3 @@ Panel Config: max: MAX((SQ_INSTS_VALU_MFMA_F64 / $denom)) unit: (instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - MFMA-F6F4: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml index 1ec04d7a3e..c54a6703e8 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml @@ -41,12 +41,6 @@ Panel Config: pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp)))) / (((($max_sclk * $cu_per_gpu) * 64) * 2) / 1000)) tips: - MFMA FLOPs (F8): - value: None - unit: GFLOP - peak: None - pop: None - tips: MFMA FLOPs (BF16): value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP @@ -75,13 +69,6 @@ Panel Config: pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($max_sclk * $cu_per_gpu) * 256) / 1000)) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - MFMA FLOPs (F6F4): - value: None - unit: GFLOP - peak: None - pop: None - tips: MFMA IOPs (INT8): value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP @@ -131,13 +118,6 @@ Panel Config: max: MAX((((100 * SQ_ACTIVE_INST_VALU) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) unit: pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - VALU Co-Issue Efficiency: - avg: None - min: None - max: None - unit: pct - tips: VMEM Utilization: avg: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) min: MIN((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) @@ -236,12 +216,6 @@ Panel Config: max: MAX(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) + (SQ_INSTS_VALU_MFMA_MOPS_I8 * 512)) / $denom) unit: (OPs + $normUnit) tips: - F8 OPs: - avg: None - min: None - max: None - unit: (OPs + $normUnit) - tips: F16 OPs: avg: AVG(((((((64 * SQ_INSTS_VALU_ADD_F16) + (64 * SQ_INSTS_VALU_MUL_F16)) + (64 * SQ_INSTS_VALU_TRANS_F16)) + (128 * SQ_INSTS_VALU_FMA_F16)) + (512 * @@ -278,13 +252,6 @@ Panel Config: + (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom)) unit: (OPs + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - F6F4 OPs: - avg: None - min: None - max: None - unit: (OPs + $normUnit) - tips: INT8 OPs: avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/1200_lds.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/1200_lds.yaml index 9ee74470a8..2c3fc34b2a 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx90a/1200_lds.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx90a/1200_lds.yaml @@ -55,48 +55,6 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (Instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS LOAD: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS STORE: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS ATOMIC: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS LOAD Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS STORE Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS ATOMIC Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: Theoretical Bandwidth: avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu)) / $denom)) @@ -158,17 +116,3 @@ Panel Config: max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom)) unit: (Accesses + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS Command FIFO Full Rate: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS Data FIFO Full Rate: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/1500_TA_and_TD.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/1500_TA_and_TD.yaml index 855c071506..8994d0b17d 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx90a/1500_TA_and_TD.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx90a/1500_TA_and_TD.yaml @@ -43,27 +43,6 @@ Panel Config: max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))) unit: pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Address Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Command Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Data Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: Total Instructions: avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom)) min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom)) @@ -82,12 +61,6 @@ Panel Config: max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) unit: (Instructions + $normUnit) tips: - Global/Generic Read Instructions for LDS: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: Global/Generic Write Instructions: avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) @@ -112,12 +85,6 @@ Panel Config: max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) unit: (Instructions + $normUnit) tips: - Spill/Stack Read Instructions for LDS: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: Spill/Stack Write Instructions: avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) @@ -205,9 +172,3 @@ Panel Config: max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom)) unit: (Instructions + $normUnit) tips: - Write Ack Instructions: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/1600_L1_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/1600_L1_cache.yaml index e9b787a737..5c14bae452 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx90a/1600_L1_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx90a/1600_L1_cache.yaml @@ -189,30 +189,6 @@ Panel Config: + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) unit: (Bytes + $normUnit) tips: - Tag RAM 0 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 1 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 2 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 3 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: L1-L2 Read: avg: AVG((TCP_TCC_READ_REQ_sum / $denom)) min: MIN((TCP_TCC_READ_REQ_sum / $denom)) @@ -419,12 +395,6 @@ Panel Config: max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) units: (Req + $normUnit) tips: - Misses under Translation Miss: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Req + $normUnit) - tips: Permission Misses: avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) @@ -442,45 +412,3 @@ Panel Config: units: Units tips: Tips metric: - Cache Full Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Cache Miss Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Serialization Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Thrashing Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Latency FIFO Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Resident Page Full Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - UTCL2 Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/1700_L2_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/1700_L2_cache.yaml index c55cbfc787..08a8a5d724 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx90a/1700_L2_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx90a/1700_L2_cache.yaml @@ -143,18 +143,6 @@ Panel Config: != 0) else None)) unit: Cycles tips: - Read Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - metric_table: id: 1703 @@ -173,24 +161,6 @@ Panel Config: max: MAX((TCC_REQ_sum * 128) / $denom) unit: (Bytes + $normUnit) tips: - Read Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Req: avg: AVG((TCC_REQ_sum / $denom)) min: MIN((TCC_REQ_sum / $denom)) @@ -221,24 +191,12 @@ Panel Config: max: MAX((TCC_STREAMING_REQ_sum / $denom)) unit: (Req + $normUnit) tips: - Bypasss Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: Probe Req: avg: AVG((TCC_PROBE_sum / $denom)) min: MIN((TCC_PROBE_sum / $denom)) max: MAX((TCC_PROBE_sum / $denom)) unit: (Req + $normUnit) tips: - Input Buffer Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: Cache Hit: avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) @@ -326,24 +284,6 @@ Panel Config: unit: Unit tips: Tips metric: - Stalled on Latency FIFO: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - Stalled on Write Data FIFO: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - Input Buffer Stalled on L2: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - metric_table: id: 1705 @@ -360,54 +300,6 @@ Panel Config: style: type: simple_multi_bar metric: - Read - PCIe Stall: - type: PCIe Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Read - Infinity Fabric™ Stall: - type: Infinity Fabric™ Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Read - HBM Stall: - type: HBM Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - PCIe Stall: - type: PCIe Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - Infinity Fabric™ Stall: - type: Infinity Fabric™ Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - HBM Stall: - type: HBM Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: Write - Credit Starvation: type: Credit Starvation transaction: Write @@ -458,24 +350,6 @@ Panel Config: max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Read Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Write and Atomic (32B): avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) @@ -506,51 +380,9 @@ Panel Config: max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Write Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Atomic: avg: AVG((TCC_EA_ATOMIC_sum / $denom)) min: MIN((TCC_EA_ATOMIC_sum / $denom)) max: MAX((TCC_EA_ATOMIC_sum / $denom)) unit: (Req + $normUnit) tips: - Atomic - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Atomic Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/1800_L2_cache_per_channel.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/1800_L2_cache_per_channel.yaml index cbfe0ea38e..a787f360cf 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx90a/1800_L2_cache_per_channel.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx90a/1800_L2_cache_per_channel.yaml @@ -333,3 +333,18 @@ Panel Config: placeholder_range: "::_1": $total_l2_chan cli_style: simple_multiple_bar + + - metric_table: + id: 1812 + title: L2-Fabric (128B read requests per normUnit) + header: + metric: Channel + expr: Expression + metric: + "::_1": + expr: (TO_INT(TCC_BUBBLE[::_1]) / $denom) + placeholder_range: + "::_1": $total_l2_chan + # tips: Number of 128-byte read requests sent to EA + cli_style: simple_box + tui_style: simple_box diff --git a/src/rocprof_compute_soc/analysis_configs/gfx940/0200_system-speed-of-light.yaml b/src/rocprof_compute_soc/analysis_configs/gfx940/0200_system-speed-of-light.yaml index 75d3096cbf..68687f1c28 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx940/0200_system-speed-of-light.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx940/0200_system-speed-of-light.yaml @@ -77,13 +77,6 @@ Panel Config: pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($max_sclk * $cu_per_gpu) * 256) / 1000)) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - MFMA FLOPs (F6F4): - value: None - unit: GFLOP/s - peak: None - pop: None - tips: MFMA IOPs (Int8): value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP/s diff --git a/src/rocprof_compute_soc/analysis_configs/gfx940/0500_command-processor.yaml b/src/rocprof_compute_soc/analysis_configs/gfx940/0500_command-processor.yaml index 22213e355e..164b3552bf 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx940/0500_command-processor.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx940/0500_command-processor.yaml @@ -19,27 +19,6 @@ Panel Config: unit: Unit tips: Tips metric: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - CPC SYNC FIFO Full Rate: - avg: None - min: None - max: None - unit: pct - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - CPC CANE Stall Rate: - avg: None - min: None - max: None - unit: pct - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - CPC ADC Utilization: - avg: None - min: None - max: None - unit: pct - tips: CPF Utilization: avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE)) if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx940/0600_shader-processor-input.yaml b/src/rocprof_compute_soc/analysis_configs/gfx940/0600_shader-processor-input.yaml index 2459e41db9..c78c3645a0 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx940/0600_shader-processor-input.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx940/0600_shader-processor-input.yaml @@ -19,13 +19,6 @@ Panel Config: unit: Unit tips: Tips metric: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Schedule-Pipe Wave Occupancy: - avg: None - min: None - max: None - unit: Wave - tips: Accelerator Utilization: avg: AVG(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD) min: MIN(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD) @@ -38,13 +31,6 @@ Panel Config: max: MAX(100 * SPI_CSN_BUSY / ($GRBM_GUI_ACTIVE_PER_XCD * $pipes_per_gpu * $se_per_gpu)) unit: Pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Scheduler-Pipe Wave Utilization: - avg: None - min: None - max: None - unit: Pct - tips: Workgroup Manager Utilization: avg: AVG(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD) min: MIN(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD) @@ -122,13 +108,6 @@ Panel Config: 0) else None) unit: Pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Scheduler-Pipe FIFO Full Rate: - avg: None - min: None - max: None - unit: Pct - tips: Scheduler-Pipe Stall Rate: avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / ($GRBM_SPI_BUSY_PER_XCD * $se_per_gpu)) if ($GRBM_SPI_BUSY_PER_XCD != 0) else None)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx940/1000_compute-unit-instruction-mix.yaml b/src/rocprof_compute_soc/analysis_configs/gfx940/1000_compute-unit-instruction-mix.yaml index 12bbd3fc5e..83ba5367a7 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx940/1000_compute-unit-instruction-mix.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx940/1000_compute-unit-instruction-mix.yaml @@ -209,13 +209,6 @@ Panel Config: max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom)) unit: (instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - Spill/Stack Coalesceable Instr: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: Spill/Stack Read: avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) @@ -281,9 +274,4 @@ Panel Config: min: MIN((SQ_INSTS_VALU_MFMA_F64 / $denom)) max: MAX((SQ_INSTS_VALU_MFMA_F64 / $denom)) unit: (instr + $normUnit) - MFMA-F6F4: - avg: None - min: None - max: None - unit: (instr + $normUnit) tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx940/1100_compute-unit-compute-pipeline.yaml b/src/rocprof_compute_soc/analysis_configs/gfx940/1100_compute-unit-compute-pipeline.yaml index 85bc40baf2..3821a9d879 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx940/1100_compute-unit-compute-pipeline.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx940/1100_compute-unit-compute-pipeline.yaml @@ -76,13 +76,6 @@ Panel Config: pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($max_sclk * $cu_per_gpu) * 256) / 1000)) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - MFMA FLOPs (F6F4): - value: None - unit: GFLOP - peak: None - pop: None - tips: MFMA IOPs (INT8): value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP @@ -132,13 +125,6 @@ Panel Config: max: MAX((((100 * SQ_ACTIVE_INST_VALU) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) unit: pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - VALU Co-Issue Efficiency: - avg: None - min: None - max: None - unit: pct - tips: VMEM Utilization: avg: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) min: MIN((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) @@ -279,13 +265,6 @@ Panel Config: + (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom)) unit: (OPs + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - F6F4 OPs: - avg: None - min: None - max: None - unit: (OPs + $normUnit) - tips: INT8 OPs: avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx940/1200_lds.yaml b/src/rocprof_compute_soc/analysis_configs/gfx940/1200_lds.yaml index 7f054c2af6..c687e7c471 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx940/1200_lds.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx940/1200_lds.yaml @@ -55,48 +55,6 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (Instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS LOAD: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS STORE: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS ATOMIC: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS LOAD Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS STORE Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS ATOMIC Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: Theoretical Bandwidth: avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu)) / $denom)) @@ -158,17 +116,3 @@ Panel Config: max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom)) unit: (Accesses + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS Command FIFO Full Rate: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS Data FIFO Full Rate: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx940/1500_TA_and_TD.yaml b/src/rocprof_compute_soc/analysis_configs/gfx940/1500_TA_and_TD.yaml index 855c071506..8994d0b17d 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx940/1500_TA_and_TD.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx940/1500_TA_and_TD.yaml @@ -43,27 +43,6 @@ Panel Config: max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))) unit: pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Address Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Command Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Data Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: Total Instructions: avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom)) min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom)) @@ -82,12 +61,6 @@ Panel Config: max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) unit: (Instructions + $normUnit) tips: - Global/Generic Read Instructions for LDS: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: Global/Generic Write Instructions: avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) @@ -112,12 +85,6 @@ Panel Config: max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) unit: (Instructions + $normUnit) tips: - Spill/Stack Read Instructions for LDS: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: Spill/Stack Write Instructions: avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) @@ -205,9 +172,3 @@ Panel Config: max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom)) unit: (Instructions + $normUnit) tips: - Write Ack Instructions: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx940/1600_L1_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx940/1600_L1_cache.yaml index 13e508450c..7fabcfdb47 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx940/1600_L1_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx940/1600_L1_cache.yaml @@ -192,30 +192,6 @@ Panel Config: TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) unit: (Bytes + $normUnit) tips: - Tag RAM 0 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 1 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 2 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 3 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: L1-L2 Read: avg: AVG((TCP_TCC_READ_REQ_sum / $denom)) min: MIN((TCP_TCC_READ_REQ_sum / $denom)) @@ -237,24 +213,6 @@ Panel Config: / $denom)) unit: (Req + $normUnit) tips: - L1 Access Latency: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: Cycles - tips: - L1-L2 Read Latency: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: Cycles - tips: - L1-L2 Write Latency: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: Cycles - tips: - metric_table: id: 1604 @@ -410,12 +368,6 @@ Panel Config: max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) units: (Req + $normUnit) tips: - Misses under Translation Miss: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Req + $normUnit) - tips: Permission Misses: avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) @@ -433,45 +385,3 @@ Panel Config: units: Units tips: Tips metric: - Cache Full Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Cache Miss Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Serialization Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Thrashing Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Latency FIFO Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Resident Page Full Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - UTCL2 Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx940/1700_L2_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx940/1700_L2_cache.yaml index e7045e29df..4476ce7b15 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx940/1700_L2_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx940/1700_L2_cache.yaml @@ -146,18 +146,6 @@ Panel Config: != 0) else None)) unit: Cycles tips: - Read Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - metric_table: id: 1703 @@ -176,24 +164,6 @@ Panel Config: max: MAX((TCC_REQ_sum * 128) / $denom) unit: (Bytes + $normUnit) tips: - Read Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Req: avg: AVG((TCC_REQ_sum / $denom)) min: MIN((TCC_REQ_sum / $denom)) @@ -224,24 +194,12 @@ Panel Config: max: MAX((TCC_STREAMING_REQ_sum / $denom)) unit: (Req + $normUnit) tips: - Bypasss Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: Probe Req: avg: AVG((TCC_PROBE_sum / $denom)) min: MIN((TCC_PROBE_sum / $denom)) max: MAX((TCC_PROBE_sum / $denom)) unit: (Req + $normUnit) tips: - Input Buffer Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: Cache Hit: avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) @@ -329,24 +287,6 @@ Panel Config: unit: Unit tips: Tips metric: - Stalled on Latency FIFO: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - Stalled on Write Data FIFO: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - Input Buffer Stalled on L2: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - metric_table: id: 1705 @@ -363,54 +303,6 @@ Panel Config: style: type: simple_multi_bar metric: - Read - PCIe Stall: - type: PCIe Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Read - Infinity Fabric™ Stall: - type: Infinity Fabric™ Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Read - HBM Stall: - type: HBM Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - PCIe Stall: - type: PCIe Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - Infinity Fabric™ Stall: - type: Infinity Fabric™ Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - HBM Stall: - type: HBM Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: Write - Credit Starvation: type: Credit Starvation transaction: Write @@ -461,24 +353,6 @@ Panel Config: max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Read Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Write and Atomic (32B): avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom)) min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom)) @@ -509,51 +383,9 @@ Panel Config: max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Write Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Atomic: avg: AVG((TCC_EA0_ATOMIC_sum / $denom)) min: MIN((TCC_EA0_ATOMIC_sum / $denom)) max: MAX((TCC_EA0_ATOMIC_sum / $denom)) unit: (Req + $normUnit) tips: - Atomic - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Atomic Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx941/0200_system-speed-of-light.yaml b/src/rocprof_compute_soc/analysis_configs/gfx941/0200_system-speed-of-light.yaml index 75d3096cbf..68687f1c28 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx941/0200_system-speed-of-light.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx941/0200_system-speed-of-light.yaml @@ -77,13 +77,6 @@ Panel Config: pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($max_sclk * $cu_per_gpu) * 256) / 1000)) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - MFMA FLOPs (F6F4): - value: None - unit: GFLOP/s - peak: None - pop: None - tips: MFMA IOPs (Int8): value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP/s diff --git a/src/rocprof_compute_soc/analysis_configs/gfx941/0500_command-processor.yaml b/src/rocprof_compute_soc/analysis_configs/gfx941/0500_command-processor.yaml index e9d9927466..164b3552bf 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx941/0500_command-processor.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx941/0500_command-processor.yaml @@ -19,27 +19,6 @@ Panel Config: unit: Unit tips: Tips metric: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - CPC SYNC FIFO Full Rate: - avg: None - min: None - max: None - unit: pct - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - CPC CANE Stall Rate: - avg: None - min: None - max: None - unit: pct - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - CPC ADC Utilization: - avg: None - min: None - max: None - unit: pct - tips: CPF Utilization: avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE)) if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx941/0600_shader-processor-input.yaml b/src/rocprof_compute_soc/analysis_configs/gfx941/0600_shader-processor-input.yaml index 2459e41db9..c78c3645a0 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx941/0600_shader-processor-input.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx941/0600_shader-processor-input.yaml @@ -19,13 +19,6 @@ Panel Config: unit: Unit tips: Tips metric: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Schedule-Pipe Wave Occupancy: - avg: None - min: None - max: None - unit: Wave - tips: Accelerator Utilization: avg: AVG(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD) min: MIN(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD) @@ -38,13 +31,6 @@ Panel Config: max: MAX(100 * SPI_CSN_BUSY / ($GRBM_GUI_ACTIVE_PER_XCD * $pipes_per_gpu * $se_per_gpu)) unit: Pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Scheduler-Pipe Wave Utilization: - avg: None - min: None - max: None - unit: Pct - tips: Workgroup Manager Utilization: avg: AVG(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD) min: MIN(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD) @@ -122,13 +108,6 @@ Panel Config: 0) else None) unit: Pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Scheduler-Pipe FIFO Full Rate: - avg: None - min: None - max: None - unit: Pct - tips: Scheduler-Pipe Stall Rate: avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / ($GRBM_SPI_BUSY_PER_XCD * $se_per_gpu)) if ($GRBM_SPI_BUSY_PER_XCD != 0) else None)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx941/1000_compute-unit-instruction-mix.yaml b/src/rocprof_compute_soc/analysis_configs/gfx941/1000_compute-unit-instruction-mix.yaml index 56262065d0..83ba5367a7 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx941/1000_compute-unit-instruction-mix.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx941/1000_compute-unit-instruction-mix.yaml @@ -209,13 +209,6 @@ Panel Config: max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom)) unit: (instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - Spill/Stack Coalesceable Instr: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: Spill/Stack Read: avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) @@ -282,10 +275,3 @@ Panel Config: max: MAX((SQ_INSTS_VALU_MFMA_F64 / $denom)) unit: (instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - MFMA-F6F4: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx941/1100_compute-unit-compute-pipeline.yaml b/src/rocprof_compute_soc/analysis_configs/gfx941/1100_compute-unit-compute-pipeline.yaml index 85bc40baf2..3821a9d879 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx941/1100_compute-unit-compute-pipeline.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx941/1100_compute-unit-compute-pipeline.yaml @@ -76,13 +76,6 @@ Panel Config: pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($max_sclk * $cu_per_gpu) * 256) / 1000)) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - MFMA FLOPs (F6F4): - value: None - unit: GFLOP - peak: None - pop: None - tips: MFMA IOPs (INT8): value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP @@ -132,13 +125,6 @@ Panel Config: max: MAX((((100 * SQ_ACTIVE_INST_VALU) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) unit: pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - VALU Co-Issue Efficiency: - avg: None - min: None - max: None - unit: pct - tips: VMEM Utilization: avg: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) min: MIN((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) @@ -279,13 +265,6 @@ Panel Config: + (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom)) unit: (OPs + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - F6F4 OPs: - avg: None - min: None - max: None - unit: (OPs + $normUnit) - tips: INT8 OPs: avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx941/1200_lds.yaml b/src/rocprof_compute_soc/analysis_configs/gfx941/1200_lds.yaml index 7f054c2af6..c687e7c471 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx941/1200_lds.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx941/1200_lds.yaml @@ -55,48 +55,6 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (Instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS LOAD: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS STORE: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS ATOMIC: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS LOAD Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS STORE Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS ATOMIC Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: Theoretical Bandwidth: avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu)) / $denom)) @@ -158,17 +116,3 @@ Panel Config: max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom)) unit: (Accesses + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS Command FIFO Full Rate: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS Data FIFO Full Rate: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx941/1500_TA_and_TD.yaml b/src/rocprof_compute_soc/analysis_configs/gfx941/1500_TA_and_TD.yaml index 855c071506..8994d0b17d 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx941/1500_TA_and_TD.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx941/1500_TA_and_TD.yaml @@ -43,27 +43,6 @@ Panel Config: max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))) unit: pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Address Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Command Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Data Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: Total Instructions: avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom)) min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom)) @@ -82,12 +61,6 @@ Panel Config: max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) unit: (Instructions + $normUnit) tips: - Global/Generic Read Instructions for LDS: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: Global/Generic Write Instructions: avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) @@ -112,12 +85,6 @@ Panel Config: max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) unit: (Instructions + $normUnit) tips: - Spill/Stack Read Instructions for LDS: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: Spill/Stack Write Instructions: avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) @@ -205,9 +172,3 @@ Panel Config: max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom)) unit: (Instructions + $normUnit) tips: - Write Ack Instructions: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx941/1600_L1_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx941/1600_L1_cache.yaml index 13e508450c..7fabcfdb47 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx941/1600_L1_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx941/1600_L1_cache.yaml @@ -192,30 +192,6 @@ Panel Config: TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) unit: (Bytes + $normUnit) tips: - Tag RAM 0 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 1 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 2 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 3 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: L1-L2 Read: avg: AVG((TCP_TCC_READ_REQ_sum / $denom)) min: MIN((TCP_TCC_READ_REQ_sum / $denom)) @@ -237,24 +213,6 @@ Panel Config: / $denom)) unit: (Req + $normUnit) tips: - L1 Access Latency: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: Cycles - tips: - L1-L2 Read Latency: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: Cycles - tips: - L1-L2 Write Latency: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: Cycles - tips: - metric_table: id: 1604 @@ -410,12 +368,6 @@ Panel Config: max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) units: (Req + $normUnit) tips: - Misses under Translation Miss: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Req + $normUnit) - tips: Permission Misses: avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) @@ -433,45 +385,3 @@ Panel Config: units: Units tips: Tips metric: - Cache Full Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Cache Miss Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Serialization Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Thrashing Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Latency FIFO Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Resident Page Full Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - UTCL2 Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx941/1700_L2_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx941/1700_L2_cache.yaml index 3d34614ab8..5ac54a12a0 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx941/1700_L2_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx941/1700_L2_cache.yaml @@ -146,18 +146,6 @@ Panel Config: != 0) else None)) unit: Cycles tips: - Read Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - metric_table: id: 1703 @@ -176,24 +164,6 @@ Panel Config: max: MAX((TCC_REQ_sum * 128) / $denom) unit: (Bytes + $normUnit) tips: - Read Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Req: avg: AVG((TCC_REQ_sum / $denom)) min: MIN((TCC_REQ_sum / $denom)) @@ -224,24 +194,12 @@ Panel Config: max: MAX((TCC_STREAMING_REQ_sum / $denom)) unit: (Req + $normUnit) tips: - Bypasss Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: Probe Req: avg: AVG((TCC_PROBE_sum / $denom)) min: MIN((TCC_PROBE_sum / $denom)) max: MAX((TCC_PROBE_sum / $denom)) unit: (Req + $normUnit) tips: - Input Buffer Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: Cache Hit: avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) @@ -329,24 +287,6 @@ Panel Config: unit: Unit tips: Tips metric: - Stalled on Latency FIFO: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - Stalled on Write Data FIFO: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - Input Buffer Stalled on L2: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - metric_table: id: 1705 @@ -363,54 +303,6 @@ Panel Config: style: type: simple_multi_bar metric: - Read - PCIe Stall: - type: PCIe Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Read - Infinity Fabric™ Stall: - type: Infinity Fabric™ Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Read - HBM Stall: - type: HBM Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - PCIe Stall: - type: PCIe Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - Infinity Fabric™ Stall: - type: Infinity Fabric™ Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - HBM Stall: - type: HBM Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: Write - Credit Starvation: type: Credit Starvation transaction: Write @@ -461,24 +353,6 @@ Panel Config: max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Read Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Write and Atomic (32B): avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom)) min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom)) @@ -509,51 +383,9 @@ Panel Config: max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Write Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Atomic: avg: AVG((TCC_EA0_ATOMIC_sum / $denom)) min: MIN((TCC_EA0_ATOMIC_sum / $denom)) max: MAX((TCC_EA0_ATOMIC_sum / $denom)) unit: (Req + $normUnit) tips: - Atomic - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Atomic Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx942/0200_system-speed-of-light.yaml b/src/rocprof_compute_soc/analysis_configs/gfx942/0200_system-speed-of-light.yaml index 75d3096cbf..68687f1c28 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx942/0200_system-speed-of-light.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx942/0200_system-speed-of-light.yaml @@ -77,13 +77,6 @@ Panel Config: pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($max_sclk * $cu_per_gpu) * 256) / 1000)) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - MFMA FLOPs (F6F4): - value: None - unit: GFLOP/s - peak: None - pop: None - tips: MFMA IOPs (Int8): value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP/s diff --git a/src/rocprof_compute_soc/analysis_configs/gfx942/0500_command-processor.yaml b/src/rocprof_compute_soc/analysis_configs/gfx942/0500_command-processor.yaml index fcb59ac0bf..164b3552bf 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx942/0500_command-processor.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx942/0500_command-processor.yaml @@ -76,27 +76,6 @@ Panel Config: unit: Unit tips: Tips metric: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - CPC SYNC FIFO Full Rate: - avg: None - min: None - max: None - unit: pct - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - CPC CANE Stall Rate: - avg: None - min: None - max: None - unit: pct - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - CPC ADC Utilization: - avg: None - min: None - max: None - unit: pct - tips: CPC Utilization: avg: AVG((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE)) if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx942/0600_shader-processor-input.yaml b/src/rocprof_compute_soc/analysis_configs/gfx942/0600_shader-processor-input.yaml index 2459e41db9..c78c3645a0 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx942/0600_shader-processor-input.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx942/0600_shader-processor-input.yaml @@ -19,13 +19,6 @@ Panel Config: unit: Unit tips: Tips metric: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Schedule-Pipe Wave Occupancy: - avg: None - min: None - max: None - unit: Wave - tips: Accelerator Utilization: avg: AVG(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD) min: MIN(100 * $GRBM_GUI_ACTIVE_PER_XCD / $GRBM_COUNT_PER_XCD) @@ -38,13 +31,6 @@ Panel Config: max: MAX(100 * SPI_CSN_BUSY / ($GRBM_GUI_ACTIVE_PER_XCD * $pipes_per_gpu * $se_per_gpu)) unit: Pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Scheduler-Pipe Wave Utilization: - avg: None - min: None - max: None - unit: Pct - tips: Workgroup Manager Utilization: avg: AVG(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD) min: MIN(100 * $GRBM_SPI_BUSY_PER_XCD / $GRBM_GUI_ACTIVE_PER_XCD) @@ -122,13 +108,6 @@ Panel Config: 0) else None) unit: Pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Scheduler-Pipe FIFO Full Rate: - avg: None - min: None - max: None - unit: Pct - tips: Scheduler-Pipe Stall Rate: avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / ($GRBM_SPI_BUSY_PER_XCD * $se_per_gpu)) if ($GRBM_SPI_BUSY_PER_XCD != 0) else None)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx942/1000_compute-unit-instruction-mix.yaml b/src/rocprof_compute_soc/analysis_configs/gfx942/1000_compute-unit-instruction-mix.yaml index 56262065d0..83ba5367a7 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx942/1000_compute-unit-instruction-mix.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx942/1000_compute-unit-instruction-mix.yaml @@ -209,13 +209,6 @@ Panel Config: max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom)) unit: (instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then remove this - Spill/Stack Coalesceable Instr: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: Spill/Stack Read: avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) @@ -282,10 +275,3 @@ Panel Config: max: MAX((SQ_INSTS_VALU_MFMA_F64 / $denom)) unit: (instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - MFMA-F6F4: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx942/1100_compute-unit-compute-pipeline.yaml b/src/rocprof_compute_soc/analysis_configs/gfx942/1100_compute-unit-compute-pipeline.yaml index ef00cfdc81..95fbf70a9c 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx942/1100_compute-unit-compute-pipeline.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx942/1100_compute-unit-compute-pipeline.yaml @@ -76,13 +76,6 @@ Panel Config: pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($max_sclk * $cu_per_gpu) * 256) / 1000)) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - MFMA FLOPs (F6F4): - value: None - unit: GFLOP - peak: None - pop: None - tips: MFMA IOPs (INT8): value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP @@ -132,13 +125,6 @@ Panel Config: max: MAX((((100 * SQ_ACTIVE_INST_VALU) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) unit: pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - VALU Co-Issue Efficiency: - avg: None - min: None - max: None - unit: pct - tips: VMEM Utilization: avg: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) min: MIN((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / $GRBM_GUI_ACTIVE_PER_XCD) / $cu_per_gpu)) @@ -279,13 +265,6 @@ Panel Config: + (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom)) unit: (OPs + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - F6F4 OPs: - avg: None - min: None - max: None - unit: (OPs + $normUnit) - tips: INT8 OPs: avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) diff --git a/src/rocprof_compute_soc/analysis_configs/gfx942/1200_lds.yaml b/src/rocprof_compute_soc/analysis_configs/gfx942/1200_lds.yaml index 55d8c61172..797178a0b3 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx942/1200_lds.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx942/1200_lds.yaml @@ -56,48 +56,6 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (Instr + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS LOAD: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS STORE: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS ATOMIC: - avg: None - min: None - max: None - unit: (instr + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS LOAD Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS STORE Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS ATOMIC Bandwidth: - avg: None - min: None - max: None - units: Gbps - tips: Theoretical Bandwidth: avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu)) / $denom)) @@ -159,17 +117,3 @@ Panel Config: max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom)) unit: (Accesses + $normUnit) tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS Command FIFO Full Rate: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - LDS Data FIFO Full Rate: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx942/1500_TA_and_TD.yaml b/src/rocprof_compute_soc/analysis_configs/gfx942/1500_TA_and_TD.yaml index 855c071506..8994d0b17d 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx942/1500_TA_and_TD.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx942/1500_TA_and_TD.yaml @@ -43,27 +43,6 @@ Panel Config: max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))) unit: pct tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Address Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Command Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: - # TODO: Fix baseline comparision logic to handle non existent metrics, then - Sequencer → TA Data Stall: - avg: None - min: None - max: None - unit: (Cycles + $normUnit) - tips: Total Instructions: avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom)) min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom)) @@ -82,12 +61,6 @@ Panel Config: max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) unit: (Instructions + $normUnit) tips: - Global/Generic Read Instructions for LDS: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: Global/Generic Write Instructions: avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) @@ -112,12 +85,6 @@ Panel Config: max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) unit: (Instructions + $normUnit) tips: - Spill/Stack Read Instructions for LDS: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: Spill/Stack Write Instructions: avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) @@ -205,9 +172,3 @@ Panel Config: max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom)) unit: (Instructions + $normUnit) tips: - Write Ack Instructions: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Instructions + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx942/1600_L1_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx942/1600_L1_cache.yaml index eebf3baac6..ef81f78e5c 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx942/1600_L1_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx942/1600_L1_cache.yaml @@ -194,30 +194,6 @@ Panel Config: TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) unit: (Bytes + $normUnit) tips: - Tag RAM 0 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 1 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 2 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Tag RAM 3 Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: L1-L2 Read: avg: AVG((TCP_TCC_READ_REQ_sum / $denom)) min: MIN((TCP_TCC_READ_REQ_sum / $denom)) @@ -239,24 +215,6 @@ Panel Config: / $denom)) unit: (Req + $normUnit) tips: - L1 Access Latency: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: Cycles - tips: - L1-L2 Read Latency: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: Cycles - tips: - L1-L2 Write Latency: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: Cycles - tips: - metric_table: id: 1604 @@ -412,12 +370,6 @@ Panel Config: max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) units: (Req + $normUnit) tips: - Misses under Translation Miss: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Req + $normUnit) - tips: Permission Misses: avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) @@ -435,45 +387,3 @@ Panel Config: units: Units tips: Tips metric: - Cache Full Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Cache Miss Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Serialization Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Thrashing Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Latency FIFO Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - Resident Page Full Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: - UTCL2 Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - units: (Cycles + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx942/1700_L2_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx942/1700_L2_cache.yaml index 656534081b..4ce319bf9e 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx942/1700_L2_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx942/1700_L2_cache.yaml @@ -150,18 +150,6 @@ Panel Config: != 0) else None)) unit: Cycles tips: - Read Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write Stall: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - metric_table: id: 1703 @@ -180,24 +168,6 @@ Panel Config: max: MAX((TCC_REQ_sum * 128) / $denom) unit: (Bytes + $normUnit) tips: - Read Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Req: avg: AVG((TCC_REQ_sum / $denom)) min: MIN((TCC_REQ_sum / $denom)) @@ -228,24 +198,12 @@ Panel Config: max: MAX((TCC_STREAMING_REQ_sum / $denom)) unit: (Req + $normUnit) tips: - Bypasss Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: Probe Req: avg: AVG((TCC_PROBE_sum / $denom)) min: MIN((TCC_PROBE_sum / $denom)) max: MAX((TCC_PROBE_sum / $denom)) unit: (Req + $normUnit) tips: - Input Buffer Req: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: Cache Hit: avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) @@ -333,24 +291,6 @@ Panel Config: unit: Unit tips: Tips metric: - Stalled on Latency FIFO: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - Stalled on Write Data FIFO: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - Input Buffer Stalled on L2: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Cycles + $normUnit) - tips: - metric_table: id: 1705 @@ -367,54 +307,6 @@ Panel Config: style: type: simple_multi_bar metric: - Read - PCIe Stall: - type: PCIe Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Read - Infinity Fabric™ Stall: - type: Infinity Fabric™ Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Read - HBM Stall: - type: HBM Stall - transaction: Read - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - PCIe Stall: - type: PCIe Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - Infinity Fabric™ Stall: - type: Infinity Fabric™ Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: - Write - HBM Stall: - type: HBM Stall - transaction: Write - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: pct - tips: Write - Credit Starvation: type: Credit Starvation transaction: Write @@ -471,24 +363,6 @@ Panel Config: max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Read Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Write and Atomic (32B): avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom)) min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom)) @@ -519,51 +393,9 @@ Panel Config: max: MAX((MAX((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Write Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Write Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: Atomic: avg: AVG((TCC_EA0_ATOMIC_sum / $denom)) min: MIN((TCC_EA0_ATOMIC_sum / $denom)) max: MAX((TCC_EA0_ATOMIC_sum / $denom)) unit: (Req + $normUnit) tips: - Atomic - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Req + $normUnit) - tips: - Atomic Bandwidth - PCIe: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth - Infinity Fabric™: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: - Atomic Bandwidth - HBM: - avg: None # Missing perfmon - min: None # Missing perfmon - max: None # Missing perfmon - unit: (Bytes + $normUnit) - tips: diff --git a/src/rocprof_compute_soc/analysis_configs/gfx950/1700_L2_cache.yaml b/src/rocprof_compute_soc/analysis_configs/gfx950/1700_L2_cache.yaml index b165ff8fc7..faf9664766 100644 --- a/src/rocprof_compute_soc/analysis_configs/gfx950/1700_L2_cache.yaml +++ b/src/rocprof_compute_soc/analysis_configs/gfx950/1700_L2_cache.yaml @@ -473,24 +473,6 @@ Panel Config: max: MAX((MAX((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_DRAM_sum), 0) / $denom)) unit: (Req + $normUnit) tips: - Read Bandwidth - PCIe: - avg: None # AVG(TCC_EA0_RDREQ_IO_32B_sum / $denom) - min: None # MIN(TCC_EA0_RDREQ_IO_32B_sum / $denom) - max: None # MAX(TCC_EA0_RDREQ_IO_32B_sum / $denom) - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - Infinity Fabric™: - avg: None # AVG(TCC_EA0_RDREQ_GMI_32B_sum / $denom) - min: None # MIN(TCC_EA0_RDREQ_GMI_32B_sum / $denom) - max: None # MAX(TCC_EA0_RDREQ_GMI_32B_sum / $denom) - unit: (Bytes + $normUnit) - tips: - Read Bandwidth - HBM: - avg: None # AVG(TCC_EA0_RDREQ_DRAM_32B_sum / $denom) - min: None # MIN(TCC_EA0_RDREQ_DRAM_32B_sum / $denom) - max: None # MAX(TCC_EA0_RDREQ_DRAM_32B_sum / $denom) - unit: (Bytes + $normUnit) - tips: Write and Atomic (32B): avg: AVG(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom)) min: MIN(((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) / $denom)) diff --git a/src/utils/file_io.py b/src/utils/file_io.py index 264ed08ac9..697ab2ba28 100644 --- a/src/utils/file_io.py +++ b/src/utils/file_io.py @@ -75,6 +75,12 @@ def load_panel_configs(dir): if f.endswith(".yaml"): with open(str(Path(root).joinpath(f))) as file: config = yaml.safe_load(file) + # metric key can be None due to some metric tables not having any metrics + # metric key should be empty dict instead of None + for data_source in config["Panel Config"]["data source"]: + metric_table = data_source.get("metric_table") + if metric_table and metric_table["metric"] is None: + metric_table["metric"] = {} d[config["Panel Config"]["id"]] = config["Panel Config"] # TODO: sort metrics as the header order in case they are not defined in the same order diff --git a/src/utils/tty.py b/src/utils/tty.py index ee73cf79cc..d8fe8b283f 100644 --- a/src/utils/tty.py +++ b/src/utils/tty.py @@ -114,6 +114,34 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None): show_roof_plot(roof_plot) continue + # Metrics baseline comparison mode + # We cannot guarantee that all runs have the same metrics. Only show common metrics. + if ( + type == "metric_table" + and "Metric" in table_config["header"].values() + and len(runs) > 1 + ): + # Common metrics across all runs + common_metrics = set() + for _, data in runs.items(): + if not common_metrics: + common_metrics = set(data.dfs[table_config["id"]]["Metric"]) + else: + common_metrics &= set(data.dfs[table_config["id"]]["Metric"]) + # Apply common metrics across all runs + # Reindex all runs based on first run + initial_index = None + for key in runs.keys(): + runs[key].dfs[table_config["id"]] = ( + runs[key] + .dfs[table_config["id"]] + .loc[lambda d: d["Metric"].isin(common_metrics)] + ) + if initial_index is None: + initial_index= runs[key].dfs[table_config["id"]].index + else: + runs[key].dfs[table_config["id"]].index = initial_index + # take the 1st run as baseline base_run, base_data = next(iter(runs.items())) base_df = base_data.dfs[table_config["id"]]