diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0200_system-speed-of-light.yaml index 774cb479e1..2b13ef9359 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0200_system-speed-of-light.yaml @@ -14,10 +14,10 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit peak: Peak - pop: PoP + pop: Pct of Peak tips: Tips metric: VALU FLOPs: @@ -28,7 +28,7 @@ Panel Config: tips: VALU IOPs: value: None # No perf counter - unit: GOPs + unit: GIOPs peak: (((($sclk * $numCU) * 64) * 2) / 1000) pop: None # No perf counter tips: @@ -68,25 +68,37 @@ Panel Config: peak: $numCU pop: ((100 * $numActiveCUs) / $numCU) tips: - SALU Util: + SALU Utilization: value: AVG(((100 * SQ_ACTIVE_INST_SCA) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct peak: 100 pop: AVG(((100 * SQ_ACTIVE_INST_SCA) / (GRBM_GUI_ACTIVE * $numCU))) tips: - VALU Util: + VALU Utilization: value: AVG(((100 * SQ_ACTIVE_INST_VALU) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct peak: 100 pop: AVG(((100 * SQ_ACTIVE_INST_VALU) / (GRBM_GUI_ACTIVE * $numCU))) tips: - MFMA Util: + MFMA Utilization: value: None # No HW module unit: pct peak: 100 pop: None # No HW module tips: - VALU Active Threads/Wave: + VMEM Utilization: + value: None # No HW module + unit: pct + peak: 100 + pop: None # No HW module + tips: + Branch Utilization: + value: None # No HW module + unit: pct + peak: 100 + pop: None # No HW module + tips: + VALU Active Threads: value: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU != 0) else None)) unit: Threads @@ -94,25 +106,29 @@ Panel Config: pop: (AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU != 0) else None)) * 1.5625) tips: - IPC - Issue: - 
value: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) - / SQ_ACTIVE_INST_ANY)) + IPC: + value: AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES)) unit: Instr/cycle peak: 5 - pop: ((100 * AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) - / SQ_ACTIVE_INST_ANY))) / 5) + pop: ((100 * AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES))) / 5) tips: - LDS BW: + Wavefront Occupancy: + value: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE)) + unit: Wavefronts + peak: ($maxWavesPerCU * $numCU) + pop: (100 * AVG(((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / ($maxWavesPerCU + * $numCU)))) + coll_level: SQ_LEVEL_WAVES + tips: + Theoretical LDS Bandwidth: value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (End_Timestamp - Start_Timestamp))) - unit: GB/sec + unit: GB/s peak: (($sclk * $numCU) * 0.128) pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: - LDS Bank Conflict: + LDS Bank Conflicts/Access: value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None)) unit: Conflicts/access @@ -120,35 +136,7 @@ Panel Config: pop: ((100 * AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))) / 32) tips: - Instr Cache Hit Rate: - value: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) - unit: pct - peak: 100 - pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) - tips: - Instr Cache BW: - value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) - unit: GB/s - peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - 
Start_Timestamp)) * 64))) / ((($sclk - / 1000) * 64) * $numSQC)) - tips: - Scalar L1D Cache Hit Rate: - value: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) - if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) - unit: pct - peak: 100 - pop: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) - if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) - tips: - Scalar L1D Cache BW: - value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) - unit: GB/s - peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk - / 1000) * 64) * $numSQC)) - tips: - Vector L1D Cache Hit Rate: + vL1D Cache Hit Rate: value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else @@ -160,7 +148,7 @@ Panel Config: TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else None)) tips: - Vector L1D Cache BW: + vL1D Cache BW: value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: ((($sclk / 1000) * 64) * $numCU) @@ -175,6 +163,13 @@ Panel Config: pop: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) tips: + L2 Cache BW: + value: AVG(((TCC_REQ_sum * 64) / (End_Timestamp - Start_Timestamp))) + unit: GB/s + peak: ((($sclk / 1000) * 64) * TO_INT($L2Banks)) + pop: ((100 * AVG(((TCC_REQ_sum * 64) / (End_Timestamp - Start_Timestamp)))) + / ((($sclk / 1000) * 64) * TO_INT($L2Banks))) + tips: L2-Fabric Read BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp))) @@ -195,36 +190,48 @@ Panel Config: value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) unit: Cycles - peak: '' - pop: '' + 
peak: None + pop: None tips: L2-Fabric Write Latency: value: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) unit: Cycles - peak: '' - pop: '' + peak: None + pop: None tips: - Wave Occupancy: - value: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE)) - unit: Wavefronts - peak: ($maxWavesPerCU * $numCU) - pop: (100 * AVG(((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / ($maxWavesPerCU - * $numCU)))) - coll_level: SQ_LEVEL_WAVES + sL1D Cache Hit Rate: + value: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) + if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) + unit: pct + peak: 100 + pop: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) + if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) tips: - Instr Fetch BW: - value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32)) + sL1D Cache BW: + value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s - peak: ((($sclk / 1000) * 32) * $numSQC) - pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC - * (($sclk / 1000) * 32))) - coll_level: SQ_IFETCH_LEVEL + peak: ((($sclk / 1000) * 64) * $numSQC) + pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk + / 1000) * 64) * $numSQC)) tips: - Instr Fetch Latency: + L1I Hit Rate: + value: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) + unit: pct + peak: 100 + pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) + tips: + L1I BW: + value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 32)) + unit: GB/s + peak: ((($sclk / 1000) * 32) * $numSQC) + pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 32))) / ((($sclk + / 1000) * 32) * $numSQC)) + tips: + L1I Fetch Latency: value: AVG((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) unit: Cycles - peak: '' - pop: '' + peak: None + pop: None coll_level: SQ_IFETCH_LEVEL tips: diff 
--git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0300_mem_chart.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0300_mem_chart.yaml new file mode 100644 index 0000000000..47211fbcab --- /dev/null +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0300_mem_chart.yaml @@ -0,0 +1,314 @@ +--- +# Add description/tips for each metric in this section. +# So it could be shown in hover. +Metric Description: + +# Define the panel properties and properties of each metric in the panel. +Panel Config: + id: 300 + title: Memory Chart + data source: + - metric_table: + id: 301 + title: Memory Chart + header: + metric: Metric + #alias: #alias + value: Value + tips: Tips + metric: + # ---------------------------------------- + # Instr Buff Block + + #TODO: double check wave_occupancy + Wavefront Occupancy: + #alias: wave_occ_ + value: ROUND(AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / $numActiveCUs), 0) + coll_level: SQ_LEVEL_WAVES + tips: + Wave Life: + #alias: wave_life_ + value: ROUND(AVG(((4 * (SQ_WAVE_CYCLES / SQ_WAVES)) if (SQ_WAVES != 0) else 0)), 0) + tips: + + # ---------------------------------------- + # Instr Dispatch Block + SALU: + #alias: salu_ + value: ROUND(AVG((SQ_INSTS_SALU / $denom)), 0) + tips: + SMEM: + #alias: smem_ + value: ROUND(AVG((SQ_INSTS_SMEM / $denom)), 0) + tips: + VALU: + #alias: valu_ + value: ROUND(AVG((SQ_INSTS_VALU / $denom)), 0) + tips: + MFMA: + #alias: mfma_ + value: None # No perf counter + tips: + VMEM: + #alias: vmem_ + value: ROUND(AVG((SQ_INSTS_VMEM / $denom)), 0) + tips: + LDS: + #alias: lds_ + value: ROUND(AVG((SQ_INSTS_LDS / $denom)), 0) + tips: + GWS: + #alias: gws_ + value: ROUND(AVG((SQ_INSTS_GDS / $denom)), 0) + tips: + BR: + #alias: br_ + value: ROUND(AVG((SQ_INSTS_BRANCH / $denom)), 0) + tips: + + # ---------------------------------------- + # Exec Block + Active CUs: + #alias: active_cu_ + value: $numActiveCUs + tips: + Num CUs: + #alias: 
num_cu_ + value: $numCU + tips: + VGPR: + #alias: vgpr_ + value: ROUND(AVG(vgpr), 0) + tips: + SGPR: + #alias: sgpr_ + value: ROUND(AVG(sgpr), 0) + tips: + LDS Allocation: + #alias: lds_alloc_ + value: ROUND(AVG(lds), 0) + tips: + Scratch Allocation: + #alias: scratch_alloc_ + value: ROUND(AVG(scr), 0) + tips: + Wavefronts: + #alias: wavefronts_ + value: ROUND(AVG(SPI_CSN_WAVE), 0) + tips: + Workgroups: + #alias: workgroups_ + value: ROUND(AVG(SPI_CSN_NUM_THREADGROUPS), 0) + tips: + + # ---------------------------------------- + # LDS Block + LDS Req: + #alias: lds_req_ + value: ROUND(AVG((SQ_INSTS_LDS / $denom)), 0) + tips: + LDS Util: + #alias: lds_util_ + value: + ROUND(AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU))), + 0) + tips: + LDS Latency: + #alias: lds_lat + value: ROUND(AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)),0) + coll_level: SQ_INST_LEVEL_LDS + tips: + + # ---------------------------------------- + # Vector L1 Cache Block + VL1 Rd: + #alias: vl1_rd_ + value: ROUND(AVG((TCP_TOTAL_READ_sum / $denom)), 0) + tips: + VL1 Wr: + #alias: vl1_wr_ + value: ROUND(AVG((TCP_TOTAL_WRITE_sum / $denom)), 0) + tips: + VL1 Atomic: + #alias: vl1_atom_ + value: + ROUND(AVG(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum) + / $denom)), 0) + tips: + + VL1 Hit: + #alias: vl1_hit_ + value: + ROUND(AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) + / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None )), 0) + tips: + VL1 Lat: + #alias: vl1_lat_ + value: + ROUND(AVG(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum + != 0) else None)), 0) + tips: + VL1 Coalesce: + #alias: vl1_coales_ + value: + ROUND(AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum + * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else 0)), 0) + tips: + VL1 Stall: + 
#alias: vl1_stall_ + value: + ROUND(AVG((((100 * TCP_TCR_TCP_STALL_CYCLES_sum) / TCP_GATE_EN1_sum) + if (TCP_GATE_EN1_sum != 0) else None)), 0) + tips: + + VL1_L2 Rd: + #alias: vl1_l2_rd_ + value: ROUND(AVG((TCP_TCC_READ_REQ_sum / $denom)), 0) + tips: + VL1_L2 Wr: + #alias: vl1_l2_wr_ + value: ROUND(AVG((TCP_TCC_WRITE_REQ_sum / $denom)), 0) + tips: + VL1_L2 Atomic: + #alias: vl1_l2_atom_ + value: + ROUND(AVG(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) + / $denom)), 0) + tips: + + # ---------------------------------------- + # Scalar L1D Cache Block + VL1D Rd: + #alias: sl1_rd_ + value: ROUND(AVG((SQC_DCACHE_REQ / $denom)), 0) + tips: + VL1D Hit: + #alias: sl1_hit_ + value: + ROUND((AVG(((SQC_DCACHE_HITS / SQC_DCACHE_REQ) if (SQC_DCACHE_REQ != + 0) else None)) * 100), 0) + tips: + VL1D Lat: + #alias: sl1_lat_ + value: + ROUND((AVG(((SQ_ACCUM_PREV_HIRES / SQC_DCACHE_REQ) if (SQC_DCACHE_REQ != + 0) else None)) * 100), 0) + coll_level: SQC_DCACHE_INFLIGHT_LEVEL + tips: + + VL1D_L2 Rd: + #alias: sl1_l2_rd_ + value: ROUND(AVG((SQC_TC_DATA_READ_REQ / $denom)), 0) + tips: + VL1D_L2 Wr: + #alias: sl1_l2_wr_ + value: ROUND(AVG((SQC_TC_DATA_WRITE_REQ / $denom)), 0) + tips: + VL1D_L2 Atomic: + #alias: sl1_l2_atom_ + value: ROUND(AVG((SQC_TC_DATA_ATOMIC_REQ / $denom)), 0) + tips: + + # ---------------------------------------- + # Instr L1 Cache Block + IL1 Fetch: + #alias: il1_fetch_ + value: ROUND(AVG((SQC_ICACHE_REQ / $denom)), 0) + tips: + IL1 Hit: + #alias: il1_hit_ + value: ROUND((AVG((SQC_ICACHE_HITS / SQC_ICACHE_REQ)) * 100), 0) + tips: + IL1 Lat: + #alias: il1_lat_ + value: + ROUND((AVG(((SQ_ACCUM_PREV_HIRES / SQC_ICACHE_REQ) if (SQC_ICACHE_REQ != + 0) else None)) * 100), 0) + tips: # ??? 
coll_level: SQ_IFETCH_LEVEL + IL1_L2 Rd: + #alias: il1_l2_req_ + value: ROUND(AVG((SQC_TC_INST_REQ / $denom)), 0) + tips: + + # ---------------------------------------- + # L2 Cache Block(inside) + L2 Rd: + #alias: l2_rd_ + value: ROUND(AVG((TCC_READ_sum / $denom)), 0) + tips: + L2 Wr: + #alias: l2_wr_ + value: ROUND(AVG((TCC_WRITE_sum / $denom)), 0) + tips: + L2 Atomic: + #alias: l2_atom_ + value: ROUND(AVG((TCC_ATOMIC_sum / $denom)), 0) + tips: + L2 Hit: + #alias: l2_hit_ + value: + ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + + TCC_MISS_sum) != 0) else 0)), 0) + tips: + L2 Rd Lat: + #alias: l2_rd_lat_ + value: + ROUND(AVG(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum)) + if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None)), + 0) + tips: + L2 Wr Lat: + #alias: l2_wr_lat_ + value: + ROUND(AVG(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum + + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) + != 0) else None)), 0) + tips: + + # ---------------------------------------- + # Fabric Block + Fabric_L2 Rd: + #alias: l2_fabric_rd_ + value: ROUND(AVG((TCC_EA_RDREQ_sum / $denom)), 0) + tips: + Fabric_L2 Wr: + #alias: l2_fabric_wr_ + value: ROUND(AVG((TCC_EA_WRREQ_sum / $denom)), 0) + tips: + Fabric_L2 Atomic: + #alias: l2_fabric_atom_ + value: ROUND(AVG((TCC_EA_ATOMIC_sum / $denom)), 0) + tips: + + Fabric Rd Lat: + #alias: fabric_rd_lat_ + value: + ROUND(AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum + != 0) else 0)), 0) + tips: + Fabric Wr Lat: + #alias: fabric_wr_lat_ + value: + ROUND(AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum + != 0) else 0)), 0) + tips: + Fabric Atomic Lat: + #alias: fabric_atom_lat_ + value: + ROUND(AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum + != 0) else 0)), 0) + tips: + + HBM Rd: + #alias: hbm_rd_ + 
value: ROUND(AVG((TCC_EA_RDREQ_DRAM_sum / $denom)), 0) + tips: + HBM Wr: + #alias: hbm_wr_ + value: ROUND(AVG((TCC_EA_WRREQ_DRAM_sum / $denom)), 0) + tips: + + comparable: false # for now + cli_style: mem_chart \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0500_command-processor.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0500_command-processor.yaml index 5250918799..edd42da6e3 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0500_command-processor.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0500_command-processor.yaml @@ -19,19 +19,7 @@ Panel Config: unit: Unit tips: Tips metric: - GPU Busy Cycles: - avg: AVG(GRBM_GUI_ACTIVE) - min: MIN(GRBM_GUI_ACTIVE) - max: MAX(GRBM_GUI_ACTIVE) - unit: Cycles/Kernel - tips: - CPF Busy: - avg: AVG(CPF_CPF_STAT_BUSY) - min: MIN(CPF_CPF_STAT_BUSY) - max: MAX(CPF_CPF_STAT_BUSY) - unit: Cycles/Kernel - tips: - CPF Util: + CPF Utilization: avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE)) if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None)) min: MIN((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE)) @@ -47,15 +35,9 @@ Panel Config: != 0) else None)) max: MAX((((100 * CPF_CPF_STAT_STALL) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY != 0) else None)) - unit: Cycles/Kernel + unit: pct tips: - L2Cache Intf Busy: - avg: AVG(CPF_CPF_TCIU_BUSY) - min: MIN(CPF_CPF_TCIU_BUSY) - max: MAX(CPF_CPF_TCIU_BUSY) - unit: Cycles/Kernel - tips: - L2Cache Intf Util: + CPF-L2 Utilization: avg: AVG((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE)) if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None)) min: MIN((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE)) @@ -64,7 +46,7 @@ Panel Config: if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None)) unit: pct tips: - L2Cache Intf 
Stall: + CPF-L2 Stall: avg: AVG((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY != 0) else None)) min: MIN((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY @@ -73,11 +55,14 @@ Panel Config: != 0) else None)) unit: pct tips: - UTCL1 Stall: - avg: AVG(CPF_CMP_UTCL1_STALL_ON_TRANSLATION) - min: MIN(CPF_CMP_UTCL1_STALL_ON_TRANSLATION) - max: MAX(CPF_CMP_UTCL1_STALL_ON_TRANSLATION) - unit: Cycles/Kernel + CPF-UTCL1 Stall: + avg: AVG(((100 * CPF_CMP_UTCL1_STALL_ON_TRANSLATION) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY + != 0) else None) + min: MIN(((100 * CPF_CMP_UTCL1_STALL_ON_TRANSLATION) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY + != 0) else None) + max: MAX(((100 * CPF_CMP_UTCL1_STALL_ON_TRANSLATION) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY + != 0) else None) + unit: pct tips: - metric_table: @@ -91,19 +76,7 @@ Panel Config: unit: Unit tips: Tips metric: - GPU Busy Cycles: - avg: AVG(GRBM_GUI_ACTIVE) - min: MIN(GRBM_GUI_ACTIVE) - max: MAX(GRBM_GUI_ACTIVE) - unit: Cycles - tips: - CPC Busy Cycles: - avg: AVG(CPC_CPC_STAT_BUSY) - min: MIN(CPC_CPC_STAT_BUSY) - max: MAX(CPC_CPC_STAT_BUSY) - unit: Cycles - tips: - CPC Util: + CPC Utilization: avg: AVG((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE)) if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None)) min: MIN((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE)) @@ -112,12 +85,6 @@ Panel Config: if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None)) unit: pct tips: - CPC Stall Cycles: - avg: AVG(CPC_CPC_STAT_STALL) - min: MIN(CPC_CPC_STAT_STALL) - max: MAX(CPC_CPC_STAT_STALL) - unit: Cycles - tips: CPC Stall Rate: avg: AVG((((100 * CPC_CPC_STAT_STALL) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY != 0) else None)) @@ -127,28 +94,19 @@ Panel Config: != 0) else None)) unit: pct tips: - CPC Packet Decoding: - avg: AVG(CPC_ME1_BUSY_FOR_PACKET_DECODE) - min: MIN(CPC_ME1_BUSY_FOR_PACKET_DECODE) - max: 
MAX(CPC_ME1_BUSY_FOR_PACKET_DECODE) - unit: Cycles - tips: - SPI Intf Busy Cycles: - avg: AVG(CPC_ME1_DC0_SPI_BUSY) - min: MIN(CPC_ME1_DC0_SPI_BUSY) - max: MAX(CPC_ME1_DC0_SPI_BUSY) - unit: Cycles - tips: - SPI Intf Util: - avg: AVG((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY - != 0) else None)) - min: MIN((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY - != 0) else None)) - max: MAX((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY - != 0) else None)) + CPC Packet Decoding Utilization: + avg: AVG((100 * CPC_ME1_BUSY_FOR_PACKET_DECODE) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + min: MIN((100 * CPC_ME1_BUSY_FOR_PACKET_DECODE) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + max: MAX((100 * CPC_ME1_BUSY_FOR_PACKET_DECODE) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) unit: pct tips: - L2Cache Intf Util: + CPC-Workgroup Manager Utilization: + avg: AVG((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + min: MIN((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + max: MAX((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + unit: Pct + tips: + CPC-L2 Utilization: avg: AVG((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE)) if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None)) min: MIN((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE)) @@ -157,19 +115,16 @@ Panel Config: if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None)) unit: pct tips: - UTCL1 Stall Cycles: - avg: AVG(CPC_UTCL1_STALL_ON_TRANSLATION) - min: MIN(CPC_UTCL1_STALL_ON_TRANSLATION) - max: MAX(CPC_UTCL1_STALL_ON_TRANSLATION) - unit: Cycles + CPC-UTCL1 Stall: + avg: AVG(((100 * CPC_UTCL1_STALL_ON_TRANSLATION) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY + != 0) else None) + min: MIN(((100 * CPC_UTCL1_STALL_ON_TRANSLATION) / 
CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY + != 0) else None) + max: MAX(((100 * CPC_UTCL1_STALL_ON_TRANSLATION) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY + != 0) else None) + unit: pct tips: - UTCL2 Intf Busy Cycles: - avg: AVG(CPC_CPC_UTCL2IU_BUSY) - min: MIN(CPC_CPC_UTCL2IU_BUSY) - max: MAX(CPC_CPC_UTCL2IU_BUSY) - unit: Cycles - tips: - UTCL2 Intf Util: + CPC-UTCL2 Utilization: avg: AVG((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE)) if ((CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE) != 0) else None)) min: MIN((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE)) diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0600_shader-processor-input.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0600_shader-processor-input.yaml index bab48700ac..24d4036ecb 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0600_shader-processor-input.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0600_shader-processor-input.yaml @@ -6,11 +6,11 @@ Metric Description: # Define the panel properties and properties of each metric in the panel. 
Panel Config: id: 600 - title: Shader Processor Input (SPI) + title: Workgroup Manager (SPI) data source: - metric_table: id: 601 - title: SPI Stats + title: Workgroup Manager Utilizations header: metric: Metric avg: Avg @@ -19,29 +19,35 @@ Panel Config: unit: Unit tips: Tips metric: - GPU Busy: - avg: AVG(GRBM_GUI_ACTIVE) - min: MIN(GRBM_GUI_ACTIVE) - max: MAX(GRBM_GUI_ACTIVE) - unit: Cycles + Accelerator Utilization: + avg: AVG(100 * GRBM_GUI_ACTIVE / GRBM_COUNT) + min: MIN(100 * GRBM_GUI_ACTIVE / GRBM_COUNT) + max: MAX(100 * GRBM_GUI_ACTIVE / GRBM_COUNT) + unit: Pct tips: - CS Busy: - avg: AVG(SPI_CSN_BUSY) - min: MIN(SPI_CSN_BUSY) - max: MAX(SPI_CSN_BUSY) - unit: Cycles + Scheduler-Pipe Utilization: + avg: AVG(100 * SPI_CSN_BUSY / (GRBM_GUI_ACTIVE * $numPipes * $numSE)) + min: MIN(100 * SPI_CSN_BUSY / (GRBM_GUI_ACTIVE * $numPipes * $numSE)) + max: MAX(100 * SPI_CSN_BUSY / (GRBM_GUI_ACTIVE * $numPipes * $numSE)) + unit: Pct tips: - SPI Busy: - avg: AVG(GRBM_SPI_BUSY) - min: MIN(GRBM_SPI_BUSY) - max: MAX(GRBM_SPI_BUSY) - unit: Cycles + Workgroup Manager Utilization: + avg: AVG(100 * GRBM_SPI_BUSY / GRBM_GUI_ACTIVE) + min: MIN(100 * GRBM_SPI_BUSY / GRBM_GUI_ACTIVE) + max: MAX(100 * GRBM_SPI_BUSY / GRBM_GUI_ACTIVE) + unit: Pct tips: - SQ Busy: - avg: AVG(SQ_BUSY_CYCLES) - min: MIN(SQ_BUSY_CYCLES) - max: MAX(SQ_BUSY_CYCLES) - unit: Cycles + Shader Engine Utilization: + avg: AVG(100 * SQ_BUSY_CYCLES / (GRBM_GUI_ACTIVE * $numSE)) + min: MIN(100 * SQ_BUSY_CYCLES / (GRBM_GUI_ACTIVE * $numSE)) + max: MAX(100 * SQ_BUSY_CYCLES / (GRBM_GUI_ACTIVE * $numSE)) + unit: Pct + tips: + SIMD Utilization: + avg: AVG(100 * SQ_BUSY_CU_CYCLES / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SQ_BUSY_CU_CYCLES / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SQ_BUSY_CU_CYCLES / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct tips: Dispatched Workgroups: avg: AVG(SPI_CSN_NUM_THREADGROUPS) @@ -55,105 +61,6 @@ Panel Config: max: MAX(SPI_CSN_WAVE) unit: Wavefronts tips: - Wave Alloc Failed: - avg: 
AVG(SPI_RA_REQ_NO_ALLOC) - min: MIN(SPI_RA_REQ_NO_ALLOC) - max: MAX(SPI_RA_REQ_NO_ALLOC) - unit: Cycles - tips: - Wave Alloc Failed - CS: - avg: AVG(SPI_RA_REQ_NO_ALLOC_CSN) - min: MIN(SPI_RA_REQ_NO_ALLOC_CSN) - max: MAX(SPI_RA_REQ_NO_ALLOC_CSN) - unit: Cycles - tips: - - - metric_table: - id: 602 - title: SPI Resource Allocation - header: - metric: Metric - avg: Avg - min: Min - max: Max - unit: Unit - tips: Tips - metric: - Wave request Failed (CS): - avg: AVG(SPI_RA_REQ_NO_ALLOC_CSN) - min: MIN(SPI_RA_REQ_NO_ALLOC_CSN) - max: MAX(SPI_RA_REQ_NO_ALLOC_CSN) - unit: Cycles - tips: - CS Stall: - avg: AVG(SPI_RA_RES_STALL_CSN) - min: MIN(SPI_RA_RES_STALL_CSN) - max: MAX(SPI_RA_RES_STALL_CSN) - unit: Cycles - tips: - CS Stall Rate: - avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY != - 0) else None)) - min: MIN((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY != - 0) else None)) - max: MAX((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY != - 0) else None)) - unit: pct - tips: - Scratch Stall: - avg: AVG(SPI_RA_TMP_STALL_CSN) - min: MIN(SPI_RA_TMP_STALL_CSN) - max: MAX(SPI_RA_TMP_STALL_CSN) - unit: Cycles - tips: - Insufficient SIMD Waveslots: - avg: AVG(SPI_RA_WAVE_SIMD_FULL_CSN) - min: MIN(SPI_RA_WAVE_SIMD_FULL_CSN) - max: MAX(SPI_RA_WAVE_SIMD_FULL_CSN) - unit: SIMD - tips: - Insufficient SIMD VGPRs: - avg: AVG(SPI_RA_VGPR_SIMD_FULL_CSN) - min: MIN(SPI_RA_VGPR_SIMD_FULL_CSN) - max: MAX(SPI_RA_VGPR_SIMD_FULL_CSN) - unit: SIMD - tips: - Insufficient SIMD SGPRs: - avg: AVG(SPI_RA_SGPR_SIMD_FULL_CSN) - min: MIN(SPI_RA_SGPR_SIMD_FULL_CSN) - max: MAX(SPI_RA_SGPR_SIMD_FULL_CSN) - unit: SIMD - tips: - Insufficient CU LDS: - avg: AVG(SPI_RA_LDS_CU_FULL_CSN) - min: MIN(SPI_RA_LDS_CU_FULL_CSN) - max: MAX(SPI_RA_LDS_CU_FULL_CSN) - unit: CU - tips: - Insufficient CU Barries: - avg: AVG(SPI_RA_BAR_CU_FULL_CSN) - min: MIN(SPI_RA_BAR_CU_FULL_CSN) - max: MAX(SPI_RA_BAR_CU_FULL_CSN) - unit: CU - tips: - Insufficient Bulky 
Resource: - avg: AVG(SPI_RA_BULKY_CU_FULL_CSN) - min: MIN(SPI_RA_BULKY_CU_FULL_CSN) - max: MAX(SPI_RA_BULKY_CU_FULL_CSN) - unit: CU - tips: - Reach CU Threadgroups Limit: - avg: AVG(SPI_RA_TGLIM_CU_FULL_CSN) - min: MIN(SPI_RA_TGLIM_CU_FULL_CSN) - max: MAX(SPI_RA_TGLIM_CU_FULL_CSN) - unit: Cycles - tips: - Reach CU Wave Limit: - avg: AVG(SPI_RA_WVLIM_STALL_CSN) - min: MIN(SPI_RA_WVLIM_STALL_CSN) - max: MAX(SPI_RA_WVLIM_STALL_CSN) - unit: Cycles - tips: VGPR Writes: avg: AVG((((4 * SPI_VWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else None)) @@ -172,3 +79,89 @@ Panel Config: None)) unit: Cycles/wave tips: + - metric_table: + id: 602 + title: Workgroup Manager - Resource Allocation + header: + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit + tips: Tips + metric: + Not-scheduled Rate (Workgroup Manager): + avg: AVG((100 * SPI_RA_REQ_NO_ALLOC_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + min: MIN((100 * SPI_RA_REQ_NO_ALLOC_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + max: MAX((100 * SPI_RA_REQ_NO_ALLOC_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + unit: Pct + tips: + Not-scheduled Rate (Scheduler-Pipe): + avg: AVG((100 * SPI_RA_REQ_NO_ALLOC / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + min: MIN((100 * SPI_RA_REQ_NO_ALLOC / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + max: MAX((100 * SPI_RA_REQ_NO_ALLOC / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + unit: Pct + tips: + Scheduler-Pipe Stall Rate: + avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None)) + min: MIN((((100 * SPI_RA_RES_STALL_CSN) / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None)) + max: MAX((((100 * SPI_RA_RES_STALL_CSN) / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None)) + unit: Pct + tips: + Scratch Stall Rate: + avg: AVG((100 * SPI_RA_TMP_STALL_CSN / (GRBM_SPI_BUSY * 
$numSE)) if (GRBM_SPI_BUSY != 0) else None) + min: MIN((100 * SPI_RA_TMP_STALL_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != 0) else None) + max: MAX((100 * SPI_RA_TMP_STALL_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != 0) else None) + unit: Pct + tips: + Insufficient SIMD Waveslots: + avg: AVG(100 * SPI_RA_WAVE_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SPI_RA_WAVE_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SPI_RA_WAVE_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient SIMD VGPRs: + avg: AVG(100 * SPI_RA_VGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SPI_RA_VGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SPI_RA_VGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient SIMD SGPRs: + avg: AVG(100 * SPI_RA_SGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SPI_RA_SGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SPI_RA_SGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient CU LDS: + avg: AVG(400 * SPI_RA_LDS_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_LDS_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_LDS_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient CU Barriers: + avg: AVG(400 * SPI_RA_BAR_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_BAR_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_BAR_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Reached CU Workgroup Limit: + avg: AVG(400 * SPI_RA_TGLIM_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_TGLIM_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_TGLIM_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Reached CU Wavefront Limit: + avg: AVG(400 * SPI_RA_WVLIM_STALL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_WVLIM_STALL_CSN / 
(GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_WVLIM_STALL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0700_wavefront-launch.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0700_wavefront-launch.yaml index 2dceb05dc7..42a863af49 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0700_wavefront-launch.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0700_wavefront-launch.yaml @@ -77,7 +77,7 @@ Panel Config: avg: AVG(Scratch_Per_Workitem) min: MIN(Scratch_Per_Workitem) max: MAX(Scratch_Per_Workitem) - unit: Bytes + unit: Bytes/Workitem tips: - metric_table: @@ -103,7 +103,7 @@ Panel Config: max: MAX(GRBM_GUI_ACTIVE) unit: Cycle tips: - Instr/wavefront: + Instructions per wavefront: avg: AVG((SQ_INSTS / SQ_WAVES)) min: MIN((SQ_INSTS / SQ_WAVES)) max: MAX((SQ_INSTS / SQ_WAVES)) diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1000_compute-unit-instruction-mix.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1000_compute-unit-instruction-mix.yaml index 679acc34d1..0092c202cc 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1000_compute-unit-instruction-mix.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1000_compute-unit-instruction-mix.yaml @@ -10,7 +10,7 @@ Panel Config: data source: - metric_table: id: 1001 - title: Instruction Mix + title: Overall Instruction Mix header: metric: Metric avg: Avg @@ -22,7 +22,7 @@ Panel Config: type: simple_bar label_txt: (# of instr + $normUnit) metric: - VALU - Vector: + VALU: avg: None # No HW module min: None # No HW module max: None # No HW module @@ -40,7 +40,7 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (instr + $normUnit) tips: - VALU - MFMA: + MFMA: avg: None # No HW module min: None # No HW module 
max: None # No HW module @@ -64,12 +64,6 @@ Panel Config: max: MAX((SQ_INSTS_BRANCH / $denom)) unit: (instr + $normUnit) tips: - GDS: - avg: AVG((SQ_INSTS_GDS / $denom)) - min: MIN((SQ_INSTS_GDS / $denom)) - max: MAX((SQ_INSTS_GDS / $denom)) - unit: (instr + $normUnit) - tips: - metric_table: id: 1002 @@ -103,7 +97,7 @@ Panel Config: max: None # No HW module unit: (instr + $normUnit) tips: - F16-Mult: + F16-MUL: avg: None # No HW module min: None # No HW module max: None # No HW module @@ -127,7 +121,7 @@ Panel Config: max: None # No HW module unit: (instr + $normUnit) tips: - F32-Mult: + F32-MUL: avg: None # No HW module min: None # No HW module max: None # No HW module @@ -151,7 +145,7 @@ Panel Config: max: None # No HW module unit: (instr + $normUnit) tips: - F64-Mult: + F64-MUL: avg: None # No HW module min: None # No HW module max: None # No HW module @@ -180,55 +174,100 @@ Panel Config: id: 1003 title: VMEM Instr Mix header: - type: Type - count: Count + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit tips: Tips metric: - Buffer Instr: - count: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom)) - tips: - Buffer Read: - count: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) - tips: - Buffer Write: - count: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) - tips: - Buffer Atomic: - count: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) - tips: - Flat Instr: - count: AVG((TA_FLAT_WAVEFRONTS_sum / $denom)) - tips: - Flat Read: - count: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) - tips: - Flat Write: - count: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) - tips: - Flat Atomic: - count: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) - tips: + Global/Generic Instr: + avg: AVG((TA_FLAT_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_WAVEFRONTS_sum / $denom)) + max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Global/Generic Read: + avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) + max: 
MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Global/Generic Write: + avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) + max: MAX((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Global/Generic Atomic: + avg: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) + max: MAX((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Instr: + avg: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Read: + avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Write: + avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Atomic: + avg: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: - metric_table: id: 1004 title: MFMA Arithmetic Instr Mix header: - type: Type - count: Count + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit tips: Tips metric: MFMA-I8: - count: None # No HW module - tips: + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: (instr + $normUnit) + tips: MFMA-F16: - count: None # No HW module - tips: + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: (instr + $normUnit) + tips: MFMA-BF16: - count: None # No HW module - tips: + avg: None # No HW module + min: None # No HW module + max: None # No HW 
module + unit: (instr + $normUnit) + tips: MFMA-F32: - count: None # No HW module + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: (instr + $normUnit) tips: MFMA-F64: - count: None # No HW module - tips: + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: (instr + $normUnit) + tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1100_compute-unit-compute-pipeline.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1100_compute-unit-compute-pipeline.yaml index 8cffb24c7e..63019bfecf 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1100_compute-unit-compute-pipeline.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1100_compute-unit-compute-pipeline.yaml @@ -13,7 +13,10 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg + unit: Unit + peak: Peak + pop: Pct of Peak tips: Tips style: type: simple_bar @@ -21,23 +24,47 @@ Panel Config: label_txt: (%) xrange: [0, 110] metric: - valu_flops_pop: + VALU FLOPs: value: None # No perf counter + Unit: None + peak: None + pop: None tips: - mfma_flops_bf16_pop: + VALU IOPs: value: None # No perf counter + Unit: None + peak: None + pop: None tips: - mfma_flops_f16_pop: + MFMA FLOPs (BF16): value: None # No perf counter + Unit: None + peak: None + pop: None tips: - mfma_flops_f32_pop: + MFMA FLOPs (F16): value: None # No perf counter + Unit: None + peak: None + pop: None tips: - mfma_flops_f64_pop: + MFMA FLOPs (F32): value: None # No perf counter + Unit: None + peak: None + pop: None tips: - mfma_flops_i8_pop: + MFMA FLOPs (F64): value: None # No perf counter + Unit: None + peak: None + pop: None + tips: + MFMA IOPs (INT8): + value: None # No perf counter + Unit: None + peak: None + pop: None tips: - metric_table: @@ -51,36 +78,48 @@ Panel Config: unit: Unit tips: 
Tips metric: - IPC (Avg): + IPC: avg: AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES)) min: MIN((SQ_INSTS / SQ_BUSY_CU_CYCLES)) max: MAX((SQ_INSTS / SQ_BUSY_CU_CYCLES)) unit: Instr/cycle tips: - IPC (Issue): - avg: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) + IPC (Issued): + avg: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)) + + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED + SQ_INSTS_LDS) / SQ_ACTIVE_INST_ANY)) - min: MIN(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) + min: MIN(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)) + + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED + SQ_INSTS_LDS) / SQ_ACTIVE_INST_ANY)) - max: MAX(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) + max: MAX(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)) + + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED + SQ_INSTS_LDS) / SQ_ACTIVE_INST_ANY)) unit: Instr/cycle tips: - SALU Util: + SALU Utilization: avg: AVG((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU)) min: MIN((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU)) max: MAX((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU)) unit: pct tips: - VALU Util: + VALU Utilization: avg: AVG((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU)) min: MIN((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU)) max: MAX((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU)) unit: pct tips: + VMEM Utilization: + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: pct + tips: + Branch Utilization: + avg: None # No HW module + min: None # No HW module + max: None # No 
HW module + unit: pct + tips: VALU Active Threads: avg: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU != 0) else None)) @@ -90,7 +129,7 @@ Panel Config: != 0) else None)) unit: Threads tips: - MFMA Util: + MFMA Utilization: avg: None # No HW module min: None # No HW module max: None # No HW module @@ -102,6 +141,20 @@ Panel Config: max: None # No HW module unit: cycles/instr tips: + VMEM Latency: + avg: None # No perf counter + min: None # No perf counter + max: None # No perf counter + unit: Cycles + coll_level: SQ_INST_LEVEL_VMEM + tips: + SMEM Latency: + avg: None # No perf counter + min: None # No perf counter + max: None # No perf counter + unit: Cycles + coll_level: SQ_INST_LEVEL_SMEM + tips: - metric_table: id: 1103 @@ -120,7 +173,7 @@ Panel Config: max: None # No perf counter unit: (OPs + $normUnit) tips: - INT8 OPs: + IOPs (Total): avg: None # No perf counter min: None # No perf counter max: None # No perf counter @@ -150,5 +203,9 @@ Panel Config: max: None # No perf counter unit: (OPs + $normUnit) tips: - - + INT8 OPs: + avg: None # No perf counter + min: None # No perf counter + max: None # No perf counter + unit: (OPs + $normUnit) + tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1200_lds.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1200_lds.yaml index 4ae5333608..9c9152ba53 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1200_lds.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1200_lds.yaml @@ -26,20 +26,24 @@ Panel Config: value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU))) unit: Pct of Peak tips: + unit: pct Access Rate: value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU))) unit: Pct of Peak tips: - Bandwidth (Pct-of-Peak): + unit: pct + Theoretical Bandwidth (% of Peak): value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * 
TO_INT($LDSBanks)) / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) unit: Pct of Peak tips: + unit: pct Bank Conflict Rate: value: AVG((((SQ_LDS_BANK_CONFLICT * 3.125) / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None)) unit: Pct of Peak tips: + unit: pct - metric_table: id: 1202 @@ -58,7 +62,7 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (Instr + $normUnit) tips: - Bandwidth: + Theoretical Bandwidth: avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) @@ -67,7 +71,14 @@ Panel Config: / $denom)) unit: (Bytes + $normUnit) tips: - Bank Conficts/Access: + LDS Latency: + avg: AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) + min: MIN(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) + max: MAX(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) + unit: Cycles + coll_level: SQ_INST_LEVEL_LDS + tips: + Bank Conflicts/Access: avg: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None)) min: MIN(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) @@ -82,7 +93,7 @@ Panel Config: max: MAX((SQ_LDS_IDX_ACTIVE / $denom)) unit: (Cycles + $normUnit) tips: - Atomic Cycles: + Atomic Return Cycles: avg: AVG((SQ_LDS_ATOMIC_RETURN / $denom)) min: MIN((SQ_LDS_ATOMIC_RETURN / $denom)) max: MAX((SQ_LDS_ATOMIC_RETURN / $denom)) @@ -110,12 +121,5 @@ Panel Config: avg: AVG((SQ_LDS_MEM_VIOLATIONS / $denom)) min: MIN((SQ_LDS_MEM_VIOLATIONS / $denom)) max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom)) - unit: ( + $normUnit) - tips: - LDS Latency: - avg: AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) - min: MIN(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) - max: 
MAX(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) - unit: Cycles - coll_level: SQ_INST_LEVEL_LDS - tips: + unit: (Accesses + $normUnit) + tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1300_instruction-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1300_instruction-cache.yaml index bbbf6ebe26..5cfe101e71 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1300_instruction-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1300_instruction-cache.yaml @@ -13,7 +13,7 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit tips: Tips style: @@ -27,11 +27,16 @@ Panel Config: * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: - Cache Hit: + Cache Hit Rate: value: AVG(((SQC_ICACHE_HITS * 100) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) + SQC_ICACHE_MISSES_DUPLICATE))) unit: Pct of Peak tips: + L1I-L2 Bandwidth: + value: AVG(((SQC_TC_INST_REQ * 100000) / (2 * ($sclk * $numSQC) + * (EndNs - BeginNs)))) + unit: Pct of Peak + tips: - metric_table: id: 1302 @@ -68,7 +73,7 @@ Panel Config: max: MAX((SQC_ICACHE_MISSES_DUPLICATE / $denom)) unit: (Misses + $normUnit) tips: - Cache Hit: + Cache Hit Rate: avg: AVG(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) + SQC_ICACHE_MISSES_DUPLICATE))) min: MIN(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) + @@ -77,3 +82,27 @@ Panel Config: SQC_ICACHE_MISSES_DUPLICATE))) unit: pct tips: + Instruction Fetch Latency: + avg: AVG((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) + min: MIN((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) + max: MAX((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) + unit: Cycles + coll_level: SQ_IFETCH_LEVEL + tips: + - metric_table: + id: 1303 + title: Instruction Cache - L2 Interface + header: + metric: Metric + mean: Mean + min: Min + max: Max + unit: Unit + tips: 
Tips + metric: + L1I-L2 Bandwidth: + mean: AVG(((SQC_TC_INST_REQ * 64) / $denom)) + min: MIN(((SQC_TC_INST_REQ * 64) / $denom)) + max: MAX(((SQC_TC_INST_REQ * 64) / $denom)) + unit: (Bytes + $normUnit) + tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1400_constant-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1400_constant-cache.yaml index d0c89f1baf..f709ec2232 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1400_constant-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1400_constant-cache.yaml @@ -12,8 +12,8 @@ Panel Config: id: 1401 title: Speed-of-Light header: - mertic: Metric - value: Value + metric: Metric + value: Avg unit: Unit tips: Tips style: @@ -27,12 +27,17 @@ Panel Config: * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: - Cache Hit: + Cache Hit Rate: value: AVG((((SQC_DCACHE_HITS * 100) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE)) if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE) != 0) else None)) unit: Pct of Peak tips: + sL1D-L2 BW: + value: AVG(((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 100000) + / (2 * ($sclk * $numSQC) * (EndNs - BeginNs))) + unit: Pct of Peak + tips: - metric_table: id: 1402 @@ -138,6 +143,12 @@ Panel Config: unit: Unit tips: Tips metric: + sL1D-L2 BW: + mean: AVG(((((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 64)) / $denom)) + min: MIN(((((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 64)) / $denom)) + max: MAX(((((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 64)) / $denom)) + unit: (Bytes + $normUnit) + tips: Read Req: avg: AVG((SQC_TC_DATA_READ_REQ / $denom)) min: MIN((SQC_TC_DATA_READ_REQ / $denom)) diff --git 
a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1500_TA_and_TD.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1500_TA_and_TD.yaml index 8f71cedc99..773bb7c763 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1500_TA_and_TD.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1500_TA_and_TD.yaml @@ -6,11 +6,11 @@ Metric Description: # Define the panel properties and properties of each metric in the panel. Panel Config: id: 1500 - title: Texture Addresser and Texture Data (TA/TD) + title: Address Processing Unit and Data Return Path (TA/TD) data source: - metric_table: id: 1501 - title: TA + title: Address Processing Unit header: metric: Metric avg: Avg @@ -19,25 +19,25 @@ Panel Config: unit: Unit tips: Tips metric: - TA Busy: + Address Processing Unit Busy: avg: AVG(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TC2TA Addr Stall: + Address Stall: avg: AVG(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TC2TA Data Stall: + Data Stall: avg: AVG(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TD2TA Addr Stall: + Data-Processor → Address Stall: avg: AVG(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) @@ -47,69 
+47,69 @@ Panel Config: avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom)) min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom)) max: MAX((TA_TOTAL_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Instr: + Global/Generic Instructions: avg: AVG((TA_FLAT_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Read Instr: + Global/Generic Read Instructions: avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Write Instr: + Global/Generic Write Instructions: avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Atomic Instr: + Global/Generic Atomic Instructions: avg: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Instr: + Spill/Stack Instructions: avg: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Read Instr: + Spill/Stack Read Instructions: avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Write Instr: + Spill/Stack Write Instructions: avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) max: 
MAX((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Atomic Instr: + Spill/Stack Atomic Instructions: avg: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Total Cylces: + Spill/Stack Total Cycles: avg: AVG((TA_BUFFER_TOTAL_CYCLES_sum / $denom)) min: MIN((TA_BUFFER_TOTAL_CYCLES_sum / $denom)) max: MAX((TA_BUFFER_TOTAL_CYCLES_sum / $denom)) unit: (Cycles + $normUnit) tips: - Buffer Coalesced Read: + Spill/Stack Coalesced Read: avg: AVG((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom)) min: MIN((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom)) max: MAX((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom)) unit: (Cycles + $normUnit) tips: - Buffer Coalesced Write: + Spill/Stack Coalesced Write: avg: AVG((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom)) min: MIN((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom)) max: MAX((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom)) @@ -118,7 +118,7 @@ Panel Config: - metric_table: id: 1502 - title: TD + title: Data-Return Path header: metric: Metric avg: Avg @@ -127,48 +127,48 @@ Panel Config: unit: Unit tips: Tips metric: - TD Busy: + Data-Return Busy: avg: AVG(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TC2TD Stall: + Cache RAM → Data-Return Stall: avg: AVG(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - SPI2TD Stall: + Workgroup manager → Data-Return Stall: avg: # No perf counter min: # No perf counter max: # No perf counter unit: pct tips: - Coalescable Instr: + Coalescable 
Instructions: avg: AVG((TD_COALESCABLE_WAVEFRONT_sum / $denom)) min: MIN((TD_COALESCABLE_WAVEFRONT_sum / $denom)) max: MAX((TD_COALESCABLE_WAVEFRONT_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Load Instr: + Read Instructions: avg: AVG((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum) / $denom)) min: MIN((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum) / $denom)) max: MAX((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum) / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Store Instr: + Write Instructions: avg: AVG((TD_STORE_WAVEFRONT_sum / $denom)) min: MIN((TD_STORE_WAVEFRONT_sum / $denom)) max: MAX((TD_STORE_WAVEFRONT_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Atomic Instr: + Atomic Instructions: avg: AVG((TD_ATOMIC_WAVEFRONT_sum / $denom)) min: MIN((TD_ATOMIC_WAVEFRONT_sum / $denom)) max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1600_L1_cache.yaml index 1dee69c726..842b967d0e 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1600_L1_cache.yaml @@ -13,7 +13,7 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit tips: Tips style: @@ -22,28 +22,28 @@ Panel Config: label_txt: (%) xrange: [0, 110] metric: - Buffer Coalescing: - value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum - * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None)) - unit: Pct of Peak - tips: - Cache Util: - value: AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if 
(TCP_GATE_EN1_sum - != 0) else None)) - unit: Pct of Peak - tips: - Cache BW: - value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) - / ((($sclk / 1000) * 64) * $numCU)) - unit: Pct of Peak - tips: - Cache Hit: + Hit rate: value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else None)) unit: Pct of Peak tips: + Bandwidth: + value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) + / ((($sclk / 1000) * 64) * $numCU)) + unit: Pct of Peak + tips: + Utilization: + value: AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum + != 0) else None)) + unit: Pct of Peak + tips: + Coalescing: + value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum + * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None)) + unit: Pct of Peak + tips: - metric_table: id: 1602 @@ -141,11 +141,26 @@ Panel Config: unit: (Req + $normUnit) tips: Cache BW: - avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) - min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) - max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) - unit: GB/s + avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / $denom)) + min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / $denom)) + max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / $denom)) + unit: (Bytes + $normUnit) tips: + Cache Hit Rate: + avg: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / + TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None)) + min: MIN(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + 
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / + TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None)) + max: MAX(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / + TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None)) + unit: pct + tips: Cache Accesses: avg: AVG((TCP_TOTAL_CACHE_ACCESSES_sum / $denom)) min: MIN((TCP_TOTAL_CACHE_ACCESSES_sum / $denom)) @@ -164,22 +179,7 @@ Panel Config: / $denom)) unit: (Req + $normUnit) tips: - Cache Hit Rate: - avg: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + - TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / - TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)) - min: MIN(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + - TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / - TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)) - max: MAX(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + - TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / - TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)) - unit: pct - tips: - Invalidate: + Invalidations: avg: AVG((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom)) min: MIN((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom)) max: MAX((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom)) @@ -188,9 +188,9 @@ Panel Config: L1-L2 BW: avg: AVG(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) - min: AVG(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + min: MIN(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) - max: 
AVG(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + max: MAX(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) unit: (Bytes + $normUnit) tips: @@ -388,17 +388,17 @@ Panel Config: avg: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) - units: (Hits + $normUnit) + units: (Req + $normUnit) tips: - Misses (Translation): + Translation Misses: avg: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) - units: (Misses + $normUnit) + units: (Req + $normUnit) tips: - Misses (Permission): + Permission Misses: avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) - units: (Misses + $normUnit) + units: (Req + $normUnit) tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1700_L2_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1700_L2_cache.yaml index 5004e31ea2..5f37ca77ca 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1700_L2_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1700_L2_cache.yaml @@ -13,31 +13,35 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit tips: Tips style: type: simple_bar metric: - L2 Util: + Utilization: value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE))) unit: pct + tips: + Bandwidth: + value: ((100 * AVG(((TCC_REQ_sum * 64) / (EndNs - BeginNs)))) / ((($sclk / 1000) * 64) * TO_INT($L2Banks))) + unit: pct tips: - Cache Hit: + Hit Rate: value: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum 
+ TCC_MISS_sum) != 0) else 0)) unit: pct - tips: - L2-EA Rd BW: + tips: + L2-Fabric Read BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s - tips: - L2-EA Wr BW: + tips: + L2-Fabric Write and Atomic BW: value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s - tips: + tips: - metric_table: id: 1702 @@ -50,7 +54,7 @@ Panel Config: unit: Unit tips: Tips metric: - Read BW: + L2-Fabric Read BW: avg: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / $denom)) min: MIN((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) @@ -58,8 +62,26 @@ Panel Config: max: MAX((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / $denom)) unit: (Bytes + $normUnit) - tips: - Write BW: + tips: + HBM Read Traffic: + avg: AVG((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + unit: pct + tips: + Remote Read Traffic: + avg: AVG((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + min: MIN((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + max: MAX((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + unit: pct + tips: + Uncached Read Traffic: + avg: AVG((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) 
if (TCC_EA_RDREQ_sum != 0) else None)) + unit: pct + tips: + L2-Fabric Write and Atomic BW: avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) * 32)) / $denom)) min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) @@ -67,55 +89,31 @@ Panel Config: max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) * 32)) / $denom)) unit: (Bytes + $normUnit) - tips: - Read (32B): - avg: AVG((TCC_EA_RDREQ_32B_sum / $denom)) - min: MIN((TCC_EA_RDREQ_32B_sum / $denom)) - max: MAX((TCC_EA_RDREQ_32B_sum / $denom)) - unit: (Req + $normUnit) - tips: - Read (Uncached 32B): - avg: AVG((TCC_EA_RD_UNCACHED_32B_sum / $denom)) - min: MIN((TCC_EA_RD_UNCACHED_32B_sum / $denom)) - max: MAX((TCC_EA_RD_UNCACHED_32B_sum / $denom)) - unit: (Req + $normUnit) - tips: - Read (64B): - avg: AVG(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) - min: MIN(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) - max: MAX(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) - unit: (Req + $normUnit) - tips: - HBM Read: - avg: AVG((TCC_EA_RDREQ_DRAM_sum / $denom)) - min: MIN((TCC_EA_RDREQ_DRAM_sum / $denom)) - max: MAX((TCC_EA_RDREQ_DRAM_sum / $denom)) - unit: (Req + $normUnit) - tips: - Write (32B): - avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) - min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) - max: MAX(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) - unit: (Req + $normUnit) - tips: - Write (Uncached 32B): - avg: AVG((TCC_EA_WR_UNCACHED_32B_sum / $denom)) - min: MIN((TCC_EA_WR_UNCACHED_32B_sum / $denom)) - max: MAX((TCC_EA_WR_UNCACHED_32B_sum / $denom)) - unit: (Req + $normUnit) - tips: - Write (64B): - avg: AVG((TCC_EA_WRREQ_64B_sum / $denom)) - min: MIN((TCC_EA_WRREQ_64B_sum / $denom)) - max: MAX((TCC_EA_WRREQ_64B_sum / $denom)) - unit: (Req + $normUnit) - tips: - HBM Write: - avg: AVG((TCC_EA_WRREQ_DRAM_sum / $denom)) - min: MIN((TCC_EA_WRREQ_DRAM_sum / $denom)) - 
max: MAX((TCC_EA_WRREQ_DRAM_sum / $denom)) - unit: (Req + $normUnit) - tips: + tips: + HBM Write and Atomic Traffic: + avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: + Remote Write and Atomic Traffic: + avg: AVG((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: + Atomic Traffic: + avg: AVG((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: + Uncached Write and Atomic Traffic: + avg: AVG((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: Read Latency: avg: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) @@ -124,7 +122,7 @@ Panel Config: max: MAX(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) unit: Cycles - tips: + tips: Write Latency: avg: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) @@ -133,7 +131,7 @@ Panel Config: max: MAX(((TCC_EA_WRREQ_LEVEL_sum / 
TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) unit: Cycles - tips: + tips: Atomic Latency: avg: AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum != 0) else None)) @@ -142,7 +140,7 @@ Panel Config: max: MAX(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum != 0) else None)) unit: Cycles - tips: + tips: Read Stall: avg: AVG((((100 * ((TCC_EA_RDREQ_IO_CREDIT_STALL_sum + TCC_EA_RDREQ_GMI_CREDIT_STALL_sum) + TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != @@ -154,7 +152,7 @@ Panel Config: + TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != 0) else None)) unit: pct - tips: + tips: Write Stall: avg: AVG((((100 * ((TCC_EA_WRREQ_IO_CREDIT_STALL_sum + TCC_EA_WRREQ_GMI_CREDIT_STALL_sum) + TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != @@ -166,7 +164,7 @@ Panel Config: + TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != 0) else None)) unit: pct - tips: + tips: - metric_table: id: 1703 @@ -179,54 +177,48 @@ Panel Config: unit: Unit tips: Tips metric: + Bandwidth: + avg: AVG((TCC_REQ_sum * 64) / $denom) + min: MIN((TCC_REQ_sum * 64) / $denom) + max: MAX((TCC_REQ_sum * 64) / $denom) + unit: (Bytes + $normUnit) + tips: Req: avg: AVG((TCC_REQ_sum / $denom)) min: MIN((TCC_REQ_sum / $denom)) max: MAX((TCC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: - Streaming Req: - avg: AVG((TCC_STREAMING_REQ_sum / $denom)) - min: MIN((TCC_STREAMING_REQ_sum / $denom)) - max: MAX((TCC_STREAMING_REQ_sum / $denom)) - unit: (Req + $normUnit) - tips: + tips: Read Req: avg: AVG((TCC_READ_sum / $denom)) min: MIN((TCC_READ_sum / $denom)) max: MAX((TCC_READ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: Write Req: avg: AVG((TCC_WRITE_sum / $denom)) min: MIN((TCC_WRITE_sum / $denom)) max: MAX((TCC_WRITE_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: Atomic Req: avg: AVG((TCC_ATOMIC_sum / $denom)) min: MIN((TCC_ATOMIC_sum / $denom)) max: 
MAX((TCC_ATOMIC_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: + Streaming Req: + avg: AVG((TCC_STREAMING_REQ_sum / $denom)) + min: MIN((TCC_STREAMING_REQ_sum / $denom)) + max: MAX((TCC_STREAMING_REQ_sum / $denom)) + unit: (Req + $normUnit) + tips: Probe Req: avg: AVG((TCC_PROBE_sum / $denom)) min: MIN((TCC_PROBE_sum / $denom)) max: MAX((TCC_PROBE_sum / $denom)) unit: (Req + $normUnit) - tips: - Hits: - avg: AVG((TCC_HIT_sum / $denom)) - min: MIN((TCC_HIT_sum / $denom)) - max: MAX((TCC_HIT_sum / $denom)) - unit: (Hits + $normUnit) - tips: - Misses: - avg: AVG((TCC_MISS_sum / $denom)) - min: MIN((TCC_MISS_sum / $denom)) - max: MAX((TCC_MISS_sum / $denom)) - unit: (Misses + $normUnit) - tips: + tips: Cache Hit: avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) @@ -235,65 +227,77 @@ Panel Config: max: MAX((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) unit: pct - tips: + tips: + Hits: + avg: AVG((TCC_HIT_sum / $denom)) + min: MIN((TCC_HIT_sum / $denom)) + max: MAX((TCC_HIT_sum / $denom)) + unit: (Hits + $normUnit) + tips: + Misses: + avg: AVG((TCC_MISS_sum / $denom)) + min: MIN((TCC_MISS_sum / $denom)) + max: MAX((TCC_MISS_sum / $denom)) + unit: (Misses + $normUnit) + tips: Writeback: avg: AVG((TCC_WRITEBACK_sum / $denom)) min: MIN((TCC_WRITEBACK_sum / $denom)) max: MAX((TCC_WRITEBACK_sum / $denom)) - unit: ( + $normUnit) - tips: + unit: (Cachelines + $normUnit) + tips: + Writeback (Internal): + avg: AVG((TCC_NORMAL_WRITEBACK_sum / $denom)) + min: MIN((TCC_NORMAL_WRITEBACK_sum / $denom)) + max: MAX((TCC_NORMAL_WRITEBACK_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: + Writeback (vL1D Req): + avg: AVG((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) + min: MIN((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) + max: MAX((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: + Evict (Normal): + avg: 
AVG((TCC_NORMAL_EVICT_sum / $denom)) + min: MIN((TCC_NORMAL_EVICT_sum / $denom)) + max: MAX((TCC_NORMAL_EVICT_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: + Evict (vL1D Req): + avg: AVG((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) + min: MIN((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) + max: MAX((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: NC Req: avg: AVG((TCC_NC_REQ_sum / $denom)) min: MIN((TCC_NC_REQ_sum / $denom)) max: MAX((TCC_NC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: UC Req: avg: AVG((TCC_UC_REQ_sum / $denom)) min: MIN((TCC_UC_REQ_sum / $denom)) max: MAX((TCC_UC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: CC Req: avg: AVG((TCC_CC_REQ_sum / $denom)) min: MIN((TCC_CC_REQ_sum / $denom)) max: MAX((TCC_CC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: RW Req: avg: None # No HW module min: None # No HW module max: None # No HW module unit: (Req + $normUnit) - tips: - Writeback (Normal): - avg: AVG((TCC_NORMAL_WRITEBACK_sum / $denom)) - min: MIN((TCC_NORMAL_WRITEBACK_sum / $denom)) - max: MAX((TCC_NORMAL_WRITEBACK_sum / $denom)) - unit: ( + $normUnit) - tips: - Writeback (TC Req): - avg: AVG((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) - min: MIN((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) - max: MAX((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) - unit: ( + $normUnit) - tips: - Evict (Normal): - avg: AVG((TCC_NORMAL_EVICT_sum / $denom)) - min: MIN((TCC_NORMAL_EVICT_sum / $denom)) - max: MAX((TCC_NORMAL_EVICT_sum / $denom)) - unit: ( + $normUnit) - tips: - Evict (TC Req): - avg: AVG((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) - min: MIN((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) - max: MAX((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) - unit: ( + $normUnit) - tips: + tips: - metric_table: id: 1704 - title: L2 - EA Interface Stalls + title: L2 - Fabric Interface Stalls header: metric: Metric type: Type @@ -306,59 +310,137 @@ Panel Config: style: type: simple_multi_bar metric: - Read - Remote Socket 
Stall: - type: Remote Socket Stall + Read - PCIe Stall: + type: PCIe Stall transaction: Read - avg: AVG((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: - Read - Peer GCD Stall: - type: Peer GCD Stall + avg: AVG(((100 * (TCC_EA_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + Read - Infinity Fabric™ Stall: + type: Infinity Fabric™ Stall transaction: Read - avg: AVG((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: + avg: AVG(((100 * (TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: Read - HBM Stall: type: HBM Stall transaction: Read - avg: AVG((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: - Write - Remote Socket Stall: - type: Remote Socket Stall + avg: AVG(((100 * (TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + Write - PCIe Stall: + type: PCIe 
Stall transaction: Write - avg: AVG((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: - Write - Peer GCD Stall: - type: Peer GCD Stall + avg: AVG(((100 * (TCC_EA_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + Write - Infinity Fabric™ Stall: + type: Infinity Fabric™ Stall transaction: Write - avg: AVG((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: + avg: AVG(((100 * (TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: Write - HBM Stall: type: HBM Stall transaction: Write - avg: AVG((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: + avg: AVG(((100 * (TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: Write - Credit Starvation: type: Credit Starvation transaction: Write - avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom)) - min: 
MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom)) - max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom)) + avg: AVG(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + + - metric_table: + id: 1705 + title: L2 - Fabric Detailed Transaction Breakdown + header: + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit + tips: Tips + metric: + Read (32B): + avg: AVG((TCC_EA_RDREQ_32B_sum / $denom)) + min: MIN((TCC_EA_RDREQ_32B_sum / $denom)) + max: MAX((TCC_EA_RDREQ_32B_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: + Read (Uncached): + avg: AVG((TCC_EA_RD_UNCACHED_32B_sum / $denom)) + min: MIN((TCC_EA_RD_UNCACHED_32B_sum / $denom)) + max: MAX((TCC_EA_RD_UNCACHED_32B_sum / $denom)) + unit: (Req + $normUnit) + tips: + Read (64B): + avg: AVG(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) + min: MIN(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) + max: MAX(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) + unit: (Req + $normUnit) + tips: + HBM Read: + avg: AVG((TCC_EA_RDREQ_DRAM_sum / $denom)) + min: MIN((TCC_EA_RDREQ_DRAM_sum / $denom)) + max: MAX((TCC_EA_RDREQ_DRAM_sum / $denom)) + unit: (Req + $normUnit) + tips: + Remote Read: + avg: AVG((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom)) + min: MIN((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom)) + max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom)) + unit: (Req + $normUnit) + tips: + Write and Atomic (32B): + avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) + min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) + max: MAX(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) + unit: (Req + $normUnit) + tips: + Write and Atomic (Uncached): + avg: 
AVG((TCC_EA_WR_UNCACHED_32B_sum / $denom)) + min: MIN((TCC_EA_WR_UNCACHED_32B_sum / $denom)) + max: MAX((TCC_EA_WR_UNCACHED_32B_sum / $denom)) + unit: (Req + $normUnit) + tips: + Write and Atomic (64B): + avg: AVG((TCC_EA_WRREQ_64B_sum / $denom)) + min: MIN((TCC_EA_WRREQ_64B_sum / $denom)) + max: MAX((TCC_EA_WRREQ_64B_sum / $denom)) + unit: (Req + $normUnit) + tips: + HBM Write and Atomic: + avg: AVG((TCC_EA_WRREQ_DRAM_sum / $denom)) + min: MIN((TCC_EA_WRREQ_DRAM_sum / $denom)) + max: MAX((TCC_EA_WRREQ_DRAM_sum / $denom)) + unit: (Req + $normUnit) + tips: + Remote Write and Atomic: + avg: AVG((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) + min: MIN((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) + max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) + unit: (Req + $normUnit) + tips: + Atomic: + avg: AVG((TCC_EA_ATOMIC_sum / $denom)) + min: MIN((TCC_EA_ATOMIC_sum / $denom)) + max: MAX((TCC_EA_ATOMIC_sum / $denom)) + unit: (Req + $normUnit) + tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1800_L2_cache_per_channel.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1800_L2_cache_per_channel.yaml index 7a808c5b82..a136237df2 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1800_L2_cache_per_channel.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1800_L2_cache_per_channel.yaml @@ -13,7 +13,7 @@ Panel Config: title: Aggregate Stats (All 32 channels) header: metric: Metric - avg: Mean + avg: Avg std dev: Std Dev min: Min max: Max @@ -167,1555 +167,169 @@ Panel Config: + TCC_HIT[30])) + (TCC_MISS[31] + TCC_HIT[31])) != 0) else None)) unit: pct tips: - Req: - avg: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_REQ[0]) + TO_INT(TCC_REQ[1])) - + TO_INT(TCC_REQ[2])) + TO_INT(TCC_REQ[3])) + TO_INT(TCC_REQ[4])) + TO_INT(TCC_REQ[5])) - + TO_INT(TCC_REQ[6])) + 
TO_INT(TCC_REQ[7])) + TO_INT(TCC_REQ[8])) + TO_INT(TCC_REQ[9])) - + TO_INT(TCC_REQ[10])) + TO_INT(TCC_REQ[11])) + TO_INT(TCC_REQ[12])) + TO_INT(TCC_REQ[13])) - + TO_INT(TCC_REQ[14])) + TO_INT(TCC_REQ[15])) + TO_INT(TCC_REQ[16])) + TO_INT(TCC_REQ[17])) - + TO_INT(TCC_REQ[18])) + TO_INT(TCC_REQ[19])) + TO_INT(TCC_REQ[20])) + TO_INT(TCC_REQ[21])) - + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) - + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) - + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - std dev: STD((((((((((((((((((((((((((((((((((TO_INT(TCC_REQ[0]) + TO_INT(TCC_REQ[1])) - + TO_INT(TCC_REQ[2])) + TO_INT(TCC_REQ[3])) + TO_INT(TCC_REQ[4])) + TO_INT(TCC_REQ[5])) - + TO_INT(TCC_REQ[6])) + TO_INT(TCC_REQ[7])) + TO_INT(TCC_REQ[8])) + TO_INT(TCC_REQ[9])) - + TO_INT(TCC_REQ[10])) + TO_INT(TCC_REQ[11])) + TO_INT(TCC_REQ[12])) + TO_INT(TCC_REQ[13])) - + TO_INT(TCC_REQ[14])) + TO_INT(TCC_REQ[15])) + TO_INT(TCC_REQ[16])) + TO_INT(TCC_REQ[17])) - + TO_INT(TCC_REQ[18])) + TO_INT(TCC_REQ[19])) + TO_INT(TCC_REQ[20])) + TO_INT(TCC_REQ[21])) - + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) - + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) - + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - min: MIN((((((((((((((((((((((((((((((((((TO_INT(TCC_REQ[0]) + TO_INT(TCC_REQ[1])) - + TO_INT(TCC_REQ[2])) + TO_INT(TCC_REQ[3])) + TO_INT(TCC_REQ[4])) + TO_INT(TCC_REQ[5])) - + TO_INT(TCC_REQ[6])) + TO_INT(TCC_REQ[7])) + TO_INT(TCC_REQ[8])) + TO_INT(TCC_REQ[9])) - + TO_INT(TCC_REQ[10])) + TO_INT(TCC_REQ[11])) + TO_INT(TCC_REQ[12])) + TO_INT(TCC_REQ[13])) - + TO_INT(TCC_REQ[14])) + TO_INT(TCC_REQ[15])) + TO_INT(TCC_REQ[16])) + TO_INT(TCC_REQ[17])) - + TO_INT(TCC_REQ[18])) + TO_INT(TCC_REQ[19])) + TO_INT(TCC_REQ[20])) + TO_INT(TCC_REQ[21])) - + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + 
TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) - + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) - + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - max: MAX((((((((((((((((((((((((((((((((((TO_INT(TCC_REQ[0]) + TO_INT(TCC_REQ[1])) - + TO_INT(TCC_REQ[2])) + TO_INT(TCC_REQ[3])) + TO_INT(TCC_REQ[4])) + TO_INT(TCC_REQ[5])) - + TO_INT(TCC_REQ[6])) + TO_INT(TCC_REQ[7])) + TO_INT(TCC_REQ[8])) + TO_INT(TCC_REQ[9])) - + TO_INT(TCC_REQ[10])) + TO_INT(TCC_REQ[11])) + TO_INT(TCC_REQ[12])) + TO_INT(TCC_REQ[13])) - + TO_INT(TCC_REQ[14])) + TO_INT(TCC_REQ[15])) + TO_INT(TCC_REQ[16])) + TO_INT(TCC_REQ[17])) - + TO_INT(TCC_REQ[18])) + TO_INT(TCC_REQ[19])) + TO_INT(TCC_REQ[20])) + TO_INT(TCC_REQ[21])) - + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) - + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) - + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - unit: (Req + $normUnit) - tips: - L1 - L2 Read Req: - avg: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) - + TO_INT(TCC_READ[2])) + TO_INT(TCC_READ[3])) + TO_INT(TCC_READ[4])) + TO_INT(TCC_READ[5])) - + TO_INT(TCC_READ[6])) + TO_INT(TCC_READ[7])) + TO_INT(TCC_READ[8])) + TO_INT(TCC_READ[9])) - + TO_INT(TCC_READ[10])) + TO_INT(TCC_READ[11])) + TO_INT(TCC_READ[12])) + - TO_INT(TCC_READ[13])) + TO_INT(TCC_READ[14])) + TO_INT(TCC_READ[15])) + TO_INT(TCC_READ[16])) - + TO_INT(TCC_READ[17])) + TO_INT(TCC_READ[18])) + TO_INT(TCC_READ[19])) + - TO_INT(TCC_READ[20])) + TO_INT(TCC_READ[21])) + TO_INT(TCC_READ[22])) + TO_INT(TCC_READ[23])) - + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + - TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) - + TO_INT(TCC_READ[31])) / 32) / $denom)) - std dev: STD((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) - + 
TO_INT(TCC_READ[2])) + TO_INT(TCC_READ[3])) + TO_INT(TCC_READ[4])) + TO_INT(TCC_READ[5])) - + TO_INT(TCC_READ[6])) + TO_INT(TCC_READ[7])) + TO_INT(TCC_READ[8])) + TO_INT(TCC_READ[9])) - + TO_INT(TCC_READ[10])) + TO_INT(TCC_READ[11])) + TO_INT(TCC_READ[12])) + - TO_INT(TCC_READ[13])) + TO_INT(TCC_READ[14])) + TO_INT(TCC_READ[15])) + TO_INT(TCC_READ[16])) - + TO_INT(TCC_READ[17])) + TO_INT(TCC_READ[18])) + TO_INT(TCC_READ[19])) + - TO_INT(TCC_READ[20])) + TO_INT(TCC_READ[21])) + TO_INT(TCC_READ[22])) + TO_INT(TCC_READ[23])) - + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + - TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) - + TO_INT(TCC_READ[31])) / 32) / $denom)) - min: MIN((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) - + TO_INT(TCC_READ[2])) + TO_INT(TCC_READ[3])) + TO_INT(TCC_READ[4])) + TO_INT(TCC_READ[5])) - + TO_INT(TCC_READ[6])) + TO_INT(TCC_READ[7])) + TO_INT(TCC_READ[8])) + TO_INT(TCC_READ[9])) - + TO_INT(TCC_READ[10])) + TO_INT(TCC_READ[11])) + TO_INT(TCC_READ[12])) + - TO_INT(TCC_READ[13])) + TO_INT(TCC_READ[14])) + TO_INT(TCC_READ[15])) + TO_INT(TCC_READ[16])) - + TO_INT(TCC_READ[17])) + TO_INT(TCC_READ[18])) + TO_INT(TCC_READ[19])) + - TO_INT(TCC_READ[20])) + TO_INT(TCC_READ[21])) + TO_INT(TCC_READ[22])) + TO_INT(TCC_READ[23])) - + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + - TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) - + TO_INT(TCC_READ[31])) / 32) / $denom)) - max: MAX((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) - + TO_INT(TCC_READ[2])) + TO_INT(TCC_READ[3])) + TO_INT(TCC_READ[4])) + TO_INT(TCC_READ[5])) - + TO_INT(TCC_READ[6])) + TO_INT(TCC_READ[7])) + TO_INT(TCC_READ[8])) + TO_INT(TCC_READ[9])) - + TO_INT(TCC_READ[10])) + TO_INT(TCC_READ[11])) + TO_INT(TCC_READ[12])) + - TO_INT(TCC_READ[13])) + TO_INT(TCC_READ[14])) + TO_INT(TCC_READ[15])) 
+ TO_INT(TCC_READ[16])) - + TO_INT(TCC_READ[17])) + TO_INT(TCC_READ[18])) + TO_INT(TCC_READ[19])) + - TO_INT(TCC_READ[20])) + TO_INT(TCC_READ[21])) + TO_INT(TCC_READ[22])) + TO_INT(TCC_READ[23])) - + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + - TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) - + TO_INT(TCC_READ[31])) / 32) / $denom)) - unit: (Req + $normUnit) - tips: - L1 - L2 Write Req: - avg: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) - + TO_INT(TCC_WRITE[2])) + TO_INT(TCC_WRITE[3])) + TO_INT(TCC_WRITE[4])) + - TO_INT(TCC_WRITE[5])) + TO_INT(TCC_WRITE[6])) + TO_INT(TCC_WRITE[7])) + TO_INT(TCC_WRITE[8])) - + TO_INT(TCC_WRITE[9])) + TO_INT(TCC_WRITE[10])) + TO_INT(TCC_WRITE[11])) - + TO_INT(TCC_WRITE[12])) + TO_INT(TCC_WRITE[13])) + TO_INT(TCC_WRITE[14])) - + TO_INT(TCC_WRITE[15])) + TO_INT(TCC_WRITE[16])) + TO_INT(TCC_WRITE[17])) - + TO_INT(TCC_WRITE[18])) + TO_INT(TCC_WRITE[19])) + TO_INT(TCC_WRITE[20])) - + TO_INT(TCC_WRITE[21])) + TO_INT(TCC_WRITE[22])) + TO_INT(TCC_WRITE[23])) - + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) - + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) - + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - std dev: STD((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) - + TO_INT(TCC_WRITE[2])) + TO_INT(TCC_WRITE[3])) + TO_INT(TCC_WRITE[4])) + - TO_INT(TCC_WRITE[5])) + TO_INT(TCC_WRITE[6])) + TO_INT(TCC_WRITE[7])) + TO_INT(TCC_WRITE[8])) - + TO_INT(TCC_WRITE[9])) + TO_INT(TCC_WRITE[10])) + TO_INT(TCC_WRITE[11])) - + TO_INT(TCC_WRITE[12])) + TO_INT(TCC_WRITE[13])) + TO_INT(TCC_WRITE[14])) - + TO_INT(TCC_WRITE[15])) + TO_INT(TCC_WRITE[16])) + TO_INT(TCC_WRITE[17])) - + TO_INT(TCC_WRITE[18])) + TO_INT(TCC_WRITE[19])) + TO_INT(TCC_WRITE[20])) - + TO_INT(TCC_WRITE[21])) + TO_INT(TCC_WRITE[22])) + TO_INT(TCC_WRITE[23])) - + 
TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) - + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) - + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - min: MIN((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) - + TO_INT(TCC_WRITE[2])) + TO_INT(TCC_WRITE[3])) + TO_INT(TCC_WRITE[4])) + - TO_INT(TCC_WRITE[5])) + TO_INT(TCC_WRITE[6])) + TO_INT(TCC_WRITE[7])) + TO_INT(TCC_WRITE[8])) - + TO_INT(TCC_WRITE[9])) + TO_INT(TCC_WRITE[10])) + TO_INT(TCC_WRITE[11])) - + TO_INT(TCC_WRITE[12])) + TO_INT(TCC_WRITE[13])) + TO_INT(TCC_WRITE[14])) - + TO_INT(TCC_WRITE[15])) + TO_INT(TCC_WRITE[16])) + TO_INT(TCC_WRITE[17])) - + TO_INT(TCC_WRITE[18])) + TO_INT(TCC_WRITE[19])) + TO_INT(TCC_WRITE[20])) - + TO_INT(TCC_WRITE[21])) + TO_INT(TCC_WRITE[22])) + TO_INT(TCC_WRITE[23])) - + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) - + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) - + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - max: MAX((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) - + TO_INT(TCC_WRITE[2])) + TO_INT(TCC_WRITE[3])) + TO_INT(TCC_WRITE[4])) + - TO_INT(TCC_WRITE[5])) + TO_INT(TCC_WRITE[6])) + TO_INT(TCC_WRITE[7])) + TO_INT(TCC_WRITE[8])) - + TO_INT(TCC_WRITE[9])) + TO_INT(TCC_WRITE[10])) + TO_INT(TCC_WRITE[11])) - + TO_INT(TCC_WRITE[12])) + TO_INT(TCC_WRITE[13])) + TO_INT(TCC_WRITE[14])) - + TO_INT(TCC_WRITE[15])) + TO_INT(TCC_WRITE[16])) + TO_INT(TCC_WRITE[17])) - + TO_INT(TCC_WRITE[18])) + TO_INT(TCC_WRITE[19])) + TO_INT(TCC_WRITE[20])) - + TO_INT(TCC_WRITE[21])) + TO_INT(TCC_WRITE[22])) + TO_INT(TCC_WRITE[23])) - + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) - + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) - + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - unit: (Req + $normUnit) - tips: - L1 - L2 
Atomic Req: - avg: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) - + TO_INT(TCC_ATOMIC[2])) + TO_INT(TCC_ATOMIC[3])) + TO_INT(TCC_ATOMIC[4])) - + TO_INT(TCC_ATOMIC[5])) + TO_INT(TCC_ATOMIC[6])) + TO_INT(TCC_ATOMIC[7])) - + TO_INT(TCC_ATOMIC[8])) + TO_INT(TCC_ATOMIC[9])) + TO_INT(TCC_ATOMIC[10])) - + TO_INT(TCC_ATOMIC[11])) + TO_INT(TCC_ATOMIC[12])) + TO_INT(TCC_ATOMIC[13])) - + TO_INT(TCC_ATOMIC[14])) + TO_INT(TCC_ATOMIC[15])) + TO_INT(TCC_ATOMIC[16])) - + TO_INT(TCC_ATOMIC[17])) + TO_INT(TCC_ATOMIC[18])) + TO_INT(TCC_ATOMIC[19])) - + TO_INT(TCC_ATOMIC[20])) + TO_INT(TCC_ATOMIC[21])) + TO_INT(TCC_ATOMIC[22])) - + TO_INT(TCC_ATOMIC[23])) + TO_INT(TCC_ATOMIC[24])) + TO_INT(TCC_ATOMIC[25])) - + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) - + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) - / 32) / $denom)) - std dev: STD((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) - + TO_INT(TCC_ATOMIC[2])) + TO_INT(TCC_ATOMIC[3])) + TO_INT(TCC_ATOMIC[4])) - + TO_INT(TCC_ATOMIC[5])) + TO_INT(TCC_ATOMIC[6])) + TO_INT(TCC_ATOMIC[7])) - + TO_INT(TCC_ATOMIC[8])) + TO_INT(TCC_ATOMIC[9])) + TO_INT(TCC_ATOMIC[10])) - + TO_INT(TCC_ATOMIC[11])) + TO_INT(TCC_ATOMIC[12])) + TO_INT(TCC_ATOMIC[13])) - + TO_INT(TCC_ATOMIC[14])) + TO_INT(TCC_ATOMIC[15])) + TO_INT(TCC_ATOMIC[16])) - + TO_INT(TCC_ATOMIC[17])) + TO_INT(TCC_ATOMIC[18])) + TO_INT(TCC_ATOMIC[19])) - + TO_INT(TCC_ATOMIC[20])) + TO_INT(TCC_ATOMIC[21])) + TO_INT(TCC_ATOMIC[22])) - + TO_INT(TCC_ATOMIC[23])) + TO_INT(TCC_ATOMIC[24])) + TO_INT(TCC_ATOMIC[25])) - + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) - + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) - / 32) / $denom)) - min: MIN((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) - + TO_INT(TCC_ATOMIC[2])) + TO_INT(TCC_ATOMIC[3])) + TO_INT(TCC_ATOMIC[4])) - + 
TO_INT(TCC_ATOMIC[5])) + TO_INT(TCC_ATOMIC[6])) + TO_INT(TCC_ATOMIC[7])) - + TO_INT(TCC_ATOMIC[8])) + TO_INT(TCC_ATOMIC[9])) + TO_INT(TCC_ATOMIC[10])) - + TO_INT(TCC_ATOMIC[11])) + TO_INT(TCC_ATOMIC[12])) + TO_INT(TCC_ATOMIC[13])) - + TO_INT(TCC_ATOMIC[14])) + TO_INT(TCC_ATOMIC[15])) + TO_INT(TCC_ATOMIC[16])) - + TO_INT(TCC_ATOMIC[17])) + TO_INT(TCC_ATOMIC[18])) + TO_INT(TCC_ATOMIC[19])) - + TO_INT(TCC_ATOMIC[20])) + TO_INT(TCC_ATOMIC[21])) + TO_INT(TCC_ATOMIC[22])) - + TO_INT(TCC_ATOMIC[23])) + TO_INT(TCC_ATOMIC[24])) + TO_INT(TCC_ATOMIC[25])) - + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) - + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) - / 32) / $denom)) - max: MAX((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) - + TO_INT(TCC_ATOMIC[2])) + TO_INT(TCC_ATOMIC[3])) + TO_INT(TCC_ATOMIC[4])) - + TO_INT(TCC_ATOMIC[5])) + TO_INT(TCC_ATOMIC[6])) + TO_INT(TCC_ATOMIC[7])) - + TO_INT(TCC_ATOMIC[8])) + TO_INT(TCC_ATOMIC[9])) + TO_INT(TCC_ATOMIC[10])) - + TO_INT(TCC_ATOMIC[11])) + TO_INT(TCC_ATOMIC[12])) + TO_INT(TCC_ATOMIC[13])) - + TO_INT(TCC_ATOMIC[14])) + TO_INT(TCC_ATOMIC[15])) + TO_INT(TCC_ATOMIC[16])) - + TO_INT(TCC_ATOMIC[17])) + TO_INT(TCC_ATOMIC[18])) + TO_INT(TCC_ATOMIC[19])) - + TO_INT(TCC_ATOMIC[20])) + TO_INT(TCC_ATOMIC[21])) + TO_INT(TCC_ATOMIC[22])) - + TO_INT(TCC_ATOMIC[23])) + TO_INT(TCC_ATOMIC[24])) + TO_INT(TCC_ATOMIC[25])) - + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) - + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) - / 32) / $denom)) - unit: (Req + $normUnit) - tips: - L2 - EA Read Req: - avg: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) - + TO_INT(TCC_EA_RDREQ[2])) + TO_INT(TCC_EA_RDREQ[3])) + TO_INT(TCC_EA_RDREQ[4])) - + TO_INT(TCC_EA_RDREQ[5])) + TO_INT(TCC_EA_RDREQ[6])) + TO_INT(TCC_EA_RDREQ[7])) - + TO_INT(TCC_EA_RDREQ[8])) + 
TO_INT(TCC_EA_RDREQ[9])) + TO_INT(TCC_EA_RDREQ[10])) - + TO_INT(TCC_EA_RDREQ[11])) + TO_INT(TCC_EA_RDREQ[12])) + TO_INT(TCC_EA_RDREQ[13])) - + TO_INT(TCC_EA_RDREQ[14])) + TO_INT(TCC_EA_RDREQ[15])) + TO_INT(TCC_EA_RDREQ[16])) - + TO_INT(TCC_EA_RDREQ[17])) + TO_INT(TCC_EA_RDREQ[18])) + TO_INT(TCC_EA_RDREQ[19])) - + TO_INT(TCC_EA_RDREQ[20])) + TO_INT(TCC_EA_RDREQ[21])) + TO_INT(TCC_EA_RDREQ[22])) - + TO_INT(TCC_EA_RDREQ[23])) + TO_INT(TCC_EA_RDREQ[24])) + TO_INT(TCC_EA_RDREQ[25])) - + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) - + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) - / 32) / $denom)) - std dev: STD((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) - + TO_INT(TCC_EA_RDREQ[2])) + TO_INT(TCC_EA_RDREQ[3])) + TO_INT(TCC_EA_RDREQ[4])) - + TO_INT(TCC_EA_RDREQ[5])) + TO_INT(TCC_EA_RDREQ[6])) + TO_INT(TCC_EA_RDREQ[7])) - + TO_INT(TCC_EA_RDREQ[8])) + TO_INT(TCC_EA_RDREQ[9])) + TO_INT(TCC_EA_RDREQ[10])) - + TO_INT(TCC_EA_RDREQ[11])) + TO_INT(TCC_EA_RDREQ[12])) + TO_INT(TCC_EA_RDREQ[13])) - + TO_INT(TCC_EA_RDREQ[14])) + TO_INT(TCC_EA_RDREQ[15])) + TO_INT(TCC_EA_RDREQ[16])) - + TO_INT(TCC_EA_RDREQ[17])) + TO_INT(TCC_EA_RDREQ[18])) + TO_INT(TCC_EA_RDREQ[19])) - + TO_INT(TCC_EA_RDREQ[20])) + TO_INT(TCC_EA_RDREQ[21])) + TO_INT(TCC_EA_RDREQ[22])) - + TO_INT(TCC_EA_RDREQ[23])) + TO_INT(TCC_EA_RDREQ[24])) + TO_INT(TCC_EA_RDREQ[25])) - + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) - + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) - / 32) / $denom)) - min: MIN((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) - + TO_INT(TCC_EA_RDREQ[2])) + TO_INT(TCC_EA_RDREQ[3])) + TO_INT(TCC_EA_RDREQ[4])) - + TO_INT(TCC_EA_RDREQ[5])) + TO_INT(TCC_EA_RDREQ[6])) + TO_INT(TCC_EA_RDREQ[7])) - + TO_INT(TCC_EA_RDREQ[8])) + TO_INT(TCC_EA_RDREQ[9])) + TO_INT(TCC_EA_RDREQ[10])) - + 
TO_INT(TCC_EA_RDREQ[11])) + TO_INT(TCC_EA_RDREQ[12])) + TO_INT(TCC_EA_RDREQ[13])) - + TO_INT(TCC_EA_RDREQ[14])) + TO_INT(TCC_EA_RDREQ[15])) + TO_INT(TCC_EA_RDREQ[16])) - + TO_INT(TCC_EA_RDREQ[17])) + TO_INT(TCC_EA_RDREQ[18])) + TO_INT(TCC_EA_RDREQ[19])) - + TO_INT(TCC_EA_RDREQ[20])) + TO_INT(TCC_EA_RDREQ[21])) + TO_INT(TCC_EA_RDREQ[22])) - + TO_INT(TCC_EA_RDREQ[23])) + TO_INT(TCC_EA_RDREQ[24])) + TO_INT(TCC_EA_RDREQ[25])) - + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) - + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) - / 32) / $denom)) - max: MAX((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) - + TO_INT(TCC_EA_RDREQ[2])) + TO_INT(TCC_EA_RDREQ[3])) + TO_INT(TCC_EA_RDREQ[4])) - + TO_INT(TCC_EA_RDREQ[5])) + TO_INT(TCC_EA_RDREQ[6])) + TO_INT(TCC_EA_RDREQ[7])) - + TO_INT(TCC_EA_RDREQ[8])) + TO_INT(TCC_EA_RDREQ[9])) + TO_INT(TCC_EA_RDREQ[10])) - + TO_INT(TCC_EA_RDREQ[11])) + TO_INT(TCC_EA_RDREQ[12])) + TO_INT(TCC_EA_RDREQ[13])) - + TO_INT(TCC_EA_RDREQ[14])) + TO_INT(TCC_EA_RDREQ[15])) + TO_INT(TCC_EA_RDREQ[16])) - + TO_INT(TCC_EA_RDREQ[17])) + TO_INT(TCC_EA_RDREQ[18])) + TO_INT(TCC_EA_RDREQ[19])) - + TO_INT(TCC_EA_RDREQ[20])) + TO_INT(TCC_EA_RDREQ[21])) + TO_INT(TCC_EA_RDREQ[22])) - + TO_INT(TCC_EA_RDREQ[23])) + TO_INT(TCC_EA_RDREQ[24])) + TO_INT(TCC_EA_RDREQ[25])) - + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) - + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) - / 32) / $denom)) - unit: (Req + $normUnit) - tips: - L2 - EA Write Req: - avg: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) - + TO_INT(TCC_EA_WRREQ[2])) + TO_INT(TCC_EA_WRREQ[3])) + TO_INT(TCC_EA_WRREQ[4])) - + TO_INT(TCC_EA_WRREQ[5])) + TO_INT(TCC_EA_WRREQ[6])) + TO_INT(TCC_EA_WRREQ[7])) - + TO_INT(TCC_EA_WRREQ[8])) + TO_INT(TCC_EA_WRREQ[9])) + TO_INT(TCC_EA_WRREQ[10])) - + 
TO_INT(TCC_EA_WRREQ[11])) + TO_INT(TCC_EA_WRREQ[12])) + TO_INT(TCC_EA_WRREQ[13])) - + TO_INT(TCC_EA_WRREQ[14])) + TO_INT(TCC_EA_WRREQ[15])) + TO_INT(TCC_EA_WRREQ[16])) - + TO_INT(TCC_EA_WRREQ[17])) + TO_INT(TCC_EA_WRREQ[18])) + TO_INT(TCC_EA_WRREQ[19])) - + TO_INT(TCC_EA_WRREQ[20])) + TO_INT(TCC_EA_WRREQ[21])) + TO_INT(TCC_EA_WRREQ[22])) - + TO_INT(TCC_EA_WRREQ[23])) + TO_INT(TCC_EA_WRREQ[24])) + TO_INT(TCC_EA_WRREQ[25])) - + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) - + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) - / 32) / $denom)) - std dev: STD((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) - + TO_INT(TCC_EA_WRREQ[2])) + TO_INT(TCC_EA_WRREQ[3])) + TO_INT(TCC_EA_WRREQ[4])) - + TO_INT(TCC_EA_WRREQ[5])) + TO_INT(TCC_EA_WRREQ[6])) + TO_INT(TCC_EA_WRREQ[7])) - + TO_INT(TCC_EA_WRREQ[8])) + TO_INT(TCC_EA_WRREQ[9])) + TO_INT(TCC_EA_WRREQ[10])) - + TO_INT(TCC_EA_WRREQ[11])) + TO_INT(TCC_EA_WRREQ[12])) + TO_INT(TCC_EA_WRREQ[13])) - + TO_INT(TCC_EA_WRREQ[14])) + TO_INT(TCC_EA_WRREQ[15])) + TO_INT(TCC_EA_WRREQ[16])) - + TO_INT(TCC_EA_WRREQ[17])) + TO_INT(TCC_EA_WRREQ[18])) + TO_INT(TCC_EA_WRREQ[19])) - + TO_INT(TCC_EA_WRREQ[20])) + TO_INT(TCC_EA_WRREQ[21])) + TO_INT(TCC_EA_WRREQ[22])) - + TO_INT(TCC_EA_WRREQ[23])) + TO_INT(TCC_EA_WRREQ[24])) + TO_INT(TCC_EA_WRREQ[25])) - + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) - + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) - / 32) / $denom)) - min: MIN((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) - + TO_INT(TCC_EA_WRREQ[2])) + TO_INT(TCC_EA_WRREQ[3])) + TO_INT(TCC_EA_WRREQ[4])) - + TO_INT(TCC_EA_WRREQ[5])) + TO_INT(TCC_EA_WRREQ[6])) + TO_INT(TCC_EA_WRREQ[7])) - + TO_INT(TCC_EA_WRREQ[8])) + TO_INT(TCC_EA_WRREQ[9])) + TO_INT(TCC_EA_WRREQ[10])) - + TO_INT(TCC_EA_WRREQ[11])) + TO_INT(TCC_EA_WRREQ[12])) + 
TO_INT(TCC_EA_WRREQ[13])) - + TO_INT(TCC_EA_WRREQ[14])) + TO_INT(TCC_EA_WRREQ[15])) + TO_INT(TCC_EA_WRREQ[16])) - + TO_INT(TCC_EA_WRREQ[17])) + TO_INT(TCC_EA_WRREQ[18])) + TO_INT(TCC_EA_WRREQ[19])) - + TO_INT(TCC_EA_WRREQ[20])) + TO_INT(TCC_EA_WRREQ[21])) + TO_INT(TCC_EA_WRREQ[22])) - + TO_INT(TCC_EA_WRREQ[23])) + TO_INT(TCC_EA_WRREQ[24])) + TO_INT(TCC_EA_WRREQ[25])) - + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) - + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) - / 32) / $denom)) - max: MAX((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) - + TO_INT(TCC_EA_WRREQ[2])) + TO_INT(TCC_EA_WRREQ[3])) + TO_INT(TCC_EA_WRREQ[4])) - + TO_INT(TCC_EA_WRREQ[5])) + TO_INT(TCC_EA_WRREQ[6])) + TO_INT(TCC_EA_WRREQ[7])) - + TO_INT(TCC_EA_WRREQ[8])) + TO_INT(TCC_EA_WRREQ[9])) + TO_INT(TCC_EA_WRREQ[10])) - + TO_INT(TCC_EA_WRREQ[11])) + TO_INT(TCC_EA_WRREQ[12])) + TO_INT(TCC_EA_WRREQ[13])) - + TO_INT(TCC_EA_WRREQ[14])) + TO_INT(TCC_EA_WRREQ[15])) + TO_INT(TCC_EA_WRREQ[16])) - + TO_INT(TCC_EA_WRREQ[17])) + TO_INT(TCC_EA_WRREQ[18])) + TO_INT(TCC_EA_WRREQ[19])) - + TO_INT(TCC_EA_WRREQ[20])) + TO_INT(TCC_EA_WRREQ[21])) + TO_INT(TCC_EA_WRREQ[22])) - + TO_INT(TCC_EA_WRREQ[23])) + TO_INT(TCC_EA_WRREQ[24])) + TO_INT(TCC_EA_WRREQ[25])) - + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) - + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) - / 32) / $denom)) - unit: (Req + $normUnit) - tips: - L2 - EA Atomic Req: - avg: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) - + TO_INT(TCC_EA_ATOMIC[2])) + TO_INT(TCC_EA_ATOMIC[3])) + TO_INT(TCC_EA_ATOMIC[4])) - + TO_INT(TCC_EA_ATOMIC[5])) + TO_INT(TCC_EA_ATOMIC[6])) + TO_INT(TCC_EA_ATOMIC[7])) - + TO_INT(TCC_EA_ATOMIC[8])) + TO_INT(TCC_EA_ATOMIC[9])) + TO_INT(TCC_EA_ATOMIC[10])) - + TO_INT(TCC_EA_ATOMIC[11])) + TO_INT(TCC_EA_ATOMIC[12])) 
+ TO_INT(TCC_EA_ATOMIC[13])) - + TO_INT(TCC_EA_ATOMIC[14])) + TO_INT(TCC_EA_ATOMIC[15])) + TO_INT(TCC_EA_ATOMIC[16])) - + TO_INT(TCC_EA_ATOMIC[17])) + TO_INT(TCC_EA_ATOMIC[18])) + TO_INT(TCC_EA_ATOMIC[19])) - + TO_INT(TCC_EA_ATOMIC[20])) + TO_INT(TCC_EA_ATOMIC[21])) + TO_INT(TCC_EA_ATOMIC[22])) - + TO_INT(TCC_EA_ATOMIC[23])) + TO_INT(TCC_EA_ATOMIC[24])) + TO_INT(TCC_EA_ATOMIC[25])) - + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) - + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) - / 32) / $denom)) - std dev: STD((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) - + TO_INT(TCC_EA_ATOMIC[2])) + TO_INT(TCC_EA_ATOMIC[3])) + TO_INT(TCC_EA_ATOMIC[4])) - + TO_INT(TCC_EA_ATOMIC[5])) + TO_INT(TCC_EA_ATOMIC[6])) + TO_INT(TCC_EA_ATOMIC[7])) - + TO_INT(TCC_EA_ATOMIC[8])) + TO_INT(TCC_EA_ATOMIC[9])) + TO_INT(TCC_EA_ATOMIC[10])) - + TO_INT(TCC_EA_ATOMIC[11])) + TO_INT(TCC_EA_ATOMIC[12])) + TO_INT(TCC_EA_ATOMIC[13])) - + TO_INT(TCC_EA_ATOMIC[14])) + TO_INT(TCC_EA_ATOMIC[15])) + TO_INT(TCC_EA_ATOMIC[16])) - + TO_INT(TCC_EA_ATOMIC[17])) + TO_INT(TCC_EA_ATOMIC[18])) + TO_INT(TCC_EA_ATOMIC[19])) - + TO_INT(TCC_EA_ATOMIC[20])) + TO_INT(TCC_EA_ATOMIC[21])) + TO_INT(TCC_EA_ATOMIC[22])) - + TO_INT(TCC_EA_ATOMIC[23])) + TO_INT(TCC_EA_ATOMIC[24])) + TO_INT(TCC_EA_ATOMIC[25])) - + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) - + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) - / 32) / $denom)) - min: MIN((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) - + TO_INT(TCC_EA_ATOMIC[2])) + TO_INT(TCC_EA_ATOMIC[3])) + TO_INT(TCC_EA_ATOMIC[4])) - + TO_INT(TCC_EA_ATOMIC[5])) + TO_INT(TCC_EA_ATOMIC[6])) + TO_INT(TCC_EA_ATOMIC[7])) - + TO_INT(TCC_EA_ATOMIC[8])) + TO_INT(TCC_EA_ATOMIC[9])) + TO_INT(TCC_EA_ATOMIC[10])) - + TO_INT(TCC_EA_ATOMIC[11])) + TO_INT(TCC_EA_ATOMIC[12])) 
+ TO_INT(TCC_EA_ATOMIC[13])) - + TO_INT(TCC_EA_ATOMIC[14])) + TO_INT(TCC_EA_ATOMIC[15])) + TO_INT(TCC_EA_ATOMIC[16])) - + TO_INT(TCC_EA_ATOMIC[17])) + TO_INT(TCC_EA_ATOMIC[18])) + TO_INT(TCC_EA_ATOMIC[19])) - + TO_INT(TCC_EA_ATOMIC[20])) + TO_INT(TCC_EA_ATOMIC[21])) + TO_INT(TCC_EA_ATOMIC[22])) - + TO_INT(TCC_EA_ATOMIC[23])) + TO_INT(TCC_EA_ATOMIC[24])) + TO_INT(TCC_EA_ATOMIC[25])) - + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) - + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) - / 32) / $denom)) - max: MAX((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) - + TO_INT(TCC_EA_ATOMIC[2])) + TO_INT(TCC_EA_ATOMIC[3])) + TO_INT(TCC_EA_ATOMIC[4])) - + TO_INT(TCC_EA_ATOMIC[5])) + TO_INT(TCC_EA_ATOMIC[6])) + TO_INT(TCC_EA_ATOMIC[7])) - + TO_INT(TCC_EA_ATOMIC[8])) + TO_INT(TCC_EA_ATOMIC[9])) + TO_INT(TCC_EA_ATOMIC[10])) - + TO_INT(TCC_EA_ATOMIC[11])) + TO_INT(TCC_EA_ATOMIC[12])) + TO_INT(TCC_EA_ATOMIC[13])) - + TO_INT(TCC_EA_ATOMIC[14])) + TO_INT(TCC_EA_ATOMIC[15])) + TO_INT(TCC_EA_ATOMIC[16])) - + TO_INT(TCC_EA_ATOMIC[17])) + TO_INT(TCC_EA_ATOMIC[18])) + TO_INT(TCC_EA_ATOMIC[19])) - + TO_INT(TCC_EA_ATOMIC[20])) + TO_INT(TCC_EA_ATOMIC[21])) + TO_INT(TCC_EA_ATOMIC[22])) - + TO_INT(TCC_EA_ATOMIC[23])) + TO_INT(TCC_EA_ATOMIC[24])) + TO_INT(TCC_EA_ATOMIC[25])) - + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) - + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) - / 32) / $denom)) - unit: (Req + $normUnit) - tips: - L2 - EA Read Lat: - avg: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) - + TCC_EA_RDREQ_LEVEL[2]) + TCC_EA_RDREQ_LEVEL[3]) + TCC_EA_RDREQ_LEVEL[4]) - + TCC_EA_RDREQ_LEVEL[5]) + TCC_EA_RDREQ_LEVEL[6]) + TCC_EA_RDREQ_LEVEL[7]) - + TCC_EA_RDREQ_LEVEL[8]) + TCC_EA_RDREQ_LEVEL[9]) + TCC_EA_RDREQ_LEVEL[10]) - + TCC_EA_RDREQ_LEVEL[11]) + 
TCC_EA_RDREQ_LEVEL[12]) + TCC_EA_RDREQ_LEVEL[13]) - + TCC_EA_RDREQ_LEVEL[14]) + TCC_EA_RDREQ_LEVEL[15]) + TCC_EA_RDREQ_LEVEL[16]) - + TCC_EA_RDREQ_LEVEL[17]) + TCC_EA_RDREQ_LEVEL[18]) + TCC_EA_RDREQ_LEVEL[19]) - + TCC_EA_RDREQ_LEVEL[20]) + TCC_EA_RDREQ_LEVEL[21]) + TCC_EA_RDREQ_LEVEL[22]) - + TCC_EA_RDREQ_LEVEL[23]) + TCC_EA_RDREQ_LEVEL[24]) + TCC_EA_RDREQ_LEVEL[25]) - + TCC_EA_RDREQ_LEVEL[26]) + TCC_EA_RDREQ_LEVEL[27]) + TCC_EA_RDREQ_LEVEL[28]) - + TCC_EA_RDREQ_LEVEL[29]) + TCC_EA_RDREQ_LEVEL[30]) + TCC_EA_RDREQ_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_RDREQ[0] + TCC_EA_RDREQ[1]) + TCC_EA_RDREQ[2]) - + TCC_EA_RDREQ[3]) + TCC_EA_RDREQ[4]) + TCC_EA_RDREQ[5]) + TCC_EA_RDREQ[6]) - + TCC_EA_RDREQ[7]) + TCC_EA_RDREQ[8]) + TCC_EA_RDREQ[9]) + TCC_EA_RDREQ[10]) - + TCC_EA_RDREQ[11]) + TCC_EA_RDREQ[12]) + TCC_EA_RDREQ[13]) + TCC_EA_RDREQ[14]) - + TCC_EA_RDREQ[15]) + TCC_EA_RDREQ[16]) + TCC_EA_RDREQ[17]) + TCC_EA_RDREQ[18]) - + TCC_EA_RDREQ[19]) + TCC_EA_RDREQ[20]) + TCC_EA_RDREQ[21]) + TCC_EA_RDREQ[22]) - + TCC_EA_RDREQ[23]) + TCC_EA_RDREQ[24]) + TCC_EA_RDREQ[25]) + TCC_EA_RDREQ[26]) - + TCC_EA_RDREQ[27]) + TCC_EA_RDREQ[28]) + TCC_EA_RDREQ[29]) + TCC_EA_RDREQ[30]) - + TCC_EA_RDREQ[31])) if ((((((((((((((((((((((((((((((((TCC_EA_RDREQ[0] + - TCC_EA_RDREQ[1]) + TCC_EA_RDREQ[2]) + TCC_EA_RDREQ[3]) + TCC_EA_RDREQ[4]) - + TCC_EA_RDREQ[5]) + TCC_EA_RDREQ[6]) + TCC_EA_RDREQ[7]) + TCC_EA_RDREQ[8]) - + TCC_EA_RDREQ[9]) + TCC_EA_RDREQ[10]) + TCC_EA_RDREQ[11]) + TCC_EA_RDREQ[12]) - + TCC_EA_RDREQ[13]) + TCC_EA_RDREQ[14]) + TCC_EA_RDREQ[15]) + TCC_EA_RDREQ[16]) - + TCC_EA_RDREQ[17]) + TCC_EA_RDREQ[18]) + TCC_EA_RDREQ[19]) + TCC_EA_RDREQ[20]) - + TCC_EA_RDREQ[21]) + TCC_EA_RDREQ[22]) + TCC_EA_RDREQ[23]) + TCC_EA_RDREQ[24]) - + TCC_EA_RDREQ[25]) + TCC_EA_RDREQ[26]) + TCC_EA_RDREQ[27]) + TCC_EA_RDREQ[28]) - + TCC_EA_RDREQ[29]) + TCC_EA_RDREQ[30]) + TCC_EA_RDREQ[31]) != 0) else None)) - std dev: STD((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) 
- + TCC_EA_RDREQ_LEVEL[2]) + TCC_EA_RDREQ_LEVEL[3]) + TCC_EA_RDREQ_LEVEL[4]) - + TCC_EA_RDREQ_LEVEL[5]) + TCC_EA_RDREQ_LEVEL[6]) + TCC_EA_RDREQ_LEVEL[7]) - + TCC_EA_RDREQ_LEVEL[8]) + TCC_EA_RDREQ_LEVEL[9]) + TCC_EA_RDREQ_LEVEL[10]) - + TCC_EA_RDREQ_LEVEL[11]) + TCC_EA_RDREQ_LEVEL[12]) + TCC_EA_RDREQ_LEVEL[13]) - + TCC_EA_RDREQ_LEVEL[14]) + TCC_EA_RDREQ_LEVEL[15]) + TCC_EA_RDREQ_LEVEL[16]) - + TCC_EA_RDREQ_LEVEL[17]) + TCC_EA_RDREQ_LEVEL[18]) + TCC_EA_RDREQ_LEVEL[19]) - + TCC_EA_RDREQ_LEVEL[20]) + TCC_EA_RDREQ_LEVEL[21]) + TCC_EA_RDREQ_LEVEL[22]) - + TCC_EA_RDREQ_LEVEL[23]) + TCC_EA_RDREQ_LEVEL[24]) + TCC_EA_RDREQ_LEVEL[25]) - + TCC_EA_RDREQ_LEVEL[26]) + TCC_EA_RDREQ_LEVEL[27]) + TCC_EA_RDREQ_LEVEL[28]) - + TCC_EA_RDREQ_LEVEL[29]) + TCC_EA_RDREQ_LEVEL[30]) + TCC_EA_RDREQ_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_RDREQ[0] + TCC_EA_RDREQ[1]) + TCC_EA_RDREQ[2]) - + TCC_EA_RDREQ[3]) + TCC_EA_RDREQ[4]) + TCC_EA_RDREQ[5]) + TCC_EA_RDREQ[6]) - + TCC_EA_RDREQ[7]) + TCC_EA_RDREQ[8]) + TCC_EA_RDREQ[9]) + TCC_EA_RDREQ[10]) - + TCC_EA_RDREQ[11]) + TCC_EA_RDREQ[12]) + TCC_EA_RDREQ[13]) + TCC_EA_RDREQ[14]) - + TCC_EA_RDREQ[15]) + TCC_EA_RDREQ[16]) + TCC_EA_RDREQ[17]) + TCC_EA_RDREQ[18]) - + TCC_EA_RDREQ[19]) + TCC_EA_RDREQ[20]) + TCC_EA_RDREQ[21]) + TCC_EA_RDREQ[22]) - + TCC_EA_RDREQ[23]) + TCC_EA_RDREQ[24]) + TCC_EA_RDREQ[25]) + TCC_EA_RDREQ[26]) - + TCC_EA_RDREQ[27]) + TCC_EA_RDREQ[28]) + TCC_EA_RDREQ[29]) + TCC_EA_RDREQ[30]) - + TCC_EA_RDREQ[31])) if ((((((((((((((((((((((((((((((((TCC_EA_RDREQ[0] + - TCC_EA_RDREQ[1]) + TCC_EA_RDREQ[2]) + TCC_EA_RDREQ[3]) + TCC_EA_RDREQ[4]) - + TCC_EA_RDREQ[5]) + TCC_EA_RDREQ[6]) + TCC_EA_RDREQ[7]) + TCC_EA_RDREQ[8]) - + TCC_EA_RDREQ[9]) + TCC_EA_RDREQ[10]) + TCC_EA_RDREQ[11]) + TCC_EA_RDREQ[12]) - + TCC_EA_RDREQ[13]) + TCC_EA_RDREQ[14]) + TCC_EA_RDREQ[15]) + TCC_EA_RDREQ[16]) - + TCC_EA_RDREQ[17]) + TCC_EA_RDREQ[18]) + TCC_EA_RDREQ[19]) + TCC_EA_RDREQ[20]) - + TCC_EA_RDREQ[21]) + TCC_EA_RDREQ[22]) + TCC_EA_RDREQ[23]) + 
TCC_EA_RDREQ[24]) - + TCC_EA_RDREQ[25]) + TCC_EA_RDREQ[26]) + TCC_EA_RDREQ[27]) + TCC_EA_RDREQ[28]) - + TCC_EA_RDREQ[29]) + TCC_EA_RDREQ[30]) + TCC_EA_RDREQ[31]) != 0) else None)) - min: MIN((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) - + TCC_EA_RDREQ_LEVEL[2]) + TCC_EA_RDREQ_LEVEL[3]) + TCC_EA_RDREQ_LEVEL[4]) - + TCC_EA_RDREQ_LEVEL[5]) + TCC_EA_RDREQ_LEVEL[6]) + TCC_EA_RDREQ_LEVEL[7]) - + TCC_EA_RDREQ_LEVEL[8]) + TCC_EA_RDREQ_LEVEL[9]) + TCC_EA_RDREQ_LEVEL[10]) - + TCC_EA_RDREQ_LEVEL[11]) + TCC_EA_RDREQ_LEVEL[12]) + TCC_EA_RDREQ_LEVEL[13]) - + TCC_EA_RDREQ_LEVEL[14]) + TCC_EA_RDREQ_LEVEL[15]) + TCC_EA_RDREQ_LEVEL[16]) - + TCC_EA_RDREQ_LEVEL[17]) + TCC_EA_RDREQ_LEVEL[18]) + TCC_EA_RDREQ_LEVEL[19]) - + TCC_EA_RDREQ_LEVEL[20]) + TCC_EA_RDREQ_LEVEL[21]) + TCC_EA_RDREQ_LEVEL[22]) - + TCC_EA_RDREQ_LEVEL[23]) + TCC_EA_RDREQ_LEVEL[24]) + TCC_EA_RDREQ_LEVEL[25]) - + TCC_EA_RDREQ_LEVEL[26]) + TCC_EA_RDREQ_LEVEL[27]) + TCC_EA_RDREQ_LEVEL[28]) - + TCC_EA_RDREQ_LEVEL[29]) + TCC_EA_RDREQ_LEVEL[30]) + TCC_EA_RDREQ_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_RDREQ[0] + TCC_EA_RDREQ[1]) + TCC_EA_RDREQ[2]) - + TCC_EA_RDREQ[3]) + TCC_EA_RDREQ[4]) + TCC_EA_RDREQ[5]) + TCC_EA_RDREQ[6]) - + TCC_EA_RDREQ[7]) + TCC_EA_RDREQ[8]) + TCC_EA_RDREQ[9]) + TCC_EA_RDREQ[10]) - + TCC_EA_RDREQ[11]) + TCC_EA_RDREQ[12]) + TCC_EA_RDREQ[13]) + TCC_EA_RDREQ[14]) - + TCC_EA_RDREQ[15]) + TCC_EA_RDREQ[16]) + TCC_EA_RDREQ[17]) + TCC_EA_RDREQ[18]) - + TCC_EA_RDREQ[19]) + TCC_EA_RDREQ[20]) + TCC_EA_RDREQ[21]) + TCC_EA_RDREQ[22]) - + TCC_EA_RDREQ[23]) + TCC_EA_RDREQ[24]) + TCC_EA_RDREQ[25]) + TCC_EA_RDREQ[26]) - + TCC_EA_RDREQ[27]) + TCC_EA_RDREQ[28]) + TCC_EA_RDREQ[29]) + TCC_EA_RDREQ[30]) - + TCC_EA_RDREQ[31])) if ((((((((((((((((((((((((((((((((TCC_EA_RDREQ[0] + - TCC_EA_RDREQ[1]) + TCC_EA_RDREQ[2]) + TCC_EA_RDREQ[3]) + TCC_EA_RDREQ[4]) - + TCC_EA_RDREQ[5]) + TCC_EA_RDREQ[6]) + TCC_EA_RDREQ[7]) + TCC_EA_RDREQ[8]) - + TCC_EA_RDREQ[9]) + TCC_EA_RDREQ[10]) + 
TCC_EA_RDREQ[11]) + TCC_EA_RDREQ[12]) - + TCC_EA_RDREQ[13]) + TCC_EA_RDREQ[14]) + TCC_EA_RDREQ[15]) + TCC_EA_RDREQ[16]) - + TCC_EA_RDREQ[17]) + TCC_EA_RDREQ[18]) + TCC_EA_RDREQ[19]) + TCC_EA_RDREQ[20]) - + TCC_EA_RDREQ[21]) + TCC_EA_RDREQ[22]) + TCC_EA_RDREQ[23]) + TCC_EA_RDREQ[24]) - + TCC_EA_RDREQ[25]) + TCC_EA_RDREQ[26]) + TCC_EA_RDREQ[27]) + TCC_EA_RDREQ[28]) - + TCC_EA_RDREQ[29]) + TCC_EA_RDREQ[30]) + TCC_EA_RDREQ[31]) != 0) else None)) - max: MAX((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) - + TCC_EA_RDREQ_LEVEL[2]) + TCC_EA_RDREQ_LEVEL[3]) + TCC_EA_RDREQ_LEVEL[4]) - + TCC_EA_RDREQ_LEVEL[5]) + TCC_EA_RDREQ_LEVEL[6]) + TCC_EA_RDREQ_LEVEL[7]) - + TCC_EA_RDREQ_LEVEL[8]) + TCC_EA_RDREQ_LEVEL[9]) + TCC_EA_RDREQ_LEVEL[10]) - + TCC_EA_RDREQ_LEVEL[11]) + TCC_EA_RDREQ_LEVEL[12]) + TCC_EA_RDREQ_LEVEL[13]) - + TCC_EA_RDREQ_LEVEL[14]) + TCC_EA_RDREQ_LEVEL[15]) + TCC_EA_RDREQ_LEVEL[16]) - + TCC_EA_RDREQ_LEVEL[17]) + TCC_EA_RDREQ_LEVEL[18]) + TCC_EA_RDREQ_LEVEL[19]) - + TCC_EA_RDREQ_LEVEL[20]) + TCC_EA_RDREQ_LEVEL[21]) + TCC_EA_RDREQ_LEVEL[22]) - + TCC_EA_RDREQ_LEVEL[23]) + TCC_EA_RDREQ_LEVEL[24]) + TCC_EA_RDREQ_LEVEL[25]) - + TCC_EA_RDREQ_LEVEL[26]) + TCC_EA_RDREQ_LEVEL[27]) + TCC_EA_RDREQ_LEVEL[28]) - + TCC_EA_RDREQ_LEVEL[29]) + TCC_EA_RDREQ_LEVEL[30]) + TCC_EA_RDREQ_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_RDREQ[0] + TCC_EA_RDREQ[1]) + TCC_EA_RDREQ[2]) - + TCC_EA_RDREQ[3]) + TCC_EA_RDREQ[4]) + TCC_EA_RDREQ[5]) + TCC_EA_RDREQ[6]) - + TCC_EA_RDREQ[7]) + TCC_EA_RDREQ[8]) + TCC_EA_RDREQ[9]) + TCC_EA_RDREQ[10]) - + TCC_EA_RDREQ[11]) + TCC_EA_RDREQ[12]) + TCC_EA_RDREQ[13]) + TCC_EA_RDREQ[14]) - + TCC_EA_RDREQ[15]) + TCC_EA_RDREQ[16]) + TCC_EA_RDREQ[17]) + TCC_EA_RDREQ[18]) - + TCC_EA_RDREQ[19]) + TCC_EA_RDREQ[20]) + TCC_EA_RDREQ[21]) + TCC_EA_RDREQ[22]) - + TCC_EA_RDREQ[23]) + TCC_EA_RDREQ[24]) + TCC_EA_RDREQ[25]) + TCC_EA_RDREQ[26]) - + TCC_EA_RDREQ[27]) + TCC_EA_RDREQ[28]) + TCC_EA_RDREQ[29]) + TCC_EA_RDREQ[30]) - + 
TCC_EA_RDREQ[31])) if ((((((((((((((((((((((((((((((((TCC_EA_RDREQ[0] + - TCC_EA_RDREQ[1]) + TCC_EA_RDREQ[2]) + TCC_EA_RDREQ[3]) + TCC_EA_RDREQ[4]) - + TCC_EA_RDREQ[5]) + TCC_EA_RDREQ[6]) + TCC_EA_RDREQ[7]) + TCC_EA_RDREQ[8]) - + TCC_EA_RDREQ[9]) + TCC_EA_RDREQ[10]) + TCC_EA_RDREQ[11]) + TCC_EA_RDREQ[12]) - + TCC_EA_RDREQ[13]) + TCC_EA_RDREQ[14]) + TCC_EA_RDREQ[15]) + TCC_EA_RDREQ[16]) - + TCC_EA_RDREQ[17]) + TCC_EA_RDREQ[18]) + TCC_EA_RDREQ[19]) + TCC_EA_RDREQ[20]) - + TCC_EA_RDREQ[21]) + TCC_EA_RDREQ[22]) + TCC_EA_RDREQ[23]) + TCC_EA_RDREQ[24]) - + TCC_EA_RDREQ[25]) + TCC_EA_RDREQ[26]) + TCC_EA_RDREQ[27]) + TCC_EA_RDREQ[28]) - + TCC_EA_RDREQ[29]) + TCC_EA_RDREQ[30]) + TCC_EA_RDREQ[31]) != 0) else None)) - unit: Cycles - tips: - L2 - EA Write Lat: - avg: AVG((((((((((((((((((((((((((((((((((TCC_EA_WRREQ_LEVEL[0] + TCC_EA_WRREQ_LEVEL[1]) - + TCC_EA_WRREQ_LEVEL[2]) + TCC_EA_WRREQ_LEVEL[3]) + TCC_EA_WRREQ_LEVEL[4]) - + TCC_EA_WRREQ_LEVEL[5]) + TCC_EA_WRREQ_LEVEL[6]) + TCC_EA_WRREQ_LEVEL[7]) - + TCC_EA_WRREQ_LEVEL[8]) + TCC_EA_WRREQ_LEVEL[9]) + TCC_EA_WRREQ_LEVEL[10]) - + TCC_EA_WRREQ_LEVEL[11]) + TCC_EA_WRREQ_LEVEL[12]) + TCC_EA_WRREQ_LEVEL[13]) - + TCC_EA_WRREQ_LEVEL[14]) + TCC_EA_WRREQ_LEVEL[15]) + TCC_EA_WRREQ_LEVEL[16]) - + TCC_EA_WRREQ_LEVEL[17]) + TCC_EA_WRREQ_LEVEL[18]) + TCC_EA_WRREQ_LEVEL[19]) - + TCC_EA_WRREQ_LEVEL[20]) + TCC_EA_WRREQ_LEVEL[21]) + TCC_EA_WRREQ_LEVEL[22]) - + TCC_EA_WRREQ_LEVEL[23]) + TCC_EA_WRREQ_LEVEL[24]) + TCC_EA_WRREQ_LEVEL[25]) - + TCC_EA_WRREQ_LEVEL[26]) + TCC_EA_WRREQ_LEVEL[27]) + TCC_EA_WRREQ_LEVEL[28]) - + TCC_EA_WRREQ_LEVEL[29]) + TCC_EA_WRREQ_LEVEL[30]) + TCC_EA_WRREQ_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_WRREQ[0] + TCC_EA_WRREQ[1]) + TCC_EA_WRREQ[2]) - + TCC_EA_WRREQ[3]) + TCC_EA_WRREQ[4]) + TCC_EA_WRREQ[5]) + TCC_EA_WRREQ[6]) - + TCC_EA_WRREQ[7]) + TCC_EA_WRREQ[8]) + TCC_EA_WRREQ[9]) + TCC_EA_WRREQ[10]) - + TCC_EA_WRREQ[11]) + TCC_EA_WRREQ[12]) + TCC_EA_WRREQ[13]) + TCC_EA_WRREQ[14]) - + TCC_EA_WRREQ[15]) + 
TCC_EA_WRREQ[16]) + TCC_EA_WRREQ[17]) + TCC_EA_WRREQ[18]) - + TCC_EA_WRREQ[19]) + TCC_EA_WRREQ[20]) + TCC_EA_WRREQ[21]) + TCC_EA_WRREQ[22]) - + TCC_EA_WRREQ[23]) + TCC_EA_WRREQ[24]) + TCC_EA_WRREQ[25]) + TCC_EA_WRREQ[26]) - + TCC_EA_WRREQ[27]) + TCC_EA_WRREQ[28]) + TCC_EA_WRREQ[29]) + TCC_EA_WRREQ[30]) - + TCC_EA_WRREQ[31])) if ((((((((((((((((((((((((((((((((TCC_EA_WRREQ[0] + - TCC_EA_WRREQ[1]) + TCC_EA_WRREQ[2]) + TCC_EA_WRREQ[3]) + TCC_EA_WRREQ[4]) - + TCC_EA_WRREQ[5]) + TCC_EA_WRREQ[6]) + TCC_EA_WRREQ[7]) + TCC_EA_WRREQ[8]) - + TCC_EA_WRREQ[9]) + TCC_EA_WRREQ[10]) + TCC_EA_WRREQ[11]) + TCC_EA_WRREQ[12]) - + TCC_EA_WRREQ[13]) + TCC_EA_WRREQ[14]) + TCC_EA_WRREQ[15]) + TCC_EA_WRREQ[16]) - + TCC_EA_WRREQ[17]) + TCC_EA_WRREQ[18]) + TCC_EA_WRREQ[19]) + TCC_EA_WRREQ[20]) - + TCC_EA_WRREQ[21]) + TCC_EA_WRREQ[22]) + TCC_EA_WRREQ[23]) + TCC_EA_WRREQ[24]) - + TCC_EA_WRREQ[25]) + TCC_EA_WRREQ[26]) + TCC_EA_WRREQ[27]) + TCC_EA_WRREQ[28]) - + TCC_EA_WRREQ[29]) + TCC_EA_WRREQ[30]) + TCC_EA_WRREQ[31]) != 0) else None)) - std dev: STD((((((((((((((((((((((((((((((((((TCC_EA_WRREQ_LEVEL[0] + TCC_EA_WRREQ_LEVEL[1]) - + TCC_EA_WRREQ_LEVEL[2]) + TCC_EA_WRREQ_LEVEL[3]) + TCC_EA_WRREQ_LEVEL[4]) - + TCC_EA_WRREQ_LEVEL[5]) + TCC_EA_WRREQ_LEVEL[6]) + TCC_EA_WRREQ_LEVEL[7]) - + TCC_EA_WRREQ_LEVEL[8]) + TCC_EA_WRREQ_LEVEL[9]) + TCC_EA_WRREQ_LEVEL[10]) - + TCC_EA_WRREQ_LEVEL[11]) + TCC_EA_WRREQ_LEVEL[12]) + TCC_EA_WRREQ_LEVEL[13]) - + TCC_EA_WRREQ_LEVEL[14]) + TCC_EA_WRREQ_LEVEL[15]) + TCC_EA_WRREQ_LEVEL[16]) - + TCC_EA_WRREQ_LEVEL[17]) + TCC_EA_WRREQ_LEVEL[18]) + TCC_EA_WRREQ_LEVEL[19]) - + TCC_EA_WRREQ_LEVEL[20]) + TCC_EA_WRREQ_LEVEL[21]) + TCC_EA_WRREQ_LEVEL[22]) - + TCC_EA_WRREQ_LEVEL[23]) + TCC_EA_WRREQ_LEVEL[24]) + TCC_EA_WRREQ_LEVEL[25]) - + TCC_EA_WRREQ_LEVEL[26]) + TCC_EA_WRREQ_LEVEL[27]) + TCC_EA_WRREQ_LEVEL[28]) - + TCC_EA_WRREQ_LEVEL[29]) + TCC_EA_WRREQ_LEVEL[30]) + TCC_EA_WRREQ_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_WRREQ[0] + TCC_EA_WRREQ[1]) + TCC_EA_WRREQ[2]) 
- + TCC_EA_WRREQ[3]) + TCC_EA_WRREQ[4]) + TCC_EA_WRREQ[5]) + TCC_EA_WRREQ[6]) - + TCC_EA_WRREQ[7]) + TCC_EA_WRREQ[8]) + TCC_EA_WRREQ[9]) + TCC_EA_WRREQ[10]) - + TCC_EA_WRREQ[11]) + TCC_EA_WRREQ[12]) + TCC_EA_WRREQ[13]) + TCC_EA_WRREQ[14]) - + TCC_EA_WRREQ[15]) + TCC_EA_WRREQ[16]) + TCC_EA_WRREQ[17]) + TCC_EA_WRREQ[18]) - + TCC_EA_WRREQ[19]) + TCC_EA_WRREQ[20]) + TCC_EA_WRREQ[21]) + TCC_EA_WRREQ[22]) - + TCC_EA_WRREQ[23]) + TCC_EA_WRREQ[24]) + TCC_EA_WRREQ[25]) + TCC_EA_WRREQ[26]) - + TCC_EA_WRREQ[27]) + TCC_EA_WRREQ[28]) + TCC_EA_WRREQ[29]) + TCC_EA_WRREQ[30]) - + TCC_EA_WRREQ[31])) if ((((((((((((((((((((((((((((((((TCC_EA_WRREQ[0] + - TCC_EA_WRREQ[1]) + TCC_EA_WRREQ[2]) + TCC_EA_WRREQ[3]) + TCC_EA_WRREQ[4]) - + TCC_EA_WRREQ[5]) + TCC_EA_WRREQ[6]) + TCC_EA_WRREQ[7]) + TCC_EA_WRREQ[8]) - + TCC_EA_WRREQ[9]) + TCC_EA_WRREQ[10]) + TCC_EA_WRREQ[11]) + TCC_EA_WRREQ[12]) - + TCC_EA_WRREQ[13]) + TCC_EA_WRREQ[14]) + TCC_EA_WRREQ[15]) + TCC_EA_WRREQ[16]) - + TCC_EA_WRREQ[17]) + TCC_EA_WRREQ[18]) + TCC_EA_WRREQ[19]) + TCC_EA_WRREQ[20]) - + TCC_EA_WRREQ[21]) + TCC_EA_WRREQ[22]) + TCC_EA_WRREQ[23]) + TCC_EA_WRREQ[24]) - + TCC_EA_WRREQ[25]) + TCC_EA_WRREQ[26]) + TCC_EA_WRREQ[27]) + TCC_EA_WRREQ[28]) - + TCC_EA_WRREQ[29]) + TCC_EA_WRREQ[30]) + TCC_EA_WRREQ[31]) != 0) else None)) - min: MIN((((((((((((((((((((((((((((((((((TCC_EA_WRREQ_LEVEL[0] + TCC_EA_WRREQ_LEVEL[1]) - + TCC_EA_WRREQ_LEVEL[2]) + TCC_EA_WRREQ_LEVEL[3]) + TCC_EA_WRREQ_LEVEL[4]) - + TCC_EA_WRREQ_LEVEL[5]) + TCC_EA_WRREQ_LEVEL[6]) + TCC_EA_WRREQ_LEVEL[7]) - + TCC_EA_WRREQ_LEVEL[8]) + TCC_EA_WRREQ_LEVEL[9]) + TCC_EA_WRREQ_LEVEL[10]) - + TCC_EA_WRREQ_LEVEL[11]) + TCC_EA_WRREQ_LEVEL[12]) + TCC_EA_WRREQ_LEVEL[13]) - + TCC_EA_WRREQ_LEVEL[14]) + TCC_EA_WRREQ_LEVEL[15]) + TCC_EA_WRREQ_LEVEL[16]) - + TCC_EA_WRREQ_LEVEL[17]) + TCC_EA_WRREQ_LEVEL[18]) + TCC_EA_WRREQ_LEVEL[19]) - + TCC_EA_WRREQ_LEVEL[20]) + TCC_EA_WRREQ_LEVEL[21]) + TCC_EA_WRREQ_LEVEL[22]) - + TCC_EA_WRREQ_LEVEL[23]) + TCC_EA_WRREQ_LEVEL[24]) + 
TCC_EA_WRREQ_LEVEL[25]) - + TCC_EA_WRREQ_LEVEL[26]) + TCC_EA_WRREQ_LEVEL[27]) + TCC_EA_WRREQ_LEVEL[28]) - + TCC_EA_WRREQ_LEVEL[29]) + TCC_EA_WRREQ_LEVEL[30]) + TCC_EA_WRREQ_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_WRREQ[0] + TCC_EA_WRREQ[1]) + TCC_EA_WRREQ[2]) - + TCC_EA_WRREQ[3]) + TCC_EA_WRREQ[4]) + TCC_EA_WRREQ[5]) + TCC_EA_WRREQ[6]) - + TCC_EA_WRREQ[7]) + TCC_EA_WRREQ[8]) + TCC_EA_WRREQ[9]) + TCC_EA_WRREQ[10]) - + TCC_EA_WRREQ[11]) + TCC_EA_WRREQ[12]) + TCC_EA_WRREQ[13]) + TCC_EA_WRREQ[14]) - + TCC_EA_WRREQ[15]) + TCC_EA_WRREQ[16]) + TCC_EA_WRREQ[17]) + TCC_EA_WRREQ[18]) - + TCC_EA_WRREQ[19]) + TCC_EA_WRREQ[20]) + TCC_EA_WRREQ[21]) + TCC_EA_WRREQ[22]) - + TCC_EA_WRREQ[23]) + TCC_EA_WRREQ[24]) + TCC_EA_WRREQ[25]) + TCC_EA_WRREQ[26]) - + TCC_EA_WRREQ[27]) + TCC_EA_WRREQ[28]) + TCC_EA_WRREQ[29]) + TCC_EA_WRREQ[30]) - + TCC_EA_WRREQ[31])) if ((((((((((((((((((((((((((((((((TCC_EA_WRREQ[0] + - TCC_EA_WRREQ[1]) + TCC_EA_WRREQ[2]) + TCC_EA_WRREQ[3]) + TCC_EA_WRREQ[4]) - + TCC_EA_WRREQ[5]) + TCC_EA_WRREQ[6]) + TCC_EA_WRREQ[7]) + TCC_EA_WRREQ[8]) - + TCC_EA_WRREQ[9]) + TCC_EA_WRREQ[10]) + TCC_EA_WRREQ[11]) + TCC_EA_WRREQ[12]) - + TCC_EA_WRREQ[13]) + TCC_EA_WRREQ[14]) + TCC_EA_WRREQ[15]) + TCC_EA_WRREQ[16]) - + TCC_EA_WRREQ[17]) + TCC_EA_WRREQ[18]) + TCC_EA_WRREQ[19]) + TCC_EA_WRREQ[20]) - + TCC_EA_WRREQ[21]) + TCC_EA_WRREQ[22]) + TCC_EA_WRREQ[23]) + TCC_EA_WRREQ[24]) - + TCC_EA_WRREQ[25]) + TCC_EA_WRREQ[26]) + TCC_EA_WRREQ[27]) + TCC_EA_WRREQ[28]) - + TCC_EA_WRREQ[29]) + TCC_EA_WRREQ[30]) + TCC_EA_WRREQ[31]) != 0) else None)) - max: MAX((((((((((((((((((((((((((((((((((TCC_EA_WRREQ_LEVEL[0] + TCC_EA_WRREQ_LEVEL[1]) - + TCC_EA_WRREQ_LEVEL[2]) + TCC_EA_WRREQ_LEVEL[3]) + TCC_EA_WRREQ_LEVEL[4]) - + TCC_EA_WRREQ_LEVEL[5]) + TCC_EA_WRREQ_LEVEL[6]) + TCC_EA_WRREQ_LEVEL[7]) - + TCC_EA_WRREQ_LEVEL[8]) + TCC_EA_WRREQ_LEVEL[9]) + TCC_EA_WRREQ_LEVEL[10]) - + TCC_EA_WRREQ_LEVEL[11]) + TCC_EA_WRREQ_LEVEL[12]) + TCC_EA_WRREQ_LEVEL[13]) - + TCC_EA_WRREQ_LEVEL[14]) + 
TCC_EA_WRREQ_LEVEL[15]) + TCC_EA_WRREQ_LEVEL[16]) - + TCC_EA_WRREQ_LEVEL[17]) + TCC_EA_WRREQ_LEVEL[18]) + TCC_EA_WRREQ_LEVEL[19]) - + TCC_EA_WRREQ_LEVEL[20]) + TCC_EA_WRREQ_LEVEL[21]) + TCC_EA_WRREQ_LEVEL[22]) - + TCC_EA_WRREQ_LEVEL[23]) + TCC_EA_WRREQ_LEVEL[24]) + TCC_EA_WRREQ_LEVEL[25]) - + TCC_EA_WRREQ_LEVEL[26]) + TCC_EA_WRREQ_LEVEL[27]) + TCC_EA_WRREQ_LEVEL[28]) - + TCC_EA_WRREQ_LEVEL[29]) + TCC_EA_WRREQ_LEVEL[30]) + TCC_EA_WRREQ_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_WRREQ[0] + TCC_EA_WRREQ[1]) + TCC_EA_WRREQ[2]) - + TCC_EA_WRREQ[3]) + TCC_EA_WRREQ[4]) + TCC_EA_WRREQ[5]) + TCC_EA_WRREQ[6]) - + TCC_EA_WRREQ[7]) + TCC_EA_WRREQ[8]) + TCC_EA_WRREQ[9]) + TCC_EA_WRREQ[10]) - + TCC_EA_WRREQ[11]) + TCC_EA_WRREQ[12]) + TCC_EA_WRREQ[13]) + TCC_EA_WRREQ[14]) - + TCC_EA_WRREQ[15]) + TCC_EA_WRREQ[16]) + TCC_EA_WRREQ[17]) + TCC_EA_WRREQ[18]) - + TCC_EA_WRREQ[19]) + TCC_EA_WRREQ[20]) + TCC_EA_WRREQ[21]) + TCC_EA_WRREQ[22]) - + TCC_EA_WRREQ[23]) + TCC_EA_WRREQ[24]) + TCC_EA_WRREQ[25]) + TCC_EA_WRREQ[26]) - + TCC_EA_WRREQ[27]) + TCC_EA_WRREQ[28]) + TCC_EA_WRREQ[29]) + TCC_EA_WRREQ[30]) - + TCC_EA_WRREQ[31])) if ((((((((((((((((((((((((((((((((TCC_EA_WRREQ[0] + - TCC_EA_WRREQ[1]) + TCC_EA_WRREQ[2]) + TCC_EA_WRREQ[3]) + TCC_EA_WRREQ[4]) - + TCC_EA_WRREQ[5]) + TCC_EA_WRREQ[6]) + TCC_EA_WRREQ[7]) + TCC_EA_WRREQ[8]) - + TCC_EA_WRREQ[9]) + TCC_EA_WRREQ[10]) + TCC_EA_WRREQ[11]) + TCC_EA_WRREQ[12]) - + TCC_EA_WRREQ[13]) + TCC_EA_WRREQ[14]) + TCC_EA_WRREQ[15]) + TCC_EA_WRREQ[16]) - + TCC_EA_WRREQ[17]) + TCC_EA_WRREQ[18]) + TCC_EA_WRREQ[19]) + TCC_EA_WRREQ[20]) - + TCC_EA_WRREQ[21]) + TCC_EA_WRREQ[22]) + TCC_EA_WRREQ[23]) + TCC_EA_WRREQ[24]) - + TCC_EA_WRREQ[25]) + TCC_EA_WRREQ[26]) + TCC_EA_WRREQ[27]) + TCC_EA_WRREQ[28]) - + TCC_EA_WRREQ[29]) + TCC_EA_WRREQ[30]) + TCC_EA_WRREQ[31]) != 0) else None)) - unit: Cycles - tips: - L2 - EA Atomic Lat: - avg: AVG((((((((((((((((((((((((((((((((((TCC_EA_ATOMIC_LEVEL[0] + TCC_EA_ATOMIC_LEVEL[1]) - + TCC_EA_ATOMIC_LEVEL[2]) + 
TCC_EA_ATOMIC_LEVEL[3]) + TCC_EA_ATOMIC_LEVEL[4]) - + TCC_EA_ATOMIC_LEVEL[5]) + TCC_EA_ATOMIC_LEVEL[6]) + TCC_EA_ATOMIC_LEVEL[7]) - + TCC_EA_ATOMIC_LEVEL[8]) + TCC_EA_ATOMIC_LEVEL[9]) + TCC_EA_ATOMIC_LEVEL[10]) - + TCC_EA_ATOMIC_LEVEL[11]) + TCC_EA_ATOMIC_LEVEL[12]) + TCC_EA_ATOMIC_LEVEL[13]) - + TCC_EA_ATOMIC_LEVEL[14]) + TCC_EA_ATOMIC_LEVEL[15]) + TCC_EA_ATOMIC_LEVEL[16]) - + TCC_EA_ATOMIC_LEVEL[17]) + TCC_EA_ATOMIC_LEVEL[18]) + TCC_EA_ATOMIC_LEVEL[19]) - + TCC_EA_ATOMIC_LEVEL[20]) + TCC_EA_ATOMIC_LEVEL[21]) + TCC_EA_ATOMIC_LEVEL[22]) - + TCC_EA_ATOMIC_LEVEL[23]) + TCC_EA_ATOMIC_LEVEL[24]) + TCC_EA_ATOMIC_LEVEL[25]) - + TCC_EA_ATOMIC_LEVEL[26]) + TCC_EA_ATOMIC_LEVEL[27]) + TCC_EA_ATOMIC_LEVEL[28]) - + TCC_EA_ATOMIC_LEVEL[29]) + TCC_EA_ATOMIC_LEVEL[30]) + TCC_EA_ATOMIC_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_ATOMIC[0] + TCC_EA_ATOMIC[1]) + TCC_EA_ATOMIC[2]) - + TCC_EA_ATOMIC[3]) + TCC_EA_ATOMIC[4]) + TCC_EA_ATOMIC[5]) + TCC_EA_ATOMIC[6]) - + TCC_EA_ATOMIC[7]) + TCC_EA_ATOMIC[8]) + TCC_EA_ATOMIC[9]) + TCC_EA_ATOMIC[10]) - + TCC_EA_ATOMIC[11]) + TCC_EA_ATOMIC[12]) + TCC_EA_ATOMIC[13]) + TCC_EA_ATOMIC[14]) - + TCC_EA_ATOMIC[15]) + TCC_EA_ATOMIC[16]) + TCC_EA_ATOMIC[17]) + TCC_EA_ATOMIC[18]) - + TCC_EA_ATOMIC[19]) + TCC_EA_ATOMIC[20]) + TCC_EA_ATOMIC[21]) + TCC_EA_ATOMIC[22]) - + TCC_EA_ATOMIC[23]) + TCC_EA_ATOMIC[24]) + TCC_EA_ATOMIC[25]) + TCC_EA_ATOMIC[26]) - + TCC_EA_ATOMIC[27]) + TCC_EA_ATOMIC[28]) + TCC_EA_ATOMIC[29]) + TCC_EA_ATOMIC[30]) - + TCC_EA_ATOMIC[31])) if ((((((((((((((((((((((((((((((((TCC_EA_ATOMIC[0] - + TCC_EA_ATOMIC[1]) + TCC_EA_ATOMIC[2]) + TCC_EA_ATOMIC[3]) + TCC_EA_ATOMIC[4]) - + TCC_EA_ATOMIC[5]) + TCC_EA_ATOMIC[6]) + TCC_EA_ATOMIC[7]) + TCC_EA_ATOMIC[8]) - + TCC_EA_ATOMIC[9]) + TCC_EA_ATOMIC[10]) + TCC_EA_ATOMIC[11]) + TCC_EA_ATOMIC[12]) - + TCC_EA_ATOMIC[13]) + TCC_EA_ATOMIC[14]) + TCC_EA_ATOMIC[15]) + TCC_EA_ATOMIC[16]) - + TCC_EA_ATOMIC[17]) + TCC_EA_ATOMIC[18]) + TCC_EA_ATOMIC[19]) + TCC_EA_ATOMIC[20]) - + TCC_EA_ATOMIC[21]) 
+ TCC_EA_ATOMIC[22]) + TCC_EA_ATOMIC[23]) + TCC_EA_ATOMIC[24]) - + TCC_EA_ATOMIC[25]) + TCC_EA_ATOMIC[26]) + TCC_EA_ATOMIC[27]) + TCC_EA_ATOMIC[28]) - + TCC_EA_ATOMIC[29]) + TCC_EA_ATOMIC[30]) + TCC_EA_ATOMIC[31]) != 0) else - None)) - std dev: STD((((((((((((((((((((((((((((((((((TCC_EA_ATOMIC_LEVEL[0] + TCC_EA_ATOMIC_LEVEL[1]) - + TCC_EA_ATOMIC_LEVEL[2]) + TCC_EA_ATOMIC_LEVEL[3]) + TCC_EA_ATOMIC_LEVEL[4]) - + TCC_EA_ATOMIC_LEVEL[5]) + TCC_EA_ATOMIC_LEVEL[6]) + TCC_EA_ATOMIC_LEVEL[7]) - + TCC_EA_ATOMIC_LEVEL[8]) + TCC_EA_ATOMIC_LEVEL[9]) + TCC_EA_ATOMIC_LEVEL[10]) - + TCC_EA_ATOMIC_LEVEL[11]) + TCC_EA_ATOMIC_LEVEL[12]) + TCC_EA_ATOMIC_LEVEL[13]) - + TCC_EA_ATOMIC_LEVEL[14]) + TCC_EA_ATOMIC_LEVEL[15]) + TCC_EA_ATOMIC_LEVEL[16]) - + TCC_EA_ATOMIC_LEVEL[17]) + TCC_EA_ATOMIC_LEVEL[18]) + TCC_EA_ATOMIC_LEVEL[19]) - + TCC_EA_ATOMIC_LEVEL[20]) + TCC_EA_ATOMIC_LEVEL[21]) + TCC_EA_ATOMIC_LEVEL[22]) - + TCC_EA_ATOMIC_LEVEL[23]) + TCC_EA_ATOMIC_LEVEL[24]) + TCC_EA_ATOMIC_LEVEL[25]) - + TCC_EA_ATOMIC_LEVEL[26]) + TCC_EA_ATOMIC_LEVEL[27]) + TCC_EA_ATOMIC_LEVEL[28]) - + TCC_EA_ATOMIC_LEVEL[29]) + TCC_EA_ATOMIC_LEVEL[30]) + TCC_EA_ATOMIC_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_ATOMIC[0] + TCC_EA_ATOMIC[1]) + TCC_EA_ATOMIC[2]) - + TCC_EA_ATOMIC[3]) + TCC_EA_ATOMIC[4]) + TCC_EA_ATOMIC[5]) + TCC_EA_ATOMIC[6]) - + TCC_EA_ATOMIC[7]) + TCC_EA_ATOMIC[8]) + TCC_EA_ATOMIC[9]) + TCC_EA_ATOMIC[10]) - + TCC_EA_ATOMIC[11]) + TCC_EA_ATOMIC[12]) + TCC_EA_ATOMIC[13]) + TCC_EA_ATOMIC[14]) - + TCC_EA_ATOMIC[15]) + TCC_EA_ATOMIC[16]) + TCC_EA_ATOMIC[17]) + TCC_EA_ATOMIC[18]) - + TCC_EA_ATOMIC[19]) + TCC_EA_ATOMIC[20]) + TCC_EA_ATOMIC[21]) + TCC_EA_ATOMIC[22]) - + TCC_EA_ATOMIC[23]) + TCC_EA_ATOMIC[24]) + TCC_EA_ATOMIC[25]) + TCC_EA_ATOMIC[26]) - + TCC_EA_ATOMIC[27]) + TCC_EA_ATOMIC[28]) + TCC_EA_ATOMIC[29]) + TCC_EA_ATOMIC[30]) - + TCC_EA_ATOMIC[31])) if ((((((((((((((((((((((((((((((((TCC_EA_ATOMIC[0] - + TCC_EA_ATOMIC[1]) + TCC_EA_ATOMIC[2]) + TCC_EA_ATOMIC[3]) + TCC_EA_ATOMIC[4]) - 
+ TCC_EA_ATOMIC[5]) + TCC_EA_ATOMIC[6]) + TCC_EA_ATOMIC[7]) + TCC_EA_ATOMIC[8]) - + TCC_EA_ATOMIC[9]) + TCC_EA_ATOMIC[10]) + TCC_EA_ATOMIC[11]) + TCC_EA_ATOMIC[12]) - + TCC_EA_ATOMIC[13]) + TCC_EA_ATOMIC[14]) + TCC_EA_ATOMIC[15]) + TCC_EA_ATOMIC[16]) - + TCC_EA_ATOMIC[17]) + TCC_EA_ATOMIC[18]) + TCC_EA_ATOMIC[19]) + TCC_EA_ATOMIC[20]) - + TCC_EA_ATOMIC[21]) + TCC_EA_ATOMIC[22]) + TCC_EA_ATOMIC[23]) + TCC_EA_ATOMIC[24]) - + TCC_EA_ATOMIC[25]) + TCC_EA_ATOMIC[26]) + TCC_EA_ATOMIC[27]) + TCC_EA_ATOMIC[28]) - + TCC_EA_ATOMIC[29]) + TCC_EA_ATOMIC[30]) + TCC_EA_ATOMIC[31]) != 0) else - None)) - min: MIN((((((((((((((((((((((((((((((((((TCC_EA_ATOMIC_LEVEL[0] + TCC_EA_ATOMIC_LEVEL[1]) - + TCC_EA_ATOMIC_LEVEL[2]) + TCC_EA_ATOMIC_LEVEL[3]) + TCC_EA_ATOMIC_LEVEL[4]) - + TCC_EA_ATOMIC_LEVEL[5]) + TCC_EA_ATOMIC_LEVEL[6]) + TCC_EA_ATOMIC_LEVEL[7]) - + TCC_EA_ATOMIC_LEVEL[8]) + TCC_EA_ATOMIC_LEVEL[9]) + TCC_EA_ATOMIC_LEVEL[10]) - + TCC_EA_ATOMIC_LEVEL[11]) + TCC_EA_ATOMIC_LEVEL[12]) + TCC_EA_ATOMIC_LEVEL[13]) - + TCC_EA_ATOMIC_LEVEL[14]) + TCC_EA_ATOMIC_LEVEL[15]) + TCC_EA_ATOMIC_LEVEL[16]) - + TCC_EA_ATOMIC_LEVEL[17]) + TCC_EA_ATOMIC_LEVEL[18]) + TCC_EA_ATOMIC_LEVEL[19]) - + TCC_EA_ATOMIC_LEVEL[20]) + TCC_EA_ATOMIC_LEVEL[21]) + TCC_EA_ATOMIC_LEVEL[22]) - + TCC_EA_ATOMIC_LEVEL[23]) + TCC_EA_ATOMIC_LEVEL[24]) + TCC_EA_ATOMIC_LEVEL[25]) - + TCC_EA_ATOMIC_LEVEL[26]) + TCC_EA_ATOMIC_LEVEL[27]) + TCC_EA_ATOMIC_LEVEL[28]) - + TCC_EA_ATOMIC_LEVEL[29]) + TCC_EA_ATOMIC_LEVEL[30]) + TCC_EA_ATOMIC_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_ATOMIC[0] + TCC_EA_ATOMIC[1]) + TCC_EA_ATOMIC[2]) - + TCC_EA_ATOMIC[3]) + TCC_EA_ATOMIC[4]) + TCC_EA_ATOMIC[5]) + TCC_EA_ATOMIC[6]) - + TCC_EA_ATOMIC[7]) + TCC_EA_ATOMIC[8]) + TCC_EA_ATOMIC[9]) + TCC_EA_ATOMIC[10]) - + TCC_EA_ATOMIC[11]) + TCC_EA_ATOMIC[12]) + TCC_EA_ATOMIC[13]) + TCC_EA_ATOMIC[14]) - + TCC_EA_ATOMIC[15]) + TCC_EA_ATOMIC[16]) + TCC_EA_ATOMIC[17]) + TCC_EA_ATOMIC[18]) - + TCC_EA_ATOMIC[19]) + TCC_EA_ATOMIC[20]) + 
TCC_EA_ATOMIC[21]) + TCC_EA_ATOMIC[22]) - + TCC_EA_ATOMIC[23]) + TCC_EA_ATOMIC[24]) + TCC_EA_ATOMIC[25]) + TCC_EA_ATOMIC[26]) - + TCC_EA_ATOMIC[27]) + TCC_EA_ATOMIC[28]) + TCC_EA_ATOMIC[29]) + TCC_EA_ATOMIC[30]) - + TCC_EA_ATOMIC[31])) if ((((((((((((((((((((((((((((((((TCC_EA_ATOMIC[0] - + TCC_EA_ATOMIC[1]) + TCC_EA_ATOMIC[2]) + TCC_EA_ATOMIC[3]) + TCC_EA_ATOMIC[4]) - + TCC_EA_ATOMIC[5]) + TCC_EA_ATOMIC[6]) + TCC_EA_ATOMIC[7]) + TCC_EA_ATOMIC[8]) - + TCC_EA_ATOMIC[9]) + TCC_EA_ATOMIC[10]) + TCC_EA_ATOMIC[11]) + TCC_EA_ATOMIC[12]) - + TCC_EA_ATOMIC[13]) + TCC_EA_ATOMIC[14]) + TCC_EA_ATOMIC[15]) + TCC_EA_ATOMIC[16]) - + TCC_EA_ATOMIC[17]) + TCC_EA_ATOMIC[18]) + TCC_EA_ATOMIC[19]) + TCC_EA_ATOMIC[20]) - + TCC_EA_ATOMIC[21]) + TCC_EA_ATOMIC[22]) + TCC_EA_ATOMIC[23]) + TCC_EA_ATOMIC[24]) - + TCC_EA_ATOMIC[25]) + TCC_EA_ATOMIC[26]) + TCC_EA_ATOMIC[27]) + TCC_EA_ATOMIC[28]) - + TCC_EA_ATOMIC[29]) + TCC_EA_ATOMIC[30]) + TCC_EA_ATOMIC[31]) != 0) else - None)) - max: MAX((((((((((((((((((((((((((((((((((TCC_EA_ATOMIC_LEVEL[0] + TCC_EA_ATOMIC_LEVEL[1]) - + TCC_EA_ATOMIC_LEVEL[2]) + TCC_EA_ATOMIC_LEVEL[3]) + TCC_EA_ATOMIC_LEVEL[4]) - + TCC_EA_ATOMIC_LEVEL[5]) + TCC_EA_ATOMIC_LEVEL[6]) + TCC_EA_ATOMIC_LEVEL[7]) - + TCC_EA_ATOMIC_LEVEL[8]) + TCC_EA_ATOMIC_LEVEL[9]) + TCC_EA_ATOMIC_LEVEL[10]) - + TCC_EA_ATOMIC_LEVEL[11]) + TCC_EA_ATOMIC_LEVEL[12]) + TCC_EA_ATOMIC_LEVEL[13]) - + TCC_EA_ATOMIC_LEVEL[14]) + TCC_EA_ATOMIC_LEVEL[15]) + TCC_EA_ATOMIC_LEVEL[16]) - + TCC_EA_ATOMIC_LEVEL[17]) + TCC_EA_ATOMIC_LEVEL[18]) + TCC_EA_ATOMIC_LEVEL[19]) - + TCC_EA_ATOMIC_LEVEL[20]) + TCC_EA_ATOMIC_LEVEL[21]) + TCC_EA_ATOMIC_LEVEL[22]) - + TCC_EA_ATOMIC_LEVEL[23]) + TCC_EA_ATOMIC_LEVEL[24]) + TCC_EA_ATOMIC_LEVEL[25]) - + TCC_EA_ATOMIC_LEVEL[26]) + TCC_EA_ATOMIC_LEVEL[27]) + TCC_EA_ATOMIC_LEVEL[28]) - + TCC_EA_ATOMIC_LEVEL[29]) + TCC_EA_ATOMIC_LEVEL[30]) + TCC_EA_ATOMIC_LEVEL[31]) - / (((((((((((((((((((((((((((((((TCC_EA_ATOMIC[0] + TCC_EA_ATOMIC[1]) + TCC_EA_ATOMIC[2]) - + TCC_EA_ATOMIC[3]) + 
TCC_EA_ATOMIC[4]) + TCC_EA_ATOMIC[5]) + TCC_EA_ATOMIC[6]) - + TCC_EA_ATOMIC[7]) + TCC_EA_ATOMIC[8]) + TCC_EA_ATOMIC[9]) + TCC_EA_ATOMIC[10]) - + TCC_EA_ATOMIC[11]) + TCC_EA_ATOMIC[12]) + TCC_EA_ATOMIC[13]) + TCC_EA_ATOMIC[14]) - + TCC_EA_ATOMIC[15]) + TCC_EA_ATOMIC[16]) + TCC_EA_ATOMIC[17]) + TCC_EA_ATOMIC[18]) - + TCC_EA_ATOMIC[19]) + TCC_EA_ATOMIC[20]) + TCC_EA_ATOMIC[21]) + TCC_EA_ATOMIC[22]) - + TCC_EA_ATOMIC[23]) + TCC_EA_ATOMIC[24]) + TCC_EA_ATOMIC[25]) + TCC_EA_ATOMIC[26]) - + TCC_EA_ATOMIC[27]) + TCC_EA_ATOMIC[28]) + TCC_EA_ATOMIC[29]) + TCC_EA_ATOMIC[30]) - + TCC_EA_ATOMIC[31])) if ((((((((((((((((((((((((((((((((TCC_EA_ATOMIC[0] - + TCC_EA_ATOMIC[1]) + TCC_EA_ATOMIC[2]) + TCC_EA_ATOMIC[3]) + TCC_EA_ATOMIC[4]) - + TCC_EA_ATOMIC[5]) + TCC_EA_ATOMIC[6]) + TCC_EA_ATOMIC[7]) + TCC_EA_ATOMIC[8]) - + TCC_EA_ATOMIC[9]) + TCC_EA_ATOMIC[10]) + TCC_EA_ATOMIC[11]) + TCC_EA_ATOMIC[12]) - + TCC_EA_ATOMIC[13]) + TCC_EA_ATOMIC[14]) + TCC_EA_ATOMIC[15]) + TCC_EA_ATOMIC[16]) - + TCC_EA_ATOMIC[17]) + TCC_EA_ATOMIC[18]) + TCC_EA_ATOMIC[19]) + TCC_EA_ATOMIC[20]) - + TCC_EA_ATOMIC[21]) + TCC_EA_ATOMIC[22]) + TCC_EA_ATOMIC[23]) + TCC_EA_ATOMIC[24]) - + TCC_EA_ATOMIC[25]) + TCC_EA_ATOMIC[26]) + TCC_EA_ATOMIC[27]) + TCC_EA_ATOMIC[28]) - + TCC_EA_ATOMIC[29]) + TCC_EA_ATOMIC[30]) + TCC_EA_ATOMIC[31]) != 0) else - None)) - unit: Cycles - tips: - L2 - EA Read Stall (IO): - avg: None # No perf counter - std dev: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (Cycles + $normUnit) - tips: - L2 - EA Read Stall (GMI): - avg: None # No perf counter - std dev: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (Cycles + $normUnit) - tips: - L2 - EA Read Stall (DRAM): - avg: None # No perf counter - std dev: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (Cycles + $normUnit) - tips: - L2 - EA Write Stall (IO): - avg: None # No perf counter - std dev: None # No perf 
counter - min: None # No perf counter - max: None # No perf counter - unit: (Cycles + $normUnit) - tips: - L2 - EA Write Stall (GMI): - avg: None # No perf counter - std dev: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (Cycles + $normUnit) - tips: - L2 - EA Write Stall (DRAM): - avg: None # No perf counter - std dev: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (Cycles + $normUnit) - tips: - L2 - EA Write Starve: - avg: None # No perf counter - std dev: None # No perf counter - min: None # No perf counter - max: None # No perf counter - unit: (Cycles + $normUnit) - tips: - - metric_table: - id: 1802 - title: Channel 0-15 - columnwise: True - header: - channel: Channel - hit rate: L2 Cache Hit Rate (%) - req: Requests (Requests) - read req: L1-L2 Read (Requests) - write req: L1-L2 Write (Requests) - atomic req: L1-L2 Atomic (Requests) - ea read req: L2-EA Read (Requests) - ea write req: L2-EA Write (Requests) - ea atomic req: L2-EA Atomic (Requests) - ea read lat - cycles: L2-EA Read Latency (Cycles) - ea write lat - cycles: L2-EA Write Latency (Cycles) - ea atomic lat - cycles: L2-EA Atomic Latency (Cycles) - ea read stall - io: L2-EA Read Stall - IO (Cycles per) - ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per) - ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per) - ea write stall - io: L2-EA Write Stall - IO (Cycles per) - ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per) - ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per) - ea write stall - starve: L2-EA Write Stall - Starve (Cycles per) - tips: Tips - metric: - "0": - hit rate: - AVG((((100 * TCC_HIT[0]) / (TCC_HIT[0] + TCC_MISS[0])) if ((TCC_HIT[0] - + TCC_MISS[0]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[0]) / $denom)) - read req: AVG((TO_INT(TCC_READ[0]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[0]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[0]) / $denom)) - ea read 
req: AVG((TO_INT(TCC_EA_RDREQ[0]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[0]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[0] / TCC_EA_RDREQ[0]) if (TCC_EA_RDREQ[0] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[0] / TCC_EA_WRREQ[0]) if (TCC_EA_WRREQ[0] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[0] / TCC_EA_ATOMIC[0]) if - (TCC_EA_ATOMIC[0] != 0) else 0)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "1": - hit rate: - AVG((((100 * TCC_HIT[1]) / (TCC_HIT[1] + TCC_MISS[1])) if ((TCC_HIT[1] - + TCC_MISS[1]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[1]) / $denom)) - read req: AVG((TO_INT(TCC_READ[1]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[1]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[1]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[1]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[1]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[1] / TCC_EA_RDREQ[1]) if (TCC_EA_RDREQ[1] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[1] / TCC_EA_WRREQ[1]) if (TCC_EA_WRREQ[1] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[1] / TCC_EA_ATOMIC[1]) if - (TCC_EA_ATOMIC[1] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter 
- tips: - "2": - hit rate: - AVG((((100 * TCC_HIT[2]) / (TCC_HIT[2] + TCC_MISS[2])) if ((TCC_HIT[2] - + TCC_MISS[2]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[2]) / $denom)) - read req: AVG((TO_INT(TCC_READ[2]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[2]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[2]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[2]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[2]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[2] / TCC_EA_RDREQ[2]) if (TCC_EA_RDREQ[2] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[2] / TCC_EA_WRREQ[2]) if (TCC_EA_WRREQ[2] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[2] / TCC_EA_ATOMIC[2]) if - (TCC_EA_ATOMIC[2] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "3": - hit rate: - AVG((((100 * TCC_HIT[3]) / (TCC_HIT[3] + TCC_MISS[3])) if ((TCC_HIT[3] - + TCC_MISS[3]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[3]) / $denom)) - read req: AVG((TO_INT(TCC_READ[3]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[3]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[3]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[3]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[3]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[3] / TCC_EA_RDREQ[3]) if (TCC_EA_RDREQ[3] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[3] / TCC_EA_WRREQ[3]) if (TCC_EA_WRREQ[3] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[3] / TCC_EA_ATOMIC[3]) if - (TCC_EA_ATOMIC[3] != 0) 
else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "4": - hit rate: - AVG((((100 * TCC_HIT[4]) / (TCC_HIT[4] + TCC_MISS[4])) if ((TCC_HIT[4] - + TCC_MISS[4]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[4]) / $denom)) - read req: AVG((TO_INT(TCC_READ[4]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[4]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[4]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[4]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[4]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[4] / TCC_EA_RDREQ[4]) if (TCC_EA_RDREQ[4] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[4] / TCC_EA_WRREQ[4]) if (TCC_EA_WRREQ[4] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[4] / TCC_EA_ATOMIC[4]) if - (TCC_EA_ATOMIC[4] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "5": - hit rate: - AVG((((100 * TCC_HIT[5]) / (TCC_HIT[5] + TCC_MISS[5])) if ((TCC_HIT[5] - + TCC_MISS[5]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[5]) / $denom)) - read req: AVG((TO_INT(TCC_READ[5]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[5]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[5]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[5]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[5]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[5]) / 
$denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[5] / TCC_EA_RDREQ[5]) if (TCC_EA_RDREQ[5] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[5] / TCC_EA_WRREQ[5]) if (TCC_EA_WRREQ[5] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[5] / TCC_EA_ATOMIC[5]) if - (TCC_EA_ATOMIC[5] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "6": - hit rate: - AVG((((100 * TCC_HIT[6]) / (TCC_HIT[6] + TCC_MISS[6])) if ((TCC_HIT[6] - + TCC_MISS[6]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[6]) / $denom)) - read req: AVG((TO_INT(TCC_READ[6]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[6]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[6]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[6]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[6]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[6] / TCC_EA_RDREQ[6]) if (TCC_EA_RDREQ[6] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[6] / TCC_EA_WRREQ[6]) if (TCC_EA_WRREQ[6] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[6] / TCC_EA_ATOMIC[6]) if - (TCC_EA_ATOMIC[6] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "7": - hit rate: - AVG((((100 * TCC_HIT[7]) / (TCC_HIT[7] + TCC_MISS[7])) if ((TCC_HIT[7] - + TCC_MISS[7]) != 0) else None)) - req: 
AVG((TO_INT(TCC_REQ[7]) / $denom)) - read req: AVG((TO_INT(TCC_READ[7]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[7]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[7]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[7]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[7]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[7] / TCC_EA_RDREQ[7]) if (TCC_EA_RDREQ[7] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[7] / TCC_EA_WRREQ[7]) if (TCC_EA_WRREQ[7] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[7] / TCC_EA_ATOMIC[7]) if - (TCC_EA_ATOMIC[7] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "8": - hit rate: - AVG((((100 * TCC_HIT[8]) / (TCC_HIT[8] + TCC_MISS[8])) if ((TCC_HIT[8] - + TCC_MISS[8]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[8]) / $denom)) - read req: AVG((TO_INT(TCC_READ[8]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[8]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[8]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[8]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[8]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[8] / TCC_EA_RDREQ[8]) if (TCC_EA_RDREQ[8] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[8] / TCC_EA_WRREQ[8]) if (TCC_EA_WRREQ[8] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[8] / TCC_EA_ATOMIC[8]) if - (TCC_EA_ATOMIC[8] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf 
counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "9": - hit rate: - AVG((((100 * TCC_HIT[9]) / (TCC_HIT[9] + TCC_MISS[9])) if ((TCC_HIT[9] - + TCC_MISS[9]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[9]) / $denom)) - read req: AVG((TO_INT(TCC_READ[9]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[9]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[9]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[9]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[9]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[9] / TCC_EA_RDREQ[9]) if (TCC_EA_RDREQ[9] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[9] / TCC_EA_WRREQ[9]) if (TCC_EA_WRREQ[9] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[9] / TCC_EA_ATOMIC[9]) if - (TCC_EA_ATOMIC[9] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "10": - hit rate: - AVG((((100 * TCC_HIT[10]) / (TCC_HIT[10] + TCC_MISS[10])) if ((TCC_HIT[10] - + TCC_MISS[10]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[10]) / $denom)) - read req: AVG((TO_INT(TCC_READ[10]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[10]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[10]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[10]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[10]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[10] / TCC_EA_RDREQ[10]) if (TCC_EA_RDREQ[10] - != 0) else None)) - ea 
write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[10] / TCC_EA_WRREQ[10]) if (TCC_EA_WRREQ[10] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[10] / TCC_EA_ATOMIC[10]) if - (TCC_EA_ATOMIC[10] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "11": - hit rate: - AVG((((100 * TCC_HIT[11]) / (TCC_HIT[11] + TCC_MISS[11])) if ((TCC_HIT[11] - + TCC_MISS[11]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[11]) / $denom)) - read req: AVG((TO_INT(TCC_READ[11]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[11]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[11]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[11]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[11]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[11] / TCC_EA_RDREQ[11]) if (TCC_EA_RDREQ[11] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[11] / TCC_EA_WRREQ[11]) if (TCC_EA_WRREQ[11] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[11] / TCC_EA_ATOMIC[11]) if - (TCC_EA_ATOMIC[11] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "12": - hit rate: - AVG((((100 * TCC_HIT[12]) / (TCC_HIT[12] + TCC_MISS[12])) if ((TCC_HIT[12] - + TCC_MISS[12]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[12]) / $denom)) - read req: AVG((TO_INT(TCC_READ[12]) / $denom)) - write req: 
AVG((TO_INT(TCC_WRITE[12]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[12]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[12]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[12]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[12] / TCC_EA_RDREQ[12]) if (TCC_EA_RDREQ[12] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[12] / TCC_EA_WRREQ[12]) if (TCC_EA_WRREQ[12] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[12] / TCC_EA_ATOMIC[12]) if - (TCC_EA_ATOMIC[12] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "13": - hit rate: - AVG((((100 * TCC_HIT[13]) / (TCC_HIT[13] + TCC_MISS[13])) if ((TCC_HIT[13] - + TCC_MISS[13]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[13]) / $denom)) - read req: AVG((TO_INT(TCC_READ[13]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[13]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[13]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[13]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[13]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[13] / TCC_EA_RDREQ[13]) if (TCC_EA_RDREQ[13] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[13] / TCC_EA_WRREQ[13]) if (TCC_EA_WRREQ[13] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[13] / TCC_EA_ATOMIC[13]) if - (TCC_EA_ATOMIC[13] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea 
write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "14": - hit rate: - AVG((((100 * TCC_HIT[14]) / (TCC_HIT[14] + TCC_MISS[14])) if ((TCC_HIT[14] - + TCC_MISS[14]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[14]) / $denom)) - read req: AVG((TO_INT(TCC_READ[14]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[14]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[14]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[14]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[14]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[14] / TCC_EA_RDREQ[14]) if (TCC_EA_RDREQ[14] - != 0) else None)) - ea write lat - cycles: - AVG(((TCC_EA_WRREQ_LEVEL[14] / TCC_EA_WRREQ[14]) if (TCC_EA_WRREQ[14] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[14] / TCC_EA_ATOMIC[14]) if - (TCC_EA_ATOMIC[14] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "15": - hit rate: - AVG((((100 * TCC_HIT[15]) / (TCC_HIT[15] + TCC_MISS[15])) if ((TCC_HIT[15] - + TCC_MISS[15]) != 0) else None)) - req: AVG((TO_INT(TCC_REQ[15]) / $denom)) - read req: AVG((TO_INT(TCC_READ[15]) / $denom)) - write req: AVG((TO_INT(TCC_WRITE[15]) / $denom)) - atomic req: AVG((TO_INT(TCC_ATOMIC[15]) / $denom)) - ea read req: AVG((TO_INT(TCC_EA_RDREQ[15]) / $denom)) - ea write req: AVG((TO_INT(TCC_EA_WRREQ[15]) / $denom)) - ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom)) - ea read lat - cycles: - AVG(((TCC_EA_RDREQ_LEVEL[15] / TCC_EA_RDREQ[15]) if (TCC_EA_RDREQ[15] - != 0) else None)) - ea write lat - cycles: - 
AVG(((TCC_EA_WRREQ_LEVEL[15] / TCC_EA_WRREQ[15]) if (TCC_EA_WRREQ[15] - != 0) else None)) - ea atomic lat - cycles: - AVG(((TCC_EA_ATOMIC_LEVEL[15] / TCC_EA_ATOMIC[15]) if - (TCC_EA_ATOMIC[15] != 0) else None)) - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: + # FIXME: other arggr metrics!! - metric_table: - id: 1803 - title: Channel 16-31 - columnwise: True + id: 1802 + title: L2 Cache Hit Rate (%) header: - channel: Channel - hit rate: L2 Cache Hit Rate (%) - req: Requests (Requests) - read req: L1-L2 Read (Requests) - write req: L1-L2 Write (Requests) - atomic req: L1-L2 Atomic (Requests) - ea read req: L2-EA Read (Requests) - ea write req: L2-EA Write (Requests) - ea atomic req: L2-EA Atomic (Requests) - ea read lat - cycles: L2-EA Read Latency (Cycles) - ea write lat - cycles: L2-EA Write Latency (Cycles) - ea atomic lat - cycles: L2-EA Atomic Latency (Cycles) - ea read stall - io: L2-EA Read Stall - IO (Cycles per) - ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per) - ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per) - ea write stall - io: L2-EA Write Stall - IO (Cycles per) - ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per) - ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per) - ea write stall - starve: L2-EA Write Stall - Starve (Cycles per) - tips: Tips + metric: Metric + expr: Expression metric: - "16": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - 
ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter + "::_1": + expr: + (((100 * TCC_HIT[::_1]) / (TCC_HIT[::_1] + TCC_MISS[::_1])) if ((TCC_HIT[::_1] + + TCC_MISS[::_1]) != 0) else None) + placeholder_range: + "::_1": 32 + cli_style: simple_box + + - metric_table: + id: 1803 + title: L2 Requests (Requests) + header: + metric: Metric + expr: Expression + metric: + "::_1": + expr: (TO_INT(TCC_REQ[::_1]) / $denom) + placeholder_range: + "::_1": 32 + cli_style: simple_box + + - metric_table: + id: 1804 + title: L2 Access (Requests) + header: + metric: Metric + read req: L2 Read Req + write req: L2 Write Req + atomic req: L2 Atomic Req + metric: + "::_1": + read req: AVG((TO_INT(TCC_READ[::_1]) / $denom)) + write req: AVG((TO_INT(TCC_WRITE[::_1]) / $denom)) + atomic req: AVG((TO_INT(TCC_ATOMIC[::_1]) / $denom)) + placeholder_range: + "::_1": 32 + cli_style: simple_multiple_bar + + - metric_table: + id: 1805 + title: L2 - Fabric Access (Requests) + header: + metric: Metric + read req: L2 - Fabric Read Req + write req: L2 - Fabric Write and Atomic Req + atomic req: L2 - Fabric Atomic Req + metric: + "::_1": + read req: AVG((TO_INT(TCC_EA_RDREQ[::_1]) / $denom)) + write req: AVG((TO_INT(TCC_EA_WRREQ[::_1]) / $denom)) + atomic req: AVG((TO_INT(TCC_EA_ATOMIC[::_1]) / $denom)) + placeholder_range: + "::_1": 32 + cli_style: simple_multiple_bar + + # - metric_table: + # id: 1806 + # title: L2-EA Latency (Cycles) + # header: + # metric: Metric + # read lat: L2-EA Read + # write lat: L2-EA Write + # atomic lat: L2-EA Atomic + # metric: + # "::_1": + # read lat: + # AVG(((TCC_EA_RDREQ_LEVEL[::_1] / TCC_EA_RDREQ[::_1]) if (TCC_EA_RDREQ[::_1] + # != 0) else None)) + 
# write lat: + # AVG(((TCC_EA_WRREQ_LEVEL[::_1] / TCC_EA_WRREQ[::_1]) if (TCC_EA_WRREQ[::_1] + # != 0) else None)) + # atomic lat: + # AVG(((TCC_EA_ATOMIC_LEVEL[::_1] / TCC_EA_ATOMIC[::_1]) if + # (TCC_EA_ATOMIC[::_1] != 0) else 0)) + # placeholder_range: + # "::_1": 32 + # cli_style: simple_multiple_bar + + - metric_table: + id: 1806 + title: L2 - Fabric Read Latency (Cycles) + header: + metric: Metric + expr: Expression + metric: + "::_1": + expr: + ((TCC_EA_RDREQ_LEVEL[::_1] / TCC_EA_RDREQ[::_1]) if (TCC_EA_RDREQ[::_1] + != 0) else None) + placeholder_range: + "::_1": 32 + cli_style: simple_box + + - metric_table: + id: 1807 + title: L2 - Fabric Write Latency (Cycles) + header: + metric: Metric + expr: Expression + metric: + "::_1": + expr: + ((TCC_EA_WRREQ_LEVEL[::_1] / TCC_EA_WRREQ[::_1]) if (TCC_EA_WRREQ[::_1] + != 0) else None) + placeholder_range: + "::_1": 32 + cli_style: simple_box + + - metric_table: + id: 1808 + title: L2 - Fabric Atomic Latency (Cycles) + header: + metric: Metric + expr: Expression + metric: + "::_1": + expr: ((TCC_EA_ATOMIC_LEVEL[::_1] / TCC_EA_ATOMIC[::_1]) if + (TCC_EA_ATOMIC[::_1] != 0) else None)  # None (not 0) when the channel issued no atomics, matching the read/write latency tables + placeholder_range: + "::_1": 32 + cli_style: simple_box + + - metric_table: + id: 1809 + title: L2 - Fabric Read Stall (Cycles per normUnit) + header: + metric: Metric + ea read stall - pcie: L2 - Fabric Read Stall (PCIe) + ea read stall - if: L2 - Fabric Read Stall (Infinity Fabric™) + ea read stall - hbm: L2 - Fabric Read Stall (HBM) + metric: + "::_1": + ea read stall - pcie: None # No perf counter + ea read stall - if: None # No perf counter + ea read stall - hbm: None # No perf counter + placeholder_range: + "::_1": 32 + cli_style: simple_multiple_bar + + - metric_table: + id: 1810 + title: L2 - Fabric Write Stall (Cycles per normUnit) + header: + metric: Metric + ea write stall - pcie: L2 - Fabric Write Stall (PCIe) + ea write stall - if: L2 - Fabric Write Stall (Infinity Fabric™) + ea write stall - hbm: L2 - Fabric Write Stall (HBM)
+ ea write stall - starve: L2 - Fabric Write Starve + metric: + "::_1": + ea write stall - pcie: None # No perf counter + ea write stall - if: None # No perf counter + ea write stall - hbm: None # No perf counter ea write stall - starve: None # No perf counter - tips: - "17": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "18": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "19": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf 
counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "20": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "21": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No 
perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "22": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "23": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "24": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: 
None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "25": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "26": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf 
counter - ea write stall - starve: None # No perf counter - tips: - "27": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "28": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "29": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None 
# No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "30": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: - "31": - hit rate: None # No perf counter - req: None # No perf counter - read req: None # No perf counter - write req: None # No perf counter - atomic req: None # No perf counter - ea read req: None # No perf counter - ea write req: None # No perf counter - ea atomic req: None # No perf counter - ea read lat - cycles: None # No perf counter - ea write lat - cycles: None # No perf counter - ea atomic lat - cycles: None # No perf counter - ea read stall - io: None # No perf counter - ea read stall - gmi: None # No perf counter - ea read stall - dram: None # No perf counter - ea write stall - io: None # No perf counter - ea write stall - gmi: None # No perf counter - ea write stall - dram: None # No perf counter - ea write stall - starve: None # No perf counter - tips: + placeholder_range: + "::_1": 32 + 
cli_style: simple_multiple_bar \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1900_memory_chart.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1900_memory_chart.yaml deleted file mode 100644 index 3e8ff0a0f6..0000000000 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1900_memory_chart.yaml +++ /dev/null @@ -1,259 +0,0 @@ ---- -# Add description/tips for each metric in this section. -# So it could be shown in hover. -Metric Description: - -# Define the panel properties and properties of each metric in the panel. -Panel Config: - id: 1900 - title: Memory Chart Analysis - data source: - - metric_table: - id: 1901 - title: # subtitle for this table(optional) - header: - metric: Metric - value: Value - alias: Alias - tips: Tips - metric: - Wave Life: - value: ROUND(AVG(((4 * (SQ_WAVE_CYCLES / SQ_WAVES)) if (SQ_WAVES != 0) else - None)), 0) - alias: wave_life_ - tips: - Active CUs: - value: CONCAT(CONCAT($numActiveCUs, "/"), $numCU) - alias: active_cu_ - tips: - SALU: - value: ROUND(AVG((SQ_INSTS_SALU / $denom)), 0) - alias: salu_ - tips: - SMEM: - value: ROUND(AVG((SQ_INSTS_SMEM / $denom)), 0) - alias: smem_ - tips: - VALU: - value: ROUND(AVG((SQ_INSTS_VALU / $denom)), 0) - alias: valu_ - tips: - MFMA: - value: None # No perf counter - alias: mfma_ - tips: - VMEM: - value: ROUND(AVG((SQ_INSTS_VMEM / $denom)), 0) - alias: vmem_ - tips: - LDS: - value: ROUND(AVG((SQ_INSTS_LDS / $denom)), 0) - alias: lds_ - tips: - GWS: - value: ROUND(AVG((SQ_INSTS_GDS / $denom)), 0) - alias: gws_ - tips: - BR: - value: ROUND(AVG((SQ_INSTS_BRANCH / $denom)), 0) - alias: br_ - tips: - VGPR: - value: ROUND(AVG(vgpr), 0) - alias: vgpr_ - tips: - SGPR: - value: ROUND(AVG(SGPR), 0) - alias: sgpr_ - tips: - LDS Allocation: - value: ROUND(AVG(lds), 0) - alias: lds_alloc_ - tips: - Scratch Allocation: - value: ROUND(AVG(Scratch_Per_Workitem), 0) - alias: scratch_alloc_ - tips: 
- Wavefronts: - value: ROUND(AVG(SPI_CSN_WAVE), 0) - alias: wavefronts_ - tips: - Workgroups: - value: ROUND(AVG(SPI_CSN_NUM_THREADGROUPS), 0) - alias: workgroups_ - tips: - LDS Req: - value: ROUND(AVG((SQ_INSTS_LDS / $denom)), 0) - alias: lds_req_ - tips: - IL1 Fetch: - value: ROUND(AVG((SQC_ICACHE_REQ / $denom)), 0) - alias: il1_fetch_ - tips: - IL1 Hit: - value: ROUND((AVG((SQC_ICACHE_HITS / SQC_ICACHE_REQ)) * 100), 0) - alias: il1_hit_ - tips: - IL1_L2 Rd: - value: ROUND(AVG((SQC_TC_INST_REQ / $denom)), 0) - alias: il1_l2_req_ - tips: - vL1D Rd: - value: ROUND(AVG((SQC_DCACHE_REQ / $denom)), 0) - alias: sl1_rd_ - tips: - vL1D Hit: - value: ROUND((AVG(((SQC_DCACHE_HITS / SQC_DCACHE_REQ) if (SQC_DCACHE_REQ != - 0) else None)) * 100), 0) - alias: sl1_hit_ - tips: - vL1D_L2 Rd: - value: ROUND(AVG((SQC_TC_DATA_READ_REQ / $denom)), 0) - alias: sl1_l2_rd_ - tips: - vL1D_L2 Wr: - value: ROUND(AVG((SQC_TC_DATA_WRITE_REQ / $denom)), 0) - alias: sl1_l2_wr_ - tips: - vL1D_L2 Atomic: - value: ROUND(AVG((SQC_TC_DATA_ATOMIC_REQ / $denom)), 0) - alias: sl1_l2_atom_ - tips: - VL1 Rd: - value: ROUND(AVG((TCP_TOTAL_READ_sum / $denom)), 0) - alias: vl1_rd_ - tips: - VL1 Wr: - value: ROUND(AVG((TCP_TOTAL_WRITE_sum / $denom)), 0) - alias: vl1_wr_ - tips: - VL1 Atomic: - value: ROUND(AVG(((TCP_TOTAL_ATOMIC_WITH_RET_sum + TCP_TOTAL_ATOMIC_WITHOUT_RET_sum) - / $denom)), 0) - alias: vl1_atom_ - tips: - VL1 Hit: - value: ROUND(AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) - + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) - / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)), 0) - alias: vl1_hit_ - tips: - VL1 Lat: - value: ROUND(AVG(((TCP_TCP_LATENCY_sum / TCP_TA_TCP_STATE_READ_sum) if (TCP_TA_TCP_STATE_READ_sum - != 0) else None)), 0) - alias: vl1_lat_ - tips: - VL1_L2 Rd: - value: ROUND(AVG((TCP_TCC_READ_REQ_sum / $denom)), 0) - alias: vl1_l2_rd_ - tips: - VL1_L2 Wr: - value: 
ROUND(AVG((TCP_TCC_WRITE_REQ_sum / $denom)), 0) - alias: vl1_l2_wr_ - tips: - vL1_L2 Atomic: - value: ROUND(AVG(((TCP_TCC_ATOMIC_WITH_RET_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) - / $denom)), 0) - alias: vl1_l2_atom_ - tips: - L2 Rd: - value: ROUND(AVG((TCC_READ_sum / $denom)), 0) - alias: l2_rd_ - tips: - L2 Wr: - value: ROUND(AVG((TCC_WRITE_sum / $denom)), 0) - alias: l2_wr_ - tips: - L2 Atomic: - value: ROUND(AVG((TCC_ATOMIC_sum / $denom)), 0) - alias: l2_atom_ - tips: - L2 Hit: - value: ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum - + TCC_MISS_sum) != 0) else None)), 0) - alias: l2_hit_ - tips: - L2 Rd Lat: - value: ROUND(AVG(((TCP_TCC_READ_REQ_LATENCY_sum / (TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum)) - if ((TCP_TCC_READ_REQ_sum + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) != 0) else None)), - 0) - alias: l2_rd_lat_ - tips: - L2 Wr Lat: - value: ROUND(AVG(((TCP_TCC_WRITE_REQ_LATENCY_sum / (TCP_TCC_WRITE_REQ_sum + - TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) if ((TCP_TCC_WRITE_REQ_sum + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum) - != 0) else None)), 0) - alias: l2_wr_lat_ - tips: - Fabric Rd Lat: - value: ROUND(AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum - != 0) else None)), 0) - alias: fabric_rd_lat_ - tips: - Fabric Wr Lat: - value: ROUND(AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum - != 0) else None)), 0) - alias: fabric_wr_lat_ - tips: - Fabric Atomic Lat: - value: ROUND(AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum - != 0) else None)), 0) - alias: fabric_atom_lat_ - tips: - Fabric_L2 Rd: - value: ROUND(AVG((TCC_EA_RDREQ_sum / $denom)), 0) - alias: l2_fabric_rd_ - tips: - Fabric_L2 Wr: - value: ROUND(AVG((TCC_EA_WRREQ_sum / $denom)), 0) - alias: l2_fabric_wr_ - tips: - Fabric_l2 Atomic: - value: ROUND(AVG((TCC_EA_ATOMIC_sum / $denom)), 0) - alias: l2_fabric_atom_ - tips: - HBM Rd: - value: ROUND(AVG((TCC_EA_RDREQ_DRAM_sum / $denom)), 0) - alias: 
hbm_rd_ - tips: - HBM Wr: - value: ROUND(AVG((TCC_EA_WRREQ_DRAM_sum / $denom)), 0) - alias: hbm_wr_ - tips: - LDS Util: - value: ROUND(AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU))), - 0) - alias: lds_util_ - tips: - VL1 Coalesce: - value: ROUND(AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum - * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else 0)), 0) - alias: vl1_coales_ - tips: - VL1 Stall: - value: ROUND(AVG((((100 * TCP_TCR_TCP_STALL_CYCLES_sum) / TCP_GATE_EN1_sum) - if (TCP_GATE_EN1_sum != 0) else None)), 0) - alias: vl1_stall_ - tips: - LDS Lat: - value: ROUND(AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) - if (SQ_INSTS_LDS != 0) else None)), 0) - alias: lds_lat_ - coll_level: SQ_INST_LEVEL_LDS - tips: - vL1D Lat: - value: ROUND(AVG(((SQ_ACCUM_PREV_HIRES / SQC_DCACHE_REQ) - if (SQC_DCACHE_REQ != 0) else None)), 0) - alias: sl1_lat_ - tips: - IL1 Lat: - value: ROUND(AVG(((SQ_ACCUM_PREV_HIRES / SQC_ICACHE_REQ) - if (SQC_ICACHE_REQ != 0) else None)), 0) - alias: il1_lat_ - tips: - Wave Occupancy: - value: ROUND(AVG(((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / $numActiveCUs)), 0) - alias: wave_occ_ - coll_level: SQ_LEVEL_WAVES - tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml index 774cb479e1..0d213c9fc3 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml @@ -14,10 +14,10 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit peak: Peak - pop: PoP + pop: Pct of Peak tips: Tips metric: VALU FLOPs: @@ -28,7 +28,7 @@ Panel Config: tips: VALU IOPs: value: None # No perf counter - unit: GOPs + unit: GIOPs peak: (((($sclk * $numCU) * 64) * 2) / 1000) pop: 
None # No perf counter tips: @@ -68,25 +68,37 @@ Panel Config: peak: $numCU pop: ((100 * $numActiveCUs) / $numCU) tips: - SALU Util: + SALU Utililization: value: AVG(((100 * SQ_ACTIVE_INST_SCA) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct peak: 100 pop: AVG(((100 * SQ_ACTIVE_INST_SCA) / (GRBM_GUI_ACTIVE * $numCU))) tips: - VALU Util: + VALU Utililization: value: AVG(((100 * SQ_ACTIVE_INST_VALU) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct peak: 100 pop: AVG(((100 * SQ_ACTIVE_INST_VALU) / (GRBM_GUI_ACTIVE * $numCU))) tips: - MFMA Util: + MFMA Utililization: value: None # No HW module unit: pct peak: 100 pop: None # No HW module tips: - VALU Active Threads/Wave: + VMEM Utilization: + value: None # No HW module + unit: pct + peak: 100 + pop: None # No HW module + tips: + Branch Utilization: + value: None # No HW module + unit: pct + peak: 100 + pop: None # No HW module + tips: + VALU Active Threads: value: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU != 0) else None)) unit: Threads @@ -94,25 +106,29 @@ Panel Config: pop: (AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU != 0) else None)) * 1.5625) tips: - IPC - Issue: - value: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) - / SQ_ACTIVE_INST_ANY)) + IPC: + value: AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES)) unit: Instr/cycle peak: 5 - pop: ((100 * AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) - / SQ_ACTIVE_INST_ANY))) / 5) + pop: ((100 * AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES))) / 5) tips: - LDS BW: + Wavefront Occupancy: + value: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE)) + unit: Wavefronts + peak: ($maxWavesPerCU * $numCU) + pop: (100 * AVG(((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / ($maxWavesPerCU + * $numCU)))) + coll_level: SQ_LEVEL_WAVES + tips: + Theoretical LDS 
Bandwidth: value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (End_Timestamp - Start_Timestamp))) - unit: GB/sec + unit: GB/s peak: (($sclk * $numCU) * 0.128) pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: - LDS Bank Conflict: + LDS Bank Conflicts/Access: value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None)) unit: Conflicts/access @@ -120,35 +136,7 @@ Panel Config: pop: ((100 * AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))) / 32) tips: - Instr Cache Hit Rate: - value: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) - unit: pct - peak: 100 - pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) - tips: - Instr Cache BW: - value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) - unit: GB/s - peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk - / 1000) * 64) * $numSQC)) - tips: - Scalar L1D Cache Hit Rate: - value: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) - if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) - unit: pct - peak: 100 - pop: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) - if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) - tips: - Scalar L1D Cache BW: - value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) - unit: GB/s - peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk - / 1000) * 64) * $numSQC)) - tips: - Vector L1D Cache Hit Rate: + vL1D Cache Hit Rate: value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + 
TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else @@ -160,7 +148,7 @@ Panel Config: TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else None)) tips: - Vector L1D Cache BW: + vL1D Cache BW: value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: ((($sclk / 1000) * 64) * $numCU) @@ -175,6 +163,13 @@ Panel Config: pop: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) tips: + L2 Cache BW: + value: AVG(((TCC_REQ_sum * 64) / (EndNs - BeginNs))) + unit: GB/s + peak: ((($sclk / 1000) * 64) * TO_INT($L2Banks)) + pop: ((100 * AVG(((TCC_REQ_sum * 64) / (EndNs - BeginNs)))) + / ((($sclk / 1000) * 64) * TO_INT($L2Banks))) + tips: L2-Fabric Read BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp))) @@ -195,36 +190,48 @@ Panel Config: value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) unit: Cycles - peak: '' - pop: '' + peak: None + pop: None tips: L2-Fabric Write Latency: value: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) unit: Cycles - peak: '' - pop: '' + peak: None + pop: None tips: - Wave Occupancy: - value: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE)) - unit: Wavefronts - peak: ($maxWavesPerCU * $numCU) - pop: (100 * AVG(((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / ($maxWavesPerCU - * $numCU)))) - coll_level: SQ_LEVEL_WAVES + sL1D Cache Hit Rate: + value: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) + if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) + unit: pct + peak: 100 + pop: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) + if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) tips: - Instr 
Fetch BW: - value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32)) + sL1D Cache BW: + value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s - peak: ((($sclk / 1000) * 32) * $numSQC) - pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC - * (($sclk / 1000) * 32))) - coll_level: SQ_IFETCH_LEVEL + peak: ((($sclk / 1000) * 64) * $numSQC) + pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk + / 1000) * 64) * $numSQC)) tips: - Instr Fetch Latency: + L1I Hit Rate: + value: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) + unit: pct + peak: 100 + pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) + tips: + L1I BW: + value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64)) + unit: GB/s + peak: ((($sclk / 1000) * 64) * $numSQC) + pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + / 1000) * 64) * $numSQC)) + tips: + L1I Fetch Latency: value: AVG((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) unit: Cycles - peak: '' - pop: '' + peak: None + pop: None coll_level: SQ_IFETCH_LEVEL tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0500_command-processor.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0500_command-processor.yaml index 5250918799..edd42da6e3 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0500_command-processor.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0500_command-processor.yaml @@ -19,19 +19,7 @@ Panel Config: unit: Unit tips: Tips metric: - GPU Busy Cycles: - avg: AVG(GRBM_GUI_ACTIVE) - min: MIN(GRBM_GUI_ACTIVE) - max: MAX(GRBM_GUI_ACTIVE) - unit: Cycles/Kernel - tips: - CPF Busy: - avg: AVG(CPF_CPF_STAT_BUSY) - min: MIN(CPF_CPF_STAT_BUSY) - max: MAX(CPF_CPF_STAT_BUSY) - unit: Cycles/Kernel - tips: - CPF Util: + CPF Utilization: avg: AVG((((100 * 
CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE)) if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None)) min: MIN((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE)) @@ -47,15 +35,9 @@ Panel Config: != 0) else None)) max: MAX((((100 * CPF_CPF_STAT_STALL) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY != 0) else None)) - unit: Cycles/Kernel + unit: pct tips: - L2Cache Intf Busy: - avg: AVG(CPF_CPF_TCIU_BUSY) - min: MIN(CPF_CPF_TCIU_BUSY) - max: MAX(CPF_CPF_TCIU_BUSY) - unit: Cycles/Kernel - tips: - L2Cache Intf Util: + CPF-L2 Utilization: avg: AVG((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE)) if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None)) min: MIN((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE)) @@ -64,7 +46,7 @@ Panel Config: if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None)) unit: pct tips: - L2Cache Intf Stall: + CPF-L2 Stall: avg: AVG((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY != 0) else None)) min: MIN((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY @@ -73,11 +55,14 @@ Panel Config: != 0) else None)) unit: pct tips: - UTCL1 Stall: - avg: AVG(CPF_CMP_UTCL1_STALL_ON_TRANSLATION) - min: MIN(CPF_CMP_UTCL1_STALL_ON_TRANSLATION) - max: MAX(CPF_CMP_UTCL1_STALL_ON_TRANSLATION) - unit: Cycles/Kernel + CPF-UTCL1 Stall: + avg: AVG(((100 * CPF_CMP_UTCL1_STALL_ON_TRANSLATION) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY + != 0) else None) + min: MIN(((100 * CPF_CMP_UTCL1_STALL_ON_TRANSLATION) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY + != 0) else None) + max: MAX(((100 * CPF_CMP_UTCL1_STALL_ON_TRANSLATION) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY + != 0) else None) + unit: pct tips: - metric_table: @@ -91,19 +76,7 @@ Panel Config: unit: Unit tips: Tips metric: - GPU Busy Cycles: - avg: AVG(GRBM_GUI_ACTIVE) - min: MIN(GRBM_GUI_ACTIVE) - max: MAX(GRBM_GUI_ACTIVE) - unit: Cycles - tips: - CPC Busy Cycles: - avg: 
AVG(CPC_CPC_STAT_BUSY) - min: MIN(CPC_CPC_STAT_BUSY) - max: MAX(CPC_CPC_STAT_BUSY) - unit: Cycles - tips: - CPC Util: + CPC Utilization: avg: AVG((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE)) if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None)) min: MIN((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE)) @@ -112,12 +85,6 @@ Panel Config: if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None)) unit: pct tips: - CPC Stall Cycles: - avg: AVG(CPC_CPC_STAT_STALL) - min: MIN(CPC_CPC_STAT_STALL) - max: MAX(CPC_CPC_STAT_STALL) - unit: Cycles - tips: CPC Stall Rate: avg: AVG((((100 * CPC_CPC_STAT_STALL) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY != 0) else None)) @@ -127,28 +94,19 @@ Panel Config: != 0) else None)) unit: pct tips: - CPC Packet Decoding: - avg: AVG(CPC_ME1_BUSY_FOR_PACKET_DECODE) - min: MIN(CPC_ME1_BUSY_FOR_PACKET_DECODE) - max: MAX(CPC_ME1_BUSY_FOR_PACKET_DECODE) - unit: Cycles - tips: - SPI Intf Busy Cycles: - avg: AVG(CPC_ME1_DC0_SPI_BUSY) - min: MIN(CPC_ME1_DC0_SPI_BUSY) - max: MAX(CPC_ME1_DC0_SPI_BUSY) - unit: Cycles - tips: - SPI Intf Util: - avg: AVG((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY - != 0) else None)) - min: MIN((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY - != 0) else None)) - max: MAX((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY - != 0) else None)) + CPC Packet Decoding Utilization: + avg: AVG((100 * CPC_ME1_BUSY_FOR_PACKET_DECODE) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + min: MIN((100 * CPC_ME1_BUSY_FOR_PACKET_DECODE) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + max: MAX((100 * CPC_ME1_BUSY_FOR_PACKET_DECODE) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) unit: pct tips: - L2Cache Intf Util: + CPC-Workgroup Manager Utilization: + avg: AVG((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + min: MIN((100 * 
CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + max: MAX((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + unit: Pct + tips: + CPC-L2 Utilization: avg: AVG((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE)) if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None)) min: MIN((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE)) @@ -157,19 +115,16 @@ Panel Config: if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None)) unit: pct tips: - UTCL1 Stall Cycles: - avg: AVG(CPC_UTCL1_STALL_ON_TRANSLATION) - min: MIN(CPC_UTCL1_STALL_ON_TRANSLATION) - max: MAX(CPC_UTCL1_STALL_ON_TRANSLATION) - unit: Cycles + CPC-UTCL1 Stall: + avg: AVG(((100 * CPC_UTCL1_STALL_ON_TRANSLATION) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY + != 0) else None) + min: MIN(((100 * CPC_UTCL1_STALL_ON_TRANSLATION) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY + != 0) else None) + max: MAX(((100 * CPC_UTCL1_STALL_ON_TRANSLATION) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY + != 0) else None) + unit: pct tips: - UTCL2 Intf Busy Cycles: - avg: AVG(CPC_CPC_UTCL2IU_BUSY) - min: MIN(CPC_CPC_UTCL2IU_BUSY) - max: MAX(CPC_CPC_UTCL2IU_BUSY) - unit: Cycles - tips: - UTCL2 Intf Util: + CPC-UTCL2 Utilization: avg: AVG((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE)) if ((CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE) != 0) else None)) min: MIN((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE)) diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0600_shader-processor-input.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0600_shader-processor-input.yaml index 38b81ed4fc..24d4036ecb 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0600_shader-processor-input.yaml +++ 
b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0600_shader-processor-input.yaml @@ -6,11 +6,11 @@ Metric Description: # Define the panel properties and properties of each metric in the panel. Panel Config: id: 600 - title: Shader Processor Input (SPI) + title: Workgroup Manager (SPI) data source: - metric_table: id: 601 - title: SPI Stats + title: Workgroup Manager Utilizations header: metric: Metric avg: Avg @@ -19,29 +19,35 @@ Panel Config: unit: Unit tips: Tips metric: - GPU Busy: - avg: AVG(GRBM_GUI_ACTIVE) - min: MIN(GRBM_GUI_ACTIVE) - max: MAX(GRBM_GUI_ACTIVE) - unit: Cycles + Accelerator Utilization: + avg: AVG(100 * GRBM_GUI_ACTIVE / GRBM_COUNT) + min: MIN(100 * GRBM_GUI_ACTIVE / GRBM_COUNT) + max: MAX(100 * GRBM_GUI_ACTIVE / GRBM_COUNT) + unit: Pct tips: - CS Busy: - avg: AVG(SPI_CSN_BUSY) - min: MIN(SPI_CSN_BUSY) - max: MAX(SPI_CSN_BUSY) - unit: Cycles + Scheduler-Pipe Utilization: + avg: AVG(100 * SPI_CSN_BUSY / (GRBM_GUI_ACTIVE * $numPipes * $numSE)) + min: MIN(100 * SPI_CSN_BUSY / (GRBM_GUI_ACTIVE * $numPipes * $numSE)) + max: MAX(100 * SPI_CSN_BUSY / (GRBM_GUI_ACTIVE * $numPipes * $numSE)) + unit: Pct tips: - SPI Busy: - avg: AVG(GRBM_SPI_BUSY) - min: MIN(GRBM_SPI_BUSY) - max: MAX(GRBM_SPI_BUSY) - unit: Cycles + Workgroup Manager Utilization: + avg: AVG(100 * GRBM_SPI_BUSY / GRBM_GUI_ACTIVE) + min: MIN(100 * GRBM_SPI_BUSY / GRBM_GUI_ACTIVE) + max: MAX(100 * GRBM_SPI_BUSY / GRBM_GUI_ACTIVE) + unit: Pct tips: - SQ Busy: - avg: AVG(SQ_BUSY_CYCLES) - min: MIN(SQ_BUSY_CYCLES) - max: MAX(SQ_BUSY_CYCLES) - unit: Cycles + Shader Engine Utilization: + avg: AVG(100 * SQ_BUSY_CYCLES / (GRBM_GUI_ACTIVE * $numSE)) + min: MIN(100 * SQ_BUSY_CYCLES / (GRBM_GUI_ACTIVE * $numSE)) + max: MAX(100 * SQ_BUSY_CYCLES / (GRBM_GUI_ACTIVE * $numSE)) + unit: Pct + tips: + SIMD Utilization: + avg: AVG(100 * SQ_BUSY_CU_CYCLES / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SQ_BUSY_CU_CYCLES / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SQ_BUSY_CU_CYCLES / 
(GRBM_GUI_ACTIVE * $numCU)) + unit: Pct tips: Dispatched Workgroups: avg: AVG(SPI_CSN_NUM_THREADGROUPS) @@ -55,105 +61,6 @@ Panel Config: max: MAX(SPI_CSN_WAVE) unit: Wavefronts tips: - Wave Alloc Failed: - avg: AVG(SPI_RA_REQ_NO_ALLOC) - min: MIN(SPI_RA_REQ_NO_ALLOC) - max: MAX(SPI_RA_REQ_NO_ALLOC) - unit: Cycles - tips: - Wave Alloc Failed - CS: - avg: AVG(SPI_RA_REQ_NO_ALLOC_CSN) - min: MIN(SPI_RA_REQ_NO_ALLOC_CSN) - max: MAX(SPI_RA_REQ_NO_ALLOC_CSN) - unit: Cycles - tips: - - - metric_table: - id: 602 - title: SPI Resource Allocation - header: - metric: Metric - avg: Avg - min: Min - max: Max - unit: Unit - tips: Tips - metric: - Wave request Failed (CS): - avg: AVG(SPI_RA_REQ_NO_ALLOC_CSN) - min: MIN(SPI_RA_REQ_NO_ALLOC_CSN) - max: MAX(SPI_RA_REQ_NO_ALLOC_CSN) - unit: Cycles - tips: - CS Stall: - avg: AVG(SPI_RA_RES_STALL_CSN) - min: MIN(SPI_RA_RES_STALL_CSN) - max: MAX(SPI_RA_RES_STALL_CSN) - unit: Cycles - tips: - CS Stall Rate: - avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY != - 0) else None)) - min: MIN((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY != - 0) else None)) - max: MAX((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY != - 0) else None)) - unit: pct - tips: - Scratch Stall: - avg: AVG(SPI_RA_TMP_STALL_CSN) - min: MIN(SPI_RA_TMP_STALL_CSN) - max: MAX(SPI_RA_TMP_STALL_CSN) - unit: Cycles - tips: - Insufficient SIMD Waveslots: - avg: AVG(SPI_RA_WAVE_SIMD_FULL_CSN) - min: MIN(SPI_RA_WAVE_SIMD_FULL_CSN) - max: MAX(SPI_RA_WAVE_SIMD_FULL_CSN) - unit: SIMD - tips: - Insufficient SIMD VGPRs: - avg: AVG(SPI_RA_VGPR_SIMD_FULL_CSN) - min: MIN(SPI_RA_VGPR_SIMD_FULL_CSN) - max: MAX(SPI_RA_VGPR_SIMD_FULL_CSN) - unit: SIMD - tips: - Insufficient SIMD SGPRs: - avg: AVG(SPI_RA_SGPR_SIMD_FULL_CSN) - min: MIN(SPI_RA_SGPR_SIMD_FULL_CSN) - max: MAX(SPI_RA_SGPR_SIMD_FULL_CSN) - unit: SIMD - tips: - Insufficient CU LDS: - avg: AVG(SPI_RA_LDS_CU_FULL_CSN) - min: MIN(SPI_RA_LDS_CU_FULL_CSN) - max: 
MAX(SPI_RA_LDS_CU_FULL_CSN) - unit: CU - tips: - Insufficient CU Barries: - avg: AVG(SPI_RA_BAR_CU_FULL_CSN) - min: MIN(SPI_RA_BAR_CU_FULL_CSN) - max: MAX(SPI_RA_BAR_CU_FULL_CSN) - unit: CU - tips: - Insufficient Bulky Resource: - avg: AVG(SPI_RA_BULKY_CU_FULL_CSN) - min: MIN(SPI_RA_BULKY_CU_FULL_CSN) - max: MAX(SPI_RA_BULKY_CU_FULL_CSN) - unit: CU - tips: - Reach CU Threadgroups Limit: - avg: AVG(SPI_RA_TGLIM_CU_FULL_CSN) - min: MIN(SPI_RA_TGLIM_CU_FULL_CSN) - max: MAX(SPI_RA_TGLIM_CU_FULL_CSN) - unit: Cycles - tips: - Reach CU Wave Limit: - avg: AVG(SPI_RA_WVLIM_STALL_CSN) - min: MIN(SPI_RA_WVLIM_STALL_CSN) - max: MAX(SPI_RA_WVLIM_STALL_CSN) - unit: Cycles - tips: VGPR Writes: avg: AVG((((4 * SPI_VWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else None)) @@ -172,3 +79,89 @@ Panel Config: None)) unit: Cycles/wave tips: + - metric_table: + id: 602 + title: Workgroup Manager - Resource Allocation + header: + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit + tips: Tips + metric: + Not-scheduled Rate (Workgroup Manager): + avg: AVG((100 * SPI_RA_REQ_NO_ALLOC_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + min: MIN((100 * SPI_RA_REQ_NO_ALLOC_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + max: MAX((100 * SPI_RA_REQ_NO_ALLOC_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + unit: Pct + tips: + Not-scheduled Rate (Scheduler-Pipe): + avg: AVG((100 * SPI_RA_REQ_NO_ALLOC / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + min: MIN((100 * SPI_RA_REQ_NO_ALLOC / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + max: MAX((100 * SPI_RA_REQ_NO_ALLOC / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + unit: Pct + tips: + Scheduler-Pipe Stall Rate: + avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None)) + min: MIN((((100 * SPI_RA_RES_STALL_CSN) / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else 
None)) + max: MAX((((100 * SPI_RA_RES_STALL_CSN) / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None)) + unit: Pct + tips: + Scratch Stall Rate: + avg: AVG((100 * SPI_RA_TMP_STALL_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != 0) else None) + min: MIN((100 * SPI_RA_TMP_STALL_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != 0) else None) + max: MAX((100 * SPI_RA_TMP_STALL_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != 0) else None) + unit: Pct + tips: + Insufficient SIMD Waveslots: + avg: AVG(100 * SPI_RA_WAVE_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SPI_RA_WAVE_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SPI_RA_WAVE_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient SIMD VGPRs: + avg: AVG(100 * SPI_RA_VGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SPI_RA_VGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SPI_RA_VGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient SIMD SGPRs: + avg: AVG(100 * SPI_RA_SGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SPI_RA_SGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SPI_RA_SGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient CU LDS: + avg: AVG(400 * SPI_RA_LDS_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_LDS_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_LDS_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient CU Barriers: + avg: AVG(400 * SPI_RA_BAR_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_BAR_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_BAR_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Reached CU Workgroup Limit: + avg: AVG(400 * SPI_RA_TGLIM_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_TGLIM_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * 
SPI_RA_TGLIM_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Reached CU Wavefront Limit: + avg: AVG(400 * SPI_RA_WVLIM_STALL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_WVLIM_STALL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_WVLIM_STALL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0700_wavefront-launch.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0700_wavefront-launch.yaml index 2dceb05dc7..42a863af49 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0700_wavefront-launch.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0700_wavefront-launch.yaml @@ -77,7 +77,7 @@ Panel Config: avg: AVG(Scratch_Per_Workitem) min: MIN(Scratch_Per_Workitem) max: MAX(Scratch_Per_Workitem) - unit: Bytes + unit: Bytes/Workitem tips: - metric_table: @@ -103,7 +103,7 @@ Panel Config: max: MAX(GRBM_GUI_ACTIVE) unit: Cycle tips: - Instr/wavefront: + Instructions per wavefront: avg: AVG((SQ_INSTS / SQ_WAVES)) min: MIN((SQ_INSTS / SQ_WAVES)) max: MAX((SQ_INSTS / SQ_WAVES)) diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1000_compute-unit-instruction-mix.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1000_compute-unit-instruction-mix.yaml index 4d8f36a237..9aac87117d 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1000_compute-unit-instruction-mix.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1000_compute-unit-instruction-mix.yaml @@ -10,7 +10,7 @@ Panel Config: data source: - metric_table: id: 1001 - title: Instruction Mix + title: Overall Instruction Mix header: metric: Metric avg: Avg @@ -18,11 +18,11 @@ Panel Config: max: Max unit: Unit tips: Tips - cli_style: + style: type: simple_bar label_txt: (# of instr + $normUnit) metric: 
- VALU - Vector: + VALU: avg: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom)) min: MIN(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom)) max: MAX(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom)) @@ -40,7 +40,7 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (instr + $normUnit) tips: - VALU - MFMA: + MFMA: avg: None # No HW module min: None # No HW module max: None # No HW module @@ -64,12 +64,6 @@ Panel Config: max: MAX((SQ_INSTS_BRANCH / $denom)) unit: (instr + $normUnit) tips: - GDS: - avg: AVG((SQ_INSTS_GDS / $denom)) - min: MIN((SQ_INSTS_GDS / $denom)) - max: MAX((SQ_INSTS_GDS / $denom)) - unit: (instr + $normUnit) - tips: - metric_table: id: 1002 @@ -81,7 +75,7 @@ Panel Config: max: Max unit: Unit tips: Tips - cli_style: + style: type: simple_bar label_txt: (# of instr + $normUnit) metric: @@ -103,7 +97,7 @@ Panel Config: max: None # No perf counter unit: (instr + $normUnit) tips: - F16-Mult: + F16-MUL: avg: None # No perf counter min: None # No perf counter max: None # No perf counter @@ -127,7 +121,7 @@ Panel Config: max: None # No perf counter unit: (instr + $normUnit) tips: - F32-Mult: + F32-MUL: avg: None # No perf counter min: None # No perf counter max: None # No perf counter @@ -151,7 +145,7 @@ Panel Config: max: None # No perf counter unit: (instr + $normUnit) tips: - F64-Mult: + F64-MUL: avg: None # No perf counter min: None # No perf counter max: None # No perf counter @@ -180,55 +174,100 @@ Panel Config: id: 1003 title: VMEM Instr Mix header: - type: Type - count: Count + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit tips: Tips metric: - Buffer Instr: - count: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom)) - tips: - Buffer Read: - count: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) - tips: - Buffer Write: - count: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) - tips: - Buffer Atomic: - count: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) - tips: - Flat Instr: - count: AVG((TA_FLAT_WAVEFRONTS_sum / $denom)) - tips: - Flat Read: - count: 
AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) - tips: - Flat Write: - count: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) - tips: - Flat Atomic: - count: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) - tips: + Global/Generic Instr: + avg: AVG((TA_FLAT_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_WAVEFRONTS_sum / $denom)) + max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Global/Generic Read: + avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) + max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Global/Generic Write: + avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) + max: MAX((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Global/Generic Atomic: + avg: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) + max: MAX((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Instr: + avg: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Read: + avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Write: + avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Atomic: + avg: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: - metric_table: id: 1004 title: MFMA Arithmetic Instr Mix header: - type: Type - count: 
Count + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit tips: Tips metric: MFMA-I8: - count: None # No HW module - tips: + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: (instr + $normUnit) + tips: MFMA-F16: - count: None # No HW module - tips: + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: (instr + $normUnit) + tips: MFMA-BF16: - count: None # No HW module - tips: + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: (instr + $normUnit) + tips: MFMA-F32: - count: None # No HW module + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: (instr + $normUnit) tips: MFMA-F64: - count: None # No HW module - tips: \ No newline at end of file + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: (instr + $normUnit) + tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1100_compute-unit-compute-pipeline.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1100_compute-unit-compute-pipeline.yaml index 8e749cf3da..8dfcef927b 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1100_compute-unit-compute-pipeline.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1100_compute-unit-compute-pipeline.yaml @@ -13,31 +13,58 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg + unit: Unit + peak: Peak + pop: Pct of Peak tips: Tips - cli_style: + style: type: simple_bar range_color: [1, 100] label_txt: (%) xrange: [0, 110] metric: - valu_flops_pop: + VALU FLOPs: value: None # No perf counter + Unit: None + peak: None + pop: None tips: - mfma_flops_bf16_pop: + VALU IOPs: value: None # No perf counter + Unit: None + peak: None + pop: None tips: - mfma_flops_f16_pop: + MFMA FLOPs (BF16): value: None # No perf counter + Unit: None + 
peak: None + pop: None tips: - mfma_flops_f32_pop: + MFMA FLOPs (F16): value: None # No perf counter + Unit: None + peak: None + pop: None tips: - mfma_flops_f64_pop: + MFMA FLOPs (F32): value: None # No perf counter + Unit: None + peak: None + pop: None tips: - mfma_flops_i8_pop: + MFMA FLOPs (F64): value: None # No perf counter + Unit: None + peak: None + pop: None + tips: + MFMA IOPs (INT8): + value: None # No perf counter + Unit: None + peak: None + pop: None tips: - metric_table: @@ -51,36 +78,48 @@ Panel Config: unit: Unit tips: Tips metric: - IPC (Avg): + IPC: avg: AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES)) min: MIN((SQ_INSTS / SQ_BUSY_CU_CYCLES)) max: MAX((SQ_INSTS / SQ_BUSY_CU_CYCLES)) unit: Instr/cycle tips: - IPC (Issue): - avg: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) + IPC (Issued): + avg: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)) + + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED + SQ_INSTS_LDS) / SQ_ACTIVE_INST_ANY)) - min: MIN(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) + min: MIN(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)) + + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED + SQ_INSTS_LDS) / SQ_ACTIVE_INST_ANY)) - max: MAX(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) + max: MAX(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)) + + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED + SQ_INSTS_LDS) / SQ_ACTIVE_INST_ANY)) unit: Instr/cycle tips: - SALU Util: + SALU Utilization: avg: AVG((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU)) min: MIN((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU)) max: 
MAX((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU)) unit: pct tips: - VALU Util: + VALU Utilization: avg: AVG((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU)) min: MIN((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU)) max: MAX((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU)) unit: pct tips: + VMEM Utilization: + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: pct + tips: + Branch Utilization: + avg: None # No HW module + min: None # No HW module + max: None # No HW module + unit: pct + tips: VALU Active Threads: avg: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU != 0) else None)) @@ -90,7 +129,7 @@ Panel Config: != 0) else None)) unit: Threads tips: - MFMA Util: + MFMA Utilization: avg: None # No HW module min: None # No HW module max: None # No HW module @@ -102,6 +141,20 @@ Panel Config: max: None # No HW module unit: cycles/instr tips: + VMEM Latency: + avg: None # No perf counter + min: None # No perf counter + max: None # No perf counter + unit: Cycles + coll_level: SQ_INST_LEVEL_VMEM + tips: + SMEM Latency: + avg: None # No perf counter + min: None # No perf counter + max: None # No perf counter + unit: Cycles + coll_level: SQ_INST_LEVEL_SMEM + tips: - metric_table: id: 1103 @@ -121,7 +174,7 @@ Panel Config: max: None # No perf counter unit: (OPs + $normUnit) tips: - INT8 OPs: + IOPs (Total): avg: None # No perf counter min: None # No perf counter max: None # No perf counter @@ -151,5 +204,11 @@ Panel Config: max: None # No perf counter unit: (OPs + $normUnit) tips: + INT8 OPs: + avg: None # No perf counter + min: None # No perf counter + max: None # No perf counter + unit: (OPs + $normUnit) + tips: - \ No newline at end of file + diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1200_lds.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1200_lds.yaml index d225d31e0a..66bd078b28 100644 --- 
a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1200_lds.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1200_lds.yaml @@ -16,7 +16,7 @@ Panel Config: value: Value unit: Unit tips: Tips - cli_style: + style: type: simple_bar range_color: [1, 100] label_txt: (%) @@ -30,11 +30,13 @@ Panel Config: value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU))) unit: Pct of Peak tips: - Bandwidth (Pct-of-Peak): + unit: pct + Theoretical Bandwidth: value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) unit: Pct of Peak tips: + unit: pct Bank Conflict Rate: value: AVG((((SQ_LDS_BANK_CONFLICT * 3.125) / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None)) @@ -58,7 +60,7 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (Instr + $normUnit) tips: - Bandwidth: + Theoretical Bandwidth: avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) @@ -67,7 +69,14 @@ Panel Config: / $denom)) unit: (Bytes + $normUnit) tips: - Bank Conficts/Access: + LDS Latency: + avg: AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) + min: MIN(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) + max: MAX(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) + unit: Cycles + coll_level: SQ_INST_LEVEL_LDS + tips: + Bank Conflicts/Access: avg: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None)) min: MIN(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) @@ -82,7 +91,7 @@ Panel Config: max: MAX((SQ_LDS_IDX_ACTIVE / $denom)) unit: (Cycles + $normUnit) tips: - Atomic Cycles: + Atomic Return Cycles: 
avg: AVG((SQ_LDS_ATOMIC_RETURN / $denom)) min: MIN((SQ_LDS_ATOMIC_RETURN / $denom)) max: MAX((SQ_LDS_ATOMIC_RETURN / $denom)) @@ -110,12 +119,5 @@ Panel Config: avg: AVG((SQ_LDS_MEM_VIOLATIONS / $denom)) min: MIN((SQ_LDS_MEM_VIOLATIONS / $denom)) max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom)) - unit: ( + $normUnit) - tips: - LDS Latency: - avg: AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) - min: MIN(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) - max: MAX(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) - unit: Cycles - coll_level: SQ_INST_LEVEL_LDS + unit: (Accesses + $normUnit) tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1300_instruction-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1300_instruction-cache.yaml index 7425cade78..5cfe101e71 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1300_instruction-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1300_instruction-cache.yaml @@ -13,10 +13,10 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit tips: Tips - cli_style: + style: type: simple_bar range_color: [1, 100] label_txt: (%) @@ -27,11 +27,16 @@ Panel Config: * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: - Cache Hit: + Cache Hit Rate: value: AVG(((SQC_ICACHE_HITS * 100) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) + SQC_ICACHE_MISSES_DUPLICATE))) unit: Pct of Peak tips: + L1I-L2 Bandwidth: + value: AVG(((SQC_TC_INST_REQ * 100000) / (2 * ($sclk * $numSQC) + * (EndNs - BeginNs)))) + unit: Pct of Peak + tips: - metric_table: id: 1302 @@ -68,7 +73,7 @@ Panel Config: max: MAX((SQC_ICACHE_MISSES_DUPLICATE / $denom)) unit: (Misses + $normUnit) tips: - Cache Hit: + Cache Hit Rate: avg: AVG(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + 
SQC_ICACHE_MISSES) + SQC_ICACHE_MISSES_DUPLICATE))) min: MIN(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) + @@ -76,4 +81,28 @@ Panel Config: max: MAX(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) + SQC_ICACHE_MISSES_DUPLICATE))) unit: pct - tips: \ No newline at end of file + tips: + Instruction Fetch Latency: + avg: AVG((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) + min: MIN((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) + max: MAX((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) + unit: Cycles + coll_level: SQ_IFETCH_LEVEL + tips: + - metric_table: + id: 1303 + title: Instruction Cache - L2 Interface + header: + metric: Metric + mean: Mean + min: Min + max: Max + unit: Unit + tips: Tips + metric: + L1I-L2 Bandwidth: + mean: AVG(((SQC_TC_INST_REQ * 64) / $denom)) + min: MIN(((SQC_TC_INST_REQ * 64) / $denom)) + max: MAX(((SQC_TC_INST_REQ * 64) / $denom)) + unit: (Bytes + $normUnit) + tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1400_constant-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1400_constant-cache.yaml index ab4f5109ab..34f319ad2f 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1400_constant-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1400_constant-cache.yaml @@ -12,11 +12,11 @@ Panel Config: id: 1401 title: Speed-of-Light header: - mertic: Metric - value: Value + metric: Metric + value: Avg unit: Unit tips: Tips - cli_style: + style: type: simple_bar range_color: [1, 100] label_txt: (%) @@ -27,12 +27,17 @@ Panel Config: * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: - Cache Hit: + Cache Hit Rate: value: AVG((((SQC_DCACHE_HITS * 100) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE)) if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE) != 0) else None)) unit: Pct of Peak tips: + sL1D-L2 BW: + value: 
AVG(((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 100000) + / (2 * ($sclk * $numSQC) * (EndNs - BeginNs))) + unit: Pct of Peak + tips: - metric_table: id: 1402 @@ -69,7 +74,7 @@ Panel Config: max: MAX((SQC_DCACHE_MISSES_DUPLICATE / $denom)) unit: (Req + $normUnit) tips: - Cache Hit: + Cache Hit Rate: avg: AVG((((100 * SQC_DCACHE_HITS) / ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) + SQC_DCACHE_MISSES_DUPLICATE)) if (((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) + SQC_DCACHE_MISSES_DUPLICATE) != 0) else None)) @@ -138,6 +143,12 @@ Panel Config: unit: Unit tips: Tips metric: + sL1D-L2 BW: + mean: AVG(((((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 64)) / $denom)) + min: MIN(((((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 64)) / $denom)) + max: MAX(((((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 64)) / $denom)) + unit: (Bytes + $normUnit) + tips: Read Req: avg: AVG((SQC_TC_DATA_READ_REQ / $denom)) min: MIN((SQC_TC_DATA_READ_REQ / $denom)) @@ -156,9 +167,9 @@ Panel Config: max: MAX((SQC_TC_DATA_ATOMIC_REQ / $denom)) unit: (Req + $normUnit) tips: - Stall: + Stall Cycles: avg: AVG((SQC_TC_STALL / $denom)) min: MIN((SQC_TC_STALL / $denom)) max: MAX((SQC_TC_STALL / $denom)) unit: (Cycles + $normUnit) - tips: \ No newline at end of file + tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1500_TA_and_TD.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1500_TA_and_TD.yaml index 8f71cedc99..773bb7c763 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1500_TA_and_TD.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1500_TA_and_TD.yaml @@ -6,11 +6,11 @@ Metric Description: # Define the panel properties and properties of each metric in the panel. 
Panel Config: id: 1500 - title: Texture Addresser and Texture Data (TA/TD) + title: Address Processing Unit and Data Return Path (TA/TD) data source: - metric_table: id: 1501 - title: TA + title: Address Processing Unit header: metric: Metric avg: Avg @@ -19,25 +19,25 @@ Panel Config: unit: Unit tips: Tips metric: - TA Busy: + Address Processing Unit Busy: avg: AVG(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TC2TA Addr Stall: + Address Stall: avg: AVG(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TC2TA Data Stall: + Data Stall: avg: AVG(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TD2TA Addr Stall: + Data-Processor → Address Stall: avg: AVG(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) @@ -47,69 +47,69 @@ Panel Config: avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom)) min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom)) max: MAX((TA_TOTAL_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Instr: + Global/Generic Instructions: avg: AVG((TA_FLAT_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Read Instr: + Global/Generic Read 
Instructions: avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Write Instr: + Global/Generic Write Instructions: avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Atomic Instr: + Global/Generic Atomic Instructions: avg: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Instr: + Spill/Stack Instructions: avg: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Read Instr: + Spill/Stack Read Instructions: avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Write Instr: + Spill/Stack Write Instructions: avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Atomic Instr: + Spill/Stack Atomic Instructions: avg: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Total Cylces: + Spill/Stack Total Cycles: avg: AVG((TA_BUFFER_TOTAL_CYCLES_sum / $denom)) min: 
MIN((TA_BUFFER_TOTAL_CYCLES_sum / $denom)) max: MAX((TA_BUFFER_TOTAL_CYCLES_sum / $denom)) unit: (Cycles + $normUnit) tips: - Buffer Coalesced Read: + Spill/Stack Coalesced Read: avg: AVG((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom)) min: MIN((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom)) max: MAX((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom)) unit: (Cycles + $normUnit) tips: - Buffer Coalesced Write: + Spill/Stack Coalesced Write: avg: AVG((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom)) min: MIN((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom)) max: MAX((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom)) @@ -118,7 +118,7 @@ Panel Config: - metric_table: id: 1502 - title: TD + title: Data-Return Path header: metric: Metric avg: Avg @@ -127,48 +127,48 @@ Panel Config: unit: Unit tips: Tips metric: - TD Busy: + Data-Return Busy: avg: AVG(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TC2TD Stall: + Cache RAM → Data-Return Stall: avg: AVG(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - SPI2TD Stall: + Workgroup manager → Data-Return Stall: avg: # No perf counter min: # No perf counter max: # No perf counter unit: pct tips: - Coalescable Instr: + Coalescable Instructions: avg: AVG((TD_COALESCABLE_WAVEFRONT_sum / $denom)) min: MIN((TD_COALESCABLE_WAVEFRONT_sum / $denom)) max: MAX((TD_COALESCABLE_WAVEFRONT_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Load Instr: + Read Instructions: avg: AVG((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum) / $denom)) min: MIN((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum) / $denom)) max: MAX((((TD_LOAD_WAVEFRONT_sum - 
TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum) / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Store Instr: + Write Instructions: avg: AVG((TD_STORE_WAVEFRONT_sum / $denom)) min: MIN((TD_STORE_WAVEFRONT_sum / $denom)) max: MAX((TD_STORE_WAVEFRONT_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Atomic Instr: + Atomic Instructions: avg: AVG((TD_ATOMIC_WAVEFRONT_sum / $denom)) min: MIN((TD_ATOMIC_WAVEFRONT_sum / $denom)) max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1600_L1_cache.yaml index 7b8ed6f8ae..096fcb9b3c 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1600_L1_cache.yaml @@ -13,37 +13,37 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit tips: Tips - cli_style: + style: type: simple_bar range_color: [1, 100] label_txt: (%) xrange: [0, 110] metric: - Buffer Coalescing: - value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum - * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None)) - unit: Pct of Peak - tips: - Cache Util: - value: AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum - != 0) else None)) - unit: Pct of Peak - tips: - Cache BW: - value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) - / ((($sclk / 1000) * 64) * $numCU)) - unit: Pct of Peak - tips: - Cache Hit: + Hit rate: value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / TCP_TOTAL_CACHE_ACCESSES_sum)) if 
(TCP_TOTAL_CACHE_ACCESSES_sum != 0) else None)) unit: Pct of Peak tips: + Bandwidth: + value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) + / ((($sclk / 1000) * 64) * $numCU)) + unit: Pct of Peak + tips: + Utilization: + value: AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum + != 0) else None)) + unit: Pct of Peak + tips: + Coalescing: + value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum + * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None)) + unit: Pct of Peak + tips: - metric_table: id: 1602 @@ -141,11 +141,26 @@ Panel Config: unit: (Req + $normUnit) tips: Cache BW: - avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) - min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) - max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) - unit: GB/s + avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / $denom)) + min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / $denom)) + max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / $denom)) + unit: (Bytes + $normUnit) tips: + Cache Hit Rate: + avg: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / + TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None)) + min: MIN(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / + TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None)) + max: MAX(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / + TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None)) + unit: pct + tips: Cache Accesses: avg: AVG((TCP_TOTAL_CACHE_ACCESSES_sum / $denom)) min: 
MIN((TCP_TOTAL_CACHE_ACCESSES_sum / $denom)) @@ -164,22 +179,7 @@ Panel Config: / $denom)) unit: (Req + $normUnit) tips: - Cache Hit Rate: - avg: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + - TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / - TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)) - min: MIN(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + - TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / - TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)) - max: MAX(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + - TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / - TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)) - unit: pct - tips: - Invalidate: + Invalidations: avg: AVG((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom)) min: MIN((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom)) max: MAX((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom)) @@ -188,9 +188,9 @@ Panel Config: L1-L2 BW: avg: AVG(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) - min: AVG(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + min: MIN(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) - max: AVG(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + max: MAX(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) unit: (Bytes + $normUnit) tips: @@ -258,7 +258,7 @@ Panel Config: max: Max unit: Unit tips: Tips - cli_style: + style: type: simple_multi_bar metric: NC - Read: @@ -388,17 +388,17 @@ Panel Config: avg: 
AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) - units: (Hits + $normUnit) + units: (Req + $normUnit) tips: - Misses (Translation): + Translation Misses: avg: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) - units: (Misses + $normUnit) + units: (Req + $normUnit) tips: - Misses (Permission): + Permission Misses: avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) - units: (Misses + $normUnit) + units: (Req + $normUnit) tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1700_L2_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1700_L2_cache.yaml index 2b98c2593d..2037ce0827 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1700_L2_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1700_L2_cache.yaml @@ -13,31 +13,35 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit tips: Tips - cli_style: + style: type: simple_bar metric: - L2 Util: + Utilization: value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE))) unit: pct + tips: + Bandwidth: + value: ((100 * AVG(((TCC_REQ_sum * 64) / (EndNs - BeginNs)))) / ((($sclk / 1000) * 64) * TO_INT($L2Banks))) + unit: pct tips: - Cache Hit: + Hit Rate: value: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else 0)) unit: pct - tips: - L2-EA Rd BW: + tips: + L2-Fabric Read BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s - tips: - L2-EA Wr BW: + tips: + L2-Fabric Write and Atomic BW: 
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s - tips: + tips: - metric_table: id: 1702 @@ -50,7 +54,7 @@ Panel Config: unit: Unit tips: Tips metric: - Read BW: + L2-Fabric Read BW: avg: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / $denom)) min: MIN((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) @@ -58,8 +62,26 @@ Panel Config: max: MAX((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / $denom)) unit: (Bytes + $normUnit) - tips: - Write BW: + tips: + HBM Read Traffic: + avg: AVG((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + unit: pct + tips: + Remote Read Traffic: + avg: AVG((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + min: MIN((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + max: MAX((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + unit: pct + tips: + Uncached Read Traffic: + avg: AVG((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + unit: pct + tips: + L2-Fabric Write and Atomic BW: avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) * 32)) / $denom)) min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) @@ -67,55 +89,31 @@ 
Panel Config: max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) * 32)) / $denom)) unit: (Bytes + $normUnit) - tips: - Read (32B): - avg: AVG((TCC_EA_RDREQ_32B_sum / $denom)) - min: MIN((TCC_EA_RDREQ_32B_sum / $denom)) - max: MAX((TCC_EA_RDREQ_32B_sum / $denom)) - unit: (Req + $normUnit) - tips: - Read (Uncached 32B): - avg: AVG((TCC_EA_RD_UNCACHED_32B_sum / $denom)) - min: MIN((TCC_EA_RD_UNCACHED_32B_sum / $denom)) - max: MAX((TCC_EA_RD_UNCACHED_32B_sum / $denom)) - unit: (Req + $normUnit) - tips: - Read (64B): - avg: AVG(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) - min: MIN(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) - max: MAX(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) - unit: (Req + $normUnit) - tips: - HBM Read: - avg: AVG((TCC_EA_RDREQ_DRAM_sum / $denom)) - min: MIN((TCC_EA_RDREQ_DRAM_sum / $denom)) - max: MAX((TCC_EA_RDREQ_DRAM_sum / $denom)) - unit: (Req + $normUnit) - tips: - Write (32B): - avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) - min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) - max: MAX(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) - unit: (Req + $normUnit) - tips: - Write (Uncached 32B): - avg: AVG((TCC_EA_WR_UNCACHED_32B_sum / $denom)) - min: MIN((TCC_EA_WR_UNCACHED_32B_sum / $denom)) - max: MAX((TCC_EA_WR_UNCACHED_32B_sum / $denom)) - unit: (Req + $normUnit) - tips: - Write (64B): - avg: AVG((TCC_EA_WRREQ_64B_sum / $denom)) - min: MIN((TCC_EA_WRREQ_64B_sum / $denom)) - max: MAX((TCC_EA_WRREQ_64B_sum / $denom)) - unit: (Req + $normUnit) - tips: - HBM Write: - avg: AVG((TCC_EA_WRREQ_DRAM_sum / $denom)) - min: MIN((TCC_EA_WRREQ_DRAM_sum / $denom)) - max: MAX((TCC_EA_WRREQ_DRAM_sum / $denom)) - unit: (Req + $normUnit) - tips: + tips: + HBM Write and Atomic Traffic: + avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum 
!= 0) else None)) + max: MAX((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: + Remote Write and Atomic Traffic: + avg: AVG((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: + Atomic Traffic: + avg: AVG((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: + Uncached Write and Atomic Traffic: + avg: AVG((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: Read Latency: avg: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) @@ -124,7 +122,7 @@ Panel Config: max: MAX(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) unit: Cycles - tips: + tips: Write Latency: avg: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) @@ -133,7 +131,7 @@ Panel Config: max: MAX(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) unit: Cycles - tips: + tips: Atomic Latency: avg: AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum != 0) else None)) @@ -142,7 +140,7 @@ Panel Config: max: MAX(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if 
(TCC_EA_ATOMIC_sum != 0) else None)) unit: Cycles - tips: + tips: Read Stall: avg: AVG((((100 * ((TCC_EA_RDREQ_IO_CREDIT_STALL_sum + TCC_EA_RDREQ_GMI_CREDIT_STALL_sum) + TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != @@ -154,7 +152,7 @@ Panel Config: + TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != 0) else None)) unit: pct - tips: + tips: Write Stall: avg: AVG((((100 * ((TCC_EA_WRREQ_IO_CREDIT_STALL_sum + TCC_EA_WRREQ_GMI_CREDIT_STALL_sum) + TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != @@ -166,7 +164,7 @@ Panel Config: + TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != 0) else None)) unit: pct - tips: + tips: - metric_table: id: 1703 @@ -179,54 +177,48 @@ Panel Config: unit: Unit tips: Tips metric: + Bandwidth: + avg: AVG((TCC_REQ_sum * 64) / $denom) + min: MIN((TCC_REQ_sum * 64) / $denom) + max: MAX((TCC_REQ_sum * 64) / $denom) + unit: (Bytes + $normUnit) + tips: Req: avg: AVG((TCC_REQ_sum / $denom)) min: MIN((TCC_REQ_sum / $denom)) max: MAX((TCC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: - Streaming Req: - avg: AVG((TCC_STREAMING_REQ_sum / $denom)) - min: MIN((TCC_STREAMING_REQ_sum / $denom)) - max: MAX((TCC_STREAMING_REQ_sum / $denom)) - unit: (Req + $normUnit) - tips: + tips: Read Req: avg: AVG((TCC_READ_sum / $denom)) min: MIN((TCC_READ_sum / $denom)) max: MAX((TCC_READ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: Write Req: avg: AVG((TCC_WRITE_sum / $denom)) min: MIN((TCC_WRITE_sum / $denom)) max: MAX((TCC_WRITE_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: Atomic Req: avg: AVG((TCC_ATOMIC_sum / $denom)) min: MIN((TCC_ATOMIC_sum / $denom)) max: MAX((TCC_ATOMIC_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: Probe Req: avg: AVG((TCC_PROBE_sum / $denom)) min: MIN((TCC_PROBE_sum / $denom)) max: MAX((TCC_PROBE_sum / $denom)) unit: (Req + $normUnit) - tips: - Hits: - avg: AVG((TCC_HIT_sum / $denom)) - min: MIN((TCC_HIT_sum / 
$denom)) - max: MAX((TCC_HIT_sum / $denom)) - unit: (Hits + $normUnit) - tips: - Misses: - avg: AVG((TCC_MISS_sum / $denom)) - min: MIN((TCC_MISS_sum / $denom)) - max: MAX((TCC_MISS_sum / $denom)) - unit: (Misses + $normUnit) - tips: + tips: + Streaming Req: + avg: AVG((TCC_STREAMING_REQ_sum / $denom)) + min: MIN((TCC_STREAMING_REQ_sum / $denom)) + max: MAX((TCC_STREAMING_REQ_sum / $denom)) + unit: (Req + $normUnit) + tips: Cache Hit: avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) @@ -235,65 +227,77 @@ Panel Config: max: MAX((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) unit: pct - tips: + tips: + Hits: + avg: AVG((TCC_HIT_sum / $denom)) + min: MIN((TCC_HIT_sum / $denom)) + max: MAX((TCC_HIT_sum / $denom)) + unit: (Hits + $normUnit) + tips: + Misses: + avg: AVG((TCC_MISS_sum / $denom)) + min: MIN((TCC_MISS_sum / $denom)) + max: MAX((TCC_MISS_sum / $denom)) + unit: (Misses + $normUnit) + tips: Writeback: avg: AVG((TCC_WRITEBACK_sum / $denom)) min: MIN((TCC_WRITEBACK_sum / $denom)) max: MAX((TCC_WRITEBACK_sum / $denom)) - unit: ( + $normUnit) - tips: + unit: (Cachelines + $normUnit) + tips: + Writeback (Internal): + avg: AVG((TCC_NORMAL_WRITEBACK_sum / $denom)) + min: MIN((TCC_NORMAL_WRITEBACK_sum / $denom)) + max: MAX((TCC_NORMAL_WRITEBACK_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: + Writeback (vL1D Req): + avg: AVG((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) + min: MIN((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) + max: MAX((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: + Evict (Normal): + avg: AVG((TCC_NORMAL_EVICT_sum / $denom)) + min: MIN((TCC_NORMAL_EVICT_sum / $denom)) + max: MAX((TCC_NORMAL_EVICT_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: + Evict (vL1D Req): + avg: AVG((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) + min: MIN((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) + max: 
MAX((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: NC Req: avg: AVG((TCC_NC_REQ_sum / $denom)) min: MIN((TCC_NC_REQ_sum / $denom)) max: MAX((TCC_NC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: UC Req: avg: AVG((TCC_UC_REQ_sum / $denom)) min: MIN((TCC_UC_REQ_sum / $denom)) max: MAX((TCC_UC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: CC Req: avg: AVG((TCC_CC_REQ_sum / $denom)) min: MIN((TCC_CC_REQ_sum / $denom)) max: MAX((TCC_CC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: RW Req: avg: AVG((TCC_RW_REQ_sum / $denom)) min: MIN((TCC_RW_REQ_sum / $denom)) max: MAX((TCC_RW_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: - Writeback (Normal): - avg: AVG((TCC_NORMAL_WRITEBACK_sum / $denom)) - min: MIN((TCC_NORMAL_WRITEBACK_sum / $denom)) - max: MAX((TCC_NORMAL_WRITEBACK_sum / $denom)) - unit: ( + $normUnit) - tips: - Writeback (TC Req): - avg: AVG((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) - min: MIN((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) - max: MAX((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) - unit: ( + $normUnit) - tips: - Evict (Normal): - avg: AVG((TCC_NORMAL_EVICT_sum / $denom)) - min: MIN((TCC_NORMAL_EVICT_sum / $denom)) - max: MAX((TCC_NORMAL_EVICT_sum / $denom)) - unit: ( + $normUnit) - tips: - Evict (TC Req): - avg: AVG((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) - min: MIN((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) - max: MAX((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) - unit: ( + $normUnit) - tips: + tips: - metric_table: id: 1704 - title: L2 - EA Interface Stalls + title: L2 - Fabric Interface Stalls header: metric: Metric type: Type @@ -303,62 +307,140 @@ Panel Config: max: Max unit: Unit tips: Tips - cli_style: + style: type: simple_multi_bar metric: - Read - Remote Socket Stall: - type: Remote Socket Stall + Read - PCIe Stall: + type: PCIe Stall transaction: Read - avg: AVG((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom)) - max: 
MAX((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: - Read - Peer GCD Stall: - type: Peer GCD Stall + avg: AVG(((100 * (TCC_EA_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + Read - Infinity Fabric™ Stall: + type: Infinity Fabric™ Stall transaction: Read - avg: AVG((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: + avg: AVG(((100 * (TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: Read - HBM Stall: type: HBM Stall transaction: Read - avg: AVG((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: - Write - Remote Socket Stall: - type: Remote Socket Stall + avg: AVG(((100 * (TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + Write - PCIe Stall: + type: PCIe Stall transaction: Write - avg: AVG((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom)) - unit: (Req + 
$normUnit) - tips: - Write - Peer GCD Stall: - type: Peer GCD Stall + avg: AVG(((100 * (TCC_EA_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + Write - Infinity Fabric™ Stall: + type: Infinity Fabric™ Stall transaction: Write - avg: AVG((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: + avg: AVG(((100 * (TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: Write - HBM Stall: - type: HBM Stall + type: HBM Stall transaction: Write - avg: AVG((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: + avg: AVG(((100 * (TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: Write - Credit Starvation: type: Credit Starvation transaction: Write - avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom)) - min: MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom)) - max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom)) + avg: AVG(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + 
min: MIN(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + + - metric_table: + id: 1705 + title: L2 - Fabric Detailed Transaction Breakdown + header: + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit + tips: Tips + metric: + Read (32B): + avg: AVG((TCC_EA_RDREQ_32B_sum / $denom)) + min: MIN((TCC_EA_RDREQ_32B_sum / $denom)) + max: MAX((TCC_EA_RDREQ_32B_sum / $denom)) unit: (Req + $normUnit) - tips: \ No newline at end of file + tips: + Read (Uncached): + avg: AVG((TCC_EA_RD_UNCACHED_32B_sum / $denom)) + min: MIN((TCC_EA_RD_UNCACHED_32B_sum / $denom)) + max: MAX((TCC_EA_RD_UNCACHED_32B_sum / $denom)) + unit: (Req + $normUnit) + tips: + Read (64B): + avg: AVG(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) + min: MIN(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) + max: MAX(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) + unit: (Req + $normUnit) + tips: + HBM Read: + avg: AVG((TCC_EA_RDREQ_DRAM_sum / $denom)) + min: MIN((TCC_EA_RDREQ_DRAM_sum / $denom)) + max: MAX((TCC_EA_RDREQ_DRAM_sum / $denom)) + unit: (Req + $normUnit) + tips: + Remote Read: + avg: AVG((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom)) + min: MIN((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom)) + max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom)) + unit: (Req + $normUnit) + tips: + Write and Atomic (32B): + avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) + min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) + max: MAX(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) + unit: (Req + $normUnit) + tips: + Write and Atomic (Uncached): + avg: AVG((TCC_EA_WR_UNCACHED_32B_sum / $denom)) + min: MIN((TCC_EA_WR_UNCACHED_32B_sum / $denom)) + max: MAX((TCC_EA_WR_UNCACHED_32B_sum / $denom)) + unit: (Req + $normUnit) + tips: + Write 
and Atomic (64B): + avg: AVG((TCC_EA_WRREQ_64B_sum / $denom)) + min: MIN((TCC_EA_WRREQ_64B_sum / $denom)) + max: MAX((TCC_EA_WRREQ_64B_sum / $denom)) + unit: (Req + $normUnit) + tips: + HBM Write and Atomic: + avg: AVG((TCC_EA_WRREQ_DRAM_sum / $denom)) + min: MIN((TCC_EA_WRREQ_DRAM_sum / $denom)) + max: MAX((TCC_EA_WRREQ_DRAM_sum / $denom)) + unit: (Req + $normUnit) + tips: + Remote Write and Atomic: + avg: AVG((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) + min: MIN((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) + max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) + unit: (Req + $normUnit) + tips: + Atomic: + avg: AVG((TCC_EA_ATOMIC_sum / $denom)) + min: MIN((TCC_EA_ATOMIC_sum / $denom)) + max: MAX((TCC_EA_ATOMIC_sum / $denom)) + unit: (Req + $normUnit) + tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1800_L2_cache_per_channel.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1800_L2_cache_per_channel.yaml index 117b8e0a87..ae982b365a 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1800_L2_cache_per_channel.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1800_L2_cache_per_channel.yaml @@ -186,7 +186,7 @@ Panel Config: - metric_table: id: 1803 - title: Requests (Requests) + title: L2 Requests (Requests) header: metric: Metric expr: Expression @@ -199,12 +199,12 @@ Panel Config: - metric_table: id: 1804 - title: L1-L2 Access (Requests) + title: L2 Access (Requests) header: metric: Metric - read req: L1-L2 Read - write req: L1-L2 Write - atomic req: L1-L2 Atomic + read req: L2 Read Req + write req: L2 Write Req + atomic req: L2 Atomic Req metric: "::_1": read req: AVG((TO_INT(TCC_READ[::_1]) / $denom)) @@ -216,12 +216,12 @@ Panel Config: - metric_table: id: 1805 - title: L2-EA Access (Requests) + title: L2 - Fabric Access (Requests) header: metric: 
Metric - read req: L2-EA Read - write req: L2-EA Write - atomic req: L2-EA Atomic + read req: L2 - Fabric Read Req + write req: L2 - Fabric Write and Atomic Req + atomic req: L2 - Fabric Atomic Req metric: "::_1": read req: AVG((TO_INT(TCC_EA_RDREQ[::_1]) / $denom)) @@ -256,7 +256,7 @@ Panel Config: - metric_table: id: 1806 - title: L2-EA Read Latency (Cycles) + title: L2 - Fabric Read Latency (Cycles) header: metric: Metric expr: Expression @@ -271,7 +271,7 @@ Panel Config: - metric_table: id: 1807 - title: L2-EA Write Latency (Cycles) + title: L2 - Fabric Write Latency (Cycles) header: metric: Metric expr: Expression @@ -286,7 +286,7 @@ Panel Config: - metric_table: id: 1808 - title: L2-EA Atomic Latency (Cycles) + title: L2 - Fabric Atomic Latency (Cycles) header: metric: Metric expr: Expression @@ -300,35 +300,35 @@ Panel Config: - metric_table: id: 1809 - title: L2-EA Read Stall (Cycles per normUnit) + title: L2 - Fabric Read Stall (Cycles per normUnit) header: metric: Metric - ea read stall - io: L2-EA Read Stall - IO - ea read stall - gmi: L2-EA Read Stall - GMI - ea read stall - dram: L2-EA Read Stall - DRAM + ea read stall - pcie: L2 - Fabric Read Stall (PCIe) + ea read stall - if: L2 - Fabric Read Stall (Infinity Fabric™) + ea read stall - hbm: L2 - Fabric Read Stall (HBM) metric: "::_1": - ea read stall - io: AVG((TO_INT(TCC_EA_RDREQ_IO_CREDIT_STALL[::_1]) / $denom)) - ea read stall - gmi: AVG((TO_INT(TCC_EA_RDREQ_GMI_CREDIT_STALL[::_1]) / $denom)) - ea read stall - dram: AVG((TO_INT(TCC_EA_RDREQ_DRAM_CREDIT_STALL[::_1]) / $denom)) + ea read stall - pcie: AVG((TO_INT(TCC_EA_RDREQ_IO_CREDIT_STALL[::_1]) / $denom)) + ea read stall - if: AVG((TO_INT(TCC_EA_RDREQ_GMI_CREDIT_STALL[::_1]) / $denom)) + ea read stall - hbm: AVG((TO_INT(TCC_EA_RDREQ_DRAM_CREDIT_STALL[::_1]) / $denom)) placeholder_range: "::_1": 32 cli_style: simple_multiple_bar - metric_table: id: 1810 - title: L2-EA Write Stall (Cycles per normUnit) + title: L2 - Fabric Write Stall (Cycles per 
normUnit) header: metric: Metric - ea write stall - io: L2-EA Write Stall - IO - ea write stall - gmi: L2-EA Write Stall - GMI - ea write stall - dram: L2-EA Write Stall - DRAM - ea write stall - starve: L2-EA Write Stall - Starve + ea write stall - pcie: L2 - Fabric Write Stall (PCIe) + ea write stall - if: L2 - Fabric Write Stall (Infinity Fabric™) + ea write stall - hbm: L2 - Fabric Write Stall (HBM) + ea write stall - starve: L2 - Fabric Write Starve metric: "::_1": - ea write stall - io: AVG((TO_INT(TCC_EA_WRREQ_IO_CREDIT_STALL[::_1]) / $denom)) - ea write stall - gmi: AVG((TO_INT(TCC_EA_WRREQ_GMI_CREDIT_STALL[::_1]) / $denom)) - ea write stall - dram: AVG((TO_INT(TCC_EA_WRREQ_DRAM_CREDIT_STALL[::_1]) / $denom)) + ea write stall - pcie: AVG((TO_INT(TCC_EA_WRREQ_IO_CREDIT_STALL[::_1]) / $denom)) + ea write stall - if: AVG((TO_INT(TCC_EA_WRREQ_GMI_CREDIT_STALL[::_1]) / $denom)) + ea write stall - hbm: AVG((TO_INT(TCC_EA_WRREQ_DRAM_CREDIT_STALL[::_1]) / $denom)) ea write stall - starve: AVG((TO_INT(TCC_TOO_MANY_EA_WRREQS_STALL[::_1]) / $denom)) placeholder_range: "::_1": 32 diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml index cd8edfb075..5554a5e56c 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml @@ -14,10 +14,10 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit peak: Peak - pop: PoP + pop: Pct of Peak tips: Tips metric: VALU FLOPs: @@ -83,19 +83,19 @@ Panel Config: peak: $numCU pop: ((100 * $numActiveCUs) / $numCU) tips: - SALU Util: + SALU Utilization: value: AVG(((100 * SQ_ACTIVE_INST_SCA) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct peak: 100 pop: AVG(((100 * 
SQ_ACTIVE_INST_SCA) / (GRBM_GUI_ACTIVE * $numCU))) tips: - VALU Util: + VALU Utilization: value: AVG(((100 * SQ_ACTIVE_INST_VALU) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct peak: 100 pop: AVG(((100 * SQ_ACTIVE_INST_VALU) / (GRBM_GUI_ACTIVE * $numCU))) tips: - MFMA Util: + MFMA Utilization: value: AVG(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((GRBM_GUI_ACTIVE * $numCU) * 4))) unit: pct @@ -103,7 +103,20 @@ Panel Config: pop: AVG(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((GRBM_GUI_ACTIVE * $numCU) * 4))) tips: - VALU Active Threads/Wave: + VMEM Utilization: + value: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / GRBM_GUI_ACTIVE) / $numCU)) + unit: pct + peak: 100 + pop: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / GRBM_GUI_ACTIVE) / $numCU)) + tips: + Branch Utilization: + value: AVG((((100 * SQ_ACTIVE_INST_MISC) / GRBM_GUI_ACTIVE) / $numCU)) + unit: pct + peak: 100 + pop: AVG((((100 * SQ_ACTIVE_INST_MISC) / GRBM_GUI_ACTIVE) / $numCU)) + unit: pct + tips: + VALU Active Threads: value: AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU != 0) else None)) unit: Threads @@ -111,25 +124,29 @@ Panel Config: pop: (AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU != 0) else None)) * 1.5625) tips: - IPC - Issue: - value: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) - / SQ_ACTIVE_INST_ANY)) + IPC: + value: AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES)) unit: Instr/cycle peak: 5 - pop: ((100 * AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) - / SQ_ACTIVE_INST_ANY))) / 5) + pop: ((100 * AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES))) / 5) tips: - LDS BW: + Wavefront Occupancy: + value: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE)) + unit: Wavefronts + peak: ($maxWavesPerCU * $numCU) + pop: (100 * AVG(((SQ_ACCUM_PREV_HIRES / 
GRBM_GUI_ACTIVE) / ($maxWavesPerCU + * $numCU)))) + coll_level: SQ_LEVEL_WAVES + tips: + Theoretical LDS Bandwidth: value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (End_Timestamp - Start_Timestamp))) - unit: GB/sec + unit: GB/s peak: (($sclk * $numCU) * 0.128) pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: - LDS Bank Conflict: + LDS Bank Conflicts/Access: value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None)) unit: Conflicts/access @@ -137,35 +154,7 @@ Panel Config: pop: ((100 * AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))) / 32) tips: - Instr Cache Hit Rate: - value: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) - unit: pct - peak: 100 - pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) - tips: - Instr Cache BW: - value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) - unit: GB/s - peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk - / 1000) * 64) * $numSQC)) - tips: - Scalar L1D Cache Hit Rate: - value: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) - if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) - unit: pct - peak: 100 - pop: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) - if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) - tips: - Scalar L1D Cache BW: - value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) - unit: GB/s - peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk - / 1000) * 64) * $numSQC)) - tips: - Vector L1D 
Cache Hit Rate: + vL1D Cache Hit Rate: value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else @@ -177,7 +166,7 @@ Panel Config: TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else None)) tips: - Vector L1D Cache BW: + vL1D Cache BW: value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: ((($sclk / 1000) * 64) * $numCU) @@ -192,6 +181,13 @@ Panel Config: pop: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) tips: + L2 Cache BW: + value: AVG(((TCC_REQ_sum * 128) / (EndNs - BeginNs))) + unit: GB/s + peak: ((($sclk / 1000) * 128) * TO_INT($L2Banks)) + pop: ((100 * AVG(((TCC_REQ_sum * 128) / (EndNs - BeginNs)))) + / ((($sclk / 1000) * 128) * TO_INT($L2Banks))) + tips: L2-Fabric Read BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp))) @@ -212,36 +208,48 @@ Panel Config: value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) unit: Cycles - peak: '' - pop: '' + peak: None + pop: None tips: L2-Fabric Write Latency: value: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) unit: Cycles - peak: '' - pop: '' + peak: None + pop: None tips: - Wave Occupancy: - value: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE)) - unit: Wavefronts - peak: ($maxWavesPerCU * $numCU) - pop: (100 * AVG(((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE) / ($maxWavesPerCU - * $numCU)))) - coll_level: SQ_LEVEL_WAVES + sL1D Cache Hit Rate: + value: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES)) + if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) + unit: pct + peak: 100 + pop: AVG((((100 * SQC_DCACHE_HITS) / (SQC_DCACHE_HITS + 
SQC_DCACHE_MISSES)) + if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) tips: - Instr Fetch BW: - value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32)) + sL1D Cache BW: + value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s - peak: ((($sclk / 1000) * 32) * $numSQC) - pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC - * (($sclk / 1000) * 32))) - coll_level: SQ_IFETCH_LEVEL + peak: ((($sclk / 1000) * 64) * $numSQC) + pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk + / 1000) * 64) * $numSQC)) tips: - Instr Fetch Latency: + L1I Hit Rate: + value: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) + unit: pct + peak: 100 + pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) + tips: + L1I BW: + value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) + unit: GB/s + peak: ((($sclk / 1000) * 64) * $numSQC) + pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk + / 1000) * 64) * $numSQC)) + tips: + L1I Fetch Latency: value: AVG((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) unit: Cycles - peak: '' - pop: '' + peak: None + pop: None coll_level: SQ_IFETCH_LEVEL tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0500_command-processor.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0500_command-processor.yaml index d954f61625..b4a1f0b104 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0500_command-processor.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0500_command-processor.yaml @@ -19,19 +19,7 @@ Panel Config: unit: Unit tips: Tips metric: - GPU Busy Cycles: - avg: AVG(GRBM_GUI_ACTIVE) - min: MIN(GRBM_GUI_ACTIVE) - max: MAX(GRBM_GUI_ACTIVE) - unit: Cycles/Kernel - tips: - CPF Busy: - avg: AVG(CPF_CPF_STAT_BUSY) - min: 
MIN(CPF_CPF_STAT_BUSY) - max: MAX(CPF_CPF_STAT_BUSY) - unit: Cycles/Kernel - tips: - CPF Util: + CPF Utilization: avg: AVG((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE)) if ((CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE) != 0) else None)) min: MIN((((100 * CPF_CPF_STAT_BUSY) / (CPF_CPF_STAT_BUSY + CPF_CPF_STAT_IDLE)) @@ -47,15 +35,9 @@ Panel Config: != 0) else None)) max: MAX((((100 * CPF_CPF_STAT_STALL) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY != 0) else None)) - unit: Cycles/Kernel + unit: pct tips: - L2Cache Intf Busy: - avg: AVG(CPF_CPF_TCIU_BUSY) - min: MIN(CPF_CPF_TCIU_BUSY) - max: MAX(CPF_CPF_TCIU_BUSY) - unit: Cycles/Kernel - tips: - L2Cache Intf Util: + CPF-L2 Utilization: avg: AVG((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE)) if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None)) min: MIN((((100 * CPF_CPF_TCIU_BUSY) / (CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE)) @@ -64,7 +46,7 @@ Panel Config: if ((CPF_CPF_TCIU_BUSY + CPF_CPF_TCIU_IDLE) != 0) else None)) unit: pct tips: - L2Cache Intf Stall: + CPF-L2 Stall: avg: AVG((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY != 0) else None)) min: MIN((((100 * CPF_CPF_TCIU_STALL) / CPF_CPF_TCIU_BUSY) if (CPF_CPF_TCIU_BUSY @@ -73,16 +55,19 @@ Panel Config: != 0) else None)) unit: pct tips: - UTCL1 Stall: - avg: AVG(CPF_CMP_UTCL1_STALL_ON_TRANSLATION) - min: MIN(CPF_CMP_UTCL1_STALL_ON_TRANSLATION) - max: MAX(CPF_CMP_UTCL1_STALL_ON_TRANSLATION) - unit: Cycles/Kernel - tips: + CPF-UTCL1 Stall: + avg: AVG(((100 * CPF_CMP_UTCL1_STALL_ON_TRANSLATION) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY + != 0) else None) + min: MIN(((100 * CPF_CMP_UTCL1_STALL_ON_TRANSLATION) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY + != 0) else None) + max: MAX(((100 * CPF_CMP_UTCL1_STALL_ON_TRANSLATION) / CPF_CPF_STAT_BUSY) if (CPF_CPF_STAT_BUSY + != 0) else None) + unit: pct + tips: - metric_table: id: 502 - title: Command Processor Compute + title: Packet Processor header: 
metric: Metric avg: Avg @@ -91,19 +76,7 @@ Panel Config: unit: Unit tips: Tips metric: - GPU Busy Cycles: - avg: AVG(GRBM_GUI_ACTIVE) - min: MIN(GRBM_GUI_ACTIVE) - max: MAX(GRBM_GUI_ACTIVE) - unit: Cycles - tips: - CPC Busy Cycles: - avg: AVG(CPC_CPC_STAT_BUSY) - min: MIN(CPC_CPC_STAT_BUSY) - max: MAX(CPC_CPC_STAT_BUSY) - unit: Cycles - tips: - CPC Util: + CPC Utilization: avg: AVG((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE)) if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None)) min: MIN((((100 * CPC_CPC_STAT_BUSY) / (CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE)) @@ -112,12 +85,6 @@ Panel Config: if ((CPC_CPC_STAT_BUSY + CPC_CPC_STAT_IDLE) != 0) else None)) unit: pct tips: - CPC Stall Cycles: - avg: AVG(CPC_CPC_STAT_STALL) - min: MIN(CPC_CPC_STAT_STALL) - max: MAX(CPC_CPC_STAT_STALL) - unit: Cycles - tips: CPC Stall Rate: avg: AVG((((100 * CPC_CPC_STAT_STALL) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY != 0) else None)) @@ -127,28 +94,19 @@ Panel Config: != 0) else None)) unit: pct tips: - CPC Packet Decoding: - avg: AVG(CPC_ME1_BUSY_FOR_PACKET_DECODE) - min: MIN(CPC_ME1_BUSY_FOR_PACKET_DECODE) - max: MAX(CPC_ME1_BUSY_FOR_PACKET_DECODE) - unit: Cycles - tips: - SPI Intf Busy Cycles: - avg: AVG(CPC_ME1_DC0_SPI_BUSY) - min: MIN(CPC_ME1_DC0_SPI_BUSY) - max: MAX(CPC_ME1_DC0_SPI_BUSY) - unit: Cycles - tips: - SPI Intf Util: - avg: AVG((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY - != 0) else None)) - min: MIN((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY - != 0) else None)) - max: MAX((((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY - != 0) else None)) + CPC Packet Decoding Utilization: + avg: AVG((100 * CPC_ME1_BUSY_FOR_PACKET_DECODE) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + min: MIN((100 * CPC_ME1_BUSY_FOR_PACKET_DECODE) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + max: MAX((100 * CPC_ME1_BUSY_FOR_PACKET_DECODE) / 
CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) unit: pct tips: - L2Cache Intf Util: + CPC-Workgroup Manager Utilization: + avg: AVG((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + min: MIN((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + max: MAX((100 * CPC_ME1_DC0_SPI_BUSY) / CPC_CPC_STAT_BUSY if (CPC_CPC_STAT_BUSY != 0) else None) + unit: Pct + tips: + CPC-L2 Utilization: avg: AVG((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE)) if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None)) min: MIN((((100 * CPC_CPC_TCIU_BUSY) / (CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE)) @@ -157,19 +115,16 @@ Panel Config: if ((CPC_CPC_TCIU_BUSY + CPC_CPC_TCIU_IDLE) != 0) else None)) unit: pct tips: - UTCL1 Stall Cycles: - avg: AVG(CPC_UTCL1_STALL_ON_TRANSLATION) - min: MIN(CPC_UTCL1_STALL_ON_TRANSLATION) - max: MAX(CPC_UTCL1_STALL_ON_TRANSLATION) - unit: Cycles + CPC-UTCL1 Stall: + avg: AVG(((100 * CPC_UTCL1_STALL_ON_TRANSLATION) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY + != 0) else None) + min: MIN(((100 * CPC_UTCL1_STALL_ON_TRANSLATION) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY + != 0) else None) + max: MAX(((100 * CPC_UTCL1_STALL_ON_TRANSLATION) / CPC_CPC_STAT_BUSY) if (CPC_CPC_STAT_BUSY + != 0) else None) + unit: pct tips: - UTCL2 Intf Busy Cycles: - avg: AVG(CPC_CPC_UTCL2IU_BUSY) - min: MIN(CPC_CPC_UTCL2IU_BUSY) - max: MAX(CPC_CPC_UTCL2IU_BUSY) - unit: Cycles - tips: - UTCL2 Intf Util: + CPC-UTCL2 Utilization: avg: AVG((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE)) if ((CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE) != 0) else None)) min: MIN((((100 * CPC_CPC_UTCL2IU_BUSY) / (CPC_CPC_UTCL2IU_BUSY + CPC_CPC_UTCL2IU_IDLE)) diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0600_shader-processor-input.yaml 
b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0600_shader-processor-input.yaml index bab48700ac..24d4036ecb 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0600_shader-processor-input.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0600_shader-processor-input.yaml @@ -6,11 +6,11 @@ Metric Description: # Define the panel properties and properties of each metric in the panel. Panel Config: id: 600 - title: Shader Processor Input (SPI) + title: Workgroup Manager (SPI) data source: - metric_table: id: 601 - title: SPI Stats + title: Workgroup Manager Utilizations header: metric: Metric avg: Avg @@ -19,29 +19,35 @@ Panel Config: unit: Unit tips: Tips metric: - GPU Busy: - avg: AVG(GRBM_GUI_ACTIVE) - min: MIN(GRBM_GUI_ACTIVE) - max: MAX(GRBM_GUI_ACTIVE) - unit: Cycles + Accelerator Utilization: + avg: AVG(100 * GRBM_GUI_ACTIVE / GRBM_COUNT) + min: MIN(100 * GRBM_GUI_ACTIVE / GRBM_COUNT) + max: MAX(100 * GRBM_GUI_ACTIVE / GRBM_COUNT) + unit: Pct tips: - CS Busy: - avg: AVG(SPI_CSN_BUSY) - min: MIN(SPI_CSN_BUSY) - max: MAX(SPI_CSN_BUSY) - unit: Cycles + Scheduler-Pipe Utilization: + avg: AVG(100 * SPI_CSN_BUSY / (GRBM_GUI_ACTIVE * $numPipes * $numSE)) + min: MIN(100 * SPI_CSN_BUSY / (GRBM_GUI_ACTIVE * $numPipes * $numSE)) + max: MAX(100 * SPI_CSN_BUSY / (GRBM_GUI_ACTIVE * $numPipes * $numSE)) + unit: Pct tips: - SPI Busy: - avg: AVG(GRBM_SPI_BUSY) - min: MIN(GRBM_SPI_BUSY) - max: MAX(GRBM_SPI_BUSY) - unit: Cycles + Workgroup Manager Utilization: + avg: AVG(100 * GRBM_SPI_BUSY / GRBM_GUI_ACTIVE) + min: MIN(100 * GRBM_SPI_BUSY / GRBM_GUI_ACTIVE) + max: MAX(100 * GRBM_SPI_BUSY / GRBM_GUI_ACTIVE) + unit: Pct tips: - SQ Busy: - avg: AVG(SQ_BUSY_CYCLES) - min: MIN(SQ_BUSY_CYCLES) - max: MAX(SQ_BUSY_CYCLES) - unit: Cycles + Shader Engine Utilization: + avg: AVG(100 * SQ_BUSY_CYCLES / (GRBM_GUI_ACTIVE * $numSE)) + min: MIN(100 * SQ_BUSY_CYCLES / (GRBM_GUI_ACTIVE * $numSE)) + max: MAX(100 
* SQ_BUSY_CYCLES / (GRBM_GUI_ACTIVE * $numSE)) + unit: Pct + tips: + SIMD Utilization: + avg: AVG(100 * SQ_BUSY_CU_CYCLES / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SQ_BUSY_CU_CYCLES / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SQ_BUSY_CU_CYCLES / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct tips: Dispatched Workgroups: avg: AVG(SPI_CSN_NUM_THREADGROUPS) @@ -55,105 +61,6 @@ Panel Config: max: MAX(SPI_CSN_WAVE) unit: Wavefronts tips: - Wave Alloc Failed: - avg: AVG(SPI_RA_REQ_NO_ALLOC) - min: MIN(SPI_RA_REQ_NO_ALLOC) - max: MAX(SPI_RA_REQ_NO_ALLOC) - unit: Cycles - tips: - Wave Alloc Failed - CS: - avg: AVG(SPI_RA_REQ_NO_ALLOC_CSN) - min: MIN(SPI_RA_REQ_NO_ALLOC_CSN) - max: MAX(SPI_RA_REQ_NO_ALLOC_CSN) - unit: Cycles - tips: - - - metric_table: - id: 602 - title: SPI Resource Allocation - header: - metric: Metric - avg: Avg - min: Min - max: Max - unit: Unit - tips: Tips - metric: - Wave request Failed (CS): - avg: AVG(SPI_RA_REQ_NO_ALLOC_CSN) - min: MIN(SPI_RA_REQ_NO_ALLOC_CSN) - max: MAX(SPI_RA_REQ_NO_ALLOC_CSN) - unit: Cycles - tips: - CS Stall: - avg: AVG(SPI_RA_RES_STALL_CSN) - min: MIN(SPI_RA_RES_STALL_CSN) - max: MAX(SPI_RA_RES_STALL_CSN) - unit: Cycles - tips: - CS Stall Rate: - avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY != - 0) else None)) - min: MIN((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY != - 0) else None)) - max: MAX((((100 * SPI_RA_RES_STALL_CSN) / GRBM_SPI_BUSY) if (GRBM_SPI_BUSY != - 0) else None)) - unit: pct - tips: - Scratch Stall: - avg: AVG(SPI_RA_TMP_STALL_CSN) - min: MIN(SPI_RA_TMP_STALL_CSN) - max: MAX(SPI_RA_TMP_STALL_CSN) - unit: Cycles - tips: - Insufficient SIMD Waveslots: - avg: AVG(SPI_RA_WAVE_SIMD_FULL_CSN) - min: MIN(SPI_RA_WAVE_SIMD_FULL_CSN) - max: MAX(SPI_RA_WAVE_SIMD_FULL_CSN) - unit: SIMD - tips: - Insufficient SIMD VGPRs: - avg: AVG(SPI_RA_VGPR_SIMD_FULL_CSN) - min: MIN(SPI_RA_VGPR_SIMD_FULL_CSN) - max: MAX(SPI_RA_VGPR_SIMD_FULL_CSN) - unit: SIMD - tips: - Insufficient 
SIMD SGPRs: - avg: AVG(SPI_RA_SGPR_SIMD_FULL_CSN) - min: MIN(SPI_RA_SGPR_SIMD_FULL_CSN) - max: MAX(SPI_RA_SGPR_SIMD_FULL_CSN) - unit: SIMD - tips: - Insufficient CU LDS: - avg: AVG(SPI_RA_LDS_CU_FULL_CSN) - min: MIN(SPI_RA_LDS_CU_FULL_CSN) - max: MAX(SPI_RA_LDS_CU_FULL_CSN) - unit: CU - tips: - Insufficient CU Barries: - avg: AVG(SPI_RA_BAR_CU_FULL_CSN) - min: MIN(SPI_RA_BAR_CU_FULL_CSN) - max: MAX(SPI_RA_BAR_CU_FULL_CSN) - unit: CU - tips: - Insufficient Bulky Resource: - avg: AVG(SPI_RA_BULKY_CU_FULL_CSN) - min: MIN(SPI_RA_BULKY_CU_FULL_CSN) - max: MAX(SPI_RA_BULKY_CU_FULL_CSN) - unit: CU - tips: - Reach CU Threadgroups Limit: - avg: AVG(SPI_RA_TGLIM_CU_FULL_CSN) - min: MIN(SPI_RA_TGLIM_CU_FULL_CSN) - max: MAX(SPI_RA_TGLIM_CU_FULL_CSN) - unit: Cycles - tips: - Reach CU Wave Limit: - avg: AVG(SPI_RA_WVLIM_STALL_CSN) - min: MIN(SPI_RA_WVLIM_STALL_CSN) - max: MAX(SPI_RA_WVLIM_STALL_CSN) - unit: Cycles - tips: VGPR Writes: avg: AVG((((4 * SPI_VWC_CSC_WR) / SPI_CSN_WAVE) if (SPI_CSN_WAVE != 0) else None)) @@ -172,3 +79,89 @@ Panel Config: None)) unit: Cycles/wave tips: + - metric_table: + id: 602 + title: Workgroup Manager - Resource Allocation + header: + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit + tips: Tips + metric: + Not-scheduled Rate (Workgroup Manager): + avg: AVG((100 * SPI_RA_REQ_NO_ALLOC_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + min: MIN((100 * SPI_RA_REQ_NO_ALLOC_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + max: MAX((100 * SPI_RA_REQ_NO_ALLOC_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + unit: Pct + tips: + Not-scheduled Rate (Scheduler-Pipe): + avg: AVG((100 * SPI_RA_REQ_NO_ALLOC / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + min: MIN((100 * SPI_RA_REQ_NO_ALLOC / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + max: MAX((100 * SPI_RA_REQ_NO_ALLOC / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None) + unit: Pct 
+ tips: + Scheduler-Pipe Stall Rate: + avg: AVG((((100 * SPI_RA_RES_STALL_CSN) / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None)) + min: MIN((((100 * SPI_RA_RES_STALL_CSN) / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None)) + max: MAX((((100 * SPI_RA_RES_STALL_CSN) / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != + 0) else None)) + unit: Pct + tips: + Scratch Stall Rate: + avg: AVG((100 * SPI_RA_TMP_STALL_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != 0) else None) + min: MIN((100 * SPI_RA_TMP_STALL_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != 0) else None) + max: MAX((100 * SPI_RA_TMP_STALL_CSN / (GRBM_SPI_BUSY * $numSE)) if (GRBM_SPI_BUSY != 0) else None) + unit: Pct + tips: + Insufficient SIMD Waveslots: + avg: AVG(100 * SPI_RA_WAVE_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SPI_RA_WAVE_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SPI_RA_WAVE_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient SIMD VGPRs: + avg: AVG(100 * SPI_RA_VGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SPI_RA_VGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SPI_RA_VGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient SIMD SGPRs: + avg: AVG(100 * SPI_RA_SGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(100 * SPI_RA_SGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(100 * SPI_RA_SGPR_SIMD_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient CU LDS: + avg: AVG(400 * SPI_RA_LDS_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_LDS_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_LDS_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Insufficient CU Barriers: + avg: AVG(400 * SPI_RA_BAR_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_BAR_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_BAR_CU_FULL_CSN / 
(GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Reached CU Workgroup Limit: + avg: AVG(400 * SPI_RA_TGLIM_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_TGLIM_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_TGLIM_CU_FULL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: + Reached CU Wavefront Limit: + avg: AVG(400 * SPI_RA_WVLIM_STALL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + min: MIN(400 * SPI_RA_WVLIM_STALL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + max: MAX(400 * SPI_RA_WVLIM_STALL_CSN / (GRBM_GUI_ACTIVE * $numCU)) + unit: Pct + tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0700_wavefront-launch.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0700_wavefront-launch.yaml index 3a174862df..91c8207a2d 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0700_wavefront-launch.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0700_wavefront-launch.yaml @@ -77,7 +77,7 @@ Panel Config: avg: AVG(Scratch_Per_Workitem) min: MIN(Scratch_Per_Workitem) max: MAX(Scratch_Per_Workitem) - unit: Bytes + unit: Bytes/Workitem tips: - metric_table: @@ -103,7 +103,7 @@ Panel Config: max: MAX(GRBM_GUI_ACTIVE) unit: Cycle tips: - Instr/wavefront: + Instructions per wavefront: avg: AVG((SQ_INSTS / SQ_WAVES)) min: MIN((SQ_INSTS / SQ_WAVES)) max: MAX((SQ_INSTS / SQ_WAVES)) diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1000_compute-unit-instruction-mix.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1000_compute-unit-instruction-mix.yaml index 92894b31d3..f7867b6ea2 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1000_compute-unit-instruction-mix.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1000_compute-unit-instruction-mix.yaml @@ -10,7 +10,7 @@ Panel Config: data source: - metric_table: id: 
1001 - title: Instruction Mix + title: Overall Instruction Mix header: metric: Metric avg: Avg @@ -18,11 +18,11 @@ Panel Config: max: Max unit: Unit tips: Tips - cli_style: + style: type: simple_bar label_txt: (# of instr + $normUnit) metric: - VALU - Vector: + VALU: avg: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom)) min: MIN(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom)) max: MAX(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom)) @@ -40,7 +40,7 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (instr + $normUnit) tips: - VALU - MFMA: + MFMA: avg: AVG((SQ_INSTS_MFMA / $denom)) min: MIN((SQ_INSTS_MFMA / $denom)) max: MAX((SQ_INSTS_MFMA / $denom)) @@ -64,12 +64,6 @@ Panel Config: max: MAX((SQ_INSTS_BRANCH / $denom)) unit: (instr + $normUnit) tips: - GDS: - avg: AVG((SQ_INSTS_GDS / $denom)) - min: MIN((SQ_INSTS_GDS / $denom)) - max: MAX((SQ_INSTS_GDS / $denom)) - unit: (instr + $normUnit) - tips: - metric_table: id: 1002 @@ -81,7 +75,7 @@ Panel Config: max: Max unit: Unit tips: Tips - cli_style: + style: type: simple_bar label_txt: (# of instr + $normUnit) metric: @@ -180,55 +174,100 @@ Panel Config: id: 1003 title: VMEM Instr Mix header: - type: type - count: Count + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit tips: Tips metric: - Buffer Instr: - count: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom)) - tips: - Buffer Read: - count: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) - tips: - Buffer Write: - count: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) - tips: - Buffer Atomic: - count: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) - tips: - Flat Instr: - count: AVG((TA_FLAT_WAVEFRONTS_sum / $denom)) - tips: - Flat Read: - count: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) - tips: - Flat Write: - count: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) - tips: - Flat Atomic: - count: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) - tips: + Global/Generic Instr: + avg: AVG((TA_FLAT_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_WAVEFRONTS_sum / $denom)) + max: 
MAX((TA_FLAT_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Global/Generic Read: + avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) + max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Global/Generic Write: + avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) + max: MAX((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Global/Generic Atomic: + avg: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) + min: MIN((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) + max: MAX((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Instr: + avg: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Read: + avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Write: + avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: + Spill/Stack Atomic: + avg: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) + min: MIN((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) + max: MAX((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) + unit: (instr + $normUnit) + tips: - metric_table: id: 1004 title: MFMA Arithmetic Instr Mix header: - type: type - count: Count + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit tips: Tips metric: MFMA-I8: - count: AVG((SQ_INSTS_VALU_MFMA_I8 / $denom)) + avg: AVG((SQ_INSTS_VALU_MFMA_I8 / $denom)) + min: MIN((SQ_INSTS_VALU_MFMA_I8 / $denom)) + max: MAX((SQ_INSTS_VALU_MFMA_I8 / $denom)) + unit: (instr + $normUnit) tips: MFMA-F16: - 
count: AVG((SQ_INSTS_VALU_MFMA_F16 / $denom)) + avg: AVG((SQ_INSTS_VALU_MFMA_F16 / $denom)) + min: MIN((SQ_INSTS_VALU_MFMA_F16 / $denom)) + max: MAX((SQ_INSTS_VALU_MFMA_F16 / $denom)) + unit: (instr + $normUnit) tips: MFMA-BF16: - count: AVG((SQ_INSTS_VALU_MFMA_BF16 / $denom)) + avg: AVG((SQ_INSTS_VALU_MFMA_BF16 / $denom)) + min: MIN((SQ_INSTS_VALU_MFMA_BF16 / $denom)) + max: MAX((SQ_INSTS_VALU_MFMA_BF16 / $denom)) + unit: (instr + $normUnit) tips: MFMA-F32: - count: AVG((SQ_INSTS_VALU_MFMA_F32 / $denom)) - tips: + avg: AVG((SQ_INSTS_VALU_MFMA_F32 / $denom)) + min: MIN((SQ_INSTS_VALU_MFMA_F32 / $denom)) + max: MAX((SQ_INSTS_VALU_MFMA_F32 / $denom)) + unit: (instr + $normUnit) + tips: MFMA-F64: - count: AVG((SQ_INSTS_VALU_MFMA_F64 / $denom)) - tips: \ No newline at end of file + avg: AVG((SQ_INSTS_VALU_MFMA_F64 / $denom)) + min: MIN((SQ_INSTS_VALU_MFMA_F64 / $denom)) + max: MAX((SQ_INSTS_VALU_MFMA_F64 / $denom)) + unit: (instr + $normUnit) + tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml index 97a2f1c842..100fc5051f 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml @@ -13,48 +13,73 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit + peak: Peak + pop: Pct of Peak tips: Tips - cli_style: + style: type: simple_bar range_color: [1, 100] label_txt: (%) xrange: [0, 110] metric: - valu_flops_pop: - value: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + VALU FLOPs: + value: AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16) + + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * 
(((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64)))) + / (EndNs - BeginNs))) + unit: GFLOP + peak: (((($sclk * $numCU) * 64) * 2) / 1000) + pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) - unit: Pct of Peak tips: - mfma_flops_bf16_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))) - / ((($sclk * $numCU) * 512) / 1000)) - unit: Pct of Peak + VALU IOPs: + value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp))) + unit: GIOP + peak: (((($sclk * $numCU) * 64) * 2) / 1000) + pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp + - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) tips: - mfma_flops_f16_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))) + MFMA FLOPs (BF16): + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))) + unit: GFLOP + peak: ((($sclk * $numCU) * 1024) / 1000) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) - unit: Pct of Peak tips: - mfma_flops_f32_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))) - / ((($sclk * $numCU) * 256) / 1000)) - unit: Pct of Peak - tips: - mfma_flops_f64_pop: - value: ((100 * 
AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) - / ((($sclk * $numCU) * 256) / 1000)) - unit: Pct of Peak - tips: - mfma_flops_i8_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))) + MFMA FLOPs (F16): + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))) + unit: GFLOP + peak: ((($sclk * $numCU) * 1024) / 1000) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))) + / ((($sclk * $numCU) * 1024) / 1000)) + tips: + MFMA FLOPs (F32): + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))) + unit: GFLOP + peak: ((($sclk * $numCU) * 256) / 1000) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))) + / ((($sclk * $numCU) * 256) / 1000)) + tips: + MFMA FLOPs (F64): + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))) + unit: GFLOP + peak: ((($sclk * $numCU) * 256) / 1000) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) + / ((($sclk * $numCU) * 256) / 1000)) + tips: + MFMA IOPs (INT8): + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) + unit: GIOP + peak: ((($sclk * $numCU) * 1024) / 1000) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) - unit: Pct of Peak tips: - metric_table: @@ -68,36 +93,48 @@ Panel Config: unit: Unit tips: Tips metric: - IPC (Avg): + IPC: avg: AVG((SQ_INSTS / SQ_BUSY_CU_CYCLES)) min: MIN((SQ_INSTS / SQ_BUSY_CU_CYCLES)) max: MAX((SQ_INSTS / SQ_BUSY_CU_CYCLES)) unit: Instr/cycle tips: - IPC (Issue): - avg: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) + IPC (Issued): + avg: AVG(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + 
SQ_INSTS_SALU) + SQ_INSTS_SMEM)) + + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED + SQ_INSTS_LDS) / SQ_ACTIVE_INST_ANY)) - min: MIN(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) + min: MIN(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)) + + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED + SQ_INSTS_LDS) / SQ_ACTIVE_INST_ANY)) - max: MAX(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM) - + SQ_INSTS_GDS) + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED) + max: MAX(((((((((SQ_INSTS_VALU + SQ_INSTS_VMEM) + SQ_INSTS_SALU) + SQ_INSTS_SMEM)) + + SQ_INSTS_BRANCH) + SQ_INSTS_SENDMSG) + SQ_INSTS_VSKIPPED + SQ_INSTS_LDS) / SQ_ACTIVE_INST_ANY)) unit: Instr/cycle tips: - SALU Util: + SALU Utilization: avg: AVG((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU)) min: MIN((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU)) max: MAX((((100 * SQ_ACTIVE_INST_SCA) / GRBM_GUI_ACTIVE) / $numCU)) unit: pct tips: - VALU Util: + VALU Utilization: avg: AVG((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU)) min: MIN((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU)) max: MAX((((100 * SQ_ACTIVE_INST_VALU) / GRBM_GUI_ACTIVE) / $numCU)) unit: pct tips: + VMEM Utilization: + avg: AVG((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / GRBM_GUI_ACTIVE) / $numCU)) + min: MIN((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / GRBM_GUI_ACTIVE) / $numCU)) + max: MAX((((100 * (SQ_ACTIVE_INST_FLAT+SQ_ACTIVE_INST_VMEM)) / GRBM_GUI_ACTIVE) / $numCU)) + unit: pct + tips: + Branch Utilization: + avg: AVG((((100 * SQ_ACTIVE_INST_MISC) / GRBM_GUI_ACTIVE) / $numCU)) + min: MIN((((100 * SQ_ACTIVE_INST_MISC) / GRBM_GUI_ACTIVE) / $numCU)) + max: MAX((((100 * SQ_ACTIVE_INST_MISC) / GRBM_GUI_ACTIVE) / $numCU)) + unit: pct + tips: VALU Active Threads: avg: AVG(((SQ_THREAD_CYCLES_VALU / 
SQ_ACTIVE_INST_VALU) if (SQ_ACTIVE_INST_VALU != 0) else None)) @@ -107,7 +144,7 @@ Panel Config: != 0) else None)) unit: Threads tips: - MFMA Util: + MFMA Utilization: avg: AVG(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((4 * $numCU) * GRBM_GUI_ACTIVE))) min: MIN(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((4 * $numCU) * GRBM_GUI_ACTIVE))) max: MAX(((100 * SQ_VALU_MFMA_BUSY_CYCLES) / ((4 * $numCU) * GRBM_GUI_ACTIVE))) @@ -122,6 +159,26 @@ Panel Config: else None)) unit: cycles/instr tips: + VMEM Latency: + avg: AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_VMEM) if (SQ_INSTS_VMEM != 0) + else None)) + min: MIN(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_VMEM) if (SQ_INSTS_VMEM != 0) + else None)) + max: MAX(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_VMEM) if (SQ_INSTS_VMEM != 0) + else None)) + unit: Cycles + coll_level: SQ_INST_LEVEL_VMEM + tips: + SMEM Latency: + avg: AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_SMEM) if (SQ_INSTS_SMEM != 0) + else None)) + min: MIN(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_SMEM) if (SQ_INSTS_SMEM != 0) + else None)) + max: MAX(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_SMEM) if (SQ_INSTS_SMEM != 0) + else None)) + unit: Cycles + coll_level: SQ_INST_LEVEL_SMEM + tips: - metric_table: id: 1103 @@ -158,10 +215,10 @@ Panel Config: $denom)) unit: (OPs + $normUnit) tips: - INT8 OPs: - avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) - min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) - max: MAX(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) + IOPs (Total): + avg: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) + (SQ_INSTS_VALU_MFMA_MOPS_I8 * 512)) / $denom) + min: MIN(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) + (SQ_INSTS_VALU_MFMA_MOPS_I8 * 512)) / $denom) + max: MAX(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) + (SQ_INSTS_VALU_MFMA_MOPS_I8 * 512)) / $denom) unit: (OPs + $normUnit) tips: F16 OPs: @@ -199,4 +256,10 @@ Panel Config: max: MAX((((64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * 
SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom)) unit: (OPs + $normUnit) + tips: + INT8 OPs: + avg: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) + min: MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) + max: MAX(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / $denom)) + unit: (OPs + $normUnit) tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1200_lds.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1200_lds.yaml index d225d31e0a..655c6eb480 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1200_lds.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1200_lds.yaml @@ -13,10 +13,10 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit tips: Tips - cli_style: + style: type: simple_bar range_color: [1, 100] label_txt: (%) @@ -26,20 +26,24 @@ Panel Config: value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU))) unit: Pct of Peak tips: + unit: pct Access Rate: value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU))) unit: Pct of Peak tips: - Bandwidth (Pct-of-Peak): + unit: pct + Theoretical Bandwidth (% of Peak): value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) unit: Pct of Peak tips: + unit: pct Bank Conflict Rate: value: AVG((((SQ_LDS_BANK_CONFLICT * 3.125) / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None)) unit: Pct of Peak tips: + unit: pct - metric_table: id: 1202 @@ -58,7 +62,7 @@ Panel Config: max: MAX((SQ_INSTS_LDS / $denom)) unit: (Instr + $normUnit) tips: - Bandwidth: + Theoretical Bandwidth: avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) @@ -67,7 +71,14 @@ 
Panel Config: / $denom)) unit: (Bytes + $normUnit) tips: - Bank Conficts/Access: + LDS Latency: + avg: AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) + min: MIN(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) + max: MAX(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) + unit: Cycles + coll_level: SQ_INST_LEVEL_LDS + tips: + Bank Conflicts/Access: avg: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None)) min: MIN(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) @@ -82,7 +93,7 @@ Panel Config: max: MAX((SQ_LDS_IDX_ACTIVE / $denom)) unit: (Cycles + $normUnit) tips: - Atomic Cycles: + Atomic Return Cycles: avg: AVG((SQ_LDS_ATOMIC_RETURN / $denom)) min: MIN((SQ_LDS_ATOMIC_RETURN / $denom)) max: MAX((SQ_LDS_ATOMIC_RETURN / $denom)) @@ -110,12 +121,5 @@ Panel Config: avg: AVG((SQ_LDS_MEM_VIOLATIONS / $denom)) min: MIN((SQ_LDS_MEM_VIOLATIONS / $denom)) max: MAX((SQ_LDS_MEM_VIOLATIONS / $denom)) - unit: ( + $normUnit) - tips: - LDS Latency: - avg: AVG(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) - min: MIN(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) - max: MAX(((SQ_ACCUM_PREV_HIRES / SQ_INSTS_LDS) if (SQ_INSTS_LDS != 0) else None)) - unit: Cycles - coll_level: SQ_INST_LEVEL_LDS + unit: (Accesses + $normUnit) tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1300_instruction-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1300_instruction-cache.yaml index 20b437e6a4..76532c7d10 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1300_instruction-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1300_instruction-cache.yaml @@ -13,10 +13,10 @@ Panel Config: title: 
Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit tips: Tips - cli_style: + style: type: simple_bar range_color: [1, 100] label_txt: (%) @@ -27,11 +27,16 @@ Panel Config: * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: - Cache Hit: + Cache Hit Rate: value: AVG(((SQC_ICACHE_HITS * 100) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) + SQC_ICACHE_MISSES_DUPLICATE))) unit: Pct of Peak tips: + L1I-L2 Bandwidth: + value: AVG(((SQC_TC_INST_REQ * 100000) / (2 * ($sclk * $numSQC) + * (EndNs - BeginNs)))) + unit: Pct of Peak + tips: - metric_table: id: 1302 @@ -68,7 +73,7 @@ Panel Config: max: MAX((SQC_ICACHE_MISSES_DUPLICATE / $denom)) unit: (Misses + $normUnit) tips: - Cache Hit: + Cache Hit Rate: avg: AVG(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) + SQC_ICACHE_MISSES_DUPLICATE))) min: MIN(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) + @@ -76,4 +81,28 @@ Panel Config: max: MAX(((100 * SQC_ICACHE_HITS) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES) + SQC_ICACHE_MISSES_DUPLICATE))) unit: pct - tips: \ No newline at end of file + tips: + Instruction Fetch Latency: + avg: AVG((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) + min: MIN((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) + max: MAX((SQ_ACCUM_PREV_HIRES / SQ_IFETCH)) + unit: Cycles + coll_level: SQ_IFETCH_LEVEL + tips: + - metric_table: + id: 1303 + title: Instruction Cache - L2 Interface + header: + metric: Metric + mean: Mean + min: Min + max: Max + unit: Unit + tips: Tips + metric: + L1I-L2 Bandwidth: + mean: AVG(((SQC_TC_INST_REQ * 64) / $denom)) + min: MIN(((SQC_TC_INST_REQ * 64) / $denom)) + max: MAX(((SQC_TC_INST_REQ * 64) / $denom)) + unit: (Bytes + $normUnit) + tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1400_constant-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1400_constant-cache.yaml index ab4f5109ab..34f319ad2f 100644 --- 
a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1400_constant-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1400_constant-cache.yaml @@ -12,11 +12,11 @@ Panel Config: id: 1401 title: Speed-of-Light header: - mertic: Metric - value: Value + metric: Metric + value: Avg unit: Unit tips: Tips - cli_style: + style: type: simple_bar range_color: [1, 100] label_txt: (%) @@ -27,12 +27,17 @@ Panel Config: * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: - Cache Hit: + Cache Hit Rate: value: AVG((((SQC_DCACHE_HITS * 100) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE)) if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE) != 0) else None)) unit: Pct of Peak tips: + sL1D-L2 BW: + value: AVG(((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 100000) + / (2 * ($sclk * $numSQC) * (EndNs - BeginNs))) + unit: Pct of Peak + tips: - metric_table: id: 1402 @@ -69,7 +74,7 @@ Panel Config: max: MAX((SQC_DCACHE_MISSES_DUPLICATE / $denom)) unit: (Req + $normUnit) tips: - Cache Hit: + Cache Hit Rate: avg: AVG((((100 * SQC_DCACHE_HITS) / ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) + SQC_DCACHE_MISSES_DUPLICATE)) if (((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) + SQC_DCACHE_MISSES_DUPLICATE) != 0) else None)) @@ -138,6 +143,12 @@ Panel Config: unit: Unit tips: Tips metric: + sL1D-L2 BW: + mean: AVG(((((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 64)) / $denom)) + min: MIN(((((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 64)) / $denom)) + max: MAX(((((SQC_TC_DATA_READ_REQ + SQC_TC_DATA_WRITE_REQ + SQC_TC_DATA_ATOMIC_REQ) * 64)) / $denom)) + unit: (Bytes + $normUnit) + tips: Read Req: avg: AVG((SQC_TC_DATA_READ_REQ / $denom)) min: MIN((SQC_TC_DATA_READ_REQ / $denom)) @@ -156,9 +167,9 @@ Panel Config: max: MAX((SQC_TC_DATA_ATOMIC_REQ / $denom)) unit: (Req + $normUnit) tips: - Stall: + Stall Cycles: avg: 
AVG((SQC_TC_STALL / $denom)) min: MIN((SQC_TC_STALL / $denom)) max: MAX((SQC_TC_STALL / $denom)) unit: (Cycles + $normUnit) - tips: \ No newline at end of file + tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1500_TA_and_TD.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1500_TA_and_TD.yaml index 03af854976..5f7d73df82 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1500_TA_and_TD.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1500_TA_and_TD.yaml @@ -6,11 +6,11 @@ Metric Description: # Define the panel properties and properties of each metric in the panel. Panel Config: id: 1500 - title: Texture Addresser and Texture Data (TA/TD) + title: Address Processing Unit and Data Return Path (TA/TD) data source: - metric_table: id: 1501 - title: TA + title: Address Processing Unit header: metric: Metric avg: Avg @@ -19,25 +19,25 @@ Panel Config: unit: Unit tips: Tips metric: - TA Busy: + Address Processing Unit Busy: avg: AVG(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_TA_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TC2TA Addr Stall: + Address Stall: avg: AVG(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_ADDR_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TC2TA Data Stall: + Data Stall: avg: AVG(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_DATA_STALLED_BY_TC_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TD2TA Addr Stall: + Data-Processor → Address Stall: avg: AVG(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * 
$numCU))) min: MIN(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TA_ADDR_STALLED_BY_TD_CYCLES_sum) / (GRBM_GUI_ACTIVE * $numCU))) @@ -47,69 +47,69 @@ Panel Config: avg: AVG((TA_TOTAL_WAVEFRONTS_sum / $denom)) min: MIN((TA_TOTAL_WAVEFRONTS_sum / $denom)) max: MAX((TA_TOTAL_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Instr: + Global/Generic Instructions: avg: AVG((TA_FLAT_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Read Instr: + Global/Generic Read Instructions: avg: AVG((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_READ_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Write Instr: + Global/Generic Write Instructions: avg: AVG((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_WRITE_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Flat Atomic Instr: + Global/Generic Atomic Instructions: avg: AVG((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) min: MIN((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) max: MAX((TA_FLAT_ATOMIC_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Instr: + Spill/Stack Instructions: avg: AVG((TA_BUFFER_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Read Instr: + Spill/Stack Read Instructions: avg: AVG((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_READ_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + 
$normUnit) tips: - Buffer Write Instr: + Spill/Stack Write Instructions: avg: AVG((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_WRITE_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Atomic Instr: + Spill/Stack Atomic Instructions: avg: AVG((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) min: MIN((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) max: MAX((TA_BUFFER_ATOMIC_WAVEFRONTS_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Buffer Total Cylces: + Spill/Stack Total Cycles: avg: AVG((TA_BUFFER_TOTAL_CYCLES_sum / $denom)) min: MIN((TA_BUFFER_TOTAL_CYCLES_sum / $denom)) max: MAX((TA_BUFFER_TOTAL_CYCLES_sum / $denom)) unit: (Cycles + $normUnit) tips: - Buffer Coalesced Read: + Spill/Stack Coalesced Read: avg: AVG((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom)) min: MIN((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom)) max: MAX((TA_BUFFER_COALESCED_READ_CYCLES_sum / $denom)) unit: (Cycles + $normUnit) tips: - Buffer Coalesced Write: + Spill/Stack Coalesced Write: avg: AVG((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom)) min: MIN((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom)) max: MAX((TA_BUFFER_COALESCED_WRITE_CYCLES_sum / $denom)) @@ -127,48 +127,48 @@ Panel Config: unit: Unit tips: Tips metric: - TD Busy: + Data-Return Busy: avg: AVG(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TD_TD_BUSY_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - TC2TD Stall: + Cache RAM → Data-Return Stall: avg: AVG(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: MIN(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TD_TC_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - SPI2TD Stall: + Workgroup manager → Data-Return Stall: avg: AVG(((100 * TD_SPI_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) min: 
MIN(((100 * TD_SPI_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) max: MAX(((100 * TD_SPI_STALL_sum) / (GRBM_GUI_ACTIVE * $numCU))) unit: pct tips: - Coalescable Instr: + Coalescable Instructions: avg: AVG((TD_COALESCABLE_WAVEFRONT_sum / $denom)) min: MIN((TD_COALESCABLE_WAVEFRONT_sum / $denom)) max: MAX((TD_COALESCABLE_WAVEFRONT_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Load Instr: + Read Instructions: avg: AVG((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum) / $denom)) min: MIN((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum) / $denom)) max: MAX((((TD_LOAD_WAVEFRONT_sum - TD_STORE_WAVEFRONT_sum) - TD_ATOMIC_WAVEFRONT_sum) / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Store Instr: + Write Instructions: avg: AVG((TD_STORE_WAVEFRONT_sum / $denom)) min: MIN((TD_STORE_WAVEFRONT_sum / $denom)) max: MAX((TD_STORE_WAVEFRONT_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: - Atomic Instr: + Atomic Instructions: avg: AVG((TD_ATOMIC_WAVEFRONT_sum / $denom)) min: MIN((TD_ATOMIC_WAVEFRONT_sum / $denom)) max: MAX((TD_ATOMIC_WAVEFRONT_sum / $denom)) - unit: (Instr + $normUnit) + unit: (Instructions + $normUnit) tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1600_L1_cache.yaml index db3a363ced..c0733e193c 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1600_L1_cache.yaml @@ -13,35 +13,35 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: Unit tips: Tips - cli_style: + style: type: simple_bar range_color: [1, 100] label_txt: (%) xrange: [0, 110] metric: - Buffer Coalescing: - value: 
AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum - * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None)) + Hit rate: + value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) + / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None)) unit: Pct of Peak tips: - Cache Util: - value: AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum - != 0) else None)) - unit: Pct of Peak - tips: - Cache BW: + Bandwidth: value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) unit: Pct of Peak tips: - Cache Hit: - value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) - + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) - / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)) + Utilization: + value: AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum + != 0) else None)) + unit: Pct of Peak + tips: + Coalescing: + value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum + * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None)) unit: Pct of Peak tips: @@ -141,11 +141,26 @@ Panel Config: unit: (Req + $normUnit) tips: Cache BW: - avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) - min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) - max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) - unit: GB/s + avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / $denom)) + min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / $denom)) + max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / $denom)) + unit: (Bytes + $normUnit) tips: + Cache Hit Rate: + avg: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + 
TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / + TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None)) + min: MIN(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / + TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None)) + max: MAX(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / + TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else + None)) + unit: pct + tips: Cache Accesses: avg: AVG((TCP_TOTAL_CACHE_ACCESSES_sum / $denom)) min: MIN((TCP_TOTAL_CACHE_ACCESSES_sum / $denom)) @@ -164,22 +179,7 @@ Panel Config: / $denom)) unit: (Req + $normUnit) tips: - Cache Hit Rate: - avg: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + - TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / - TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)) - min: MIN(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + - TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / - TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)) - max: MAX(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + - TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / - TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else - None)) - unit: pct - tips: - Invalidate: + Invalidations: avg: AVG((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom)) min: MIN((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom)) max: MAX((TCP_TOTAL_WRITEBACK_INVALIDATES_sum / $denom)) @@ -188,9 +188,9 @@ Panel Config: L1-L2 BW: avg: AVG(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) - min: AVG(((64 * 
(((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + min: MIN(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) - max: AVG(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + max: MAX(((64 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum) + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum)) / $denom)) unit: (Bytes + $normUnit) tips: @@ -258,7 +258,7 @@ Panel Config: max: Max unit: Unit tips: Tips - cli_style: + style: type: simple_multi_bar metric: NC - Read: @@ -388,17 +388,17 @@ Panel Config: avg: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) - units: (Hits + $normUnit) + units: (Req + $normUnit) tips: - Misses (Translation): + Translation Misses: avg: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) - units: (Misses + $normUnit) + units: (Req + $normUnit) tips: - Misses (Permission): + Permission Misses: avg: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) - units: (Misses + $normUnit) - tips: \ No newline at end of file + units: (Req + $normUnit) + tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1700_L2_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1700_L2_cache.yaml index 198437750f..2b09bec17a 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1700_L2_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1700_L2_cache.yaml @@ -13,31 +13,35 @@ Panel Config: title: Speed-of-Light header: metric: Metric - value: Value + value: Avg unit: 
Unit tips: Tips - cli_style: + style: type: simple_bar metric: - L2 Util: + Utilization: value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE))) unit: pct + tips: + Bandwidth: + value: ((100 * AVG(((TCC_REQ_sum * 128) / (EndNs - BeginNs)))) / ((($sclk / 1000) * 128) * TO_INT($L2Banks))) + unit: pct tips: - Cache Hit: + Hit Rate: value: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else 0)) unit: pct - tips: - L2-EA Rd BW: + tips: + L2-Fabric Read BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s - tips: - L2-EA Wr BW: + tips: + L2-Fabric Write and Atomic BW: value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s - tips: + tips: - metric_table: id: 1702 @@ -50,7 +54,7 @@ Panel Config: unit: Unit tips: Tips metric: - Read BW: + L2-Fabric Read BW: avg: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / $denom)) min: MIN((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) @@ -58,8 +62,26 @@ Panel Config: max: MAX((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) * 64)) / $denom)) unit: (Bytes + $normUnit) - tips: - Write BW: + tips: + HBM Read Traffic: + avg: AVG((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_RDREQ_DRAM_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + unit: pct + tips: + Remote Read Traffic: + avg: AVG((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + min: MIN((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + max: 
MAX((100 * ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum) / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + unit: pct + tips: + Uncached Read Traffic: + avg: AVG((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_RD_UNCACHED_32B_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) + unit: pct + tips: + L2-Fabric Write and Atomic BW: avg: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) * 32)) / $denom)) min: MIN((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) @@ -67,55 +89,31 @@ Panel Config: max: MAX((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) * 32)) / $denom)) unit: (Bytes + $normUnit) - tips: - Read (32B): - avg: AVG((TCC_EA_RDREQ_32B_sum / $denom)) - min: MIN((TCC_EA_RDREQ_32B_sum / $denom)) - max: MAX((TCC_EA_RDREQ_32B_sum / $denom)) - unit: (Req + $normUnit) - tips: - Read (Uncached 32B): - avg: AVG((TCC_EA_RD_UNCACHED_32B_sum / $denom)) - min: MIN((TCC_EA_RD_UNCACHED_32B_sum / $denom)) - max: MAX((TCC_EA_RD_UNCACHED_32B_sum / $denom)) - unit: (Req + $normUnit) - tips: - Read (64B): - avg: AVG(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) - min: MIN(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) - max: MAX(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) - unit: (Req + $normUnit) - tips: - HBM Read: - avg: AVG((TCC_EA_RDREQ_DRAM_sum / $denom)) - min: MIN((TCC_EA_RDREQ_DRAM_sum / $denom)) - max: MAX((TCC_EA_RDREQ_DRAM_sum / $denom)) - unit: (Req + $normUnit) - tips: - Write (32B): - avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) - min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) - max: MAX(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) - unit: (Req + $normUnit) - tips: - Write (Uncached 32B): - avg: 
AVG((TCC_EA_WR_UNCACHED_32B_sum / $denom)) - min: MIN((TCC_EA_WR_UNCACHED_32B_sum / $denom)) - max: MAX((TCC_EA_WR_UNCACHED_32B_sum / $denom)) - unit: (Req + $normUnit) - tips: - Write (64B): - avg: AVG((TCC_EA_WRREQ_64B_sum / $denom)) - min: MIN((TCC_EA_WRREQ_64B_sum / $denom)) - max: MAX((TCC_EA_WRREQ_64B_sum / $denom)) - unit: (Req + $normUnit) - tips: - HBM Write: - avg: AVG((TCC_EA_WRREQ_DRAM_sum / $denom)) - min: MIN((TCC_EA_WRREQ_DRAM_sum / $denom)) - max: MAX((TCC_EA_WRREQ_DRAM_sum / $denom)) - unit: (Req + $normUnit) - tips: + tips: + HBM Write and Atomic Traffic: + avg: AVG((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_WRREQ_DRAM_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: + Remote Write and Atomic Traffic: + avg: AVG((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum) / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: + Atomic Traffic: + avg: AVG((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_ATOMIC_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + unit: pct + tips: + Uncached Write and Atomic Traffic: + avg: AVG((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + min: MIN((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + max: MAX((100 * (TCC_EA_WR_UNCACHED_32B_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) + 
unit: pct + tips: Read Latency: avg: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) @@ -124,7 +122,7 @@ Panel Config: max: MAX(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum != 0) else None)) unit: Cycles - tips: + tips: Write Latency: avg: AVG(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) @@ -133,7 +131,7 @@ Panel Config: max: MAX(((TCC_EA_WRREQ_LEVEL_sum / TCC_EA_WRREQ_sum) if (TCC_EA_WRREQ_sum != 0) else None)) unit: Cycles - tips: + tips: Atomic Latency: avg: AVG(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum != 0) else None)) @@ -142,7 +140,7 @@ Panel Config: max: MAX(((TCC_EA_ATOMIC_LEVEL_sum / TCC_EA_ATOMIC_sum) if (TCC_EA_ATOMIC_sum != 0) else None)) unit: Cycles - tips: + tips: Read Stall: avg: AVG((((100 * ((TCC_EA_RDREQ_IO_CREDIT_STALL_sum + TCC_EA_RDREQ_GMI_CREDIT_STALL_sum) + TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != @@ -154,7 +152,7 @@ Panel Config: + TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != 0) else None)) unit: pct - tips: + tips: Write Stall: avg: AVG((((100 * ((TCC_EA_WRREQ_IO_CREDIT_STALL_sum + TCC_EA_WRREQ_GMI_CREDIT_STALL_sum) + TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != @@ -166,7 +164,7 @@ Panel Config: + TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum)) / TCC_BUSY_sum) if (TCC_BUSY_sum != 0) else None)) unit: pct - tips: + tips: - metric_table: id: 1703 @@ -179,54 +177,48 @@ Panel Config: unit: Unit tips: Tips metric: + Bandwidth: + avg: AVG((TCC_REQ_sum * 128) / $denom) + min: MIN((TCC_REQ_sum * 128) / $denom) + max: MAX((TCC_REQ_sum * 128) / $denom) + unit: (Bytes + $normUnit) + tips: Req: avg: AVG((TCC_REQ_sum / $denom)) min: MIN((TCC_REQ_sum / $denom)) max: MAX((TCC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: - Streaming Req: - avg: AVG((TCC_STREAMING_REQ_sum / $denom)) - min: MIN((TCC_STREAMING_REQ_sum / $denom)) - max: 
MAX((TCC_STREAMING_REQ_sum / $denom)) - unit: (Req + $normUnit) - tips: + tips: Read Req: avg: AVG((TCC_READ_sum / $denom)) min: MIN((TCC_READ_sum / $denom)) max: MAX((TCC_READ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: Write Req: avg: AVG((TCC_WRITE_sum / $denom)) min: MIN((TCC_WRITE_sum / $denom)) max: MAX((TCC_WRITE_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: Atomic Req: avg: AVG((TCC_ATOMIC_sum / $denom)) min: MIN((TCC_ATOMIC_sum / $denom)) max: MAX((TCC_ATOMIC_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: + Streaming Req: + avg: AVG((TCC_STREAMING_REQ_sum / $denom)) + min: MIN((TCC_STREAMING_REQ_sum / $denom)) + max: MAX((TCC_STREAMING_REQ_sum / $denom)) + unit: (Req + $normUnit) + tips: Probe Req: avg: AVG((TCC_PROBE_sum / $denom)) min: MIN((TCC_PROBE_sum / $denom)) max: MAX((TCC_PROBE_sum / $denom)) unit: (Req + $normUnit) - tips: - Hits: - avg: AVG((TCC_HIT_sum / $denom)) - min: MIN((TCC_HIT_sum / $denom)) - max: MAX((TCC_HIT_sum / $denom)) - unit: (Hits + $normUnit) - tips: - Misses: - avg: AVG((TCC_MISS_sum / $denom)) - min: MIN((TCC_MISS_sum / $denom)) - max: MAX((TCC_MISS_sum / $denom)) - unit: (Misses + $normUnit) - tips: + tips: Cache Hit: avg: AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) @@ -235,61 +227,73 @@ Panel Config: max: MAX((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else None)) unit: pct - tips: + tips: + Hits: + avg: AVG((TCC_HIT_sum / $denom)) + min: MIN((TCC_HIT_sum / $denom)) + max: MAX((TCC_HIT_sum / $denom)) + unit: (Hits + $normUnit) + tips: + Misses: + avg: AVG((TCC_MISS_sum / $denom)) + min: MIN((TCC_MISS_sum / $denom)) + max: MAX((TCC_MISS_sum / $denom)) + unit: (Misses + $normUnit) + tips: Writeback: avg: AVG((TCC_WRITEBACK_sum / $denom)) min: MIN((TCC_WRITEBACK_sum / $denom)) max: MAX((TCC_WRITEBACK_sum / $denom)) - unit: ( + $normUnit) - tips: + unit: (Cachelines + $normUnit) + tips: + 
Writeback (Internal): + avg: AVG((TCC_NORMAL_WRITEBACK_sum / $denom)) + min: MIN((TCC_NORMAL_WRITEBACK_sum / $denom)) + max: MAX((TCC_NORMAL_WRITEBACK_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: + Writeback (vL1D Req): + avg: AVG((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) + min: MIN((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) + max: MAX((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: + Evict (Internal): + avg: AVG((TCC_NORMAL_EVICT_sum / $denom)) + min: MIN((TCC_NORMAL_EVICT_sum / $denom)) + max: MAX((TCC_NORMAL_EVICT_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: + Evict (vL1D Req): + avg: AVG((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) + min: MIN((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) + max: MAX((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) + unit: (Cachelines + $normUnit) + tips: NC Req: avg: AVG((TCC_NC_REQ_sum / $denom)) min: MIN((TCC_NC_REQ_sum / $denom)) max: MAX((TCC_NC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: UC Req: avg: AVG((TCC_UC_REQ_sum / $denom)) min: MIN((TCC_UC_REQ_sum / $denom)) max: MAX((TCC_UC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: CC Req: avg: AVG((TCC_CC_REQ_sum / $denom)) min: MIN((TCC_CC_REQ_sum / $denom)) max: MAX((TCC_CC_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: + tips: RW Req: avg: AVG((TCC_RW_REQ_sum / $denom)) min: MIN((TCC_RW_REQ_sum / $denom)) max: MAX((TCC_RW_REQ_sum / $denom)) unit: (Req + $normUnit) - tips: - Writeback (Normal): - avg: AVG((TCC_NORMAL_WRITEBACK_sum / $denom)) - min: MIN((TCC_NORMAL_WRITEBACK_sum / $denom)) - max: MAX((TCC_NORMAL_WRITEBACK_sum / $denom)) - unit: ( + $normUnit) - tips: - Writeback (TC Req): - avg: AVG((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) - min: MIN((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) - max: MAX((TCC_ALL_TC_OP_WB_WRITEBACK_sum / $denom)) - unit: ( + $normUnit) - tips: - Evict (Normal): - avg: AVG((TCC_NORMAL_EVICT_sum / $denom)) - min: MIN((TCC_NORMAL_EVICT_sum / $denom)) - max: 
MAX((TCC_NORMAL_EVICT_sum / $denom)) - unit: ( + $normUnit) - tips: - Evict (TC Req): - avg: AVG((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) - min: MIN((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) - max: MAX((TCC_ALL_TC_OP_INV_EVICT_sum / $denom)) - unit: ( + $normUnit) - tips: + tips: - metric_table: id: 1704 @@ -303,62 +307,140 @@ Panel Config: max: Max unit: Unit tips: Tips - cli_style: + style: type: simple_multi_bar metric: - Read - Remote Socket Stall: - type: Remote Socket Stall + Read - PCIe Stall: + type: PCIe Stall transaction: Read - avg: AVG((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: - Read - Peer GCD Stall: - type: Peer GCD Stall + avg: AVG(((100 * (TCC_EA_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_RDREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + Read - Infinity Fabric™ Stall: + type: Infinity Fabric™ Stall transaction: Read - avg: AVG((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: + avg: AVG(((100 * (TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: Read - HBM Stall: type: HBM Stall transaction: Read - avg: AVG((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom)) - unit: (Req + 
$normUnit) - tips: - Write - Remote Socket Stall: - type: Remote Socket Stall + avg: AVG(((100 * (TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + Write - PCIe Stall: + type: PCIe Stall transaction: Write - avg: AVG((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: - Write - Peer GCD Stall: - type: Peer GCD Stall + avg: AVG(((100 * (TCC_EA_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_WRREQ_IO_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + Write - Infinity Fabric™ Stall: + type: Infinity Fabric™ Stall transaction: Write - avg: AVG((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: + avg: AVG(((100 * (TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: Write - HBM Stall: type: HBM Stall transaction: Write - avg: AVG((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom)) - min: MIN((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom)) - max: MAX((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom)) - unit: (Req + $normUnit) - tips: + avg: AVG(((100 * 
(TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: Write - Credit Starvation: type: Credit Starvation transaction: Write - avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom)) - min: MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom)) - max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom)) + avg: AVG(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + min: MIN(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + max: MAX(((100 * (TCC_TOO_MANY_EA_WRREQS_STALL_sum / TCC_BUSY_sum)) if (TCC_BUSY_sum != 0) else None)) + unit: pct + tips: + + - metric_table: + id: 1705 + title: L2 - Fabric Detailed Transaction Breakdown + header: + metric: Metric + avg: Avg + min: Min + max: Max + unit: Unit + tips: Tips + metric: + Read (32B): + avg: AVG((TCC_EA_RDREQ_32B_sum / $denom)) + min: MIN((TCC_EA_RDREQ_32B_sum / $denom)) + max: MAX((TCC_EA_RDREQ_32B_sum / $denom)) unit: (Req + $normUnit) - tips: \ No newline at end of file + tips: + Read (Uncached): + avg: AVG((TCC_EA_RD_UNCACHED_32B_sum / $denom)) + min: MIN((TCC_EA_RD_UNCACHED_32B_sum / $denom)) + max: MAX((TCC_EA_RD_UNCACHED_32B_sum / $denom)) + unit: (Req + $normUnit) + tips: + Read (64B): + avg: AVG(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) + min: MIN(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) + max: MAX(((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) / $denom)) + unit: (Req + $normUnit) + tips: + HBM Read: + avg: AVG((TCC_EA_RDREQ_DRAM_sum / $denom)) + min: MIN((TCC_EA_RDREQ_DRAM_sum / $denom)) + max: MAX((TCC_EA_RDREQ_DRAM_sum / $denom)) + unit: (Req + $normUnit) + tips: + Remote Read: + avg: AVG((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / 
$denom)) + min: MIN((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom)) + max: MAX((MAX((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_DRAM_sum), 0) / $denom)) + unit: (Req + $normUnit) + tips: + Write and Atomic (32B): + avg: AVG(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) + min: MIN(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) + max: MAX(((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) / $denom)) + unit: (Req + $normUnit) + tips: + Write and Atomic (Uncached): + avg: AVG((TCC_EA_WR_UNCACHED_32B_sum / $denom)) + min: MIN((TCC_EA_WR_UNCACHED_32B_sum / $denom)) + max: MAX((TCC_EA_WR_UNCACHED_32B_sum / $denom)) + unit: (Req + $normUnit) + tips: + Write and Atomic (64B): + avg: AVG((TCC_EA_WRREQ_64B_sum / $denom)) + min: MIN((TCC_EA_WRREQ_64B_sum / $denom)) + max: MAX((TCC_EA_WRREQ_64B_sum / $denom)) + unit: (Req + $normUnit) + tips: + HBM Write and Atomic: + avg: AVG((TCC_EA_WRREQ_DRAM_sum / $denom)) + min: MIN((TCC_EA_WRREQ_DRAM_sum / $denom)) + max: MAX((TCC_EA_WRREQ_DRAM_sum / $denom)) + unit: (Req + $normUnit) + tips: + Remote Write and Atomic: + avg: AVG((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) + min: MIN((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) + max: MAX((MAX((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_DRAM_sum), 0) / $denom)) + unit: (Req + $normUnit) + tips: + Atomic: + avg: AVG((TCC_EA_ATOMIC_sum / $denom)) + min: MIN((TCC_EA_ATOMIC_sum / $denom)) + max: MAX((TCC_EA_ATOMIC_sum / $denom)) + unit: (Req + $normUnit) + tips: \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1800_L2_cache_per_channel.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1800_L2_cache_per_channel.yaml index 5d54deeb9e..e65d3890e6 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1800_L2_cache_per_channel.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1800_L2_cache_per_channel.yaml @@ 
-186,7 +186,7 @@ Panel Config: - metric_table: id: 1803 - title: Requests (Requests) + title: L2 Requests (Requests) header: metric: Metric expr: Expression @@ -199,12 +199,12 @@ Panel Config: - metric_table: id: 1804 - title: L1-L2 Access (Requests) + title: L2 Access (Requests) header: metric: Metric - read req: L1-L2 Read - write req: L1-L2 Write - atomic req: L1-L2 Atomic + read req: L2 Read Req + write req: L2 Write Req + atomic req: L2 Atomic Req metric: "::_1": read req: AVG((TO_INT(TCC_READ[::_1]) / $denom)) @@ -216,12 +216,12 @@ Panel Config: - metric_table: id: 1805 - title: L2-EA Access (Requests) + title: L2 - Fabric Access (Requests) header: metric: Metric - read req: L2-EA Read - write req: L2-EA Write - atomic req: L2-EA Atomic + read req: L2 - Fabric Read Req + write req: L2 - Fabric Write and Atomic Req + atomic req: L2 - Fabric Atomic Req metric: "::_1": read req: AVG((TO_INT(TCC_EA_RDREQ[::_1]) / $denom)) @@ -256,7 +256,7 @@ Panel Config: - metric_table: id: 1806 - title: L2-EA Read Latency (Cycles) + title: L2 - Fabric Read Latency (Cycles) header: metric: Metric expr: Expression @@ -271,7 +271,7 @@ Panel Config: - metric_table: id: 1807 - title: L2-EA Write Latency (Cycles) + title: L2 - Fabric Write Latency (Cycles) header: metric: Metric expr: Expression @@ -286,7 +286,7 @@ Panel Config: - metric_table: id: 1808 - title: L2-EA Atomic Latency (Cycles) + title: L2 - Fabric Atomic Latency (Cycles) header: metric: Metric expr: Expression @@ -300,35 +300,35 @@ Panel Config: - metric_table: id: 1809 - title: L2-EA Read Stall (Cycles per normUnit) + title: L2 - Fabric Read Stall (Cycles per normUnit) header: metric: Metric - ea read stall - io: L2-EA Read Stall - IO - ea read stall - gmi: L2-EA Read Stall - GMI - ea read stall - dram: L2-EA Read Stall - DRAM + ea read stall - pcie: L2 - Fabric Read Stall (PCIe) + ea read stall - if: L2 - Fabric Read Stall (Infinity Fabric™) + ea read stall - hbm: L2 - Fabric Read Stall (HBM) metric: "::_1": - ea 
read stall - io: AVG((TO_INT(TCC_EA_RDREQ_IO_CREDIT_STALL[::_1]) / $denom)) - ea read stall - gmi: AVG((TO_INT(TCC_EA_RDREQ_GMI_CREDIT_STALL[::_1]) / $denom)) - ea read stall - dram: AVG((TO_INT(TCC_EA_RDREQ_DRAM_CREDIT_STALL[::_1]) / $denom)) + ea read stall - pcie: AVG((TO_INT(TCC_EA_RDREQ_IO_CREDIT_STALL[::_1]) / $denom)) + ea read stall - if: AVG((TO_INT(TCC_EA_RDREQ_GMI_CREDIT_STALL[::_1]) / $denom)) + ea read stall - hbm: AVG((TO_INT(TCC_EA_RDREQ_DRAM_CREDIT_STALL[::_1]) / $denom)) placeholder_range: "::_1": 32 cli_style: simple_multiple_bar - metric_table: id: 1810 - title: L2-EA Write Stall (Cycles per normUnit) + title: L2 - Fabric Write Stall (Cycles per normUnit) header: metric: Metric - ea write stall - io: L2-EA Write Stall - IO - ea write stall - gmi: L2-EA Write Stall - GMI - ea write stall - dram: L2-EA Write Stall - DRAM - ea write stall - starve: L2-EA Write Stall - Starve + ea write stall - pcie: L2 - Fabric Write Stall (PCIe) + ea write stall - if: L2 - Fabric Write Stall (Infinity Fabric™) + ea write stall - hbm: L2 - Fabric Write Stall (HBM) + ea write stall - starve: L2 - Fabric Write Starve metric: "::_1": - ea write stall - io: AVG((TO_INT(TCC_EA_WRREQ_IO_CREDIT_STALL[::_1]) / $denom)) - ea write stall - gmi: AVG((TO_INT(TCC_EA_WRREQ_GMI_CREDIT_STALL[::_1]) / $denom)) - ea write stall - dram: AVG((TO_INT(TCC_EA_WRREQ_DRAM_CREDIT_STALL[::_1]) / $denom)) + ea write stall - pcie: AVG((TO_INT(TCC_EA_WRREQ_IO_CREDIT_STALL[::_1]) / $denom)) + ea write stall - if: AVG((TO_INT(TCC_EA_WRREQ_GMI_CREDIT_STALL[::_1]) / $denom)) + ea write stall - hbm: AVG((TO_INT(TCC_EA_WRREQ_DRAM_CREDIT_STALL[::_1]) / $denom)) ea write stall - starve: AVG((TO_INT(TCC_TOO_MANY_EA_WRREQS_STALL[::_1]) / $denom)) placeholder_range: "::_1": 32 diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/panel_config_template.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/panel_config_template.yaml index e241896b40..4b81bad0e9 100644 
--- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/panel_config_template.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/panel_config_template.yaml @@ -30,7 +30,7 @@ Panel Config: value: Value unit: Unit peak: Peak - pop: PoP + pop: Pct of Peak tips: Tips metric: METRIC01: