diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0200_system-speed-of-light.yaml index 986b2f0aec..774cb479e1 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0200_system-speed-of-light.yaml @@ -106,11 +106,11 @@ Panel Config: tips: LDS BW: value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs))) + / (End_Timestamp - Start_Timestamp))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) @@ -127,10 +127,10 @@ Panel Config: pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) tips: Instr Cache BW: - value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Scalar L1D Cache Hit Rate: @@ -142,10 +142,10 @@ Panel Config: if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) tips: Scalar L1D Cache BW: - value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Vector L1D Cache Hit Rate: @@ -161,10 +161,10 @@ Panel Config: None)) tips: Vector L1D Cache BW: - value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: ((($sclk / 1000) * 64) * $numCU) - pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) tips: L2 Cache Hit Rate: @@ -177,19 +177,19 @@ Panel Config: tips: L2-Fabric Read BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs)))) / $hbmBW) + * 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Write BW: value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs)))) / $hbmBW) + * 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Read Latency: value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum @@ -214,10 +214,10 @@ Panel Config: coll_level: SQ_LEVEL_WAVES tips: Instr Fetch BW: - value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32)) + value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32)) unit: GB/s peak: ((($sclk / 1000) * 32) * $numSQC) - pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC + pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC * (($sclk / 1000) * 32))) coll_level: SQ_IFETCH_LEVEL tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0700_wavefront-launch.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0700_wavefront-launch.yaml index 70141193e6..2dceb05dc7 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0700_wavefront-launch.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/0700_wavefront-launch.yaml @@ -20,15 +20,15 @@ Panel Config: tips: Tips metric: Grid Size: - avg: AVG(grd) - min: MIN(grd) - max: MAX(grd) + avg: AVG(Grid_Size) + min: MIN(Grid_Size) + max: MAX(Grid_Size) unit: Work Items tips: Workgroup Size: - avg: AVG(wgr) - min: MIN(wgr) - max: MAX(wgr) + avg: AVG(Workgroup_Size) + min: MIN(Workgroup_Size) + max: MAX(Workgroup_Size) unit: Work Items tips: Total Wavefronts: @@ -50,33 +50,33 @@ Panel Config: unit: Wavefronts tips: VGPRs: - avg: AVG(arch_vgpr) - min: MIN(arch_vgpr) - max: MAX(arch_vgpr) + avg: AVG(Arch_VGPR) + min: MIN(Arch_VGPR) + max: MAX(Arch_VGPR) unit: Registers tips: AGPRs: - avg: AVG(accum_vgpr) - min: MIN(accum_vgpr) - max: MAX(accum_vgpr) + avg: AVG(Accum_VGPR) + min: MIN(Accum_VGPR) + max: MAX(Accum_VGPR) unit: Registers tips: SGPRs: - avg: AVG(sgpr) - min: MIN(sgpr) - max: MAX(sgpr) + avg: AVG(SGPR) + min: MIN(SGPR) + max: MAX(SGPR) unit: Registers tips: LDS Allocation: - avg: AVG(lds) - min: MIN(lds) - max: MAX(lds) + avg: AVG(LDS_Per_Workgroup) + min: MIN(LDS_Per_Workgroup) + max: MAX(LDS_Per_Workgroup) unit: Bytes tips: Scratch Allocation: - avg: AVG(scr) - min: MIN(scr) - max: MAX(scr) + avg: AVG(Scratch_Per_Workitem) + min: MIN(Scratch_Per_Workitem) + max: MAX(Scratch_Per_Workitem) unit: Bytes tips: @@ -92,9 +92,9 @@ Panel Config: tips: Tips metric: Kernel Time (Nanosec): - avg: AVG((EndNs - BeginNs)) - min: MIN((EndNs - BeginNs)) - max: MAX((EndNs - BeginNs)) + avg: AVG((End_Timestamp - Start_Timestamp)) + min: MIN((End_Timestamp - Start_Timestamp)) + max: MAX((End_Timestamp - Start_Timestamp)) unit: ns tips: Kernel Time (Cycles): diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1200_lds.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1200_lds.yaml index 3fd52c3b1b..4ae5333608 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1200_lds.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1200_lds.yaml @@ -32,7 +32,7 @@ Panel Config: tips: Bandwidth (Pct-of-Peak): value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) unit: Pct of Peak tips: Bank Conflict Rate: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1300_instruction-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1300_instruction-cache.yaml index 05dc759803..bbbf6ebe26 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1300_instruction-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1300_instruction-cache.yaml @@ -24,7 +24,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: Cache Hit: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1400_constant-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1400_constant-cache.yaml index 563caad13f..d0c89f1baf 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1400_constant-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1400_constant-cache.yaml @@ -24,7 +24,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: Cache Hit: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1600_L1_cache.yaml index 01e6d29d71..1dee69c726 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1600_L1_cache.yaml @@ -33,7 +33,7 @@ Panel Config: unit: Pct of Peak tips: Cache BW: - value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) unit: Pct of Peak tips: @@ -141,9 +141,9 @@ Panel Config: unit: (Req + $normUnit) tips: Cache BW: - avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: Cache Accesses: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1700_L2_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1700_L2_cache.yaml index 0b5f5e827a..5004e31ea2 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1700_L2_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1700_L2_cache.yaml @@ -30,12 +30,12 @@ Panel Config: tips: L2-EA Rd BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: L2-EA Wr BW: value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1900_memory_chart.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1900_memory_chart.yaml index 905204601d..3e8ff0a0f6 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1900_memory_chart.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx906/1900_memory_chart.yaml @@ -63,7 +63,7 @@ Panel Config: alias: vgpr_ tips: SGPR: - value: ROUND(AVG(sgpr), 0) + value: ROUND(AVG(SGPR), 0) alias: sgpr_ tips: LDS Allocation: @@ -71,7 +71,7 @@ Panel Config: alias: lds_alloc_ tips: Scratch Allocation: - value: ROUND(AVG(scr), 0) + value: ROUND(AVG(Scratch_Per_Workitem), 0) alias: scratch_alloc_ tips: Wavefronts: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml index 986b2f0aec..774cb479e1 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0200_system-speed-of-light.yaml @@ -106,11 +106,11 @@ Panel Config: tips: LDS BW: value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs))) + / (End_Timestamp - Start_Timestamp))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) @@ -127,10 +127,10 @@ Panel Config: pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) tips: Instr Cache BW: - value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Scalar L1D Cache Hit Rate: @@ -142,10 +142,10 @@ Panel Config: if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) tips: Scalar L1D Cache BW: - value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Vector L1D Cache Hit Rate: @@ -161,10 +161,10 @@ Panel Config: None)) tips: Vector L1D Cache BW: - value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: ((($sclk / 1000) * 64) * $numCU) - pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) tips: L2 Cache Hit Rate: @@ -177,19 +177,19 @@ Panel Config: tips: L2-Fabric Read BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs)))) / $hbmBW) + * 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Write BW: value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs)))) / $hbmBW) + * 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Read Latency: value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum @@ -214,10 +214,10 @@ Panel Config: coll_level: SQ_LEVEL_WAVES tips: Instr Fetch BW: - value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32)) + value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32)) unit: GB/s peak: ((($sclk / 1000) * 32) * $numSQC) - pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC + pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC * (($sclk / 1000) * 32))) coll_level: SQ_IFETCH_LEVEL tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0300_mem_chart.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0300_mem_chart.yaml index 7e6cdee8f1..b3adfeeb84 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0300_mem_chart.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0300_mem_chart.yaml @@ -82,15 +82,15 @@ Panel Config: tips: SGPR: #alias: sgpr_ - value: ROUND(AVG(sgpr), 0) + value: ROUND(AVG(SGPR), 0) tips: LDS Allocation: #alias: lds_alloc_ - value: ROUND(AVG(lds), 0) + value: ROUND(AVG(LDS_Per_Workgroup), 0) tips: Scratch Allocation: #alias: scratch_alloc_ - value: ROUND(AVG(scr), 0) + value: ROUND(AVG(Scratch_Per_Workitem), 0) tips: Wavefronts: #alias: wavefronts_ diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0700_wavefront-launch.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0700_wavefront-launch.yaml index 70141193e6..2dceb05dc7 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0700_wavefront-launch.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/0700_wavefront-launch.yaml @@ -20,15 +20,15 @@ Panel Config: tips: Tips metric: Grid Size: - avg: AVG(grd) - min: MIN(grd) - max: MAX(grd) + avg: AVG(Grid_Size) + min: MIN(Grid_Size) + max: MAX(Grid_Size) unit: Work Items tips: Workgroup Size: - avg: AVG(wgr) - min: MIN(wgr) - max: MAX(wgr) + avg: AVG(Workgroup_Size) + min: MIN(Workgroup_Size) + max: MAX(Workgroup_Size) unit: Work Items tips: Total Wavefronts: @@ -50,33 +50,33 @@ Panel Config: unit: Wavefronts tips: VGPRs: - avg: AVG(arch_vgpr) - min: MIN(arch_vgpr) - max: MAX(arch_vgpr) + avg: AVG(Arch_VGPR) + min: MIN(Arch_VGPR) + max: MAX(Arch_VGPR) unit: Registers tips: AGPRs: - avg: AVG(accum_vgpr) - min: MIN(accum_vgpr) - max: MAX(accum_vgpr) + avg: AVG(Accum_VGPR) + min: MIN(Accum_VGPR) + max: MAX(Accum_VGPR) unit: Registers tips: SGPRs: - avg: AVG(sgpr) - min: MIN(sgpr) - max: MAX(sgpr) + avg: AVG(SGPR) + min: MIN(SGPR) + max: MAX(SGPR) unit: Registers tips: LDS Allocation: - avg: AVG(lds) - min: MIN(lds) - max: MAX(lds) + avg: AVG(LDS_Per_Workgroup) + min: MIN(LDS_Per_Workgroup) + max: MAX(LDS_Per_Workgroup) unit: Bytes tips: Scratch Allocation: - avg: AVG(scr) - min: MIN(scr) - max: MAX(scr) + avg: AVG(Scratch_Per_Workitem) + min: MIN(Scratch_Per_Workitem) + max: MAX(Scratch_Per_Workitem) unit: Bytes tips: @@ -92,9 +92,9 @@ Panel Config: tips: Tips metric: Kernel Time (Nanosec): - avg: AVG((EndNs - BeginNs)) - min: MIN((EndNs - BeginNs)) - max: MAX((EndNs - BeginNs)) + avg: AVG((End_Timestamp - Start_Timestamp)) + min: MIN((End_Timestamp - Start_Timestamp)) + max: MAX((End_Timestamp - Start_Timestamp)) unit: ns tips: Kernel Time (Cycles): diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1200_lds.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1200_lds.yaml index f720bfb84b..d225d31e0a 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1200_lds.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1200_lds.yaml @@ -32,7 +32,7 @@ Panel Config: tips: Bandwidth (Pct-of-Peak): value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) unit: Pct of Peak tips: Bank Conflict Rate: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1300_instruction-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1300_instruction-cache.yaml index 17ca1c0716..7425cade78 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1300_instruction-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1300_instruction-cache.yaml @@ -24,7 +24,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: Cache Hit: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1400_constant-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1400_constant-cache.yaml index 4d4a6c8c95..ab4f5109ab 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1400_constant-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1400_constant-cache.yaml @@ -24,7 +24,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: Cache Hit: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1600_L1_cache.yaml index a476dc3523..7b8ed6f8ae 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1600_L1_cache.yaml @@ -33,7 +33,7 @@ Panel Config: unit: Pct of Peak tips: Cache BW: - value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) unit: Pct of Peak tips: @@ -141,9 +141,9 @@ Panel Config: unit: (Req + $normUnit) tips: Cache BW: - avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: Cache Accesses: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1700_L2_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1700_L2_cache.yaml index 42245be4b2..2b98c2593d 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1700_L2_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx908/1700_L2_cache.yaml @@ -30,12 +30,12 @@ Panel Config: tips: L2-EA Rd BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: L2-EA Wr BW: value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml index c197c0fc58..cd8edfb075 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0200_system-speed-of-light.yaml @@ -25,56 +25,56 @@ Panel Config: + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64)))) - / (EndNs - BeginNs))) + / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: (((($sclk * $numCU) * 64) * 2) / 1000) pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) - + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk + + (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) tips: VALU IOPs: - value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs))) + value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp))) unit: GIOP peak: (((($sclk * $numCU) * 64) * 2) / 1000) - pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - - BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) + pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp + - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) tips: MFMA FLOPs (BF16): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 1024) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) tips: MFMA FLOPs (F16): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 1024) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) tips: MFMA FLOPs (F32): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 256) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) tips: MFMA FLOPs (F64): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 256) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) tips: MFMA IOPs (Int8): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP peak: ((($sclk * $numCU) * 1024) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) tips: Active CUs: @@ -123,11 +123,11 @@ Panel Config: tips: LDS BW: value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs))) + / (End_Timestamp - Start_Timestamp))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) @@ -144,10 +144,10 @@ Panel Config: pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) tips: Instr Cache BW: - value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Scalar L1D Cache Hit Rate: @@ -159,10 +159,10 @@ Panel Config: if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) tips: Scalar L1D Cache BW: - value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Vector L1D Cache Hit Rate: @@ -178,10 +178,10 @@ Panel Config: None)) tips: Vector L1D Cache BW: - value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: ((($sclk / 1000) * 64) * $numCU) - pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) tips: L2 Cache Hit Rate: @@ -194,19 +194,19 @@ Panel Config: tips: L2-Fabric Read BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs)))) / $hbmBW) + * 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Write BW: value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs)))) / $hbmBW) + * 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Read Latency: value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum @@ -231,10 +231,10 @@ Panel Config: coll_level: SQ_LEVEL_WAVES tips: Instr Fetch BW: - value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32)) + value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32)) unit: GB/s peak: ((($sclk / 1000) * 32) * $numSQC) - pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC + pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC * (($sclk / 1000) * 32))) coll_level: SQ_IFETCH_LEVEL tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0300_mem_chart.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0300_mem_chart.yaml index 7e6cdee8f1..b3adfeeb84 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0300_mem_chart.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0300_mem_chart.yaml @@ -82,15 +82,15 @@ Panel Config: tips: SGPR: #alias: sgpr_ - value: ROUND(AVG(sgpr), 0) + value: ROUND(AVG(SGPR), 0) tips: LDS Allocation: #alias: lds_alloc_ - value: ROUND(AVG(lds), 0) + value: ROUND(AVG(LDS_Per_Workgroup), 0) tips: Scratch Allocation: #alias: scratch_alloc_ - value: ROUND(AVG(scr), 0) + value: ROUND(AVG(Scratch_Per_Workitem), 0) tips: Wavefronts: #alias: wavefronts_ diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0700_wavefront-launch.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0700_wavefront-launch.yaml index 13ba5b8e16..3a174862df 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0700_wavefront-launch.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/0700_wavefront-launch.yaml @@ -20,15 +20,15 @@ Panel Config: tips: Tips metric: Grid Size: - avg: AVG(grd) - min: MIN(grd) - max: MAX(grd) + avg: AVG(Grid_Size) + min: MIN(Grid_Size) + max: MAX(Grid_Size) unit: Work Items tips: Workgroup Size: - avg: AVG(wgr) - min: MIN(wgr) - max: MAX(wgr) + avg: AVG(Workgroup_Size) + min: MIN(Workgroup_Size) + max: MAX(Workgroup_Size) unit: Work Items tips: Total Wavefronts: @@ -50,33 +50,33 @@ Panel Config: unit: Wavefronts tips: VGPRs: - avg: AVG(arch_vgpr) - min: MIN(arch_vgpr) - max: MAX(arch_vgpr) + avg: AVG(Arch_VGPR) + min: MIN(Arch_VGPR) + max: MAX(Arch_VGPR) unit: Registers tips: AGPRs: - avg: AVG(accum_vgpr) - min: MIN(accum_vgpr) - max: MAX(accum_vgpr) + avg: AVG(Accum_VGPR) + min: MIN(Accum_VGPR) + max: MAX(Accum_VGPR) unit: Registers tips: SGPRs: - avg: AVG(sgpr) - min: MIN(sgpr) - max: MAX(sgpr) + avg: AVG(SGPR) + min: MIN(SGPR) + max: MAX(SGPR) unit: Registers tips: LDS Allocation: - avg: AVG(lds) - min: MIN(lds) - max: MAX(lds) + avg: AVG(LDS_Per_Workgroup) + min: MIN(LDS_Per_Workgroup) + max: MAX(LDS_Per_Workgroup) unit: Bytes tips: Scratch Allocation: - avg: AVG(scr) - min: MIN(scr) - max: MAX(scr) + avg: AVG(Scratch_Per_Workitem) + min: MIN(Scratch_Per_Workitem) + max: MAX(Scratch_Per_Workitem) unit: Bytes tips: @@ -92,9 +92,9 @@ Panel Config: tips: Tips metric: Kernel Time (Nanosec): - avg: AVG((EndNs - BeginNs)) - min: MIN((EndNs - BeginNs)) - max: MAX((EndNs - BeginNs)) + avg: AVG((End_Timestamp - Start_Timestamp)) + min: MIN((End_Timestamp - Start_Timestamp)) + max: MAX((End_Timestamp - Start_Timestamp)) unit: ns tips: Kernel Time (Cycles): diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml index d4b1939642..97a2f1c842 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1100_compute-unit-compute-pipeline.yaml @@ -27,32 +27,32 @@ Panel Config: + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) - + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk + + (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) unit: Pct of Peak tips: mfma_flops_bf16_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 512) / 1000)) unit: Pct of Peak tips: mfma_flops_f16_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) unit: Pct of Peak tips: mfma_flops_f32_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) unit: Pct of Peak tips: mfma_flops_f64_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) unit: Pct of Peak tips: mfma_flops_i8_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) unit: Pct of Peak tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1200_lds.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1200_lds.yaml index f720bfb84b..d225d31e0a 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1200_lds.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1200_lds.yaml @@ -32,7 +32,7 @@ Panel Config: tips: Bandwidth (Pct-of-Peak): value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) unit: Pct of Peak tips: Bank Conflict Rate: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1300_instruction-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1300_instruction-cache.yaml index ce0259eaec..20b437e6a4 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1300_instruction-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1300_instruction-cache.yaml @@ -24,7 +24,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: Cache Hit: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1400_constant-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1400_constant-cache.yaml index 4d4a6c8c95..ab4f5109ab 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1400_constant-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1400_constant-cache.yaml @@ -24,7 +24,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) unit: Pct of Peak tips: Cache Hit: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1600_L1_cache.yaml index 79c5e5f0d9..db3a363ced 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1600_L1_cache.yaml @@ -33,7 +33,7 @@ Panel Config: unit: Pct of Peak tips: Cache BW: - value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) unit: Pct of Peak tips: @@ -141,9 +141,9 @@ Panel Config: unit: (Req + $normUnit) tips: Cache BW: - avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: Cache Accesses: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1700_L2_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1700_L2_cache.yaml index 002802998c..198437750f 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1700_L2_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx90a/1700_L2_cache.yaml @@ -30,12 +30,12 @@ Panel Config: tips: L2-EA Rd BW: value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: L2-EA Wr BW: value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/0200_system-speed-of-light.yaml index bf9f3e0de0..e25b9f66e3 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/0200_system-speed-of-light.yaml @@ -25,56 +25,56 @@ Panel Config: + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64)))) - / (EndNs - BeginNs))) + / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: (((($sclk * $numCU) * 64) * 2) / 1000) pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) - + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk + + (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) tips: VALU IOPs: - value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs))) + value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp))) unit: GIOP peak: (((($sclk * $numCU) * 64) * 2) / 1000) - pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - - BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) + pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp + - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) tips: MFMA FLOPs (BF16): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 4096) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 4096) / 1000)) tips: MFMA FLOPs (F16): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 4096) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 4096) / 1000)) tips: MFMA FLOPs (F32): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 256) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) tips: MFMA FLOPs (F64): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 256) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) tips: MFMA IOPs (Int8): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP peak: ((($sclk * $numCU) * 4096) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 4096) / 1000)) tips: Active CUs: @@ -123,11 +123,11 @@ Panel Config: tips: LDS BW: value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs))) + / (End_Timestamp - Start_Timestamp))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) @@ -144,10 +144,10 @@ Panel Config: pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) tips: Instr Cache BW: - value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Scalar L1D Cache Hit Rate: @@ -159,10 +159,10 @@ Panel Config: if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) tips: Scalar L1D Cache BW: - value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Vector L1D Cache Hit Rate: @@ -178,10 +178,10 @@ Panel Config: None)) tips: Vector L1D Cache BW: - value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: ((($sclk / 1000) * 64) * $numCU) - pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) tips: L2 Cache Hit Rate: @@ -194,19 +194,19 @@ Panel Config: tips: L2-Fabric Read BW: value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs)))) / $hbmBW) + * 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Write BW: value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs)))) / $hbmBW) + * 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Read Latency: value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum @@ -231,10 +231,10 @@ Panel Config: coll_level: SQ_LEVEL_WAVES tips: Instr Fetch BW: - value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32)) + value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32)) unit: GB/s peak: ((($sclk / 1000) * 32) * $numSQC) - pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC + pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC * (($sclk / 1000) * 32))) coll_level: SQ_IFETCH_LEVEL tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/0700_wavefront-launch.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/0700_wavefront-launch.yaml index 33288726f5..ba8398e5cb 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/0700_wavefront-launch.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/0700_wavefront-launch.yaml @@ -92,9 +92,9 @@ Panel Config: tips: Tips metric: Kernel Time (Nanosec): - avg: AVG((EndNs - BeginNs)) - min: MIN((EndNs - BeginNs)) - max: MAX((EndNs - BeginNs)) + avg: AVG((End_Timestamp - Start_Timestamp)) + min: MIN((End_Timestamp - Start_Timestamp)) + max: MAX((End_Timestamp - Start_Timestamp)) unit: ns tips: Kernel Time (Cycles): diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1100_compute-unit-compute-pipeline.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1100_compute-unit-compute-pipeline.yaml index 718ac72fb7..0119b4caf9 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1100_compute-unit-compute-pipeline.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1100_compute-unit-compute-pipeline.yaml @@ -22,32 +22,32 @@ Panel Config: + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) - + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk + + (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) unit: Pct of Peak tips: mfma_flops_bf16_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 512) / 1000)) unit: Pct of Peak tips: mfma_flops_f16_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) unit: Pct of Peak tips: mfma_flops_f32_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) unit: Pct of Peak tips: mfma_flops_f64_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) unit: Pct of Peak tips: mfma_flops_i8_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) unit: Pct of Peak tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1200_lds.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1200_lds.yaml index d25a9d1bb8..c765733051 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1200_lds.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1200_lds.yaml @@ -25,7 +25,7 @@ Panel Config: Bandwidth (Pct-of-Peak): value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: Bank Conflict Rate: value: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1300_instruction-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1300_instruction-cache.yaml index 7558e6ae0e..bb69b4dfcb 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1300_instruction-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1300_instruction-cache.yaml @@ -18,7 +18,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) tips: Cache Hit: value: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1400_constant-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1400_constant-cache.yaml index 1a7c11364f..8e49b66757 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1400_constant-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1400_constant-cache.yaml @@ -18,7 +18,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) tips: Cache Hit: value: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1600_L1_cache.yaml index b4230140aa..4fe54909b0 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1600_L1_cache.yaml @@ -28,7 +28,7 @@ Panel Config: tips: Cache BW: value: - ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) tips: Cache Hit: @@ -118,9 +118,9 @@ Panel Config: unit: (Req + $normUnit) tips: Cache BW: - avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: Cache Accesses: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1700_L2_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1700_L2_cache.yaml index 46a87ed83a..64644851dd 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1700_L2_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx940/1700_L2_cache.yaml @@ -30,13 +30,13 @@ Panel Config: L2-EA Rd BW: value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: L2-EA Wr BW: value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/0200_system-speed-of-light.yaml index bf9f3e0de0..e25b9f66e3 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/0200_system-speed-of-light.yaml @@ -25,56 +25,56 @@ Panel Config: + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64)))) - / (EndNs - BeginNs))) + / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: (((($sclk * $numCU) * 64) * 2) / 1000) pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) - + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk + + (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) tips: VALU IOPs: - value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs))) + value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp))) unit: GIOP peak: (((($sclk * $numCU) * 64) * 2) / 1000) - pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - - BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) + pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp + - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) tips: MFMA FLOPs (BF16): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 4096) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 4096) / 1000)) tips: MFMA FLOPs (F16): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 4096) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 4096) / 1000)) tips: MFMA FLOPs (F32): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 256) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) tips: MFMA FLOPs (F64): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 256) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) tips: MFMA IOPs (Int8): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP peak: ((($sclk * $numCU) * 4096) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 4096) / 1000)) tips: Active CUs: @@ -123,11 +123,11 @@ Panel Config: tips: LDS BW: value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs))) + / (End_Timestamp - Start_Timestamp))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) @@ -144,10 +144,10 @@ Panel Config: pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) tips: Instr Cache BW: - value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Scalar L1D Cache Hit Rate: @@ -159,10 +159,10 @@ Panel Config: if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) tips: Scalar L1D Cache BW: - value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Vector L1D Cache Hit Rate: @@ -178,10 +178,10 @@ Panel Config: None)) tips: Vector L1D Cache BW: - value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: ((($sclk / 1000) * 64) * $numCU) - pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) tips: L2 Cache Hit Rate: @@ -194,19 +194,19 @@ Panel Config: tips: L2-Fabric Read BW: value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs)))) / $hbmBW) + * 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Write BW: value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs)))) / $hbmBW) + * 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Read Latency: value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum @@ -231,10 +231,10 @@ Panel Config: coll_level: SQ_LEVEL_WAVES tips: Instr Fetch BW: - value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32)) + value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32)) unit: GB/s peak: ((($sclk / 1000) * 32) * $numSQC) - pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC + pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC * (($sclk / 1000) * 32))) coll_level: SQ_IFETCH_LEVEL tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/0700_wavefront-launch.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/0700_wavefront-launch.yaml index 33288726f5..ba8398e5cb 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/0700_wavefront-launch.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/0700_wavefront-launch.yaml @@ -92,9 +92,9 @@ Panel Config: tips: Tips metric: Kernel Time (Nanosec): - avg: AVG((EndNs - BeginNs)) - min: MIN((EndNs - BeginNs)) - max: MAX((EndNs - BeginNs)) + avg: AVG((End_Timestamp - Start_Timestamp)) + min: MIN((End_Timestamp - Start_Timestamp)) + max: MAX((End_Timestamp - Start_Timestamp)) unit: ns tips: Kernel Time (Cycles): diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1100_compute-unit-compute-pipeline.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1100_compute-unit-compute-pipeline.yaml index 718ac72fb7..0119b4caf9 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1100_compute-unit-compute-pipeline.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1100_compute-unit-compute-pipeline.yaml @@ -22,32 +22,32 @@ Panel Config: + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) - + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk + + (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) unit: Pct of Peak tips: mfma_flops_bf16_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 512) / 1000)) unit: Pct of Peak tips: mfma_flops_f16_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) unit: Pct of Peak tips: mfma_flops_f32_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) unit: Pct of Peak tips: mfma_flops_f64_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) unit: Pct of Peak tips: mfma_flops_i8_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) unit: Pct of Peak tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1200_lds.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1200_lds.yaml index d25a9d1bb8..c765733051 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1200_lds.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1200_lds.yaml @@ -25,7 +25,7 @@ Panel Config: Bandwidth (Pct-of-Peak): value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: Bank Conflict Rate: value: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1300_instruction-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1300_instruction-cache.yaml index 7558e6ae0e..bb69b4dfcb 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1300_instruction-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1300_instruction-cache.yaml @@ -18,7 +18,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) tips: Cache Hit: value: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1400_constant-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1400_constant-cache.yaml index 1a7c11364f..8e49b66757 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1400_constant-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1400_constant-cache.yaml @@ -18,7 +18,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) tips: Cache Hit: value: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1600_L1_cache.yaml index b4230140aa..4fe54909b0 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1600_L1_cache.yaml @@ -28,7 +28,7 @@ Panel Config: tips: Cache BW: value: - ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) tips: Cache Hit: @@ -118,9 +118,9 @@ Panel Config: unit: (Req + $normUnit) tips: Cache BW: - avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: Cache Accesses: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1700_L2_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1700_L2_cache.yaml index 46a87ed83a..64644851dd 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1700_L2_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx941/1700_L2_cache.yaml @@ -30,13 +30,13 @@ Panel Config: L2-EA Rd BW: value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: L2-EA Wr BW: value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/0200_system-speed-of-light.yaml index bf9f3e0de0..e25b9f66e3 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/0200_system-speed-of-light.yaml @@ -25,56 +25,56 @@ Panel Config: + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64)))) - / (EndNs - BeginNs))) + / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: (((($sclk * $numCU) * 64) * 2) / 1000) pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16) + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) - + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk + + (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) tips: VALU IOPs: - value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs))) + value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp))) unit: GIOP peak: (((($sclk * $numCU) * 64) * 2) / 1000) - pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - - BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) + pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp + - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) tips: MFMA FLOPs (BF16): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 4096) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 4096) / 1000)) tips: MFMA FLOPs (F16): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 4096) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 4096) / 1000)) tips: MFMA FLOPs (F32): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 256) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) tips: MFMA FLOPs (F64): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))) unit: GFLOP peak: ((($sclk * $numCU) * 256) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) tips: MFMA IOPs (Int8): - value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))) + value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))) unit: GIOP peak: ((($sclk * $numCU) * 4096) / 1000) - pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 4096) / 1000)) tips: Active CUs: @@ -123,11 +123,11 @@ Panel Config: tips: LDS BW: value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs))) + / (End_Timestamp - Start_Timestamp))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT)) @@ -144,10 +144,10 @@ Panel Config: pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES))) tips: Instr Cache BW: - value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Scalar L1D Cache Hit Rate: @@ -159,10 +159,10 @@ Panel Config: if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None)) tips: Scalar L1D Cache BW: - value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64)) + value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64)) unit: GB/s peak: ((($sclk / 1000) * 64) * $numSQC) - pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk + pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk / 1000) * 64) * $numSQC)) tips: Vector L1D Cache Hit Rate: @@ -178,10 +178,10 @@ Panel Config: None)) tips: Vector L1D Cache BW: - value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: ((($sclk / 1000) * 64) * $numCU) - pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) tips: L2 Cache Hit Rate: @@ -194,19 +194,19 @@ Panel Config: tips: L2-Fabric Read BW: value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs)))) / $hbmBW) + * 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Write BW: value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s peak: $hbmBW pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs)))) / $hbmBW) + * 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW) tips: L2-Fabric Read Latency: value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum @@ -231,10 +231,10 @@ Panel Config: coll_level: SQ_LEVEL_WAVES tips: Instr Fetch BW: - value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32)) + value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32)) unit: GB/s peak: ((($sclk / 1000) * 32) * $numSQC) - pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC + pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC * (($sclk / 1000) * 32))) coll_level: SQ_IFETCH_LEVEL tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/0700_wavefront-launch.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/0700_wavefront-launch.yaml index 33288726f5..ba8398e5cb 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/0700_wavefront-launch.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/0700_wavefront-launch.yaml @@ -92,9 +92,9 @@ Panel Config: tips: Tips metric: Kernel Time (Nanosec): - avg: AVG((EndNs - BeginNs)) - min: MIN((EndNs - BeginNs)) - max: MAX((EndNs - BeginNs)) + avg: AVG((End_Timestamp - Start_Timestamp)) + min: MIN((End_Timestamp - Start_Timestamp)) + max: MAX((End_Timestamp - Start_Timestamp)) unit: ns tips: Kernel Time (Cycles): diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1100_compute-unit-compute-pipeline.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1100_compute-unit-compute-pipeline.yaml index 718ac72fb7..0119b4caf9 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1100_compute-unit-compute-pipeline.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1100_compute-unit-compute-pipeline.yaml @@ -22,32 +22,32 @@ Panel Config: + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) - + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk + + (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000)) unit: Pct of Peak tips: mfma_flops_bf16_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 512) / 1000)) unit: Pct of Peak tips: mfma_flops_f16_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) unit: Pct of Peak tips: mfma_flops_f32_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) unit: Pct of Peak tips: mfma_flops_f64_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 256) / 1000)) unit: Pct of Peak tips: mfma_flops_i8_pop: - value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))) + value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))) / ((($sclk * $numCU) * 1024) / 1000)) unit: Pct of Peak tips: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1200_lds.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1200_lds.yaml index d25a9d1bb8..c765733051 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1200_lds.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1200_lds.yaml @@ -25,7 +25,7 @@ Panel Config: Bandwidth (Pct-of-Peak): value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) - / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) + / (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128))) tips: Bank Conflict Rate: value: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1300_instruction-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1300_instruction-cache.yaml index 7558e6ae0e..bb69b4dfcb 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1300_instruction-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1300_instruction-cache.yaml @@ -18,7 +18,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) tips: Cache Hit: value: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1400_constant-cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1400_constant-cache.yaml index 1a7c11364f..8e49b66757 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1400_constant-cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1400_constant-cache.yaml @@ -18,7 +18,7 @@ Panel Config: metric: Bandwidth: value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC) - * (EndNs - BeginNs)))) + * (End_Timestamp - Start_Timestamp)))) tips: Cache Hit: value: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1600_L1_cache.yaml index b4230140aa..4fe54909b0 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1600_L1_cache.yaml @@ -28,7 +28,7 @@ Panel Config: tips: Cache BW: value: - ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))) + ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))) / ((($sclk / 1000) * 64) * $numCU)) tips: Cache Hit: @@ -118,9 +118,9 @@ Panel Config: unit: (Req + $normUnit) tips: Cache BW: - avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) - max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))) + avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) + max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: Cache Accesses: diff --git a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1700_L2_cache.yaml b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1700_L2_cache.yaml index 46a87ed83a..64644851dd 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1700_L2_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_soc/analysis_configs/gfx942/1700_L2_cache.yaml @@ -30,13 +30,13 @@ Panel Config: L2-EA Rd BW: value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum) - * 64)) / (EndNs - BeginNs))) + * 64)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: L2-EA Wr BW: value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum) - * 32)) / (EndNs - BeginNs))) + * 32)) / (End_Timestamp - Start_Timestamp))) unit: GB/s tips: