Update metric configs that were incorrectly using legacy rocprof headers

Signed-off-by: colramos-amd <colramos@amd.com>


[ROCm/rocprofiler-compute commit: 4174788e59]
This commit is contained in:
colramos-amd
2024-02-01 16:22:44 -06:00
والد 075d5b7845
کامیت 4fbbe78d08
49 فایلهای تغییر یافته به همراه 311 افزوده شده و 311 حذف شده
@@ -106,11 +106,11 @@ Panel Config:
tips:
LDS BW:
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)))
/ (End_Timestamp - Start_Timestamp)))
unit: GB/sec
peak: (($sclk * $numCU) * 0.128)
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
tips:
LDS Bank Conflict:
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
@@ -127,10 +127,10 @@ Panel Config:
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
tips:
Instr Cache BW:
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Scalar L1D Cache Hit Rate:
@@ -142,10 +142,10 @@ Panel Config:
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
tips:
Scalar L1D Cache BW:
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Vector L1D Cache Hit Rate:
@@ -161,10 +161,10 @@ Panel Config:
None))
tips:
Vector L1D Cache BW:
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numCU)
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
L2 Cache Hit Rate:
@@ -177,19 +177,19 @@ Panel Config:
tips:
L2-Fabric Read BW:
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Write BW:
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Read Latency:
value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
@@ -214,10 +214,10 @@ Panel Config:
coll_level: SQ_LEVEL_WAVES
tips:
Instr Fetch BW:
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
unit: GB/s
peak: ((($sclk / 1000) * 32) * $numSQC)
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
* (($sclk / 1000) * 32)))
coll_level: SQ_IFETCH_LEVEL
tips:
@@ -20,15 +20,15 @@ Panel Config:
tips: Tips
metric:
Grid Size:
avg: AVG(grd)
min: MIN(grd)
max: MAX(grd)
avg: AVG(Grid_Size)
min: MIN(Grid_Size)
max: MAX(Grid_Size)
unit: Work Items
tips:
Workgroup Size:
avg: AVG(wgr)
min: MIN(wgr)
max: MAX(wgr)
avg: AVG(Workgroup_Size)
min: MIN(Workgroup_Size)
max: MAX(Workgroup_Size)
unit: Work Items
tips:
Total Wavefronts:
@@ -50,33 +50,33 @@ Panel Config:
unit: Wavefronts
tips:
VGPRs:
avg: AVG(arch_vgpr)
min: MIN(arch_vgpr)
max: MAX(arch_vgpr)
avg: AVG(Arch_VGPR)
min: MIN(Arch_VGPR)
max: MAX(Arch_VGPR)
unit: Registers
tips:
AGPRs:
avg: AVG(accum_vgpr)
min: MIN(accum_vgpr)
max: MAX(accum_vgpr)
avg: AVG(Accum_VGPR)
min: MIN(Accum_VGPR)
max: MAX(Accum_VGPR)
unit: Registers
tips:
SGPRs:
avg: AVG(sgpr)
min: MIN(sgpr)
max: MAX(sgpr)
avg: AVG(SGPR)
min: MIN(SGPR)
max: MAX(SGPR)
unit: Registers
tips:
LDS Allocation:
avg: AVG(lds)
min: MIN(lds)
max: MAX(lds)
avg: AVG(LDS_Per_Workgroup)
min: MIN(LDS_Per_Workgroup)
max: MAX(LDS_Per_Workgroup)
unit: Bytes
tips:
Scratch Allocation:
avg: AVG(scr)
min: MIN(scr)
max: MAX(scr)
avg: AVG(Scratch_Per_Workitem)
min: MIN(Scratch_Per_Workitem)
max: MAX(Scratch_Per_Workitem)
unit: Bytes
tips:
@@ -92,9 +92,9 @@ Panel Config:
tips: Tips
metric:
Kernel Time (Nanosec):
avg: AVG((EndNs - BeginNs))
min: MIN((EndNs - BeginNs))
max: MAX((EndNs - BeginNs))
avg: AVG((End_Timestamp - Start_Timestamp))
min: MIN((End_Timestamp - Start_Timestamp))
max: MAX((End_Timestamp - Start_Timestamp))
unit: ns
tips:
Kernel Time (Cycles):
@@ -32,7 +32,7 @@ Panel Config:
tips:
Bandwidth (Pct-of-Peak):
value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
unit: Pct of Peak
tips:
Bank Conflict Rate:
@@ -24,7 +24,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
unit: Pct of Peak
tips:
Cache Hit:
@@ -24,7 +24,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
unit: Pct of Peak
tips:
Cache Hit:
@@ -33,7 +33,7 @@ Panel Config:
unit: Pct of Peak
tips:
Cache BW:
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
unit: Pct of Peak
tips:
@@ -141,9 +141,9 @@ Panel Config:
unit: (Req + $normUnit)
tips:
Cache BW:
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
Cache Accesses:
@@ -30,12 +30,12 @@ Panel Config:
tips:
L2-EA Rd BW:
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
L2-EA Wr BW:
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
@@ -63,7 +63,7 @@ Panel Config:
alias: vgpr_
tips:
SGPR:
value: ROUND(AVG(sgpr), 0)
value: ROUND(AVG(SGPR), 0)
alias: sgpr_
tips:
LDS Allocation:
@@ -71,7 +71,7 @@ Panel Config:
alias: lds_alloc_
tips:
Scratch Allocation:
value: ROUND(AVG(scr), 0)
value: ROUND(AVG(Scratch_Per_Workitem), 0)
alias: scratch_alloc_
tips:
Wavefronts:
@@ -106,11 +106,11 @@ Panel Config:
tips:
LDS BW:
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)))
/ (End_Timestamp - Start_Timestamp)))
unit: GB/sec
peak: (($sclk * $numCU) * 0.128)
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
tips:
LDS Bank Conflict:
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
@@ -127,10 +127,10 @@ Panel Config:
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
tips:
Instr Cache BW:
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Scalar L1D Cache Hit Rate:
@@ -142,10 +142,10 @@ Panel Config:
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
tips:
Scalar L1D Cache BW:
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Vector L1D Cache Hit Rate:
@@ -161,10 +161,10 @@ Panel Config:
None))
tips:
Vector L1D Cache BW:
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numCU)
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
L2 Cache Hit Rate:
@@ -177,19 +177,19 @@ Panel Config:
tips:
L2-Fabric Read BW:
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Write BW:
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Read Latency:
value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
@@ -214,10 +214,10 @@ Panel Config:
coll_level: SQ_LEVEL_WAVES
tips:
Instr Fetch BW:
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
unit: GB/s
peak: ((($sclk / 1000) * 32) * $numSQC)
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
* (($sclk / 1000) * 32)))
coll_level: SQ_IFETCH_LEVEL
tips:
@@ -82,15 +82,15 @@ Panel Config:
tips:
SGPR:
#alias: sgpr_
value: ROUND(AVG(sgpr), 0)
value: ROUND(AVG(SGPR), 0)
tips:
LDS Allocation:
#alias: lds_alloc_
value: ROUND(AVG(lds), 0)
value: ROUND(AVG(LDS_Per_Workgroup), 0)
tips:
Scratch Allocation:
#alias: scratch_alloc_
value: ROUND(AVG(scr), 0)
value: ROUND(AVG(Scratch_Per_Workitem), 0)
tips:
Wavefronts:
#alias: wavefronts_
@@ -20,15 +20,15 @@ Panel Config:
tips: Tips
metric:
Grid Size:
avg: AVG(grd)
min: MIN(grd)
max: MAX(grd)
avg: AVG(Grid_Size)
min: MIN(Grid_Size)
max: MAX(Grid_Size)
unit: Work Items
tips:
Workgroup Size:
avg: AVG(wgr)
min: MIN(wgr)
max: MAX(wgr)
avg: AVG(Workgroup_Size)
min: MIN(Workgroup_Size)
max: MAX(Workgroup_Size)
unit: Work Items
tips:
Total Wavefronts:
@@ -50,33 +50,33 @@ Panel Config:
unit: Wavefronts
tips:
VGPRs:
avg: AVG(arch_vgpr)
min: MIN(arch_vgpr)
max: MAX(arch_vgpr)
avg: AVG(Arch_VGPR)
min: MIN(Arch_VGPR)
max: MAX(Arch_VGPR)
unit: Registers
tips:
AGPRs:
avg: AVG(accum_vgpr)
min: MIN(accum_vgpr)
max: MAX(accum_vgpr)
avg: AVG(Accum_VGPR)
min: MIN(Accum_VGPR)
max: MAX(Accum_VGPR)
unit: Registers
tips:
SGPRs:
avg: AVG(sgpr)
min: MIN(sgpr)
max: MAX(sgpr)
avg: AVG(SGPR)
min: MIN(SGPR)
max: MAX(SGPR)
unit: Registers
tips:
LDS Allocation:
avg: AVG(lds)
min: MIN(lds)
max: MAX(lds)
avg: AVG(LDS_Per_Workgroup)
min: MIN(LDS_Per_Workgroup)
max: MAX(LDS_Per_Workgroup)
unit: Bytes
tips:
Scratch Allocation:
avg: AVG(scr)
min: MIN(scr)
max: MAX(scr)
avg: AVG(Scratch_Per_Workitem)
min: MIN(Scratch_Per_Workitem)
max: MAX(Scratch_Per_Workitem)
unit: Bytes
tips:
@@ -92,9 +92,9 @@ Panel Config:
tips: Tips
metric:
Kernel Time (Nanosec):
avg: AVG((EndNs - BeginNs))
min: MIN((EndNs - BeginNs))
max: MAX((EndNs - BeginNs))
avg: AVG((End_Timestamp - Start_Timestamp))
min: MIN((End_Timestamp - Start_Timestamp))
max: MAX((End_Timestamp - Start_Timestamp))
unit: ns
tips:
Kernel Time (Cycles):
@@ -32,7 +32,7 @@ Panel Config:
tips:
Bandwidth (Pct-of-Peak):
value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
unit: Pct of Peak
tips:
Bank Conflict Rate:
@@ -24,7 +24,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
unit: Pct of Peak
tips:
Cache Hit:
@@ -24,7 +24,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
unit: Pct of Peak
tips:
Cache Hit:
@@ -33,7 +33,7 @@ Panel Config:
unit: Pct of Peak
tips:
Cache BW:
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
unit: Pct of Peak
tips:
@@ -141,9 +141,9 @@ Panel Config:
unit: (Req + $normUnit)
tips:
Cache BW:
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
Cache Accesses:
@@ -30,12 +30,12 @@ Panel Config:
tips:
L2-EA Rd BW:
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
L2-EA Wr BW:
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
@@ -25,56 +25,56 @@ Panel Config:
+ (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
+ SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64
+ SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64))))
/ (EndNs - BeginNs)))
/ (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
* $numCU) * 64) * 2) / 1000))
tips:
VALU IOPs:
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs)))
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp)))
unit: GIOP
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs
- BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp
- Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
tips:
MFMA FLOPs (BF16):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 1024) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
tips:
MFMA FLOPs (F16):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 1024) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
tips:
MFMA FLOPs (F32):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 256) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
tips:
MFMA FLOPs (F64):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 256) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
tips:
MFMA IOPs (Int8):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GIOP
peak: ((($sclk * $numCU) * 1024) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
tips:
Active CUs:
@@ -123,11 +123,11 @@ Panel Config:
tips:
LDS BW:
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)))
/ (End_Timestamp - Start_Timestamp)))
unit: GB/sec
peak: (($sclk * $numCU) * 0.128)
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
tips:
LDS Bank Conflict:
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
@@ -144,10 +144,10 @@ Panel Config:
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
tips:
Instr Cache BW:
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Scalar L1D Cache Hit Rate:
@@ -159,10 +159,10 @@ Panel Config:
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
tips:
Scalar L1D Cache BW:
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Vector L1D Cache Hit Rate:
@@ -178,10 +178,10 @@ Panel Config:
None))
tips:
Vector L1D Cache BW:
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numCU)
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
L2 Cache Hit Rate:
@@ -194,19 +194,19 @@ Panel Config:
tips:
L2-Fabric Read BW:
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Write BW:
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Read Latency:
value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
@@ -231,10 +231,10 @@ Panel Config:
coll_level: SQ_LEVEL_WAVES
tips:
Instr Fetch BW:
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
unit: GB/s
peak: ((($sclk / 1000) * 32) * $numSQC)
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
* (($sclk / 1000) * 32)))
coll_level: SQ_IFETCH_LEVEL
tips:
@@ -82,15 +82,15 @@ Panel Config:
tips:
SGPR:
#alias: sgpr_
value: ROUND(AVG(sgpr), 0)
value: ROUND(AVG(SGPR), 0)
tips:
LDS Allocation:
#alias: lds_alloc_
value: ROUND(AVG(lds), 0)
value: ROUND(AVG(LDS_Per_Workgroup), 0)
tips:
Scratch Allocation:
#alias: scratch_alloc_
value: ROUND(AVG(scr), 0)
value: ROUND(AVG(Scratch_Per_Workitem), 0)
tips:
Wavefronts:
#alias: wavefronts_
@@ -20,15 +20,15 @@ Panel Config:
tips: Tips
metric:
Grid Size:
avg: AVG(grd)
min: MIN(grd)
max: MAX(grd)
avg: AVG(Grid_Size)
min: MIN(Grid_Size)
max: MAX(Grid_Size)
unit: Work Items
tips:
Workgroup Size:
avg: AVG(wgr)
min: MIN(wgr)
max: MAX(wgr)
avg: AVG(Workgroup_Size)
min: MIN(Workgroup_Size)
max: MAX(Workgroup_Size)
unit: Work Items
tips:
Total Wavefronts:
@@ -50,33 +50,33 @@ Panel Config:
unit: Wavefronts
tips:
VGPRs:
avg: AVG(arch_vgpr)
min: MIN(arch_vgpr)
max: MAX(arch_vgpr)
avg: AVG(Arch_VGPR)
min: MIN(Arch_VGPR)
max: MAX(Arch_VGPR)
unit: Registers
tips:
AGPRs:
avg: AVG(accum_vgpr)
min: MIN(accum_vgpr)
max: MAX(accum_vgpr)
avg: AVG(Accum_VGPR)
min: MIN(Accum_VGPR)
max: MAX(Accum_VGPR)
unit: Registers
tips:
SGPRs:
avg: AVG(sgpr)
min: MIN(sgpr)
max: MAX(sgpr)
avg: AVG(SGPR)
min: MIN(SGPR)
max: MAX(SGPR)
unit: Registers
tips:
LDS Allocation:
avg: AVG(lds)
min: MIN(lds)
max: MAX(lds)
avg: AVG(LDS_Per_Workgroup)
min: MIN(LDS_Per_Workgroup)
max: MAX(LDS_Per_Workgroup)
unit: Bytes
tips:
Scratch Allocation:
avg: AVG(scr)
min: MIN(scr)
max: MAX(scr)
avg: AVG(Scratch_Per_Workitem)
min: MIN(Scratch_Per_Workitem)
max: MAX(Scratch_Per_Workitem)
unit: Bytes
tips:
@@ -92,9 +92,9 @@ Panel Config:
tips: Tips
metric:
Kernel Time (Nanosec):
avg: AVG((EndNs - BeginNs))
min: MIN((EndNs - BeginNs))
max: MAX((EndNs - BeginNs))
avg: AVG((End_Timestamp - Start_Timestamp))
min: MIN((End_Timestamp - Start_Timestamp))
max: MAX((End_Timestamp - Start_Timestamp))
unit: ns
tips:
Kernel Time (Cycles):
@@ -27,32 +27,32 @@ Panel Config:
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
* $numCU) * 64) * 2) / 1000))
unit: Pct of Peak
tips:
mfma_flops_bf16_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 512) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f16_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f32_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f64_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
unit: Pct of Peak
tips:
mfma_flops_i8_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
unit: Pct of Peak
tips:
@@ -32,7 +32,7 @@ Panel Config:
tips:
Bandwidth (Pct-of-Peak):
value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
unit: Pct of Peak
tips:
Bank Conflict Rate:
@@ -24,7 +24,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
unit: Pct of Peak
tips:
Cache Hit:
@@ -24,7 +24,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
unit: Pct of Peak
tips:
Cache Hit:
@@ -33,7 +33,7 @@ Panel Config:
unit: Pct of Peak
tips:
Cache BW:
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
unit: Pct of Peak
tips:
@@ -141,9 +141,9 @@ Panel Config:
unit: (Req + $normUnit)
tips:
Cache BW:
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
Cache Accesses:
@@ -30,12 +30,12 @@ Panel Config:
tips:
L2-EA Rd BW:
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
L2-EA Wr BW:
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
@@ -25,56 +25,56 @@ Panel Config:
+ (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
+ SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64
+ SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64))))
/ (EndNs - BeginNs)))
/ (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
* $numCU) * 64) * 2) / 1000))
tips:
VALU IOPs:
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs)))
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp)))
unit: GIOP
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs
- BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp
- Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
tips:
MFMA FLOPs (BF16):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
MFMA FLOPs (F16):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
MFMA FLOPs (F32):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 256) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
tips:
MFMA FLOPs (F64):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 256) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
tips:
MFMA IOPs (Int8):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GIOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
Active CUs:
@@ -123,11 +123,11 @@ Panel Config:
tips:
LDS BW:
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)))
/ (End_Timestamp - Start_Timestamp)))
unit: GB/sec
peak: (($sclk * $numCU) * 0.128)
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
tips:
LDS Bank Conflict:
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
@@ -144,10 +144,10 @@ Panel Config:
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
tips:
Instr Cache BW:
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Scalar L1D Cache Hit Rate:
@@ -159,10 +159,10 @@ Panel Config:
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
tips:
Scalar L1D Cache BW:
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Vector L1D Cache Hit Rate:
@@ -178,10 +178,10 @@ Panel Config:
None))
tips:
Vector L1D Cache BW:
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numCU)
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
L2 Cache Hit Rate:
@@ -194,19 +194,19 @@ Panel Config:
tips:
L2-Fabric Read BW:
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Write BW:
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Read Latency:
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
@@ -231,10 +231,10 @@ Panel Config:
coll_level: SQ_LEVEL_WAVES
tips:
Instr Fetch BW:
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
unit: GB/s
peak: ((($sclk / 1000) * 32) * $numSQC)
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
* (($sclk / 1000) * 32)))
coll_level: SQ_IFETCH_LEVEL
tips:
@@ -92,9 +92,9 @@ Panel Config:
tips: Tips
metric:
Kernel Time (Nanosec):
avg: AVG((EndNs - BeginNs))
min: MIN((EndNs - BeginNs))
max: MAX((EndNs - BeginNs))
avg: AVG((End_Timestamp - Start_Timestamp))
min: MIN((End_Timestamp - Start_Timestamp))
max: MAX((End_Timestamp - Start_Timestamp))
unit: ns
tips:
Kernel Time (Cycles):
@@ -22,32 +22,32 @@ Panel Config:
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
* $numCU) * 64) * 2) / 1000))
unit: Pct of Peak
tips:
mfma_flops_bf16_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 512) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f16_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f32_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f64_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
unit: Pct of Peak
tips:
mfma_flops_i8_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
unit: Pct of Peak
tips:
@@ -25,7 +25,7 @@ Panel Config:
Bandwidth (Pct-of-Peak):
value:
AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
tips:
Bank Conflict Rate:
value:
@@ -18,7 +18,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
tips:
Cache Hit:
value:
@@ -18,7 +18,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
tips:
Cache Hit:
value:
@@ -28,7 +28,7 @@ Panel Config:
tips:
Cache BW:
value:
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
Cache Hit:
@@ -118,9 +118,9 @@ Panel Config:
unit: (Req + $normUnit)
tips:
Cache BW:
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
Cache Accesses:
@@ -30,13 +30,13 @@ Panel Config:
L2-EA Rd BW:
value:
AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
L2-EA Wr BW:
value:
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
@@ -25,56 +25,56 @@ Panel Config:
+ (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
+ SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64
+ SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64))))
/ (EndNs - BeginNs)))
/ (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
* $numCU) * 64) * 2) / 1000))
tips:
VALU IOPs:
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs)))
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp)))
unit: GIOP
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs
- BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp
- Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
tips:
MFMA FLOPs (BF16):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
MFMA FLOPs (F16):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
MFMA FLOPs (F32):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 256) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
tips:
MFMA FLOPs (F64):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 256) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
tips:
MFMA IOPs (Int8):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GIOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
Active CUs:
@@ -123,11 +123,11 @@ Panel Config:
tips:
LDS BW:
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)))
/ (End_Timestamp - Start_Timestamp)))
unit: GB/sec
peak: (($sclk * $numCU) * 0.128)
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
tips:
LDS Bank Conflict:
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
@@ -144,10 +144,10 @@ Panel Config:
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
tips:
Instr Cache BW:
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Scalar L1D Cache Hit Rate:
@@ -159,10 +159,10 @@ Panel Config:
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
tips:
Scalar L1D Cache BW:
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Vector L1D Cache Hit Rate:
@@ -178,10 +178,10 @@ Panel Config:
None))
tips:
Vector L1D Cache BW:
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numCU)
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
L2 Cache Hit Rate:
@@ -194,19 +194,19 @@ Panel Config:
tips:
L2-Fabric Read BW:
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Write BW:
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Read Latency:
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
@@ -231,10 +231,10 @@ Panel Config:
coll_level: SQ_LEVEL_WAVES
tips:
Instr Fetch BW:
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
unit: GB/s
peak: ((($sclk / 1000) * 32) * $numSQC)
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
* (($sclk / 1000) * 32)))
coll_level: SQ_IFETCH_LEVEL
tips:
@@ -92,9 +92,9 @@ Panel Config:
tips: Tips
metric:
Kernel Time (Nanosec):
avg: AVG((EndNs - BeginNs))
min: MIN((EndNs - BeginNs))
max: MAX((EndNs - BeginNs))
avg: AVG((End_Timestamp - Start_Timestamp))
min: MIN((End_Timestamp - Start_Timestamp))
max: MAX((End_Timestamp - Start_Timestamp))
unit: ns
tips:
Kernel Time (Cycles):
@@ -22,32 +22,32 @@ Panel Config:
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
* $numCU) * 64) * 2) / 1000))
unit: Pct of Peak
tips:
mfma_flops_bf16_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 512) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f16_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f32_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f64_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
unit: Pct of Peak
tips:
mfma_flops_i8_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
unit: Pct of Peak
tips:
@@ -25,7 +25,7 @@ Panel Config:
Bandwidth (Pct-of-Peak):
value:
AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
tips:
Bank Conflict Rate:
value:
@@ -18,7 +18,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
tips:
Cache Hit:
value:
@@ -18,7 +18,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
tips:
Cache Hit:
value:
@@ -28,7 +28,7 @@ Panel Config:
tips:
Cache BW:
value:
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
Cache Hit:
@@ -118,9 +118,9 @@ Panel Config:
unit: (Req + $normUnit)
tips:
Cache BW:
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
Cache Accesses:
@@ -30,13 +30,13 @@ Panel Config:
L2-EA Rd BW:
value:
AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
L2-EA Wr BW:
value:
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
@@ -25,56 +25,56 @@ Panel Config:
+ (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
+ SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64
+ SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64))))
/ (EndNs - BeginNs)))
/ (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
* $numCU) * 64) * 2) / 1000))
tips:
VALU IOPs:
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs)))
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp)))
unit: GIOP
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs
- BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp
- Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
tips:
MFMA FLOPs (BF16):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
MFMA FLOPs (F16):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
MFMA FLOPs (F32):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 256) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
tips:
MFMA FLOPs (F64):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GFLOP
peak: ((($sclk * $numCU) * 256) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
tips:
MFMA IOPs (Int8):
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
unit: GIOP
peak: ((($sclk * $numCU) * 4096) / 1000)
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 4096) / 1000))
tips:
Active CUs:
@@ -123,11 +123,11 @@ Panel Config:
tips:
LDS BW:
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)))
/ (End_Timestamp - Start_Timestamp)))
unit: GB/sec
peak: (($sclk * $numCU) * 0.128)
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
tips:
LDS Bank Conflict:
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
@@ -144,10 +144,10 @@ Panel Config:
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
tips:
Instr Cache BW:
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Scalar L1D Cache Hit Rate:
@@ -159,10 +159,10 @@ Panel Config:
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
tips:
Scalar L1D Cache BW:
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numSQC)
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
/ 1000) * 64) * $numSQC))
tips:
Vector L1D Cache Hit Rate:
@@ -178,10 +178,10 @@ Panel Config:
None))
tips:
Vector L1D Cache BW:
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: ((($sclk / 1000) * 64) * $numCU)
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
L2 Cache Hit Rate:
@@ -194,19 +194,19 @@ Panel Config:
tips:
L2-Fabric Read BW:
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Write BW:
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
peak: $hbmBW
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
tips:
L2-Fabric Read Latency:
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
@@ -231,10 +231,10 @@ Panel Config:
coll_level: SQ_LEVEL_WAVES
tips:
Instr Fetch BW:
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
unit: GB/s
peak: ((($sclk / 1000) * 32) * $numSQC)
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
* (($sclk / 1000) * 32)))
coll_level: SQ_IFETCH_LEVEL
tips:
@@ -92,9 +92,9 @@ Panel Config:
tips: Tips
metric:
Kernel Time (Nanosec):
avg: AVG((EndNs - BeginNs))
min: MIN((EndNs - BeginNs))
max: MAX((EndNs - BeginNs))
avg: AVG((End_Timestamp - Start_Timestamp))
min: MIN((End_Timestamp - Start_Timestamp))
max: MAX((End_Timestamp - Start_Timestamp))
unit: ns
tips:
Kernel Time (Cycles):
@@ -22,32 +22,32 @@ Panel Config:
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
* $numCU) * 64) * 2) / 1000))
unit: Pct of Peak
tips:
mfma_flops_bf16_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 512) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f16_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f32_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
unit: Pct of Peak
tips:
mfma_flops_f64_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 256) / 1000))
unit: Pct of Peak
tips:
mfma_flops_i8_pop:
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk * $numCU) * 1024) / 1000))
unit: Pct of Peak
tips:
@@ -25,7 +25,7 @@ Panel Config:
Bandwidth (Pct-of-Peak):
value:
AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
tips:
Bank Conflict Rate:
value:
@@ -18,7 +18,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
tips:
Cache Hit:
value:
@@ -18,7 +18,7 @@ Panel Config:
metric:
Bandwidth:
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
* (EndNs - BeginNs))))
* (End_Timestamp - Start_Timestamp))))
tips:
Cache Hit:
value:
@@ -28,7 +28,7 @@ Panel Config:
tips:
Cache BW:
value:
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
/ ((($sclk / 1000) * 64) * $numCU))
tips:
Cache Hit:
@@ -118,9 +118,9 @@ Panel Config:
unit: (Req + $normUnit)
tips:
Cache BW:
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
Cache Accesses:
@@ -30,13 +30,13 @@ Panel Config:
L2-EA Rd BW:
value:
AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
* 64)) / (EndNs - BeginNs)))
* 64)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips:
L2-EA Wr BW:
value:
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
* 32)) / (EndNs - BeginNs)))
* 32)) / (End_Timestamp - Start_Timestamp)))
unit: GB/s
tips: