Update metric configs that were incorrectly using legacy rocprof headers
Signed-off-by: colramos-amd <colramos@amd.com>
[ROCm/rocprofiler-compute commit: 4174788e59]
This commit is contained in:
+14
-14
@@ -106,11 +106,11 @@ Panel Config:
|
||||
tips:
|
||||
LDS BW:
|
||||
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)))
|
||||
/ (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/sec
|
||||
peak: (($sclk * $numCU) * 0.128)
|
||||
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
LDS Bank Conflict:
|
||||
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
@@ -127,10 +127,10 @@ Panel Config:
|
||||
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
|
||||
tips:
|
||||
Instr Cache BW:
|
||||
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Scalar L1D Cache Hit Rate:
|
||||
@@ -142,10 +142,10 @@ Panel Config:
|
||||
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
|
||||
tips:
|
||||
Scalar L1D Cache BW:
|
||||
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Vector L1D Cache Hit Rate:
|
||||
@@ -161,10 +161,10 @@ Panel Config:
|
||||
None))
|
||||
tips:
|
||||
Vector L1D Cache BW:
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numCU)
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
L2 Cache Hit Rate:
|
||||
@@ -177,19 +177,19 @@ Panel Config:
|
||||
tips:
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Write BW:
|
||||
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Read Latency:
|
||||
value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
@@ -214,10 +214,10 @@ Panel Config:
|
||||
coll_level: SQ_LEVEL_WAVES
|
||||
tips:
|
||||
Instr Fetch BW:
|
||||
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
|
||||
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 32) * $numSQC)
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
|
||||
* (($sclk / 1000) * 32)))
|
||||
coll_level: SQ_IFETCH_LEVEL
|
||||
tips:
|
||||
|
||||
+24
-24
@@ -20,15 +20,15 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Grid Size:
|
||||
avg: AVG(grd)
|
||||
min: MIN(grd)
|
||||
max: MAX(grd)
|
||||
avg: AVG(Grid_Size)
|
||||
min: MIN(Grid_Size)
|
||||
max: MAX(Grid_Size)
|
||||
unit: Work Items
|
||||
tips:
|
||||
Workgroup Size:
|
||||
avg: AVG(wgr)
|
||||
min: MIN(wgr)
|
||||
max: MAX(wgr)
|
||||
avg: AVG(Workgroup_Size)
|
||||
min: MIN(Workgroup_Size)
|
||||
max: MAX(Workgroup_Size)
|
||||
unit: Work Items
|
||||
tips:
|
||||
Total Wavefronts:
|
||||
@@ -50,33 +50,33 @@ Panel Config:
|
||||
unit: Wavefronts
|
||||
tips:
|
||||
VGPRs:
|
||||
avg: AVG(arch_vgpr)
|
||||
min: MIN(arch_vgpr)
|
||||
max: MAX(arch_vgpr)
|
||||
avg: AVG(Arch_VGPR)
|
||||
min: MIN(Arch_VGPR)
|
||||
max: MAX(Arch_VGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
AGPRs:
|
||||
avg: AVG(accum_vgpr)
|
||||
min: MIN(accum_vgpr)
|
||||
max: MAX(accum_vgpr)
|
||||
avg: AVG(Accum_VGPR)
|
||||
min: MIN(Accum_VGPR)
|
||||
max: MAX(Accum_VGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
SGPRs:
|
||||
avg: AVG(sgpr)
|
||||
min: MIN(sgpr)
|
||||
max: MAX(sgpr)
|
||||
avg: AVG(SGPR)
|
||||
min: MIN(SGPR)
|
||||
max: MAX(SGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
LDS Allocation:
|
||||
avg: AVG(lds)
|
||||
min: MIN(lds)
|
||||
max: MAX(lds)
|
||||
avg: AVG(LDS_Per_Workgroup)
|
||||
min: MIN(LDS_Per_Workgroup)
|
||||
max: MAX(LDS_Per_Workgroup)
|
||||
unit: Bytes
|
||||
tips:
|
||||
Scratch Allocation:
|
||||
avg: AVG(scr)
|
||||
min: MIN(scr)
|
||||
max: MAX(scr)
|
||||
avg: AVG(Scratch_Per_Workitem)
|
||||
min: MIN(Scratch_Per_Workitem)
|
||||
max: MAX(Scratch_Per_Workitem)
|
||||
unit: Bytes
|
||||
tips:
|
||||
|
||||
@@ -92,9 +92,9 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
avg: AVG((EndNs - BeginNs))
|
||||
min: MIN((EndNs - BeginNs))
|
||||
max: MAX((EndNs - BeginNs))
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
unit: ns
|
||||
tips:
|
||||
Kernel Time (Cycles):
|
||||
|
||||
+1
-1
@@ -32,7 +32,7 @@ Panel Config:
|
||||
tips:
|
||||
Bandwidth (Pct-of-Peak):
|
||||
value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Bank Conflict Rate:
|
||||
|
||||
+1
-1
@@ -24,7 +24,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Cache Hit:
|
||||
|
||||
+1
-1
@@ -24,7 +24,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Cache Hit:
|
||||
|
||||
+4
-4
@@ -33,7 +33,7 @@ Panel Config:
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Cache BW:
|
||||
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
@@ -141,9 +141,9 @@ Panel Config:
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache BW:
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
Cache Accesses:
|
||||
|
||||
+2
-2
@@ -30,12 +30,12 @@ Panel Config:
|
||||
tips:
|
||||
L2-EA Rd BW:
|
||||
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
L2-EA Wr BW:
|
||||
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
|
||||
|
||||
+2
-2
@@ -63,7 +63,7 @@ Panel Config:
|
||||
alias: vgpr_
|
||||
tips:
|
||||
SGPR:
|
||||
value: ROUND(AVG(sgpr), 0)
|
||||
value: ROUND(AVG(SGPR), 0)
|
||||
alias: sgpr_
|
||||
tips:
|
||||
LDS Allocation:
|
||||
@@ -71,7 +71,7 @@ Panel Config:
|
||||
alias: lds_alloc_
|
||||
tips:
|
||||
Scratch Allocation:
|
||||
value: ROUND(AVG(scr), 0)
|
||||
value: ROUND(AVG(Scratch_Per_Workitem), 0)
|
||||
alias: scratch_alloc_
|
||||
tips:
|
||||
Wavefronts:
|
||||
|
||||
+14
-14
@@ -106,11 +106,11 @@ Panel Config:
|
||||
tips:
|
||||
LDS BW:
|
||||
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)))
|
||||
/ (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/sec
|
||||
peak: (($sclk * $numCU) * 0.128)
|
||||
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
LDS Bank Conflict:
|
||||
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
@@ -127,10 +127,10 @@ Panel Config:
|
||||
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
|
||||
tips:
|
||||
Instr Cache BW:
|
||||
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Scalar L1D Cache Hit Rate:
|
||||
@@ -142,10 +142,10 @@ Panel Config:
|
||||
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
|
||||
tips:
|
||||
Scalar L1D Cache BW:
|
||||
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Vector L1D Cache Hit Rate:
|
||||
@@ -161,10 +161,10 @@ Panel Config:
|
||||
None))
|
||||
tips:
|
||||
Vector L1D Cache BW:
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numCU)
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
L2 Cache Hit Rate:
|
||||
@@ -177,19 +177,19 @@ Panel Config:
|
||||
tips:
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Write BW:
|
||||
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Read Latency:
|
||||
value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
@@ -214,10 +214,10 @@ Panel Config:
|
||||
coll_level: SQ_LEVEL_WAVES
|
||||
tips:
|
||||
Instr Fetch BW:
|
||||
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
|
||||
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 32) * $numSQC)
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
|
||||
* (($sclk / 1000) * 32)))
|
||||
coll_level: SQ_IFETCH_LEVEL
|
||||
tips:
|
||||
|
||||
+3
-3
@@ -82,15 +82,15 @@ Panel Config:
|
||||
tips:
|
||||
SGPR:
|
||||
#alias: sgpr_
|
||||
value: ROUND(AVG(sgpr), 0)
|
||||
value: ROUND(AVG(SGPR), 0)
|
||||
tips:
|
||||
LDS Allocation:
|
||||
#alias: lds_alloc_
|
||||
value: ROUND(AVG(lds), 0)
|
||||
value: ROUND(AVG(LDS_Per_Workgroup), 0)
|
||||
tips:
|
||||
Scratch Allocation:
|
||||
#alias: scratch_alloc_
|
||||
value: ROUND(AVG(scr), 0)
|
||||
value: ROUND(AVG(Scratch_Per_Workitem), 0)
|
||||
tips:
|
||||
Wavefronts:
|
||||
#alias: wavefronts_
|
||||
|
||||
+24
-24
@@ -20,15 +20,15 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Grid Size:
|
||||
avg: AVG(grd)
|
||||
min: MIN(grd)
|
||||
max: MAX(grd)
|
||||
avg: AVG(Grid_Size)
|
||||
min: MIN(Grid_Size)
|
||||
max: MAX(Grid_Size)
|
||||
unit: Work Items
|
||||
tips:
|
||||
Workgroup Size:
|
||||
avg: AVG(wgr)
|
||||
min: MIN(wgr)
|
||||
max: MAX(wgr)
|
||||
avg: AVG(Workgroup_Size)
|
||||
min: MIN(Workgroup_Size)
|
||||
max: MAX(Workgroup_Size)
|
||||
unit: Work Items
|
||||
tips:
|
||||
Total Wavefronts:
|
||||
@@ -50,33 +50,33 @@ Panel Config:
|
||||
unit: Wavefronts
|
||||
tips:
|
||||
VGPRs:
|
||||
avg: AVG(arch_vgpr)
|
||||
min: MIN(arch_vgpr)
|
||||
max: MAX(arch_vgpr)
|
||||
avg: AVG(Arch_VGPR)
|
||||
min: MIN(Arch_VGPR)
|
||||
max: MAX(Arch_VGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
AGPRs:
|
||||
avg: AVG(accum_vgpr)
|
||||
min: MIN(accum_vgpr)
|
||||
max: MAX(accum_vgpr)
|
||||
avg: AVG(Accum_VGPR)
|
||||
min: MIN(Accum_VGPR)
|
||||
max: MAX(Accum_VGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
SGPRs:
|
||||
avg: AVG(sgpr)
|
||||
min: MIN(sgpr)
|
||||
max: MAX(sgpr)
|
||||
avg: AVG(SGPR)
|
||||
min: MIN(SGPR)
|
||||
max: MAX(SGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
LDS Allocation:
|
||||
avg: AVG(lds)
|
||||
min: MIN(lds)
|
||||
max: MAX(lds)
|
||||
avg: AVG(LDS_Per_Workgroup)
|
||||
min: MIN(LDS_Per_Workgroup)
|
||||
max: MAX(LDS_Per_Workgroup)
|
||||
unit: Bytes
|
||||
tips:
|
||||
Scratch Allocation:
|
||||
avg: AVG(scr)
|
||||
min: MIN(scr)
|
||||
max: MAX(scr)
|
||||
avg: AVG(Scratch_Per_Workitem)
|
||||
min: MIN(Scratch_Per_Workitem)
|
||||
max: MAX(Scratch_Per_Workitem)
|
||||
unit: Bytes
|
||||
tips:
|
||||
|
||||
@@ -92,9 +92,9 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
avg: AVG((EndNs - BeginNs))
|
||||
min: MIN((EndNs - BeginNs))
|
||||
max: MAX((EndNs - BeginNs))
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
unit: ns
|
||||
tips:
|
||||
Kernel Time (Cycles):
|
||||
|
||||
+1
-1
@@ -32,7 +32,7 @@ Panel Config:
|
||||
tips:
|
||||
Bandwidth (Pct-of-Peak):
|
||||
value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Bank Conflict Rate:
|
||||
|
||||
+1
-1
@@ -24,7 +24,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Cache Hit:
|
||||
|
||||
+1
-1
@@ -24,7 +24,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Cache Hit:
|
||||
|
||||
+4
-4
@@ -33,7 +33,7 @@ Panel Config:
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Cache BW:
|
||||
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
@@ -141,9 +141,9 @@ Panel Config:
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache BW:
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
Cache Accesses:
|
||||
|
||||
+2
-2
@@ -30,12 +30,12 @@ Panel Config:
|
||||
tips:
|
||||
L2-EA Rd BW:
|
||||
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
L2-EA Wr BW:
|
||||
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
|
||||
|
||||
+29
-29
@@ -25,56 +25,56 @@ Panel Config:
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
|
||||
+ SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64
|
||||
+ SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64))))
|
||||
/ (EndNs - BeginNs)))
|
||||
/ (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
|
||||
pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
|
||||
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
|
||||
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
|
||||
* $numCU) * 64) * 2) / 1000))
|
||||
tips:
|
||||
VALU IOPs:
|
||||
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs)))
|
||||
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs
|
||||
- BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp
|
||||
- Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 1024) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 1024) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 256) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F64):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 256) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
tips:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: ((($sclk * $numCU) * 1024) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
tips:
|
||||
Active CUs:
|
||||
@@ -123,11 +123,11 @@ Panel Config:
|
||||
tips:
|
||||
LDS BW:
|
||||
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)))
|
||||
/ (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/sec
|
||||
peak: (($sclk * $numCU) * 0.128)
|
||||
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
LDS Bank Conflict:
|
||||
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
@@ -144,10 +144,10 @@ Panel Config:
|
||||
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
|
||||
tips:
|
||||
Instr Cache BW:
|
||||
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Scalar L1D Cache Hit Rate:
|
||||
@@ -159,10 +159,10 @@ Panel Config:
|
||||
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
|
||||
tips:
|
||||
Scalar L1D Cache BW:
|
||||
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Vector L1D Cache Hit Rate:
|
||||
@@ -178,10 +178,10 @@ Panel Config:
|
||||
None))
|
||||
tips:
|
||||
Vector L1D Cache BW:
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numCU)
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
L2 Cache Hit Rate:
|
||||
@@ -194,19 +194,19 @@ Panel Config:
|
||||
tips:
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Write BW:
|
||||
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Read Latency:
|
||||
value: AVG(((TCC_EA_RDREQ_LEVEL_sum / TCC_EA_RDREQ_sum) if (TCC_EA_RDREQ_sum
|
||||
@@ -231,10 +231,10 @@ Panel Config:
|
||||
coll_level: SQ_LEVEL_WAVES
|
||||
tips:
|
||||
Instr Fetch BW:
|
||||
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
|
||||
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 32) * $numSQC)
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
|
||||
* (($sclk / 1000) * 32)))
|
||||
coll_level: SQ_IFETCH_LEVEL
|
||||
tips:
|
||||
|
||||
+3
-3
@@ -82,15 +82,15 @@ Panel Config:
|
||||
tips:
|
||||
SGPR:
|
||||
#alias: sgpr_
|
||||
value: ROUND(AVG(sgpr), 0)
|
||||
value: ROUND(AVG(SGPR), 0)
|
||||
tips:
|
||||
LDS Allocation:
|
||||
#alias: lds_alloc_
|
||||
value: ROUND(AVG(lds), 0)
|
||||
value: ROUND(AVG(LDS_Per_Workgroup), 0)
|
||||
tips:
|
||||
Scratch Allocation:
|
||||
#alias: scratch_alloc_
|
||||
value: ROUND(AVG(scr), 0)
|
||||
value: ROUND(AVG(Scratch_Per_Workitem), 0)
|
||||
tips:
|
||||
Wavefronts:
|
||||
#alias: wavefronts_
|
||||
|
||||
+24
-24
@@ -20,15 +20,15 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Grid Size:
|
||||
avg: AVG(grd)
|
||||
min: MIN(grd)
|
||||
max: MAX(grd)
|
||||
avg: AVG(Grid_Size)
|
||||
min: MIN(Grid_Size)
|
||||
max: MAX(Grid_Size)
|
||||
unit: Work Items
|
||||
tips:
|
||||
Workgroup Size:
|
||||
avg: AVG(wgr)
|
||||
min: MIN(wgr)
|
||||
max: MAX(wgr)
|
||||
avg: AVG(Workgroup_Size)
|
||||
min: MIN(Workgroup_Size)
|
||||
max: MAX(Workgroup_Size)
|
||||
unit: Work Items
|
||||
tips:
|
||||
Total Wavefronts:
|
||||
@@ -50,33 +50,33 @@ Panel Config:
|
||||
unit: Wavefronts
|
||||
tips:
|
||||
VGPRs:
|
||||
avg: AVG(arch_vgpr)
|
||||
min: MIN(arch_vgpr)
|
||||
max: MAX(arch_vgpr)
|
||||
avg: AVG(Arch_VGPR)
|
||||
min: MIN(Arch_VGPR)
|
||||
max: MAX(Arch_VGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
AGPRs:
|
||||
avg: AVG(accum_vgpr)
|
||||
min: MIN(accum_vgpr)
|
||||
max: MAX(accum_vgpr)
|
||||
avg: AVG(Accum_VGPR)
|
||||
min: MIN(Accum_VGPR)
|
||||
max: MAX(Accum_VGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
SGPRs:
|
||||
avg: AVG(sgpr)
|
||||
min: MIN(sgpr)
|
||||
max: MAX(sgpr)
|
||||
avg: AVG(SGPR)
|
||||
min: MIN(SGPR)
|
||||
max: MAX(SGPR)
|
||||
unit: Registers
|
||||
tips:
|
||||
LDS Allocation:
|
||||
avg: AVG(lds)
|
||||
min: MIN(lds)
|
||||
max: MAX(lds)
|
||||
avg: AVG(LDS_Per_Workgroup)
|
||||
min: MIN(LDS_Per_Workgroup)
|
||||
max: MAX(LDS_Per_Workgroup)
|
||||
unit: Bytes
|
||||
tips:
|
||||
Scratch Allocation:
|
||||
avg: AVG(scr)
|
||||
min: MIN(scr)
|
||||
max: MAX(scr)
|
||||
avg: AVG(Scratch_Per_Workitem)
|
||||
min: MIN(Scratch_Per_Workitem)
|
||||
max: MAX(Scratch_Per_Workitem)
|
||||
unit: Bytes
|
||||
tips:
|
||||
|
||||
@@ -92,9 +92,9 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
avg: AVG((EndNs - BeginNs))
|
||||
min: MIN((EndNs - BeginNs))
|
||||
max: MAX((EndNs - BeginNs))
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
unit: ns
|
||||
tips:
|
||||
Kernel Time (Cycles):
|
||||
|
||||
+6
-6
@@ -27,32 +27,32 @@ Panel Config:
|
||||
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
|
||||
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
|
||||
* $numCU) * 64) * 2) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_bf16_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 512) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f16_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f32_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f64_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_i8_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
|
||||
+1
-1
@@ -32,7 +32,7 @@ Panel Config:
|
||||
tips:
|
||||
Bandwidth (Pct-of-Peak):
|
||||
value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Bank Conflict Rate:
|
||||
|
||||
+1
-1
@@ -24,7 +24,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Cache Hit:
|
||||
|
||||
+1
-1
@@ -24,7 +24,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Cache Hit:
|
||||
|
||||
+4
-4
@@ -33,7 +33,7 @@ Panel Config:
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
Cache BW:
|
||||
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
@@ -141,9 +141,9 @@ Panel Config:
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache BW:
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
Cache Accesses:
|
||||
|
||||
+2
-2
@@ -30,12 +30,12 @@ Panel Config:
|
||||
tips:
|
||||
L2-EA Rd BW:
|
||||
value: AVG((((TCC_EA_RDREQ_32B_sum * 32) + ((TCC_EA_RDREQ_sum - TCC_EA_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
L2-EA Wr BW:
|
||||
value: AVG((((TCC_EA_WRREQ_64B_sum * 64) + ((TCC_EA_WRREQ_sum - TCC_EA_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
|
||||
|
||||
+29
-29
@@ -25,56 +25,56 @@ Panel Config:
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
|
||||
+ SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64
|
||||
+ SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64))))
|
||||
/ (EndNs - BeginNs)))
|
||||
/ (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
|
||||
pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
|
||||
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
|
||||
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
|
||||
* $numCU) * 64) * 2) / 1000))
|
||||
tips:
|
||||
VALU IOPs:
|
||||
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs)))
|
||||
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs
|
||||
- BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp
|
||||
- Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 256) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F64):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 256) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
tips:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
Active CUs:
|
||||
@@ -123,11 +123,11 @@ Panel Config:
|
||||
tips:
|
||||
LDS BW:
|
||||
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)))
|
||||
/ (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/sec
|
||||
peak: (($sclk * $numCU) * 0.128)
|
||||
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
LDS Bank Conflict:
|
||||
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
@@ -144,10 +144,10 @@ Panel Config:
|
||||
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
|
||||
tips:
|
||||
Instr Cache BW:
|
||||
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Scalar L1D Cache Hit Rate:
|
||||
@@ -159,10 +159,10 @@ Panel Config:
|
||||
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
|
||||
tips:
|
||||
Scalar L1D Cache BW:
|
||||
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Vector L1D Cache Hit Rate:
|
||||
@@ -178,10 +178,10 @@ Panel Config:
|
||||
None))
|
||||
tips:
|
||||
Vector L1D Cache BW:
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numCU)
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
L2 Cache Hit Rate:
|
||||
@@ -194,19 +194,19 @@ Panel Config:
|
||||
tips:
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Write BW:
|
||||
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Read Latency:
|
||||
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
@@ -231,10 +231,10 @@ Panel Config:
|
||||
coll_level: SQ_LEVEL_WAVES
|
||||
tips:
|
||||
Instr Fetch BW:
|
||||
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
|
||||
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 32) * $numSQC)
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
|
||||
* (($sclk / 1000) * 32)))
|
||||
coll_level: SQ_IFETCH_LEVEL
|
||||
tips:
|
||||
|
||||
+3
-3
@@ -92,9 +92,9 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
avg: AVG((EndNs - BeginNs))
|
||||
min: MIN((EndNs - BeginNs))
|
||||
max: MAX((EndNs - BeginNs))
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
unit: ns
|
||||
tips:
|
||||
Kernel Time (Cycles):
|
||||
|
||||
+6
-6
@@ -22,32 +22,32 @@ Panel Config:
|
||||
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
|
||||
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
|
||||
* $numCU) * 64) * 2) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_bf16_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 512) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f16_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f32_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f64_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_i8_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
|
||||
+1
-1
@@ -25,7 +25,7 @@ Panel Config:
|
||||
Bandwidth (Pct-of-Peak):
|
||||
value:
|
||||
AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
Bank Conflict Rate:
|
||||
value:
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
tips:
|
||||
Cache Hit:
|
||||
value:
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
tips:
|
||||
Cache Hit:
|
||||
value:
|
||||
|
||||
+4
-4
@@ -28,7 +28,7 @@ Panel Config:
|
||||
tips:
|
||||
Cache BW:
|
||||
value:
|
||||
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
Cache Hit:
|
||||
@@ -118,9 +118,9 @@ Panel Config:
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache BW:
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
Cache Accesses:
|
||||
|
||||
+2
-2
@@ -30,13 +30,13 @@ Panel Config:
|
||||
L2-EA Rd BW:
|
||||
value:
|
||||
AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
L2-EA Wr BW:
|
||||
value:
|
||||
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
|
||||
|
||||
+29
-29
@@ -25,56 +25,56 @@ Panel Config:
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
|
||||
+ SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64
|
||||
+ SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64))))
|
||||
/ (EndNs - BeginNs)))
|
||||
/ (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
|
||||
pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
|
||||
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
|
||||
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
|
||||
* $numCU) * 64) * 2) / 1000))
|
||||
tips:
|
||||
VALU IOPs:
|
||||
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs)))
|
||||
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs
|
||||
- BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp
|
||||
- Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 256) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F64):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 256) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
tips:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
Active CUs:
|
||||
@@ -123,11 +123,11 @@ Panel Config:
|
||||
tips:
|
||||
LDS BW:
|
||||
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)))
|
||||
/ (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/sec
|
||||
peak: (($sclk * $numCU) * 0.128)
|
||||
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
LDS Bank Conflict:
|
||||
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
@@ -144,10 +144,10 @@ Panel Config:
|
||||
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
|
||||
tips:
|
||||
Instr Cache BW:
|
||||
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Scalar L1D Cache Hit Rate:
|
||||
@@ -159,10 +159,10 @@ Panel Config:
|
||||
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
|
||||
tips:
|
||||
Scalar L1D Cache BW:
|
||||
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Vector L1D Cache Hit Rate:
|
||||
@@ -178,10 +178,10 @@ Panel Config:
|
||||
None))
|
||||
tips:
|
||||
Vector L1D Cache BW:
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numCU)
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
L2 Cache Hit Rate:
|
||||
@@ -194,19 +194,19 @@ Panel Config:
|
||||
tips:
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Write BW:
|
||||
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Read Latency:
|
||||
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
@@ -231,10 +231,10 @@ Panel Config:
|
||||
coll_level: SQ_LEVEL_WAVES
|
||||
tips:
|
||||
Instr Fetch BW:
|
||||
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
|
||||
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 32) * $numSQC)
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
|
||||
* (($sclk / 1000) * 32)))
|
||||
coll_level: SQ_IFETCH_LEVEL
|
||||
tips:
|
||||
|
||||
+3
-3
@@ -92,9 +92,9 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
avg: AVG((EndNs - BeginNs))
|
||||
min: MIN((EndNs - BeginNs))
|
||||
max: MAX((EndNs - BeginNs))
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
unit: ns
|
||||
tips:
|
||||
Kernel Time (Cycles):
|
||||
|
||||
+6
-6
@@ -22,32 +22,32 @@ Panel Config:
|
||||
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
|
||||
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
|
||||
* $numCU) * 64) * 2) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_bf16_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 512) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f16_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f32_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f64_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_i8_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
|
||||
+1
-1
@@ -25,7 +25,7 @@ Panel Config:
|
||||
Bandwidth (Pct-of-Peak):
|
||||
value:
|
||||
AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
Bank Conflict Rate:
|
||||
value:
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
tips:
|
||||
Cache Hit:
|
||||
value:
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
tips:
|
||||
Cache Hit:
|
||||
value:
|
||||
|
||||
+4
-4
@@ -28,7 +28,7 @@ Panel Config:
|
||||
tips:
|
||||
Cache BW:
|
||||
value:
|
||||
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
Cache Hit:
|
||||
@@ -118,9 +118,9 @@ Panel Config:
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache BW:
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
Cache Accesses:
|
||||
|
||||
+2
-2
@@ -30,13 +30,13 @@ Panel Config:
|
||||
L2-EA Rd BW:
|
||||
value:
|
||||
AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
L2-EA Wr BW:
|
||||
value:
|
||||
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
|
||||
|
||||
+29
-29
@@ -25,56 +25,56 @@ Panel Config:
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32 + SQ_INSTS_VALU_MUL_F32)
|
||||
+ SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32)))) + (64 * (((SQ_INSTS_VALU_ADD_F64
|
||||
+ SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64) + (2 * SQ_INSTS_VALU_FMA_F64))))
|
||||
/ (EndNs - BeginNs)))
|
||||
/ (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
|
||||
pop: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
|
||||
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
|
||||
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
|
||||
* $numCU) * 64) * 2) / 1000))
|
||||
tips:
|
||||
VALU IOPs:
|
||||
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs - BeginNs)))
|
||||
value: AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: (((($sclk * $numCU) * 64) * 2) / 1000)
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (EndNs
|
||||
- BeginNs)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
|
||||
pop: ((100 * AVG(((64 * (SQ_INSTS_VALU_INT32 + SQ_INSTS_VALU_INT64)) / (End_Timestamp
|
||||
- Start_Timestamp)))) / (((($sclk * $numCU) * 64) * 2) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 256) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F64):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($sclk * $numCU) * 256) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
tips:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs)))
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: ((($sclk * $numCU) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 4096) / 1000))
|
||||
tips:
|
||||
Active CUs:
|
||||
@@ -123,11 +123,11 @@ Panel Config:
|
||||
tips:
|
||||
LDS BW:
|
||||
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)))
|
||||
/ (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/sec
|
||||
peak: (($sclk * $numCU) * 0.128)
|
||||
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
LDS Bank Conflict:
|
||||
value: AVG(((SQ_LDS_BANK_CONFLICT / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
|
||||
@@ -144,10 +144,10 @@ Panel Config:
|
||||
pop: AVG(((100 * SQC_ICACHE_HITS) / (SQC_ICACHE_HITS + SQC_ICACHE_MISSES)))
|
||||
tips:
|
||||
Instr Cache BW:
|
||||
value: AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_ICACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Scalar L1D Cache Hit Rate:
|
||||
@@ -159,10 +159,10 @@ Panel Config:
|
||||
if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES) != 0) else None))
|
||||
tips:
|
||||
Scalar L1D Cache BW:
|
||||
value: AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))
|
||||
value: AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numSQC)
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (EndNs - BeginNs)) * 64))) / ((($sclk
|
||||
pop: ((100 * AVG(((SQC_DCACHE_REQ / (End_Timestamp - Start_Timestamp)) * 64))) / ((($sclk
|
||||
/ 1000) * 64) * $numSQC))
|
||||
tips:
|
||||
Vector L1D Cache Hit Rate:
|
||||
@@ -178,10 +178,10 @@ Panel Config:
|
||||
None))
|
||||
tips:
|
||||
Vector L1D Cache BW:
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 64) * $numCU)
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
L2 Cache Hit Rate:
|
||||
@@ -194,19 +194,19 @@ Panel Config:
|
||||
tips:
|
||||
L2-Fabric Read BW:
|
||||
value: AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Write BW:
|
||||
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
peak: $hbmBW
|
||||
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))) / $hbmBW)
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))) / $hbmBW)
|
||||
tips:
|
||||
L2-Fabric Read Latency:
|
||||
value: AVG(((TCC_EA0_RDREQ_LEVEL_sum / TCC_EA0_RDREQ_sum) if (TCC_EA0_RDREQ_sum
|
||||
@@ -231,10 +231,10 @@ Panel Config:
|
||||
coll_level: SQ_LEVEL_WAVES
|
||||
tips:
|
||||
Instr Fetch BW:
|
||||
value: AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))
|
||||
value: AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))
|
||||
unit: GB/s
|
||||
peak: ((($sclk / 1000) * 32) * $numSQC)
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (EndNs - BeginNs)) * 32))) / ($numSQC
|
||||
pop: ((100 * AVG(((SQ_IFETCH / (End_Timestamp - Start_Timestamp)) * 32))) / ($numSQC
|
||||
* (($sclk / 1000) * 32)))
|
||||
coll_level: SQ_IFETCH_LEVEL
|
||||
tips:
|
||||
|
||||
+3
-3
@@ -92,9 +92,9 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Kernel Time (Nanosec):
|
||||
avg: AVG((EndNs - BeginNs))
|
||||
min: MIN((EndNs - BeginNs))
|
||||
max: MAX((EndNs - BeginNs))
|
||||
avg: AVG((End_Timestamp - Start_Timestamp))
|
||||
min: MIN((End_Timestamp - Start_Timestamp))
|
||||
max: MAX((End_Timestamp - Start_Timestamp))
|
||||
unit: ns
|
||||
tips:
|
||||
Kernel Time (Cycles):
|
||||
|
||||
+6
-6
@@ -22,32 +22,32 @@ Panel Config:
|
||||
+ SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
|
||||
+ SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
|
||||
+ (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
|
||||
+ (2 * SQ_INSTS_VALU_FMA_F64)))) / (End_Timestamp - Start_Timestamp)))) / (((($sclk
|
||||
* $numCU) * 64) * 2) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_bf16_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 512) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f16_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f32_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_f64_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 256) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
mfma_flops_i8_pop:
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
|
||||
value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk * $numCU) * 1024) / 1000))
|
||||
unit: Pct of Peak
|
||||
tips:
|
||||
|
||||
+1
-1
@@ -25,7 +25,7 @@ Panel Config:
|
||||
Bandwidth (Pct-of-Peak):
|
||||
value:
|
||||
AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
|
||||
/ (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
|
||||
/ (End_Timestamp - Start_Timestamp)) / (($sclk * $numCU) * 0.00128)))
|
||||
tips:
|
||||
Bank Conflict Rate:
|
||||
value:
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
tips:
|
||||
Cache Hit:
|
||||
value:
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ Panel Config:
|
||||
metric:
|
||||
Bandwidth:
|
||||
value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
|
||||
* (EndNs - BeginNs))))
|
||||
* (End_Timestamp - Start_Timestamp))))
|
||||
tips:
|
||||
Cache Hit:
|
||||
value:
|
||||
|
||||
+4
-4
@@ -28,7 +28,7 @@ Panel Config:
|
||||
tips:
|
||||
Cache BW:
|
||||
value:
|
||||
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
|
||||
((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($sclk / 1000) * 64) * $numCU))
|
||||
tips:
|
||||
Cache Hit:
|
||||
@@ -118,9 +118,9 @@ Panel Config:
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Cache BW:
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs)))
|
||||
avg: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
min: MIN(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
max: MAX(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
Cache Accesses:
|
||||
|
||||
+2
-2
@@ -30,13 +30,13 @@ Panel Config:
|
||||
L2-EA Rd BW:
|
||||
value:
|
||||
AVG((((TCC_EA0_RDREQ_32B_sum * 32) + ((TCC_EA0_RDREQ_sum - TCC_EA0_RDREQ_32B_sum)
|
||||
* 64)) / (EndNs - BeginNs)))
|
||||
* 64)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
L2-EA Wr BW:
|
||||
value:
|
||||
AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
|
||||
* 32)) / (EndNs - BeginNs)))
|
||||
* 32)) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GB/s
|
||||
tips:
|
||||
|
||||
|
||||
مرجع در شماره جدید
Block a user