Fix peak flops of F8 I8 F16 and BF16 on MI300
[ROCm/rocprofiler-compute commit: ab6665d317]
This commit is contained in:
@@ -67,6 +67,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
* Fixed option specs-correction
|
||||
* Fixed kernel name and kernel dispatch filtering when using rocprof v3
|
||||
* Fixed not collecting TCC channel counters in rocprof v3
|
||||
* Fixed peak FLOPS of F8, I8, F16, and BF16 on MI300
|
||||
|
||||
### Known issues
|
||||
|
||||
|
||||
+7
-7
@@ -47,21 +47,21 @@ Panel Config:
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
@@ -87,9 +87,9 @@ Panel Config:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 8192) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
Active CUs:
|
||||
value: $numActiveCUs
|
||||
|
||||
+8
-8
@@ -44,23 +44,23 @@ Panel Config:
|
||||
MFMA FLOPs (F8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 8192) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
@@ -86,9 +86,9 @@ Panel Config:
|
||||
MFMA IOPs (INT8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 8192) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
|
||||
+7
-7
@@ -47,21 +47,21 @@ Panel Config:
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
@@ -87,9 +87,9 @@ Panel Config:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 8192) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
Active CUs:
|
||||
value: $numActiveCUs
|
||||
|
||||
+8
-8
@@ -44,23 +44,23 @@ Panel Config:
|
||||
MFMA FLOPs (F8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 8192) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
@@ -86,9 +86,9 @@ Panel Config:
|
||||
MFMA IOPs (INT8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 8192) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
|
||||
- metric_table:
|
||||
|
||||
+7
-7
@@ -47,21 +47,21 @@ Panel Config:
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
@@ -87,9 +87,9 @@ Panel Config:
|
||||
MFMA IOPs (Int8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 8192) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
Active CUs:
|
||||
value: $numActiveCUs
|
||||
|
||||
+9
-9
@@ -44,23 +44,23 @@ Panel Config:
|
||||
MFMA FLOPs (F8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 8192) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (BF16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F16):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GFLOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 2048) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 2048) / 1000))
|
||||
tips:
|
||||
MFMA FLOPs (F32):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
@@ -86,10 +86,10 @@ Panel Config:
|
||||
MFMA IOPs (INT8):
|
||||
value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp)))
|
||||
unit: GIOP
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 8192) / 1000)
|
||||
peak: ((($max_sclk * $cu_per_gpu) * 4096) / 1000)
|
||||
pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (End_Timestamp - Start_Timestamp))))
|
||||
/ ((($max_sclk * $cu_per_gpu) * 8192) / 1000))
|
||||
tips:
|
||||
/ ((($max_sclk * $cu_per_gpu) * 4096) / 1000))
|
||||
tips: All Peak FLOPS/clock/CU come from https://github.com/ROCm/amd_matrix_instruction_calculator/
|
||||
|
||||
- metric_table:
|
||||
id: 1102
|
||||
|
||||
Reference in New Issue
Block a user