Add datatypes for roofline profiling (#642)

Rebuilt the rocm-amdgpu-bench roofline binaries for MI200/MI300 systems with ROCm 6.
Added datatype options to the roofline feature.

---------

Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>
This commit is contained in:
cfallows-amd
2025-03-26 21:07:48 -04:00
committed by GitHub
parent 04f92b72a9
commit 6cb5bcdbe9
10 changed files with 31 additions and 25 deletions
+3
View File
@@ -17,6 +17,9 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* --roofline-data-type / -R option added to specify which datatypes the user wants to capture in the roofline PDF plot outputs
* Default is FP32, but the user can specify as many types as desired to overlay on the same plot output
* Additional datatypes for roofline profiling
* Now supports FP8, FP16, BF16, FP32, FP64, I8, I32, I64 (dependent on GPU architecture)
### Changed
* Change normal_unit default to per_kernel
+1 -1
View File
@@ -372,7 +372,7 @@ Examples:
"-R",
"--roofline-data-type",
required=False,
choices=["FP8", "FP16", "BF16", "FP32", "FP64", "I8"],
choices=["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"],
metavar="",
nargs="+",
type=str,
+21 -18
View File
@@ -319,26 +319,29 @@ class Roofline:
)
# Plot peak MFMA ceiling
fig.add_trace(
go.Scatter(
x=self.__ceiling_data["mfma"][0],
y=self.__ceiling_data["mfma"][1],
name="Peak MFMA-{}".format(dtype),
mode=plot_mode,
hovertemplate="<b>%{text}</b>",
text=[
(
None
if self.__run_parameters["is_standalone"]
else "{} G{}/s".format(
if dtype in MFMA_DATATYPES:
fig.add_trace(
go.Scatter(
x=self.__ceiling_data["mfma"][0],
y=self.__ceiling_data["mfma"][1],
name="Peak MFMA-{}".format(dtype),
mode=plot_mode,
hovertemplate="<b>%{text}</b>",
text=[
(
None
if self.__run_parameters["is_standalone"]
else "{} G{}/s".format(
to_int(self.__ceiling_data["mfma"][2]), ops_flops
)
),
"{} G{}/s".format(
to_int(self.__ceiling_data["mfma"][2]), ops_flops
)
),
"{} G{}/s".format(to_int(self.__ceiling_data["mfma"][2]), ops_flops),
],
textposition="top left",
),
],
textposition="top left",
)
)
)
#######################
# Plot Application AI
#######################
+6 -6
View File
@@ -44,13 +44,13 @@ FONT_WEIGHT = "bold"
# SUPPORTED_DATATYPES table is based on datatype support in rocm-amdgpu-bench repository
# Indicates which datatypes per gpu arch can be generated by the roofline binary
SUPPORTED_DATATYPES = {
"gfx90a": ["FP16", "BF16", "FP32", "FP64", "I8"], # Unsupported: F8
"gfx940": ["FP8", "FP16", "FP32", "FP64"], # Unsupported: BF16, I8
"gfx941": ["FP8", "FP16", "FP32", "FP64"], # Unsupported: BF16, I8
"gfx942": ["FP8", "FP16", "FP32", "FP64"], # Unsupported: BF16, I8
"gfx90a": ["FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported: F8
"gfx940": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
"gfx941": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
"gfx942": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
}
PEAK_OPS_DATATYPES = ["FP8", "FP32", "FP64"]
PEAK_OPS_DATATYPES = ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"]
MFMA_DATATYPES = ["FP8", "FP16", "BF16", "FP32", "FP64", "I8"]
TOP_N = 10
@@ -177,7 +177,7 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data):
graphPoints["valu"].append(peakOps)
# Plot MFMA roof
if x1_mfma != -1 or (dtype in MFMA_DATATYPES): # assert that mfma has been assigned
if x1_mfma != -1 and (dtype in MFMA_DATATYPES): # assert that mfma has been assigned
x0_mfma = XMAX
if x2_mfma < x0_mfma:
x0_mfma = x2_mfma
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.