Add datatypes for roofline profiling (#642)
Rebuild of rocm-amdgpu-bench roofline binaries for MI200/MI300 systems with ROCm 6. Added datatype options to the roofline feature. --------- Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>
This commit is contained in:
@@ -17,6 +17,9 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
* `--roofline-data-type` / `-R` option added to specify which datatypes to capture in the roofline PDF plot outputs
|
||||
* Default is FP32, but the user can specify as many types as desired to overlay on the same plot output
|
||||
|
||||
* Additional datatypes for roofline profiling
|
||||
* Now supports FP8, FP16, BF16, FP32, FP64, I8, I32, I64 (dependent on GPU architecture)
|
||||
|
||||
### Changed
|
||||
|
||||
* Change normal_unit default to per_kernel
|
||||
|
||||
+1
-1
@@ -372,7 +372,7 @@ Examples:
|
||||
"-R",
|
||||
"--roofline-data-type",
|
||||
required=False,
|
||||
choices=["FP8", "FP16", "BF16", "FP32", "FP64", "I8"],
|
||||
choices=["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"],
|
||||
metavar="",
|
||||
nargs="+",
|
||||
type=str,
|
||||
|
||||
+21
-18
@@ -319,26 +319,29 @@ class Roofline:
|
||||
)
|
||||
|
||||
# Plot peak MFMA ceiling
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=self.__ceiling_data["mfma"][0],
|
||||
y=self.__ceiling_data["mfma"][1],
|
||||
name="Peak MFMA-{}".format(dtype),
|
||||
mode=plot_mode,
|
||||
hovertemplate="<b>%{text}</b>",
|
||||
text=[
|
||||
(
|
||||
None
|
||||
if self.__run_parameters["is_standalone"]
|
||||
else "{} G{}/s".format(
|
||||
if dtype in MFMA_DATATYPES:
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=self.__ceiling_data["mfma"][0],
|
||||
y=self.__ceiling_data["mfma"][1],
|
||||
name="Peak MFMA-{}".format(dtype),
|
||||
mode=plot_mode,
|
||||
hovertemplate="<b>%{text}</b>",
|
||||
text=[
|
||||
(
|
||||
None
|
||||
if self.__run_parameters["is_standalone"]
|
||||
else "{} G{}/s".format(
|
||||
to_int(self.__ceiling_data["mfma"][2]), ops_flops
|
||||
)
|
||||
),
|
||||
"{} G{}/s".format(
|
||||
to_int(self.__ceiling_data["mfma"][2]), ops_flops
|
||||
)
|
||||
),
|
||||
"{} G{}/s".format(to_int(self.__ceiling_data["mfma"][2]), ops_flops),
|
||||
],
|
||||
textposition="top left",
|
||||
),
|
||||
],
|
||||
textposition="top left",
|
||||
)
|
||||
)
|
||||
)
|
||||
#######################
|
||||
# Plot Application AI
|
||||
#######################
|
||||
|
||||
@@ -44,13 +44,13 @@ FONT_WEIGHT = "bold"
|
||||
# SUPPORTED_DATATYPES table is based on datatype support in rocm-amdgpu-bench repository
|
||||
# Indicates which datatypes per gpu arch can be generated by the roofline binary
|
||||
SUPPORTED_DATATYPES = {
|
||||
"gfx90a": ["FP16", "BF16", "FP32", "FP64", "I8"], # Unsupported: F8
|
||||
"gfx940": ["FP8", "FP16", "FP32", "FP64"], # Unsupported: BF16, I8
|
||||
"gfx941": ["FP8", "FP16", "FP32", "FP64"], # Unsupported: BF16, I8
|
||||
"gfx942": ["FP8", "FP16", "FP32", "FP64"], # Unsupported: BF16, I8
|
||||
"gfx90a": ["FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported: FP8
|
||||
"gfx940": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
|
||||
"gfx941": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
|
||||
"gfx942": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
|
||||
}
|
||||
|
||||
PEAK_OPS_DATATYPES = ["FP8", "FP32", "FP64"]
|
||||
PEAK_OPS_DATATYPES = ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"]
|
||||
MFMA_DATATYPES = ["FP8", "FP16", "BF16", "FP32", "FP64", "I8"]
|
||||
|
||||
TOP_N = 10
|
||||
@@ -177,7 +177,7 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data):
|
||||
graphPoints["valu"].append(peakOps)
|
||||
|
||||
# Plot MFMA roof
|
||||
if x1_mfma != -1 or (dtype in MFMA_DATATYPES): # assert that mfma has been assigned
|
||||
if x1_mfma != -1 and (dtype in MFMA_DATATYPES): # assert that mfma has been assigned
|
||||
x0_mfma = XMAX
|
||||
if x2_mfma < x0_mfma:
|
||||
x0_mfma = x2_mfma
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user