fixed long kernel names cut off in --kernel-names option (#728)
* reformatted kernel roofline PDF to use a table
* restored kernel symbol icons
* enhanced code readability
* restored cell text wrap

---------

Signed-off-by: jamessiddeley-amd <James.Siddeley@amd.com>
Tento commit je obsažen v:
+159
-45
@@ -23,6 +23,7 @@
|
||||
##############################################################################el
|
||||
|
||||
import os
|
||||
import textwrap
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import OrderedDict
|
||||
@@ -52,6 +53,14 @@ from utils.utils import mibench
|
||||
|
||||
SYMBOLS = [0, 1, 2, 3, 4, 5, 13, 17, 18, 20]
|
||||
|
||||
def wrap_text(text, width=92):
    """
    Wrap text to a maximum line width and join the lines with <br> for Plotly.

    Args:
        text: Value to wrap. Anything that is not a str is converted with str().
        width: Maximum number of characters per output line (default 92).
            Words longer than this are split across lines.

    Returns:
        A single string whose lines are separated by "<br>". Empty or
        whitespace-only input yields an empty string.

    Raises:
        ValueError: Propagated from textwrap.wrap when width is not positive
            and there is text to wrap.
    """
    if not isinstance(text, str):
        text = str(text)

    # textwrap treats its input as one paragraph; with replace_whitespace=False
    # an embedded newline would otherwise survive inside a wrapped line, mixing
    # raw "\n" with "<br>" and skewing the width accounting. Wrapping each
    # input line separately turns every line break into a "<br>".
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")

    wrapped_lines = []
    for paragraph in normalized.split("\n"):
        wrapped_lines.extend(
            textwrap.wrap(
                paragraph,
                width=width,
                break_long_words=True,
                replace_whitespace=False,
            )
        )
    return "<br>".join(wrapped_lines)
|
||||
|
||||
class Roofline:
|
||||
def __init__(self, args, mspec, run_parameters=None):
|
||||
@@ -67,6 +76,7 @@ class Roofline:
|
||||
"mem_level": "ALL",
|
||||
"include_kernel_names": False,
|
||||
"is_standalone": False,
|
||||
"roofline_data_type": ["FP32"] # default to FP32
|
||||
}
|
||||
)
|
||||
self.__ai_data = None
|
||||
@@ -75,9 +85,9 @@ class Roofline:
|
||||
# Set roofline run parameters from args
|
||||
if hasattr(self.__args, "path") and not run_parameters:
|
||||
self.__run_parameters["workload_dir"] = self.__args.path
|
||||
if hasattr(self.__args, "roof_only") and self.__args.roof_only == True:
|
||||
if hasattr(self.__args, "roof_only") and self.__args.roof_only:
|
||||
self.__run_parameters["is_standalone"] = True
|
||||
if hasattr(self.__args, "kernel_names") and self.__args.kernel_names == True:
|
||||
if hasattr(self.__args, "kernel_names") and self.__args.kernel_names:
|
||||
self.__run_parameters["include_kernel_names"] = True
|
||||
if hasattr(self.__args, "mem_level") and self.__args.mem_level != "ALL":
|
||||
self.__run_parameters["mem_level"] = self.__args.mem_level
|
||||
@@ -93,19 +103,23 @@ class Roofline:
|
||||
console_error("--roof-only is required for --kernel-names")
|
||||
|
||||
def roof_setup(self):
|
||||
# set default workload path if not specified
|
||||
if self.__run_parameters["workload_dir"] == str(
|
||||
Path(os.getcwd()).joinpath("workloads")
|
||||
):
|
||||
# Setup the workload directory for roofline profiling.
|
||||
workload_dir_val = self.__run_parameters.get("workload_dir")
|
||||
if workload_dir_val and Path(workload_dir_val).name == "workloads" and Path(workload_dir_val).parent == Path(os.getcwd()):
|
||||
app_name = getattr(self.__args, "name", "default_app_name")
|
||||
gpu_model_name = getattr(self.__mspec, "gpu_model", "default_gpu_model")
|
||||
self.__run_parameters["workload_dir"] = str(
|
||||
Path(self.__run_parameters["workload_dir"]).joinpath(
|
||||
self.__args.name,
|
||||
self.__mspec.gpu_model,
|
||||
Path(workload_dir_val).joinpath(
|
||||
app_name,
|
||||
gpu_model_name,
|
||||
)
|
||||
)
|
||||
# create new directory for roofline if it doesn't exist
|
||||
if not Path(self.__run_parameters["workload_dir"]).is_dir():
|
||||
os.makedirs(self.__run_parameters["workload_dir"])
|
||||
|
||||
current_workload_dir = self.__run_parameters.get("workload_dir")
|
||||
if current_workload_dir:
|
||||
Path(current_workload_dir).mkdir(parents=True, exist_ok=True)
|
||||
else:
|
||||
console_error("Workload directory is not set. Cannot perform setup.", exit=False)
|
||||
|
||||
@demarcate
|
||||
def empirical_roofline(
|
||||
@@ -119,24 +133,23 @@ class Roofline:
|
||||
):
|
||||
self.roof_setup()
|
||||
|
||||
# Create arithmetic intensity data that will populate the roofline model
|
||||
console_debug("roofline", "Path: %s" % self.__run_parameters["workload_dir"])
|
||||
self.__ai_data = calc_ai(self.__mspec, self.__run_parameters["sort_type"], ret_df)
|
||||
console_debug("roofline", "Path: %s" % self.__run_parameters.get("workload_dir"))
|
||||
self.__ai_data = calc_ai(self.__mspec, self.__run_parameters.get("sort_type"), ret_df)
|
||||
|
||||
msg = "AI at each mem level:"
|
||||
for i in self.__ai_data:
|
||||
msg += "\n\t%s -> %s" % (i, self.__ai_data[i])
|
||||
console_debug(msg)
|
||||
|
||||
# Generate a roofline figure for the datatypes
|
||||
ops_figure = flops_figure = None
|
||||
ops_dt_list = flops_dt_list = ""
|
||||
for dt in self.__run_parameters["roofline_data_type"]:
|
||||
# Do not generate a roofline figure if the datatype is not supported on this gpu_arch
|
||||
if not str(dt) in SUPPORTED_DATATYPES[self.__mspec.gpu_arch]:
|
||||
|
||||
for dt in self.__run_parameters.get("roofline_data_type", []):
|
||||
gpu_arch = getattr(self.__mspec, "gpu_arch", "unknown_arch")
|
||||
if 'SUPPORTED_DATATYPES' not in globals() or gpu_arch not in SUPPORTED_DATATYPES or str(dt) not in SUPPORTED_DATATYPES[gpu_arch]:
|
||||
console_error(
|
||||
"{} is not a supported datatype for roofline profiling on {}".format(
|
||||
str(dt), self.__mspec.gpu_model
|
||||
"{} is not a supported datatype for roofline profiling on {} (arch: {})".format(
|
||||
str(dt), getattr(self.__mspec, "gpu_model", "N/A"), gpu_arch
|
||||
),
|
||||
exit=False,
|
||||
)
|
||||
@@ -165,26 +178,116 @@ class Roofline:
|
||||
flops_figure = self.generate_plot(dtype=str(dt))
|
||||
flops_dt_list += "_" + str(dt)
|
||||
|
||||
# Create a legend and distinct kernel markers. This can be saved, optionally
|
||||
self.__figure = go.Figure(
|
||||
go.Scatter(
|
||||
mode="markers",
|
||||
x=[0] * 10,
|
||||
y=self.__ai_data["kernelNames"],
|
||||
marker_symbol=SYMBOLS,
|
||||
marker_size=15,
|
||||
)
|
||||
)
|
||||
self.__figure.update_layout(
|
||||
title="Kernel Names and Markers",
|
||||
margin=dict(b=0, r=0),
|
||||
xaxis_range=[-1, 1],
|
||||
xaxis_side="top",
|
||||
yaxis_side="right",
|
||||
height=400,
|
||||
width=1000,
|
||||
)
|
||||
self.__figure.update_xaxes(dtick=1)
|
||||
if self.__run_parameters.get("include_kernel_names", False):
|
||||
if self.__ai_data is None:
|
||||
console_error("Roofline Error: self.__ai_data is not populated. Cannot generate kernel names info.", exit=False)
|
||||
original_kernel_names = []
|
||||
else:
|
||||
original_kernel_names = self.__ai_data.get("kernelNames", [])
|
||||
|
||||
num_kernels = len(original_kernel_names)
|
||||
|
||||
self.__figure.data = []
|
||||
self.__figure.layout = {}
|
||||
|
||||
if num_kernels == 0:
|
||||
console_log("roofline", "No kernel names found to generate 'Kernel Names and Markers' info.")
|
||||
self.__figure.add_annotation(text="No kernel names to display.",
|
||||
showarrow=False, xref="paper", yref="paper", x=0.5, y=0.5)
|
||||
self.__figure.update_layout(
|
||||
title_text="Kernel Names and Markers", title_x=0.5,
|
||||
xaxis=dict(visible=False), yaxis=dict(visible=False),
|
||||
plot_bgcolor='white', paper_bgcolor='white',
|
||||
height=200, width=400
|
||||
)
|
||||
else:
|
||||
symbols_list = []
|
||||
kernel_names_list = []
|
||||
|
||||
for i in range(num_kernels):
|
||||
symbols_list.append(SYMBOLS[i % len(SYMBOLS)])
|
||||
kernel_names_list.append(original_kernel_names[i])
|
||||
|
||||
self.__figure = go.Figure()
|
||||
|
||||
self.__figure.add_trace(go.Scatter(
|
||||
x=[0.1] * num_kernels,
|
||||
y=list(range(num_kernels, 0, -1)),
|
||||
mode='markers',
|
||||
marker=dict(
|
||||
symbol=symbols_list,
|
||||
size=15,
|
||||
color='blue',
|
||||
line=dict(width=1, color='black')
|
||||
),
|
||||
showlegend=False,
|
||||
hoverinfo='skip'
|
||||
))
|
||||
|
||||
for i, kernel_name in enumerate(kernel_names_list):
|
||||
self.__figure.add_annotation(
|
||||
x=0.25,
|
||||
y=num_kernels - i,
|
||||
text=wrap_text(kernel_name),
|
||||
showarrow=False,
|
||||
xanchor='left',
|
||||
yanchor='middle',
|
||||
align='left',
|
||||
font=dict(size=11, color='black')
|
||||
)
|
||||
|
||||
self.__figure.add_annotation(
|
||||
x=0.1, y=num_kernels + 1,
|
||||
text="<b>Symbol</b>",
|
||||
showarrow=False,
|
||||
xanchor='center',
|
||||
yanchor='middle',
|
||||
font=dict(size=12, color='black')
|
||||
)
|
||||
self.__figure.add_annotation(
|
||||
x=0.25, y=num_kernels + 1,
|
||||
text="<b>Kernel Name</b>",
|
||||
showarrow=False,
|
||||
xanchor='left',
|
||||
yanchor='middle',
|
||||
font=dict(size=12, color='black')
|
||||
)
|
||||
|
||||
for i in range(num_kernels + 1):
|
||||
self.__figure.add_shape(
|
||||
type="line",
|
||||
x0=0, x1=1,
|
||||
y0=i + 0.5, y1=i + 0.5,
|
||||
line=dict(color="lightgray", width=1)
|
||||
)
|
||||
|
||||
self.__figure.add_shape(
|
||||
type="line",
|
||||
x0=0.2, x1=0.2,
|
||||
y0=0.5, y1=num_kernels + 1.5,
|
||||
line=dict(color="lightgray", width=1)
|
||||
)
|
||||
|
||||
self.__figure.update_layout(
|
||||
title="Kernel Names and Corresponding Markers",
|
||||
title_x=0.5,
|
||||
xaxis=dict(
|
||||
visible=False,
|
||||
range=[0, 1]
|
||||
),
|
||||
yaxis=dict(
|
||||
visible=False,
|
||||
range=[0, num_kernels + 2],
|
||||
autorange=False
|
||||
),
|
||||
height=max(400, num_kernels * 40 + 150),
|
||||
width=1000,
|
||||
margin=dict(l=50, r=50, t=70, b=30),
|
||||
plot_bgcolor='white',
|
||||
paper_bgcolor='white'
|
||||
)
|
||||
|
||||
|
||||
# Output will be different depending on interaction type:
|
||||
# Save PDFs if we're in "standalone roofline" mode, otherwise return HTML to be used in GUI output
|
||||
if self.__run_parameters["is_standalone"]:
|
||||
@@ -246,7 +349,10 @@ class Roofline:
|
||||
|
||||
@demarcate
|
||||
def generate_plot(self, dtype, fig=None) -> go.Figure():
|
||||
"""Create graph object from ai_data (coordinate points) and ceiling_data (peak FLOP and BW) data."""
|
||||
"""
|
||||
Create graph object from ai_data (coordinate points) and ceiling_data
|
||||
(peak FLOP and BW) data.
|
||||
"""
|
||||
if fig is None:
|
||||
fig = go.Figure()
|
||||
skipAI = False
|
||||
@@ -330,13 +436,21 @@ class Roofline:
|
||||
#######################
|
||||
# Plot ceilings
|
||||
#######################
|
||||
if self.__run_parameters["mem_level"] == "ALL":
|
||||
mem_level_config = self.__run_parameters.get("mem_level", "ALL")
|
||||
if mem_level_config == "ALL":
|
||||
cache_hierarchy = ["HBM", "L2", "L1", "LDS"]
|
||||
else:
|
||||
cache_hierarchy = self.__run_parameters["mem_level"]
|
||||
cache_hierarchy = mem_level_config if isinstance(mem_level_config, list) else [mem_level_config]
|
||||
|
||||
# Plot peak BW ceiling(s)
|
||||
for cache_level in cache_hierarchy:
|
||||
|
||||
if (not self.__ceiling_data or cache_level.lower() not in self.__ceiling_data or
|
||||
not isinstance(self.__ceiling_data[cache_level.lower()], (list, tuple)) or
|
||||
len(self.__ceiling_data[cache_level.lower()]) < 3):
|
||||
console_error(f"Ceiling data for {cache_level} is missing or malformed for dtype {dtype}.", exit=False)
|
||||
continue
|
||||
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=self.__ceiling_data[cache_level.lower()][0],
|
||||
@@ -350,7 +464,7 @@ class Roofline:
|
||||
),
|
||||
(
|
||||
None
|
||||
if self.__run_parameters["is_standalone"]
|
||||
if self.__run_parameters.get("is_standalone")
|
||||
else "{} GB/s".format(
|
||||
to_int(self.__ceiling_data[cache_level.lower()][2])
|
||||
)
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele