diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py
index 682a0c3181..52fd62b8f3 100644
--- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py
+++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py
@@ -220,7 +220,7 @@ class webui_analysis(OmniAnalyze_Base):
"device_id": 0,
"sort_type": "kernels",
"mem_level": "ALL",
- "include_kernel_names": False,
+ "include_kernel_names": True,
"is_standalone": False,
"roofline_data_type": self.__roofline_data_type,
"kernel_filter": False,
diff --git a/projects/rocprofiler-compute/src/roofline.py b/projects/rocprofiler-compute/src/roofline.py
index 5c39ade822..57f47bc67d 100644
--- a/projects/rocprofiler-compute/src/roofline.py
+++ b/projects/rocprofiler-compute/src/roofline.py
@@ -22,10 +22,8 @@
# THE SOFTWARE.
##############################################################################
-
import argparse
import textwrap
-import time
from abc import abstractmethod
from collections import OrderedDict
from pathlib import Path
@@ -36,6 +34,7 @@ import pandas as pd
import plotext as plt
import plotly.graph_objects as go
from dash import dcc, html
+from plotly.subplots import make_subplots
from utils import file_io, rocpd_data, schema
from utils.logger import (
@@ -58,7 +57,7 @@ from utils.specs import MachineSpecs
SYMBOLS = [0, 1, 2, 3, 4, 5, 13, 17, 18, 20]
-def wrap_text(text: str, width: int = 92) -> str:
+def wrap_text(text: str, width: int = 100) -> str:
"""
Wraps text using textwrap and joins lines with <br> for Plotly.
"""
@@ -94,7 +93,6 @@ class Roofline:
"device_id": 0,
"sort_type": "kernels",
"mem_level": "ALL",
- "include_kernel_names": False,
"is_standalone": False,
"roofline_data_type": ["FP32"], # default to FP32
"kernel_filter": False,
@@ -109,8 +107,6 @@ class Roofline:
self.__run_parameters["workload_dir"] = self.__args.path
if hasattr(self.__args, "no_roof") and not self.__args.no_roof:
self.__run_parameters["is_standalone"] = True
- if hasattr(self.__args, "kernel_names") and self.__args.kernel_names:
- self.__run_parameters["include_kernel_names"] = True
if hasattr(self.__args, "mem_level") and self.__args.mem_level != "ALL":
self.__run_parameters["mem_level"] = self.__args.mem_level
if hasattr(self.__args, "sort") and self.__args.sort != "ALL":
@@ -120,19 +116,10 @@ class Roofline:
hasattr(self.__args, "gpu_kernel") and self.__args.gpu_kernel
):
self.__run_parameters["kernel_filter"] = True
- self.validate_parameters()
def get_args(self) -> argparse.Namespace:
return self.__args
- def validate_parameters(self) -> None:
- if self.__run_parameters["include_kernel_names"] and (
- not self.__run_parameters["is_standalone"]
- ):
- console_warning(
- "--kernel-names is nonactionable when used with --no-roof option"
- )
-
def roof_setup(self) -> None:
# Setup the workload directory for roofline profiling.
workload_dir_val = self.__run_parameters.get("workload_dir")
@@ -250,6 +237,46 @@ class Roofline:
return df
+ def _determine_kernel_bound_status(
+     self,
+     ai_value: float,
+     performance: float,
+     cache_level: str,
+     ceiling_data: dict[str, Any],
+ ) -> str:
+     """
+     Classify a kernel point as memory-bound or compute-bound against the
+     roofline of its own cache level.
+
+     The ridge point is the lowest available peak (VALU and/or MFMA) divided
+     by the bandwidth of the kernel's cache level. A point whose arithmetic
+     intensity lies left of the ridge is "Memory Bound", otherwise it is
+     "Compute Bound".
+
+     Args:
+         ai_value: Arithmetic intensity of the kernel point.
+         performance: Achieved performance of the point. Not used by the
+             classification; kept so existing callers keep working.
+         cache_level: AI series name such as "ai_l1", "ai_l2" or "ai_hbm".
+         ceiling_data: Ceiling entries keyed by level ("hbm", "l2", "valu",
+             "mfma", ...). Index 2 of an entry is read as its scalar peak
+             value (bandwidth or peak throughput).
+
+     Returns:
+         "Memory Bound", "Compute Bound", or "Unknown" when the ceiling
+         data needed for the decision is missing or malformed.
+     """
+     cache_key = cache_level.replace("ai_", "")
+
+     # Bandwidth ceiling for this cache level
+     cache_data = ceiling_data.get(cache_key)
+     if not isinstance(cache_data, (list, tuple)) or len(cache_data) < 3:
+         return "Unknown"
+
+     bandwidth = cache_data[2]
+     # A missing or non-positive bandwidth has no ridge point; bail out
+     # instead of dividing by zero / None below.
+     if not bandwidth or bandwidth <= 0:
+         return "Unknown"
+
+     # Lowest peak among the compute ceilings that are actually populated;
+     # apply the same shape check used for the cache entry above.
+     peaks = [
+         entry[2]
+         for entry in (ceiling_data.get("valu"), ceiling_data.get("mfma"))
+         if isinstance(entry, (list, tuple))
+         and len(entry) >= 3
+         and entry[2] is not None
+     ]
+     if not peaks:
+         return "Unknown"
+
+     x_intersect = min(peaks) / bandwidth
+
+     return "Memory Bound" if ai_value < x_intersect else "Compute Bound"
+
+
@demarcate
def empirical_roofline(
self, ret_df: dict[str, pd.DataFrame]
@@ -280,9 +307,27 @@ class Roofline:
msg += f"\n\t{key} -> {value}"
console_debug(msg)
+ kernel_names_data = None
+ if self.__ai_data and "kernelNames" in self.__ai_data:
+ original_kernel_names = self.__ai_data.get("kernelNames", [])
+ filtered_kernel_names = [
+ name
+ for name in original_kernel_names
+ if name != "nan" and isinstance(name, str)
+ ]
+ if len(filtered_kernel_names) > 0:
+ kernel_names_data = {
+ "kernel_names": filtered_kernel_names,
+ "num_kernels": len(filtered_kernel_names),
+ }
+
ops_figure = flops_figure = None
ops_dt_list = flops_dt_list = kernel_list = ""
+ # collect ceiling data for all datatypes to find global minimums
+ all_ops_ceiling_data = {}
+ all_flops_ceiling_data = {}
+
for dt in self.__run_parameters.get("roofline_data_type", []):
gpu_arch = getattr(self.__mspec, "gpu_arch", "unknown_arch")
if (
@@ -306,170 +351,60 @@ class Roofline:
fig=ops_figure,
)
else:
- ops_figure = self.generate_plot(dtype=str(dt))
+ ops_figure = self.generate_plot(
+ dtype=str(dt),
+ kernel_names_data=kernel_names_data,
+ )
ops_dt_list += "_" + str(dt)
+ # store ceiling data for this datatype
+ all_ops_ceiling_data[str(dt)] = self.__ceiling_data
if ops_flops == "Flops":
if flops_figure:
- flops_figure = self.generate_plot(dtype=str(dt), fig=flops_figure)
+ flops_figure = self.generate_plot(
+ dtype=str(dt),
+ fig=flops_figure,
+ )
else:
- flops_figure = self.generate_plot(dtype=str(dt))
+ flops_figure = self.generate_plot(
+ dtype=str(dt),
+ kernel_names_data=kernel_names_data,
+ )
flops_dt_list += "_" + str(dt)
-
- if self.__run_parameters.get("include_kernel_names", False):
- if self.__ai_data is None:
- console_error(
- "Roofline Error: self.__ai_data is not populated. "
- "Cannot generate kernel names info.",
- exit=False,
- )
- original_kernel_names = []
- else:
- original_kernel_names = self.__ai_data.get("kernelNames", [])
-
- num_kernels = len(original_kernel_names)
- self.__figure.data = []
- self.__figure.layout = {}
-
- if num_kernels == 0:
- # Create empty kernel names figure when no kernels are found
- console_log(
- "roofline",
- "No kernel names found to generate "
- "'Kernel Names and Markers' info.",
- )
- self.__figure.add_annotation(
- text="No kernel names to display.",
- showarrow=False,
- xref="paper",
- yref="paper",
- x=0.5,
- y=0.5,
- )
- self.__figure.update_layout(
- title_text="Kernel Names and Markers",
- title_x=0.5,
- xaxis=dict(visible=False),
- yaxis=dict(visible=False),
- plot_bgcolor="white",
- paper_bgcolor="white",
- height=200,
- width=400,
- )
- else:
- # Create populated kernel names figure with symbols and names.
- symbols_list = [SYMBOLS[i % len(SYMBOLS)] for i in range(num_kernels)]
-
- self.__figure = go.Figure()
- self.__figure.add_trace(
- go.Scatter(
- x=[0.1] * num_kernels,
- y=list(range(num_kernels, 0, -1)),
- mode="markers",
- marker=dict(
- symbol=symbols_list,
- size=15,
- color="blue",
- line=dict(width=1, color="black"),
- ),
- showlegend=False,
- hoverinfo="skip",
- )
- )
-
- # Add kernel name annotations
- for i, kernel_name in enumerate(original_kernel_names):
- self.__figure.add_annotation(
- x=0.25,
- y=num_kernels - i,
- text=wrap_text(kernel_name),
- showarrow=False,
- xanchor="left",
- yanchor="middle",
- align="left",
- font=dict(size=11, color="black"),
- )
-
- # Add formatting elements to kernel names figure.
- self.__figure.add_annotation(
- x=0.1,
- y=num_kernels + 1,
- text="Symbol",
- showarrow=False,
- xanchor="center",
- yanchor="middle",
- font=dict(size=12, color="black"),
- )
- self.__figure.add_annotation(
- x=0.25,
- y=num_kernels + 1,
- text="Kernel Name",
- showarrow=False,
- xanchor="left",
- yanchor="middle",
- font=dict(size=12, color="black"),
- )
-
- # Add grid lines
- for i in range(num_kernels + 1):
- self.__figure.add_shape(
- type="line",
- x0=0,
- x1=1,
- y0=i + 0.5,
- y1=i + 0.5,
- line=dict(color="lightgray", width=1),
- )
-
- self.__figure.add_shape(
- type="line",
- x0=0.2,
- x1=0.2,
- y0=0.5,
- y1=num_kernels + 1.5,
- line=dict(color="lightgray", width=1),
- )
-
- self.__figure.update_layout(
- title="Kernel Names and Corresponding Markers",
- title_x=0.5,
- xaxis=dict(visible=False, range=[0, 1]),
- yaxis=dict(
- visible=False, range=[0, num_kernels + 2], autorange=False
- ),
- height=max(400, num_kernels * 40 + 150),
- width=1000,
- margin=dict(l=50, r=50, t=70, b=30),
- plot_bgcolor="white",
- paper_bgcolor="white",
- )
+ # Store ceiling data for this datatype
+ all_flops_ceiling_data[str(dt)] = self.__ceiling_data
# Output will be different depending on interaction type:
# Save PDFs if we're in "standalone roofline" mode,
- # otherwise return HTML to be used in GUI output
+ # otherwise return HTML to be used in GUI output
+
if self.__run_parameters["is_standalone"]:
dev_id = str(self.__run_parameters["device_id"])
if self.__run_parameters.get("kernel_filter", False):
for name in sorted(self.__args.kernel):
kernel_list += "_" + name
- # Re-save to remove loading MathJax pop up
- for _ in range(2):
- if ops_figure:
- ops_figure.write_image(
- f"{self.__run_parameters['workload_dir']}/empirRoof_gpu-{dev_id}{ops_dt_list}{kernel_list}.pdf"
- )
- if flops_figure:
- flops_figure.write_image(
- f"{self.__run_parameters['workload_dir']}/empirRoof_gpu-{dev_id}{flops_dt_list}{kernel_list}.pdf"
- )
+ if ops_figure:
+ actual_height = int(ops_figure.layout.height)
+ # minimum height of 1000 to avoid cutting off content
+ pdf_height = max(actual_height, 1000)
- # only save a legend if kernel_names option is toggled
- if self.__run_parameters["include_kernel_names"]:
- self.__figure.write_image(
- f"{self.__run_parameters['workload_dir']}/kernelName_legend{kernel_list}.pdf"
- )
- time.sleep(1)
+ ops_figure.write_image(
+ f"{self.__run_parameters['workload_dir']}/empirRoof_gpu-{dev_id}{ops_dt_list}{kernel_list}.pdf",
+ width=1000,
+ height=pdf_height,
+ )
+
+ if flops_figure:
+ actual_height = int(flops_figure.layout.height)
+ # minimum height of 1000 to avoid cutting off content
+ pdf_height = max(actual_height, 1000)
+
+ flops_figure.write_image(
+ f"{self.__run_parameters['workload_dir']}/empirRoof_gpu-{dev_id}{flops_dt_list}{kernel_list}.pdf",
+ width=1000,
+ height=pdf_height,
+ )
console_log("roofline", "Empirical Roofline PDFs saved!")
else:
@@ -512,99 +447,202 @@ class Roofline:
)
@demarcate
- def generate_plot(self, dtype: str, fig: Optional[go.Figure] = None) -> go.Figure:
+ def generate_plot(
+ self,
+ dtype: str,
+ fig: Optional[go.Figure] = None,
+ kernel_names_data: Optional[dict] = None,
+ ) -> go.Figure:
"""
Create graph object from ai_data (coordinate points) and ceiling_data
(peak FLOP and BW) data.
"""
- if fig is None:
- fig = go.Figure()
- skipAI = False
- else:
- skipAI = True # Don't repeat AI plotting
+ is_new_figure = fig is None
+ has_kernel_names = kernel_names_data is not None and is_new_figure
+ skipAI = not is_new_figure
- plot_mode = "lines+text" if self.__run_parameters["is_standalone"] else "lines"
+ subplot_row = None
+ total_figure_height = 600 # default height
+
+ if is_new_figure:
+ if has_kernel_names:
+ raw_kernel_names = kernel_names_data.get("kernel_names", [])
+ num_kernels = len(raw_kernel_names)
+
+ wrapped_kernel_names = [wrap_text(name) for name in raw_kernel_names]
+ lines_per_kernel = [
+ text.count("
") + 1 for text in wrapped_kernel_names
+ ]
+ temp_ceiling_data = construct_roof(
+ roofline_parameters=self.__run_parameters,
+ dtype=dtype,
+ ai_data=self.__ai_data,
+ )
+
+ plot_points_data = []
+ cache_colors = {
+ "ai_l1": "blue",
+ "ai_l2": "green",
+ "ai_hbm": "red",
+ "ai_lds": "orange",
+ }
+
+ for cache_level in ["ai_l1", "ai_l2", "ai_hbm"]:
+ if cache_level in self.__ai_data:
+ x_vals = self.__ai_data[cache_level][0]
+ y_vals = self.__ai_data[cache_level][1]
+
+ for i in range(min(len(x_vals), num_kernels)):
+ if x_vals[i] > 0 and y_vals[i] > 0:
+ status = self._determine_kernel_bound_status(
+ ai_value=x_vals[i],
+ performance=y_vals[i],
+ cache_level=cache_level,
+ ceiling_data=temp_ceiling_data,
+ )
+
+ plot_points_data.append({
+ "symbol": None,
+ "color": cache_colors.get(cache_level, "gray"),
+ "cache_level": cache_level.replace(
+ "ai_", "", 1
+ ).upper(),
+ "ai": f"{x_vals[i]:.2f}",
+ "performance": f"{y_vals[i]:.2f}",
+ "status": status,
+ "kernel_idx": i,
+ })
+
+ ######################################
+ # Define Figure Measurement Constants
+ ######################################
+
+ ROOFLINE_PLOT_HEIGHT = 500 # Default height of plot itself
+
+ POINTS_ROW_HEIGHT = 25 # Pixel height of each plot point row
+ num_plot_points = len(plot_points_data) # Number of plot points
+ PLOT_POINTS_HEIGHT = (
+ num_plot_points + 2
+ ) * POINTS_ROW_HEIGHT # +2 for header and spacing
+
+ BASE_ROW_HEIGHT = 15 # Base pixel height of each kernel name row
+ KERNEL_PADDING = 8 # Padding in between each kernel name row
+ KERNEL_NAMES_HEIGHT = (
+ sum(lines_per_kernel) * BASE_ROW_HEIGHT
+ + (num_kernels - 1) * KERNEL_PADDING
+ + BASE_ROW_HEIGHT
+ )
+
+ total_figure_height = (
+ ROOFLINE_PLOT_HEIGHT + PLOT_POINTS_HEIGHT + KERNEL_NAMES_HEIGHT
+ )
+
+ total_content_height = (
+ ROOFLINE_PLOT_HEIGHT + PLOT_POINTS_HEIGHT + KERNEL_NAMES_HEIGHT
+ )
+ roofline_ratio = ROOFLINE_PLOT_HEIGHT / total_content_height
+ plot_points_ratio = PLOT_POINTS_HEIGHT / total_content_height
+ kernel_names_ratio = 1 - roofline_ratio - plot_points_ratio
+ SUBPLOT_SPACING_PX = 80 # Constant - num of pixels between each subplot
+ fig = make_subplots(
+ rows=3,
+ cols=1,
+ row_heights=[roofline_ratio, plot_points_ratio, kernel_names_ratio],
+ subplot_titles=[
+ f"Roofline Analysis ({dtype})",
+ "Plot Points & Values",
+ "Full Kernel Names",
+ ],
+ vertical_spacing=SUBPLOT_SPACING_PX / total_figure_height,
+ specs=[
+ [{"type": "scatter"}], # Roofline plot
+ [{"type": "scatter"}], # Plot points table
+ [{"type": "scatter"}], # Kernel names table
+ ],
+ )
+
+ subplot_row = 1
+ skipAI = False
+ else:
+ # Adding to existing figure
+ if hasattr(fig, "_grid_ref") and fig._grid_ref is not None:
+ subplot_row = 1
+ if hasattr(fig, "layout") and hasattr(fig.layout, "height"):
+ total_figure_height = fig.layout.height
+ skipAI = True
self.__ceiling_data = construct_roof(
roofline_parameters=self.__run_parameters,
dtype=dtype,
+ ai_data=self.__ai_data,
)
console_debug("roofline", f"Ceiling data:\n{self.__ceiling_data}")
- ops_flops = "OP" if dtype.startswith("I") else "FLOP" # For printing purposes
+ ops_flops = "OP" if dtype.startswith("I") else "FLOP"
+ subplot_kwargs = {"row": subplot_row, "col": 1} if subplot_row else {}
#######################
# Plot Application AI
#######################
- # Plot the arithmetic intensity points for each cache level
- if ops_flops == "FLOP":
- if not skipAI:
+ if ops_flops == "FLOP" and not skipAI:
+ kernel_names = self.__ai_data.get("kernelNames", [])
+ symbols_list = [SYMBOLS[i % len(SYMBOLS)] for i in range(len(kernel_names))]
+ show_in_legend = not self.__run_parameters["is_standalone"]
+ if self.__ai_data["ai_l1"][0]:
fig.add_trace(
go.Scatter(
x=self.__ai_data["ai_l1"][0],
y=self.__ai_data["ai_l1"][1],
- name="ai_l1",
+ name="L1",
mode="markers",
- marker_symbol=(
- SYMBOLS
- if self.__run_parameters["include_kernel_names"]
- else None
+ marker=dict(
+ color="blue",
+ size=10,
+ symbol=symbols_list[: len(self.__ai_data["ai_l1"][0])],
),
- )
+ showlegend=show_in_legend,
+ ),
+ **subplot_kwargs,
)
+
+ if self.__ai_data["ai_l2"][0]:
fig.add_trace(
go.Scatter(
x=self.__ai_data["ai_l2"][0],
y=self.__ai_data["ai_l2"][1],
- name="ai_l2",
+ name="L2",
mode="markers",
- marker_symbol=(
- SYMBOLS
- if self.__run_parameters["include_kernel_names"]
- else None
+ marker=dict(
+ color="green",
+ size=10,
+ symbol=symbols_list[: len(self.__ai_data["ai_l2"][0])],
),
- )
+ showlegend=show_in_legend,
+ ),
+ **subplot_kwargs,
)
+
+ if self.__ai_data["ai_hbm"][0]:
fig.add_trace(
go.Scatter(
x=self.__ai_data["ai_hbm"][0],
y=self.__ai_data["ai_hbm"][1],
- name="ai_hbm",
+ name="HBM",
mode="markers",
- marker_symbol=(
- SYMBOLS
- if self.__run_parameters["include_kernel_names"]
- else None
+ marker=dict(
+ color="red",
+ size=10,
+ symbol=symbols_list[: len(self.__ai_data["ai_hbm"][0])],
),
- )
+ showlegend=show_in_legend,
+ ),
+ **subplot_kwargs,
)
- # Set layout
- fig.update_layout(
- xaxis_title="Arithmetic Intensity (FLOPs/Byte)",
- yaxis_title="Performance (GFLOP/sec)",
- hovermode="x unified",
- margin=dict(l=50, r=50, b=50, t=50, pad=4),
- )
- else:
- # Set layout
- fig.update_layout(
- xaxis_title="Bandwidth (GB/sec)",
- yaxis_title="Performance (GOP/sec)",
- hovermode="x unified",
- margin=dict(l=50, r=50, b=50, t=50, pad=4),
- )
- console_debug(
- "roofline",
- "Roofline analysis only supports AI for "
- "floating point calculations at this time",
- )
-
#######################
- # Plot ceilings
+ # Bandwidth Ceilings
#######################
mem_level_config = self.__run_parameters.get("mem_level", "ALL")
-
cache_hierarchy = (
["HBM", "L2", "L1", "LDS"]
if mem_level_config == "ALL"
@@ -615,94 +653,482 @@ class Roofline:
)
)
- # Plot peak BW ceiling(s)
- for cache_level in cache_hierarchy:
- cache_key = cache_level.lower()
-
+ bandwidth_lines = []
+ for level in cache_hierarchy:
+ key = level.lower()
+ line_data = self.__ceiling_data.get(key)
if (
- not self.__ceiling_data
- or cache_level.lower() not in self.__ceiling_data
- or not isinstance(
- self.__ceiling_data[cache_level.lower()], (list, tuple)
- )
- or len(self.__ceiling_data[cache_level.lower()]) < 3
+ line_data
+ and isinstance(line_data, (list, tuple))
+ and len(line_data) >= 3
):
- console_error(
- f"Ceiling data for {cache_level} is missing "
- f"or malformed for dtype {dtype}.",
- exit=False,
+ bandwidth_lines.append({
+ "key": key,
+ "level": level,
+ "x": line_data[0],
+ "y": line_data[1],
+ "value": line_data[2],
+ "dtype": dtype,
+ })
+
+ for bw_line in bandwidth_lines:
+ value = to_int(bw_line["value"])
+ level = bw_line["level"]
+
+ trace_to_update = None
+ for trace in fig.data:
+ is_correct_level = trace.name and trace.name.startswith(
+ f"{level.upper()}-"
)
- continue
+ has_correct_value = False
+ if trace.name and "
" in trace.name:
+ try:
+ # Extract value from legend name
+ value_part = trace.name.split("
")[1]
+ existing_val = int(value_part.split()[0])
+ if existing_val == value:
+ has_correct_value = True
+ except (ValueError, IndexError):
+ pass
+
+ if is_correct_level and has_correct_value:
+ trace_to_update = trace
+ break
+
+ if trace_to_update:
+ try:
+ # Extract existing datatypes from name
+ name_part = trace_to_update.name.split("
")[0]
+ existing_dts_str = name_part.split("-", 1)[1]
+ existing_dts = [dt.strip() for dt in existing_dts_str.split(",")]
+ except Exception:
+ continue
+
+ all_dts = sorted(list(set(existing_dts + [dtype])))
+ all_dts_str = ", ".join(all_dts)
+ legend_name = f"{level.upper()}-{all_dts_str}
{value} GB/s"
+
+ fig.update_traces(
+ patch={
+ "name": legend_name,
+ "hovertemplate": f"{legend_name}",
+ },
+ selector={"name": trace_to_update.name},
+ )
+ else:
+ # New bandwidth line with value in legend
+ legend_name = f"{level.upper()}-{dtype}
{value} GB/s"
+
+ fig.add_trace(
+ go.Scatter(
+ x=bw_line["x"],
+ y=bw_line["y"],
+ name=legend_name,
+ mode="lines",
+ hovertemplate=f"{legend_name}",
+ ),
+ **subplot_kwargs,
+ )
+
+ #######################
+ # Peak Performance
+ #######################
+ valu_data = (
+ self.__ceiling_data.get("valu") if dtype in PEAK_OPS_DATATYPES else None
+ )
+ mfma_data = self.__ceiling_data.get("mfma") if dtype in MFMA_DATATYPES else None
+
+ if valu_data:
+ legend_name = f"Peak VALU-{dtype}
{to_int(valu_data[2])} G{ops_flops}/s"
+ fig.add_trace(
+ go.Scatter(
+ x=valu_data[0],
+ y=valu_data[1],
+ name=legend_name,
+ mode="lines",
+ hovertemplate=f"{legend_name}",
+ ),
+ **subplot_kwargs,
+ )
+
+ if mfma_data:
+ legend_name = f"Peak MFMA-{dtype}
{to_int(mfma_data[2])} G{ops_flops}/s"
+ fig.add_trace(
+ go.Scatter(
+ x=mfma_data[0],
+ y=mfma_data[1],
+ name=legend_name,
+ mode="lines",
+ hovertemplate=f"{legend_name}",
+ ),
+ **subplot_kwargs,
+ )
+
+ #######################
+ # Plot Points Table
+ #######################
+ if is_new_figure and has_kernel_names:
+ symbols_list = [SYMBOLS[i % len(SYMBOLS)] for i in range(num_kernels)]
+
+ for point in plot_points_data:
+ point["symbol"] = symbols_list[point["kernel_idx"]]
+
+ if not plot_points_data or len(plot_points_data) == 0:
+ fig.add_annotation(
+ x=0.5,
+ y=1,
+ text="No plot points available",
+ showarrow=False,
+ xanchor="center",
+ yanchor="middle",
+ font=dict(size=12, color="black"),
+ row=2,
+ col=1,
+ )
+
+ fig.update_xaxes(visible=False, range=[0, 1], row=2, col=1)
+ fig.update_yaxes(visible=False, range=[0, 2], row=2, col=1)
+
+ else:
+ header_y = len(plot_points_data) + 1
+ header_positions = {
+ "Symbol": 0.020,
+ f"{ops_flops}s/Byte": 0.15,
+ f"G{ops_flops}/s": 0.35,
+ "Status": 0.55,
+ "Cache Level": 0.80,
+ }
+
+ for header_text, x_pos in header_positions.items():
+ fig.add_annotation(
+ x=x_pos,
+ y=header_y,
+ text=f"{header_text}",
+ showarrow=False,
+ xanchor="left",
+ yanchor="middle",
+ font=dict(size=11, color="black"),
+ row=2,
+ col=1,
+ )
+
+ # Scatter plot for symbols
+ symbol_x = []
+ symbol_y = []
+ symbol_markers = []
+ symbol_colors = []
+
+ for idx, point in enumerate(plot_points_data):
+ symbol_x.append(0.05)
+ symbol_y.append(len(plot_points_data) - idx)
+ symbol_markers.append(point["symbol"])
+ symbol_colors.append(point["color"])
+
+ fig.add_trace(
+ go.Scatter(
+ x=symbol_x,
+ y=symbol_y,
+ mode="markers",
+ marker=dict(
+ symbol=symbol_markers,
+ size=11,
+ color=symbol_colors,
+ line=dict(width=0, color="black"),
+ ),
+ customdata=[
+ [point["kernel_idx"], point["cache_level"]]
+ for point in plot_points_data
+ ],
+ showlegend=False,
+ hoverinfo="skip",
+ ),
+ row=2,
+ col=1,
+ )
+ # ai, perf, status, cache_level
+ data_positions = [0.15, 0.35, 0.55, 0.80]
+
+ for idx, point in enumerate(plot_points_data):
+ y_pos = len(plot_points_data) - idx
+
+ # Background shading for every other row
+ if idx % 2 == 0:
+ fig.add_shape(
+ type="rect",
+ x0=0,
+ x1=1,
+ y0=y_pos - 1 / 2,
+ y1=y_pos + 1 / 2,
+ fillcolor="rgba(220, 220, 220, 0.3)",
+ line_width=0,
+ layer="below",
+ row=2,
+ col=1,
+ )
+
+ # Border lines for this row
+ fig.add_shape(
+ type="line",
+ x0=0,
+ x1=1,
+ y0=y_pos - 0.5,
+ y1=y_pos - 0.5,
+ line=dict(color="rgba(150, 150, 150, 0.5)", width=1),
+ row=2,
+ col=1,
+ )
+
+ fig.add_annotation(
+ x=data_positions[0],
+ y=y_pos,
+ text=point["ai"],
+ showarrow=False,
+ xanchor="left",
+ yanchor="middle",
+ font=dict(size=10, color="black"),
+ row=2,
+ col=1,
+ )
+ fig.add_annotation(
+ x=data_positions[1],
+ y=y_pos,
+ text=point["performance"],
+ showarrow=False,
+ xanchor="left",
+ yanchor="middle",
+ font=dict(size=10, color="black"),
+ row=2,
+ col=1,
+ )
+
+ status_text = point["status"]
+
+ if "Compute Bound" in status_text:
+ status_color = "DarkOrange"
+ elif "Memory Bound" in status_text:
+ status_color = "blue"
+ else:
+ status_color = "gray"
+ fig.add_annotation(
+ x=data_positions[2],
+ y=y_pos,
+ text=status_text,
+ showarrow=False,
+ xanchor="left",
+ yanchor="middle",
+ font=dict(size=10, color=status_color),
+ row=2,
+ col=1,
+ )
+
+ fig.add_annotation(
+ x=data_positions[3],
+ y=y_pos,
+ text=point["cache_level"],
+ showarrow=False,
+ xanchor="left",
+ yanchor="middle",
+ font=dict(size=10, color="black"),
+ row=2,
+ col=1,
+ )
+
+ # Vertical column separators
+ column_x_positions = [0.12, 0.32, 0.52, 0.75]
+ for x_pos in column_x_positions:
+ fig.add_shape(
+ type="line",
+ x0=x_pos,
+ x1=x_pos,
+ y0=0.5,
+ y1=header_y + 0.5,
+ line=dict(color="rgba(150, 150, 150, 0.5)", width=1),
+ row=2,
+ col=1,
+ )
+
+ # Configure Plot Points subplot axes
+ fig.update_xaxes(
+ visible=False, range=[0, 1], fixedrange=True, row=2, col=1
+ )
+ fig.update_yaxes(
+ visible=False,
+ range=[0, (len(plot_points_data) + 1.5)],
+ fixedrange=True,
+ row=2,
+ col=1,
+ )
+
+ #######################
+ # Kernel Names Table
+ #######################
+
+ y_positions = []
+ row_heights = []
+ current_y = 0
+ KERNEL_PADDING = 0
+ for i in range(num_kernels):
+ # Height for this kernel is proportional to its number of lines
+ kernel_height = lines_per_kernel[i]
+ row_heights.append(kernel_height)
+ # Position at the center of this kernel's allocated space
+ current_y += kernel_height / 2
+ y_positions.append(current_y)
+ current_y += kernel_height / 2 + KERNEL_PADDING
+
+ # Reverse to display top to bottom
+ y_positions = [current_y - y - KERNEL_PADDING / 2 for y in y_positions]
+ max_y = current_y
+ min_y = 0
+
+ kernel_symbol_x = []
+ kernel_symbol_y = []
+ kernel_symbol_markers = []
+
+ for i in range(num_kernels):
+ kernel_symbol_x.append(0.05)
+ kernel_symbol_y.append(y_positions[i])
+ kernel_symbol_markers.append(symbols_list[i])
+
+ # Background shading for every other row
+ if i % 2 == 0:
+ fig.add_shape(
+ type="rect",
+ x0=0,
+ x1=1,
+ y0=y_positions[i] - row_heights[i] / 2,
+ y1=y_positions[i] + row_heights[i] / 2,
+ fillcolor="rgba(220, 220, 220, 0.3)",
+ line_width=0,
+ layer="below",
+ row=3,
+ col=1,
+ )
+
+ # Border lines for this kernel
+ fig.add_shape(
+ type="line",
+ x0=0,
+ x1=1,
+ y0=y_positions[i] - row_heights[i] / 2,
+ y1=y_positions[i] - row_heights[i] / 2,
+ line=dict(color="rgba(150, 150, 150, 0.5)", width=1),
+ row=3,
+ col=1,
+ )
+
+ # Kernel name annotation with wrapped text (left aligned)
+ fig.add_annotation(
+ x=0.15,
+ y=y_positions[i],
+ text=wrapped_kernel_names[i],
+ showarrow=False,
+ xanchor="left",
+ yanchor="middle",
+ align="left",
+ font=dict(size=10, color="black"),
+ row=3,
+ col=1,
+ )
+
+ # Vertical separator between symbol and kernel name
+ fig.add_shape(
+ type="line",
+ x0=0.12,
+ x1=0.12,
+ y0=min_y,
+ y1=max_y,
+ line=dict(color="rgba(150, 150, 150, 0.5)", width=1),
+ row=3,
+ col=1,
+ )
fig.add_trace(
go.Scatter(
- x=self.__ceiling_data[cache_level.lower()][0],
- y=self.__ceiling_data[cache_level.lower()][1],
- name=f"{cache_level}-{dtype}",
- mode=plot_mode,
- hovertemplate="%{text}",
- text=[
- f"{to_int(self.__ceiling_data[cache_key][2])} GB/s",
- (
- None
- if self.__run_parameters.get("is_standalone")
- else f"{to_int(self.__ceiling_data[cache_key][2])} GB/s"
- ),
- ],
- textposition="top right",
- )
+ x=kernel_symbol_x,
+ y=kernel_symbol_y,
+ mode="markers",
+ marker=dict(
+ symbol=kernel_symbol_markers,
+ size=11,
+ color="black",
+ line=dict(width=0, color="black"),
+ ),
+ showlegend=False,
+ hoverinfo="skip",
+ ),
+ row=3,
+ col=1,
)
- # Plot peak VALU ceiling
- if dtype in PEAK_OPS_DATATYPES:
- fig.add_trace(
- go.Scatter(
- x=self.__ceiling_data["valu"][0],
- y=self.__ceiling_data["valu"][1],
- name=f"Peak VALU-{dtype}",
- mode=plot_mode,
- hovertemplate="%{text}",
- text=[
- (
- None
- if self.__run_parameters["is_standalone"]
- else (
- f"{to_int(self.__ceiling_data['valu'][2])} G"
- f"{ops_flops}/s"
- )
- ),
- f"{to_int(self.__ceiling_data['valu'][2])} G{ops_flops}/s",
- ],
- textposition="top left",
- )
+ # Configure Kernel Names subplot axes
+ fig.update_xaxes(visible=False, range=[0, 1], fixedrange=True, row=3, col=1)
+ fig.update_yaxes(
+ visible=False, range=[min_y, max_y], fixedrange=True, row=3, col=1
)
- # Plot peak MFMA ceiling
- if dtype in MFMA_DATATYPES:
- fig.add_trace(
- go.Scatter(
- x=self.__ceiling_data["mfma"][0],
- y=self.__ceiling_data["mfma"][1],
- name=f"Peak MFMA-{dtype}",
- mode=plot_mode,
- hovertemplate="%{text}",
- text=[
- (
- None
- if self.__run_parameters["is_standalone"]
- else (
- f"{to_int(self.__ceiling_data['mfma'][2])} "
- f"G{ops_flops}/s"
- )
- ),
- f"{to_int(self.__ceiling_data['mfma'][2])} G{ops_flops}/s",
- ],
- textposition="top left",
+ #######################
+ # Layout Configuration
+ #######################
+ if is_new_figure:
+ if subplot_row:
+ fig.update_xaxes(
+ type="log",
+ autorange=True,
+ title_text=f"Arithmetic Intensity ({ops_flops}s/Byte)",
+ row=1,
+ col=1,
+ )
+ fig.update_yaxes(
+ type="log",
+ autorange=True,
+ title_text=f"Performance (G{ops_flops}/sec)",
+ row=1,
+ col=1,
+ )
+ fig.update_layout(
+ height=int(total_figure_height),
+ width=1000,
+ hovermode="x unified",
+ margin=dict(l=50, r=180, b=50, t=80, pad=7),
+ legend=dict(
+ orientation="v",
+ yanchor="top",
+ y=1,
+ xanchor="left",
+ x=1.01,
+ font=dict(size=10),
+ ),
+ )
+ else:
+ # Fallback to simple figure without subplots
+ fig.update_layout(
+ xaxis_title=f"Arithmetic Intensity ({ops_flops}s/Byte)",
+ yaxis_title=f"Performance (G{ops_flops}/sec)",
+ xaxis_type="log",
+ yaxis_type="log",
+ xaxis_autorange=True,
+ yaxis_autorange=True,
+ height=int(total_figure_height),
+ hovermode="x unified",
+ margin=dict(l=50, r=50, b=50, t=50, pad=7),
)
- )
- fig.update_xaxes(type="log", autorange=True)
- fig.update_yaxes(type="log", autorange=True)
+ # Update subplot title for additional datatypes
+ if (
+ not is_new_figure
+ and subplot_row
+ and hasattr(fig, "layout")
+ and hasattr(fig.layout, "annotations")
+ ):
+ for annotation in fig.layout.annotations:
+ if annotation.text and "Roofline Analysis" in annotation.text:
+ if "(" in annotation.text and ")" in annotation.text:
+ existing_text = annotation.text.split("(")[0]
+ existing_types = annotation.text.split("(")[1].split(")")[0]
+ new_types = f"{existing_types}, {dtype}"
+ annotation.text = f"{existing_text}({new_types})"
+ break
return fig
diff --git a/projects/rocprofiler-compute/src/utils/roofline_calc.py b/projects/rocprofiler-compute/src/utils/roofline_calc.py
index c91fd76bb4..f604cadbd2 100644
--- a/projects/rocprofiler-compute/src/utils/roofline_calc.py
+++ b/projects/rocprofiler-compute/src/utils/roofline_calc.py
@@ -26,7 +26,7 @@
import csv
from dataclasses import dataclass
from pathlib import Path
-from typing import Any, Union
+from typing import Any, Optional, Union
import pandas as pd
@@ -201,9 +201,23 @@ def calc_ceilings(
roofline_parameters: dict[str, Any],
dtype: str,
benchmark_data: dict[str, list[str]],
+ ai_data: Optional[dict] = None,
) -> dict[str, list[Union[list[float], float, None]]]:
"""Given benchmarking data, calculate ceilings (or peak performance) for
empirical roofline"""
+
+ if ai_data:
+ max_ai = 0
+ for cache_level in ["ai_l1", "ai_l2", "ai_hbm"]:
+ if cache_level in ai_data and ai_data[cache_level][0]:
+ cache_max = max(ai_data[cache_level][0])
+ max_ai = max(max_ai, cache_max)
+
+ dynamic_xmax = max_ai * 1.2 if max_ai > 0 else 1000
+ else:
+ dynamic_xmax = 1000
+ print(XMAX, dynamic_xmax)
+
# TODO: This is where filtering by memory level will need to occur for standalone
graph_points: dict[str, list[Union[list[float], float, None]]] = {
"hbm": [],
@@ -281,18 +295,24 @@ def calc_ceilings(
# ----------------------------------------------------------------------------------
if dtype in PEAK_OPS_DATATYPES:
# Plot FMA roof
- x0 = min(x2, XMAX) if x2 < XMAX else XMAX
+ x0 = min(x2, dynamic_xmax) if x2 < dynamic_xmax else dynamic_xmax
- console_debug(f"FMA ROOF [{x0}, {XMAX}], [{peak_ops},{peak_ops}]")
- graph_points["valu"].extend([[x0, XMAX], [peak_ops, peak_ops], peak_ops])
+ console_debug(f"FMA ROOF [{x0}, {dynamic_xmax}], [{peak_ops},{peak_ops}]")
+ graph_points["valu"].extend([
+ [x0, dynamic_xmax],
+ [peak_ops, peak_ops],
+ peak_ops,
+ ])
# Plot MFMA roof
if dtype in MFMA_DATATYPES: # assert that mfma has been assigned
- x0_mfma = min(x2_mfma, XMAX) if x2_mfma < XMAX else XMAX
+ x0_mfma = min(x2_mfma, dynamic_xmax) if x2_mfma < dynamic_xmax else dynamic_xmax
- console_debug(f"MFMA ROOF [{x0_mfma}, {XMAX}], [{peak_mfma},{peak_mfma}]")
+ console_debug(
+ f"MFMA ROOF [{x0_mfma}, {dynamic_xmax}], [{peak_mfma},{peak_mfma}]"
+ )
graph_points["mfma"].extend([
- [x0_mfma, XMAX],
+ [x0_mfma, dynamic_xmax],
[peak_mfma, peak_mfma],
peak_mfma,
])
@@ -774,7 +794,7 @@ def calc_ai_profile(
def construct_roof(
- roofline_parameters: dict[str, Any], dtype: str
+ roofline_parameters: dict[str, Any], dtype: str, ai_data: Optional[dict] = None
) -> dict[str, list[Union[list[float], float, None]]]:
workload_dir = roofline_parameters.get("workload_dir")
if isinstance(workload_dir, list):
@@ -817,4 +837,4 @@ def construct_roof(
# ------------------
# Generate Roofline
# ------------------
- return calc_ceilings(roofline_parameters, dtype, benchmark_data)
+ return calc_ceilings(roofline_parameters, dtype, benchmark_data, ai_data)
diff --git a/projects/rocprofiler-compute/tests/test_profile_general.py b/projects/rocprofiler-compute/tests/test_profile_general.py
index bc4794ff62..7e106f68e4 100644
--- a/projects/rocprofiler-compute/tests/test_profile_general.py
+++ b/projects/rocprofiler-compute/tests/test_profile_general.py
@@ -156,7 +156,6 @@ ALL_CSVS_MI350 = sorted([
ROOF_ONLY_FILES = sorted([
"empirRoof_gpu-0_FP32.pdf",
- "kernelName_legend.pdf",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
@@ -946,7 +945,6 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute):
return
# Test `--kernel` filtering outputs are present and labelled correctly
- filter_kernelName = "kernelName_legend_" + config["kernel_name_1"]
filter_empirRoof = "empirRoof_gpu-0_" + config["kernel_name_1"]
plot_configurations = [
@@ -967,7 +965,7 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute):
"--kernel",
config["kernel_name_1"],
],
- "expected_files": [filter_kernelName, filter_empirRoof],
+ "expected_files": [filter_empirRoof],
},
]
@@ -1387,10 +1385,7 @@ def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute):
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
- expected_files = ROOF_ONLY_FILES.copy()
- expected_files.remove("kernelName_legend.pdf")
- expected_files = sorted(expected_files)
- assert sorted(list(file_dict.keys())) == expected_files
+ assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
validate(
inspect.stack()[0][3],
@@ -1420,10 +1415,7 @@ def test_roof_sort_kernels(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
- expected_files = ROOF_ONLY_FILES.copy()
- expected_files.remove("kernelName_legend.pdf")
- expected_files = sorted(expected_files)
- assert sorted(list(file_dict.keys())) == expected_files
+ assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
validate(
inspect.stack()[0][3],
@@ -1453,10 +1445,7 @@ def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
- expected_files = ROOF_ONLY_FILES.copy()
- expected_files.remove("kernelName_legend.pdf")
- expected_files = sorted(expected_files)
- assert sorted(list(file_dict.keys())) == expected_files
+ assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
validate(
inspect.stack()[0][3],
@@ -1486,10 +1475,7 @@ def test_roof_mem_levels_LDS(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
- expected_files = ROOF_ONLY_FILES.copy()
- expected_files.remove("kernelName_legend.pdf")
- expected_files = sorted(expected_files)
- assert sorted(list(file_dict.keys())) == expected_files
+ assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
validate(
inspect.stack()[0][3],