diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py index 682a0c3181..52fd62b8f3 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py @@ -220,7 +220,7 @@ class webui_analysis(OmniAnalyze_Base): "device_id": 0, "sort_type": "kernels", "mem_level": "ALL", - "include_kernel_names": False, + "include_kernel_names": True, "is_standalone": False, "roofline_data_type": self.__roofline_data_type, "kernel_filter": False, diff --git a/projects/rocprofiler-compute/src/roofline.py b/projects/rocprofiler-compute/src/roofline.py index 5c39ade822..57f47bc67d 100644 --- a/projects/rocprofiler-compute/src/roofline.py +++ b/projects/rocprofiler-compute/src/roofline.py @@ -22,10 +22,8 @@ # THE SOFTWARE. ############################################################################## - import argparse import textwrap -import time from abc import abstractmethod from collections import OrderedDict from pathlib import Path @@ -36,6 +34,7 @@ import pandas as pd import plotext as plt import plotly.graph_objects as go from dash import dcc, html +from plotly.subplots import make_subplots from utils import file_io, rocpd_data, schema from utils.logger import ( @@ -58,7 +57,7 @@ from utils.specs import MachineSpecs SYMBOLS = [0, 1, 2, 3, 4, 5, 13, 17, 18, 20] -def wrap_text(text: str, width: int = 92) -> str: +def wrap_text(text: str, width: int = 100) -> str: """ Wraps text using textwrap and joins lines with
for Plotly. """ @@ -94,7 +93,6 @@ class Roofline: "device_id": 0, "sort_type": "kernels", "mem_level": "ALL", - "include_kernel_names": False, "is_standalone": False, "roofline_data_type": ["FP32"], # default to FP32 "kernel_filter": False, @@ -109,8 +107,6 @@ class Roofline: self.__run_parameters["workload_dir"] = self.__args.path if hasattr(self.__args, "no_roof") and not self.__args.no_roof: self.__run_parameters["is_standalone"] = True - if hasattr(self.__args, "kernel_names") and self.__args.kernel_names: - self.__run_parameters["include_kernel_names"] = True if hasattr(self.__args, "mem_level") and self.__args.mem_level != "ALL": self.__run_parameters["mem_level"] = self.__args.mem_level if hasattr(self.__args, "sort") and self.__args.sort != "ALL": @@ -120,19 +116,10 @@ class Roofline: hasattr(self.__args, "gpu_kernel") and self.__args.gpu_kernel ): self.__run_parameters["kernel_filter"] = True - self.validate_parameters() def get_args(self) -> argparse.Namespace: return self.__args - def validate_parameters(self) -> None: - if self.__run_parameters["include_kernel_names"] and ( - not self.__run_parameters["is_standalone"] - ): - console_warning( - "--kernel-names is nonactionable when used with --no-roof option" - ) - def roof_setup(self) -> None: # Setup the workload directory for roofline profiling. 
def _determine_kernel_bound_status(
    self,
    ai_value: float,
    performance: float,
    cache_level: str,
    ceiling_data: dict[str, Any],
) -> str:
    """
    Classify a kernel point as memory-bound or compute-bound.

    The point is compared against the ridge point of its own cache level's
    roofline: the arithmetic intensity at which that level's bandwidth
    ceiling meets the lowest available peak-performance ceiling
    (``min(valu, mfma) / bandwidth``).

    Args:
        ai_value: Arithmetic intensity of the kernel point.
        performance: Achieved performance of the kernel point. Not used by
            the classification; kept so existing callers keep working.
        cache_level: Cache level key, with or without the ``"ai_"`` prefix
            (e.g. ``"ai_l1"`` or ``"l1"``).
        ceiling_data: Mapping of ceiling name (cache level, ``"valu"``,
            ``"mfma"``) to a sequence whose third element is the ceiling's
            numeric value.

    Returns:
        ``"Memory Bound"`` if ``ai_value`` lies left of the ridge point,
        ``"Compute Bound"`` otherwise, or ``"Unknown"`` when the bandwidth
        or every peak ceiling is missing, malformed, or not a positive
        finite number.
    """

    def _ceiling_value(key):
        """Return the positive finite ceiling stored under *key*, else None."""
        entry = ceiling_data.get(key)
        if not isinstance(entry, (list, tuple)) or len(entry) < 3:
            return None
        try:
            value = float(entry[2])
        except (TypeError, ValueError):
            # None or non-numeric placeholder: treat the ceiling as missing.
            return None
        # Rejects zero, negatives, NaN (comparison is False) and infinity,
        # none of which yield a meaningful ridge point.
        if value > 0 and value != float("inf"):
            return value
        return None

    # Strip only a leading "ai_" so the key matches the ceiling names.
    prefix = "ai_"
    if cache_level.startswith(prefix):
        cache_key = cache_level[len(prefix):]
    else:
        cache_key = cache_level

    bandwidth = _ceiling_value(cache_key)
    if bandwidth is None:
        return "Unknown"

    # The lowest available peak decides where the roof flattens out.
    peaks = [
        peak
        for peak in (_ceiling_value("valu"), _ceiling_value("mfma"))
        if peak is not None
    ]
    if not peaks:
        return "Unknown"

    ridge_point = min(peaks) / bandwidth
    return "Memory Bound" if ai_value < ridge_point else "Compute Bound"
[]): gpu_arch = getattr(self.__mspec, "gpu_arch", "unknown_arch") if ( @@ -306,170 +351,60 @@ class Roofline: fig=ops_figure, ) else: - ops_figure = self.generate_plot(dtype=str(dt)) + ops_figure = self.generate_plot( + dtype=str(dt), + kernel_names_data=kernel_names_data, + ) ops_dt_list += "_" + str(dt) + # store ceiling data for this datatype + all_ops_ceiling_data[str(dt)] = self.__ceiling_data if ops_flops == "Flops": if flops_figure: - flops_figure = self.generate_plot(dtype=str(dt), fig=flops_figure) + flops_figure = self.generate_plot( + dtype=str(dt), + fig=flops_figure, + ) else: - flops_figure = self.generate_plot(dtype=str(dt)) + flops_figure = self.generate_plot( + dtype=str(dt), + kernel_names_data=kernel_names_data, + ) flops_dt_list += "_" + str(dt) - - if self.__run_parameters.get("include_kernel_names", False): - if self.__ai_data is None: - console_error( - "Roofline Error: self.__ai_data is not populated. " - "Cannot generate kernel names info.", - exit=False, - ) - original_kernel_names = [] - else: - original_kernel_names = self.__ai_data.get("kernelNames", []) - - num_kernels = len(original_kernel_names) - self.__figure.data = [] - self.__figure.layout = {} - - if num_kernels == 0: - # Create empty kernel names figure when no kernels are found - console_log( - "roofline", - "No kernel names found to generate " - "'Kernel Names and Markers' info.", - ) - self.__figure.add_annotation( - text="No kernel names to display.", - showarrow=False, - xref="paper", - yref="paper", - x=0.5, - y=0.5, - ) - self.__figure.update_layout( - title_text="Kernel Names and Markers", - title_x=0.5, - xaxis=dict(visible=False), - yaxis=dict(visible=False), - plot_bgcolor="white", - paper_bgcolor="white", - height=200, - width=400, - ) - else: - # Create populated kernel names figure with symbols and names. 
- symbols_list = [SYMBOLS[i % len(SYMBOLS)] for i in range(num_kernels)] - - self.__figure = go.Figure() - self.__figure.add_trace( - go.Scatter( - x=[0.1] * num_kernels, - y=list(range(num_kernels, 0, -1)), - mode="markers", - marker=dict( - symbol=symbols_list, - size=15, - color="blue", - line=dict(width=1, color="black"), - ), - showlegend=False, - hoverinfo="skip", - ) - ) - - # Add kernel name annotations - for i, kernel_name in enumerate(original_kernel_names): - self.__figure.add_annotation( - x=0.25, - y=num_kernels - i, - text=wrap_text(kernel_name), - showarrow=False, - xanchor="left", - yanchor="middle", - align="left", - font=dict(size=11, color="black"), - ) - - # Add formatting elements to kernel names figure. - self.__figure.add_annotation( - x=0.1, - y=num_kernels + 1, - text="Symbol", - showarrow=False, - xanchor="center", - yanchor="middle", - font=dict(size=12, color="black"), - ) - self.__figure.add_annotation( - x=0.25, - y=num_kernels + 1, - text="Kernel Name", - showarrow=False, - xanchor="left", - yanchor="middle", - font=dict(size=12, color="black"), - ) - - # Add grid lines - for i in range(num_kernels + 1): - self.__figure.add_shape( - type="line", - x0=0, - x1=1, - y0=i + 0.5, - y1=i + 0.5, - line=dict(color="lightgray", width=1), - ) - - self.__figure.add_shape( - type="line", - x0=0.2, - x1=0.2, - y0=0.5, - y1=num_kernels + 1.5, - line=dict(color="lightgray", width=1), - ) - - self.__figure.update_layout( - title="Kernel Names and Corresponding Markers", - title_x=0.5, - xaxis=dict(visible=False, range=[0, 1]), - yaxis=dict( - visible=False, range=[0, num_kernels + 2], autorange=False - ), - height=max(400, num_kernels * 40 + 150), - width=1000, - margin=dict(l=50, r=50, t=70, b=30), - plot_bgcolor="white", - paper_bgcolor="white", - ) + # Store ceiling data for this datatype + all_flops_ceiling_data[str(dt)] = self.__ceiling_data # Output will be different depending on interaction type: # Save PDFs if we're in "standalone roofline" 
mode, - # otherwise return HTML to be used in GUI output + # otherwise return HTML to be used in GUI outputif flops_figure: + if self.__run_parameters["is_standalone"]: dev_id = str(self.__run_parameters["device_id"]) if self.__run_parameters.get("kernel_filter", False): for name in sorted(self.__args.kernel): kernel_list += "_" + name - # Re-save to remove loading MathJax pop up - for _ in range(2): - if ops_figure: - ops_figure.write_image( - f"{self.__run_parameters['workload_dir']}/empirRoof_gpu-{dev_id}{ops_dt_list}{kernel_list}.pdf" - ) - if flops_figure: - flops_figure.write_image( - f"{self.__run_parameters['workload_dir']}/empirRoof_gpu-{dev_id}{flops_dt_list}{kernel_list}.pdf" - ) + if ops_figure: + actual_height = int(ops_figure.layout.height) + # minimum height of 1000 to avoid cutting off content + pdf_height = max(actual_height, 1000) - # only save a legend if kernel_names option is toggled - if self.__run_parameters["include_kernel_names"]: - self.__figure.write_image( - f"{self.__run_parameters['workload_dir']}/kernelName_legend{kernel_list}.pdf" - ) - time.sleep(1) + ops_figure.write_image( + f"{self.__run_parameters['workload_dir']}/empirRoof_gpu-{dev_id}{ops_dt_list}{kernel_list}.pdf", + width=1000, + height=pdf_height, + ) + + if flops_figure: + actual_height = int(flops_figure.layout.height) + # minimum height of 1000 to avoid cutting off content + pdf_height = max(actual_height, 1000) + + flops_figure.write_image( + f"{self.__run_parameters['workload_dir']}/empirRoof_gpu-{dev_id}{flops_dt_list}{kernel_list}.pdf", + width=1000, + height=pdf_height, + ) console_log("roofline", "Empirical Roofline PDFs saved!") else: @@ -512,99 +447,202 @@ class Roofline: ) @demarcate - def generate_plot(self, dtype: str, fig: Optional[go.Figure] = None) -> go.Figure: + def generate_plot( + self, + dtype: str, + fig: Optional[go.Figure] = None, + kernel_names_data: Optional[dict] = None, + ) -> go.Figure: """ Create graph object from ai_data (coordinate points) 
and ceiling_data (peak FLOP and BW) data. """ - if fig is None: - fig = go.Figure() - skipAI = False - else: - skipAI = True # Don't repeat AI plotting + is_new_figure = fig is None + has_kernel_names = kernel_names_data is not None and is_new_figure + skipAI = not is_new_figure - plot_mode = "lines+text" if self.__run_parameters["is_standalone"] else "lines" + subplot_row = None + total_figure_height = 600 # default height + + if is_new_figure: + if has_kernel_names: + raw_kernel_names = kernel_names_data.get("kernel_names", []) + num_kernels = len(raw_kernel_names) + + wrapped_kernel_names = [wrap_text(name) for name in raw_kernel_names] + lines_per_kernel = [ + text.count("
") + 1 for text in wrapped_kernel_names + ] + temp_ceiling_data = construct_roof( + roofline_parameters=self.__run_parameters, + dtype=dtype, + ai_data=self.__ai_data, + ) + + plot_points_data = [] + cache_colors = { + "ai_l1": "blue", + "ai_l2": "green", + "ai_hbm": "red", + "ai_lds": "orange", + } + + for cache_level in ["ai_l1", "ai_l2", "ai_hbm"]: + if cache_level in self.__ai_data: + x_vals = self.__ai_data[cache_level][0] + y_vals = self.__ai_data[cache_level][1] + + for i in range(min(len(x_vals), num_kernels)): + if x_vals[i] > 0 and y_vals[i] > 0: + status = self._determine_kernel_bound_status( + ai_value=x_vals[i], + performance=y_vals[i], + cache_level=cache_level, + ceiling_data=temp_ceiling_data, + ) + + plot_points_data.append({ + "symbol": None, + "color": cache_colors.get(cache_level, "gray"), + "cache_level": cache_level.replace( + "ai_", "", 1 + ).upper(), + "ai": f"{x_vals[i]:.2f}", + "performance": f"{y_vals[i]:.2f}", + "status": status, + "kernel_idx": i, + }) + + ###################################### + # Define Figure Measurement Constants + ###################################### + + ROOFLINE_PLOT_HEIGHT = 500 # Default height of plot itself + + POINTS_ROW_HEIGHT = 25 # Pixel height of each plot point row + num_plot_points = len(plot_points_data) # Number of plot points + PLOT_POINTS_HEIGHT = ( + num_plot_points + 2 + ) * POINTS_ROW_HEIGHT # +2 for header and spacing + + BASE_ROW_HEIGHT = 15 # Base pixel height of each kernel name row + KERNEL_PADDING = 8 # Padding in between each kernel name row + KERNEL_NAMES_HEIGHT = ( + sum(lines_per_kernel) * BASE_ROW_HEIGHT + + (num_kernels - 1) * KERNEL_PADDING + + BASE_ROW_HEIGHT + ) + + total_figure_height = ( + ROOFLINE_PLOT_HEIGHT + PLOT_POINTS_HEIGHT + KERNEL_NAMES_HEIGHT + ) + + total_content_height = ( + ROOFLINE_PLOT_HEIGHT + PLOT_POINTS_HEIGHT + KERNEL_NAMES_HEIGHT + ) + roofline_ratio = ROOFLINE_PLOT_HEIGHT / total_content_height + plot_points_ratio = PLOT_POINTS_HEIGHT / total_content_height 
+ kernel_names_ratio = 1 - roofline_ratio - plot_points_ratio + SUBPLOT_SPACING_PX = 80 # Constant - num of pixels between each subplot + fig = make_subplots( + rows=3, + cols=1, + row_heights=[roofline_ratio, plot_points_ratio, kernel_names_ratio], + subplot_titles=[ + f"Roofline Analysis ({dtype})", + "Plot Points & Values", + "Full Kernel Names", + ], + vertical_spacing=SUBPLOT_SPACING_PX / total_figure_height, + specs=[ + [{"type": "scatter"}], # Roofline plot + [{"type": "scatter"}], # Plot points table + [{"type": "scatter"}], # Kernel names table + ], + ) + + subplot_row = 1 + skipAI = False + else: + # Adding to existing figure + if hasattr(fig, "_grid_ref") and fig._grid_ref is not None: + subplot_row = 1 + if hasattr(fig, "layout") and hasattr(fig.layout, "height"): + total_figure_height = fig.layout.height + skipAI = True self.__ceiling_data = construct_roof( roofline_parameters=self.__run_parameters, dtype=dtype, + ai_data=self.__ai_data, ) console_debug("roofline", f"Ceiling data:\n{self.__ceiling_data}") - ops_flops = "OP" if dtype.startswith("I") else "FLOP" # For printing purposes + ops_flops = "OP" if dtype.startswith("I") else "FLOP" + subplot_kwargs = {"row": subplot_row, "col": 1} if subplot_row else {} ####################### # Plot Application AI ####################### - # Plot the arithmetic intensity points for each cache level - if ops_flops == "FLOP": - if not skipAI: + if ops_flops == "FLOP" and not skipAI: + kernel_names = self.__ai_data.get("kernelNames", []) + symbols_list = [SYMBOLS[i % len(SYMBOLS)] for i in range(len(kernel_names))] + show_in_legend = not self.__run_parameters["is_standalone"] + if self.__ai_data["ai_l1"][0]: fig.add_trace( go.Scatter( x=self.__ai_data["ai_l1"][0], y=self.__ai_data["ai_l1"][1], - name="ai_l1", + name="L1", mode="markers", - marker_symbol=( - SYMBOLS - if self.__run_parameters["include_kernel_names"] - else None + marker=dict( + color="blue", + size=10, + symbol=symbols_list[: 
len(self.__ai_data["ai_l1"][0])], ), - ) + showlegend=show_in_legend, + ), + **subplot_kwargs, ) + + if self.__ai_data["ai_l2"][0]: fig.add_trace( go.Scatter( x=self.__ai_data["ai_l2"][0], y=self.__ai_data["ai_l2"][1], - name="ai_l2", + name="L2", mode="markers", - marker_symbol=( - SYMBOLS - if self.__run_parameters["include_kernel_names"] - else None + marker=dict( + color="green", + size=10, + symbol=symbols_list[: len(self.__ai_data["ai_l2"][0])], ), - ) + showlegend=show_in_legend, + ), + **subplot_kwargs, ) + + if self.__ai_data["ai_hbm"][0]: fig.add_trace( go.Scatter( x=self.__ai_data["ai_hbm"][0], y=self.__ai_data["ai_hbm"][1], - name="ai_hbm", + name="HBM", mode="markers", - marker_symbol=( - SYMBOLS - if self.__run_parameters["include_kernel_names"] - else None + marker=dict( + color="red", + size=10, + symbol=symbols_list[: len(self.__ai_data["ai_hbm"][0])], ), - ) + showlegend=show_in_legend, + ), + **subplot_kwargs, ) - # Set layout - fig.update_layout( - xaxis_title="Arithmetic Intensity (FLOPs/Byte)", - yaxis_title="Performance (GFLOP/sec)", - hovermode="x unified", - margin=dict(l=50, r=50, b=50, t=50, pad=4), - ) - else: - # Set layout - fig.update_layout( - xaxis_title="Bandwidth (GB/sec)", - yaxis_title="Performance (GOP/sec)", - hovermode="x unified", - margin=dict(l=50, r=50, b=50, t=50, pad=4), - ) - console_debug( - "roofline", - "Roofline analysis only supports AI for " - "floating point calculations at this time", - ) - ####################### - # Plot ceilings + # Bandwidth Ceilings ####################### mem_level_config = self.__run_parameters.get("mem_level", "ALL") - cache_hierarchy = ( ["HBM", "L2", "L1", "LDS"] if mem_level_config == "ALL" @@ -615,94 +653,482 @@ class Roofline: ) ) - # Plot peak BW ceiling(s) - for cache_level in cache_hierarchy: - cache_key = cache_level.lower() - + bandwidth_lines = [] + for level in cache_hierarchy: + key = level.lower() + line_data = self.__ceiling_data.get(key) if ( - not self.__ceiling_data - 
or cache_level.lower() not in self.__ceiling_data - or not isinstance( - self.__ceiling_data[cache_level.lower()], (list, tuple) - ) - or len(self.__ceiling_data[cache_level.lower()]) < 3 + line_data + and isinstance(line_data, (list, tuple)) + and len(line_data) >= 3 ): - console_error( - f"Ceiling data for {cache_level} is missing " - f"or malformed for dtype {dtype}.", - exit=False, + bandwidth_lines.append({ + "key": key, + "level": level, + "x": line_data[0], + "y": line_data[1], + "value": line_data[2], + "dtype": dtype, + }) + + for bw_line in bandwidth_lines: + value = to_int(bw_line["value"]) + level = bw_line["level"] + + trace_to_update = None + for trace in fig.data: + is_correct_level = trace.name and trace.name.startswith( + f"{level.upper()}-" ) - continue + has_correct_value = False + if trace.name and "
" in trace.name: + try: + # Extract value from legend name + value_part = trace.name.split("
")[1] + existing_val = int(value_part.split()[0]) + if existing_val == value: + has_correct_value = True + except (ValueError, IndexError): + pass + + if is_correct_level and has_correct_value: + trace_to_update = trace + break + + if trace_to_update: + try: + # Extract existing datatypes from name + name_part = trace_to_update.name.split("
")[0] + existing_dts_str = name_part.split("-", 1)[1] + existing_dts = [dt.strip() for dt in existing_dts_str.split(",")] + except Exception: + continue + + all_dts = sorted(list(set(existing_dts + [dtype]))) + all_dts_str = ", ".join(all_dts) + legend_name = f"{level.upper()}-{all_dts_str}
{value} GB/s" + + fig.update_traces( + patch={ + "name": legend_name, + "hovertemplate": f"{legend_name}", + }, + selector={"name": trace_to_update.name}, + ) + else: + # New bandwidth line with value in legend + legend_name = f"{level.upper()}-{dtype}
{value} GB/s" + + fig.add_trace( + go.Scatter( + x=bw_line["x"], + y=bw_line["y"], + name=legend_name, + mode="lines", + hovertemplate=f"{legend_name}", + ), + **subplot_kwargs, + ) + + ####################### + # Peak Performance + ####################### + valu_data = ( + self.__ceiling_data.get("valu") if dtype in PEAK_OPS_DATATYPES else None + ) + mfma_data = self.__ceiling_data.get("mfma") if dtype in MFMA_DATATYPES else None + + if valu_data: + legend_name = f"Peak VALU-{dtype}
{to_int(valu_data[2])} G{ops_flops}/s" + fig.add_trace( + go.Scatter( + x=valu_data[0], + y=valu_data[1], + name=legend_name, + mode="lines", + hovertemplate=f"{legend_name}", + ), + **subplot_kwargs, + ) + + if mfma_data: + legend_name = f"Peak MFMA-{dtype}
{to_int(mfma_data[2])} G{ops_flops}/s" + fig.add_trace( + go.Scatter( + x=mfma_data[0], + y=mfma_data[1], + name=legend_name, + mode="lines", + hovertemplate=f"{legend_name}", + ), + **subplot_kwargs, + ) + + ####################### + # Plot Points Table + ####################### + if is_new_figure and has_kernel_names: + symbols_list = [SYMBOLS[i % len(SYMBOLS)] for i in range(num_kernels)] + + for point in plot_points_data: + point["symbol"] = symbols_list[point["kernel_idx"]] + + if not plot_points_data or len(plot_points_data) == 0: + fig.add_annotation( + x=0.5, + y=1, + text="No plot points available", + showarrow=False, + xanchor="center", + yanchor="middle", + font=dict(size=12, color="black"), + row=2, + col=1, + ) + + fig.update_xaxes(visible=False, range=[0, 1], row=2, col=1) + fig.update_yaxes(visible=False, range=[0, 2], row=2, col=1) + + else: + header_y = len(plot_points_data) + 1 + header_positions = { + "Symbol": 0.020, + f"{ops_flops}s/Byte": 0.15, + f"G{ops_flops}/s": 0.35, + "Status": 0.55, + "Cache Level": 0.80, + } + + for header_text, x_pos in header_positions.items(): + fig.add_annotation( + x=x_pos, + y=header_y, + text=f"{header_text}", + showarrow=False, + xanchor="left", + yanchor="middle", + font=dict(size=11, color="black"), + row=2, + col=1, + ) + + # Scatter plot for symbols + symbol_x = [] + symbol_y = [] + symbol_markers = [] + symbol_colors = [] + + for idx, point in enumerate(plot_points_data): + symbol_x.append(0.05) + symbol_y.append(len(plot_points_data) - idx) + symbol_markers.append(point["symbol"]) + symbol_colors.append(point["color"]) + + fig.add_trace( + go.Scatter( + x=symbol_x, + y=symbol_y, + mode="markers", + marker=dict( + symbol=symbol_markers, + size=11, + color=symbol_colors, + line=dict(width=0, color="black"), + ), + customdata=[ + [point["kernel_idx"], point["cache_level"]] + for point in plot_points_data + ], + showlegend=False, + hoverinfo="skip", + ), + row=2, + col=1, + ) + # ai, perf, status, cache_level 
+ data_positions = [0.15, 0.35, 0.55, 0.80] + + for idx, point in enumerate(plot_points_data): + y_pos = len(plot_points_data) - idx + + # Background shading for every other row + if idx % 2 == 0: + fig.add_shape( + type="rect", + x0=0, + x1=1, + y0=y_pos - 1 / 2, + y1=y_pos + 1 / 2, + fillcolor="rgba(220, 220, 220, 0.3)", + line_width=0, + layer="below", + row=2, + col=1, + ) + + # Border lines for this row + fig.add_shape( + type="line", + x0=0, + x1=1, + y0=y_pos - 0.5, + y1=y_pos - 0.5, + line=dict(color="rgba(150, 150, 150, 0.5)", width=1), + row=2, + col=1, + ) + + fig.add_annotation( + x=data_positions[0], + y=y_pos, + text=point["ai"], + showarrow=False, + xanchor="left", + yanchor="middle", + font=dict(size=10, color="black"), + row=2, + col=1, + ) + fig.add_annotation( + x=data_positions[1], + y=y_pos, + text=point["performance"], + showarrow=False, + xanchor="left", + yanchor="middle", + font=dict(size=10, color="black"), + row=2, + col=1, + ) + + status_text = point["status"] + + if "Compute Bound" in status_text: + status_color = "DarkOrange" + elif "Memory Bound" in status_text: + status_color = "blue" + else: + status_color = "gray" + fig.add_annotation( + x=data_positions[2], + y=y_pos, + text=status_text, + showarrow=False, + xanchor="left", + yanchor="middle", + font=dict(size=10, color=status_color), + row=2, + col=1, + ) + + fig.add_annotation( + x=data_positions[3], + y=y_pos, + text=point["cache_level"], + showarrow=False, + xanchor="left", + yanchor="middle", + font=dict(size=10, color="black"), + row=2, + col=1, + ) + + # Vertical column separators + column_x_positions = [0.12, 0.32, 0.52, 0.75] + for x_pos in column_x_positions: + fig.add_shape( + type="line", + x0=x_pos, + x1=x_pos, + y0=0.5, + y1=header_y + 0.5, + line=dict(color="rgba(150, 150, 150, 0.5)", width=1), + row=2, + col=1, + ) + + # Configure Plot Points subplot axes + fig.update_xaxes( + visible=False, range=[0, 1], fixedrange=True, row=2, col=1 + ) + fig.update_yaxes( + 
visible=False, + range=[0, (len(plot_points_data) + 1.5)], + fixedrange=True, + row=2, + col=1, + ) + + ####################### + # Kernel Names Table + ####################### + + y_positions = [] + row_heights = [] + current_y = 0 + KERNEL_PADDING = 0 + for i in range(num_kernels): + # Height for this kernel is proportional to its number of lines + kernel_height = lines_per_kernel[i] + row_heights.append(kernel_height) + # Position at the center of this kernel's allocated space + current_y += kernel_height / 2 + y_positions.append(current_y) + current_y += kernel_height / 2 + KERNEL_PADDING + + # Reverse to display top to bottom + y_positions = [current_y - y - KERNEL_PADDING / 2 for y in y_positions] + max_y = current_y + min_y = 0 + + kernel_symbol_x = [] + kernel_symbol_y = [] + kernel_symbol_markers = [] + + for i in range(num_kernels): + kernel_symbol_x.append(0.05) + kernel_symbol_y.append(y_positions[i]) + kernel_symbol_markers.append(symbols_list[i]) + + # Background shading for every other row + if i % 2 == 0: + fig.add_shape( + type="rect", + x0=0, + x1=1, + y0=y_positions[i] - row_heights[i] / 2, + y1=y_positions[i] + row_heights[i] / 2, + fillcolor="rgba(220, 220, 220, 0.3)", + line_width=0, + layer="below", + row=3, + col=1, + ) + + # Border lines for this kernel + fig.add_shape( + type="line", + x0=0, + x1=1, + y0=y_positions[i] - row_heights[i] / 2, + y1=y_positions[i] - row_heights[i] / 2, + line=dict(color="rgba(150, 150, 150, 0.5)", width=1), + row=3, + col=1, + ) + + # Kernel name annotation with wrapped text (left aligned) + fig.add_annotation( + x=0.15, + y=y_positions[i], + text=wrapped_kernel_names[i], + showarrow=False, + xanchor="left", + yanchor="middle", + align="left", + font=dict(size=10, color="black"), + row=3, + col=1, + ) + + # Vertical separator between symbol and kernel name + fig.add_shape( + type="line", + x0=0.12, + x1=0.12, + y0=min_y, + y1=max_y, + line=dict(color="rgba(150, 150, 150, 0.5)", width=1), + row=3, + col=1, + ) 
fig.add_trace( go.Scatter( - x=self.__ceiling_data[cache_level.lower()][0], - y=self.__ceiling_data[cache_level.lower()][1], - name=f"{cache_level}-{dtype}", - mode=plot_mode, - hovertemplate="%{text}", - text=[ - f"{to_int(self.__ceiling_data[cache_key][2])} GB/s", - ( - None - if self.__run_parameters.get("is_standalone") - else f"{to_int(self.__ceiling_data[cache_key][2])} GB/s" - ), - ], - textposition="top right", - ) + x=kernel_symbol_x, + y=kernel_symbol_y, + mode="markers", + marker=dict( + symbol=kernel_symbol_markers, + size=11, + color="black", + line=dict(width=0, color="black"), + ), + showlegend=False, + hoverinfo="skip", + ), + row=3, + col=1, ) - # Plot peak VALU ceiling - if dtype in PEAK_OPS_DATATYPES: - fig.add_trace( - go.Scatter( - x=self.__ceiling_data["valu"][0], - y=self.__ceiling_data["valu"][1], - name=f"Peak VALU-{dtype}", - mode=plot_mode, - hovertemplate="%{text}", - text=[ - ( - None - if self.__run_parameters["is_standalone"] - else ( - f"{to_int(self.__ceiling_data['valu'][2])} G" - f"{ops_flops}/s" - ) - ), - f"{to_int(self.__ceiling_data['valu'][2])} G{ops_flops}/s", - ], - textposition="top left", - ) + # Configure Kernel Names subplot axes + fig.update_xaxes(visible=False, range=[0, 1], fixedrange=True, row=3, col=1) + fig.update_yaxes( + visible=False, range=[min_y, max_y], fixedrange=True, row=3, col=1 ) - # Plot peak MFMA ceiling - if dtype in MFMA_DATATYPES: - fig.add_trace( - go.Scatter( - x=self.__ceiling_data["mfma"][0], - y=self.__ceiling_data["mfma"][1], - name=f"Peak MFMA-{dtype}", - mode=plot_mode, - hovertemplate="%{text}", - text=[ - ( - None - if self.__run_parameters["is_standalone"] - else ( - f"{to_int(self.__ceiling_data['mfma'][2])} " - f"G{ops_flops}/s" - ) - ), - f"{to_int(self.__ceiling_data['mfma'][2])} G{ops_flops}/s", - ], - textposition="top left", + ####################### + # Layout Configuration + ####################### + if is_new_figure: + if subplot_row: + fig.update_xaxes( + type="log", + 
autorange=True, + title_text=f"Arithmetic Intensity ({ops_flops}s/Byte)", + row=1, + col=1, + ) + fig.update_yaxes( + type="log", + autorange=True, + title_text=f"Performance (G{ops_flops}/sec)", + row=1, + col=1, + ) + fig.update_layout( + height=int(total_figure_height), + width=1000, + hovermode="x unified", + margin=dict(l=50, r=180, b=50, t=80, pad=7), + legend=dict( + orientation="v", + yanchor="top", + y=1, + xanchor="left", + x=1.01, + font=dict(size=10), + ), + ) + else: + # Fallback to simple figure without subplots + fig.update_layout( + xaxis_title=f"Arithmetic Intensity ({ops_flops}s/Byte)", + yaxis_title=f"Performance (G{ops_flops}/sec)", + xaxis_type="log", + yaxis_type="log", + xaxis_autorange=True, + yaxis_autorange=True, + height=int(total_figure_height), + hovermode="x unified", + margin=dict(l=50, r=50, b=50, t=50, pad=7), ) - ) - fig.update_xaxes(type="log", autorange=True) - fig.update_yaxes(type="log", autorange=True) + # Update subplot title for additional datatypes + if ( + not is_new_figure + and subplot_row + and hasattr(fig, "layout") + and hasattr(fig.layout, "annotations") + ): + for annotation in fig.layout.annotations: + if annotation.text and "Roofline Analysis" in annotation.text: + if "(" in annotation.text and ")" in annotation.text: + existing_text = annotation.text.split("(")[0] + existing_types = annotation.text.split("(")[1].split(")")[0] + new_types = f"{existing_types}, {dtype}" + annotation.text = f"{existing_text}({new_types})" + break return fig diff --git a/projects/rocprofiler-compute/src/utils/roofline_calc.py b/projects/rocprofiler-compute/src/utils/roofline_calc.py index c91fd76bb4..f604cadbd2 100644 --- a/projects/rocprofiler-compute/src/utils/roofline_calc.py +++ b/projects/rocprofiler-compute/src/utils/roofline_calc.py @@ -26,7 +26,7 @@ import csv from dataclasses import dataclass from pathlib import Path -from typing import Any, Union +from typing import Any, Optional, Union import pandas as pd @@ -201,9 +201,23 
@@ def calc_ceilings( roofline_parameters: dict[str, Any], dtype: str, benchmark_data: dict[str, list[str]], + ai_data: Optional[dict] = None, ) -> dict[str, list[Union[list[float], float, None]]]: """Given benchmarking data, calculate ceilings (or peak performance) for empirical roofline""" + + if ai_data: + max_ai = 0 + for cache_level in ["ai_l1", "ai_l2", "ai_hbm"]: + if cache_level in ai_data and ai_data[cache_level][0]: + cache_max = max(ai_data[cache_level][0]) + max_ai = max(max_ai, cache_max) + + dynamic_xmax = max_ai * 1.2 if max_ai > 0 else 1000 + else: + dynamic_xmax = 1000 + print(XMAX, dynamic_xmax) + # TODO: This is where filtering by memory level will need to occur for standalone graph_points: dict[str, list[Union[list[float], float, None]]] = { "hbm": [], @@ -281,18 +295,24 @@ def calc_ceilings( # ---------------------------------------------------------------------------------- if dtype in PEAK_OPS_DATATYPES: # Plot FMA roof - x0 = min(x2, XMAX) if x2 < XMAX else XMAX + x0 = min(x2, dynamic_xmax) if x2 < dynamic_xmax else dynamic_xmax - console_debug(f"FMA ROOF [{x0}, {XMAX}], [{peak_ops},{peak_ops}]") - graph_points["valu"].extend([[x0, XMAX], [peak_ops, peak_ops], peak_ops]) + console_debug(f"FMA ROOF [{x0}, {dynamic_xmax}], [{peak_ops},{peak_ops}]") + graph_points["valu"].extend([ + [x0, dynamic_xmax], + [peak_ops, peak_ops], + peak_ops, + ]) # Plot MFMA roof if dtype in MFMA_DATATYPES: # assert that mfma has been assigned - x0_mfma = min(x2_mfma, XMAX) if x2_mfma < XMAX else XMAX + x0_mfma = min(x2_mfma, dynamic_xmax) if x2_mfma < dynamic_xmax else dynamic_xmax - console_debug(f"MFMA ROOF [{x0_mfma}, {XMAX}], [{peak_mfma},{peak_mfma}]") + console_debug( + f"MFMA ROOF [{x0_mfma}, {dynamic_xmax}], [{peak_mfma},{peak_mfma}]" + ) graph_points["mfma"].extend([ - [x0_mfma, XMAX], + [x0_mfma, dynamic_xmax], [peak_mfma, peak_mfma], peak_mfma, ]) @@ -774,7 +794,7 @@ def calc_ai_profile( def construct_roof( - roofline_parameters: dict[str, Any], dtype: 
str + roofline_parameters: dict[str, Any], dtype: str, ai_data: Optional[dict] = None ) -> dict[str, list[Union[list[float], float, None]]]: workload_dir = roofline_parameters.get("workload_dir") if isinstance(workload_dir, list): @@ -817,4 +837,4 @@ def construct_roof( # ------------------ # Generate Roofline # ------------------ - return calc_ceilings(roofline_parameters, dtype, benchmark_data) + return calc_ceilings(roofline_parameters, dtype, benchmark_data, ai_data) diff --git a/projects/rocprofiler-compute/tests/test_profile_general.py b/projects/rocprofiler-compute/tests/test_profile_general.py index bc4794ff62..7e106f68e4 100644 --- a/projects/rocprofiler-compute/tests/test_profile_general.py +++ b/projects/rocprofiler-compute/tests/test_profile_general.py @@ -156,7 +156,6 @@ ALL_CSVS_MI350 = sorted([ ROOF_ONLY_FILES = sorted([ "empirRoof_gpu-0_FP32.pdf", - "kernelName_legend.pdf", "pmc_perf.csv", "pmc_perf_0.csv", "pmc_perf_1.csv", @@ -946,7 +945,6 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute): return # Test `--kernel` filtering outputs are present and labelled correctly - filter_kernelName = "kernelName_legend_" + config["kernel_name_1"] filter_empirRoof = "empirRoof_gpu-0_" + config["kernel_name_1"] plot_configurations = [ @@ -967,7 +965,7 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute): "--kernel", config["kernel_name_1"], ], - "expected_files": [filter_kernelName, filter_empirRoof], + "expected_files": [filter_empirRoof], }, ] @@ -1387,10 +1385,7 @@ def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute): file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels) - expected_files = ROOF_ONLY_FILES.copy() - expected_files.remove("kernelName_legend.pdf") - expected_files = sorted(expected_files) - assert sorted(list(file_dict.keys())) == expected_files + assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES validate( inspect.stack()[0][3], @@ -1420,10 +1415,7 @@ def 
test_roof_sort_kernels(binary_handler_profile_rocprof_compute): assert returncode == 0 file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels) - expected_files = ROOF_ONLY_FILES.copy() - expected_files.remove("kernelName_legend.pdf") - expected_files = sorted(expected_files) - assert sorted(list(file_dict.keys())) == expected_files + assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES validate( inspect.stack()[0][3], @@ -1453,10 +1445,7 @@ def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute): assert returncode == 0 file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels) - expected_files = ROOF_ONLY_FILES.copy() - expected_files.remove("kernelName_legend.pdf") - expected_files = sorted(expected_files) - assert sorted(list(file_dict.keys())) == expected_files + assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES validate( inspect.stack()[0][3], @@ -1486,10 +1475,7 @@ def test_roof_mem_levels_LDS(binary_handler_profile_rocprof_compute): assert returncode == 0 file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels) - expected_files = ROOF_ONLY_FILES.copy() - expected_files.remove("kernelName_legend.pdf") - expected_files = sorted(expected_files) - assert sorted(list(file_dict.keys())) == expected_files + assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES validate( inspect.stack()[0][3],