Files
rocm-systems/src/roofline.py
T
colramos-amd 33e763178e Removing unused modules
Signed-off-by: colramos-amd <colramos@amd.com>
2024-04-01 14:30:21 -05:00

443 wiersze
17 KiB
Python

##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2024 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
from abc import ABC, abstractmethod
import os
import time
from dash import dcc
from utils.utils import (
mibench,
gen_sysinfo,
demarcate,
console_error,
console_log,
console_debug,
)
from dash import html
import plotly.graph_objects as go
from utils.roofline_calc import calc_ai, constuct_roof
import numpy as np
# Marker symbol codes handed to Plotly as `marker_symbol`: always for the
# "Kernel Names and Markers" legend figure, and for the arithmetic-intensity
# points when kernel names are requested.
SYMBOLS = [0, 1, 2, 3, 4, 5, 13, 17, 18, 20]
class Roofline:
def __init__(self, args, mspec, run_parameters=None):
    """Store inputs, resolve the run parameters and prepare the workload dir.

    Args:
        args: Parsed argument namespace. The optional attributes ``path``,
            ``roof_only``, ``kernel_names``, ``mem_level`` and ``sort`` are
            read only when present; boolean flags are taken by truthiness.
        mspec: Machine specification object, stored for later use.
        run_parameters: Optional dict of roofline settings. When it is
            omitted (or empty) a default set is created and ``args.path``
            supplies the workload directory. A supplied dict is stored by
            reference and updated in place with any overrides from ``args``.
    """
    self.__args = args
    self.__mspec = mspec
    self.__run_parameters = (
        run_parameters
        if run_parameters
        else {
            "workload_dir": None,  # in some cases (i.e. --specs) path will not be given
            "device_id": 0,
            "sort_type": "kernels",
            "mem_level": "ALL",
            "include_kernel_names": False,
            "is_standalone": False,
        }
    )
    self.__ai_data = None
    self.__ceiling_data = None
    self.__figure = go.Figure()

    # Set roofline run parameters from args
    if hasattr(args, "path") and not run_parameters:
        self.__run_parameters["workload_dir"] = args.path
    if getattr(args, "roof_only", False):
        self.__run_parameters["is_standalone"] = True
    if getattr(args, "kernel_names", False):
        self.__run_parameters["include_kernel_names"] = True
    # "ALL" is the do-not-override value for both mem_level and sort
    if getattr(args, "mem_level", "ALL") != "ALL":
        self.__run_parameters["mem_level"] = args.mem_level
    if getattr(args, "sort", "ALL") != "ALL":
        self.__run_parameters["sort_type"] = args.sort

    # Only a single, known workload directory can be set up on disk
    workload_dir = self.__run_parameters["workload_dir"]
    if workload_dir is not None and not isinstance(workload_dir, list):
        self.roof_setup()

    self.validate_parameters()
def validate_parameters(self):
    """Check that kernel-name output is only requested in standalone mode.

    Reports through ``console_error`` when ``include_kernel_names`` is set
    while ``is_standalone`` is not.
    """
    params = self.__run_parameters
    wants_kernel_names = params["include_kernel_names"]
    if wants_kernel_names and not params["is_standalone"]:
        console_error("--roof-only is required for --kernel-names")
def roof_setup(self):
    """Resolve the workload directory and make sure it exists on disk.

    When the configured directory is exactly ``<cwd>/workloads`` it is
    extended with the workload name (``args.name``) and the GPU model
    (``mspec.gpu_model``). The resulting directory is then created,
    including any missing parents.
    """
    default_dir = os.path.join(os.getcwd(), "workloads")

    # set default workload path if not specified
    if self.__run_parameters["workload_dir"] == default_dir:
        self.__run_parameters["workload_dir"] = os.path.join(
            default_dir,
            self.__args.name,
            self.__mspec.gpu_model,
        )

    # create new directory for roofline if it doesn't exist; exist_ok avoids
    # the race between a separate isdir() check and the makedirs() call
    os.makedirs(self.__run_parameters["workload_dir"], exist_ok=True)
@demarcate
def empirical_roofline(
    self,
    ret_df,
):
    """Build the empirical roofline figures, then save them or return them.

    Arithmetic-intensity data is computed from ``ret_df`` with ``calc_ai``.
    One figure is generated for dtype "FP32" and a second one combines the
    "FP16" and "I8" plots. A legend figure pairing kernel names with marker
    symbols is stored on the instance.

    In standalone mode the figures are written as PDFs into the workload
    directory and nothing is returned; otherwise a Dash ``html.Section``
    containing both plots is returned for the GUI.
    """
    params = self.__run_parameters
    console_debug("roofline", "Path: %s" % params["workload_dir"])

    # Arithmetic intensity data that will populate the roofline model
    self.__ai_data = calc_ai(params["sort_type"], ret_df)
    ai_lines = [
        "\n\t%s -> %s" % (level, self.__ai_data[level]) for level in self.__ai_data
    ]
    console_debug("AI at each mem level:" + "".join(ai_lines))

    # One roofline per data type; the I8 plot is layered onto the FP16 figure
    fp32_fig = self.generate_plot(dtype="FP32")
    fp16_fig = self.generate_plot(dtype="FP16")
    ml_combo_fig = self.generate_plot(dtype="I8", fig=fp16_fig)

    # Legend with distinct kernel markers. This can be saved, optionally
    legend_trace = go.Scatter(
        mode="markers",
        x=[0] * 10,
        y=self.__ai_data["kernelNames"],
        marker_symbol=SYMBOLS,
        marker_size=15,
    )
    self.__figure = go.Figure(legend_trace)
    self.__figure.update_layout(
        title="Kernel Names and Markers",
        margin=dict(b=0, r=0),
        xaxis_range=[-1, 1],
        xaxis_side="top",
        yaxis_side="right",
        height=400,
        width=1000,
    )
    self.__figure.update_xaxes(dtick=1)

    # GUI interaction: hand back HTML instead of writing files
    if not params["is_standalone"]:
        panels = [
            html.Div(
                className="float-child",
                children=[
                    html.H3(children=heading),
                    dcc.Graph(figure=figure),
                ],
            )
            for heading, figure in (
                ("Empirical Roofline Analysis (FP32/FP64)", fp32_fig),
                ("Empirical Roofline Analysis (FP16/INT8)", ml_combo_fig),
            )
        ]
        return html.Section(
            id="roofline",
            children=[
                html.Div(className="float-container", children=panels),
            ],
        )

    # "Standalone roofline" mode: save PDFs into the workload directory
    workload_dir = params["workload_dir"]
    dev_id = str(params["device_id"])
    outputs = [
        (fp32_fig, workload_dir + "/empirRoof_gpu-{}_fp32_fp64.pdf".format(dev_id)),
        (ml_combo_fig, workload_dir + "/empirRoof_gpu-{}_int8_fp16.pdf".format(dev_id)),
    ]
    # only save a legend if kernel_names option is toggled
    if params["include_kernel_names"]:
        outputs.append((self.__figure, workload_dir + "/kernelName_legend.pdf"))

    # Every file is written twice, one second apart: the re-save removes the
    # loading MathJax pop up from the output
    for attempt in range(2):
        for figure, pdf_path in outputs:
            figure.write_image(pdf_path)
        if attempt == 0:
            time.sleep(1)
    console_log("roofline", "Empirical Roofline PDFs saved!")
@demarcate
def generate_plot(self, dtype, fig=None) -> go.Figure:
    """Create graph object from ai_data (coordinate points) and ceiling_data (peak FLOP and BW) data.

    The arithmetic-intensity data on the instance must already be populated
    (``empirical_roofline`` does this) unless ``dtype`` is "I8", for which
    no application points are drawn.

    Args:
        dtype: Data type label forwarded to ``constuct_roof`` and used in
            trace names; this file calls it with "FP32", "FP16" and "I8".
        fig: Existing figure to add traces to. A new figure is created
            when it is None.

    Returns:
        The figure with bandwidth ceilings, compute ceilings and (except
        for "I8") the application arithmetic-intensity markers added.
    """
    if fig is None:
        fig = go.Figure()
    standalone = self.__run_parameters["is_standalone"]
    plot_mode = "lines+text" if standalone else "lines"
    self.__ceiling_data = constuct_roof(
        roofline_parameters=self.__run_parameters,
        dtype=dtype,
    )
    console_debug("roofline", "Ceiling data:\n%s" % self.__ceiling_data)

    def add_compute_ceiling(key, name, position):
        # Draw one peak-compute ceiling; the label sits on the second point,
        # and on the first point too when text is not drawn on the plot.
        ceiling = self.__ceiling_data[key]
        flops_label = "{} GFLOP/s".format(to_int(ceiling[2]))
        fig.add_trace(
            go.Scatter(
                x=ceiling[0],
                y=ceiling[1],
                name=name,
                mode=plot_mode,
                hovertemplate="<b>%{text}</b>",
                text=[None if standalone else flops_label, flops_label],
                textposition=position,
            )
        )

    #######################
    # Plot ceilings
    #######################
    if self.__run_parameters["mem_level"] == "ALL":
        cache_hierarchy = ["HBM", "L2", "L1", "LDS"]
    else:
        cache_hierarchy = self.__run_parameters["mem_level"]

    # Plot peak BW ceiling(s)
    for cache_level in cache_hierarchy:
        ceiling = self.__ceiling_data[cache_level.lower()]
        bw_label = "{} GB/s".format(to_int(ceiling[2]))
        fig.add_trace(
            go.Scatter(
                x=ceiling[0],
                y=ceiling[1],
                name="{}-{}".format(cache_level, dtype),
                mode=plot_mode,
                hovertemplate="<b>%{text}</b>",
                # label on the first point; also on the second when text is
                # only shown on hover
                text=[bw_label, None if standalone else bw_label],
                textposition="top right",
            )
        )

    # Plot peak VALU ceiling
    # VALU info I8/FP16 not collected via microbench
    if dtype not in ("FP16", "I8"):
        add_compute_ceiling("valu", "Peak VALU-{}".format(dtype), "top left")

    # Plot peak MFMA ceiling
    mfma_position = "bottom left" if dtype == "FP16" else "top left"
    add_compute_ceiling("mfma", "Peak MFMA-{}".format(dtype), mfma_position)

    #######################
    # Plot Application AI
    #######################
    if dtype != "I8":
        # Plot the arithmetic intensity points for each cache level
        # Omitting I8 AIs to clean up graph. FP16 tends to be higher.
        marker_symbol = (
            SYMBOLS if self.__run_parameters["include_kernel_names"] else None
        )
        for ai_key, color in (
            ("ai_l1", "#00CC96"),
            ("ai_l2", "#EF553B"),
            ("ai_hbm", "#636EFA"),
        ):
            points = self.__ai_data[ai_key]
            fig.add_trace(
                go.Scatter(
                    x=points[0],
                    y=points[1],
                    name=ai_key,
                    mode="markers",
                    marker={"color": color},
                    marker_symbol=marker_symbol,
                )
            )

    # Set layout
    fig.update_layout(
        xaxis_title="Arithmetic Intensity (FLOPs/Byte)",
        yaxis_title="Performance (GFLOP/sec)",
        hovermode="x unified",
        margin=dict(l=50, r=50, b=50, t=50, pad=4),
    )
    fig.update_xaxes(type="log", autorange=True)
    fig.update_yaxes(type="log", autorange=True)

    return fig
@demarcate
def standalone_roofline(self):
    """Read ``pmc_perf.csv`` from the workload directory and plot from it.

    A "vL1D" entry in the ``mem_level`` run parameter is first replaced by
    "L1". If the CSV file is missing this is reported via ``console_error``.
    The loaded table is passed to ``empirical_roofline`` under the key
    "pmc_perf".
    """
    import pandas as pd
    from collections import OrderedDict

    # Change vL1D to a interpretable str, if required
    mem_levels = self.__run_parameters["mem_level"]
    if "vL1D" in mem_levels:
        mem_levels.remove("vL1D")
        mem_levels.append("L1")

    perf_csv = os.path.join(self.__run_parameters["workload_dir"], "pmc_perf.csv")
    if not os.path.isfile(perf_csv):
        console_error("roofline", "{} does not exist".format(perf_csv))

    profiling_data = OrderedDict([("pmc_perf", pd.read_csv(perf_csv))])
    self.empirical_roofline(ret_df=profiling_data)
# Main methods
@abstractmethod
def pre_processing(self):
    """Make sure ``sysinfo.csv`` exists before a roofline-only run.

    Does nothing unless ``args.roof_only`` is set. Otherwise it looks for
    ``sysinfo.csv`` under ``args.path`` and, when the file is missing,
    generates it with ``gen_sysinfo``.
    """
    if not self.__args.roof_only:
        return

    # check for sysinfo
    console_log("roofline", "Checking for sysinfo.csv in " + str(self.__args.path))
    sysinfo_path = os.path.join(self.__args.path, "sysinfo.csv")
    if os.path.isfile(sysinfo_path):
        return

    console_log("roofline", "sysinfo.csv not found. Generating...")

    # Minimal stand-in passed as `soc`; it only exposes `roofline_obj`
    class Dummy_SoC:
        roofline_obj = True

    gen_sysinfo(
        workload_name=self.__args.name,
        # The instance never defines `__workload_dir`; the workload
        # directory is tracked in the run parameters instead.
        workload_dir=self.__run_parameters["workload_dir"],
        ip_blocks=self.__args.ipblocks,
        app_cmd=self.__args.remaining,
        skip_roof=self.__args.no_roof,
        roof_only=self.__args.roof_only,
        mspec=self.__mspec,
        soc=Dummy_SoC,
    )
@abstractmethod
def profile(self):
    """Collect whatever benchmark data the selected roofline mode needs.

    * ``args.roof_only`` set: run ``mibench`` if ``roofline.csv`` is missing
      from ``args.path``, then check for ``pmc_perf.csv``; when that file
      is missing and no application command (``args.remaining``) was
      given, report an error.
    * ``args.no_roof`` set: log that the roofline is skipped.
    * otherwise: run ``mibench``.
    """
    if self.__args.roof_only:
        # check for roofline benchmark
        console_log(
            "roofline", "Checking for roofline.csv in " + str(self.__args.path)
        )
        roof_path = os.path.join(self.__args.path, "roofline.csv")
        if not os.path.isfile(roof_path):
            mibench(self.__args, self.__mspec)

        # check for profiling data
        console_log(
            "roofline", "Checking for pmc_perf.csv in " + str(self.__args.path)
        )
        app_path = os.path.join(self.__args.path, "pmc_perf.csv")
        if not os.path.isfile(app_path):
            console_log("roofline", "pmc_perf.csv not found. Generating...")
            if not self.__args.remaining:
                # tag and message are separate arguments; without the comma
                # they were concatenated into a single string
                console_error(
                    "profiling",
                    "An <app_cmd> is required to run.\nomniperf profile -n test -- <app_cmd>",
                )
            # TODO: Add an equivalent of characterize_app() to run profiling directly out of this module
    elif self.__args.no_roof:
        console_log("roofline", "Skipping roofline.")
    else:
        mibench(self.__args, self.__mspec)
# NB: Currently the post_processing() method is the only one being used by omniperf,
# we include pre_processing() and profile() methods for those who wish to borrow the roofline module
@abstractmethod
def post_processing(self):
    """Run the standalone roofline flow when ``is_standalone`` is set.

    Nothing happens when the run is not standalone.
    """
    standalone = self.__run_parameters["is_standalone"]
    if not standalone:
        return
    self.standalone_roofline()
def to_int(a):
    """Convert *a* to ``int``, mapping ``None`` to NaN.

    Args:
        a: Value accepted by ``int()``, or None.

    Returns:
        ``numpy.nan`` when *a* is None, otherwise ``int(a)``.
    """
    # identity check instead of comparing the type's string representation
    if a is None:
        return np.nan
    return int(a)