diff --git a/projects/rocprofiler-compute/src/roofline.py b/projects/rocprofiler-compute/src/roofline.py
index eebda645bf..e0424e8617 100644
--- a/projects/rocprofiler-compute/src/roofline.py
+++ b/projects/rocprofiler-compute/src/roofline.py
@@ -223,10 +223,12 @@ class Roofline:
"{} GB/s".format(
to_int(self.__ceiling_data[cache_level.lower()][2])
),
- None
- if self.__run_parameters["is_standalone"]
- else "{} GB/s".format(
- to_int(self.__ceiling_data[cache_level.lower()][2])
+ (
+ None
+ if self.__run_parameters["is_standalone"]
+ else "{} GB/s".format(
+ to_int(self.__ceiling_data[cache_level.lower()][2])
+ )
),
],
textposition="top right",
@@ -243,9 +245,13 @@ class Roofline:
mode=plot_mode,
hovertemplate="%{text}",
text=[
- None
- if self.__run_parameters["is_standalone"]
- else "{} GFLOP/s".format(to_int(self.__ceiling_data["valu"][2])),
+ (
+ None
+ if self.__run_parameters["is_standalone"]
+ else "{} GFLOP/s".format(
+ to_int(self.__ceiling_data["valu"][2])
+ )
+ ),
"{} GFLOP/s".format(to_int(self.__ceiling_data["valu"][2])),
],
textposition="top left",
@@ -265,9 +271,11 @@ class Roofline:
mode=plot_mode,
hovertemplate="%{text}",
text=[
- None
- if self.__run_parameters["is_standalone"]
- else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])),
+ (
+ None
+ if self.__run_parameters["is_standalone"]
+ else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2]))
+ ),
"{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])),
],
textposition=pos,
@@ -285,9 +293,9 @@ class Roofline:
name="ai_l1",
mode="markers",
marker={"color": "#00CC96"},
- marker_symbol=SYMBOLS
- if self.__run_parameters["include_kernel_names"]
- else None,
+ marker_symbol=(
+ SYMBOLS if self.__run_parameters["include_kernel_names"] else None
+ ),
)
)
fig.add_trace(
@@ -297,9 +305,9 @@ class Roofline:
name="ai_l2",
mode="markers",
marker={"color": "#EF553B"},
- marker_symbol=SYMBOLS
- if self.__run_parameters["include_kernel_names"]
- else None,
+ marker_symbol=(
+ SYMBOLS if self.__run_parameters["include_kernel_names"] else None
+ ),
)
)
fig.add_trace(
@@ -309,9 +317,9 @@ class Roofline:
name="ai_hbm",
mode="markers",
marker={"color": "#636EFA"},
- marker_symbol=SYMBOLS
- if self.__run_parameters["include_kernel_names"]
- else None,
+ marker_symbol=(
+ SYMBOLS if self.__run_parameters["include_kernel_names"] else None
+ ),
)
)
diff --git a/projects/rocprofiler-compute/src/utils/gui.py b/projects/rocprofiler-compute/src/utils/gui.py
index bc1b3d8e96..78244b6a55 100644
--- a/projects/rocprofiler-compute/src/utils/gui.py
+++ b/projects/rocprofiler-compute/src/utils/gui.py
@@ -298,9 +298,11 @@ def build_table_chart(
[
{
column: {
- "value": str(row["Tips"])
- if column == display_columns[0] and row["Tips"]
- else "",
+ "value": (
+ str(row["Tips"])
+ if column == display_columns[0] and row["Tips"]
+ else ""
+ ),
"type": "markdown",
}
for column, value in row.items()
@@ -325,52 +327,58 @@ def build_table_chart(
# style cell
style_cell={"maxWidth": "500px"},
# display style
- style_header={
- "backgroundColor": "rgb(30, 30, 30)",
- "color": "white",
- "fontWeight": "bold",
- }
- if IS_DARK
- else {},
- style_data={
- "backgroundColor": "rgb(50, 50, 50)",
- "color": "white",
- "whiteSpace": "normal",
- "height": "auto",
- }
- if IS_DARK
- else {},
- style_data_conditional=[
- {"if": {"row_index": "odd"}, "backgroundColor": "rgb(60, 60, 60)"},
+ style_header=(
{
- "if": {"column_id": "PoP", "filter_query": "{PoP} > 50"},
- "backgroundColor": "#ffa90a",
+ "backgroundColor": "rgb(30, 30, 30)",
"color": "white",
- },
+ "fontWeight": "bold",
+ }
+ if IS_DARK
+ else {}
+ ),
+ style_data=(
{
- "if": {"column_id": "PoP", "filter_query": "{PoP} > 80"},
- "backgroundColor": "#ff120a",
+ "backgroundColor": "rgb(50, 50, 50)",
"color": "white",
- },
- {
- "if": {
- "column_id": "Avg",
- "filter_query": "{Unit} = Pct && {Avg} > 50",
+ "whiteSpace": "normal",
+ "height": "auto",
+ }
+ if IS_DARK
+ else {}
+ ),
+ style_data_conditional=(
+ [
+ {"if": {"row_index": "odd"}, "backgroundColor": "rgb(60, 60, 60)"},
+ {
+ "if": {"column_id": "PoP", "filter_query": "{PoP} > 50"},
+ "backgroundColor": "#ffa90a",
+ "color": "white",
},
- "backgroundColor": "#ffa90a",
- "color": "white",
- },
- {
- "if": {
- "column_id": "Avg",
- "filter_query": "{Unit} = Pct && {Avg} > 80",
+ {
+ "if": {"column_id": "PoP", "filter_query": "{PoP} > 80"},
+ "backgroundColor": "#ff120a",
+ "color": "white",
},
- "backgroundColor": "#ff120a",
- "color": "white",
- },
- ]
- if IS_DARK
- else [],
+ {
+ "if": {
+ "column_id": "Avg",
+ "filter_query": "{Unit} = Pct && {Avg} > 50",
+ },
+ "backgroundColor": "#ffa90a",
+ "color": "white",
+ },
+ {
+ "if": {
+ "column_id": "Avg",
+ "filter_query": "{Unit} = Pct && {Avg} > 80",
+ },
+ "backgroundColor": "#ff120a",
+ "color": "white",
+ },
+ ]
+ if IS_DARK
+ else []
+ ),
# the df to display
data=display_df.to_dict("records"),
)
diff --git a/projects/rocprofiler-compute/src/utils/roofline_calc.py b/projects/rocprofiler-compute/src/utils/roofline_calc.py
index 2c3cf44def..e8367d1184 100644
--- a/projects/rocprofiler-compute/src/utils/roofline_calc.py
+++ b/projects/rocprofiler-compute/src/utils/roofline_calc.py
@@ -195,21 +195,11 @@ def calc_ai(sort_type, ret_df):
df = df.sort_values(by=["Kernel_Name"])
df = df.reset_index(drop=True)
- total_flops = (
- valu_flops
- ) = (
- mfma_flops_bf16
- ) = (
- mfma_flops_f16
- ) = (
- mfma_iops_i8
- ) = (
+ total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = (
mfma_flops_f32
- ) = (
- mfma_flops_f64
- ) = (
- lds_data
- ) = L1cache_data = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0
+ ) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = calls = (
+ totalDuration
+ ) = avgDuration = 0.0
kernelName = ""
@@ -390,23 +380,11 @@ def calc_ai(sort_type, ret_df):
kernelName, idx, calls
)
)
- total_flops = (
- valu_flops
- ) = (
- mfma_flops_bf16
- ) = (
- mfma_flops_f16
- ) = (
- mfma_iops_i8
- ) = (
+ total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = (
mfma_flops_f32
- ) = (
- mfma_flops_f64
- ) = (
- lds_data
- ) = (
- L1cache_data
- ) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0
+ ) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = (
+ calls
+ ) = totalDuration = avgDuration = 0.0
if sort_type == "dispatches":
myList.append(
@@ -428,23 +406,11 @@ def calc_ai(sort_type, ret_df):
avgDuration,
)
)
- total_flops = (
- valu_flops
- ) = (
- mfma_flops_bf16
- ) = (
- mfma_flops_f16
- ) = (
- mfma_iops_i8
- ) = (
+ total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = (
mfma_flops_f32
- ) = (
- mfma_flops_f64
- ) = (
- lds_data
- ) = (
- L1cache_data
- ) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0
+ ) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = (
+ calls
+ ) = totalDuration = avgDuration = 0.0
myList.sort(key=lambda x: x.totalDuration, reverse=True)
@@ -456,24 +422,32 @@ def calc_ai(sort_type, ret_df):
# Create list of top 5 intensities
while i < TOP_N and i != len(myList):
kernelNames.append(myList[i].KernelName)
- intensities["ai_l1"].append(
- myList[i].total_flops / myList[i].L1cache_data
- ) if myList[i].L1cache_data else intensities["ai_l1"].append(0)
+ (
+ intensities["ai_l1"].append(myList[i].total_flops / myList[i].L1cache_data)
+ if myList[i].L1cache_data
+ else intensities["ai_l1"].append(0)
+ )
# print("cur_ai_L1", myList[i].total_flops/myList[i].L1cache_data) if myList[i].L1cache_data else print("null")
# print()
- intensities["ai_l2"].append(
- myList[i].total_flops / myList[i].L2cache_data
- ) if myList[i].L2cache_data else intensities["ai_l2"].append(0)
+ (
+ intensities["ai_l2"].append(myList[i].total_flops / myList[i].L2cache_data)
+ if myList[i].L2cache_data
+ else intensities["ai_l2"].append(0)
+ )
# print("cur_ai_L2", myList[i].total_flops/myList[i].L2cache_data) if myList[i].L2cache_data else print("null")
# print()
- intensities["ai_hbm"].append(
- myList[i].total_flops / myList[i].hbm_data
- ) if myList[i].hbm_data else intensities["ai_hbm"].append(0)
+ (
+ intensities["ai_hbm"].append(myList[i].total_flops / myList[i].hbm_data)
+ if myList[i].hbm_data
+ else intensities["ai_hbm"].append(0)
+ )
# print("cur_ai_hbm", myList[i].total_flops/myList[i].hbm_data) if myList[i].hbm_data else print("null")
# print()
- curr_perf.append(myList[i].total_flops / myList[i].avgDuration) if myList[
- i
- ].avgDuration else curr_perf.append(0)
+ (
+ curr_perf.append(myList[i].total_flops / myList[i].avgDuration)
+ if myList[i].avgDuration
+ else curr_perf.append(0)
+ )
# print("cur_perf", myList[i].total_flops/myList[i].avgDuration) if myList[i].avgDuration else print("null")
i += 1
diff --git a/projects/rocprofiler-compute/src/utils/tty.py b/projects/rocprofiler-compute/src/utils/tty.py
index 4870ef7e60..5d1f68a701 100644
--- a/projects/rocprofiler-compute/src/utils/tty.py
+++ b/projects/rocprofiler-compute/src/utils/tty.py
@@ -170,9 +170,11 @@ def show_all(args, runs, archConfigs, output):
else:
cur_df_copy = copy.deepcopy(cur_df)
cur_df_copy[header] = [
- round(float(x), args.decimal)
- if x != ""
- else x
+ (
+ round(float(x), args.decimal)
+ if x != ""
+ else x
+ )
for x in base_df[header]
]
df = pd.concat([df, cur_df_copy[header]], axis=1)
@@ -214,11 +216,13 @@ def show_all(args, runs, archConfigs, output):
# fash for now.
ss += (
tabulate(
- df.transpose()
- if type != "raw_csv_table"
- and "columnwise" in table_config
- and table_config["columnwise"] == True
- else df,
+ (
+ df.transpose()
+ if type != "raw_csv_table"
+ and "columnwise" in table_config
+ and table_config["columnwise"] == True
+ else df
+ ),
headers="keys",
tablefmt="fancy_grid",
floatfmt="." + str(args.decimal) + "f",
diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py
index 0b7e91bea7..8e4bed5958 100644
--- a/projects/rocprofiler-compute/src/utils/utils.py
+++ b/projects/rocprofiler-compute/src/utils/utils.py
@@ -39,26 +39,30 @@ import config
rocprof_cmd = ""
+
def demarcate(function):
def wrap_function(*args, **kwargs):
logging.trace("----- [entering function] -> %s()" % (function.__qualname__))
result = function(*args, **kwargs)
logging.trace("----- [exiting function] -> %s()" % function.__qualname__)
return result
+
return wrap_function
+
def error(message):
logging.error("")
logging.error("[ERROR]: " + message)
logging.error("")
sys.exit(1)
+
def trace_logger(message, *args, **kwargs):
logging.log(logging.TRACE, message, *args, **kwargs)
+
def get_version(omniperf_home) -> dict:
- """Return Omniperf versioning info
- """
+ """Return Omniperf versioning info"""
# symantic version info
version = os.path.join(omniperf_home.parent, "VERSION")
try:
@@ -96,9 +100,9 @@ def get_version(omniperf_home) -> dict:
versionData = {"version": VER, "sha": SHA, "mode": MODE}
return versionData
+
def get_version_display(version, sha, mode):
- """Pretty print versioning info
- """
+ """Pretty print versioning info"""
buf = io.StringIO()
print("-" * 40, file=buf)
print("Omniperf version: %s (%s)" % (version, mode), file=buf)
@@ -106,30 +110,36 @@ def get_version_display(version, sha, mode):
print("-" * 40, file=buf)
return buf.getvalue()
+
def detect_rocprof():
- """Detect loaded rocprof version. Resolve path and set cmd globally.
- """
+ """Detect loaded rocprof version. Resolve path and set cmd globally."""
global rocprof_cmd
# detect rocprof
if not "ROCPROF" in os.environ.keys():
rocprof_cmd = "rocprof"
else:
rocprof_cmd = os.environ["ROCPROF"]
-
+
# resolve rocprof path
rocprof_path = shutil.which(rocprof_cmd)
if not rocprof_path:
rocprof_cmd = "rocprof"
- logging.warning("Warning: Unable to resolve path to %s binary. Reverting to default." % rocprof_cmd)
+ logging.warning(
+ "Warning: Unable to resolve path to %s binary. Reverting to default."
+ % rocprof_cmd
+ )
rocprof_path = shutil.which(rocprof_cmd)
if not rocprof_path:
- error("Please verify installation or set ROCPROF environment variable with full path.")
+ error(
+ "Please verify installation or set ROCPROF environment variable with full path."
+ )
else:
# Resolve any sym links in file path
rocprof_path = os.path.realpath(rocprof_path.rstrip("\n"))
logging.info("ROC Profiler: " + str(rocprof_path))
- return rocprof_cmd #TODO: Do we still need to return this? It's not being used in the function call
+ return rocprof_cmd # TODO: Do we still need to return this? It's not being used in the function call
+
def capture_subprocess_output(subprocess_args, new_env=None):
# Start subprocess
@@ -180,7 +190,7 @@ def capture_subprocess_output(subprocess_args, new_env=None):
return_code = process.wait()
selector.close()
- success = (return_code == 0)
+ success = return_code == 0
# Store buffered output
output = buf.getvalue()
@@ -188,22 +198,26 @@ def capture_subprocess_output(subprocess_args, new_env=None):
return (success, output)
+
def run_prof(fname, profiler_options, target, workload_dir):
fbase = os.path.splitext(os.path.basename(fname))[0]
m_specs = specs.get_machine_specs(0)
-
+
logging.debug("pmc file: %s" % str(os.path.basename(fname)))
# standard rocprof options
- default_options = [
- "-i", fname
- ]
+ default_options = ["-i", fname]
options = default_options + profiler_options
# set required env var for mi300
new_env = None
- if (target.lower() == "mi300x_a0" or target.lower() == "mi300x_a1" or target.lower() == "mi300a_a0" or target.lower() == "mi300a_a1") and (
+ if (
+ target.lower() == "mi300x_a0"
+ or target.lower() == "mi300x_a1"
+ or target.lower() == "mi300a_a0"
+ or target.lower() == "mi300a_a1"
+ ) and (
os.path.basename(fname) == "pmc_perf_13.txt"
or os.path.basename(fname) == "pmc_perf_14.txt"
or os.path.basename(fname) == "pmc_perf_15.txt"
@@ -215,13 +229,9 @@ def run_prof(fname, profiler_options, target, workload_dir):
# profile the app
if new_env:
- success, output = capture_subprocess_output(
- [ rocprof_cmd ] + options, new_env
- )
+ success, output = capture_subprocess_output([rocprof_cmd] + options, new_env)
else:
- success, output = capture_subprocess_output(
- [ rocprof_cmd ] + options
- )
+ success, output = capture_subprocess_output([rocprof_cmd] + options)
if not success:
error(output)
@@ -230,9 +240,7 @@ def run_prof(fname, profiler_options, target, workload_dir):
# flatten tcc for applicable mi300 input
f = path(workload_dir + "/out/pmc_1/results_" + fbase + ".csv")
hbm_stack_num = get_hbm_stack_num(target, m_specs.memory_partition)
- df = flatten_tcc_info_across_hbm_stacks(
- f, hbm_stack_num, int(m_specs.L2Banks)
- )
+ df = flatten_tcc_info_across_hbm_stacks(f, hbm_stack_num, int(m_specs.L2Banks))
df.to_csv(f, index=False)
if os.path.exists(workload_dir + "/out"):
@@ -270,10 +278,11 @@ def run_prof(fname, profiler_options, target, workload_dir):
df = pd.read_csv(workload_dir + "/" + fbase + ".csv")
df.rename(columns=output_headers, inplace=True)
df.to_csv(workload_dir + "/" + fbase + ".csv", index=False)
-
+
# write rocprof output to logging
logging.info(output)
+
def replace_timestamps(workload_dir):
df_stamps = pd.read_csv(workload_dir + "/timestamps.csv")
if "Start_Timestamp" in df_stamps.columns and "End_Timestamp" in df_stamps.columns:
@@ -286,9 +295,12 @@ def replace_timestamps(workload_dir):
df_pmc_perf["End_Timestamp"] = df_stamps["End_Timestamp"]
df_pmc_perf.to_csv(fname, index=False)
else:
- warning = "WARNING: Incomplete profiling data detected. Unable to update timestamps."
+ warning = (
+ "WARNING: Incomplete profiling data detected. Unable to update timestamps."
+ )
logging.warning(warning + "\n")
+
def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof_only):
# Record system information
mspec = specs.get_machine_specs(0)
@@ -367,6 +379,7 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof
sysinfo.write(",".join(param))
sysinfo.close()
+
def detect_roofline():
mspec = specs.get_machine_specs(0)
rocm_ver = mspec.rocm_version[:1]
@@ -389,8 +402,9 @@ def detect_roofline():
# Must be a valid RHEL machine
distro = "platform:el8"
elif (
- (type(sles_distro) == str and len(sles_distro) >= 3) and # confirm string and len
- sles_distro[:2] == "15" and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3
+ (type(sles_distro) == str and len(sles_distro) >= 3)
+ and sles_distro[:2] == "15" # confirm string and len
+ and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3
):
# Must be a valid SLES machine
# Use SP3 binary for all forward compatible service pack versions
@@ -399,12 +413,15 @@ def detect_roofline():
# Must be a valid Ubuntu machine
distro = ubuntu_distro
else:
- logging.error("ROOFLINE ERROR: Cannot find a valid binary for your operating system")
+ logging.error(
+ "ROOFLINE ERROR: Cannot find a valid binary for your operating system"
+ )
sys.exit(1)
target_binary = {"rocm_ver": rocm_ver, "distro": distro}
return target_binary
+
def run_rocscope(args, fname):
# profile the app
if args.use_rocscope == True:
@@ -417,23 +434,21 @@ def run_rocscope(args, fname):
args.path,
"-n",
args.name,
- "-t",
+ "-t",
fname,
"--",
]
for i in args.remaining.split():
rs_cmd.append(i)
logging.info(rs_cmd)
- success, output = capture_subprocess_output(
- rs_cmd
- )
+ success, output = capture_subprocess_output(rs_cmd)
if not success:
logging.error(result.stderr.decode("ascii"))
sys.exit(1)
+
def mibench(args):
- """Run roofline microbenchmark to generate peak BW and FLOP measurements.
- """
+ """Run roofline microbenchmark to generate peak BW and FLOP measurements."""
logging.info("[roofline] No roofline data found. Generating...")
distro_map = {"platform:el8": "rhel8", "15.3": "sle15sp3", "20.04": "ubuntu20_04"}
@@ -454,7 +469,9 @@ def mibench(args):
# Distro is valid but cant find rocm ver
if not os.path.exists(path_to_binary):
- logging.error("ROOFLINE ERROR: Unable to locate expected binary (%s)." % path_to_binary)
+ logging.error(
+ "ROOFLINE ERROR: Unable to locate expected binary (%s)." % path_to_binary
+ )
sys.exit(1)
subprocess.run(
@@ -465,9 +482,10 @@ def mibench(args):
"-d",
str(args.device),
],
- check=True
+ check=True,
)
+
def flatten_tcc_info_across_hbm_stacks(file, stack_num, tcc_channel_per_stack):
"""
Flatten TCC per channel counters across all HBM stacks in used.
@@ -532,6 +550,7 @@ def flatten_tcc_info_across_hbm_stacks(file, stack_num, tcc_channel_per_stack):
return df
+
def get_hbm_stack_num(gpu_name, memory_partition):
"""
Get total HBM stack numbers based on memory partition for MI300.
@@ -564,15 +583,15 @@ def get_hbm_stack_num(gpu_name, memory_partition):
else:
# Fixme: add proper numbers for other archs
return -1
-
+
+
def get_submodules(package_name):
- """List all submodules for a target package
- """
+ """List all submodules for a target package"""
import importlib
import pkgutil
submodules = []
-
+
# walk all submodules in target package
package = importlib.import_module(package_name)
for _, name, _ in pkgutil.walk_packages(package.__path__):
@@ -583,15 +602,17 @@ def get_submodules(package_name):
return submodules
+
def is_workload_empty(path):
- """Peek workload directory to verify valid profiling output
- """
+ """Peek workload directory to verify valid profiling output"""
pmc_perf_path = path + "/pmc_perf.csv"
if os.path.isfile(pmc_perf_path):
temp_df = pd.read_csv(pmc_perf_path)
if temp_df.dropna().empty:
- error("[profiling] Error. Found empty cells in %s.\nProfiling data could be corrupt." % pmc_perf_path)
+ error(
+ "[profiling] Error. Found empty cells in %s.\nProfiling data could be corrupt."
+ % pmc_perf_path
+ )
else:
error("[profiling] Error. Cannot find pmc_perf.csv in %s" % path)
-