Signed-off-by: Karl W Schulz <karl.schulz@amd.com>


[ROCm/rocprofiler-compute commit: 76873dd0c4]
Этот коммит содержится в:
Karl W Schulz
2024-02-22 15:40:02 -06:00
коммит произвёл Karl W. Schulz
родитель e9f8f78f9f
Коммит 6ac883e20e
5 изменённых файлов: 189 добавлений и 174 удалений
+27 -19
Просмотреть файл
@@ -223,10 +223,12 @@ class Roofline:
"{} GB/s".format(
to_int(self.__ceiling_data[cache_level.lower()][2])
),
None
if self.__run_parameters["is_standalone"]
else "{} GB/s".format(
to_int(self.__ceiling_data[cache_level.lower()][2])
(
None
if self.__run_parameters["is_standalone"]
else "{} GB/s".format(
to_int(self.__ceiling_data[cache_level.lower()][2])
)
),
],
textposition="top right",
@@ -243,9 +245,13 @@ class Roofline:
mode=plot_mode,
hovertemplate="<b>%{text}</b>",
text=[
None
if self.__run_parameters["is_standalone"]
else "{} GFLOP/s".format(to_int(self.__ceiling_data["valu"][2])),
(
None
if self.__run_parameters["is_standalone"]
else "{} GFLOP/s".format(
to_int(self.__ceiling_data["valu"][2])
)
),
"{} GFLOP/s".format(to_int(self.__ceiling_data["valu"][2])),
],
textposition="top left",
@@ -265,9 +271,11 @@ class Roofline:
mode=plot_mode,
hovertemplate="<b>%{text}</b>",
text=[
None
if self.__run_parameters["is_standalone"]
else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])),
(
None
if self.__run_parameters["is_standalone"]
else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2]))
),
"{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])),
],
textposition=pos,
@@ -285,9 +293,9 @@ class Roofline:
name="ai_l1",
mode="markers",
marker={"color": "#00CC96"},
marker_symbol=SYMBOLS
if self.__run_parameters["include_kernel_names"]
else None,
marker_symbol=(
SYMBOLS if self.__run_parameters["include_kernel_names"] else None
),
)
)
fig.add_trace(
@@ -297,9 +305,9 @@ class Roofline:
name="ai_l2",
mode="markers",
marker={"color": "#EF553B"},
marker_symbol=SYMBOLS
if self.__run_parameters["include_kernel_names"]
else None,
marker_symbol=(
SYMBOLS if self.__run_parameters["include_kernel_names"] else None
),
)
)
fig.add_trace(
@@ -309,9 +317,9 @@ class Roofline:
name="ai_hbm",
mode="markers",
marker={"color": "#636EFA"},
marker_symbol=SYMBOLS
if self.__run_parameters["include_kernel_names"]
else None,
marker_symbol=(
SYMBOLS if self.__run_parameters["include_kernel_names"] else None
),
)
)
+51 -43
Просмотреть файл
@@ -298,9 +298,11 @@ def build_table_chart(
[
{
column: {
"value": str(row["Tips"])
if column == display_columns[0] and row["Tips"]
else "",
"value": (
str(row["Tips"])
if column == display_columns[0] and row["Tips"]
else ""
),
"type": "markdown",
}
for column, value in row.items()
@@ -325,52 +327,58 @@ def build_table_chart(
# style cell
style_cell={"maxWidth": "500px"},
# display style
style_header={
"backgroundColor": "rgb(30, 30, 30)",
"color": "white",
"fontWeight": "bold",
}
if IS_DARK
else {},
style_data={
"backgroundColor": "rgb(50, 50, 50)",
"color": "white",
"whiteSpace": "normal",
"height": "auto",
}
if IS_DARK
else {},
style_data_conditional=[
{"if": {"row_index": "odd"}, "backgroundColor": "rgb(60, 60, 60)"},
style_header=(
{
"if": {"column_id": "PoP", "filter_query": "{PoP} > 50"},
"backgroundColor": "#ffa90a",
"backgroundColor": "rgb(30, 30, 30)",
"color": "white",
},
"fontWeight": "bold",
}
if IS_DARK
else {}
),
style_data=(
{
"if": {"column_id": "PoP", "filter_query": "{PoP} > 80"},
"backgroundColor": "#ff120a",
"backgroundColor": "rgb(50, 50, 50)",
"color": "white",
},
{
"if": {
"column_id": "Avg",
"filter_query": "{Unit} = Pct && {Avg} > 50",
"whiteSpace": "normal",
"height": "auto",
}
if IS_DARK
else {}
),
style_data_conditional=(
[
{"if": {"row_index": "odd"}, "backgroundColor": "rgb(60, 60, 60)"},
{
"if": {"column_id": "PoP", "filter_query": "{PoP} > 50"},
"backgroundColor": "#ffa90a",
"color": "white",
},
"backgroundColor": "#ffa90a",
"color": "white",
},
{
"if": {
"column_id": "Avg",
"filter_query": "{Unit} = Pct && {Avg} > 80",
{
"if": {"column_id": "PoP", "filter_query": "{PoP} > 80"},
"backgroundColor": "#ff120a",
"color": "white",
},
"backgroundColor": "#ff120a",
"color": "white",
},
]
if IS_DARK
else [],
{
"if": {
"column_id": "Avg",
"filter_query": "{Unit} = Pct && {Avg} > 50",
},
"backgroundColor": "#ffa90a",
"color": "white",
},
{
"if": {
"column_id": "Avg",
"filter_query": "{Unit} = Pct && {Avg} > 80",
},
"backgroundColor": "#ff120a",
"color": "white",
},
]
if IS_DARK
else []
),
# the df to display
data=display_df.to_dict("records"),
)
+32 -58
Просмотреть файл
@@ -195,21 +195,11 @@ def calc_ai(sort_type, ret_df):
df = df.sort_values(by=["Kernel_Name"])
df = df.reset_index(drop=True)
total_flops = (
valu_flops
) = (
mfma_flops_bf16
) = (
mfma_flops_f16
) = (
mfma_iops_i8
) = (
total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = (
mfma_flops_f32
) = (
mfma_flops_f64
) = (
lds_data
) = L1cache_data = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0
) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = calls = (
totalDuration
) = avgDuration = 0.0
kernelName = ""
@@ -390,23 +380,11 @@ def calc_ai(sort_type, ret_df):
kernelName, idx, calls
)
)
total_flops = (
valu_flops
) = (
mfma_flops_bf16
) = (
mfma_flops_f16
) = (
mfma_iops_i8
) = (
total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = (
mfma_flops_f32
) = (
mfma_flops_f64
) = (
lds_data
) = (
L1cache_data
) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0
) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = (
calls
) = totalDuration = avgDuration = 0.0
if sort_type == "dispatches":
myList.append(
@@ -428,23 +406,11 @@ def calc_ai(sort_type, ret_df):
avgDuration,
)
)
total_flops = (
valu_flops
) = (
mfma_flops_bf16
) = (
mfma_flops_f16
) = (
mfma_iops_i8
) = (
total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = (
mfma_flops_f32
) = (
mfma_flops_f64
) = (
lds_data
) = (
L1cache_data
) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0
) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = (
calls
) = totalDuration = avgDuration = 0.0
myList.sort(key=lambda x: x.totalDuration, reverse=True)
@@ -456,24 +422,32 @@ def calc_ai(sort_type, ret_df):
# Create list of top 5 intensities
while i < TOP_N and i != len(myList):
kernelNames.append(myList[i].KernelName)
intensities["ai_l1"].append(
myList[i].total_flops / myList[i].L1cache_data
) if myList[i].L1cache_data else intensities["ai_l1"].append(0)
(
intensities["ai_l1"].append(myList[i].total_flops / myList[i].L1cache_data)
if myList[i].L1cache_data
else intensities["ai_l1"].append(0)
)
# print("cur_ai_L1", myList[i].total_flops/myList[i].L1cache_data) if myList[i].L1cache_data else print("null")
# print()
intensities["ai_l2"].append(
myList[i].total_flops / myList[i].L2cache_data
) if myList[i].L2cache_data else intensities["ai_l2"].append(0)
(
intensities["ai_l2"].append(myList[i].total_flops / myList[i].L2cache_data)
if myList[i].L2cache_data
else intensities["ai_l2"].append(0)
)
# print("cur_ai_L2", myList[i].total_flops/myList[i].L2cache_data) if myList[i].L2cache_data else print("null")
# print()
intensities["ai_hbm"].append(
myList[i].total_flops / myList[i].hbm_data
) if myList[i].hbm_data else intensities["ai_hbm"].append(0)
(
intensities["ai_hbm"].append(myList[i].total_flops / myList[i].hbm_data)
if myList[i].hbm_data
else intensities["ai_hbm"].append(0)
)
# print("cur_ai_hbm", myList[i].total_flops/myList[i].hbm_data) if myList[i].hbm_data else print("null")
# print()
curr_perf.append(myList[i].total_flops / myList[i].avgDuration) if myList[
i
].avgDuration else curr_perf.append(0)
(
curr_perf.append(myList[i].total_flops / myList[i].avgDuration)
if myList[i].avgDuration
else curr_perf.append(0)
)
# print("cur_perf", myList[i].total_flops/myList[i].avgDuration) if myList[i].avgDuration else print("null")
i += 1
+12 -8
Просмотреть файл
@@ -170,9 +170,11 @@ def show_all(args, runs, archConfigs, output):
else:
cur_df_copy = copy.deepcopy(cur_df)
cur_df_copy[header] = [
round(float(x), args.decimal)
if x != ""
else x
(
round(float(x), args.decimal)
if x != ""
else x
)
for x in base_df[header]
]
df = pd.concat([df, cur_df_copy[header]], axis=1)
@@ -214,11 +216,13 @@ def show_all(args, runs, archConfigs, output):
# fash for now.
ss += (
tabulate(
df.transpose()
if type != "raw_csv_table"
and "columnwise" in table_config
and table_config["columnwise"] == True
else df,
(
df.transpose()
if type != "raw_csv_table"
and "columnwise" in table_config
and table_config["columnwise"] == True
else df
),
headers="keys",
tablefmt="fancy_grid",
floatfmt="." + str(args.decimal) + "f",
+67 -46
Просмотреть файл
@@ -39,26 +39,30 @@ import config
rocprof_cmd = ""
def demarcate(function):
    """Decorate ``function`` so each call is bracketed by trace log messages.

    A message naming ``function.__qualname__`` is logged immediately before
    the call and another immediately after it returns. If ``function``
    raises, the exception propagates and no exit message is logged.

    The wrapper is built with ``functools.wraps`` so the decorated callable
    keeps the original ``__name__``, ``__qualname__``, ``__doc__`` and exposes
    the undecorated function as ``__wrapped__``.

    NOTE(review): ``logging.trace`` is not part of the standard ``logging``
    module; it is presumably installed by the project's logging setup before
    any decorated function is called -- confirm.

    Args:
        function: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``function`` and returns its result unchanged.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    import functools

    @functools.wraps(function)
    def wrap_function(*args, **kwargs):
        logging.trace("----- [entering function] -> %s()" % (function.__qualname__))
        result = function(*args, **kwargs)
        logging.trace("----- [exiting function] -> %s()" % function.__qualname__)
        return result

    return wrap_function
def error(message):
    """Report a fatal error through ``logging`` and terminate the process.

    Three ERROR-level records are emitted: an empty line, the message
    prefixed with ``[ERROR]: ``, and another empty line. The function then
    calls ``sys.exit(1)`` and therefore never returns normally.

    Args:
        message: Text to report; it is concatenated onto the prefix with
            ``+``, so it must be a ``str``.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    spacer = ""
    logging.error(spacer)
    logging.error("[ERROR]: " + message)
    logging.error(spacer)
    sys.exit(1)
def trace_logger(message, *args, **kwargs):
    """Emit ``message`` through the module-level ``logging.log`` at TRACE level.

    NOTE(review): ``logging.TRACE`` is not a standard-library level; it is
    presumably registered on the ``logging`` module elsewhere in the project
    before this function is called -- confirm.

    Args:
        message: Log message or format string.
        *args: Positional arguments forwarded unchanged to ``logging.log``.
        **kwargs: Keyword arguments forwarded unchanged to ``logging.log``.
    """
    trace_level = logging.TRACE
    logging.log(trace_level, message, *args, **kwargs)
def get_version(omniperf_home) -> dict:
"""Return Omniperf versioning info
"""
"""Return Omniperf versioning info"""
# semantic version info
version = os.path.join(omniperf_home.parent, "VERSION")
try:
@@ -96,9 +100,9 @@ def get_version(omniperf_home) -> dict:
versionData = {"version": VER, "sha": SHA, "mode": MODE}
return versionData
def get_version_display(version, sha, mode):
"""Pretty print versioning info
"""
"""Pretty print versioning info"""
buf = io.StringIO()
print("-" * 40, file=buf)
print("Omniperf version: %s (%s)" % (version, mode), file=buf)
@@ -106,30 +110,36 @@ def get_version_display(version, sha, mode):
print("-" * 40, file=buf)
return buf.getvalue()
def detect_rocprof():
"""Detect loaded rocprof version. Resolve path and set cmd globally.
"""
"""Detect loaded rocprof version. Resolve path and set cmd globally."""
global rocprof_cmd
# detect rocprof
if not "ROCPROF" in os.environ.keys():
rocprof_cmd = "rocprof"
else:
rocprof_cmd = os.environ["ROCPROF"]
# resolve rocprof path
rocprof_path = shutil.which(rocprof_cmd)
if not rocprof_path:
rocprof_cmd = "rocprof"
logging.warning("Warning: Unable to resolve path to %s binary. Reverting to default." % rocprof_cmd)
logging.warning(
"Warning: Unable to resolve path to %s binary. Reverting to default."
% rocprof_cmd
)
rocprof_path = shutil.which(rocprof_cmd)
if not rocprof_path:
error("Please verify installation or set ROCPROF environment variable with full path.")
error(
"Please verify installation or set ROCPROF environment variable with full path."
)
else:
# Resolve any sym links in file path
rocprof_path = os.path.realpath(rocprof_path.rstrip("\n"))
logging.info("ROC Profiler: " + str(rocprof_path))
return rocprof_cmd #TODO: Do we still need to return this? It's not being used in the function call
return rocprof_cmd # TODO: Do we still need to return this? It's not being used in the function call
def capture_subprocess_output(subprocess_args, new_env=None):
# Start subprocess
@@ -180,7 +190,7 @@ def capture_subprocess_output(subprocess_args, new_env=None):
return_code = process.wait()
selector.close()
success = (return_code == 0)
success = return_code == 0
# Store buffered output
output = buf.getvalue()
@@ -188,22 +198,26 @@ def capture_subprocess_output(subprocess_args, new_env=None):
return (success, output)
def run_prof(fname, profiler_options, target, workload_dir):
fbase = os.path.splitext(os.path.basename(fname))[0]
m_specs = specs.get_machine_specs(0)
logging.debug("pmc file: %s" % str(os.path.basename(fname)))
# standard rocprof options
default_options = [
"-i", fname
]
default_options = ["-i", fname]
options = default_options + profiler_options
# set required env var for mi300
new_env = None
if (target.lower() == "mi300x_a0" or target.lower() == "mi300x_a1" or target.lower() == "mi300a_a0" or target.lower() == "mi300a_a1") and (
if (
target.lower() == "mi300x_a0"
or target.lower() == "mi300x_a1"
or target.lower() == "mi300a_a0"
or target.lower() == "mi300a_a1"
) and (
os.path.basename(fname) == "pmc_perf_13.txt"
or os.path.basename(fname) == "pmc_perf_14.txt"
or os.path.basename(fname) == "pmc_perf_15.txt"
@@ -215,13 +229,9 @@ def run_prof(fname, profiler_options, target, workload_dir):
# profile the app
if new_env:
success, output = capture_subprocess_output(
[ rocprof_cmd ] + options, new_env
)
success, output = capture_subprocess_output([rocprof_cmd] + options, new_env)
else:
success, output = capture_subprocess_output(
[ rocprof_cmd ] + options
)
success, output = capture_subprocess_output([rocprof_cmd] + options)
if not success:
error(output)
@@ -230,9 +240,7 @@ def run_prof(fname, profiler_options, target, workload_dir):
# flatten tcc for applicable mi300 input
f = path(workload_dir + "/out/pmc_1/results_" + fbase + ".csv")
hbm_stack_num = get_hbm_stack_num(target, m_specs.memory_partition)
df = flatten_tcc_info_across_hbm_stacks(
f, hbm_stack_num, int(m_specs.L2Banks)
)
df = flatten_tcc_info_across_hbm_stacks(f, hbm_stack_num, int(m_specs.L2Banks))
df.to_csv(f, index=False)
if os.path.exists(workload_dir + "/out"):
@@ -270,10 +278,11 @@ def run_prof(fname, profiler_options, target, workload_dir):
df = pd.read_csv(workload_dir + "/" + fbase + ".csv")
df.rename(columns=output_headers, inplace=True)
df.to_csv(workload_dir + "/" + fbase + ".csv", index=False)
# write rocprof output to logging
logging.info(output)
def replace_timestamps(workload_dir):
df_stamps = pd.read_csv(workload_dir + "/timestamps.csv")
if "Start_Timestamp" in df_stamps.columns and "End_Timestamp" in df_stamps.columns:
@@ -286,9 +295,12 @@ def replace_timestamps(workload_dir):
df_pmc_perf["End_Timestamp"] = df_stamps["End_Timestamp"]
df_pmc_perf.to_csv(fname, index=False)
else:
warning = "WARNING: Incomplete profiling data detected. Unable to update timestamps."
warning = (
"WARNING: Incomplete profiling data detected. Unable to update timestamps."
)
logging.warning(warning + "\n")
def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof_only):
# Record system information
mspec = specs.get_machine_specs(0)
@@ -367,6 +379,7 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof
sysinfo.write(",".join(param))
sysinfo.close()
def detect_roofline():
mspec = specs.get_machine_specs(0)
rocm_ver = mspec.rocm_version[:1]
@@ -389,8 +402,9 @@ def detect_roofline():
# Must be a valid RHEL machine
distro = "platform:el8"
elif (
(type(sles_distro) == str and len(sles_distro) >= 3) and # confirm string and len
sles_distro[:2] == "15" and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3
(type(sles_distro) == str and len(sles_distro) >= 3)
and sles_distro[:2] == "15" # confirm string and len
and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3
):
# Must be a valid SLES machine
# Use SP3 binary for all forward compatible service pack versions
@@ -399,12 +413,15 @@ def detect_roofline():
# Must be a valid Ubuntu machine
distro = ubuntu_distro
else:
logging.error("ROOFLINE ERROR: Cannot find a valid binary for your operating system")
logging.error(
"ROOFLINE ERROR: Cannot find a valid binary for your operating system"
)
sys.exit(1)
target_binary = {"rocm_ver": rocm_ver, "distro": distro}
return target_binary
def run_rocscope(args, fname):
# profile the app
if args.use_rocscope == True:
@@ -417,23 +434,21 @@ def run_rocscope(args, fname):
args.path,
"-n",
args.name,
"-t",
"-t",
fname,
"--",
]
for i in args.remaining.split():
rs_cmd.append(i)
logging.info(rs_cmd)
success, output = capture_subprocess_output(
rs_cmd
)
success, output = capture_subprocess_output(rs_cmd)
if not success:
logging.error(result.stderr.decode("ascii"))
sys.exit(1)
def mibench(args):
"""Run roofline microbenchmark to generate peak BW and FLOP measurements.
"""
"""Run roofline microbenchmark to generate peak BW and FLOP measurements."""
logging.info("[roofline] No roofline data found. Generating...")
distro_map = {"platform:el8": "rhel8", "15.3": "sle15sp3", "20.04": "ubuntu20_04"}
@@ -454,7 +469,9 @@ def mibench(args):
# Distro is valid but can't find rocm ver
if not os.path.exists(path_to_binary):
logging.error("ROOFLINE ERROR: Unable to locate expected binary (%s)." % path_to_binary)
logging.error(
"ROOFLINE ERROR: Unable to locate expected binary (%s)." % path_to_binary
)
sys.exit(1)
subprocess.run(
@@ -465,9 +482,10 @@ def mibench(args):
"-d",
str(args.device),
],
check=True
check=True,
)
def flatten_tcc_info_across_hbm_stacks(file, stack_num, tcc_channel_per_stack):
"""
Flatten TCC per channel counters across all HBM stacks in use.
@@ -532,6 +550,7 @@ def flatten_tcc_info_across_hbm_stacks(file, stack_num, tcc_channel_per_stack):
return df
def get_hbm_stack_num(gpu_name, memory_partition):
"""
Get total HBM stack numbers based on memory partition for MI300.
@@ -564,15 +583,15 @@ def get_hbm_stack_num(gpu_name, memory_partition):
else:
# Fixme: add proper numbers for other archs
return -1
def get_submodules(package_name):
"""List all submodules for a target package
"""
"""List all submodules for a target package"""
import importlib
import pkgutil
submodules = []
# walk all submodules in target package
package = importlib.import_module(package_name)
for _, name, _ in pkgutil.walk_packages(package.__path__):
@@ -583,15 +602,17 @@ def get_submodules(package_name):
return submodules
def is_workload_empty(path):
"""Peek workload directory to verify valid profiling output
"""
"""Peek workload directory to verify valid profiling output"""
pmc_perf_path = path + "/pmc_perf.csv"
if os.path.isfile(pmc_perf_path):
temp_df = pd.read_csv(pmc_perf_path)
if temp_df.dropna().empty:
error("[profiling] Error. Found empty cells in %s.\nProfiling data could be corrupt." % pmc_perf_path)
error(
"[profiling] Error. Found empty cells in %s.\nProfiling data could be corrupt."
% pmc_perf_path
)
else:
error("[profiling] Error. Cannot find pmc_perf.csv in %s" % path)