From 6ac883e20e5ae141e1beada5fe9ddc35a04e81d2 Mon Sep 17 00:00:00 2001 From: Karl W Schulz Date: Thu, 22 Feb 2024 15:40:02 -0600 Subject: [PATCH] more code formatting updates Signed-off-by: Karl W Schulz [ROCm/rocprofiler-compute commit: 76873dd0c4c20660fc716fc55ec61fd58e490d4e] --- projects/rocprofiler-compute/src/roofline.py | 46 ++++--- projects/rocprofiler-compute/src/utils/gui.py | 94 ++++++++------- .../src/utils/roofline_calc.py | 90 +++++--------- projects/rocprofiler-compute/src/utils/tty.py | 20 ++-- .../rocprofiler-compute/src/utils/utils.py | 113 +++++++++++------- 5 files changed, 189 insertions(+), 174 deletions(-) diff --git a/projects/rocprofiler-compute/src/roofline.py b/projects/rocprofiler-compute/src/roofline.py index eebda645bf..e0424e8617 100644 --- a/projects/rocprofiler-compute/src/roofline.py +++ b/projects/rocprofiler-compute/src/roofline.py @@ -223,10 +223,12 @@ class Roofline: "{} GB/s".format( to_int(self.__ceiling_data[cache_level.lower()][2]) ), - None - if self.__run_parameters["is_standalone"] - else "{} GB/s".format( - to_int(self.__ceiling_data[cache_level.lower()][2]) + ( + None + if self.__run_parameters["is_standalone"] + else "{} GB/s".format( + to_int(self.__ceiling_data[cache_level.lower()][2]) + ) ), ], textposition="top right", @@ -243,9 +245,13 @@ class Roofline: mode=plot_mode, hovertemplate="%{text}", text=[ - None - if self.__run_parameters["is_standalone"] - else "{} GFLOP/s".format(to_int(self.__ceiling_data["valu"][2])), + ( + None + if self.__run_parameters["is_standalone"] + else "{} GFLOP/s".format( + to_int(self.__ceiling_data["valu"][2]) + ) + ), "{} GFLOP/s".format(to_int(self.__ceiling_data["valu"][2])), ], textposition="top left", @@ -265,9 +271,11 @@ class Roofline: mode=plot_mode, hovertemplate="%{text}", text=[ - None - if self.__run_parameters["is_standalone"] - else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])), + ( + None + if self.__run_parameters["is_standalone"] + else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])) + ), "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])), ], textposition=pos, @@ -285,9 +293,9 @@ class Roofline: name="ai_l1", mode="markers", marker={"color": "#00CC96"}, - marker_symbol=SYMBOLS - if self.__run_parameters["include_kernel_names"] - else None, + marker_symbol=( + SYMBOLS if self.__run_parameters["include_kernel_names"] else None + ), ) ) fig.add_trace( @@ -297,9 +305,9 @@ class Roofline: name="ai_l2", mode="markers", marker={"color": "#EF553B"}, - marker_symbol=SYMBOLS - if self.__run_parameters["include_kernel_names"] - else None, + marker_symbol=( + SYMBOLS if self.__run_parameters["include_kernel_names"] else None + ), ) ) fig.add_trace( @@ -309,9 +317,9 @@ class Roofline: name="ai_hbm", mode="markers", marker={"color": "#636EFA"}, - marker_symbol=SYMBOLS - if self.__run_parameters["include_kernel_names"] - else None, + marker_symbol=( + SYMBOLS if self.__run_parameters["include_kernel_names"] else None + ), ) ) diff --git a/projects/rocprofiler-compute/src/utils/gui.py b/projects/rocprofiler-compute/src/utils/gui.py index bc1b3d8e96..78244b6a55 100644 --- a/projects/rocprofiler-compute/src/utils/gui.py +++ b/projects/rocprofiler-compute/src/utils/gui.py @@ -298,9 +298,11 @@ def build_table_chart( [ { column: { - "value": str(row["Tips"]) - if column == display_columns[0] and row["Tips"] - else "", + "value": ( + str(row["Tips"]) + if column == display_columns[0] and row["Tips"] + else "" + ), "type": "markdown", } for column, value in row.items() @@ -325,52 +327,58 @@ def build_table_chart( # style cell style_cell={"maxWidth": "500px"}, # display style - style_header={ - "backgroundColor": "rgb(30, 30, 30)", - "color": "white", - "fontWeight": "bold", - } - if IS_DARK - else {}, - style_data={ - "backgroundColor": "rgb(50, 50, 50)", - "color": "white", - "whiteSpace": "normal", - "height": "auto", - } - if IS_DARK - else {}, - style_data_conditional=[ - {"if": {"row_index": "odd"}, "backgroundColor": "rgb(60, 60, 60)"}, + style_header=( { - "if": {"column_id": "PoP", "filter_query": "{PoP} > 50"}, - "backgroundColor": "#ffa90a", + "backgroundColor": "rgb(30, 30, 30)", "color": "white", - }, + "fontWeight": "bold", + } + if IS_DARK + else {} + ), + style_data=( { - "if": {"column_id": "PoP", "filter_query": "{PoP} > 80"}, - "backgroundColor": "#ff120a", + "backgroundColor": "rgb(50, 50, 50)", "color": "white", - }, - { - "if": { - "column_id": "Avg", - "filter_query": "{Unit} = Pct && {Avg} > 50", + "whiteSpace": "normal", + "height": "auto", + } + if IS_DARK + else {} + ), + style_data_conditional=( + [ + {"if": {"row_index": "odd"}, "backgroundColor": "rgb(60, 60, 60)"}, + { + "if": {"column_id": "PoP", "filter_query": "{PoP} > 50"}, + "backgroundColor": "#ffa90a", + "color": "white", }, - "backgroundColor": "#ffa90a", - "color": "white", - }, - { - "if": { - "column_id": "Avg", - "filter_query": "{Unit} = Pct && {Avg} > 80", + { + "if": {"column_id": "PoP", "filter_query": "{PoP} > 80"}, + "backgroundColor": "#ff120a", + "color": "white", }, - "backgroundColor": "#ff120a", - "color": "white", - }, - ] - if IS_DARK - else [], + { + "if": { + "column_id": "Avg", + "filter_query": "{Unit} = Pct && {Avg} > 50", + }, + "backgroundColor": "#ffa90a", + "color": "white", + }, + { + "if": { + "column_id": "Avg", + "filter_query": "{Unit} = Pct && {Avg} > 80", + }, + "backgroundColor": "#ff120a", + "color": "white", + }, + ] + if IS_DARK + else [] + ), # the df to display data=display_df.to_dict("records"), ) diff --git a/projects/rocprofiler-compute/src/utils/roofline_calc.py b/projects/rocprofiler-compute/src/utils/roofline_calc.py index 2c3cf44def..e8367d1184 100644 --- a/projects/rocprofiler-compute/src/utils/roofline_calc.py +++ b/projects/rocprofiler-compute/src/utils/roofline_calc.py @@ -195,21 +195,11 @@ def calc_ai(sort_type, ret_df): df = df.sort_values(by=["Kernel_Name"]) df = df.reset_index(drop=True) - total_flops = ( - valu_flops - ) = ( - mfma_flops_bf16 - ) = ( - mfma_flops_f16 - ) = ( - mfma_iops_i8 - ) = ( + total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = ( mfma_flops_f32 - ) = ( - mfma_flops_f64 - ) = ( - lds_data - ) = L1cache_data = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0 + ) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = calls = ( + totalDuration + ) = avgDuration = 0.0 kernelName = "" @@ -390,23 +380,11 @@ def calc_ai(sort_type, ret_df): kernelName, idx, calls ) ) - total_flops = ( - valu_flops - ) = ( - mfma_flops_bf16 - ) = ( - mfma_flops_f16 - ) = ( - mfma_iops_i8 - ) = ( + total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = ( mfma_flops_f32 - ) = ( - mfma_flops_f64 - ) = ( - lds_data - ) = ( - L1cache_data - ) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0 + ) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = ( + calls + ) = totalDuration = avgDuration = 0.0 if sort_type == "dispatches": myList.append( @@ -428,23 +406,11 @@ def calc_ai(sort_type, ret_df): avgDuration, ) ) - total_flops = ( - valu_flops - ) = ( - mfma_flops_bf16 - ) = ( - mfma_flops_f16 - ) = ( - mfma_iops_i8 - ) = ( + total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = ( mfma_flops_f32 - ) = ( - mfma_flops_f64 - ) = ( - lds_data - ) = ( - L1cache_data - ) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0 + ) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = ( + calls + ) = totalDuration = avgDuration = 0.0 myList.sort(key=lambda x: x.totalDuration, reverse=True) @@ -456,24 +422,32 @@ def calc_ai(sort_type, ret_df): # Create list of top 5 intensities while i < TOP_N and i != len(myList): kernelNames.append(myList[i].KernelName) - intensities["ai_l1"].append( - myList[i].total_flops / myList[i].L1cache_data - ) if myList[i].L1cache_data else intensities["ai_l1"].append(0) + ( + intensities["ai_l1"].append(myList[i].total_flops / myList[i].L1cache_data) + if myList[i].L1cache_data + else intensities["ai_l1"].append(0) + ) # print("cur_ai_L1", myList[i].total_flops/myList[i].L1cache_data) if myList[i].L1cache_data else print("null") # print() - intensities["ai_l2"].append( - myList[i].total_flops / myList[i].L2cache_data - ) if myList[i].L2cache_data else intensities["ai_l2"].append(0) + ( + intensities["ai_l2"].append(myList[i].total_flops / myList[i].L2cache_data) + if myList[i].L2cache_data + else intensities["ai_l2"].append(0) + ) # print("cur_ai_L2", myList[i].total_flops/myList[i].L2cache_data) if myList[i].L2cache_data else print("null") # print() - intensities["ai_hbm"].append( - myList[i].total_flops / myList[i].hbm_data - ) if myList[i].hbm_data else intensities["ai_hbm"].append(0) + ( + intensities["ai_hbm"].append(myList[i].total_flops / myList[i].hbm_data) + if myList[i].hbm_data + else intensities["ai_hbm"].append(0) + ) # print("cur_ai_hbm", myList[i].total_flops/myList[i].hbm_data) if myList[i].hbm_data else print("null") # print() - curr_perf.append(myList[i].total_flops / myList[i].avgDuration) if myList[ - i - ].avgDuration else curr_perf.append(0) + ( + curr_perf.append(myList[i].total_flops / myList[i].avgDuration) + if myList[i].avgDuration + else curr_perf.append(0) + ) # print("cur_perf", myList[i].total_flops/myList[i].avgDuration) if myList[i].avgDuration else print("null") i += 1 diff --git a/projects/rocprofiler-compute/src/utils/tty.py b/projects/rocprofiler-compute/src/utils/tty.py index 4870ef7e60..5d1f68a701 100644 --- a/projects/rocprofiler-compute/src/utils/tty.py +++ b/projects/rocprofiler-compute/src/utils/tty.py @@ -170,9 +170,11 @@ def show_all(args, runs, archConfigs, output): else: cur_df_copy = copy.deepcopy(cur_df) cur_df_copy[header] = [ - round(float(x), args.decimal) - if x != "" - else x + ( + round(float(x), args.decimal) + if x != "" + else x + ) for x in base_df[header] ] df = pd.concat([df, cur_df_copy[header]], axis=1) @@ -214,11 +216,13 @@ def show_all(args, runs, archConfigs, output): # fash for now. ss += ( tabulate( - df.transpose() - if type != "raw_csv_table" - and "columnwise" in table_config - and table_config["columnwise"] == True - else df, + ( + df.transpose() + if type != "raw_csv_table" + and "columnwise" in table_config + and table_config["columnwise"] == True + else df + ), headers="keys", tablefmt="fancy_grid", floatfmt="." + str(args.decimal) + "f", diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py index 0b7e91bea7..8e4bed5958 100644 --- a/projects/rocprofiler-compute/src/utils/utils.py +++ b/projects/rocprofiler-compute/src/utils/utils.py @@ -39,26 +39,30 @@ import config rocprof_cmd = "" + def demarcate(function): def wrap_function(*args, **kwargs): logging.trace("----- [entering function] -> %s()" % (function.__qualname__)) result = function(*args, **kwargs) logging.trace("----- [exiting function] -> %s()" % function.__qualname__) return result + return wrap_function + def error(message): logging.error("") logging.error("[ERROR]: " + message) logging.error("") sys.exit(1) + def trace_logger(message, *args, **kwargs): logging.log(logging.TRACE, message, *args, **kwargs) + def get_version(omniperf_home) -> dict: - """Return Omniperf versioning info - """ + """Return Omniperf versioning info""" # symantic version info version = os.path.join(omniperf_home.parent, "VERSION") try: @@ -96,9 +100,9 @@ def get_version(omniperf_home) -> dict: versionData = {"version": VER, "sha": SHA, "mode": MODE} return versionData + def get_version_display(version, sha, mode): - """Pretty print versioning info - """ + """Pretty print versioning info""" buf = io.StringIO() print("-" * 40, file=buf) print("Omniperf version: %s (%s)" % (version, mode), file=buf) @@ -106,30 +110,36 @@ def get_version_display(version, sha, mode): print("-" * 40, file=buf) return buf.getvalue() + def detect_rocprof(): - """Detect loaded rocprof version. Resolve path and set cmd globally. - """ + """Detect loaded rocprof version. Resolve path and set cmd globally.""" global rocprof_cmd # detect rocprof if not "ROCPROF" in os.environ.keys(): rocprof_cmd = "rocprof" else: rocprof_cmd = os.environ["ROCPROF"] - + # resolve rocprof path rocprof_path = shutil.which(rocprof_cmd) if not rocprof_path: rocprof_cmd = "rocprof" - logging.warning("Warning: Unable to resolve path to %s binary. Reverting to default." % rocprof_cmd) + logging.warning( + "Warning: Unable to resolve path to %s binary. Reverting to default." + % rocprof_cmd + ) rocprof_path = shutil.which(rocprof_cmd) if not rocprof_path: - error("Please verify installation or set ROCPROF environment variable with full path.") + error( + "Please verify installation or set ROCPROF environment variable with full path." + ) else: # Resolve any sym links in file path rocprof_path = os.path.realpath(rocprof_path.rstrip("\n")) logging.info("ROC Profiler: " + str(rocprof_path)) - return rocprof_cmd #TODO: Do we still need to return this? It's not being used in the function call + return rocprof_cmd # TODO: Do we still need to return this? It's not being used in the function call + def capture_subprocess_output(subprocess_args, new_env=None): # Start subprocess @@ -180,7 +190,7 @@ def capture_subprocess_output(subprocess_args, new_env=None): return_code = process.wait() selector.close() - success = (return_code == 0) + success = return_code == 0 # Store buffered output output = buf.getvalue() @@ -188,22 +198,26 @@ def capture_subprocess_output(subprocess_args, new_env=None): return (success, output) + def run_prof(fname, profiler_options, target, workload_dir): fbase = os.path.splitext(os.path.basename(fname))[0] m_specs = specs.get_machine_specs(0) - + logging.debug("pmc file: %s" % str(os.path.basename(fname))) # standard rocprof options - default_options = [ - "-i", fname - ] + default_options = ["-i", fname] options = default_options + profiler_options # set required env var for mi300 new_env = None - if (target.lower() == "mi300x_a0" or target.lower() == "mi300x_a1" or target.lower() == "mi300a_a0" or target.lower() == "mi300a_a1") and ( + if ( + target.lower() == "mi300x_a0" + or target.lower() == "mi300x_a1" + or target.lower() == "mi300a_a0" + or target.lower() == "mi300a_a1" + ) and ( os.path.basename(fname) == "pmc_perf_13.txt" or os.path.basename(fname) == "pmc_perf_14.txt" or os.path.basename(fname) == "pmc_perf_15.txt" @@ -215,13 +229,9 @@ def run_prof(fname, profiler_options, target, workload_dir): # profile the app if new_env: - success, output = capture_subprocess_output( - [ rocprof_cmd ] + options, new_env - ) + success, output = capture_subprocess_output([rocprof_cmd] + options, new_env) else: - success, output = capture_subprocess_output( - [ rocprof_cmd ] + options - ) + success, output = capture_subprocess_output([rocprof_cmd] + options) if not success: error(output) @@ -230,9 +240,7 @@ def run_prof(fname, profiler_options, target, workload_dir): # flatten tcc for applicable mi300 input f = path(workload_dir + "/out/pmc_1/results_" + fbase + ".csv") hbm_stack_num = get_hbm_stack_num(target, m_specs.memory_partition) - df = flatten_tcc_info_across_hbm_stacks( - f, hbm_stack_num, int(m_specs.L2Banks) - ) + df = flatten_tcc_info_across_hbm_stacks(f, hbm_stack_num, int(m_specs.L2Banks)) df.to_csv(f, index=False) if os.path.exists(workload_dir + "/out"): @@ -270,10 +278,11 @@ def run_prof(fname, profiler_options, target, workload_dir): df = pd.read_csv(workload_dir + "/" + fbase + ".csv") df.rename(columns=output_headers, inplace=True) df.to_csv(workload_dir + "/" + fbase + ".csv", index=False) - + # write rocprof output to logging logging.info(output) + def replace_timestamps(workload_dir): df_stamps = pd.read_csv(workload_dir + "/timestamps.csv") if "Start_Timestamp" in df_stamps.columns and "End_Timestamp" in df_stamps.columns: @@ -286,9 +295,12 @@ def replace_timestamps(workload_dir): df_pmc_perf["End_Timestamp"] = df_stamps["End_Timestamp"] df_pmc_perf.to_csv(fname, index=False) else: - warning = "WARNING: Incomplete profiling data detected. Unable to update timestamps." + warning = ( + "WARNING: Incomplete profiling data detected. Unable to update timestamps." + ) logging.warning(warning + "\n") + def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof_only): # Record system information mspec = specs.get_machine_specs(0) @@ -367,6 +379,7 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof sysinfo.write(",".join(param)) sysinfo.close() + def detect_roofline(): mspec = specs.get_machine_specs(0) rocm_ver = mspec.rocm_version[:1] @@ -389,8 +402,9 @@ def detect_roofline(): # Must be a valid RHEL machine distro = "platform:el8" elif ( - (type(sles_distro) == str and len(sles_distro) >= 3) and # confirm string and len - sles_distro[:2] == "15" and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3 + (type(sles_distro) == str and len(sles_distro) >= 3) + and sles_distro[:2] == "15" # confirm string and len + and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3 ): # Must be a valid SLES machine # Use SP3 binary for all forward compatible service pack versions @@ -399,12 +413,15 @@ def detect_roofline(): # Must be a valid Ubuntu machine distro = ubuntu_distro else: - logging.error("ROOFLINE ERROR: Cannot find a valid binary for your operating system") + logging.error( + "ROOFLINE ERROR: Cannot find a valid binary for your operating system" + ) sys.exit(1) target_binary = {"rocm_ver": rocm_ver, "distro": distro} return target_binary + def run_rocscope(args, fname): # profile the app if args.use_rocscope == True: @@ -417,23 +434,21 @@ def run_rocscope(args, fname): args.path, "-n", args.name, - "-t", + "-t", fname, "--", ] for i in args.remaining.split(): rs_cmd.append(i) logging.info(rs_cmd) - success, output = capture_subprocess_output( - rs_cmd - ) + success, output = capture_subprocess_output(rs_cmd) if not success: logging.error(result.stderr.decode("ascii")) sys.exit(1) + def mibench(args): - """Run roofline microbenchmark to generate peak BW and FLOP measurements. - """ + """Run roofline microbenchmark to generate peak BW and FLOP measurements.""" logging.info("[roofline] No roofline data found. Generating...") distro_map = {"platform:el8": "rhel8", "15.3": "sle15sp3", "20.04": "ubuntu20_04"} @@ -454,7 +469,9 @@ def mibench(args): # Distro is valid but cant find rocm ver if not os.path.exists(path_to_binary): - logging.error("ROOFLINE ERROR: Unable to locate expected binary (%s)." % path_to_binary) + logging.error( + "ROOFLINE ERROR: Unable to locate expected binary (%s)." % path_to_binary + ) sys.exit(1) subprocess.run( @@ -465,9 +482,10 @@ def mibench(args): "-d", str(args.device), ], - check=True + check=True, ) + def flatten_tcc_info_across_hbm_stacks(file, stack_num, tcc_channel_per_stack): """ Flatten TCC per channel counters across all HBM stacks in used. @@ -532,6 +550,7 @@ def flatten_tcc_info_across_hbm_stacks(file, stack_num, tcc_channel_per_stack): return df + def get_hbm_stack_num(gpu_name, memory_partition): """ Get total HBM stack numbers based on memory partition for MI300. @@ -564,15 +583,15 @@ def get_hbm_stack_num(gpu_name, memory_partition): else: # Fixme: add proper numbers for other archs return -1 - + + def get_submodules(package_name): - """List all submodules for a target package - """ + """List all submodules for a target package""" import importlib import pkgutil submodules = [] - + # walk all submodules in target package package = importlib.import_module(package_name) for _, name, _ in pkgutil.walk_packages(package.__path__): @@ -583,15 +602,17 @@ def get_submodules(package_name): return submodules + def is_workload_empty(path): - """Peek workload directory to verify valid profiling output - """ + """Peek workload directory to verify valid profiling output""" pmc_perf_path = path + "/pmc_perf.csv" if os.path.isfile(pmc_perf_path): temp_df = pd.read_csv(pmc_perf_path) if temp_df.dropna().empty: - error("[profiling] Error. Found empty cells in %s.\nProfiling data could be corrupt." % pmc_perf_path) + error( + "[profiling] Error. Found empty cells in %s.\nProfiling data could be corrupt." + % pmc_perf_path + ) else: error("[profiling] Error. Cannot find pmc_perf.csv in %s" % path) -