diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index 9b1bbdacc7..4a410947cb 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -47,6 +47,9 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Roofline support for MI350 series architecture +* Setting ROCPROF=rocprofiler-sdk environment variable will use rocprofiler-sdk C++ library instead of rocprofv3 python script + * Add --rocprofiler-sdk-library-path runtime option to choose the path to rocprofiler-sdk library to be used + ### Changed * Change the default rocprof version to v3 when environment variable "ROCPROF" is not set diff --git a/projects/rocprofiler-compute/src/argparser.py b/projects/rocprofiler-compute/src/argparser.py index 8ed1c3bdbc..774d466e87 100644 --- a/projects/rocprofiler-compute/src/argparser.py +++ b/projects/rocprofiler-compute/src/argparser.py @@ -333,6 +333,15 @@ Examples: help="\t\t\tSet the interval of pc sampling in microsecond (DEFAULT: 1).", ) + profile_group.add_argument( + "--rocprofiler-sdk-library-path", + type=str, + dest="rocprofiler_sdk_library_path", + required=False, + default="/opt/rocm/lib/librocprofiler-sdk.so", + help="\t\t\tSet the path to rocprofiler SDK library.", + ) + ## Roofline Command Line Options roofline_group.add_argument( "--roof-only", diff --git a/projects/rocprofiler-compute/src/rocprof_compute_base.py b/projects/rocprofiler-compute/src/rocprof_compute_base.py index deeac145d6..9bc7afcc49 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_base.py @@ -126,17 +126,19 @@ class RocProfCompute: else: self.__profiler_mode = "rocscope" else: - rocprof_cmd = detect_rocprof() - if str(rocprof_cmd).endswith("rocprof"): + profiler_mode = detect_rocprof(self.__args) + if str(profiler_mode).endswith("rocprof"): self.__profiler_mode = "rocprofv1" - elif 
str(rocprof_cmd).endswith("rocprofv2"): + elif str(profiler_mode).endswith("rocprofv2"): self.__profiler_mode = "rocprofv2" - elif str(rocprof_cmd).endswith("rocprofv3"): + elif str(profiler_mode).endswith("rocprofv3"): self.__profiler_mode = "rocprofv3" + elif str(profiler_mode) == "rocprofiler-sdk": + self.__profiler_mode = "rocprofiler-sdk" else: console_error( "Incompatible profiler: %s. Supported profilers include: %s" - % (rocprof_cmd, get_submodules("rocprof_compute_profile")) + % (profiler_mode, get_submodules("rocprof_compute_profile")) ) return @@ -305,6 +307,17 @@ class RocProfCompute: self.__soc[self.__mspec.gpu_arch], self.__supported_archs, ) + elif self.__profiler_mode == "rocprofiler-sdk": + from rocprof_compute_profile.profiler_rocprofiler_sdk import ( + rocprofiler_sdk_profiler, + ) + + profiler = rocprofiler_sdk_profiler( + self.__args, + self.__profiler_mode, + self.__soc[self.__mspec.gpu_arch], + self.__supported_archs, + ) else: console_error("Unsupported profiler") diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py index fea8f09308..e1a461d1ac 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py @@ -416,6 +416,7 @@ class RocProfCompute_Base: self.__profiler == "rocprofv1" or self.__profiler == "rocprofv2" or self.__profiler == "rocprofv3" + or self.__profiler == "rocprofiler-sdk" ): start_run_prof = time.time() run_prof( @@ -441,12 +442,16 @@ class RocProfCompute_Base: # TODO: Finish logic console_error("Profiler not supported") - if self.__pc_sampling == True and self.__profiler == "rocprofv3": + if self.__pc_sampling == True and self.__profiler in ( + "rocprofv3", + "rocprofiler-sdk", + ): start_run_prof = time.time() pc_sampling_prof( interval=self.get_args().pc_sampling_interval, workload_dir=self.get_args().path, 
appcmd=self.get_args().remaining, + rocprofiler_sdk_library_path=self.get_args().rocprofiler_sdk_library_path, ) end_run_prof = time.time() console_debug( diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprofiler_sdk.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprofiler_sdk.py new file mode 100644 index 0000000000..68b1e73c29 --- /dev/null +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprofiler_sdk.py @@ -0,0 +1,126 @@ +##############################################################################bl +# MIT License +# +# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+##############################################################################el + +import os +import shlex +from pathlib import Path + +from rocprof_compute_profile.profiler_base import RocProfCompute_Base +from utils.logger import console_error, console_log, demarcate + + +class rocprofiler_sdk_profiler(RocProfCompute_Base): + def __init__(self, profiling_args, profiler_mode, soc, supported_archs): + super().__init__(profiling_args, profiler_mode, soc, supported_archs) + self.ready_to_profile = ( + self.get_args().roof_only + and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file() + or not self.get_args().roof_only + ) + + def get_profiler_options(self, fname, soc): + app_cmd = shlex.split(self.get_args().remaining) + rocm_libdir = str(Path(self.get_args().rocprofiler_sdk_library_path).parent) + rocprofiler_sdk_tool_path = str( + Path(rocm_libdir).joinpath("rocprofiler-sdk/librocprofiler-sdk-tool.so") + ) + ld_preload = [ + rocprofiler_sdk_tool_path, + self.get_args().rocprofiler_sdk_library_path, + ] + options = { + "ROCPROFILER_LIBRARY_CTOR": "1", + "LD_PRELOAD": ":".join(ld_preload), + "ROCP_TOOL_LIBRARIES": rocprofiler_sdk_tool_path, + "LD_LIBRARY_PATH": rocm_libdir, + "ROCPROF_KERNEL_TRACE": "1", + "ROCPROF_OUTPUT_FORMAT": "json", + "ROCPROF_OUTPUT_PATH": self.get_args().path + "/out/pmc_1", + } + + if self.get_args().format_rocprof_output == "csv": + options["ROCPROF_OUTPUT_FORMAT"] = "csv" + + if self.get_args().kokkos_trace: + # NOTE: --kokkos-trace feature is incomplete and is disabled for now. + console_error( + "The option '--kokkos-trace' is not supported in the current version of rocprof-compute. This functionality is planned for a future release. Please adjust your profiling options accordingly." 
+ ) + if self.get_args().hip_trace: + options["ROCPROF_HIP_COMPILER_API_TRACE"] = "1" + options["ROCPROF_HIP_RUNTIME_API_TRACE"] = "1" + + # Kernel filtering + if self.get_args().kernel: + options["ROCPROF_KERNEL_FILTER_INCLUDE_REGEX"] = "|".join( + self.get_args().kernel + ) + # Dispatch filtering + dispatch = [] + # rocprof sdk dispatch indexing is inclusive and starts from 1 + if self.get_args().dispatch: + for dispatch_id in self.get_args().dispatch: + if ":" in dispatch_id: + tokens = dispatch_id.split(":") + # 4:7 -> 5-7 + dispatch.append(f"{int(tokens[0]) + 1}-{tokens[1]}") + else: + # 4 -> 5 + dispatch.append(f"{int(dispatch_id) + 1}") + if dispatch: + options["ROCPROF_KERNEL_FILTER_RANGE"] = f"[{','.join(dispatch)}]" + options["APP_CMD"] = app_cmd + return options + + # ----------------------- + # Required child methods + # ----------------------- + @demarcate + def pre_processing(self): + """Perform any pre-processing steps prior to profiling.""" + super().pre_processing() + + @demarcate + def run_profiling(self, version, prog): + """Run profiling.""" + if self.ready_to_profile: + if self.get_args().roof_only: + console_log( + "roofline", "Generating pmc_perf.csv (roofline counters only)." 
+ ) + # Log profiling options and setup filtering + super().run_profiling(version, prog) + else: + console_log("roofline", "Detected existing pmc_perf.csv") + + @demarcate + def post_processing(self): + """Perform any post-processing steps after profiling.""" + super().post_processing() + + if self.ready_to_profile: + # Manually join each pmc_perf*.csv output + self.join_prof() + # Replace timestamp data to solve a known rocprof bug + # replace_timestamps(self.get_args().path) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py index 594957fc99..13ada9f73c 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py @@ -22,6 +22,7 @@ # SOFTWARE. ##############################################################################el +import ctypes import glob import math import os @@ -377,8 +378,8 @@ class OmniSoC_Base: return hw_counter_matches, variable_matches def get_rocprof_supported_counters(self): - rocprof_cmd = detect_rocprof() - rorcprof_counters = set() + rocprof_cmd = detect_rocprof(self.get_args()) + rocprof_counters = set() if str(rocprof_cmd).endswith("rocprof"): command = [rocprof_cmd, "--list-basic"] @@ -391,7 +392,7 @@ class OmniSoC_Base: for line in output.splitlines(): if "gpu-agent" in line: counters, _ = self.parse_counters_text(line.split(":")[1].strip()) - rorcprof_counters.update(counters) + rocprof_counters.update(counters) command = [rocprof_cmd, "--list-derived"] success, output = capture_subprocess_output(command, enable_logging=False) @@ -403,7 +404,7 @@ class OmniSoC_Base: for line in output.splitlines(): if "gpu-agent" in line: counters, _ = self.parse_counters_text(line.split(":")[1].strip()) - rorcprof_counters.update(counters) + rocprof_counters.update(counters) elif str(rocprof_cmd).endswith("rocprofv2"): command = [rocprof_cmd, "--list-counters"] @@ -416,7
+417,7 @@ class OmniSoC_Base: for line in output.splitlines(): if "gfx" in line: counters, _ = self.parse_counters_text(line.split(":")[2].strip()) - rorcprof_counters.update(counters) + rocprof_counters.update(counters) elif str(rocprof_cmd).endswith("rocprofv3"): command = [rocprof_cmd, "--list-avail"] @@ -429,7 +430,68 @@ class OmniSoC_Base: for line in output.splitlines(): if "Name:" in line: counters, _ = self.parse_counters_text(line.split(":")[1].strip()) - rorcprof_counters.update(counters) + rocprof_counters.update(counters) + + elif str(rocprof_cmd) == "rocprofiler-sdk": + MAX_STR = 256 + + # rocprofiler sdk list avail library + libname = str( + Path(self.get_args().rocprofiler_sdk_library_path).parent.parent.joinpath( + "libexec/rocprofiler-sdk/librocprofv3-list-avail.so" + ) + ) + c_lib = ctypes.CDLL(libname) + if c_lib is None: + console_error(f"Error opening {libname}") + + # Initialize the library and set data types for arguments and variables + c_lib.avail_tool_init() + c_lib.get_number_of_agents.restype = ctypes.c_size_t + c_lib.get_agent_node_id.restype = ctypes.c_ulong + c_lib.get_agent_node_id.argtypes = [ctypes.c_int] + c_lib.get_number_of_counters.restype = ctypes.c_ulong + c_lib.get_number_of_counters.argtypes = [ctypes.c_int] + c_lib.get_counters_info.argtypes = [ + ctypes.c_ulong, + ctypes.c_int, + ctypes.POINTER(ctypes.c_ulong), + ctypes.POINTER(ctypes.POINTER(ctypes.c_char * MAX_STR)), + ctypes.POINTER(ctypes.POINTER(ctypes.c_char * MAX_STR)), + ctypes.POINTER(ctypes.c_int), + ] + c_lib.get_counter_block.argtypes = [ + ctypes.c_ulong, + ctypes.c_ulong, + ctypes.POINTER(ctypes.POINTER(ctypes.c_char * MAX_STR)), + ] + + # Iterate through each counter index and get its information + for idx in range(c_lib.get_number_of_agents()): + node_id = c_lib.get_agent_node_id(idx) + for counter_idx in range(c_lib.get_number_of_counters(node_id)): + # Counter information will be stored in these variables + name_args = ctypes.POINTER(ctypes.c_char *
MAX_STR)() + description_args = ctypes.POINTER(ctypes.c_char * MAX_STR)() + is_derived_args = ctypes.c_int() + counter_id_args = ctypes.c_ulong() + block_args = ctypes.POINTER(ctypes.c_char * MAX_STR)() + # Get the counter information + c_lib.get_counters_info( + node_id, + counter_idx, + ctypes.byref(counter_id_args), + name_args, + description_args, + ctypes.byref(is_derived_args), + ) + c_lib.get_counter_block(node_id, counter_idx, block_args) + block = ctypes.cast(block_args, ctypes.c_char_p).value.decode("utf-8") + if not is_derived_args.value and block: + # Only consider raw hardware counters from IP blocks + rocprof_counters.add( + ctypes.cast(name_args, ctypes.c_char_p).value.decode("utf-8") + ) else: console_error( @@ -437,7 +499,7 @@ class OmniSoC_Base: % (rocprof_cmd, get_submodules("rocprof_compute_profile")) ) - return rorcprof_counters + return rocprof_counters @demarcate def perfmon_coalesce(self, counters): diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py index 4208efb25b..54e7c4a7a5 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py @@ -33,7 +33,9 @@ class gfx908_soc(OmniSoC_Base): def __init__(self, args, mspec): super().__init__(args, mspec) self.set_arch("gfx908") - self.set_compatible_profilers(["rocprofv1", "rocscope", "rocprofv3"]) + self.set_compatible_profilers( + ["rocprofv1", "rocscope", "rocprofv3", "rocprofiler-sdk"] + ) # Per IP block max number of simultaneous counters. 
GFX IP Blocks self.set_perfmon_config( { diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx90a.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx90a.py index 56f002dd24..311b65e18e 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx90a.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx90a.py @@ -46,7 +46,9 @@ class gfx90a_soc(OmniSoC_Base): ) ) ) - self.set_compatible_profilers(["rocprofv1", "rocscope", "rocprofv2", "rocprofv3"]) + self.set_compatible_profilers( + ["rocprofv1", "rocscope", "rocprofv2", "rocprofv3", "rocprofiler-sdk"] + ) # Per IP block max number of simultaneous counters. GFX IP Blocks self.set_perfmon_config( { diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx940.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx940.py index 9508f02d6c..e28c6eb8f9 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx940.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx940.py @@ -46,7 +46,9 @@ class gfx940_soc(OmniSoC_Base): ) ) ) - self.set_compatible_profilers(["rocprofv1", "rocprofv2", "rocprofv3"]) + self.set_compatible_profilers( + ["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"] + ) # Per IP block max number of simultaneous counters. GFX IP Blocks self.set_perfmon_config( { diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx941.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx941.py index d64c5a0839..ca52935b59 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx941.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx941.py @@ -46,7 +46,9 @@ class gfx941_soc(OmniSoC_Base): ) ) ) - self.set_compatible_profilers(["rocprofv1", "rocprofv2", "rocprofv3"]) + self.set_compatible_profilers( + ["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"] + ) # Per IP block max number of simultaneous counters. 
GFX IP Blocks self.set_perfmon_config( { diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx942.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx942.py index e9f21c563c..19da8f6de4 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx942.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx942.py @@ -46,7 +46,9 @@ class gfx942_soc(OmniSoC_Base): ) ) ) - self.set_compatible_profilers(["rocprofv1", "rocprofv2", "rocprofv3"]) + self.set_compatible_profilers( + ["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"] + ) # Per IP block max number of simultaneous counters. GFX IP Blocks self.set_perfmon_config( { diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx950.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx950.py index b266ef3476..ba93e2aab0 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx950.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx950.py @@ -46,7 +46,7 @@ class gfx950_soc(OmniSoC_Base): ) ) ) - self.set_compatible_profilers(["rocprofv3"]) + self.set_compatible_profilers(["rocprofv3", "rocprofiler-sdk"]) # Per IP block max number of simultaneous counters. 
GFX IP Blocks self.set_perfmon_config( { diff --git a/projects/rocprofiler-compute/src/utils/parser.py b/projects/rocprofiler-compute/src/utils/parser.py index 01fef7c699..b9a1e22bfd 100644 --- a/projects/rocprofiler-compute/src/utils/parser.py +++ b/projects/rocprofiler-compute/src/utils/parser.py @@ -1181,7 +1181,7 @@ def load_pc_sampling_data(workload, dir, file_prefix): # No kernel filter, return grouped and sorted csv directly if not workload.filter_kernel_ids: - # NB: the default file name is subject to changes from rocprofv3 + # NB: the default file name is subject to changes from rocprofv3/rocprofiler_sdk csv_file_path = Path.joinpath( Path(dir), file_prefix + "_pc_sampling_host_trap.csv" ) @@ -1221,7 +1221,7 @@ def load_pc_sampling_data(workload, dir, file_prefix): elif len(workload.filter_kernel_ids) == 1: # print("kernel id", workload.filter_kernel_ids[0]) - # NB: the default file name is subject to changes from rocprofv3 + # NB: the default file name is subject to changes from rocprofv3/rocprofiler_sdk json_file_path = Path.joinpath(Path(dir), file_prefix + "_results.json") if not json_file_path.exists(): console_error("PC sampling: can not read %s " % json_file_path) diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py index 6709ca589e..f72da647e7 100644 --- a/projects/rocprofiler-compute/src/utils/utils.py +++ b/projects/rocprofiler-compute/src/utils/utils.py @@ -81,7 +81,11 @@ def using_v1(): def using_v3(): return "ROCPROF" not in os.environ.keys() or ( - "ROCPROF" in os.environ.keys() and os.environ["ROCPROF"].endswith("rocprofv3") + "ROCPROF" in os.environ.keys() + and ( + os.environ["ROCPROF"].endswith("rocprofv3") + or os.environ["ROCPROF"] == "rocprofiler-sdk" + ) ) @@ -141,9 +145,23 @@ def get_version_display(version, sha, mode): return buf.getvalue() -def detect_rocprof(): +def detect_rocprof(args): """Detect loaded rocprof version. 
Resolve path and set cmd globally.""" global rocprof_cmd + + if os.environ.get("ROCPROF") == "rocprofiler-sdk": + if not path(args.rocprofiler_sdk_library_path).exists(): + console_error( + "Could not find rocprofiler-sdk library at " + + args.rocprofiler_sdk_library_path + ) + rocprof_cmd = "rocprofiler-sdk" + console_debug("rocprof_cmd is {}".format(rocprof_cmd)) + console_debug( + "rocprofiler_sdk_path is {}".format(args.rocprofiler_sdk_library_path) + ) + return rocprof_cmd + # detect rocprof if not "ROCPROF" in os.environ.keys(): rocprof_cmd = "rocprofv3" @@ -575,6 +593,35 @@ def v3_counter_csv_to_v2_csv(counter_file, agent_info_filepath, converted_csv_fi result.to_csv(converted_csv_file, index=False) +def parse_text(text_file): + """ + Parse the text file to get the pmc counters. + """ + + def process_line(line): + if "pmc:" not in line: + return "" + line = line.strip() + pos = line.find("#") + if pos >= 0: + line = line[0:pos] + + def _dedup(_line, _sep): + for itr in _sep: + _line = " ".join(_line.split(itr)) + return _line.strip() + + # remove tabs and duplicate spaces + return _dedup(line.replace("pmc:", ""), ["\n", "\t", " "]).split(" ") + + with open(text_file, "r") as file: + return [ + counter + for litr in [process_line(itr) for itr in file.readlines()] + for counter in litr + ] + + def run_prof( fname, profiler_options, workload_dir, mspec, loglevel, format_rocprof_output ): @@ -585,11 +632,25 @@ def run_prof( path_counter_config_yaml = path(fname).with_suffix(".yaml") # standard rocprof options - default_options = ["-i", fname] - options = default_options + profiler_options + if rocprof_cmd == "rocprofiler-sdk": + options = profiler_options + options["ROCPROF_COUNTER_COLLECTION"] = "1" + options["ROCPROF_COUNTERS"] = "pmc: " + " ".join(parse_text(fname)) + else: + default_options = ["-i", fname] + options = default_options + profiler_options + if using_v3(): - options = ["-A", "absolute"] + options - if path_counter_config_yaml.exists(): + if 
rocprof_cmd == "rocprofiler-sdk": + options["ROCPROF_AGENT_INDEX"] = "absolute" + else: + options = ["-A", "absolute"] + options + + if using_v3() and path_counter_config_yaml.exists(): + if rocprof_cmd == "rocprofiler-sdk": + with open(path_counter_config_yaml, "r") as file: + options["ROCPROF_EXTRA_COUNTERS_CONTENTS"] = file.read() + else: options = ["-E", str(path_counter_config_yaml)] + options # set required env var for mi300 @@ -603,16 +664,26 @@ def run_prof( is_timestamps = True time_1 = time.time() - console_debug("rocprof command: {}".format([rocprof_cmd] + options)) - # profile the app - if new_env: + if rocprof_cmd == "rocprofiler-sdk": + app_cmd = options.pop("APP_CMD") + for key, value in options.items(): + new_env[key] = value + console_debug("rocprof sdk env vars: {}".format(new_env)) + console_debug("rocprof sdk user provided command: {}".format(app_cmd)) success, output = capture_subprocess_output( - [rocprof_cmd] + options, new_env=new_env, profileMode=True + app_cmd, new_env=new_env, profileMode=True ) else: - success, output = capture_subprocess_output( - [rocprof_cmd] + options, profileMode=True - ) + console_debug("rocprof command: {}".format([rocprof_cmd] + options)) + # profile the app + if new_env: + success, output = capture_subprocess_output( + [rocprof_cmd] + options, new_env=new_env, profileMode=True + ) + else: + success, output = capture_subprocess_output( + [rocprof_cmd] + options, profileMode=True + ) time_2 = time.time() console_debug( @@ -647,17 +718,22 @@ def run_prof( combined_results.to_csv( workload_dir + "/out/pmc_1/results_" + fbase + ".csv", index=False ) - elif rocprof_cmd.endswith("v3"): + elif rocprof_cmd.endswith("v3") or rocprof_cmd == "rocprofiler-sdk": # rocprofv3 requires additional processing for each process results_files = process_rocprofv3_output( format_rocprof_output, workload_dir, is_timestamps ) - if "--kokkos-trace" in options: + if rocprof_cmd == "rocprofiler-sdk": # TODO: as rocprofv3 --kokkos-trace 
feature improves, rocprof-compute should make updates accordingly - process_kokkos_trace_output(workload_dir, fbase) - elif "--hip-trace" in options: - process_hip_trace_output(workload_dir, fbase) + if "ROCPROF_HIP_RUNTIME_API_TRACE" in options: + process_hip_trace_output(workload_dir, fbase) + else: + if "--kokkos-trace" in options: + # TODO: as rocprofv3 --kokkos-trace feature improves, rocprof-compute should make updates accordingly + process_kokkos_trace_output(workload_dir, fbase) + elif "--hip-trace" in options: + process_hip_trace_output(workload_dir, fbase) # Combine results into single CSV file if results_files: @@ -721,33 +797,66 @@ def run_prof( df.to_csv(workload_dir + "/" + fbase + ".csv", index=False) -def pc_sampling_prof(interval, workload_dir, appcmd): +def pc_sampling_prof(interval, workload_dir, appcmd, rocprofiler_sdk_library_path): """ Run rocprof with pc sampling. Current support v3 only. """ # Todo: # - precheck with rocprofv3 –-list-avail - options = [ - "--pc-sampling-beta-enable", - "--pc-sampling-method", - "host_trap", - "--pc-sampling-unit", - "time", - "--output-format", - "csv", - "json", - "--pc-sampling-interval", - str(interval), - "-d", - workload_dir, - "-o", - "ps_file", # todo: sync up with the name from source in 2100_.yaml - "--", - appcmd, - ] - success, output = capture_subprocess_output( - [rocprof_cmd] + options, new_env=os.environ.copy(), profileMode=True - ) + if rocprof_cmd == "rocprofiler-sdk": + rocm_libdir = str(pathlib.Path(rocprofiler_sdk_library_path).parent) + rocprofiler_sdk_tool_path = str( + pathlib.Path(rocm_libdir).joinpath( + "rocprofiler-sdk/librocprofiler-sdk-tool.so" + ) + ) + ld_preload = [ + rocprofiler_sdk_tool_path, + rocprofiler_sdk_library_path, + ] + options = { + "ROCPROFILER_LIBRARY_CTOR": "1", + "LD_PRELOAD": ":".join(ld_preload), + "ROCP_TOOL_LIBRARIES": rocprofiler_sdk_tool_path, + "LD_LIBRARY_PATH": rocm_libdir, + "ROCPROF_OUTPUT_FORMAT": "csv,json", + "ROCPROF_OUTPUT_PATH": workload_dir, 
+ "ROCPROF_OUTPUT_FILE_NAME": "ps_file", + "ROCPROFILER_PC_SAMPLING_BETA_ENABLED": "1", + "ROCPROF_PC_SAMPLING_UNIT": "time", + "ROCPROF_PC_SAMPLING_INTERVAL": str(interval), + "ROCPROF_PC_SAMPLING_METHOD": "host_trap", + } + new_env = os.environ.copy() + for key, value in options.items(): + new_env[key] = value + console_debug("pc sampling rocprof sdk env vars: {}".format(new_env)) + console_debug("pc sampling rocprof sdk user provided command: {}".format(appcmd)) + success, output = capture_subprocess_output( + appcmd, new_env=new_env, profileMode=True + ) + else: + options = [ + "--pc-sampling-beta-enabled", + "--pc-sampling-method", + "host_trap", + "--pc-sampling-unit", + "time", + "--output-format", + "csv", + "json", + "--pc-sampling-interval", + str(interval), + "-d", + workload_dir, + "-o", + "ps_file", # todo: sync up with the name from source in 2100_.yaml + "--", + appcmd, + ] + success, output = capture_subprocess_output( + [rocprof_cmd] + options, new_env=os.environ.copy(), profileMode=True + ) if not success: console_error("PC sampling failed.") diff --git a/projects/rocprofiler-compute/tests/conftest.py b/projects/rocprofiler-compute/tests/conftest.py index 5a97d0b479..57eee6a4d0 100644 --- a/projects/rocprofiler-compute/tests/conftest.py +++ b/projects/rocprofiler-compute/tests/conftest.py @@ -15,12 +15,26 @@ def pytest_addoption(parser): help="Call standalone binary instead of main function during tests", ) + parser.addoption( + "--rocprofiler-sdk-library-path", + type=str, + default="/opt/rocm/lib/librocprofiler-sdk.so", + help="Path to the rocprofiler-sdk library", + ) + @pytest.fixture def binary_handler_profile_rocprof_compute(request): def _handler( config, workload_dir, options=[], check_success=True, roof=False, app_name="app_1" ): + if request.config.getoption("--rocprofiler-sdk-library-path"): + options.extend( + [ + "--rocprofiler-sdk-library-path", + request.config.getoption("--rocprofiler-sdk-library-path"), + ], + ) if 
request.config.getoption("--call-binary"): baseline_opts = [ "build/rocprof-compute.bin", diff --git a/projects/rocprofiler-compute/tests/test_profile_general.py b/projects/rocprofiler-compute/tests/test_profile_general.py index 34c40d2c2a..0764f2e5c7 100644 --- a/projects/rocprofiler-compute/tests/test_profile_general.py +++ b/projects/rocprofiler-compute/tests/test_profile_general.py @@ -323,15 +323,22 @@ def gpu_soc(): soc = gpu_soc() # Set rocprofv2 as profiler if MI300 -if soc == "MI100": - os.environ["ROCPROF"] = "rocprof" +if "ROCPROF" not in os.environ.keys(): + if soc == "MI100": + os.environ["ROCPROF"] = "rocprof" -else: - os.environ["ROCPROF"] = "rocprofv3" + else: + os.environ["ROCPROF"] = "rocprofv3" def using_v3(): - return "ROCPROF" in os.environ.keys() and os.environ["ROCPROF"].endswith("rocprofv3") + return "ROCPROF" not in os.environ.keys() or ( + "ROCPROF" in os.environ.keys() + and ( + os.environ["ROCPROF"].endswith("rocprofv3") + or os.environ["ROCPROF"] == "rocprofiler-sdk" + ) + ) Baseline_dir = str(Path("tests/workloads/vcopy/" + soc).resolve())