Implement interface to rocprofiler sdk (#695)

* Setting ROCPROF=rocprofiler-sdk environment variable will use rocprofiler-sdk C++ library instead of rocprofv3 python script

* Add runtime option --rocprofiler-sdk-library-path to use custom version of rocprofiler sdk library
    * Add --rocprofiler-sdk-library-path conftest option for tests

* Set up the appropriate environment variables to inject rocprofiler-sdk code into the user command
    * Add env. vars. for counter collection and filtering
    * Add env. vars. for pc sampling

* Use python bindings to list counters supported by rocprofiler sdk

[ROCm/rocprofiler-compute commit: 5cb86e31fc]
This commit is contained in:
vedithal-amd
2025-05-13 10:48:21 -04:00
gecommit door GitHub
bovenliggende e025a04a60
commit 6dc3fad1c7
16 gewijzigde bestanden met toevoegingen van 424 en 66 verwijderingen
@@ -47,6 +47,9 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Roofline support for MI350 series architecture
* Setting ROCPROF=rocprofiler-sdk environment variable will use rocprofiler-sdk C++ library instead of rocprofv3 python script
* Add --rocprofiler-sdk-library-path runtime option to choose the path to rocprofiler-sdk library to be used
### Changed
* Change the default rocprof version to v3 when environment variable "ROCPROF" is not set
@@ -333,6 +333,15 @@ Examples:
help="\t\t\tSet the interval of pc sampling in microsecond (DEFAULT: 1).",
)
profile_group.add_argument(
"--rocprofiler-sdk-library-path",
type=str,
dest="rocprofiler_sdk_library_path",
required=False,
default="/opt/rocm/lib/librocprofiler-sdk.so",
help="\t\t\tSet the path to rocprofiler SDK library.",
)
## Roofline Command Line Options
roofline_group.add_argument(
"--roof-only",
@@ -126,17 +126,19 @@ class RocProfCompute:
else:
self.__profiler_mode = "rocscope"
else:
rocprof_cmd = detect_rocprof()
if str(rocprof_cmd).endswith("rocprof"):
profiler_mode = detect_rocprof(self.__args)
if str(profiler_mode).endswith("rocprof"):
self.__profiler_mode = "rocprofv1"
elif str(rocprof_cmd).endswith("rocprofv2"):
elif str(profiler_mode).endswith("rocprofv2"):
self.__profiler_mode = "rocprofv2"
elif str(rocprof_cmd).endswith("rocprofv3"):
elif str(profiler_mode).endswith("rocprofv3"):
self.__profiler_mode = "rocprofv3"
elif str(profiler_mode) == "rocprofiler-sdk":
self.__profiler_mode = "rocprofiler-sdk"
else:
console_error(
"Incompatible profiler: %s. Supported profilers include: %s"
% (rocprof_cmd, get_submodules("rocprof_compute_profile"))
% (profiler_mode, get_submodules("rocprof_compute_profile"))
)
return
@@ -305,6 +307,17 @@ class RocProfCompute:
self.__soc[self.__mspec.gpu_arch],
self.__supported_archs,
)
elif self.__profiler_mode == "rocprofiler-sdk":
from rocprof_compute_profile.profiler_rocprofiler_sdk import (
rocprofiler_sdk_profiler,
)
profiler = rocprofiler_sdk_profiler(
self.__args,
self.__profiler_mode,
self.__soc[self.__mspec.gpu_arch],
self.__supported_archs,
)
else:
console_error("Unsupported profiler")
@@ -416,6 +416,7 @@ class RocProfCompute_Base:
self.__profiler == "rocprofv1"
or self.__profiler == "rocprofv2"
or self.__profiler == "rocprofv3"
or self.__profiler == "rocprofiler-sdk"
):
start_run_prof = time.time()
run_prof(
@@ -441,12 +442,16 @@ class RocProfCompute_Base:
# TODO: Finish logic
console_error("Profiler not supported")
if self.__pc_sampling == True and self.__profiler == "rocprofv3":
if self.__pc_sampling == True and self.__profiler in (
"rocprofv3",
"rocprofiler-sdk",
):
start_run_prof = time.time()
pc_sampling_prof(
interval=self.get_args().pc_sampling_interval,
workload_dir=self.get_args().path,
appcmd=self.get_args().remaining,
rocprofiler_sdk_library_path=self.get_args().rocprofiler_sdk_library_path,
)
end_run_prof = time.time()
console_debug(
@@ -0,0 +1,126 @@
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import os
import shlex
from pathlib import Path
from rocprof_compute_profile.profiler_base import RocProfCompute_Base
from utils.logger import console_error, console_log, demarcate
class rocprofiler_sdk_profiler(RocProfCompute_Base):
    """Profiler backend used when the profiler mode is "rocprofiler-sdk".

    Instead of building a rocprof command line, this backend describes the
    profiling run as a dictionary of environment-variable style options
    (plus the tokenized application command under the "APP_CMD" key).
    """

    def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
        super().__init__(profiling_args, profiler_mode, soc, supported_archs)
        args = self.get_args()
        if args.roof_only:
            # Roofline-only runs reuse an existing pmc_perf.csv when present.
            existing_csv = Path(args.path) / "pmc_perf.csv"
            self.ready_to_profile = not existing_csv.is_file()
        else:
            self.ready_to_profile = True

    def get_profiler_options(self, fname, soc):
        """Build the rocprofiler-sdk options for the current arguments.

        Returns a dict mapping option names (LD_PRELOAD, ROCPROF_*, ...) to
        string values, with the shlex-split user command stored under
        "APP_CMD". `fname` and `soc` are accepted but not used here.
        """
        args = self.get_args()
        user_command = shlex.split(args.remaining)

        # The tool library is looked up relative to the SDK library directory.
        sdk_library = args.rocprofiler_sdk_library_path
        lib_dir = str(Path(sdk_library).parent)
        tool_library = str(
            Path(lib_dir) / "rocprofiler-sdk/librocprofiler-sdk-tool.so"
        )

        # JSON output unless csv was explicitly requested.
        output_format = "csv" if args.format_rocprof_output == "csv" else "json"

        options = {
            "ROCPROFILER_LIBRARY_CTOR": "1",
            "LD_PRELOAD": ":".join((tool_library, sdk_library)),
            "ROCP_TOOL_LIBRARIES": tool_library,
            "LD_LIBRARY_PATH": lib_dir,
            "ROCPROF_KERNEL_TRACE": "1",
            "ROCPROF_OUTPUT_FORMAT": output_format,
            "ROCPROF_OUTPUT_PATH": args.path + "/out/pmc_1",
        }

        if args.kokkos_trace:
            # NOTE: --kokkos-trace feature is incomplete and is disabled for now.
            console_error(
                "The option '--kokkos-trace' is not supported in the current version of rocprof-compute. This functionality is planned for a future release. Please adjust your profiling options accordingly."
            )

        if args.hip_trace:
            options.update(
                {
                    "ROCPROF_HIP_COMPILER_API_TRACE": "1",
                    "ROCPROF_HIP_RUNTIME_API_TRACE": "1",
                }
            )

        # Kernel filtering: all requested kernel names become one alternation.
        if args.kernel:
            options["ROCPROF_KERNEL_FILTER_INCLUDE_REGEX"] = "|".join(args.kernel)

        # Dispatch filtering.
        # rocprof sdk dispatch indexing is inclusive and starts from 1, so the
        # requested start index is shifted by one: "4:7" -> "5-7", "4" -> "5".
        ranges = []
        for entry in args.dispatch or ():
            if ":" in entry:
                bounds = entry.split(":")
                ranges.append(f"{int(bounds[0]) + 1}-{bounds[1]}")
            else:
                ranges.append(str(int(entry) + 1))
        if ranges:
            options["ROCPROF_KERNEL_FILTER_RANGE"] = "[" + ",".join(ranges) + "]"

        options["APP_CMD"] = user_command
        return options

    # -----------------------
    # Required child methods
    # -----------------------
    @demarcate
    def pre_processing(self):
        """Run the base-class pre-processing steps before profiling."""
        super().pre_processing()

    @demarcate
    def run_profiling(self, version, prog):
        """Run profiling unless existing roofline data makes it unnecessary."""
        if not self.ready_to_profile:
            console_log("roofline", "Detected existing pmc_perf.csv")
            return

        if self.get_args().roof_only:
            console_log("roofline", "Generating pmc_perf.csv (roofline counters only).")
        # Log profiling options and setup filtering
        super().run_profiling(version, prog)

    @demarcate
    def post_processing(self):
        """Run post-processing steps after profiling."""
        super().post_processing()

        if not self.ready_to_profile:
            return
        # Manually join each pmc_perf*.csv output
        self.join_prof()
        # NOTE: timestamp replacement (a workaround for a known rocprof bug)
        # is intentionally not applied for this backend.
@@ -22,6 +22,7 @@
# SOFTWARE.
##############################################################################el
import ctypes
import glob
import math
import os
@@ -377,8 +378,8 @@ class OmniSoC_Base:
return hw_counter_matches, variable_matches
def get_rocprof_supported_counters(self):
rocprof_cmd = detect_rocprof()
rorcprof_counters = set()
rocprof_cmd = detect_rocprof(self.get_args())
rocprof_counters = set()
if str(rocprof_cmd).endswith("rocprof"):
command = [rocprof_cmd, "--list-basic"]
@@ -391,7 +392,7 @@ class OmniSoC_Base:
for line in output.splitlines():
if "gpu-agent" in line:
counters, _ = self.parse_counters_text(line.split(":")[1].strip())
rorcprof_counters.update(counters)
rocprof_counters.update(counters)
command = [rocprof_cmd, "--list-derived"]
success, output = capture_subprocess_output(command, enable_logging=False)
@@ -403,7 +404,7 @@ class OmniSoC_Base:
for line in output.splitlines():
if "gpu-agent" in line:
counters, _ = self.parse_counters_text(line.split(":")[1].strip())
rorcprof_counters.update(counters)
rocprof_counters.update(counters)
elif str(rocprof_cmd).endswith("rocprofv2"):
command = [rocprof_cmd, "--list-counters"]
@@ -416,7 +417,7 @@ class OmniSoC_Base:
for line in output.splitlines():
if "gfx" in line:
counters, _ = self.parse_counters_text(line.split(":")[2].strip())
rorcprof_counters.update(counters)
rocprof_counters.update(counters)
elif str(rocprof_cmd).endswith("rocprofv3"):
command = [rocprof_cmd, "--list-avail"]
@@ -429,7 +430,68 @@ class OmniSoC_Base:
for line in output.splitlines():
if "Name:" in line:
counters, _ = self.parse_counters_text(line.split(":")[1].strip())
rorcprof_counters.update(counters)
rocprof_counters.update(counters)
elif str(rocprof_cmd) == "rocprofiler-sdk":
MAX_STR = 256
# rocprofiler sdk list avail library
libname = str(
Path(self.get_args().rocprofiler_sdk_library_path).parent.parent.joinpath(
"libexec/rocprofiler-sdk/librocprofv3-list-avail.so"
)
)
c_lib = ctypes.CDLL(libname)
if c_lib is None:
console_error(f"Error opening {libname}")
# Initialize the library and set data types for arguments and variables
c_lib.avail_tool_init()
c_lib.get_number_of_agents.restype = ctypes.c_size_t
c_lib.get_agent_node_id.restype = ctypes.c_ulong
c_lib.get_agent_node_id.argtypes = [ctypes.c_int]
c_lib.get_number_of_counters.restype = ctypes.c_ulong
c_lib.get_number_of_counters.argtypes = [ctypes.c_int]
c_lib.get_counters_info.argtypes = [
ctypes.c_ulong,
ctypes.c_int,
ctypes.POINTER(ctypes.c_ulong),
ctypes.POINTER(ctypes.POINTER(ctypes.c_char * MAX_STR)),
ctypes.POINTER(ctypes.POINTER(ctypes.c_char * MAX_STR)),
ctypes.POINTER(ctypes.c_int),
]
c_lib.get_counter_block.argtypes = [
ctypes.c_ulong,
ctypes.c_ulong,
ctypes.POINTER(ctypes.POINTER(ctypes.c_char * MAX_STR)),
]
# Iterate through each counter index and get its information
for idx in range(c_lib.get_number_of_agents()):
node_id = c_lib.get_agent_node_id(idx)
for counter_idx in range(c_lib.get_number_of_counters(node_id)):
# Counter information will be stored in these variables
name_args = ctypes.POINTER(ctypes.c_char * MAX_STR)()
description_args = ctypes.POINTER(ctypes.c_char * MAX_STR)()
is_derived_args = ctypes.c_int()
counter_id_args = ctypes.c_ulong()
block_args = ctypes.POINTER(ctypes.c_char * MAX_STR)()
# Get the counter information
c_lib.get_counters_info(
node_id,
counter_idx,
ctypes.byref(counter_id_args),
name_args,
description_args,
ctypes.byref(is_derived_args),
)
c_lib.get_counter_block(node_id, counter_idx, block_args)
block = ctypes.cast(block_args, ctypes.c_char_p).value.decode("utf-8")
if not is_derived_args.value and block:
# Only consider raw hardware counters from IP blocks
rocprof_counters.add(
ctypes.cast(name_args, ctypes.c_char_p).value.decode("utf-8")
)
else:
console_error(
@@ -437,7 +499,7 @@ class OmniSoC_Base:
% (rocprof_cmd, get_submodules("rocprof_compute_profile"))
)
return rorcprof_counters
return rocprof_counters
@demarcate
def perfmon_coalesce(self, counters):
@@ -33,7 +33,9 @@ class gfx908_soc(OmniSoC_Base):
def __init__(self, args, mspec):
super().__init__(args, mspec)
self.set_arch("gfx908")
self.set_compatible_profilers(["rocprofv1", "rocscope", "rocprofv3"])
self.set_compatible_profilers(
["rocprofv1", "rocscope", "rocprofv3", "rocprofiler-sdk"]
)
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
{
@@ -46,7 +46,9 @@ class gfx90a_soc(OmniSoC_Base):
)
)
)
self.set_compatible_profilers(["rocprofv1", "rocscope", "rocprofv2", "rocprofv3"])
self.set_compatible_profilers(
["rocprofv1", "rocscope", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
)
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
{
@@ -46,7 +46,9 @@ class gfx940_soc(OmniSoC_Base):
)
)
)
self.set_compatible_profilers(["rocprofv1", "rocprofv2", "rocprofv3"])
self.set_compatible_profilers(
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
)
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
{
@@ -46,7 +46,9 @@ class gfx941_soc(OmniSoC_Base):
)
)
)
self.set_compatible_profilers(["rocprofv1", "rocprofv2", "rocprofv3"])
self.set_compatible_profilers(
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
)
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
{
@@ -46,7 +46,9 @@ class gfx942_soc(OmniSoC_Base):
)
)
)
self.set_compatible_profilers(["rocprofv1", "rocprofv2", "rocprofv3"])
self.set_compatible_profilers(
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
)
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
{
@@ -46,7 +46,7 @@ class gfx950_soc(OmniSoC_Base):
)
)
)
self.set_compatible_profilers(["rocprofv3"])
self.set_compatible_profilers(["rocprofv3", "rocprofiler-sdk"])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
{
@@ -1181,7 +1181,7 @@ def load_pc_sampling_data(workload, dir, file_prefix):
# No kernel filter, return grouped and sorted csv directly
if not workload.filter_kernel_ids:
# NB: the default file name is subject to changes from rocprofv3
# NB: the default file name is subject to changes from rocprofv3/rocprofiler_sdk
csv_file_path = Path.joinpath(
Path(dir), file_prefix + "_pc_sampling_host_trap.csv"
)
@@ -1221,7 +1221,7 @@ def load_pc_sampling_data(workload, dir, file_prefix):
elif len(workload.filter_kernel_ids) == 1:
# print("kernel id", workload.filter_kernel_ids[0])
# NB: the default file name is subject to changes from rocprofv3
# NB: the default file name is subject to changes from rocprofv3/rocprofiler_sdk
json_file_path = Path.joinpath(Path(dir), file_prefix + "_results.json")
if not json_file_path.exists():
console_error("PC sampling: can not read %s " % json_file_path)
@@ -81,7 +81,11 @@ def using_v1():
def using_v3():
return "ROCPROF" not in os.environ.keys() or (
"ROCPROF" in os.environ.keys() and os.environ["ROCPROF"].endswith("rocprofv3")
"ROCPROF" in os.environ.keys()
and (
os.environ["ROCPROF"].endswith("rocprofv3")
or os.environ["ROCPROF"] == "rocprofiler-sdk"
)
)
@@ -141,9 +145,23 @@ def get_version_display(version, sha, mode):
return buf.getvalue()
def detect_rocprof():
def detect_rocprof(args):
"""Detect loaded rocprof version. Resolve path and set cmd globally."""
global rocprof_cmd
if os.environ.get("ROCPROF") == "rocprofiler-sdk":
if not path(args.rocprofiler_sdk_library_path).exists():
console_error(
"Could not find rocprofiler-sdk library at "
+ args.rocprofiler_sdk_library_path
)
rocprof_cmd = "rocprofiler-sdk"
console_debug("rocprof_cmd is {}".format(rocprof_cmd))
console_debug(
"rocprofiler_sdk_path is {}".format(args.rocprofiler_sdk_library_path)
)
return rocprof_cmd
# detect rocprof
if not "ROCPROF" in os.environ.keys():
rocprof_cmd = "rocprofv3"
@@ -575,6 +593,35 @@ def v3_counter_csv_to_v2_csv(counter_file, agent_info_filepath, converted_csv_fi
result.to_csv(converted_csv_file, index=False)
def parse_text(text_file):
    """
    Parse the text file to get the pmc counters.

    Every line that contains a "pmc:" directive contributes the
    whitespace-separated counter names that follow it. Anything after a "#"
    on a line is treated as a comment and ignored, so a commented-out
    "pmc:" line contributes nothing.

    Args:
        text_file: Path of the counter input file to read.

    Returns:
        A flat list of counter names in file order (duplicates are kept).
        Never contains empty strings.
    """
    counters = []
    with open(text_file, "r") as file:
        for raw_line in file:
            # Strip the comment before looking for the directive; checking
            # first would treat "# pmc: X" as an active line.
            line = raw_line.split("#", 1)[0]
            if "pmc:" not in line:
                continue
            # split() with no separator collapses tabs, newlines and runs of
            # spaces, so no empty tokens are produced.
            counters.extend(line.replace("pmc:", "").split())
    return counters
def run_prof(
fname, profiler_options, workload_dir, mspec, loglevel, format_rocprof_output
):
@@ -585,11 +632,25 @@ def run_prof(
path_counter_config_yaml = path(fname).with_suffix(".yaml")
# standard rocprof options
default_options = ["-i", fname]
options = default_options + profiler_options
if rocprof_cmd == "rocprofiler-sdk":
options = profiler_options
options["ROCPROF_COUNTER_COLLECTION"] = "1"
options["ROCPROF_COUNTERS"] = "pmc: " + " ".join(parse_text(fname))
else:
default_options = ["-i", fname]
options = default_options + profiler_options
if using_v3():
options = ["-A", "absolute"] + options
if path_counter_config_yaml.exists():
if rocprof_cmd == "rocprofiler-sdk":
options["ROCPROF_AGENT_INDEX"] = "absolute"
else:
options = ["-A", "absolute"] + options
if using_v3() and path_counter_config_yaml.exists():
if rocprof_cmd == "rocprofiler-sdk":
with open(path_counter_config_yaml, "r") as file:
options["ROCPROF_EXTRA_COUNTERS_CONTENTS"] = file.read()
else:
options = ["-E", str(path_counter_config_yaml)] + options
# set required env var for mi300
@@ -603,16 +664,26 @@ def run_prof(
is_timestamps = True
time_1 = time.time()
console_debug("rocprof command: {}".format([rocprof_cmd] + options))
# profile the app
if new_env:
if rocprof_cmd == "rocprofiler-sdk":
app_cmd = options.pop("APP_CMD")
for key, value in options.items():
new_env[key] = value
console_debug("rocprof sdk env vars: {}".format(new_env))
console_debug("rocprof sdk user provided command: {}".format(app_cmd))
success, output = capture_subprocess_output(
[rocprof_cmd] + options, new_env=new_env, profileMode=True
app_cmd, new_env=new_env, profileMode=True
)
else:
success, output = capture_subprocess_output(
[rocprof_cmd] + options, profileMode=True
)
console_debug("rocprof command: {}".format([rocprof_cmd] + options))
# profile the app
if new_env:
success, output = capture_subprocess_output(
[rocprof_cmd] + options, new_env=new_env, profileMode=True
)
else:
success, output = capture_subprocess_output(
[rocprof_cmd] + options, profileMode=True
)
time_2 = time.time()
console_debug(
@@ -647,17 +718,22 @@ def run_prof(
combined_results.to_csv(
workload_dir + "/out/pmc_1/results_" + fbase + ".csv", index=False
)
elif rocprof_cmd.endswith("v3"):
elif rocprof_cmd.endswith("v3") or rocprof_cmd == "rocprofiler-sdk":
# rocprofv3 requires additional processing for each process
results_files = process_rocprofv3_output(
format_rocprof_output, workload_dir, is_timestamps
)
if "--kokkos-trace" in options:
if rocprof_cmd == "rocprofiler-sdk":
# TODO: as rocprofv3 --kokkos-trace feature improves, rocprof-compute should make updates accordingly
process_kokkos_trace_output(workload_dir, fbase)
elif "--hip-trace" in options:
process_hip_trace_output(workload_dir, fbase)
if "ROCPROF_HIP_RUNTIME_API_TRACE" in options:
process_hip_trace_output(workload_dir, fbase)
else:
if "--kokkos-trace" in options:
# TODO: as rocprofv3 --kokkos-trace feature improves, rocprof-compute should make updates accordingly
process_kokkos_trace_output(workload_dir, fbase)
elif "--hip-trace" in options:
process_hip_trace_output(workload_dir, fbase)
# Combine results into single CSV file
if results_files:
@@ -721,33 +797,66 @@ def run_prof(
df.to_csv(workload_dir + "/" + fbase + ".csv", index=False)
def pc_sampling_prof(interval, workload_dir, appcmd):
def pc_sampling_prof(interval, workload_dir, appcmd, rocprofiler_sdk_library_path):
"""
Run rocprof with pc sampling. Current support v3 only.
"""
# Todo:
# - precheck with rocprofv3 -list-avail
options = [
"--pc-sampling-beta-enable",
"--pc-sampling-method",
"host_trap",
"--pc-sampling-unit",
"time",
"--output-format",
"csv",
"json",
"--pc-sampling-interval",
str(interval),
"-d",
workload_dir,
"-o",
"ps_file", # todo: sync up with the name from source in 2100_.yaml
"--",
appcmd,
]
success, output = capture_subprocess_output(
[rocprof_cmd] + options, new_env=os.environ.copy(), profileMode=True
)
if rocprof_cmd == "rocprofiler-sdk":
rocm_libdir = str(pathlib.Path(rocprofiler_sdk_library_path).parent)
rocprofiler_sdk_tool_path = str(
pathlib.Path(rocm_libdir).joinpath(
"rocprofiler-sdk/librocprofiler-sdk-tool.so"
)
)
ld_preload = [
rocprofiler_sdk_tool_path,
rocprofiler_sdk_library_path,
]
options = {
"ROCPROFILER_LIBRARY_CTOR": "1",
"LD_PRELOAD": ":".join(ld_preload),
"ROCP_TOOL_LIBRARIES": rocprofiler_sdk_tool_path,
"LD_LIBRARY_PATH": rocm_libdir,
"ROCPROF_OUTPUT_FORMAT": "csv,json",
"ROCPROF_OUTPUT_PATH": workload_dir,
"ROCPROF_OUTPUT_FILE_NAME": "ps_file",
"ROCPROFILER_PC_SAMPLING_BETA_ENABLED": "1",
"ROCPROF_PC_SAMPLING_UNIT": "time",
"ROCPROF_PC_SAMPLING_INTERVAL": str(interval),
"ROCPROF_PC_SAMPLING_METHOD": "host_trap",
}
new_env = os.environ.copy()
for key, value in options.items():
new_env[key] = value
console_debug("pc sampling rocprof sdk env vars: {}".format(new_env))
console_debug("pc sampling rocprof sdk user provided command: {}".format(appcmd))
success, output = capture_subprocess_output(
appcmd, new_env=new_env, profileMode=True
)
else:
options = [
"--pc-sampling-beta-enabled",
"--pc-sampling-method",
"host_trap",
"--pc-sampling-unit",
"time",
"--output-format",
"csv",
"json",
"--pc-sampling-interval",
str(interval),
"-d",
workload_dir,
"-o",
"ps_file", # todo: sync up with the name from source in 2100_.yaml
"--",
appcmd,
]
success, output = capture_subprocess_output(
[rocprof_cmd] + options, new_env=os.environ.copy(), profileMode=True
)
if not success:
console_error("PC sampling failed.")
@@ -15,12 +15,26 @@ def pytest_addoption(parser):
help="Call standalone binary instead of main function during tests",
)
parser.addoption(
"--rocprofiler-sdk-library-path",
type=str,
default="/opt/rocm/lib/librocprofiler-sdk.so",
help="Path to the rocprofiler-sdk library",
)
@pytest.fixture
def binary_handler_profile_rocprof_compute(request):
def _handler(
config, workload_dir, options=[], check_success=True, roof=False, app_name="app_1"
):
if request.config.getoption("--rocprofiler-sdk-library-path"):
options.extend(
[
"--rocprofiler-sdk-library-path",
request.config.getoption("--rocprofiler-sdk-library-path"),
],
)
if request.config.getoption("--call-binary"):
baseline_opts = [
"build/rocprof-compute.bin",
@@ -323,15 +323,22 @@ def gpu_soc():
soc = gpu_soc()
# Set rocprofv2 as profiler if MI300
if soc == "MI100":
os.environ["ROCPROF"] = "rocprof"
if "ROCPROF" not in os.environ.keys():
if soc == "MI100":
os.environ["ROCPROF"] = "rocprof"
else:
os.environ["ROCPROF"] = "rocprofv3"
else:
os.environ["ROCPROF"] = "rocprofv3"
def using_v3():
return "ROCPROF" in os.environ.keys() and os.environ["ROCPROF"].endswith("rocprofv3")
return "ROCPROF" not in os.environ.keys() or (
"ROCPROF" in os.environ.keys()
and (
os.environ["ROCPROF"].endswith("rocprofv3")
or os.environ["ROCPROF"] == "rocprofiler-sdk"
)
)
Baseline_dir = str(Path("tests/workloads/vcopy/" + soc).resolve())