Implement interface to rocprofiler sdk (#695)
* Setting ROCPROF=rocprofiler-sdk environment variable will use rocprofiler-sdk C++ library instead of rocprofv3 python script
* Add runtime option --rocprofiler-sdk-library-path to use custom version of rocprofiler sdk library
* Add --rocprofiler-sdk-library-path conftest option for tests
* Set up appropriate environment variables to inject rocprofiler sdk code into the user command
* Add env. vars. for counter collection and filtering
* Add env. vars. for pc sampling
* Use python bindings to list counters supported by rocprofiler sdk
[ROCm/rocprofiler-compute commit: 5cb86e31fc]
This commit is contained in:
gecommit door
GitHub
bovenliggende
e025a04a60
commit
6dc3fad1c7
@@ -47,6 +47,9 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
|
||||
* Roofline support for MI350 series architecture
|
||||
|
||||
* Setting ROCPROF=rocprofiler-sdk environment variable will use rocprofiler-sdk C++ library instead of rocprofv3 python script
|
||||
* Add --rocprofiler-sdk-library-path runtime option to choose the path to rocprofiler-sdk library to be used
|
||||
|
||||
### Changed
|
||||
|
||||
* Change the default rocprof version to v3 when environment variable "ROCPROF" is not set
|
||||
|
||||
@@ -333,6 +333,15 @@ Examples:
|
||||
help="\t\t\tSet the interval of pc sampling in microsecond (DEFAULT: 1).",
|
||||
)
|
||||
|
||||
profile_group.add_argument(
|
||||
"--rocprofiler-sdk-library-path",
|
||||
type=str,
|
||||
dest="rocprofiler_sdk_library_path",
|
||||
required=False,
|
||||
default="/opt/rocm/lib/librocprofiler-sdk.so",
|
||||
help="\t\t\tSet the path to rocprofiler SDK library.",
|
||||
)
|
||||
|
||||
## Roofline Command Line Options
|
||||
roofline_group.add_argument(
|
||||
"--roof-only",
|
||||
|
||||
@@ -126,17 +126,19 @@ class RocProfCompute:
|
||||
else:
|
||||
self.__profiler_mode = "rocscope"
|
||||
else:
|
||||
rocprof_cmd = detect_rocprof()
|
||||
if str(rocprof_cmd).endswith("rocprof"):
|
||||
profiler_mode = detect_rocprof(self.__args)
|
||||
if str(profiler_mode).endswith("rocprof"):
|
||||
self.__profiler_mode = "rocprofv1"
|
||||
elif str(rocprof_cmd).endswith("rocprofv2"):
|
||||
elif str(profiler_mode).endswith("rocprofv2"):
|
||||
self.__profiler_mode = "rocprofv2"
|
||||
elif str(rocprof_cmd).endswith("rocprofv3"):
|
||||
elif str(profiler_mode).endswith("rocprofv3"):
|
||||
self.__profiler_mode = "rocprofv3"
|
||||
elif str(profiler_mode) == "rocprofiler-sdk":
|
||||
self.__profiler_mode = "rocprofiler-sdk"
|
||||
else:
|
||||
console_error(
|
||||
"Incompatible profiler: %s. Supported profilers include: %s"
|
||||
% (rocprof_cmd, get_submodules("rocprof_compute_profile"))
|
||||
% (profiler_mode, get_submodules("rocprof_compute_profile"))
|
||||
)
|
||||
return
|
||||
|
||||
@@ -305,6 +307,17 @@ class RocProfCompute:
|
||||
self.__soc[self.__mspec.gpu_arch],
|
||||
self.__supported_archs,
|
||||
)
|
||||
elif self.__profiler_mode == "rocprofiler-sdk":
|
||||
from rocprof_compute_profile.profiler_rocprofiler_sdk import (
|
||||
rocprofiler_sdk_profiler,
|
||||
)
|
||||
|
||||
profiler = rocprofiler_sdk_profiler(
|
||||
self.__args,
|
||||
self.__profiler_mode,
|
||||
self.__soc[self.__mspec.gpu_arch],
|
||||
self.__supported_archs,
|
||||
)
|
||||
else:
|
||||
console_error("Unsupported profiler")
|
||||
|
||||
|
||||
@@ -416,6 +416,7 @@ class RocProfCompute_Base:
|
||||
self.__profiler == "rocprofv1"
|
||||
or self.__profiler == "rocprofv2"
|
||||
or self.__profiler == "rocprofv3"
|
||||
or self.__profiler == "rocprofiler-sdk"
|
||||
):
|
||||
start_run_prof = time.time()
|
||||
run_prof(
|
||||
@@ -441,12 +442,16 @@ class RocProfCompute_Base:
|
||||
# TODO: Finish logic
|
||||
console_error("Profiler not supported")
|
||||
|
||||
if self.__pc_sampling == True and self.__profiler == "rocprofv3":
|
||||
if self.__pc_sampling == True and self.__profiler in (
|
||||
"rocprofv3",
|
||||
"rocprofiler-sdk",
|
||||
):
|
||||
start_run_prof = time.time()
|
||||
pc_sampling_prof(
|
||||
interval=self.get_args().pc_sampling_interval,
|
||||
workload_dir=self.get_args().path,
|
||||
appcmd=self.get_args().remaining,
|
||||
rocprofiler_sdk_library_path=self.get_args().rocprofiler_sdk_library_path,
|
||||
)
|
||||
end_run_prof = time.time()
|
||||
console_debug(
|
||||
|
||||
+126
@@ -0,0 +1,126 @@
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import os
|
||||
import shlex
|
||||
from pathlib import Path
|
||||
|
||||
from rocprof_compute_profile.profiler_base import RocProfCompute_Base
|
||||
from utils.logger import console_error, console_log, demarcate
|
||||
|
||||
|
||||
class rocprofiler_sdk_profiler(RocProfCompute_Base):
    """Profiler backend that uses the rocprofiler-sdk libraries directly.

    Rather than assembling a rocprof command line, this backend describes a
    profiling run as a dictionary of environment variables (``LD_PRELOAD``,
    ``ROCP_TOOL_LIBRARIES``, ``ROCPROF_*``, ...) plus the user command, see
    ``get_profiler_options``.
    """

    def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
        super().__init__(profiling_args, profiler_mode, soc, supported_archs)
        args = self.get_args()
        # Profiling is skipped only for a roofline-only run whose
        # pmc_perf.csv is already present in the workload directory.
        self.ready_to_profile = (
            not args.roof_only
            or not Path(args.path).joinpath("pmc_perf.csv").is_file()
        )

    @staticmethod
    def _prepend_to_env(var_name, entries):
        """Return ``entries`` joined by ':' followed by the inherited value.

        ``LD_PRELOAD`` and ``LD_LIBRARY_PATH`` are search-path style
        variables; keeping whatever the calling environment already exported
        avoids breaking applications that depend on their own entries.
        """
        parts = list(entries)
        inherited = os.environ.get(var_name)
        if inherited:
            parts.append(inherited)
        return ":".join(parts)

    def get_profiler_options(self, fname, soc):
        """Build the rocprofiler-sdk settings for one profiling run.

        ``fname`` and ``soc`` are part of the profiler interface but are not
        used by this backend.

        Returns a dict mapping environment variable names to string values,
        plus the extra key ``"APP_CMD"`` holding the user command split into
        an argument list.
        """
        args = self.get_args()
        app_cmd = shlex.split(args.remaining)

        sdk_library = args.rocprofiler_sdk_library_path
        rocm_libdir = str(Path(sdk_library).parent)
        # The tool library is looked up relative to the SDK library location
        tool_library = str(
            Path(rocm_libdir).joinpath("rocprofiler-sdk/librocprofiler-sdk-tool.so")
        )

        options = {
            "ROCPROFILER_LIBRARY_CTOR": "1",
            # SDK entries go first; inherited entries are preserved after them
            "LD_PRELOAD": self._prepend_to_env(
                "LD_PRELOAD", [tool_library, sdk_library]
            ),
            "ROCP_TOOL_LIBRARIES": tool_library,
            "LD_LIBRARY_PATH": self._prepend_to_env("LD_LIBRARY_PATH", [rocm_libdir]),
            "ROCPROF_KERNEL_TRACE": "1",
            "ROCPROF_OUTPUT_FORMAT": "json",
            "ROCPROF_OUTPUT_PATH": args.path + "/out/pmc_1",
        }

        if args.format_rocprof_output == "csv":
            options["ROCPROF_OUTPUT_FORMAT"] = "csv"

        if args.kokkos_trace:
            # NOTE: --kokkos-trace feature is incomplete and is disabled for now.
            console_error(
                "The option '--kokkos-trace' is not supported in the current version of rocprof-compute. This functionality is planned for a future release. Please adjust your profiling options accordingly."
            )
        if args.hip_trace:
            options["ROCPROF_HIP_COMPILER_API_TRACE"] = "1"
            options["ROCPROF_HIP_RUNTIME_API_TRACE"] = "1"

        # Kernel filtering: any of the requested kernel patterns may match
        if args.kernel:
            options["ROCPROF_KERNEL_FILTER_INCLUDE_REGEX"] = "|".join(args.kernel)

        # Dispatch filtering
        # rocprof sdk dispatch indexing is inclusive and starts from 1
        dispatch = []
        if args.dispatch:
            for dispatch_id in args.dispatch:
                if ":" in dispatch_id:
                    tokens = dispatch_id.split(":")
                    # 4:7 -> 5-7
                    dispatch.append(f"{int(tokens[0]) + 1}-{tokens[1]}")
                else:
                    # 4 -> 5
                    dispatch.append(f"{int(dispatch_id) + 1}")
        if dispatch:
            options["ROCPROF_KERNEL_FILTER_RANGE"] = f"[{','.join(dispatch)}]"

        # Not an environment variable: the caller pops this key to obtain
        # the command to launch.
        options["APP_CMD"] = app_cmd
        return options

    # -----------------------
    # Required child methods
    # -----------------------
    @demarcate
    def pre_processing(self):
        """Perform any pre-processing steps prior to profiling."""
        super().pre_processing()

    @demarcate
    def run_profiling(self, version, prog):
        """Run profiling unless an existing pmc_perf.csv makes it unnecessary."""
        if not self.ready_to_profile:
            console_log("roofline", "Detected existing pmc_perf.csv")
            return

        if self.get_args().roof_only:
            console_log("roofline", "Generating pmc_perf.csv (roofline counters only).")
        # Log profiling options and setup filtering
        super().run_profiling(version, prog)

    @demarcate
    def post_processing(self):
        """Perform any post-processing steps after profiling."""
        super().post_processing()

        if self.ready_to_profile:
            # Manually join each pmc_perf*.csv output
            self.join_prof()
            # Replace timestamp data to solve a known rocprof bug
            # replace_timestamps(self.get_args().path)
|
||||
@@ -22,6 +22,7 @@
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import ctypes
|
||||
import glob
|
||||
import math
|
||||
import os
|
||||
@@ -377,8 +378,8 @@ class OmniSoC_Base:
|
||||
return hw_counter_matches, variable_matches
|
||||
|
||||
def get_rocprof_supported_counters(self):
|
||||
rocprof_cmd = detect_rocprof()
|
||||
rorcprof_counters = set()
|
||||
rocprof_cmd = detect_rocprof(self.get_args())
|
||||
rocprof_counters = set()
|
||||
|
||||
if str(rocprof_cmd).endswith("rocprof"):
|
||||
command = [rocprof_cmd, "--list-basic"]
|
||||
@@ -391,7 +392,7 @@ class OmniSoC_Base:
|
||||
for line in output.splitlines():
|
||||
if "gpu-agent" in line:
|
||||
counters, _ = self.parse_counters_text(line.split(":")[1].strip())
|
||||
rorcprof_counters.update(counters)
|
||||
rocprof_counters.update(counters)
|
||||
|
||||
command = [rocprof_cmd, "--list-derived"]
|
||||
success, output = capture_subprocess_output(command, enable_logging=False)
|
||||
@@ -403,7 +404,7 @@ class OmniSoC_Base:
|
||||
for line in output.splitlines():
|
||||
if "gpu-agent" in line:
|
||||
counters, _ = self.parse_counters_text(line.split(":")[1].strip())
|
||||
rorcprof_counters.update(counters)
|
||||
rocprof_counters.update(counters)
|
||||
|
||||
elif str(rocprof_cmd).endswith("rocprofv2"):
|
||||
command = [rocprof_cmd, "--list-counters"]
|
||||
@@ -416,7 +417,7 @@ class OmniSoC_Base:
|
||||
for line in output.splitlines():
|
||||
if "gfx" in line:
|
||||
counters, _ = self.parse_counters_text(line.split(":")[2].strip())
|
||||
rorcprof_counters.update(counters)
|
||||
rocprof_counters.update(counters)
|
||||
|
||||
elif str(rocprof_cmd).endswith("rocprofv3"):
|
||||
command = [rocprof_cmd, "--list-avail"]
|
||||
@@ -429,7 +430,68 @@ class OmniSoC_Base:
|
||||
for line in output.splitlines():
|
||||
if "Name:" in line:
|
||||
counters, _ = self.parse_counters_text(line.split(":")[1].strip())
|
||||
rorcprof_counters.update(counters)
|
||||
rocprof_counters.update(counters)
|
||||
|
||||
elif str(rocprof_cmd) == "rocprofiler-sdk":
|
||||
MAX_STR = 256
|
||||
|
||||
# rocprofiler sdk list avail library
|
||||
libname = str(
|
||||
Path(self.get_args().rocprofiler_sdk_library_path).parent.parent.joinpath(
|
||||
"libexec/rocprofiler-sdk/librocprofv3-list-avail.so"
|
||||
)
|
||||
)
|
||||
c_lib = ctypes.CDLL(libname)
|
||||
if c_lib is None:
|
||||
console_error(f"Error opening {libname}")
|
||||
|
||||
# Initialize the library and set data types for arguments and variables
|
||||
c_lib.avail_tool_init()
|
||||
c_lib.get_number_of_agents.restype = ctypes.c_size_t
|
||||
c_lib.get_agent_node_id.restype = ctypes.c_ulong
|
||||
c_lib.get_agent_node_id.argtypes = [ctypes.c_int]
|
||||
c_lib.get_number_of_counters.restype = ctypes.c_ulong
|
||||
c_lib.get_number_of_counters.argtypes = [ctypes.c_int]
|
||||
c_lib.get_counters_info.argtypes = [
|
||||
ctypes.c_ulong,
|
||||
ctypes.c_int,
|
||||
ctypes.POINTER(ctypes.c_ulong),
|
||||
ctypes.POINTER(ctypes.POINTER(ctypes.c_char * MAX_STR)),
|
||||
ctypes.POINTER(ctypes.POINTER(ctypes.c_char * MAX_STR)),
|
||||
ctypes.POINTER(ctypes.c_int),
|
||||
]
|
||||
c_lib.get_counter_block.argtypes = [
|
||||
ctypes.c_ulong,
|
||||
ctypes.c_ulong,
|
||||
ctypes.POINTER(ctypes.POINTER(ctypes.c_char * MAX_STR)),
|
||||
]
|
||||
|
||||
# Iterate through each counter index and get its information
|
||||
for idx in range(c_lib.get_number_of_agents()):
|
||||
node_id = c_lib.get_agent_node_id(idx)
|
||||
for counter_idx in range(c_lib.get_number_of_counters(node_id)):
|
||||
# Counter information will be stored in these variables
|
||||
name_args = ctypes.POINTER(ctypes.c_char * MAX_STR)()
|
||||
description_args = ctypes.POINTER(ctypes.c_char * MAX_STR)()
|
||||
is_derived_args = ctypes.c_int()
|
||||
counter_id_args = ctypes.c_ulong()
|
||||
block_args = ctypes.POINTER(ctypes.c_char * MAX_STR)()
|
||||
# Get the counter information
|
||||
c_lib.get_counters_info(
|
||||
node_id,
|
||||
counter_idx,
|
||||
ctypes.byref(counter_id_args),
|
||||
name_args,
|
||||
description_args,
|
||||
ctypes.byref(is_derived_args),
|
||||
)
|
||||
c_lib.get_counter_block(node_id, counter_idx, block_args)
|
||||
block = ctypes.cast(block_args, ctypes.c_char_p).value.decode("utf-8")
|
||||
if not is_derived_args.value and block:
|
||||
# Only consider raw hardware counters from IP blocks
|
||||
rocprof_counters.add(
|
||||
ctypes.cast(name_args, ctypes.c_char_p).value.decode("utf-8")
|
||||
)
|
||||
|
||||
else:
|
||||
console_error(
|
||||
@@ -437,7 +499,7 @@ class OmniSoC_Base:
|
||||
% (rocprof_cmd, get_submodules("rocprof_compute_profile"))
|
||||
)
|
||||
|
||||
return rorcprof_counters
|
||||
return rocprof_counters
|
||||
|
||||
@demarcate
|
||||
def perfmon_coalesce(self, counters):
|
||||
|
||||
@@ -33,7 +33,9 @@ class gfx908_soc(OmniSoC_Base):
|
||||
def __init__(self, args, mspec):
|
||||
super().__init__(args, mspec)
|
||||
self.set_arch("gfx908")
|
||||
self.set_compatible_profilers(["rocprofv1", "rocscope", "rocprofv3"])
|
||||
self.set_compatible_profilers(
|
||||
["rocprofv1", "rocscope", "rocprofv3", "rocprofiler-sdk"]
|
||||
)
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(
|
||||
{
|
||||
|
||||
@@ -46,7 +46,9 @@ class gfx90a_soc(OmniSoC_Base):
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(["rocprofv1", "rocscope", "rocprofv2", "rocprofv3"])
|
||||
self.set_compatible_profilers(
|
||||
["rocprofv1", "rocscope", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
|
||||
)
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(
|
||||
{
|
||||
|
||||
@@ -46,7 +46,9 @@ class gfx940_soc(OmniSoC_Base):
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(["rocprofv1", "rocprofv2", "rocprofv3"])
|
||||
self.set_compatible_profilers(
|
||||
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
|
||||
)
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(
|
||||
{
|
||||
|
||||
@@ -46,7 +46,9 @@ class gfx941_soc(OmniSoC_Base):
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(["rocprofv1", "rocprofv2", "rocprofv3"])
|
||||
self.set_compatible_profilers(
|
||||
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
|
||||
)
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(
|
||||
{
|
||||
|
||||
@@ -46,7 +46,9 @@ class gfx942_soc(OmniSoC_Base):
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(["rocprofv1", "rocprofv2", "rocprofv3"])
|
||||
self.set_compatible_profilers(
|
||||
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
|
||||
)
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(
|
||||
{
|
||||
|
||||
@@ -46,7 +46,7 @@ class gfx950_soc(OmniSoC_Base):
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(["rocprofv3"])
|
||||
self.set_compatible_profilers(["rocprofv3", "rocprofiler-sdk"])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(
|
||||
{
|
||||
|
||||
@@ -1181,7 +1181,7 @@ def load_pc_sampling_data(workload, dir, file_prefix):
|
||||
|
||||
# No kernel filter, return grouped and sorted csv directly
|
||||
if not workload.filter_kernel_ids:
|
||||
# NB: the default file name is subject to changes from rocprofv3
|
||||
# NB: the default file name is subject to changes from rocprofv3/rocprofiler_sdk
|
||||
csv_file_path = Path.joinpath(
|
||||
Path(dir), file_prefix + "_pc_sampling_host_trap.csv"
|
||||
)
|
||||
@@ -1221,7 +1221,7 @@ def load_pc_sampling_data(workload, dir, file_prefix):
|
||||
|
||||
elif len(workload.filter_kernel_ids) == 1:
|
||||
# print("kernel id", workload.filter_kernel_ids[0])
|
||||
# NB: the default file name is subject to changes from rocprofv3
|
||||
# NB: the default file name is subject to changes from rocprofv3/rocprofiler_sdk
|
||||
json_file_path = Path.joinpath(Path(dir), file_prefix + "_results.json")
|
||||
if not json_file_path.exists():
|
||||
console_error("PC sampling: can not read %s " % json_file_path)
|
||||
|
||||
@@ -81,7 +81,11 @@ def using_v1():
|
||||
|
||||
def using_v3():
|
||||
return "ROCPROF" not in os.environ.keys() or (
|
||||
"ROCPROF" in os.environ.keys() and os.environ["ROCPROF"].endswith("rocprofv3")
|
||||
"ROCPROF" in os.environ.keys()
|
||||
and (
|
||||
os.environ["ROCPROF"].endswith("rocprofv3")
|
||||
or os.environ["ROCPROF"] == "rocprofiler-sdk"
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -141,9 +145,23 @@ def get_version_display(version, sha, mode):
|
||||
return buf.getvalue()
|
||||
|
||||
|
||||
def detect_rocprof():
|
||||
def detect_rocprof(args):
|
||||
"""Detect loaded rocprof version. Resolve path and set cmd globally."""
|
||||
global rocprof_cmd
|
||||
|
||||
if os.environ.get("ROCPROF") == "rocprofiler-sdk":
|
||||
if not path(args.rocprofiler_sdk_library_path).exists():
|
||||
console_error(
|
||||
"Could not find rocprofiler-sdk library at "
|
||||
+ args.rocprofiler_sdk_library_path
|
||||
)
|
||||
rocprof_cmd = "rocprofiler-sdk"
|
||||
console_debug("rocprof_cmd is {}".format(rocprof_cmd))
|
||||
console_debug(
|
||||
"rocprofiler_sdk_path is {}".format(args.rocprofiler_sdk_library_path)
|
||||
)
|
||||
return rocprof_cmd
|
||||
|
||||
# detect rocprof
|
||||
if not "ROCPROF" in os.environ.keys():
|
||||
rocprof_cmd = "rocprofv3"
|
||||
@@ -575,6 +593,35 @@ def v3_counter_csv_to_v2_csv(counter_file, agent_info_filepath, converted_csv_fi
|
||||
result.to_csv(converted_csv_file, index=False)
|
||||
|
||||
|
||||
def parse_text(text_file):
    """
    Parse the text file to get the pmc counters.

    Every line that contains a ``pmc:`` directive contributes the
    whitespace-separated tokens that remain after removing the ``pmc:``
    marker and any trailing ``#`` comment. Lines without ``pmc:`` are
    ignored.

    Args:
        text_file: path of the counter input file to read.

    Returns:
        A flat list of counter names in file order. Empty strings are never
        returned, even when tokens are separated by several spaces or tabs,
        or when a ``pmc:`` line lists no counters.
    """

    def process_line(line):
        # Strip the comment first so a commented-out "pmc:" directive is
        # not mistaken for a real one.
        line = line.split("#", 1)[0]
        if "pmc:" not in line:
            return []
        # str.split() without a separator collapses any run of spaces,
        # tabs and newlines and never produces empty tokens.
        return line.replace("pmc:", "").split()

    with open(text_file, "r") as file:
        return [counter for line in file for counter in process_line(line)]
|
||||
|
||||
|
||||
def run_prof(
|
||||
fname, profiler_options, workload_dir, mspec, loglevel, format_rocprof_output
|
||||
):
|
||||
@@ -585,11 +632,25 @@ def run_prof(
|
||||
|
||||
path_counter_config_yaml = path(fname).with_suffix(".yaml")
|
||||
# standard rocprof options
|
||||
default_options = ["-i", fname]
|
||||
options = default_options + profiler_options
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
options = profiler_options
|
||||
options["ROCPROF_COUNTER_COLLECTION"] = "1"
|
||||
options["ROCPROF_COUNTERS"] = "pmc: " + " ".join(parse_text(fname))
|
||||
else:
|
||||
default_options = ["-i", fname]
|
||||
options = default_options + profiler_options
|
||||
|
||||
if using_v3():
|
||||
options = ["-A", "absolute"] + options
|
||||
if path_counter_config_yaml.exists():
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
options["ROCPROF_AGENT_INDEX"] = "absolute"
|
||||
else:
|
||||
options = ["-A", "absolute"] + options
|
||||
|
||||
if using_v3() and path_counter_config_yaml.exists():
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
with open(path_counter_config_yaml, "r") as file:
|
||||
options["ROCPROF_EXTRA_COUNTERS_CONTENTS"] = file.read()
|
||||
else:
|
||||
options = ["-E", str(path_counter_config_yaml)] + options
|
||||
|
||||
# set required env var for mi300
|
||||
@@ -603,16 +664,26 @@ def run_prof(
|
||||
is_timestamps = True
|
||||
time_1 = time.time()
|
||||
|
||||
console_debug("rocprof command: {}".format([rocprof_cmd] + options))
|
||||
# profile the app
|
||||
if new_env:
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
app_cmd = options.pop("APP_CMD")
|
||||
for key, value in options.items():
|
||||
new_env[key] = value
|
||||
console_debug("rocprof sdk env vars: {}".format(new_env))
|
||||
console_debug("rocprof sdk user provided command: {}".format(app_cmd))
|
||||
success, output = capture_subprocess_output(
|
||||
[rocprof_cmd] + options, new_env=new_env, profileMode=True
|
||||
app_cmd, new_env=new_env, profileMode=True
|
||||
)
|
||||
else:
|
||||
success, output = capture_subprocess_output(
|
||||
[rocprof_cmd] + options, profileMode=True
|
||||
)
|
||||
console_debug("rocprof command: {}".format([rocprof_cmd] + options))
|
||||
# profile the app
|
||||
if new_env:
|
||||
success, output = capture_subprocess_output(
|
||||
[rocprof_cmd] + options, new_env=new_env, profileMode=True
|
||||
)
|
||||
else:
|
||||
success, output = capture_subprocess_output(
|
||||
[rocprof_cmd] + options, profileMode=True
|
||||
)
|
||||
|
||||
time_2 = time.time()
|
||||
console_debug(
|
||||
@@ -647,17 +718,22 @@ def run_prof(
|
||||
combined_results.to_csv(
|
||||
workload_dir + "/out/pmc_1/results_" + fbase + ".csv", index=False
|
||||
)
|
||||
elif rocprof_cmd.endswith("v3"):
|
||||
elif rocprof_cmd.endswith("v3") or rocprof_cmd == "rocprofiler-sdk":
|
||||
# rocprofv3 requires additional processing for each process
|
||||
results_files = process_rocprofv3_output(
|
||||
format_rocprof_output, workload_dir, is_timestamps
|
||||
)
|
||||
|
||||
if "--kokkos-trace" in options:
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
# TODO: as rocprofv3 --kokkos-trace feature improves, rocprof-compute should make updates accordingly
|
||||
process_kokkos_trace_output(workload_dir, fbase)
|
||||
elif "--hip-trace" in options:
|
||||
process_hip_trace_output(workload_dir, fbase)
|
||||
if "ROCPROF_HIP_RUNTIME_API_TRACE" in options:
|
||||
process_hip_trace_output(workload_dir, fbase)
|
||||
else:
|
||||
if "--kokkos-trace" in options:
|
||||
# TODO: as rocprofv3 --kokkos-trace feature improves, rocprof-compute should make updates accordingly
|
||||
process_kokkos_trace_output(workload_dir, fbase)
|
||||
elif "--hip-trace" in options:
|
||||
process_hip_trace_output(workload_dir, fbase)
|
||||
|
||||
# Combine results into single CSV file
|
||||
if results_files:
|
||||
@@ -721,33 +797,66 @@ def run_prof(
|
||||
df.to_csv(workload_dir + "/" + fbase + ".csv", index=False)
|
||||
|
||||
|
||||
def pc_sampling_prof(interval, workload_dir, appcmd):
|
||||
def pc_sampling_prof(interval, workload_dir, appcmd, rocprofiler_sdk_library_path):
|
||||
"""
|
||||
Run rocprof with pc sampling. Currently supports rocprofv3 and rocprofiler-sdk only.
|
||||
"""
|
||||
# Todo:
|
||||
# - precheck with rocprofv3 --list-avail
|
||||
options = [
|
||||
"--pc-sampling-beta-enable",
|
||||
"--pc-sampling-method",
|
||||
"host_trap",
|
||||
"--pc-sampling-unit",
|
||||
"time",
|
||||
"--output-format",
|
||||
"csv",
|
||||
"json",
|
||||
"--pc-sampling-interval",
|
||||
str(interval),
|
||||
"-d",
|
||||
workload_dir,
|
||||
"-o",
|
||||
"ps_file", # todo: sync up with the name from source in 2100_.yaml
|
||||
"--",
|
||||
appcmd,
|
||||
]
|
||||
success, output = capture_subprocess_output(
|
||||
[rocprof_cmd] + options, new_env=os.environ.copy(), profileMode=True
|
||||
)
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
rocm_libdir = str(pathlib.Path(rocprofiler_sdk_library_path).parent)
|
||||
rocprofiler_sdk_tool_path = str(
|
||||
pathlib.Path(rocm_libdir).joinpath(
|
||||
"rocprofiler-sdk/librocprofiler-sdk-tool.so"
|
||||
)
|
||||
)
|
||||
ld_preload = [
|
||||
rocprofiler_sdk_tool_path,
|
||||
rocprofiler_sdk_library_path,
|
||||
]
|
||||
options = {
|
||||
"ROCPROFILER_LIBRARY_CTOR": "1",
|
||||
"LD_PRELOAD": ":".join(ld_preload),
|
||||
"ROCP_TOOL_LIBRARIES": rocprofiler_sdk_tool_path,
|
||||
"LD_LIBRARY_PATH": rocm_libdir,
|
||||
"ROCPROF_OUTPUT_FORMAT": "csv,json",
|
||||
"ROCPROF_OUTPUT_PATH": workload_dir,
|
||||
"ROCPROF_OUTPUT_FILE_NAME": "ps_file",
|
||||
"ROCPROFILER_PC_SAMPLING_BETA_ENABLED": "1",
|
||||
"ROCPROF_PC_SAMPLING_UNIT": "time",
|
||||
"ROCPROF_PC_SAMPLING_INTERVAL": str(interval),
|
||||
"ROCPROF_PC_SAMPLING_METHOD": "host_trap",
|
||||
}
|
||||
new_env = os.environ.copy()
|
||||
for key, value in options.items():
|
||||
new_env[key] = value
|
||||
console_debug("pc sampling rocprof sdk env vars: {}".format(new_env))
|
||||
console_debug("pc sampling rocprof sdk user provided command: {}".format(appcmd))
|
||||
success, output = capture_subprocess_output(
|
||||
appcmd, new_env=new_env, profileMode=True
|
||||
)
|
||||
else:
|
||||
options = [
|
||||
"--pc-sampling-beta-enabled",
|
||||
"--pc-sampling-method",
|
||||
"host_trap",
|
||||
"--pc-sampling-unit",
|
||||
"time",
|
||||
"--output-format",
|
||||
"csv",
|
||||
"json",
|
||||
"--pc-sampling-interval",
|
||||
str(interval),
|
||||
"-d",
|
||||
workload_dir,
|
||||
"-o",
|
||||
"ps_file", # todo: sync up with the name from source in 2100_.yaml
|
||||
"--",
|
||||
appcmd,
|
||||
]
|
||||
success, output = capture_subprocess_output(
|
||||
[rocprof_cmd] + options, new_env=os.environ.copy(), profileMode=True
|
||||
)
|
||||
|
||||
if not success:
|
||||
console_error("PC sampling failed.")
|
||||
|
||||
@@ -15,12 +15,26 @@ def pytest_addoption(parser):
|
||||
help="Call standalone binary instead of main function during tests",
|
||||
)
|
||||
|
||||
parser.addoption(
|
||||
"--rocprofiler-sdk-library-path",
|
||||
type=str,
|
||||
default="/opt/rocm/lib/librocprofiler-sdk.so",
|
||||
help="Path to the rocprofiler-sdk library",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def binary_handler_profile_rocprof_compute(request):
|
||||
def _handler(
|
||||
config, workload_dir, options=[], check_success=True, roof=False, app_name="app_1"
|
||||
):
|
||||
if request.config.getoption("--rocprofiler-sdk-library-path"):
|
||||
options.extend(
|
||||
[
|
||||
"--rocprofiler-sdk-library-path",
|
||||
request.config.getoption("--rocprofiler-sdk-library-path"),
|
||||
],
|
||||
)
|
||||
if request.config.getoption("--call-binary"):
|
||||
baseline_opts = [
|
||||
"build/rocprof-compute.bin",
|
||||
|
||||
@@ -323,15 +323,22 @@ def gpu_soc():
|
||||
soc = gpu_soc()
|
||||
|
||||
# Use rocprof (v1) as profiler on MI100 and rocprofv3 on every other SoC
|
||||
if soc == "MI100":
|
||||
os.environ["ROCPROF"] = "rocprof"
|
||||
if "ROCPROF" not in os.environ.keys():
|
||||
if soc == "MI100":
|
||||
os.environ["ROCPROF"] = "rocprof"
|
||||
|
||||
else:
|
||||
os.environ["ROCPROF"] = "rocprofv3"
|
||||
else:
|
||||
os.environ["ROCPROF"] = "rocprofv3"
|
||||
|
||||
|
||||
def using_v3():
|
||||
return "ROCPROF" in os.environ.keys() and os.environ["ROCPROF"].endswith("rocprofv3")
|
||||
return "ROCPROF" not in os.environ.keys() or (
|
||||
"ROCPROF" in os.environ.keys()
|
||||
and (
|
||||
os.environ["ROCPROF"].endswith("rocprofv3")
|
||||
or os.environ["ROCPROF"] == "rocprofiler-sdk"
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
Baseline_dir = str(Path("tests/workloads/vcopy/" + soc).resolve())
|
||||
|
||||
Verwijs in nieuw issue
Block a user