905 строки
29 KiB
Python
Исполняемый файл
905 строки
29 KiB
Python
Исполняемый файл
#!/usr/bin/env python3
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import subprocess
|
|
|
|
|
|
class dotdict(dict):
    """dot.notation access to dictionary attributes

    Dictionaries nested anywhere inside the initial mapping (directly, or inside
    lists/tuples at any depth) are converted to ``dotdict``. Lists and tuples
    are stored as lists. Reading a missing attribute returns ``None``
    (``dict.get`` semantics) instead of raising ``AttributeError``.
    """

    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __init__(self, d):
        def _wrap(value):
            # dictionaries become dotdict; sequences are walked recursively so
            # that a nested list is never handed to dict() (which would raise)
            if isinstance(value, dict):
                return dotdict(value)
            if isinstance(value, (list, tuple)):
                return [_wrap(item) for item in value]
            return value

        super(dotdict, self).__init__(d)
        # only values of existing keys are replaced, so the dictionary size
        # does not change while iterating
        for k, v in self.items():
            if isinstance(v, (dict, list, tuple)):
                self.__setitem__(k, _wrap(v))
|
|
|
|
|
|
def fatal_error(msg, exit_code=1):
    """Write 'Fatal error: <msg>' to stderr, flush it, and exit with ``exit_code``."""
    stream = sys.stderr
    stream.write("Fatal error: {}\n".format(msg))
    stream.flush()
    sys.exit(exit_code)
|
|
|
|
|
|
def strtobool(val):
    """Convert a string representation of truth to true or false.

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0' (case-insensitive).
    A ``bool`` is returned unchanged. A list or tuple is accepted only if it
    holds exactly one element, which is then converted.

    Raises ValueError if 'val' is anything else (including an empty or
    multi-element list/tuple).
    """

    def _invalid(value):
        return ValueError(f"invalid truth value {value} (type={type(value).__name__})")

    if isinstance(val, (list, tuple)):
        # an empty sequence has no element to convert; report it as an invalid
        # value instead of failing with an IndexError
        if len(val) != 1:
            raise _invalid(val)
        val = val[0]

    if isinstance(val, bool):
        return val

    if isinstance(val, str):
        lowered = val.lower()
        if lowered in ("y", "yes", "t", "true", "on", "1"):
            return True
        if lowered in ("n", "no", "f", "false", "off", "0"):
            return False

    raise _invalid(val)
|
|
|
|
|
|
class booleanArgAction(argparse.Action):
    """argparse action that stores the option value on the namespace as a boolean.

    The value handed over by argparse is converted with ``strtobool`` before
    being assigned to the action's destination attribute.
    """

    def __call__(self, parser, args, value, option_string=None):
        converted = strtobool(value)
        setattr(args, self.dest, converted)
|
|
|
|
|
|
def parse_arguments(args=None):
    """Build the rocprofv3 command-line parser and parse the given arguments.

    Everything before the first "--" is parsed as rocprofv3 options; everything
    after it is returned untouched as the application command line. Without a
    "--", all arguments are parsed as rocprofv3 options.

    Args:
        args: Sequence of arguments to parse. Defaults to ``sys.argv[1:]``
            when ``None``.

    Returns:
        Tuple ``(namespace, app_args)``: the parsed ``argparse.Namespace`` and
        the list of application arguments (empty if no "--" was given).
    """

    usage_examples = """

%(prog)s requires double-hyphen (--) before the application to be executed, e.g.

    $ rocprofv3 [<rocprofv3-option> ...] -- <application> [<application-arg> ...]
    $ rocprofv3 --hip-trace -- ./myapp -n 1

For MPI applications (or other job launchers such as SLURM), place rocprofv3 inside the job launcher:

    $ mpirun -n 4 rocprofv3 --hip-trace -- ./mympiapp

"""

    # Create the parser
    parser = argparse.ArgumentParser(
        description="ROCProfilerV3 Run Script",
        usage="%(prog)s [options] -- <application> [application options]",
        epilog=usage_examples,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    def add_parser_bool_argument(gparser, *args, **kwargs):
        # boolean options may be given bare (--opt => True) or with an explicit
        # value (--opt yes/no/...) which booleanArgAction converts to a bool
        gparser.add_argument(
            *args,
            **kwargs,
            action=booleanArgAction,
            nargs="?",
            const=True,
            type=str,
            required=False,
            metavar="BOOL",
        )

    io_options = parser.add_argument_group("I/O options")

    io_options.add_argument(
        "-i",
        "--input",
        help="Input file for counter collection",
        required=False,
    )
    io_options.add_argument(
        "-o",
        "--output-file",
        help="For the output file name",
        default=os.environ.get("ROCPROF_OUTPUT_FILE_NAME", None),
        type=str,
        required=False,
    )
    io_options.add_argument(
        "-d",
        "--output-directory",
        help="For adding output path where the output files will be saved",
        default=os.environ.get("ROCPROF_OUTPUT_PATH", None),
        type=str,
        required=False,
    )
    io_options.add_argument(
        "--output-format",
        help="For adding output format (supported formats: csv, json, pftrace, otf2)",
        nargs="+",
        default=None,
        choices=("csv", "json", "pftrace", "otf2"),
        type=str.lower,
    )
    io_options.add_argument(
        "--log-level",
        help="Set the log level",
        default=None,
        choices=("fatal", "error", "warning", "info", "trace", "env"),
        type=str.lower,
    )

    aggregate_tracing_options = parser.add_argument_group("Aggregate tracing options")

    add_parser_bool_argument(
        aggregate_tracing_options,
        "-r",
        "--runtime-trace",
        help="Collect tracing data for HIP runtime API, Marker (ROCTx) API, RCCL API, Memory operations (copies and scratch), and Kernel dispatches. Similar to --sys-trace but without tracing HIP compiler API and the underlying HSA API.",
    )
    add_parser_bool_argument(
        aggregate_tracing_options,
        "-s",
        "--sys-trace",
        help="Collect tracing data for HIP API, HSA API, Marker (ROCTx) API, RCCL API, Memory operations (copies and scratch), and Kernel dispatches.",
    )

    basic_tracing_options = parser.add_argument_group("Basic tracing options")

    # Add the arguments
    add_parser_bool_argument(
        basic_tracing_options,
        "--hip-trace",
        help="For collecting HIP Traces (runtime + compiler)",
    )
    add_parser_bool_argument(
        basic_tracing_options,
        "--marker-trace",
        help="For collecting Marker (ROCTx) Traces",
    )
    add_parser_bool_argument(
        basic_tracing_options,
        "--kernel-trace",
        help="For collecting Kernel Dispatch Traces",
    )
    add_parser_bool_argument(
        basic_tracing_options,
        "--memory-copy-trace",
        help="For collecting Memory Copy Traces",
    )
    add_parser_bool_argument(
        basic_tracing_options,
        "--scratch-memory-trace",
        help="For collecting Scratch Memory operations Traces",
    )
    add_parser_bool_argument(
        basic_tracing_options,
        "--hsa-trace",
        help="For collecting HSA Traces (core + amd + image + finalizer)",
    )
    add_parser_bool_argument(
        basic_tracing_options,
        "--rccl-trace",
        help="For collecting RCCL Traces",
    )
    add_parser_bool_argument(
        basic_tracing_options,
        "--kokkos-trace",
        help="Enable built-in Kokkos Tools support (implies --marker-trace and --kernel-rename)",
    )

    extended_tracing_options = parser.add_argument_group("Granular tracing options")

    add_parser_bool_argument(
        extended_tracing_options,
        "--hip-runtime-trace",
        help="For collecting HIP Runtime API Traces, e.g. public HIP API functions starting with 'hip' (i.e. hipSetDevice).",
    )
    add_parser_bool_argument(
        extended_tracing_options,
        "--hip-compiler-trace",
        help="For collecting HIP Compiler generated code Traces, e.g. HIP API functions starting with '__hip' (i.e. __hipRegisterFatBinary).",
    )
    add_parser_bool_argument(
        extended_tracing_options,
        "--hsa-core-trace",
        help="For collecting HSA API Traces (core API), e.g. HSA functions prefixed with only 'hsa_' (i.e. hsa_init).",
    )
    add_parser_bool_argument(
        extended_tracing_options,
        "--hsa-amd-trace",
        help="For collecting HSA API Traces (AMD-extension API), e.g. HSA function prefixed with 'hsa_amd_' (i.e. hsa_amd_coherency_get_type).",
    )
    add_parser_bool_argument(
        extended_tracing_options,
        "--hsa-image-trace",
        help="For collecting HSA API Traces (Image-extension API), e.g. HSA functions prefixed with only 'hsa_ext_image_' (i.e. hsa_ext_image_get_capability).",
    )
    add_parser_bool_argument(
        extended_tracing_options,
        "--hsa-finalizer-trace",
        help="For collecting HSA API Traces (Finalizer-extension API), e.g. HSA functions prefixed with only 'hsa_ext_program_' (i.e. hsa_ext_program_create).",
    )

    post_processing_options = parser.add_argument_group("Post-processing tracing options")

    add_parser_bool_argument(
        post_processing_options,
        "--stats",
        help="For collecting statistics of enabled tracing types",
    )
    add_parser_bool_argument(
        post_processing_options,
        "-S",
        "--summary",
        help="Output single summary of tracing data at the conclusion of the profiling session",
    )
    add_parser_bool_argument(
        post_processing_options,
        "-D",
        "--summary-per-domain",
        help="Output summary for each tracing domain at the conclusion of the profiling session",
    )
    post_processing_options.add_argument(
        "--summary-groups",
        help="Output a summary for each set of domains matching the regular expression, e.g. 'KERNEL_DISPATCH|MEMORY_COPY' will generate a summary from all the tracing data in the KERNEL_DISPATCH and MEMORY_COPY domains; '.*_API' will generate a summary from all the tracing data in the HIP_API, HSA_API, and MARKER_API domains",
        nargs="+",
        default=None,
        type=str,
        metavar="REGULAR_EXPRESSION",
    )

    summary_options = parser.add_argument_group("Summary options")

    summary_options.add_argument(
        "--summary-output-file",
        help="Output summary to a file, stdout, or stderr (default: stderr)",
        default=None,
        type=str,
    )
    summary_options.add_argument(
        "-u",
        "--summary-units",
        help="Timing units for output summary",
        default=None,
        type=str,
        choices=("sec", "msec", "usec", "nsec"),
    )

    kernel_naming_options = parser.add_argument_group("Kernel naming options")

    add_parser_bool_argument(
        kernel_naming_options,
        "-M",
        "--mangled-kernels",
        help="Do not demangle the kernel names",
    )
    add_parser_bool_argument(
        kernel_naming_options,
        "-T",
        "--truncate-kernels",
        help="Truncate the demangled kernel names",
    )
    add_parser_bool_argument(
        kernel_naming_options,
        "--kernel-rename",
        help="Use region names defined by roctxRangePush/roctxRangePop regions to rename the kernels",
    )

    filter_options = parser.add_argument_group("Filtering options")

    filter_options.add_argument(
        "--kernel-include-regex",
        help="Include the kernels matching this filter from counter-collection and thread-trace data (non-matching kernels will be excluded)",
        default=None,
        type=str,
        metavar="REGULAR_EXPRESSION",
    )
    filter_options.add_argument(
        "--kernel-exclude-regex",
        help="Exclude the kernels matching this filter from counter-collection and thread-trace data (applied after --kernel-include-regex option)",
        default=None,
        type=str,
        metavar="REGULAR_EXPRESSION",
    )
    filter_options.add_argument(
        "--kernel-iteration-range",
        help="Iteration range",
        nargs="+",
        default=None,
        type=str,
    )

    perfetto_options = parser.add_argument_group("Perfetto-specific options")

    perfetto_options.add_argument(
        "--perfetto-backend",
        help="Perfetto data collection backend. 'system' mode requires starting traced and perfetto daemons",
        default=None,
        type=str,
        nargs=1,
        choices=("inprocess", "system"),
    )
    perfetto_options.add_argument(
        "--perfetto-buffer-size",
        help="Size of buffer for perfetto output in KB. default: 1 GB",
        default=None,
        type=int,
        metavar="KB",
    )
    perfetto_options.add_argument(
        "--perfetto-buffer-fill-policy",
        help="Policy for handling new records when perfetto has reached the buffer limit",
        default=None,
        type=str,
        choices=("discard", "ring_buffer"),
    )
    perfetto_options.add_argument(
        "--perfetto-shmem-size-hint",
        help="Perfetto shared memory size hint in KB. default: 64 KB",
        default=None,
        type=int,
        metavar="KB",
    )

    display_options = parser.add_argument_group("Display options")

    add_parser_bool_argument(
        display_options,
        "-L",
        "--list-metrics",
        help="List metrics for counter collection",
    )

    advanced_options = parser.add_argument_group("Advanced options")

    advanced_options.add_argument(
        "--preload",
        help="Libraries to prepend to LD_PRELOAD (useful for sanitizer libraries)",
        default=os.environ.get("ROCPROF_PRELOAD", "").split(":"),
        nargs="*",
    )
    # below is available for CI because LD_PRELOADing a library linked to a sanitizer library
    # causes issues in apps where HIP is part of shared library.
    add_parser_bool_argument(
        advanced_options,
        "--suppress-marker-preload",
        help=argparse.SUPPRESS,
    )

    if args is None:
        args = sys.argv[1:]

    # split on the first "--": left side belongs to rocprofv3, right side is
    # the application command line
    if "--" in args:
        idx = args.index("--")
        rocp_args = args[0:idx]
        app_args = args[(idx + 1) :]
    else:
        rocp_args = args[:]
        app_args = []

    return (parser.parse_args(rocp_args), app_args)
|
|
|
|
|
|
def parse_yaml(yaml_file):
    """Read the list of jobs from a YAML input file.

    The file is expected to hold a top-level "jobs" sequence of mappings.
    Each job gets ``sub_directory = "pass_"`` and is returned as a ``dotdict``.

    Any failure (PyYAML not installed, unreadable file, invalid YAML, missing
    or malformed "jobs" entry) is reported through ``fatal_error``.
    """
    try:
        import yaml
    except ImportError as e:
        fatal_error(
            f"{e}\n\nYAML package is not installed. Run '{sys.executable} -m pip install pyyaml' or use JSON or text format"
        )

    try:
        lst = []
        with open(yaml_file, "r") as file:
            data = yaml.safe_load(file)
            # TODO: support naming jobs (i.e. "jobs" given as a mapping of name -> job)
            for itr in data["jobs"]:
                itr["sub_directory"] = "pass_"
                lst.append(itr)

        return [dotdict(itr) for itr in lst]

    except (yaml.YAMLError, OSError, KeyError, TypeError, ValueError) as exc:
        # OSError: file cannot be opened/read; KeyError/TypeError/ValueError:
        # document does not have the expected {"jobs": [{...}, ...]} layout
        fatal_error(f"{exc}")

    return None
|
|
|
|
|
|
def parse_json(json_file):
    """Read the list of jobs from a JSON input file.

    Every entry of the top-level "jobs" array gets ``sub_directory = "pass_"``
    and is returned wrapped in a ``dotdict``. Any exception raised while
    reading or converting is reported through ``fatal_error``.
    """
    import json

    try:
        with open(json_file, "r") as stream:
            payload = json.load(stream)
            jobs = []
            for job in payload["jobs"]:
                job["sub_directory"] = "pass_"
                jobs.append(job)

        converted = []
        for job in jobs:
            converted.append(dotdict(job))
        return converted

    except Exception as err:
        fatal_error(f"{err}")

    return None
|
|
|
|
|
|
def parse_text(text_file):
    """Read counter-collection passes from a text input file.

    Every line containing "pmc:" (outside of a trailing '#' comment) defines
    one pass; the whitespace-separated names on that line are its counters.
    Lines without "pmc:", commented-out lines, and "pmc:" lines that list no
    counters are skipped.

    Returns:
        List of passes, each a list of counter-name strings. Failure to read
        the file is reported through ``fatal_error``.
    """

    def process_line(line):
        # drop the comment first so a commented-out "pmc:" line is ignored
        line = line.split("#", 1)[0]
        if "pmc:" not in line:
            return []
        # str.split() with no separator splits on any run of whitespace
        # (spaces, tabs, newline) and never yields empty names
        return line.replace("pmc:", "").split()

    try:
        with open(text_file, "r") as file:
            return [
                litr for litr in (process_line(itr) for itr in file) if len(litr) > 0
            ]
    except (OSError, UnicodeError) as e:
        fatal_error(f"{e}")

    return None
|
|
|
|
|
|
def parse_input(input_file):
    """Dispatch an input file to the parser matching its extension.

    ".txt" files yield one ``dotdict`` per pmc line (with ``sub_directory``
    set to "pmc_"); ".yaml"/".yml" and ".json" files are handled by
    ``parse_yaml`` and ``parse_json``. Any other extension is a fatal error.
    """
    extension = os.path.splitext(input_file)[1]

    if extension in (".yaml", ".yml"):
        return parse_yaml(input_file)

    if extension == ".json":
        return parse_json(input_file)

    if extension == ".txt":
        passes = []
        for counters in parse_text(input_file):
            passes.append({"pmc": counters, "sub_directory": "pmc_"})
        return [dotdict(entry) for entry in passes]

    fatal_error(
        f"Input file '{input_file}' does not have a recognized extension (.txt, .json, .yaml, .yml)\n"
    )

    return None
|
|
|
|
|
|
def has_set_attr(obj, key):
    """Return True only when ``obj`` is truthy and its attribute ``key`` exists and is not None."""
    if not obj:
        return False
    return getattr(obj, key, None) is not None
|
|
|
|
|
|
def patch_args(data):
    """Used to handle certain fields which might be specified as a string instead of an array or vice-versa

    Currently only ``kernel_iteration_range`` is normalized: a plain string is
    wrapped in a one-element list. ``data`` is modified in place and returned.
    """
    iteration_range = getattr(data, "kernel_iteration_range", None)
    if isinstance(iteration_range, str):
        data.kernel_iteration_range = [iteration_range]

    return data
|
|
|
|
|
|
def get_args(cmd_args, inp_args):
    """Merge command-line arguments with the settings of one input-file job.

    For every key present on either side, the command-line value is used when
    it is set (not None), otherwise the input-file value, otherwise None.

    Args:
        cmd_args: ``argparse.Namespace`` from the command line.
        inp_args: ``dotdict`` holding the settings of one input-file job.

    Returns:
        ``dotdict`` with the merged values, passed through ``patch_args``.

    Raises:
        TypeError: if either argument is not of the expected type.
        RuntimeError: if a key is set on both sides with different values.
    """

    def ensure_type(name, var, type_id):
        if not isinstance(var, type_id):
            raise TypeError(
                f"{name} is of type {type(var).__name__}, expected {type_id.__name__}"
            )

    ensure_type("cmd_args", cmd_args, argparse.Namespace)
    ensure_type("inp_args", inp_args, dotdict)

    data = {}
    for itr in set(vars(cmd_args)) | set(inp_args.keys()):
        cmd_is_set = has_set_attr(cmd_args, itr)
        inp_is_set = has_set_attr(inp_args, itr)

        if cmd_is_set and inp_is_set and getattr(cmd_args, itr) != getattr(inp_args, itr):
            raise RuntimeError(f"conflicting value for {itr}")

        # the command line takes precedence over the input file
        if cmd_is_set:
            data[itr] = getattr(cmd_args, itr)
        elif inp_is_set:
            data[itr] = getattr(inp_args, itr)
        else:
            data[itr] = None

    return patch_args(dotdict(data))
|
|
|
|
|
|
def run(app_args, args, **kwargs):
    """Set up the profiling environment from ``args`` and launch the application.

    A copy of the current environment is extended with the LD_PRELOAD /
    library settings and the ROCPROF_* variables derived from ``args``, then
    the application is started with that environment.

    Args:
        app_args: Application command line (executable followed by its
            arguments). Replaced by the list-metrics helper executable when
            ``args.list_metrics`` is set.
        args: Merged configuration with attribute access where unset options
            read as None (``main`` passes the ``dotdict`` from ``get_args``).
            Aggregate options (e.g. ``sys_trace``) set the options they imply
            on this object.
        **kwargs:
            use_execv (bool, default True): replace the current process with
                the application via ``os.execvpe``; otherwise run it as a
                child process.
            pass_id (int or None, default None): when given, the output path
                is extended with ``<args.sub_directory><pass_id>``.

    Returns:
        The child's exit code (always 0) when ``use_execv`` is false. A
        non-zero child exit code terminates this process with the same code
        via ``fatal_error``. Does not return when ``use_execv`` is true.
    """

    app_env = dict(os.environ)
    use_execv = kwargs.get("use_execv", True)
    app_pass = kwargs.get("pass_id", None)

    def update_env(env_var, env_val, **kwargs):
        """Local function for updating application environment which supports
        various options for dealing with existing environment variables

        A value of None leaves the environment untouched. Returns the value
        of ``env_var`` in the application environment after the update.
        """
        _overwrite = kwargs.get("overwrite", True)
        _prepend = kwargs.get("prepend", False)
        _append = kwargs.get("append", False)
        _join_char = kwargs.get("join_char", ":")

        # only overwrite if env_val evaluates as true
        _overwrite_if_true = kwargs.get("overwrite_if_true", False)
        # only overwrite if env_val evaluates as false
        _overwrite_if_false = kwargs.get("overwrite_if_false", False)

        # booleans are exported as "1"/"0", everything else via str()
        _formatter = kwargs.get(
            "formatter",
            lambda x: f"{x}" if not isinstance(x, bool) else "1" if x else "0",
        )

        for itr in kwargs:
            if itr not in (
                "overwrite",
                "prepend",
                "append",
                "join_char",
                "overwrite_if_true",
                "overwrite_if_false",
                "formatter",
            ):
                fatal_error(
                    f"Internal error in update_env('{env_var}', {env_val}, {itr}={kwargs[itr]}). Invalid key: {itr}"
                )

        if env_val is None:
            return app_env.get(env_var, None)

        _val = _formatter(env_val)
        _curr_val = app_env.get(env_var, None)

        def _write_env_value():
            if _overwrite_if_true:
                if bool(env_val) is True:
                    app_env[env_var] = _val
            elif _overwrite_if_false:
                if bool(env_val) is False:
                    app_env[env_var] = _val
            else:
                app_env[env_var] = _val

        if _curr_val is not None:
            if not _overwrite:
                pass
            elif _prepend:
                # strip the separator actually used for joining so that an
                # empty existing value does not leave a dangling separator
                app_env[env_var] = (
                    "{}{}{}".format(_val, _join_char, _curr_val) if _val else _curr_val
                ).strip(_join_char)
            elif _append:
                app_env[env_var] = (
                    "{}{}{}".format(_curr_val, _join_char, _val) if _val else _curr_val
                ).strip(_join_char)
            else:
                _write_env_value()
        else:
            _write_env_value()

        return app_env.get(env_var, None)

    update_env("ROCPROFILER_LIBRARY_CTOR", True)

    # library locations are resolved relative to the directory above the one
    # containing this script
    ROCPROFV3_DIR = os.path.dirname(os.path.realpath(__file__))
    ROCM_DIR = os.path.dirname(ROCPROFV3_DIR)
    ROCPROF_TOOL_LIBRARY = f"{ROCM_DIR}/lib/rocprofiler-sdk/librocprofiler-sdk-tool.so"
    ROCPROF_SDK_LIBRARY = f"{ROCM_DIR}/lib/librocprofiler-sdk.so"
    ROCPROF_ROCTX_LIBRARY = f"{ROCM_DIR}/lib/librocprofiler-sdk-roctx.so"
    ROCPROF_KOKKOSP_LIBRARY = (
        f"{ROCM_DIR}/lib/rocprofiler-sdk/librocprofiler-sdk-tool-kokkosp.so"
    )

    # empty entries come from splitting an unset/empty ROCPROF_PRELOAD
    prepend_preload = [itr for itr in (args.preload or []) if itr]
    append_preload = [ROCPROF_TOOL_LIBRARY, ROCPROF_SDK_LIBRARY]

    update_env("LD_PRELOAD", ":".join(prepend_preload), prepend=True)
    update_env("LD_PRELOAD", ":".join(append_preload), append=True)

    update_env(
        "ROCP_TOOL_LIBRARIES",
        f"{ROCPROF_TOOL_LIBRARY}",
        append=True,
    )
    update_env(
        "LD_LIBRARY_PATH",
        f"{ROCM_DIR}/lib",
        append=True,
    )

    _output_file = args.output_file
    _output_path = (
        args.output_directory if args.output_directory is not None else os.getcwd()
    )

    update_env("ROCPROF_OUTPUT_FILE_NAME", _output_file)
    update_env("ROCPROF_OUTPUT_PATH", _output_path)
    if app_pass is not None:
        app_env["ROCPROF_OUTPUT_PATH"] = os.path.join(
            f"{_output_path}", f"{args.sub_directory}{app_pass}"
        )

    if args.output_file is not None or args.output_directory is not None:
        update_env("ROCPROF_OUTPUT_LIST_METRICS_FILE", True)

    if not args.output_format:
        args.output_format = ["csv"]

    update_env(
        "ROCPROF_OUTPUT_FORMAT", ",".join(args.output_format), append=True, join_char=","
    )

    # aggregate options: enable every option they imply
    if args.kokkos_trace:
        update_env("KOKKOS_TOOLS_LIBS", ROCPROF_KOKKOSP_LIBRARY, append=True)
        for itr in (
            "marker_trace",
            "kernel_rename",
        ):
            setattr(args, itr, True)

    if args.sys_trace:
        for itr in (
            "hip_trace",
            "hsa_trace",
            "marker_trace",
            "kernel_trace",
            "memory_copy_trace",
            "scratch_memory_trace",
            "rccl_trace",
        ):
            setattr(args, itr, True)

    if args.runtime_trace:
        for itr in (
            "hip_runtime_trace",
            "marker_trace",
            "kernel_trace",
            "memory_copy_trace",
            "scratch_memory_trace",
            "rccl_trace",
        ):
            setattr(args, itr, True)

    if args.hip_trace:
        for itr in ("compiler", "runtime"):
            setattr(args, f"hip_{itr}_trace", True)

    if args.hsa_trace:
        for itr in ("core", "amd", "image", "finalizer"):
            setattr(args, f"hsa_{itr}_trace", True)

    trace_count = 0
    trace_opts = ["--hip-trace", "--hsa-trace"]
    for opt, env_val in (
        ("hip_compiler_trace", "HIP_COMPILER_API_TRACE"),
        ("hip_runtime_trace", "HIP_RUNTIME_API_TRACE"),
        ("hsa_core_trace", "HSA_CORE_API_TRACE"),
        ("hsa_amd_trace", "HSA_AMD_EXT_API_TRACE"),
        ("hsa_image_trace", "HSA_IMAGE_EXT_API_TRACE"),
        ("hsa_finalizer_trace", "HSA_FINALIZER_EXT_API_TRACE"),
        ("marker_trace", "MARKER_API_TRACE"),
        ("rccl_trace", "RCCL_API_TRACE"),
        ("kernel_trace", "KERNEL_TRACE"),
        ("memory_copy_trace", "MEMORY_COPY_TRACE"),
        ("scratch_memory_trace", "SCRATCH_MEMORY_TRACE"),
    ):
        val = getattr(args, f"{opt}")
        update_env(f"ROCPROF_{env_val}", val, overwrite_if_true=True)
        trace_count += 1 if val else 0
        trace_opts += ["--{}".format(opt.replace("_", "-"))]

    # if marker tracing was requested, LD_PRELOAD the rocprofiler-sdk-roctx library
    # to override the roctx symbols of an app linked to the old roctracer roctx
    if args.marker_trace and not args.suppress_marker_preload:
        update_env("LD_PRELOAD", ROCPROF_ROCTX_LIBRARY, append=True)

    if trace_count == 0:
        # if no tracing was enabled but the options below were enabled, raise an error
        for oitr in [
            "stats",
            "summary",
            "summary-per-domain",
            "summary-groups",
            "summary-output-file",
            "summary-units",
        ]:
            _attr = oitr.replace("-", "_")
            if not hasattr(args, _attr):
                fatal_error(
                    f"Internal error. parser does not support --{oitr} argument (i.e. args.{_attr})"
                )
            elif getattr(args, _attr):
                _len = max([len(f"{key}") for key in args.keys()])
                _args = "\n\t".join(
                    sorted([f"{key:<{_len}} = {val}" for key, val in args.items()])
                )
                fatal_error(
                    "No tracing options were enabled for --{} option.\nConfiguration argument values:\n\t{}\nTracing options:\n\t{}".format(
                        oitr, f"{_args}", "\n\t".join(trace_opts)
                    )
                )

    _summary_groups = "##@@##".join(args.summary_groups) if args.summary_groups else None
    _summary_output_fname = args.summary_output_file
    if _summary_output_fname is None:
        _summary_output_fname = "stderr"
    elif _summary_output_fname.lower() in ("stdout", "stderr"):
        _summary_output_fname = _summary_output_fname.lower()

    update_env("ROCPROF_STATS", args.stats, overwrite_if_true=True)
    update_env("ROCPROF_STATS_SUMMARY", args.summary, overwrite_if_true=True)
    update_env("ROCPROF_STATS_SUMMARY_UNITS", args.summary_units, overwrite=True)
    update_env("ROCPROF_STATS_SUMMARY_OUTPUT", _summary_output_fname, overwrite=True)
    update_env("ROCPROF_STATS_SUMMARY_GROUPS", _summary_groups, overwrite=True)
    update_env(
        "ROCPROF_STATS_SUMMARY_PER_DOMAIN",
        args.summary_per_domain,
        overwrite_if_true=True,
    )
    update_env(
        "ROCPROF_DEMANGLE_KERNELS",
        not args.mangled_kernels,
        overwrite_if_false=True,
    )
    update_env(
        "ROCPROF_TRUNCATE_KERNELS",
        args.truncate_kernels,
        overwrite_if_true=True,
    )
    update_env(
        "ROCPROF_LIST_METRICS",
        args.list_metrics,
        overwrite_if_true=True,
    )

    # "env" means: keep whatever log level the environment already defines
    if args.log_level and args.log_level != "env":
        for itr in ("ROCPROF", "ROCPROFILER", "ROCTX"):
            update_env(
                f"{itr}_LOG_LEVEL",
                args.log_level,
            )

    for opt, env_val in (("kernel_rename", "KERNEL_RENAME"),):
        val = getattr(args, f"{opt}")
        if val is not None:
            update_env(f"ROCPROF_{env_val}", val, overwrite_if_true=True)

    for opt, env_val in (
        ("perfetto_buffer_size", "PERFETTO_BUFFER_SIZE_KB"),
        ("perfetto_shmem_size_hint", "PERFETTO_SHMEM_SIZE_HINT_KB"),
        # attribute name must match the --perfetto-buffer-fill-policy option
        ("perfetto_buffer_fill_policy", "PERFETTO_BUFFER_FILL_POLICY"),
        ("perfetto_backend", "PERFETTO_BACKEND"),
    ):
        val = getattr(args, f"{opt}")
        if val is not None:
            if isinstance(val, (list, tuple, set)):
                val = ", ".join(val)
            update_env(f"ROCPROF_{env_val}", val, overwrite=True)

    def log_config(_env):
        # print every variable of _env that is not part of the current
        # process environment, i.e. the variables introduced by this function
        existing_env = dict(os.environ)
        init_message = "\n- rocprofv3 configuration{}:\n".format(
            "" if app_pass is None else f" (pass {app_pass})"
        )
        for key, itr in _env.items():
            if key not in existing_env:
                if init_message:
                    sys.stderr.write(init_message)
                    init_message = None
                sys.stderr.write(f"\t- {key}={itr}\n")
        if init_message is None:
            sys.stderr.write("\n")
        sys.stderr.flush()

    if args.list_metrics:
        app_args = [f"{ROCM_DIR}/lib/rocprofiler-sdk/rocprofv3-trigger-list-metrics"]

    elif not app_args:
        log_config(app_env)
        fatal_error("No application provided")

    if args.kernel_include_regex:
        update_env(
            "ROCPROF_KERNEL_FILTER_INCLUDE_REGEX",
            args.kernel_include_regex,
        )

    if args.kernel_exclude_regex:
        update_env(
            "ROCPROF_KERNEL_FILTER_EXCLUDE_REGEX",
            args.kernel_exclude_regex,
        )

    if args.kernel_iteration_range:
        update_env("ROCPROF_KERNEL_FILTER_RANGE", ", ".join(args.kernel_iteration_range))

    if args.pmc:
        update_env("ROCPROF_COUNTER_COLLECTION", True, overwrite=True)
        update_env(
            "ROCPROF_COUNTERS", "pmc: {}".format(" ".join(args.pmc)), overwrite=True
        )
    else:
        update_env("ROCPROF_COUNTER_COLLECTION", False, overwrite=True)

    if args.log_level in ("info", "trace", "env"):
        log_config(app_env)

    if use_execv:
        # does not return
        os.execvpe(app_args[0], app_args, env=app_env)

    try:
        # subprocess.call returns the exit code instead of raising on a
        # non-zero status, so the application's exit code can be propagated
        exit_code = subprocess.call(app_args, env=app_env)
    except Exception as e:
        fatal_error(f"{e}\n")

    if exit_code != 0:
        fatal_error("Application exited with non-zero exit code", exit_code)

    return exit_code
|
|
|
|
|
|
def main(argv=None):
    """Entry point: parse the command line and the optional input file, then profile.

    With no input file, or an input file describing a single job, the
    application is launched once (a pass id of 1 is used only when that job
    lists pmc counters). With several jobs, the application is run once per
    job as a child process, with pass ids starting at 1.
    """
    cmd_args, app_args = parse_arguments(argv)

    if getattr(cmd_args, "input"):
        inp_args = parse_input(cmd_args.input)
    else:
        inp_args = [dotdict({})]

    if len(inp_args) != 1:
        for pass_number, job in enumerate(inp_args, start=1):
            job_args = get_args(cmd_args, job)
            run(app_args, job_args, pass_id=pass_number, use_execv=False)
        return

    args = get_args(cmd_args, inp_args[0])
    has_counters = hasattr(args, "pmc") and args.pmc is not None and len(args.pmc) > 0
    run(app_args, args, pass_id=1 if has_counters else None)
|
|
|
|
|
|
# script entry point: forward the command line to main() and exit with its result
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
|