diff --git a/CHANGELOG.md b/CHANGELOG.md index dcfc5d1f67..71e1af9296 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Add Docker files to package the application and dependencies into a single portable and executable standalone binary file +* Analysis report based filtering + * -b option in profile mode now additionally accepts metric id(s) for analysis report based filtering + * -b option in profile mode also accept hardware IP block for filtering, however, this support will be deprecated soon + * --list-metrics option added in profile mode to list possible metric id(s), similar to analyze mode + ### Changed * Change normal_unit default to per_kernel diff --git a/CMakeLists.txt b/CMakeLists.txt index f2ca926314..39b4627be8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -244,6 +244,13 @@ add_test( ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +add_test( + NAME test_profile_section + COMMAND + ${Python3_EXECUTABLE} -m pytest -m section --junitxml=tests/test_profile_misc.xml + ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + set_tests_properties( test_profile_kernel_execution test_profile_ipblocks diff --git a/docs/how-to/profile/mode.rst b/docs/how-to/profile/mode.rst index d6b9c7ce89..0d6c72434e 100644 --- a/docs/how-to/profile/mode.rst +++ b/docs/how-to/profile/mode.rst @@ -230,7 +230,7 @@ Filtering options ----------------- ``-b``, ``--block `` - Allows system profiling on one or more selected hardware components to speed + Allows system profiling on one or more selected hardware report blocks to speed up the profiling process. See :ref:`profiling-hw-component-filtering`. ``-k``, ``--kernel `` @@ -251,21 +251,91 @@ Filtering options .. 
_profiling-hw-component-filtering: -Hardware component filtering +Hardware report block filtering ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You can profile specific hardware components to speed up the profiling process. -In ROCm Compute Profiler, the term hardware block to refers to a hardware component or a -group of hardware components. All profiling results are accumulated in the same -target directory without overwriting those for other hardware components. This -enables incremental profiling and analysis. +You can profile specific hardware report blocks to speed up the profiling process. +In ROCm Compute Profiler, the term hardware report block refers to a section of the +analysis report which focuses on metrics associated with a hardware component or +a group of hardware components. All profiling results are accumulated in the same +target directory without overwriting those for other hardware components. +This enables incremental profiling and analysis. -The following example only gathers hardware counters for the shader sequencer -(SQ) and L2 cache (TCC) components, skipping all other hardware components. +The following example only gathers hardware counters used to calculate metrics +for ``Compute Unit - Instruction Mix`` (block 10) and ``Wavefront Launch Statistics`` +(block 7) sections of the analysis report, while skipping over all other hardware counters. .. 
code-block:: shell-session - $ rocprof-compute profile --name vcopy -b SQ TCC -- ./vcopy -n 1048576 -b 256 + $ rocprof-compute profile --name vcopy -b 10 7 -- ./vcopy -n 1048576 -b 256 + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + rocprofiler-compute version: 2.0.0 + Profiler choice: rocprofv1 + Path: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 + Target: MI200 + Command: ./vcopy -n 1048576 -b 256 + Kernel Selection: None + Dispatch Selection: None + Hardware Blocks: [] + Report Sections: ['10', '7'] + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Collecting Performance Counters + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ... + + +To see a list of available hardware report blocks, use the ``--list-metrics`` option. + +.. code-block:: shell-session + + $ rocprof-compute profile --list-metrics + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + 0 -> Top Stats + 1 -> System Info + 2 -> System Speed-of-Light + 2.1 -> Speed-of-Light + 2.1.0 -> VALU FLOPs + 2.1.1 -> VALU IOPs + 2.1.2 -> MFMA FLOPs (F8) + ... + 5 -> Command Processor (CPC/CPF) + 5.1 -> Command Processor Fetcher + 5.1.0 -> CPF Utilization + 5.1.1 -> CPF Stall + 5.1.2 -> CPF-L2 Utilization + 5.2 -> Packet Processor + 5.2.0 -> CPC Utilization + 5.2.1 -> CPC Stall Rate + 5.2.5 -> CPC-UTCL1 Stall + ... 
+ 6 -> Workgroup Manager (SPI) + 6.1 -> Workgroup Manager Utilizations + 6.1.0 -> Accelerator Utilization + 6.1.1 -> Scheduler-Pipe Utilization + 6.1.2 -> Workgroup Manager Utilization + + +It is also possible to filter counter collection by hardware component such as Shader Sequencer (SQ) +and L2 cache (TCC) as shown below. + +.. code-block:: shell-session + + $ rocprof-compute profile --name vcopy -b SQ TCC -- ./vcopy -n 1048576 -b 256 __ _ _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ @@ -297,12 +367,18 @@ The following example only gathers hardware counters for the shader sequencer Kernel Selection: None Dispatch Selection: None Hardware Blocks: ['sq', 'tcc'] + Report Sections: [] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Collecting Performance Counters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ... +.. warning:: + + Filtering by hardware components (e.g. SQ, TCC) will soon be deprecated. + It is recommended to use hardware report block based filtering. + .. _profiling-kernel-filtering: Kernel filtering diff --git a/docs/how-to/use.rst b/docs/how-to/use.rst index e138e5a41b..4ac2b6bd1d 100644 --- a/docs/how-to/use.rst +++ b/docs/how-to/use.rst @@ -57,17 +57,17 @@ Common filters to customize data collection include: Enables filtering based on dispatch ID. ``-b``, ``--block`` - Enables collection metrics for only the specified (one or more) hardware - component blocks. + Enables collection metrics for only the specified hardware report blocks. See :ref:`Filtering ` for an in-depth walkthrough. -To view available metrics by hardware block, use the ``--list-metrics`` -argument: +To view available metrics by hardware block, use the ``profile`` mode ``--list-metrics`` +option with an optional system architecture argument (inferred if not provided): .. code-block:: shell - $ rocprof-compute analyze --list-metrics + $ rocprof-compute profile --list-metrics + $ rocprof-compute profile --list-metrics gfx90a ..
def print_avail_arch(avail_arch: list):
    """Build the multi-line help text shown for the ``--list-metrics`` option.

    The first line is a fixed description; each entry of ``avail_arch`` is then
    appended on its own tab-indented line, in iteration order.
    """
    header = "\t\t\tList all available metrics for analysis on specified arch:"
    arch_lines = ["\n\t\t\t {}".format(name) for name in avail_arch]
    return header + "".join(arch_lines)
def validate_block(value):
    """Classify one ``-b/--block`` command-line token.

    Intended as the ``type=`` callable of the ``--block`` option, so it is
    invoked once per token.

    Args:
        value: Raw token, e.g. ``"10"``, ``"4.3"``, ``"10.12"`` or ``"TCC"``.

    Returns:
        ``(value, "metric_id")`` when the token is a report section id, or
        ``(value, "hardware_block")`` when it names a supported hardware block.

    Raises:
        argparse.ArgumentTypeError: If the token is neither of the above.
    """
    # A metric id is a section number optionally followed by a subsection of at
    # most two digits: 10, 4, 4.3, 4.32, 10.1. The earlier pattern only allowed a
    # one-digit subsection after a one-digit section, so ids such as 10.1 were
    # rejected even though --list-metrics advertises them.
    metric_id_pattern = re.compile(r"\d+(?:\.\d{1,2})?")
    # Allow only the following hardware blocks (case-sensitive).
    hardware_block_pattern = re.compile(r"(?:SQ|SQC|TA|TD|TCP|TCC|SPI|CPC|CPF)")
    text = str(value)
    # fullmatch() requires the whole token to match; unlike a "$" anchor it does
    # not silently accept a trailing newline.
    if metric_id_pattern.fullmatch(text):
        return (text, "metric_id")
    if hardware_block_pattern.fullmatch(text):
        return (text, "hardware_block")
    raise argparse.ArgumentTypeError(f"Invalid hardware block or metric id: {value}")
+ \t\t\tHardware block filtering (to be deprecated soon): + \t\t\t SQ + \t\t\t SQC + \t\t\t TA + \t\t\t TD + \t\t\t TCP + \t\t\t TCC + \t\t\t SPI + \t\t\t CPC + \t\t\t CPF""", + ) + profile_group.add_argument( + "--list-metrics", + metavar="", + nargs="?", + const="", + # Argument to --list-metrics is optional + choices=[""] + list(supported_archs.keys()), # ["gfx906", "gfx908", "gfx90a"], + help=print_avail_arch(supported_archs.keys()), + ) + profile_group.add_argument( + "--config-dir", + dest="config_dir", + metavar="", + help="\t\t\tSpecify the directory of customized report section configs.", + default=rocprof_compute_home.joinpath("rocprof_compute_soc/analysis_configs/"), ) result = shutil.which("rocscope") @@ -487,7 +538,7 @@ Examples: dest="filter_metrics", metavar="", nargs="+", - help="\t\tSpecify hardware block/metric id(s) from --list-metrics for filtering.", + help="\t\tSpecify metric id(s) from --list-metrics for filtering.", ) analyze_group.add_argument( "--gpu-id", diff --git a/src/rocprof_compute_analyze/analysis_base.py b/src/rocprof_compute_analyze/analysis_base.py index 51e7778d79..5591c1efe1 100644 --- a/src/rocprof_compute_analyze/analysis_base.py +++ b/src/rocprof_compute_analyze/analysis_base.py @@ -45,6 +45,7 @@ class OmniAnalyze_Base: self.__args = args self._runs = OrderedDict() self._arch_configs = {} + self._profiling_config = dict() self.__supported_archs = supported_archs self._output = None self.__socs: dict = None # available OmniSoC objs @@ -254,6 +255,9 @@ class OmniAnalyze_Base: open(self.__args.output_file, "w+") if self.__args.output_file else sys.stdout ) + # Read profiling config + self._profiling_config = file_io.load_profiling_config(self.__args.path[0][0]) + # initalize runs self._runs = self.initalize_runs() diff --git a/src/rocprof_compute_analyze/analysis_cli.py b/src/rocprof_compute_analyze/analysis_cli.py index facfad01f2..b370c070f0 100644 --- a/src/rocprof_compute_analyze/analysis_cli.py +++ 
@demarcate
def list_metrics(self):
    """Print the metric ids available for an architecture, then exit.

    The architecture is the value given to ``--list-metrics``; when that value
    is empty the detected ``gpu_arch`` of the machine spec is used instead.
    Each metric is printed as ``<id> -> <title>``, indented by one tab per
    nesting level (at most two). An unknown architecture is reported through
    ``console_error``.
    """
    arch = self.__args.list_metrics or self.__mspec.gpu_arch

    if arch not in self.__supported_archs.keys():
        console_error("Unsupported arch")
        return

    ac = schema.ArchConfig()
    ac.panel_configs = file_io.load_panel_configs(
        self.__args.config_dir.joinpath(arch)
    )
    sys_info = self.__mspec.get_class_members().iloc[0]
    # build_dfs fills ac.metric_list as a side effect of building the dataframes
    parser.build_dfs(archConfigs=ac, filter_metrics=[], sys_info=sys_info)

    for key, value in ac.metric_list.items():
        # "4" -> no indent, "4.1" -> one tab, "4.1.0" (or deeper) -> two tabs
        depth = min(str(key).count("."), 2)
        print("\t" * depth + key, "->", value)
    sys.exit(0)
def __init__(self, args, profiler_mode, soc, supported_archs):
    """Store the profiling context and split the ``-b`` filters by kind.

    ``args.filter_blocks`` maps each filter token to either
    ``"hardware_block"`` or ``"metric_id"``; the tokens are kept in two
    separate lists, preserving their original order.
    """
    self.__args = args
    self.__profiler = profiler_mode
    self.__supported_archs = supported_archs
    self._soc = soc  # OmniSoC obj
    perfmon_root = Path(str(config.rocprof_compute_home))
    self.__perfmon_dir = str(
        perfmon_root.joinpath("rocprof_compute_soc", "profile_configs")
    )

    hardware_blocks = []
    metric_ids = []
    for block_name, block_kind in args.filter_blocks.items():
        if block_kind == "hardware_block":
            hardware_blocks.append(block_name)
        elif block_kind == "metric_id":
            metric_ids.append(block_name)
    self.__filter_hardware_blocks = hardware_blocks
    self.__filter_metric_ids = metric_ids
console_log("Hardware Blocks: All") else: - console_log("Hardware Blocks: " + str(self.__args.ipblocks)) + console_log("Hardware Blocks: " + str(self.__filter_hardware_blocks)) + if self.__filter_metric_ids == None: + console_log("Report Sections: All") + else: + console_log("Report Sections: " + str(self.__filter_metric_ids)) msg = "Collecting Performance Counters" ( @@ -424,7 +434,11 @@ class RocProfCompute_Base: gen_sysinfo( workload_name=self.__args.name, workload_dir=self.get_args().path, - ip_blocks=self.__args.ipblocks, + ip_blocks=[ + name + for name, type in self.__args.filter_blocks.items() + if type == "hardware_block" + ], app_cmd=self.__args.remaining, skip_roof=self.__args.no_roof, roof_only=self.__args.roof_only, diff --git a/src/rocprof_compute_profile/profiler_rocprof_v1.py b/src/rocprof_compute_profile/profiler_rocprof_v1.py index e4fa44cdfd..53e54b70fc 100644 --- a/src/rocprof_compute_profile/profiler_rocprof_v1.py +++ b/src/rocprof_compute_profile/profiler_rocprof_v1.py @@ -30,8 +30,8 @@ from utils.utils import console_log, demarcate, replace_timestamps, store_app_cm class rocprof_v1_profiler(RocProfCompute_Base): - def __init__(self, profiling_args, profiler_mode, soc): - super().__init__(profiling_args, profiler_mode, soc) + def __init__(self, profiling_args, profiler_mode, soc, supported_archs): + super().__init__(profiling_args, profiler_mode, soc, supported_archs) self.ready_to_profile = ( self.get_args().roof_only and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file() diff --git a/src/rocprof_compute_profile/profiler_rocprof_v2.py b/src/rocprof_compute_profile/profiler_rocprof_v2.py index 4ed8b2ba9f..4b04f92e64 100644 --- a/src/rocprof_compute_profile/profiler_rocprof_v2.py +++ b/src/rocprof_compute_profile/profiler_rocprof_v2.py @@ -31,8 +31,8 @@ from utils.utils import console_log, demarcate, replace_timestamps, store_app_cm class rocprof_v2_profiler(RocProfCompute_Base): - def __init__(self, profiling_args, profiler_mode, 
soc): - super().__init__(profiling_args, profiler_mode, soc) + def __init__(self, profiling_args, profiler_mode, soc, supported_archs): + super().__init__(profiling_args, profiler_mode, soc, supported_archs) self.ready_to_profile = ( self.get_args().roof_only and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file() diff --git a/src/rocprof_compute_profile/profiler_rocprof_v3.py b/src/rocprof_compute_profile/profiler_rocprof_v3.py index af4c3b1022..da14ff2e5c 100644 --- a/src/rocprof_compute_profile/profiler_rocprof_v3.py +++ b/src/rocprof_compute_profile/profiler_rocprof_v3.py @@ -32,8 +32,8 @@ from utils.utils import console_error, console_log, demarcate, replace_timestamp class rocprof_v3_profiler(RocProfCompute_Base): - def __init__(self, profiling_args, profiler_mode, soc): - super().__init__(profiling_args, profiler_mode, soc) + def __init__(self, profiling_args, profiler_mode, soc, supported_archs): + super().__init__(profiling_args, profiler_mode, soc, supported_archs) self.ready_to_profile = ( self.get_args().roof_only and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file() diff --git a/src/rocprof_compute_profile/profiler_rocscope.py b/src/rocprof_compute_profile/profiler_rocscope.py index 761e931dab..81d134c0e7 100644 --- a/src/rocprof_compute_profile/profiler_rocscope.py +++ b/src/rocprof_compute_profile/profiler_rocscope.py @@ -27,8 +27,8 @@ from utils.utils import console_log, demarcate class rocscope_profiler(RocProfCompute_Base): - def __init__(self, profiling_args, profiler_mode, soc): - super().__init__(profiling_args, profiler_mode, soc) + def __init__(self, profiling_args, profiler_mode, soc, supported_archs): + super().__init__(profiling_args, profiler_mode, soc, supported_archs) # ----------------------- # Required child methods diff --git a/src/rocprof_compute_soc/analysis_configs/gfx90a/0400_roofline_chart.yaml b/src/rocprof_compute_soc/analysis_configs/gfx90a/0400_roofline_chart.yaml deleted file mode 100644 index 
@demarcate
def section_filter(self):
    """
    Create a set of counters required for the selected report sections.

    For every metric id in ``self.__filter_metric_ids`` the matching analysis
    report config (``<config_dir>/<arch>/<NN>*.yaml``) is loaded, narrowed to
    the requested subsection when the id names one, and scanned with
    ``parse_counters``. The counters found are accumulated in
    ``self.__section_counters``.
    """
    if not self.__filter_metric_ids:
        return

    arch_config_dir = Path(self.__args.config_dir).joinpath(self.__arch)
    for section in self.__filter_metric_ids:
        section_num = convert_metric_id_to_panel_idx(section)
        # Config file names start with the zero-padded panel number ("4" -> "04")
        file_id = str(section_num // 100).zfill(2)
        # Sorted so the chosen file does not depend on directory listing order
        candidates = sorted(
            filename
            for filename in os.listdir(arch_config_dir)
            if filename.endswith(".yaml") and filename.startswith(file_id)
        )
        if not candidates:
            # Report a usable message instead of an IndexError on [0].
            console_error(
                "profiling",
                f"No analysis config found for metric id {section} in {arch_config_dir}",
            )
            # NOTE(review): console_error presumably aborts; if it returns,
            # skip this id rather than crash - confirm intended behavior.
            continue

        with open(arch_config_dir.joinpath(candidates[0]), "r") as stream:
            section_config = yaml.safe_load(stream)

        if section_num % 100:
            # Subsection requested (e.g. 4.3): keep only the matching table.
            # Data sources that are not metric tables (e.g. raw_csv_table)
            # carry no "metric_table" key and are simply not a match.
            section_config_text = "\n".join(
                yaml.dump(subsection)
                for subsection in section_config["Panel Config"]["data source"]
                if (subsection.get("metric_table") or {}).get("id") == section_num
            )
        else:
            # Whole panel requested: scan the complete config
            section_config_text = yaml.dump(section_config)

        self.__section_counters = self.__section_counters.union(
            parse_counters(section_config_text)
        )
--specs) path will not be given + if hasattr(self.__args, "path"): + if self.__args.path == str(Path(os.getcwd()).joinpath("workloads")): + workload_dir = str( + Path(self.__args.path).joinpath( + self.__args.name, self._mspec.gpu_model + ) + ) + else: + workload_dir = self.__args.path + + workload_perfmon_dir = workload_dir + "/perfmon" + + self.__filter_hardware_blocks = [ + name + for name, type in self.get_args().filter_blocks.items() + if type == "hardware_block" + ] + self.__filter_metric_ids = [ + name + for name, type in self.get_args().filter_blocks.items() + if type == "metric_id" + ] + + self.section_filter() # Initialize directories - if not Path(self.__workload_dir).is_dir(): - os.makedirs(self.__workload_dir) - elif not Path(self.__workload_dir).is_symlink(): - shutil.rmtree(self.__workload_dir) + if not Path(workload_dir).is_dir(): + os.makedirs(workload_dir) + elif not Path(workload_dir).is_symlink(): + shutil.rmtree(workload_dir) else: - os.unlink(self.__workload_dir) + os.unlink(workload_dir) os.makedirs(workload_perfmon_dir) @@ -216,16 +281,17 @@ class OmniSoC_Base: ) # Perfmon list filtering - if self.__args.ipblocks != None: - for i in range(len(self.__args.ipblocks)): - self.__args.ipblocks[i] = self.__args.ipblocks[i].lower() + if self.__filter_hardware_blocks: + hardware_blocks = [ + block.lower() for block in self.__filter_hardware_blocks + ] mpattern = "pmc_([a-zA-Z0-9_]+)_perf*" pmc_files_list = [] for fname in ref_pmc_files_list: fbase = Path(fname).stem ip = re.match(mpattern, fbase).group(1) - if ip in self.__args.ipblocks: + if ip in hardware_blocks: pmc_files_list.append(fname) console_log("fname: " + fbase + ": Added") else: @@ -242,8 +308,9 @@ class OmniSoC_Base: perfmon_coalesce( pmc_files_list, self.__perfmon_config, - self.__workload_dir, + workload_dir, self.get_args().spatial_multiplexing, + self.__section_counters, ) # ---------------------------------------------------- @@ -310,7 +377,38 @@ def using_v3(): @demarcate -def 
@demarcate
def parse_counters(config_text):
    """
    Collect all hardware counters mentioned in the given config file content string.

    Counters are taken from ``config_text`` itself, from every formula in
    ``supported_denom``, and from the ``build_in_vars`` definitions of any
    ``$variable`` referenced along the way (followed transitively).

    Returns:
        A sorted list of unique hardware counter names.
    """
    # hw counter name should start with ip block name
    hw_counter_regex = r"(?:SQ|SQC|TA|TD|TCP|TCC|CPC|CPF|SPI|GRBM)_[0-9A-Za-z_]+"
    # only capture the variable name after $ using capturing group
    variable_regex = r"\$([0-9A-Za-z_]+)"

    hw_counter_matches = set(re.findall(hw_counter_regex, config_text))
    pending = set(re.findall(variable_regex, config_text))

    # get hw counters and variables for all supported denominators
    for formula in supported_denom.values():
        hw_counter_matches.update(re.findall(hw_counter_regex, formula))
        pending.update(re.findall(variable_regex, formula))

    # Expand variables level by level. Every variable is expanded at most once:
    # tracking all names seen so far (not just the current level) guarantees
    # termination even if two definitions reference each other.
    seen = set()
    while pending:
        seen |= pending
        discovered = set()
        for var in pending:
            if var in build_in_vars:
                definition = build_in_vars[var]
                hw_counter_matches.update(re.findall(hw_counter_regex, definition))
                discovered.update(re.findall(variable_regex, definition))
        pending = discovered - seen

    # Sorted for a deterministic result; callers only need membership.
    return sorted(hw_counter_matches)
string[: -len(suffix)] + break + return string + + section_counters = {remove_suffixes(counter) for counter in section_counters} + ignored_counters = list() + + if section_counters: + # Remove unnecessary normal counters + for counter_name in list(normal_counters.keys()): + if remove_suffixes(counter_name) not in section_counters: + del normal_counters[counter_name] + ignored_counters.append(counter_name) + + # Remove unnecessary accumulate counters + filtered_accumlate_counters = list() + for counters in accumulate_counters: + if any( + remove_suffixes(counter_name) in section_counters + for counter_name in counters + ): + filtered_accumlate_counters.append(counters) + else: + ignored_counters.extend(counter_name) + accumulate_counters = filtered_accumlate_counters + + if ignored_counters: + console_log( + f"Not collecting following counters per provided filter: {', '.join(ignored_counters)} " + ) + + # Throw error if no counters to be collected + if len(normal_counters) == 0 and len(accumulate_counters) == 0: + console_error( + "profiling", + "No performance counters to collect, please check the provided profiling filters", + ) + output_files = [] accu_file_count = 0 diff --git a/src/roofline.py b/src/roofline.py index bd76480dab..e83f15b85a 100644 --- a/src/roofline.py +++ b/src/roofline.py @@ -25,9 +25,11 @@ import os import time from abc import ABC, abstractmethod +from collections import OrderedDict from pathlib import Path import numpy as np +import pandas as pd import plotly.graph_objects as go from dash import dcc, html @@ -75,12 +77,6 @@ class Roofline: if hasattr(self.__args, "sort") and self.__args.sort != "ALL": self.__run_parameters["sort_type"] = self.__args.sort - if ( - not isinstance(self.__run_parameters["workload_dir"], list) - and self.__run_parameters["workload_dir"] != None - ): - self.roof_setup() - self.validate_parameters() def validate_parameters(self): @@ -110,6 +106,12 @@ class Roofline: ret_df, ): """Generate a set of empirical 
roofline plots given a directory containing required profiling and benchmarking data""" + if ( + not isinstance(self.__run_parameters["workload_dir"], list) + and self.__run_parameters["workload_dir"] != None + ): + self.roof_setup() + # Create arithmetic intensity data that will populate the roofline model console_debug("roofline", "Path: %s" % self.__run_parameters["workload_dir"]) self.__ai_data = calc_ai(self.__mspec, self.__run_parameters["sort_type"], ret_df) @@ -375,9 +377,11 @@ class Roofline: @demarcate def standalone_roofline(self): - from collections import OrderedDict - - import pandas as pd + if ( + not isinstance(self.__run_parameters["workload_dir"], list) + and self.__run_parameters["workload_dir"] != None + ): + self.roof_setup() # Change vL1D to a interpretable str, if required if "vL1D" in self.__run_parameters["mem_level"]: @@ -394,32 +398,6 @@ class Roofline: t_df["pmc_perf"] = pd.read_csv(app_path) self.empirical_roofline(ret_df=t_df) - # Main methods - @abstractmethod - def pre_processing(self): - if self.__args.roof_only: - # check for sysinfo - console_log( - "roofline", "Checking for sysinfo.csv in " + str(self.__args.path) - ) - sysinfo_path = str(Path(self.__args.path).joinpath("sysinfo.csv")) - if not Path(sysinfo_path).is_file(): - console_log("roofline", "sysinfo.csv not found. 
def load_profiling_config(config_dir):
    """
    Load the profiling configuration stored in a workload directory.

    Reads ``profiling_config.yaml`` from ``config_dir`` using
    ``yaml.safe_load``.

    :param config_dir: Directory expected to contain ``profiling_config.yaml``.
    :return: The parsed configuration, or an empty dict when the file is
        missing or contains no YAML document.
    """
    config_path = Path(config_dir).joinpath("profiling_config.yaml")
    try:
        with open(config_path) as file:
            prof_config = yaml.safe_load(file)
    except FileNotFoundError:
        console_log(
            f"Could not find profiling_config.yaml in {config_dir} for filtering analysis report"
        )
        return dict()

    # yaml.safe_load yields None for an empty file; consumers call dict
    # methods such as .get() on a profiling config, so never hand back None.
    if prof_config is None:
        return dict()
    return prof_config
""" comparable_columns = parser.build_comparable_columns(args.time_unit) + filter_panel_ids = [ + convert_metric_id_to_panel_idx(section) + for section in [ + name + for name, type in profiling_config.get("filter_blocks", {}).items() + if type == "metric_id" + ] + ] + comparable_columns = parser.build_comparable_columns(args.time_unit) for panel_id, panel in archConfigs.panel_configs.items(): # Skip panels that don't support baseline comparison @@ -74,6 +83,27 @@ def show_all(args, runs, archConfigs, output): for data_source in panel["data source"]: for type, table_config in data_source.items(): + # If block filtering was used during analysis, then dont use profiling config + # If block filtering was used in profiling config, only show those panels + # If block filtering not used in profiling config, show all panels + # Skip this table if table id or panel id is not present in block filters + # However, always show panel id <= 100 + if ( + not args.filter_metrics + and filter_panel_ids + and table_config["id"] not in filter_panel_ids + and panel_id not in filter_panel_ids + and panel_id > 100 + ): + table_id_str = ( + str(table_config["id"] // 100) + + "." 
+ + str(table_config["id"] % 100) + ) + console_log( + f"Not showing table not selected during profiling: {table_id_str} {table_config['title']}" + ) + continue # take the 1st run as baseline base_run, base_data = next(iter(runs.items())) base_df = base_data.dfs[table_config["id"]] @@ -207,7 +237,25 @@ def show_all(args, runs, archConfigs, output): + str(table_config["id"] % 100) ) - if "title" in table_config and table_config["title"]: + # Check if any column in df is empty + is_empty_columns_exist = any( + [ + df.columns[col_idx] + for col_idx in range(len(df.columns)) + if df.replace("", None).iloc[:, col_idx].isnull().all() + ] + ) + # Do not print the table if any column is empty + if is_empty_columns_exist: + console_log( + f"Not showing table with empty column(s): {table_id_str} {table_config['title']}" + ) + + if ( + "title" in table_config + and table_config["title"] + and not is_empty_columns_exist + ): ss += table_id_str + " " + table_config["title"] + "\n" if args.df_file_dir: @@ -238,10 +286,13 @@ def show_all(args, runs, archConfigs, output): and "columnwise" in table_config and table_config["columnwise"] == True ) - ss += ( - get_table_string(df, transpose=transpose, decimal=args.decimal) - + "\n" - ) + if not is_empty_columns_exist: + ss += ( + get_table_string( + df, transpose=transpose, decimal=args.decimal + ) + + "\n" + ) if ss: print("\n" + "-" * 80, file=output) diff --git a/src/utils/utils.py b/src/utils/utils.py index aada34f17a..fa8c4a3f4c 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -191,7 +191,7 @@ def capture_subprocess_output(subprocess_args, new_env=None, profileMode=False): global rocprof_args # Format command for debug messages, formatting for rocprofv1 and rocprofv2 command = " ".join(rocprof_args) - console_debug("subprocess", "Running: " + command) + console_debug("subprocess", "Running: " + command + " " + " ".join(subprocess_args)) # Start subprocess # bufsize = 1 means output is line buffered # universal_newlines 
def convert_metric_id_to_panel_idx(metric_id):
    """
    Convert a dotted metric id string into its integer index.

    Examples:
        "4.02" -> 402
        "4.23" -> 423
        "4"    -> 400

    :param metric_id: Metric id written as ``"<panel>"`` or
        ``"<panel>.<table>"``.
    :return: ``panel * 100 + table``; ``table`` counts as 0 when omitted.
    :raises ValueError: If the id has more than two dot-separated parts or a
        part is not an integer.
    """
    tokens = metric_id.split(".")
    if len(tokens) > 2:
        raise ValueError(f"Invalid metric id: {metric_id}")
    try:
        panel = int(tokens[0])
        table = int(tokens[1]) if len(tokens) == 2 else 0
    except ValueError:
        # Report the whole id rather than int()'s message about one fragment.
        raise ValueError(f"Invalid metric id: {metric_id}") from None
    return panel * 100 + table
@pytest.fixture
def binary_handler_profile_rocprof_compute(request):
    """
    Provide a callable that runs ``rocprof-compute profile`` for a test.

    When pytest is started with ``--call-binary`` the standalone
    ``build/rocprof-compute.bin`` is executed through ``subprocess``;
    otherwise ``rocprof_compute.main()`` is called in-process with
    ``sys.argv`` patched to the same argument list.
    """

    def _handler(config, workload_dir, options=None, check_success=True, roof=False):
        """
        Profile ``config["app_1"]`` into ``workload_dir``.

        :param config: Mapping whose ``"app_1"`` entry is the application
            command as a list of arguments.
        :param workload_dir: Value passed to ``--path``.
        :param options: Extra command-line options placed before ``--path``.
        :param check_success: When true, assert that the exit code is 0.
        :param roof: When false, ``--no-roof`` is added to the command.
        :return: Exit code of the profiling run.
        """
        # A None default avoids sharing one mutable list between calls.
        extra_opts = [] if options is None else list(options)
        call_binary = request.config.getoption("--call-binary")

        launcher = "build/rocprof-compute.bin" if call_binary else "rocprof-compute"
        command = [launcher, "profile", "-n", "app_1", "-VVV"]
        if not roof:
            command.append("--no-roof")
        command += extra_opts + ["--path", workload_dir, "--"] + config["app_1"]

        if call_binary:
            process = subprocess.run(command, text=True)
            exit_code = process.returncode
        else:
            # The in-process run must end with SystemExit; its code is the result.
            with pytest.raises(SystemExit) as e:
                with patch("sys.argv", command):
                    rocprof_compute.main()
            exit_code = e.value.code

        # verify run status
        if check_success:
            assert exit_code == 0
        return exit_code

    return _handler


@pytest.fixture
def binary_handler_analyze_rocprof_compute(request):
    """
    Provide a callable that runs ``rocprof-compute`` with the given arguments.

    Uses the standalone binary when ``--call-binary`` is set, otherwise calls
    ``rocprof_compute.main()`` in-process with ``sys.argv`` patched.
    """

    def _handler(arguments):
        """
        Run the tool with ``arguments`` and return its exit code.

        :param arguments: Iterable of command-line arguments following the
            program name.
        :return: Exit code of the run.
        """
        if request.config.getoption("--call-binary"):
            process = subprocess.run(
                ["build/rocprof-compute.bin", *arguments],
                text=True,
            )
            return process.returncode

        # The in-process run must end with SystemExit; its code is the result.
        with pytest.raises(SystemExit) as e:
            with patch("sys.argv", ["rocprof-compute", *arguments]):
                rocprof_compute.main()
        return e.value.code

    return _handler
@pytest.mark.section
def test_instmix_section(binary_handler_profile_rocprof_compute):
    """Profile with ``--block 10`` and check the saved filter and counters."""
    workload_dir = test_utils.get_output_dir()
    binary_handler_profile_rocprof_compute(
        config, workload_dir, ["--block", "10"], check_success=True, roof=False
    )

    file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
    # inspect.stack()[0][3] is this function's name, so it stays in this frame.
    validate(inspect.stack()[0][3], workload_dir, file_dict)

    expected_patterns = (
        ("'10': metric_id", f"{workload_dir}/profiling_config.yaml"),
        ("SQ_INSTS_VALU_MFMA_F64", f"{workload_dir}/pmc_perf.csv"),
    )
    for pattern, file_path in expected_patterns:
        assert test_utils.check_file_pattern(pattern, file_path)

    test_utils.clean_output_dir(config["cleanup"], workload_dir)
@pytest.mark.section
def test_instmix_section_TA_block(binary_handler_profile_rocprof_compute):
    """Profile with ``--block 10 TA`` and check the saved filters and counters."""
    workload_dir = test_utils.get_output_dir()
    binary_handler_profile_rocprof_compute(
        config,
        workload_dir,
        ["--block", "10", "TA"],
        check_success=True,
        roof=False,
    )

    file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
    # inspect.stack()[0][3] is this function's name, so it stays in this frame.
    validate(inspect.stack()[0][3], workload_dir, file_dict)

    config_yaml = f"{workload_dir}/profiling_config.yaml"
    pmc_csv = f"{workload_dir}/pmc_perf.csv"

    # Both the metric id filter and the hardware block filter are recorded.
    assert test_utils.check_file_pattern("'10': metric_id", config_yaml)
    assert test_utils.check_file_pattern("TA: hardware_block", config_yaml)

    assert test_utils.check_file_pattern("TA_FLAT_WAVEFRONTS", pmc_csv)
    assert not test_utils.check_file_pattern("SQC_TC_DATA_READ_REQ", pmc_csv)
    # NOTE(review): an empty regex matches any content, so this assertion only
    # proves pmc_perf.csv can be opened - confirm which pattern was intended.
    assert test_utils.check_file_pattern("", pmc_csv)

    test_utils.clean_output_dir(config["cleanup"], workload_dir)
def check_file_pattern(pattern, file_path):
    """
    Check whether a regular expression matches anywhere in a file.

    :param pattern: Regular expression handed to ``re.search``.
    :param file_path: Path of the text file to read.
    :return: True if the pattern matches somewhere in the file's content,
        otherwise False.
    """
    with open(file_path) as f:
        content = f.read()
    # re.search stops at the first match; there is no need to collect them all.
    return re.search(pattern, content) is not None
@@ -130,69 +133,3 @@ def check_csv_files(output_dir, num_devices, num_kernels): elif file.endswith(".pdf"): file_dict[file] = "pdf" return file_dict - - -@pytest.fixture -def binary_handler_profile_rocprof_compute(request): - def _handler(config, workload_dir, options=[], check_success=True, roof=False): - if request.config.getoption("--call-binary"): - baseline_opts = [ - "build/rocprof-compute.bin", - "profile", - "-n", - "app_1", - "-VVV", - ] - if not roof: - baseline_opts.append("--no-roof") - process = subprocess.run( - baseline_opts - + options - + ["--path", workload_dir, "--"] - + config["app_1"], - text=True, - ) - # verify run status - if check_success: - assert process.returncode == 0 - return process.returncode - else: - baseline_opts = ["rocprof-compute", "profile", "-n", "app_1", "-VVV"] - if not roof: - baseline_opts.append("--no-roof") - with pytest.raises(SystemExit) as e: - with patch( - "sys.argv", - baseline_opts - + options - + ["--path", workload_dir, "--"] - + config["app_1"], - ): - rocprof_compute.main() - # verify run status - if check_success: - assert e.value.code == 0 - return e.value.code - - return _handler - - -@pytest.fixture -def binary_handler_analyze_rocprof_compute(request): - def _handler(arguments): - if request.config.getoption("--call-binary"): - process = subprocess.run( - ["build/rocprof-compute.bin", *arguments], - text=True, - ) - return process.returncode - else: - with pytest.raises(SystemExit) as e: - with patch( - "sys.argv", - ["rocprof-compute", *arguments], - ): - rocprof_compute.main() - return e.value.code - - return _handler