From 98bb0f4237fca93a05f26945db29c034da22d12e Mon Sep 17 00:00:00 2001 From: vedithal-amd Date: Mon, 21 Jul 2025 09:37:35 -0400 Subject: [PATCH] Remove hardware IP block based filtering (#820) * Analysis report block based filtering is the default now * Update documentation * Update CHANGELOG * Fix tests * Replace hardware block based filtering tests with report block based filtering tests --- CHANGELOG.md | 1 + CMakeLists.txt | 8 - docs/how-to/profile/mode.rst | 54 +- docs/how-to/use.rst | 4 +- pyproject.toml | 1 - src/argparser.py | 34 +- src/rocprof_compute_base.py | 8 - src/rocprof_compute_profile/profiler_base.py | 29 +- src/rocprof_compute_soc/soc_base.py | 26 +- src/rocprof_compute_tui/utils/tui_utils.py | 13 +- src/utils/tty.py | 13 +- src/utils/utils.py | 16 +- tests/test_profile_general.py | 1031 +---------------- .../no_roof/MI350/profiling_config.yaml | 2 +- .../vcopy/MI350/profiling_config.yaml | 2 +- 15 files changed, 52 insertions(+), 1190 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ec2b257f16..5d7c4ac79f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -116,6 +116,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Roofline support for Ubuntu 20.04 and SLES below 15.6 * Usage of rocm-smi * Remove support for MI50/MI60 in accordance with the documentation +* Hardware IP block based filtering has been removed in favor of analysis report block based filtering ## ROCm Compute Profiler 3.1.0 for ROCm 6.4.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index 1287691670..ac0438b59a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,14 +214,6 @@ add_test( ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) -add_test( - NAME test_profile_ipblocks - COMMAND - ${Python3_EXECUTABLE} -m pytest -m block --junitxml=tests/test_profile_blocks.xml - ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) -set_property(TEST test_profile_ipblocks PROPERTY COST 11) - add_test( NAME test_profile_dispatch COMMAND diff --git a/docs/how-to/profile/mode.rst b/docs/how-to/profile/mode.rst index 834358c0ba..d8fba13205 100644 --- a/docs/how-to/profile/mode.rst +++ b/docs/how-to/profile/mode.rst @@ -232,7 +232,7 @@ Filtering options ----------------- ``-b``, ``--block `` - Allows system profiling on one or more selected hardware report blocks to speed + Allows system profiling on one or more selected analysis report blocks to speed up the profiling process. See :ref:`profiling-hw-component-filtering`. ``-k``, ``--kernel `` @@ -253,11 +253,11 @@ Filtering options .. _profiling-hw-component-filtering: -Hardware report block filtering +Analysis report block filtering ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can profile specific hardware report blocks to speed up the profiling process. -In ROCm Compute Profiler, the term hardware report block refers to a section of the +In ROCm Compute Profiler, the term analysis report block refers to a section of the analysis report which focuses on metrics associated with a hardware component or a group of hardware components. All profiling results are accumulated in the same target directory without overwriting those for other hardware components. @@ -332,54 +332,6 @@ To see a list of available hardware report blocks, use the ``--list-metrics`` op 6.1.2 -> Workgroup Manager Utilization -It is also possible to filter counter collection by hardware component such as Shader Sequencer (SQ) -and L2 cache (TCC) as shown below. - -.. code-block:: shell-session - - $ rocprof-compute profile --name vcopy -b 10 7 -- ./vcopy -n 1048576 -b 256 - - __ _ - _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ - | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ - | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ - |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| - |_| |_| - - fname: pmc_cpc_perf: Skipped - fname: pmc_spi_perf: Skipped - fname: pmc_cpf_perf: Skipped - fname: pmc_tcp_perf: Skipped - fname: pmc_sq_perf4: Added - fname: pmc_tcc_perf: Added - fname: pmc_sq_perf8: Added - fname: pmc_ta_perf: Skipped - fname: pmc_sq_perf1: Added - fname: pmc_sq_perf3: Added - fname: pmc_td_perf: Skipped - fname: pmc_tcc2_perf: Skipped - fname: pmc_sqc_perf1: Skipped - fname: pmc_sq_perf6: Added - fname: pmc_sq_perf2: Added - rocprofiler-compute version: 2.0.0 - Profiler choice: rocprofv1 - Path: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200 - Target: MI200 - Command: ./vcopy -n 1048576 -b 256 - Kernel Selection: None - Dispatch Selection: None - Hardware Blocks: ['sq', 'tcc'] - Report Sections: [] - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Collecting Performance Counters - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ... - -.. warning:: - - Filtering by hardware components (e.g. SQ, TCC) will soon be deprecated. - It is recommended to use hardware report block based filtering. .. _profiling-kernel-filtering: diff --git a/docs/how-to/use.rst b/docs/how-to/use.rst index f361df142d..594939521f 100644 --- a/docs/how-to/use.rst +++ b/docs/how-to/use.rst @@ -57,7 +57,7 @@ Common filters to customize data collection include: Enables filtering based on dispatch ID. ``-b``, ``--block`` - Enables collection metrics for only the specified hardware report blocks. + Enables collection metrics for only the specified analysis report blocks. See :ref:`Filtering ` for an in-depth walkthrough. @@ -80,7 +80,7 @@ interface with profiling results. View different metrics derived from your profiled results and get immediate access all metrics organized by hardware blocks. -If you don't apply kernel, dispatch, or hardware report block filters at this stage, +If you don't apply kernel, dispatch, or analysis report block filters at this stage, analysis is reflective of the entirety of the profiling data. To interact with profiling results from a different session, provide the diff --git a/pyproject.toml b/pyproject.toml index 696427d52b..608f8b02a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,6 @@ pythonpath = [ markers = [ "section", "kernel_execution", - "block", "misc", "mem", "sort", diff --git a/src/argparser.py b/src/argparser.py index 58e31838b4..8959270113 100644 --- a/src/argparser.py +++ b/src/argparser.py @@ -178,50 +178,24 @@ Examples: help="\t\t\tDispatch ID filtering.", ) - class AggregateDict(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): - aggregated_dict = getattr(namespace, self.dest, {}) - if aggregated_dict is None: - aggregated_dict = {} - for key, value in values: - aggregated_dict[key] = value - setattr(namespace, self.dest, aggregated_dict) - def validate_block(value): # Metric id regex, for example, 10, 4, 4.3, 4.32 # Dont allow more than two digits after decimal point metric_id_pattern = re.compile(r"^\d+$|^\d+\.\d$|^\d+\.\d\d$") - # Allow only the following hardware blocks - hardware_block_pattern = re.compile(r"^(SQ|SQC|TA|TD|TCP|TCC|SPI|CPC|CPF)$") if metric_id_pattern.match(value): - return (str(value), "metric_id") - if hardware_block_pattern.match(value): - return (str(value), "hardware_block") - raise argparse.ArgumentTypeError(f"Invalid hardware block or metric id: {value}") + return value + raise argparse.ArgumentTypeError(f"Invalid metric id: {value}") profile_group.add_argument( "-b", "--block", type=validate_block, - action=AggregateDict, dest="filter_blocks", metavar="", nargs="+", required=False, - default={}, - help="""\t\t\tSpecify metric id(s) from --list-metrics for filtering (e.g. 10, 4, 4.3). - \t\t\tCan provide multiple space separated arguments. - \t\t\tCan also accept Hardware blocks. - \t\t\tHardware block filtering (to be deprecated soon): - \t\t\t SQ - \t\t\t SQC - \t\t\t TA - \t\t\t TD - \t\t\t TCP - \t\t\t TCC - \t\t\t SPI - \t\t\t CPC - \t\t\t CPF""", + default=[], + help="""\t\t\tSpecify metric id(s) from --list-metrics for filtering (e.g. 10, 4, 4.3).\n\t\t\tCan provide multiple space separated arguments.""", ) profile_group.add_argument( "--list-metrics", diff --git a/src/rocprof_compute_base.py b/src/rocprof_compute_base.py index 828ac9bd48..5b2d6137e7 100644 --- a/src/rocprof_compute_base.py +++ b/src/rocprof_compute_base.py @@ -246,14 +246,6 @@ class RocProfCompute: if self.__args.name.find("/") != -1: console_error("'/' not permitted in profile name") - # Deprecation warning for hardware blocks - if [ - name - for name, type in self.__args.filter_blocks.items() - if type == "hardware_block" - ]: - console_warning("Hardware block based filtering will be deprecated soon") - # FIXME: # Changing default path should be done at the end of arg parsing stage, # unless there is a specific reason to do here. diff --git a/src/rocprof_compute_profile/profiler_base.py b/src/rocprof_compute_profile/profiler_base.py index bbc83fd7b6..b1dc25721a 100644 --- a/src/rocprof_compute_profile/profiler_base.py +++ b/src/rocprof_compute_profile/profiler_base.py @@ -56,14 +56,6 @@ class RocProfCompute_Base: self.__profiler = profiler_mode self.__supported_archs = supported_archs self._soc = soc # OmniSoC obj - self.__filter_hardware_blocks = [ - name for name, type in args.filter_blocks.items() if type == "hardware_block" - ] - self.__filter_metric_ids = [ - name for name, type in args.filter_blocks.items() if type == "metric_id" - ] - # Fixme: remove the hack code "21" after we could enable pc sampling as default - self.__pc_sampling = True if "21" in self.__filter_metric_ids else False def get_args(self): return self.__args @@ -309,14 +301,8 @@ class RocProfCompute_Base: gen_sysinfo( workload_name=self.__args.name, workload_dir=self.get_args().path, - ip_blocks=[ - name - for name, type in self.__args.filter_blocks.items() - if type == "hardware_block" - ], app_cmd=self.__args.remaining, skip_roof=self.__args.no_roof, - roof_only=self.__args.roof_only, mspec=self._soc._mspec, soc=self._soc, ) @@ -336,14 +322,10 @@ class RocProfCompute_Base: console_log("Command: " + str(self.__args.remaining)) console_log("Kernel Selection: " + str(self.__args.kernel)) console_log("Dispatch Selection: " + str(self.__args.dispatch)) - if self.__filter_hardware_blocks == None: - console_log("Hardware Blocks: All") - else: - console_log("Hardware Blocks: " + str(self.__filter_hardware_blocks)) - if self.__filter_metric_ids == None: + if self.get_args().filter_blocks is None: console_log("Report Sections: All") else: - console_log("Report Sections: " + str(self.__filter_metric_ids)) + console_log("Report Sections: " + str(self.get_args().filter_blocks)) msg = "Collecting Performance Counters" ( @@ -443,7 +425,8 @@ class RocProfCompute_Base: else: console_error("Profiler not supported") total_profiling_time_so_far += actual_profiling_duration - if self.__pc_sampling == True and self.__profiler in ( + # PC sampling data is only collected when block "21" is specified + if "21" in self.get_args().filter_blocks and self.__profiler in ( "rocprofv3", "rocprofiler-sdk", ): @@ -460,8 +443,8 @@ class RocProfCompute_Base: pc_sampling_duration = end_run_prof - start_run_prof console_debug( "The time of pc sampling profiling is {} m {} sec".format( - int((end_run_prof - start_run_prof) / 60), - str((end_run_prof - start_run_prof) % 60), + int((pc_sampling_duration) / 60), + str((pc_sampling_duration) % 60), ) ) diff --git a/src/rocprof_compute_soc/soc_base.py b/src/rocprof_compute_soc/soc_base.py index 84c5594647..e86789ddea 100644 --- a/src/rocprof_compute_soc/soc_base.py +++ b/src/rocprof_compute_soc/soc_base.py @@ -267,13 +267,8 @@ class OmniSoC_Base: ) if filename.endswith(".yaml") } - metric_ids = [ - name - for name, type in self.get_args().filter_blocks.items() - if type == "metric_id" - ] file_ids = [] - for section in metric_ids: + for section in self.get_args().filter_blocks: section_num = convert_metric_id_to_panel_idx(section) file_id = str(section_num // 100) # Convert "4" to "04" @@ -282,16 +277,17 @@ class OmniSoC_Base: file_ids.append(file_id) # Apply sub section filtering for config_filename in config_filenames: - if config_filename.startswith(file_id) and section_num % 100: + # If first two characters of the config filename match the file_id + if config_filename[:2].startswith(file_id) and section_num % 100: config_filenames[config_filename].append(section_num) # Apply section filters only if metric ids have been provided for filtering - if metric_ids: + if self.get_args().filter_blocks: # Identify yaml files corresponding to file_ids config_filenames = { filename: subsections for filename, subsections in config_filenames.items() - if filename.startswith(tuple(file_ids)) + if filename[:2].startswith(tuple(file_ids)) } for config_filename, subsections in config_filenames.items(): @@ -362,18 +358,6 @@ class OmniSoC_Base: counters = counters.union(set(m.group(1).split())) else: counters = self.detect_counters() - # Perfmon hardware block filtering - filter_hardware_blocks = [ - name - for name, type in self.get_args().filter_blocks.items() - if type == "hardware_block" - ] - if filter_hardware_blocks: - counters = { - counter_name - for counter_name in counters - if counter_name.startswith(tuple(filter_hardware_blocks)) - } if not using_v3(): # Counters not supported in rocprof v1 / v2 diff --git a/src/rocprof_compute_tui/utils/tui_utils.py b/src/rocprof_compute_tui/utils/tui_utils.py index 040234a915..44021a1cf3 100644 --- a/src/rocprof_compute_tui/utils/tui_utils.py +++ b/src/rocprof_compute_tui/utils/tui_utils.py @@ -306,13 +306,14 @@ def process_panels_to_dataframes( """ comparable_columns = build_comparable_columns(args.time_unit) - filter_panel_ids = [ - convert_metric_id_to_panel_idx(section) - for section in [ - name - for name, type in profiling_config.get("filter_blocks", {}).items() - if type == "metric_id" + filter_panel_ids = profiling_config.get("filter_blocks", []) + if isinstance(filter_panel_ids, dict): + # For backward compatibility + filter_panel_ids = [ + name for name, type in filter_panel_ids.items() if type == "metric_id" ] + filter_panel_ids = [ + convert_metric_id_to_panel_idx(section) for section in filter_panel_ids ] # Initialize the result structure diff --git a/src/utils/tty.py b/src/utils/tty.py index 9c7d0c3049..a9fa977b5e 100644 --- a/src/utils/tty.py +++ b/src/utils/tty.py @@ -64,13 +64,14 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None): Show all panels with their data in plain text mode. """ comparable_columns = parser.build_comparable_columns(args.time_unit) - filter_panel_ids = [ - convert_metric_id_to_panel_idx(section) - for section in [ - name - for name, type in profiling_config.get("filter_blocks", {}).items() - if type == "metric_id" + filter_panel_ids = profiling_config.get("filter_blocks", []) + if isinstance(filter_panel_ids, dict): + # For backward compatibility + filter_panel_ids = [ + name for name, type in filter_panel_ids.items() if type == "metric_id" ] + filter_panel_ids = [ + convert_metric_id_to_panel_idx(section) for section in filter_panel_ids ] comparable_columns = parser.build_comparable_columns(args.time_unit) diff --git a/src/utils/utils.py b/src/utils/utils.py index 2f14e1503e..c12611405b 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -1142,9 +1142,7 @@ def replace_timestamps(workload_dir): ) -def gen_sysinfo( - workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof_only, mspec, soc -): +def gen_sysinfo(workload_name, workload_dir, app_cmd, skip_roof, mspec, soc): console_debug("[gen_sysinfo]") df = mspec.get_class_members() @@ -1152,12 +1150,7 @@ def gen_sysinfo( df["command"] = app_cmd df["workload_name"] = workload_name - blocks = [] - if not ip_blocks: - t = ["SQ", "LDS", "SQC", "TA", "TD", "TCP", "TCC", "SPI", "CPC", "CPF"] - blocks += t - else: - blocks += ip_blocks + blocks = ["SQ", "LDS", "SQC", "TA", "TD", "TCP", "TCC", "SPI", "CPC", "CPF"] if hasattr(soc, "roofline_obj") and (not skip_roof): blocks.append("roofline") df["ip_blocks"] = "|".join(blocks) @@ -1550,10 +1543,9 @@ def convert_metric_id_to_panel_idx(metric_id): tokens = metric_id.split(".") if len(tokens) == 1: return int(tokens[0]) * 100 - elif len(tokens) == 2: + if len(tokens) == 2: return int(tokens[0]) * 100 + int(tokens[1]) - else: - raise Exception(f"Invalid metric id: {metric_id}") + raise Exception(f"Invalid metric id: {metric_id}") def format_time(seconds): diff --git a/tests/test_profile_general.py b/tests/test_profile_general.py index d487532f63..294c5a1a9e 100644 --- a/tests/test_profile_general.py +++ b/tests/test_profile_general.py @@ -1125,1004 +1125,6 @@ def test_kernel(binary_handler_profile_rocprof_compute): test_utils.clean_output_dir(config["cleanup"], workload_dir) -@pytest.mark.block -def test_block_SQ(binary_handler_profile_rocprof_compute): - options = ["--block", "SQ"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "sysinfo.csv", - ] - if soc == "MI200" or "MI300" in soc: - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_SQC(binary_handler_profile_rocprof_compute): - options = ["--block", "SQC"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_TA(binary_handler_profile_rocprof_compute): - options = ["--block", "TA"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_TD(binary_handler_profile_rocprof_compute): - options = ["--block", "TD"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "sysinfo.csv", - ] - if soc == "MI200" or "MI300" in soc: - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "sysinfo.csv", - ] - if using_v3(): - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "sysinfo.csv", - ] - if soc == "MI350": - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_TCP(binary_handler_profile_rocprof_compute): - options = ["--block", "TCP"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "pmc_perf_10.csv", - "pmc_perf_11.csv", - "pmc_perf_12.csv", - "pmc_perf_13.csv", - "sysinfo.csv", - ] - - if soc == "MI100" or soc == "MI200": - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_TCC(binary_handler_profile_rocprof_compute): - options = ["--block", "TCC"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "pmc_perf_10.csv", - "pmc_perf_11.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "pmc_perf_10.csv", - "pmc_perf_11.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "pmc_perf_10.csv", - "pmc_perf_11.csv", - "pmc_perf_12.csv", - "pmc_perf_13.csv", - "pmc_perf_14.csv", - "pmc_perf_15.csv", - "pmc_perf_16.csv", - "pmc_perf_17.csv", - "pmc_perf_18.csv", - "pmc_perf_19.csv", - "sysinfo.csv", - ] - - if soc == "MI100" or soc == "MI200": - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "pmc_perf_10.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "pmc_perf_10.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_SPI(binary_handler_profile_rocprof_compute): - options = ["--block", "SPI"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_CPC(binary_handler_profile_rocprof_compute): - options = ["--block", "CPC"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate(inspect.stack()[0][3], workload_dir, file_dict) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_CPF(binary_handler_profile_rocprof_compute): - options = ["--block", "CPF"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "sysinfo.csv", - ] - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_SQ_CPC(binary_handler_profile_rocprof_compute): - options = ["--block", "SQ", "CPC"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "sysinfo.csv", - ] - if soc == "MI200" or "MI300" in soc: - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_SQ_TA(binary_handler_profile_rocprof_compute): - options = ["--block", "SQ", "TA"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "sysinfo.csv", - ] - if soc == "MI200" or "MI300" in soc: - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate(inspect.stack()[0][3], workload_dir, file_dict) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_SQ_SPI(binary_handler_profile_rocprof_compute): - options = ["--block", "SQ", "SPI"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "sysinfo.csv", - ] - if soc == "MI200" or "MI300" in soc: - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_SQ_SQC_TCP_CPC(binary_handler_profile_rocprof_compute): - options = ["--block", "SQ", "SQC", "TCP", "CPC"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "sysinfo.csv", - ] - if soc == "MI200" or "MI300" in soc: - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate(inspect.stack()[0][3], workload_dir, file_dict) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - -@pytest.mark.block -def test_block_SQ_SPI_TA_TCC_CPF(binary_handler_profile_rocprof_compute): - options = ["--block", "SQ", "SPI", "TA", "TCC", "CPF"] - workload_dir = test_utils.get_output_dir() - binary_handler_profile_rocprof_compute(config, workload_dir, options) - - file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "sysinfo.csv", - ] - - if soc == "MI100": - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "sysinfo.csv", - ] - - if soc == "MI200" or "MI300" in soc: - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if using_v3(): - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", - ] - - if soc == "MI350": - expected_csvs = [ - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "pmc_perf_10.csv", - "pmc_perf_11.csv", - "pmc_perf_12.csv", - "pmc_perf_13.csv", - "pmc_perf_14.csv", - "sysinfo.csv", - ] - - assert sorted(list(file_dict.keys())) == sorted(expected_csvs) - - validate( - inspect.stack()[0][3], - workload_dir, - file_dict, - ) - - test_utils.clean_output_dir(config["cleanup"], workload_dir) - - @pytest.mark.dispatch def test_dispatch_0(binary_handler_profile_rocprof_compute): options = ["--dispatch", "0"] @@ -2454,8 +1456,8 @@ def test_roof_mem_levels_LDS(binary_handler_profile_rocprof_compute): @pytest.mark.section -def test_instmix_section(binary_handler_profile_rocprof_compute): - options = ["--block", "10"] +def test_lds_section(binary_handler_profile_rocprof_compute): + options = ["--block", "12"] workload_dir = test_utils.get_output_dir() _ = binary_handler_profile_rocprof_compute( config, workload_dir, options, check_success=True, roof=False @@ -2469,11 +1471,9 @@ def test_instmix_section(binary_handler_profile_rocprof_compute): ) assert test_utils.check_file_pattern( - "'10': metric_id", f"{workload_dir}/profiling_config.yaml" - ) - assert test_utils.check_file_pattern( - "TA_FLAT_WAVEFRONTS", f"{workload_dir}/pmc_perf.csv" + "- '12'", f"{workload_dir}/profiling_config.yaml" ) + assert test_utils.check_file_pattern("SQ_INSTS_LDS", f"{workload_dir}/pmc_perf.csv") test_utils.clean_output_dir(config["cleanup"], workload_dir) @@ -2493,11 +1493,9 @@ def test_instmix_memchart_section(binary_handler_profile_rocprof_compute): ) assert test_utils.check_file_pattern( - "'10': metric_id", f"{workload_dir}/profiling_config.yaml" - ) - assert test_utils.check_file_pattern( - "'3': metric_id", f"{workload_dir}/profiling_config.yaml" + "- '10'", f"{workload_dir}/profiling_config.yaml" ) + assert test_utils.check_file_pattern("- '3'", f"{workload_dir}/profiling_config.yaml") assert test_utils.check_file_pattern( "TA_FLAT_WAVEFRONTS", f"{workload_dir}/pmc_perf.csv" ) @@ -2508,8 +1506,8 @@ def test_instmix_memchart_section(binary_handler_profile_rocprof_compute): @pytest.mark.section -def test_instmix_section_TA_block(binary_handler_profile_rocprof_compute): - options = ["--block", "10", "TA"] +def test_lds_sol_section(binary_handler_profile_rocprof_compute): + options = ["--block", "12.1"] workload_dir = test_utils.get_output_dir() _ = binary_handler_profile_rocprof_compute( config, workload_dir, options, check_success=True, roof=False @@ -2523,18 +1521,11 @@ def test_instmix_section_TA_block(binary_handler_profile_rocprof_compute): ) assert test_utils.check_file_pattern( - "'10': metric_id", f"{workload_dir}/profiling_config.yaml" + "- '12.1'", f"{workload_dir}/profiling_config.yaml" ) assert test_utils.check_file_pattern( - "TA: hardware_block", f"{workload_dir}/profiling_config.yaml" + "SQ_ACTIVE_INST_LDS", f"{workload_dir}/pmc_perf.csv" ) - assert test_utils.check_file_pattern( - "TA_FLAT_WAVEFRONTS", f"{workload_dir}/pmc_perf.csv" - ) - assert not test_utils.check_file_pattern( - "SQC_TC_DATA_READ_REQ", f"{workload_dir}/pmc_perf.csv" - ) - assert test_utils.check_file_pattern("", f"{workload_dir}/pmc_perf.csv") test_utils.clean_output_dir(config["cleanup"], workload_dir) @@ -2559,7 +1550,7 @@ def test_instmix_section_global_write_kernel(binary_handler_profile_rocprof_comp ) assert test_utils.check_file_pattern( - "'10': metric_id", f"{workload_dir}/profiling_config.yaml" + "- '10'", f"{workload_dir}/profiling_config.yaml" ) assert test_utils.check_file_pattern( "- global_write", f"{workload_dir}/profiling_config.yaml" diff --git a/tests/workloads/no_roof/MI350/profiling_config.yaml b/tests/workloads/no_roof/MI350/profiling_config.yaml index 8182466e4b..745cc95955 100644 --- a/tests/workloads/no_roof/MI350/profiling_config.yaml +++ b/tests/workloads/no_roof/MI350/profiling_config.yaml @@ -1,7 +1,7 @@ config_dir: /app/src/rocprof_compute_soc/analysis_configs device: -1 dispatch: null -filter_blocks: {} +filter_blocks: [] format_rocprof_output: csv hip_trace: false join_type: grid diff --git a/tests/workloads/vcopy/MI350/profiling_config.yaml b/tests/workloads/vcopy/MI350/profiling_config.yaml index 8182466e4b..745cc95955 100644 --- a/tests/workloads/vcopy/MI350/profiling_config.yaml +++ b/tests/workloads/vcopy/MI350/profiling_config.yaml @@ -1,7 +1,7 @@ config_dir: /app/src/rocprof_compute_soc/analysis_configs device: -1 dispatch: null -filter_blocks: {} +filter_blocks: [] format_rocprof_output: csv hip_trace: false join_type: grid