diff --git a/projects/rocprofiler-compute/CMakeLists.txt b/projects/rocprofiler-compute/CMakeLists.txt index 2954e9b29c..5f0963bbf2 100644 --- a/projects/rocprofiler-compute/CMakeLists.txt +++ b/projects/rocprofiler-compute/CMakeLists.txt @@ -254,6 +254,14 @@ add_test( ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +add_test( + NAME test_profile_sets_func + COMMAND + ${Python3_EXECUTABLE} -m pytest -m sets_func + --junitxml=tests/test_profile_sets_func.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + set_tests_properties( test_profile_kernel_execution test_profile_dispatch test_profile_mem test_profile_join test_profile_sort test_profile_misc PROPERTIES LABELS "profile" RESOURCE_GROUPS diff --git a/projects/rocprofiler-compute/pyproject.toml b/projects/rocprofiler-compute/pyproject.toml index 608f8b02a3..202d61854f 100644 --- a/projects/rocprofiler-compute/pyproject.toml +++ b/projects/rocprofiler-compute/pyproject.toml @@ -41,35 +41,37 @@ addopts = [ ] pythonpath = [ - ".", - "src", - "src/rocprof_compute_soc", - "src/utils", - "src/rocprof_compute_analyze/utils", - "tests" - ] + ".", + "src", + "src/rocprof_compute_soc", + "src/utils", + "src/rocprof_compute_analyze/utils", + "tests" +] markers = [ - "section", - "kernel_execution", - "misc", - "mem", - "sort", - "join", - "verbosity", - "dispatch", - "list_metrics", - "filter_block", - "filter_kernel", - "dispatch", - "normal_unit", - "max_stat", - "time_unit", - "decimal", - "col", - "kernel_verbose", - "serial", - "L1_cache", - "num_xcds_spec_class", - "num_xcds_cli_output", + "section", + "kernel_execution", + "misc", + "mem", + "sort", + "join", + "verbosity", + "dispatch", + "list_metrics", + "filter_block", + "filter_kernel", + "dispatch", + "normal_unit", + "max_stat", + "time_unit", + "decimal", + "col", + "kernel_verbose", + "serial", + "L1_cache", + "num_xcds_spec_class", + 
@demarcate
def list_sets(self):
    """Print the metric sets defined for the current GPU architecture, then exit.

    The sets are loaded with ``parse_sets_yaml`` for ``self.__mspec.gpu_arch``
    and rendered as a fixed-width table with one row per metric. The set
    option and title are shown only on the first row of each set. A set that
    has no usable metric entries still gets a row, so it is not silently
    missing from the listing.

    Always terminates the process with ``sys.exit(0)`` once the table and the
    usage examples have been printed.
    """
    sets_info = parse_sets_yaml(self.__mspec.gpu_arch)

    if not sets_info:
        console_error("No sets configuration found.")

    # Column layout of the table; the rules are slightly wider than the rows.
    option_width, title_width, name_width, id_width = 35, 35, 30, 10
    rule_width = 115

    def format_row(option, title, name, metric_id):
        # str() keeps non-string YAML scalars (None, numbers) from breaking
        # the alignment format spec.
        return (
            f"{str(option):<{option_width}} {str(title):<{title_width}} "
            f"{str(name):<{name_width}} {str(metric_id):<{id_width}}"
        )

    print("\nAvailable Sets:")
    print("=" * rule_width)
    print(format_row("Set Option", "Set Title", "Metric Name", "Metric ID"))
    print("-" * rule_width)

    for set_option, set_data in sets_info.items():
        title = set_data.get("title", set_option)
        # ``metric:`` may be present but empty in the YAML, which loads as None.
        metrics = set_data.get("metric") or []

        # Each usable entry is a one-item mapping {metric id: metric name}.
        rows = [
            next(iter(metric.items()))
            for metric in metrics
            if isinstance(metric, dict) and metric
        ]

        if not rows:
            # Still list the set itself when it has no metric rows.
            print(format_row(set_option, title, "", ""))

        for row_index, (metric_id, metric_name) in enumerate(rows):
            # Only show set info on the first row of each set.
            first_row = row_index == 0
            print(
                format_row(
                    set_option if first_row else "",
                    title if first_row else "",
                    metric_name,
                    metric_id,
                )
            )

        # Empty line between sets.
        print()

    print("Usage Examples:")
    # Guard kept in case console_error() returns instead of terminating.
    if sets_info:
        first_set = next(iter(sets_info))
        print(f" rocprof-compute profile --set {first_set} # Profile this set")
    print(" rocprof-compute profile --list-sets # Show this help")
    print()

    sys.exit(0)
-350,6 +349,10 @@ class RocProfCompute_Base: console_log("Command: " + str(self.__args.remaining)) console_log("Kernel Selection: " + str(self.__args.kernel)) console_log("Dispatch Selection: " + str(self.__args.dispatch)) + + if self.get_args().set_selected: + console_log("Set Selection: " + str(self.__args.set_selected)) + if self.get_args().filter_blocks is None: console_log("Report Sections: All") else: diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx908_sets.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx908_sets.yaml new file mode 100644 index 0000000000..939d12f04d --- /dev/null +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx908_sets.yaml @@ -0,0 +1,37 @@ +# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py +sets: +- title: Compute Throughput Utilization + set_option: compute_thruput_util + description: Placeholder + metric: + - 11.2.2: SALU Utilization + - 11.2.3: VALU Utilization +- title: Compute Throughput FLOPS + set_option: compute_thruput_flops + description: Placeholder + metric: + - 2.1.2: MFMA FLOPs (BF16) + - 2.1.3: MFMA FLOPs (F16) + - 2.1.4: MFMA FLOPs (F32) + - 2.1.5: MFMA FLOPs (F64) + - 2.1.6: MFMA IOPs (Int8) +- title: Memory Throughput + set_option: mem_thruput + description: Placeholder + metric: + - 2.1.16: Theoretical LDS Bandwidth + - 2.1.17: LDS Bank Conflicts/Access + - 16.1.2: Utilization + - 17.1.0: Utilization +- title: Launch Stats + set_option: launch_stats + description: Placeholder + metric: + - 7.1.0: Grid Size + - 7.1.1: Workgroup Size + - 7.1.2: Total Wavefronts + - 7.1.5: VGPRs + - 7.1.6: AGPRs + - 7.1.7: SGPRs + - 7.1.8: LDS Allocation + - 7.1.9: Scratch Allocation diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx90a_sets.yaml 
b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx90a_sets.yaml new file mode 100644 index 0000000000..3a970342f2 --- /dev/null +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx90a_sets.yaml @@ -0,0 +1,39 @@ +# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py +sets: +- title: Compute Throughput Utilization + set_option: compute_thruput_util + description: Placeholder + metric: + - 11.2.3: VALU Utilization + - 11.2.4: VMEM Utilization + - 11.2.5: Branch Utilization + - 11.2.6: VALU Active Threads +- title: Compute Throughput FLOPS + set_option: compute_thruput_flops + description: Placeholder + metric: + - 2.1.2: MFMA FLOPs (BF16) + - 2.1.3: MFMA FLOPs (F16) + - 2.1.4: MFMA FLOPs (F32) + - 2.1.5: MFMA FLOPs (F64) + - 2.1.6: MFMA IOPs (Int8) +- title: Memory Throughput + set_option: mem_thruput + description: Placeholder + metric: + - 2.1.16: Theoretical LDS Bandwidth + - 2.1.17: LDS Bank Conflicts/Access + - 16.1.2: Utilization + - 17.1.0: Utilization +- title: Launch Stats + set_option: launch_stats + description: Placeholder + metric: + - 7.1.0: Grid Size + - 7.1.1: Workgroup Size + - 7.1.2: Total Wavefronts + - 7.1.5: VGPRs + - 7.1.6: AGPRs + - 7.1.7: SGPRs + - 7.1.8: LDS Allocation + - 7.1.9: Scratch Allocation diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx940_sets.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx940_sets.yaml new file mode 100644 index 0000000000..b549f0fede --- /dev/null +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx940_sets.yaml @@ -0,0 +1,40 @@ +# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. 
Generated by utils/split_config.py +sets: +- title: Compute Throughput Utilization + set_option: compute_thruput_util + description: Placeholder + metric: + - 11.2.2: SALU Utilization + - 11.2.3: VALU Utilization + - 11.2.4: VMEM Utilization + - 11.2.5: Branch Utilization +- title: Compute Throughput FLOPS + set_option: compute_thruput_flops + description: Placeholder + metric: + - 2.1.2: MFMA FLOPs (F8) + - 2.1.3: MFMA FLOPs (BF16) + - 2.1.4: MFMA FLOPs (F16) + - 2.1.5: MFMA FLOPs (F32) + - 2.1.6: MFMA FLOPs (F64) + - 2.1.7: MFMA IOPs (Int8) +- title: Memory Throughput + set_option: mem_thruput + description: Placeholder + metric: + - 2.1.17: Theoretical LDS Bandwidth + - 2.1.18: LDS Bank Conflicts/Access + - 16.1.2: Utilization + - 17.1.0: Utilization +- title: Launch Stats + set_option: launch_stats + description: Placeholder + metric: + - 7.1.0: Grid Size + - 7.1.1: Workgroup Size + - 7.1.2: Total Wavefronts + - 7.1.5: VGPRs + - 7.1.6: AGPRs + - 7.1.7: SGPRs + - 7.1.8: LDS Allocation + - 7.1.9: Scratch Allocation diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx941_sets.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx941_sets.yaml new file mode 100644 index 0000000000..b549f0fede --- /dev/null +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx941_sets.yaml @@ -0,0 +1,40 @@ +# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. 
Generated by utils/split_config.py +sets: +- title: Compute Throughput Utilization + set_option: compute_thruput_util + description: Placeholder + metric: + - 11.2.2: SALU Utilization + - 11.2.3: VALU Utilization + - 11.2.4: VMEM Utilization + - 11.2.5: Branch Utilization +- title: Compute Throughput FLOPS + set_option: compute_thruput_flops + description: Placeholder + metric: + - 2.1.2: MFMA FLOPs (F8) + - 2.1.3: MFMA FLOPs (BF16) + - 2.1.4: MFMA FLOPs (F16) + - 2.1.5: MFMA FLOPs (F32) + - 2.1.6: MFMA FLOPs (F64) + - 2.1.7: MFMA IOPs (Int8) +- title: Memory Throughput + set_option: mem_thruput + description: Placeholder + metric: + - 2.1.17: Theoretical LDS Bandwidth + - 2.1.18: LDS Bank Conflicts/Access + - 16.1.2: Utilization + - 17.1.0: Utilization +- title: Launch Stats + set_option: launch_stats + description: Placeholder + metric: + - 7.1.0: Grid Size + - 7.1.1: Workgroup Size + - 7.1.2: Total Wavefronts + - 7.1.5: VGPRs + - 7.1.6: AGPRs + - 7.1.7: SGPRs + - 7.1.8: LDS Allocation + - 7.1.9: Scratch Allocation diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx942_sets.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx942_sets.yaml new file mode 100644 index 0000000000..b549f0fede --- /dev/null +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx942_sets.yaml @@ -0,0 +1,40 @@ +# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. 
Generated by utils/split_config.py +sets: +- title: Compute Throughput Utilization + set_option: compute_thruput_util + description: Placeholder + metric: + - 11.2.2: SALU Utilization + - 11.2.3: VALU Utilization + - 11.2.4: VMEM Utilization + - 11.2.5: Branch Utilization +- title: Compute Throughput FLOPS + set_option: compute_thruput_flops + description: Placeholder + metric: + - 2.1.2: MFMA FLOPs (F8) + - 2.1.3: MFMA FLOPs (BF16) + - 2.1.4: MFMA FLOPs (F16) + - 2.1.5: MFMA FLOPs (F32) + - 2.1.6: MFMA FLOPs (F64) + - 2.1.7: MFMA IOPs (Int8) +- title: Memory Throughput + set_option: mem_thruput + description: Placeholder + metric: + - 2.1.17: Theoretical LDS Bandwidth + - 2.1.18: LDS Bank Conflicts/Access + - 16.1.2: Utilization + - 17.1.0: Utilization +- title: Launch Stats + set_option: launch_stats + description: Placeholder + metric: + - 7.1.0: Grid Size + - 7.1.1: Workgroup Size + - 7.1.2: Total Wavefronts + - 7.1.5: VGPRs + - 7.1.6: AGPRs + - 7.1.7: SGPRs + - 7.1.8: LDS Allocation + - 7.1.9: Scratch Allocation diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx950_sets.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx950_sets.yaml new file mode 100644 index 0000000000..f93a0af246 --- /dev/null +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/sets/gfx950_sets.yaml @@ -0,0 +1,40 @@ +# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. 
Generated by utils/split_config.py +sets: +- title: Compute Throughput Utilization + set_option: compute_thruput_util + description: Placeholder + metric: + - 11.2.2: SALU Utilization + - 11.2.3: VALU Utilization + - 11.2.5: VMEM Utilization + - 11.2.6: Branch Utilization +- title: Compute Throughput FLOPS + set_option: compute_thruput_flops + description: Placeholder + metric: + - 2.1.2: MFMA FLOPs (F8) + - 2.1.3: MFMA FLOPs (BF16) + - 2.1.4: MFMA FLOPs (F16) + - 2.1.5: MFMA FLOPs (F32) + - 2.1.6: MFMA FLOPs (F64) + - 2.1.8: MFMA IOPs (Int8) +- title: Memory Throughput + set_option: mem_thruput + description: Placeholder + metric: + - 2.1.18: Theoretical LDS Bandwidth + - 2.1.19: LDS Bank Conflicts/Access + - 16.1.2: Utilization + - 17.1.0: Utilization +- title: Launch Stats + set_option: launch_stats + description: Placeholder + metric: + - 7.1.0: Grid Size + - 7.1.1: Workgroup Size + - 7.1.2: Total Wavefronts + - 7.1.5: VGPRs + - 7.1.6: AGPRs + - 7.1.7: SGPRs + - 7.1.8: LDS Allocation + - 7.1.9: Scratch Allocation diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py index bc1937978b..f842763edf 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py @@ -24,7 +24,6 @@ ############################################################################## -import ctypes import glob import json import math @@ -32,7 +31,6 @@ import os import re import shutil import sys -import threading from abc import abstractmethod from pathlib import Path @@ -56,6 +54,7 @@ from utils.utils import ( detect_rocprof, get_submodules, is_tcc_channel_counter, + parse_sets_yaml, using_v3, ) @@ -277,10 +276,26 @@ class OmniSoC_Base: Path(filename).name.split("_")[0]: filename for filename in glob.glob(f"{config_root_dir}/*.yaml") } + texts = list() + set_selected = self.get_args().set_selected + + if set_selected: 
def parse_sets_yaml(arch):
    """Load the pre-defined metric sets for one GPU architecture.

    Reads ``<rocprof_compute_home>/rocprof_compute_soc/profile_configs/sets/
    <arch>_sets.yaml`` and indexes its ``sets`` list by each entry's
    ``set_option``.

    Args:
        arch: Architecture name used as the file-name prefix (e.g. ``gfx90a``).

    Returns:
        dict: ``{set_option: set_entry}`` in file order, where ``set_entry`` is
        the mapping loaded from YAML. Entries that are not mappings or have no
        ``set_option`` are skipped. An empty dict is returned when the file
        does not exist, is empty, or has no ``sets`` list, so callers can
        report "no sets" themselves instead of getting a traceback.
    """
    filename = (
        config.rocprof_compute_home
        / "rocprof_compute_soc"
        / "profile_configs"
        / "sets"
        / f"{arch}_sets.yaml"
    )

    try:
        with open(filename, "r", encoding="utf-8") as file:
            data = yaml.safe_load(file)
    except FileNotFoundError:
        # No sets file is shipped for this architecture.
        return {}

    # safe_load() yields None for an empty document, and the top level could
    # be something other than a mapping if the file is hand-edited.
    if not isinstance(data, dict):
        return {}

    # ``sets:`` with no items loads as None rather than an empty list.
    sets_data = data.get("sets") or []

    sets_info = {}
    for set_item in sets_data:
        if not isinstance(set_item, dict):
            continue
        set_option = set_item.get("set_option", "")
        if set_option:
            sets_info[set_option] = set_item
    return sets_info
@pytest.mark.sets_func
class TestSetsIntegration:
    """End-to-end checks for the ``--set`` and ``--list-sets`` profile options."""

    @staticmethod
    def _run_profile(handler, options, check_success):
        """Profile into a fresh output dir and return ``(workload_dir, returncode)``."""
        workload_dir = test_utils.get_output_dir()
        returncode = handler(
            config,
            workload_dir,
            options,
            check_success=check_success,
            roof=False,
        )
        return workload_dir, returncode

    def _check_single_pass_set(self, handler, set_name, expected_metric_ids=()):
        """Profile ``set_name`` and assert that exactly one pmc file was generated."""
        workload_dir, _ = self._run_profile(handler, ["--set", set_name], True)

        assert test_utils.get_num_pmc_file(workload_dir) == 1

        if expected_metric_ids:
            # Read the log once and let pathlib close the handle.
            log_text = (Path(workload_dir) / "log.txt").read_text()
            for metric_id in expected_metric_ids:
                assert (
                    metric_id in log_text
                ), f"Expected memory metric {metric_id} not found"

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def _check_rejected(self, handler, options):
        """Assert that profiling with ``options`` exits with return code 1."""
        workload_dir, returncode = self._run_profile(handler, options, False)

        assert returncode == 1
        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_memory_throughput_set(self, binary_handler_profile_rocprof_compute):
        self._check_single_pass_set(
            binary_handler_profile_rocprof_compute,
            "mem_thruput",
            expected_metric_ids=["16.1.2", "17.1.0"],
        )

    def test_launch_stats_set(self, binary_handler_profile_rocprof_compute):
        self._check_single_pass_set(
            binary_handler_profile_rocprof_compute, "launch_stats"
        )

    def test_compute_thruput_util_set(self, binary_handler_profile_rocprof_compute):
        self._check_single_pass_set(
            binary_handler_profile_rocprof_compute, "compute_thruput_util"
        )

    def test_compute_thruput_flops_set(self, binary_handler_profile_rocprof_compute):
        self._check_single_pass_set(
            binary_handler_profile_rocprof_compute, "compute_thruput_flops"
        )

    def test_invalid_set_error_handling(self, binary_handler_profile_rocprof_compute):
        self._check_rejected(
            binary_handler_profile_rocprof_compute, ["--set", "nonexistent_set"]
        )

    def test_set_and_block_mutual_exclusion(self, binary_handler_profile_rocprof_compute):
        self._check_rejected(
            binary_handler_profile_rocprof_compute,
            ["--set", "compute_thruput_util", "--block", "12"],
        )

    def test_list_sets_functionality(self, binary_handler_profile_rocprof_compute):
        workload_dir, _ = self._run_profile(
            binary_handler_profile_rocprof_compute, ["--list-sets"], False
        )

        # Listing sets must not profile anything: workload dir should be empty.
        assert not os.listdir(workload_dir)
        test_utils.clean_output_dir(config["cleanup"], workload_dir)
def get_num_pmc_file(output_dir):
    """Count the pmc perf text files generated for a workload.

    Args:
        output_dir: Workload output directory; the files are looked up in its
            ``perfmon`` subdirectory (not recursively).

    Returns:
        int: Number of regular files with a ``.txt`` suffix directly inside
        ``<output_dir>/perfmon``, or 0 when that directory does not exist.
    """
    perfmon_path = Path(output_dir) / "perfmon"

    # A run that produced no perfmon dir has zero pmc files; report that as a
    # count so callers fail on their assertion rather than on an OSError.
    if not perfmon_path.is_dir():
        return 0

    return sum(
        1
        for entry in perfmon_path.iterdir()
        if entry.is_file() and entry.suffix == ".txt"
    )
def test_set_parser():
    """parse_sets_yaml indexes the gfx90a sets file by ``set_option``."""
    from utils.utils import parse_sets_yaml

    expected_option = "compute_thruput_util"
    parsed_sets = parse_sets_yaml("gfx90a")

    # The set must be present under its option name ...
    assert expected_option in parsed_sets

    # ... and carry the title declared for it in the YAML.
    entry = parsed_sets[expected_option]
    assert entry["title"] == "Compute Throughput Utilization"
def get_autogen_text(config_file="utils/unified_config.yaml"):
    """Return the banner comment written at the top of a generated file.

    Args:
        config_file: Repository-relative path of the hand-maintained file the
            generated file is derived from.

    Returns:
        str: One ``#`` comment line, terminated by a newline.
    """
    return (
        "# AUTOGENERATED FILE. Only edit for testing purposes, not for development. "
        f"Generated from {config_file}. Generated by utils/split_config.py\n"
    )


def build_metric_id_mapping(panel_id, data_source_index, metrics, gfx_version):
    """Record the metric id -> metric name mapping of one metric table.

    Ids are built as ``<panel_id // 100>.<data_source_index + 1>.<position>``,
    where ``position`` is the zero-based order of the metric in ``metrics``.
    The result is stored in ``METRIC_ID_TO_NAME_MAP[gfx_version]``.

    Args:
        panel_id: Integer panel id from the unified config.
        data_source_index: Zero-based index of the data source in the panel.
        metrics: Mapping whose keys are the metric names, in display order.
        gfx_version: Architecture key of ``METRIC_ID_TO_NAME_MAP`` to update.
    """
    # The module-level dict is mutated in place, so no ``global`` is required.
    arch_map = METRIC_ID_TO_NAME_MAP[gfx_version]
    id_prefix = f"{panel_id // 100}.{data_source_index + 1}"
    for metric_index, metric_name in enumerate(metrics):
        arch_map[f"{id_prefix}.{metric_index}"] = metric_name


def update_sets_config():
    """Generate one ``<arch>_sets.yaml`` per GPU architecture.

    Reads ``utils/unified_sets.yaml`` and, for every entry of ``GFX_VERSIONS``,
    writes the sets with each metric id paired with its metric name. The file
    hash is recorded in ``HASH_FILE_MAP``.

    The metric names come from ``METRIC_ID_TO_NAME_MAP``, which is filled by
    ``build_metric_id_mapping`` during ``update_analysis_config()``; that
    function therefore has to run first.

    Raises:
        KeyError: If a set has no metric list for an architecture, or lists a
            metric id that is not present in ``METRIC_ID_TO_NAME_MAP``.
    """
    # Create directory if it doesn't exist (including missing parents).
    if not SETS_TARGET_DIR.exists():
        SETS_TARGET_DIR.mkdir(parents=True, exist_ok=True)
        print(f"Created directory: {SETS_TARGET_DIR}")

    # Read the unified sets file.
    with open(SOURCE_DIR.joinpath("unified_sets.yaml"), encoding="utf-8") as file:
        unified_sets = yaml.safe_load(file)

    # Create per gfx version file.
    for gfx_version in GFX_VERSIONS:
        id_to_name = METRIC_ID_TO_NAME_MAP[gfx_version]
        new_sets = {"sets": []}

        for set_config in unified_sets["sets"]:
            set_option = set_config["set_option"]
            try:
                metric_ids = set_config["metric"][gfx_version]
            except KeyError:
                raise KeyError(
                    f"set '{set_option}' in utils/unified_sets.yaml has no "
                    f"metric list for {gfx_version}"
                ) from None

            metric_entries = []
            for metric_id in metric_ids:
                # YAML may load a short id such as 7.1 as a float; the map is
                # keyed by strings.
                try:
                    metric_name = id_to_name[str(metric_id)]
                except KeyError:
                    raise KeyError(
                        f"metric id '{metric_id}' of set '{set_option}' is not "
                        f"defined for {gfx_version}; update_analysis_config() "
                        "must run before update_sets_config()"
                    ) from None
                metric_entries.append({metric_id: metric_name})

            # Key order is preserved in the output (sort_keys=False below).
            new_sets["sets"].append(
                {
                    "title": set_config["title"],
                    "set_option": set_option,
                    "description": set_config["description"],
                    "metric": metric_entries,
                }
            )

        # Write gfx version sets to file.
        filename = SETS_TARGET_DIR.joinpath(f"{gfx_version}_sets.yaml")
        with open(filename, "w", encoding="utf-8") as file:
            file.write(get_autogen_text("utils/unified_sets.yaml"))
            yaml.dump(new_sets, file, sort_keys=False)
        print(f"File write: {filename}")

        # Record the hash of the generated file's content.
        HASH_FILE_MAP[str(filename.relative_to(ROOT_DIR))] = hashlib.sha256(
            filename.read_bytes()
        ).hexdigest()
sort_keys=False) print(f"File write: {HASH_FILE}") if __name__ == "__main__": update_analysis_config() + update_sets_config() update_documentation() update_hash() diff --git a/projects/rocprofiler-compute/utils/unified_sets.yaml b/projects/rocprofiler-compute/utils/unified_sets.yaml new file mode 100644 index 0000000000..43b1da6dce --- /dev/null +++ b/projects/rocprofiler-compute/utils/unified_sets.yaml @@ -0,0 +1,176 @@ +--- +# Pre-defined sets containing a collection of relevant metrics that can be collected in a single pass. +# To profile customized set(s), append to this yaml file. + +sets: +- title: Compute Throughput Utilization + set_option: compute_thruput_util + description: Placeholder + metric: + gfx908: + - 11.2.2 + - 11.2.3 + gfx90a: + - 11.2.3 + - 11.2.4 + - 11.2.5 + - 11.2.6 + gfx940: + - 11.2.2 + - 11.2.3 + - 11.2.4 + - 11.2.5 + gfx941: + - 11.2.2 + - 11.2.3 + - 11.2.4 + - 11.2.5 + gfx942: + - 11.2.2 + - 11.2.3 + - 11.2.4 + - 11.2.5 + gfx950: + - 11.2.2 + - 11.2.3 + - 11.2.5 + - 11.2.6 + +- title: Compute Throughput FLOPS + set_option: compute_thruput_flops + description: Placeholder + metric: + gfx908: + - 2.1.2 + - 2.1.3 + - 2.1.4 + - 2.1.5 + - 2.1.6 + gfx90a: + - 2.1.2 + - 2.1.3 + - 2.1.4 + - 2.1.5 + - 2.1.6 + gfx940: + - 2.1.2 + - 2.1.3 + - 2.1.4 + - 2.1.5 + - 2.1.6 + - 2.1.7 + gfx941: + - 2.1.2 + - 2.1.3 + - 2.1.4 + - 2.1.5 + - 2.1.6 + - 2.1.7 + gfx942: + - 2.1.2 + - 2.1.3 + - 2.1.4 + - 2.1.5 + - 2.1.6 + - 2.1.7 + gfx950: + - 2.1.2 + - 2.1.3 + - 2.1.4 + - 2.1.5 + - 2.1.6 + - 2.1.8 + +- title: Memory Throughput + set_option: mem_thruput + description: Placeholder + metric: + gfx908: + - 2.1.16 + - 2.1.17 + - 16.1.2 + - 17.1.0 + gfx90a: + - 2.1.16 + - 2.1.17 + - 16.1.2 + - 17.1.0 + gfx940: + - 2.1.17 + - 2.1.18 + - 16.1.2 + - 17.1.0 + gfx941: + - 2.1.17 + - 2.1.18 + - 16.1.2 + - 17.1.0 + gfx942: + - 2.1.17 + - 2.1.18 + - 16.1.2 + - 17.1.0 + gfx950: + - 2.1.18 + - 2.1.19 + - 16.1.2 + - 17.1.0 + +- title: Launch Stats + set_option: launch_stats + 
description: Placeholder + metric: + gfx908: + - 7.1.0 + - 7.1.1 + - 7.1.2 + - 7.1.5 + - 7.1.6 + - 7.1.7 + - 7.1.8 + - 7.1.9 + gfx90a: + - 7.1.0 + - 7.1.1 + - 7.1.2 + - 7.1.5 + - 7.1.6 + - 7.1.7 + - 7.1.8 + - 7.1.9 + gfx940: + - 7.1.0 + - 7.1.1 + - 7.1.2 + - 7.1.5 + - 7.1.6 + - 7.1.7 + - 7.1.8 + - 7.1.9 + gfx941: + - 7.1.0 + - 7.1.1 + - 7.1.2 + - 7.1.5 + - 7.1.6 + - 7.1.7 + - 7.1.8 + - 7.1.9 + gfx942: + - 7.1.0 + - 7.1.1 + - 7.1.2 + - 7.1.5 + - 7.1.6 + - 7.1.7 + - 7.1.8 + - 7.1.9 + gfx950: + - 7.1.0 + - 7.1.1 + - 7.1.2 + - 7.1.5 + - 7.1.6 + - 7.1.7 + - 7.1.8 + - 7.1.9 \ No newline at end of file