Enable single pass counter collection (#833)

[ROCm/rocprofiler-compute commit: 6a77d241ed]
2025-08-06 10:35:05 -04:00
@@ -254,6 +254,14 @@ add_test(
        ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})

+add_test(
+    NAME test_profile_sets_func
+    COMMAND
+        ${Python3_EXECUTABLE} -m pytest -m sets_func
+        --junitxml=tests/test_profile_sets_func.xml ${COV_OPTION}
+        ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
+    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+
 set_tests_properties(
    test_profile_kernel_execution test_profile_dispatch test_profile_mem test_profile_join
    test_profile_sort test_profile_misc PROPERTIES LABELS "profile" RESOURCE_GROUPS
@@ -41,35 +41,37 @@ addopts = [
 ]

 pythonpath = [
-      ".",
-      "src",
-      "src/rocprof_compute_soc",
-      "src/utils",
-      "src/rocprof_compute_analyze/utils",
-      "tests"
-    ]
+    ".",
+    "src",
+    "src/rocprof_compute_soc",
+    "src/utils",
+    "src/rocprof_compute_analyze/utils",
+    "tests"
+]

 markers = [
-	"section",
-	"kernel_execution",
-	"misc",
-	"mem",
-	"sort",
-	"join",
-	"verbosity",
-	"dispatch",
-	"list_metrics",
-	"filter_block",
-	"filter_kernel",
-	"dispatch",
-	"normal_unit",
-	"max_stat",
-	"time_unit",
-	"decimal",
-	"col",
-	"kernel_verbose",
-	"serial",
-	"L1_cache",
-	"num_xcds_spec_class",
-	"num_xcds_cli_output",
+    "section",
+    "kernel_execution",
+    "misc",
+    "mem",
+    "sort",
+    "join",
+    "verbosity",
+    "dispatch",
+    "list_metrics",
+    "filter_block",
+    "filter_kernel",
+    "dispatch",
+    "normal_unit",
+    "max_stat",
+    "time_unit",
+    "decimal",
+    "col",
+    "kernel_verbose",
+    "serial",
+    "L1_cache",
+    "num_xcds_spec_class",
+    "num_xcds_cli_output",
+    "sets_func",
+    "sets_perf"
 ]
@@ -27,12 +27,11 @@
 import argparse
 import os
 import re
-import shutil
 from pathlib import Path


 def print_avail_arch(avail_arch: list):
-    ret_str = "\t\tList all available metrics for analysis on specified arch:"
+    ret_str = "\t\t\tList all available metrics for analysis on specified arch:"
    for arch in avail_arch:
        ret_str += "\n\t\t\t   {}".format(arch)
    return ret_str
@@ -207,6 +206,19 @@ Examples:
        choices=[""] + list(supported_archs.keys()),  # ["gfx908", "gfx90a"],
        help=print_avail_arch(supported_archs.keys()),
    )
+
+    profile_group.add_argument(
+        "--list-sets",
+        action="store_true",
+        help="\t\t\tDisplay available metric sets and their descriptions",
+    )
+    profile_group.add_argument(
+        "--set",
+        default=None,
+        dest="set_selected",
+        help="\t\t\tProfile a set of metrics of topic of interest by collecting counters in a single pass.\n\t\t\tFor available sets, see --list-sets",
+    )
+
    profile_group.add_argument(
        "--config-dir",
        dest="config_dir",
@@ -27,7 +27,6 @@
 import argparse
 import importlib
 import os
-import shutil
 import socket
 import sys
 import time
@@ -55,6 +54,7 @@ from utils.utils import (
    get_submodules,
    get_version,
    get_version_display,
+    parse_sets_yaml,
    set_locale_encoding,
 )

@@ -243,6 +243,53 @@ class RocProfCompute:
        else:
            console_error("Unsupported arch")

+    @demarcate
+    def list_sets(self):
+        """Print a table of the metric sets for ``self.__mspec.gpu_arch`` and exit.
+
+        Every metric of a set gets its own row; the set option and title are
+        shown on the first row of the set only. The ``description`` field of a
+        set is not printed. Ends by calling ``sys.exit(0)``.
+        """
+        sets_info = parse_sets_yaml(self.__mspec.gpu_arch)
+        if not sets_info:
+            console_error("No sets configuration found.")
+
+        rule_width = 115
+        print("\nAvailable Sets:")
+        print("=" * rule_width)
+        print(
+            f"{'Set Option':<35} {'Set Title':<35} {'Metric Name':<30} {'Metric ID':<10}"
+        )
+        print("-" * rule_width)
+
+        for set_option, set_data in sets_info.items():
+            # Option and title are consumed by the first printed row, then blanked.
+            set_display = set_option
+            title_display = set_data.get("title", set_option)
+
+            for metric in set_data.get("metric", []):
+                # Rows come from non-empty {metric_id: metric_name} mappings only.
+                if not isinstance(metric, dict) or not metric:
+                    continue
+                metric_id, metric_name = next(iter(metric.items()))
+                print(
+                    f"{set_display:<35} {title_display:<35} {metric_name:<30} {metric_id:<10}"
+                )
+                set_display = title_display = ""
+            # Blank line separates consecutive sets.
+            print()
+
+        print("Usage Examples:")
+        if sets_info:
+            first_set = next(iter(sets_info))
+            print(f"  rocprof-compute profile --set {first_set}  # Profile this set")
+        print("  rocprof-compute profile --list-sets        # Show this help")
+        print()
+
+        sys.exit(0)
+
    @demarcate
    def run_profiler(self):
        self.print_graphic()
@@ -250,6 +297,8 @@ class RocProfCompute:

        if self.__args.list_metrics is not None:
            self.list_metrics()
+        elif self.__args.list_sets:
+            self.list_sets()
        elif self.__args.name is None:
            sys.exit("Either --list-name or --name is required")

@@ -26,13 +26,12 @@

 import csv
 import glob
-import logging
 import os
 import re
 import shlex
 import shutil
 import time
-from abc import ABC, abstractmethod
+from abc import abstractmethod
 from pathlib import Path

 import pandas as pd
@@ -350,6 +349,10 @@ class RocProfCompute_Base:
        console_log("Command: " + str(self.__args.remaining))
        console_log("Kernel Selection: " + str(self.__args.kernel))
        console_log("Dispatch Selection: " + str(self.__args.dispatch))
+
+        if self.get_args().set_selected:
+            console_log("Set Selection: " + str(self.__args.set_selected))
+
        if self.get_args().filter_blocks is None:
            console_log("Report Sections: All")
        else:
@@ -0,0 +1,37 @@
+# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
+sets:
+- title: Compute Throughput Utilization
+  set_option: compute_thruput_util
+  description: Placeholder
+  metric:
+  - 11.2.2: SALU Utilization
+  - 11.2.3: VALU Utilization
+- title: Compute Throughput FLOPS
+  set_option: compute_thruput_flops
+  description: Placeholder
+  metric:
+  - 2.1.2: MFMA FLOPs (BF16)
+  - 2.1.3: MFMA FLOPs (F16)
+  - 2.1.4: MFMA FLOPs (F32)
+  - 2.1.5: MFMA FLOPs (F64)
+  - 2.1.6: MFMA IOPs (Int8)
+- title: Memory Throughput
+  set_option: mem_thruput
+  description: Placeholder
+  metric:
+  - 2.1.16: Theoretical LDS Bandwidth
+  - 2.1.17: LDS Bank Conflicts/Access
+  - 16.1.2: Utilization
+  - 17.1.0: Utilization
+- title: Launch Stats
+  set_option: launch_stats
+  description: Placeholder
+  metric:
+  - 7.1.0: Grid Size
+  - 7.1.1: Workgroup Size
+  - 7.1.2: Total Wavefronts
+  - 7.1.5: VGPRs
+  - 7.1.6: AGPRs
+  - 7.1.7: SGPRs
+  - 7.1.8: LDS Allocation
+  - 7.1.9: Scratch Allocation
@@ -0,0 +1,39 @@
+# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
+sets:
+- title: Compute Throughput Utilization
+  set_option: compute_thruput_util
+  description: Placeholder
+  metric:
+  - 11.2.3: VALU Utilization
+  - 11.2.4: VMEM Utilization
+  - 11.2.5: Branch Utilization
+  - 11.2.6: VALU Active Threads
+- title: Compute Throughput FLOPS
+  set_option: compute_thruput_flops
+  description: Placeholder
+  metric:
+  - 2.1.2: MFMA FLOPs (BF16)
+  - 2.1.3: MFMA FLOPs (F16)
+  - 2.1.4: MFMA FLOPs (F32)
+  - 2.1.5: MFMA FLOPs (F64)
+  - 2.1.6: MFMA IOPs (Int8)
+- title: Memory Throughput
+  set_option: mem_thruput
+  description: Placeholder
+  metric:
+  - 2.1.16: Theoretical LDS Bandwidth
+  - 2.1.17: LDS Bank Conflicts/Access
+  - 16.1.2: Utilization
+  - 17.1.0: Utilization
+- title: Launch Stats
+  set_option: launch_stats
+  description: Placeholder
+  metric:
+  - 7.1.0: Grid Size
+  - 7.1.1: Workgroup Size
+  - 7.1.2: Total Wavefronts
+  - 7.1.5: VGPRs
+  - 7.1.6: AGPRs
+  - 7.1.7: SGPRs
+  - 7.1.8: LDS Allocation
+  - 7.1.9: Scratch Allocation
@@ -0,0 +1,40 @@
+# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
+sets:
+- title: Compute Throughput Utilization
+  set_option: compute_thruput_util
+  description: Placeholder
+  metric:
+  - 11.2.2: SALU Utilization
+  - 11.2.3: VALU Utilization
+  - 11.2.4: VMEM Utilization
+  - 11.2.5: Branch Utilization
+- title: Compute Throughput FLOPS
+  set_option: compute_thruput_flops
+  description: Placeholder
+  metric:
+  - 2.1.2: MFMA FLOPs (F8)
+  - 2.1.3: MFMA FLOPs (BF16)
+  - 2.1.4: MFMA FLOPs (F16)
+  - 2.1.5: MFMA FLOPs (F32)
+  - 2.1.6: MFMA FLOPs (F64)
+  - 2.1.7: MFMA IOPs (Int8)
+- title: Memory Throughput
+  set_option: mem_thruput
+  description: Placeholder
+  metric:
+  - 2.1.17: Theoretical LDS Bandwidth
+  - 2.1.18: LDS Bank Conflicts/Access
+  - 16.1.2: Utilization
+  - 17.1.0: Utilization
+- title: Launch Stats
+  set_option: launch_stats
+  description: Placeholder
+  metric:
+  - 7.1.0: Grid Size
+  - 7.1.1: Workgroup Size
+  - 7.1.2: Total Wavefronts
+  - 7.1.5: VGPRs
+  - 7.1.6: AGPRs
+  - 7.1.7: SGPRs
+  - 7.1.8: LDS Allocation
+  - 7.1.9: Scratch Allocation
@@ -0,0 +1,40 @@
+# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
+sets:
+- title: Compute Throughput Utilization
+  set_option: compute_thruput_util
+  description: Placeholder
+  metric:
+  - 11.2.2: SALU Utilization
+  - 11.2.3: VALU Utilization
+  - 11.2.4: VMEM Utilization
+  - 11.2.5: Branch Utilization
+- title: Compute Throughput FLOPS
+  set_option: compute_thruput_flops
+  description: Placeholder
+  metric:
+  - 2.1.2: MFMA FLOPs (F8)
+  - 2.1.3: MFMA FLOPs (BF16)
+  - 2.1.4: MFMA FLOPs (F16)
+  - 2.1.5: MFMA FLOPs (F32)
+  - 2.1.6: MFMA FLOPs (F64)
+  - 2.1.7: MFMA IOPs (Int8)
+- title: Memory Throughput
+  set_option: mem_thruput
+  description: Placeholder
+  metric:
+  - 2.1.17: Theoretical LDS Bandwidth
+  - 2.1.18: LDS Bank Conflicts/Access
+  - 16.1.2: Utilization
+  - 17.1.0: Utilization
+- title: Launch Stats
+  set_option: launch_stats
+  description: Placeholder
+  metric:
+  - 7.1.0: Grid Size
+  - 7.1.1: Workgroup Size
+  - 7.1.2: Total Wavefronts
+  - 7.1.5: VGPRs
+  - 7.1.6: AGPRs
+  - 7.1.7: SGPRs
+  - 7.1.8: LDS Allocation
+  - 7.1.9: Scratch Allocation
@@ -0,0 +1,40 @@
+# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
+sets:
+- title: Compute Throughput Utilization
+  set_option: compute_thruput_util
+  description: Placeholder
+  metric:
+  - 11.2.2: SALU Utilization
+  - 11.2.3: VALU Utilization
+  - 11.2.4: VMEM Utilization
+  - 11.2.5: Branch Utilization
+- title: Compute Throughput FLOPS
+  set_option: compute_thruput_flops
+  description: Placeholder
+  metric:
+  - 2.1.2: MFMA FLOPs (F8)
+  - 2.1.3: MFMA FLOPs (BF16)
+  - 2.1.4: MFMA FLOPs (F16)
+  - 2.1.5: MFMA FLOPs (F32)
+  - 2.1.6: MFMA FLOPs (F64)
+  - 2.1.7: MFMA IOPs (Int8)
+- title: Memory Throughput
+  set_option: mem_thruput
+  description: Placeholder
+  metric:
+  - 2.1.17: Theoretical LDS Bandwidth
+  - 2.1.18: LDS Bank Conflicts/Access
+  - 16.1.2: Utilization
+  - 17.1.0: Utilization
+- title: Launch Stats
+  set_option: launch_stats
+  description: Placeholder
+  metric:
+  - 7.1.0: Grid Size
+  - 7.1.1: Workgroup Size
+  - 7.1.2: Total Wavefronts
+  - 7.1.5: VGPRs
+  - 7.1.6: AGPRs
+  - 7.1.7: SGPRs
+  - 7.1.8: LDS Allocation
+  - 7.1.9: Scratch Allocation
@@ -0,0 +1,40 @@
+# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
+sets:
+- title: Compute Throughput Utilization
+  set_option: compute_thruput_util
+  description: Placeholder
+  metric:
+  - 11.2.2: SALU Utilization
+  - 11.2.3: VALU Utilization
+  - 11.2.5: VMEM Utilization
+  - 11.2.6: Branch Utilization
+- title: Compute Throughput FLOPS
+  set_option: compute_thruput_flops
+  description: Placeholder
+  metric:
+  - 2.1.2: MFMA FLOPs (F8)
+  - 2.1.3: MFMA FLOPs (BF16)
+  - 2.1.4: MFMA FLOPs (F16)
+  - 2.1.5: MFMA FLOPs (F32)
+  - 2.1.6: MFMA FLOPs (F64)
+  - 2.1.8: MFMA IOPs (Int8)
+- title: Memory Throughput
+  set_option: mem_thruput
+  description: Placeholder
+  metric:
+  - 2.1.18: Theoretical LDS Bandwidth
+  - 2.1.19: LDS Bank Conflicts/Access
+  - 16.1.2: Utilization
+  - 17.1.0: Utilization
+- title: Launch Stats
+  set_option: launch_stats
+  description: Placeholder
+  metric:
+  - 7.1.0: Grid Size
+  - 7.1.1: Workgroup Size
+  - 7.1.2: Total Wavefronts
+  - 7.1.5: VGPRs
+  - 7.1.6: AGPRs
+  - 7.1.7: SGPRs
+  - 7.1.8: LDS Allocation
+  - 7.1.9: Scratch Allocation
@@ -24,7 +24,6 @@
 ##############################################################################


-import ctypes
 import glob
 import json
 import math
@@ -32,7 +31,6 @@ import os
 import re
 import shutil
 import sys
-import threading
 from abc import abstractmethod
 from pathlib import Path

@@ -56,6 +54,7 @@ from utils.utils import (
    detect_rocprof,
    get_submodules,
    is_tcc_channel_counter,
+    parse_sets_yaml,
    using_v3,
 )

@@ -277,10 +276,26 @@ class OmniSoC_Base:
            Path(filename).name.split("_")[0]: filename
            for filename in glob.glob(f"{config_root_dir}/*.yaml")
        }
+
        texts = list()

+        set_selected = self.get_args().set_selected
+
+        if set_selected:
+            # NOTE: --block and --set are mutually exclusive
+            if self.get_args().filter_blocks:
+                console_error("--block and --set are exclusive options.")
+
+            sets_info = parse_sets_yaml(self.__arch)
+            if set_selected not in set(sets_info.keys()):
+                console_error(
+                    f"argument --set: invalid choice: '{set_selected}' (choose from {sets_info.keys()})"
+                )
+            self.__args.filter_blocks = [
+                next(iter(metric.keys())) for metric in sets_info[set_selected]["metric"]
+            ]
+
        if not self.get_args().filter_blocks:
-            # Read all config files if no filter_blocks are specified
            for filename in config_filename_dict.values():
                with open(filename, "r") as stream:
                    texts.append(stream.read())
@@ -35,11 +35,8 @@ import re
 import selectors
 import shutil
 import subprocess
-import sys
 import tempfile
 import time
-from collections import OrderedDict
-from itertools import product
 from pathlib import Path as path
 from typing import Optional

@@ -1616,3 +1613,25 @@ def format_time(seconds):
    if secs > 0 or not parts:
        parts.append(f"{secs} second{'s' if secs != 1 else ''}")
    return ", ".join(parts[:-1]) + (" and " if len(parts) > 1 else "") + parts[-1]
+
+
+def parse_sets_yaml(arch):
+    """Load the metric sets defined for ``arch``, keyed by their ``set_option``.
+
+    Reads ``rocprof_compute_soc/profile_configs/sets/<arch>_sets.yaml`` under
+    ``config.rocprof_compute_home``.
+
+    Returns:
+        dict: ``set_option`` -> the full set entry from the YAML file. Entries
+        without a ``set_option`` are skipped. An empty file, or one whose
+        ``sets`` key is missing or null, yields an empty dict.
+    """
+    filename = (
+        config.rocprof_compute_home
+        / "rocprof_compute_soc"
+        / "profile_configs"
+        / "sets"
+        / f"{arch}_sets.yaml"
+    )
+    with open(filename, "r") as file:
+        # safe_load returns None for an empty document; normalise to a dict
+        data = yaml.safe_load(file) or {}
+
+    return {
+        set_item["set_option"]: set_item
+        for set_item in data.get("sets") or []
+        if set_item.get("set_option")
+    }
@@ -27,13 +27,9 @@
 import inspect
 import os
 import re
-import shutil
 import subprocess
 import sys
-import tempfile
-from importlib.machinery import SourceFileLoader
 from pathlib import Path
-from unittest.mock import patch

 import pandas as pd
 import pytest
@@ -1649,3 +1645,117 @@ def test_comprehensive_error_paths():
        assert False, "Should raise exception for None coll_level"
    except Exception as e:
        assert "coll_level can not be None" in str(e)
+
+
+@pytest.mark.sets_func
+class TestSetsIntegration:
+    """Integration checks for ``--set`` and ``--list-sets`` via the profile fixture."""
+
+    @staticmethod
+    def _profile(handler, options, check_success):
+        """Run the profile handler with ``options`` in a fresh output directory.
+
+        Returns:
+            tuple: ``(workload_dir, result)`` where ``result`` is whatever the
+            handler returned.
+        """
+        workload_dir = test_utils.get_output_dir()
+        result = handler(
+            config,
+            workload_dir,
+            options,
+            check_success=check_success,
+            roof=False,
+        )
+        return workload_dir, result
+
+    def test_memory_throughput_set(self, binary_handler_profile_rocprof_compute):
+        """``--set mem_thruput`` writes one pmc file and logs its memory metric ids."""
+        workload_dir, _ = self._profile(
+            binary_handler_profile_rocprof_compute, ["--set", "mem_thruput"], True
+        )
+
+        assert test_utils.get_num_pmc_file(workload_dir) == 1
+
+        # Read the log once (and close it) rather than re-opening it per metric.
+        log_text = (Path(workload_dir) / "log.txt").read_text()
+        for metric_id in ("16.1.2", "17.1.0"):
+            assert metric_id in log_text, f"Expected memory metric {metric_id} not found"
+
+        test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+    def test_launch_stats_set(self, binary_handler_profile_rocprof_compute):
+        """``--set launch_stats`` writes exactly one pmc file."""
+        workload_dir, _ = self._profile(
+            binary_handler_profile_rocprof_compute, ["--set", "launch_stats"], True
+        )
+
+        assert test_utils.get_num_pmc_file(workload_dir) == 1
+
+        test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+    def test_compute_thruput_util_set(self, binary_handler_profile_rocprof_compute):
+        """``--set compute_thruput_util`` writes exactly one pmc file."""
+        workload_dir, _ = self._profile(
+            binary_handler_profile_rocprof_compute,
+            ["--set", "compute_thruput_util"],
+            True,
+        )
+
+        assert test_utils.get_num_pmc_file(workload_dir) == 1
+
+        test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+    def test_compute_thruput_flops_set(self, binary_handler_profile_rocprof_compute):
+        """``--set compute_thruput_flops`` writes exactly one pmc file."""
+        workload_dir, _ = self._profile(
+            binary_handler_profile_rocprof_compute,
+            ["--set", "compute_thruput_flops"],
+            True,
+        )
+
+        assert test_utils.get_num_pmc_file(workload_dir) == 1
+
+        test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+    def test_invalid_set_error_handling(self, binary_handler_profile_rocprof_compute):
+        """An unknown set name makes the profile run return 1."""
+        workload_dir, returncode = self._profile(
+            binary_handler_profile_rocprof_compute, ["--set", "nonexistent_set"], False
+        )
+
+        assert returncode == 1
+        test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+    def test_set_and_block_mutual_exclusion(self, binary_handler_profile_rocprof_compute):
+        """Passing ``--set`` together with ``--block`` makes the run return 1."""
+        workload_dir, returncode = self._profile(
+            binary_handler_profile_rocprof_compute,
+            ["--set", "compute_thruput_util", "--block", "12"],
+            False,
+        )
+
+        assert returncode == 1
+        test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+    def test_list_sets_functionality(self, binary_handler_profile_rocprof_compute):
+        """``--list-sets`` leaves the workload directory empty."""
+        workload_dir, _ = self._profile(
+            binary_handler_profile_rocprof_compute, ["--list-sets"], False
+        )
+        # workload dir should be empty
+        assert not os.listdir(workload_dir)
+        test_utils.clean_output_dir(config["cleanup"], workload_dir)
@@ -29,7 +29,6 @@ import logging
 logging.trace = lambda *args, **kwargs: None

 import builtins
-import glob
 import inspect
 import io
 import json
@@ -38,16 +37,15 @@ import logging
 import os
 import pathlib
 import re
-import selectors
 import shutil
 import subprocess
-import tempfile
 from pathlib import Path
 from types import SimpleNamespace
 from unittest import mock

 import pandas as pd
 import pytest
+import yaml

 import utils.utils as utils

@@ -163,6 +161,16 @@ def check_csv_files(output_dir, num_devices, num_kernels):
    return file_dict


+def get_num_pmc_file(output_dir):
+    """Count the ``.txt`` files directly inside ``<output_dir>/perfmon``.
+
+    Returns:
+        int: number of regular files with a ``.txt`` suffix (non-recursive)
+    """
+    entries = (Path(output_dir) / "perfmon").iterdir()
+    return sum(1 for entry in entries if entry.is_file() and entry.suffix == ".txt")
+
+
 # =============================================================================
 # VERSION UTILITIES TESTS
 # =============================================================================
@@ -5975,11 +5983,10 @@ def test_get_submodules_basic_functionality():
    Returns:
        None: Asserts function correctly lists submodules from a real package.
    """
-    from unittest.mock import MagicMock, patch

    import utils.utils as utils_mod

-    mock_package = MagicMock()
+    mock_package = mock.MagicMock()
    mock_package.__path__ = ["/fake/path"]

    mock_submodules = [
@@ -5988,8 +5995,8 @@ def test_get_submodules_basic_functionality():
        (None, "module_error", False),
    ]

-    with patch("importlib.import_module", return_value=mock_package):
-        with patch("pkgutil.walk_packages", return_value=mock_submodules):
+    with mock.patch("importlib.import_module", return_value=mock_package):
+        with mock.patch("pkgutil.walk_packages", return_value=mock_submodules):
            result = utils_mod.get_submodules("test_package")

    assert isinstance(result, list)
@@ -9490,3 +9497,12 @@ def test_replace_timestamps_no_other_csvs_to_update(
    df_sysinfo_original = pd.read_csv(sysinfo_csv_path_str)
    assert list(df_sysinfo_original["Start_Timestamp"]) == [5]
    assert list(df_sysinfo_original["End_Timestamp"]) == [7]
+
+
+def test_set_parser():
+    """``parse_sets_yaml("gfx90a")`` exposes ``compute_thruput_util`` with its title."""
+    from utils.utils import parse_sets_yaml
+
+    gfx90a_sets = parse_sets_yaml("gfx90a")
+    expected_title = "Compute Throughput Utilization"
+
+    assert "compute_thruput_util" in gfx90a_sets
+    assert gfx90a_sets["compute_thruput_util"]["title"] == expected_title
@@ -107,4 +107,10 @@ src/rocprof_compute_soc/analysis_configs/gfx940/2100_pc_sampling.yaml: 4f3af5504
 src/rocprof_compute_soc/analysis_configs/gfx941/2100_pc_sampling.yaml: 4f3af55040c40bee5f1fd88d83e2324d06e5dc462c0adc3e6d5b19b3f31af5e7
 src/rocprof_compute_soc/analysis_configs/gfx942/2100_pc_sampling.yaml: 4f3af55040c40bee5f1fd88d83e2324d06e5dc462c0adc3e6d5b19b3f31af5e7
 src/rocprof_compute_soc/analysis_configs/gfx950/2100_pc_sampling.yaml: 4f3af55040c40bee5f1fd88d83e2324d06e5dc462c0adc3e6d5b19b3f31af5e7
+src/rocprof_compute_soc/profile_configs/sets/gfx908_sets.yaml: ee28989e70d0537db8b0f0a4bc5499444b44ff0e73d3e7f2926943be11d0aeda
+src/rocprof_compute_soc/profile_configs/sets/gfx90a_sets.yaml: 9c9533174a3f7bd5c8e09ec998743c7bb2642c4ce3f818b546673be9cafc40a8
+src/rocprof_compute_soc/profile_configs/sets/gfx940_sets.yaml: 44cd2b32b050cafa73d0ead5703b82836edf25a057c21699046b6b8b8918b242
+src/rocprof_compute_soc/profile_configs/sets/gfx941_sets.yaml: 44cd2b32b050cafa73d0ead5703b82836edf25a057c21699046b6b8b8918b242
+src/rocprof_compute_soc/profile_configs/sets/gfx942_sets.yaml: 44cd2b32b050cafa73d0ead5703b82836edf25a057c21699046b6b8b8918b242
+src/rocprof_compute_soc/profile_configs/sets/gfx950_sets.yaml: 238d9dc8a98cfead3fc904885bfe413e5bcb4f1af31e9820cd640388bcd1e1c2
 docs/data/metrics_description.yaml: 819c08a584ae8b418e6983aa51108b95e43eda4f3b7892eab336c61d844b20bf
@@ -17,14 +17,23 @@ import yaml
 ROOT_DIR = Path(__file__).parent.parent
 SOURCE_DIR = ROOT_DIR.joinpath("utils")
 TARGET_DIR = ROOT_DIR.joinpath("src", "rocprof_compute_soc", "analysis_configs")
+SETS_TARGET_DIR = ROOT_DIR.joinpath(
+    "src", "rocprof_compute_soc", "profile_configs", "sets"
+)
 DOC_TARGET_DIR = ROOT_DIR.joinpath("docs", "data")
-AUTOGEN_TEXT = "# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_config.yaml. Generated by utils/split_config.py\n"
 HASH_FILE = ROOT_DIR.joinpath("utils", "autogen_hash.yaml")
 HASH_FILE_MAP = {}
 GFX_VERSIONS = ["gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx950"]
+METRIC_ID_TO_NAME_MAP = {gfx_version: {} for gfx_version in GFX_VERSIONS}
+
+
+def get_autogen_text(config_file="utils/unified_config.yaml"):
+    """Return the one-line "AUTOGENERATED FILE" comment header naming ``config_file``.
+
+    The returned string starts with ``#`` and ends with a newline.
+    """
+    banner = f"# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from {config_file}. Generated by utils/split_config.py\n"
+    return banner


 def update_analysis_config():
+    global METRIC_ID_TO_NAME_MAP
+
    # Read the unified config file
    with open(SOURCE_DIR.joinpath("unified_config.yaml")) as file:
        unified_config = yaml.safe_load(file)
@@ -38,6 +47,7 @@ def update_analysis_config():
            key: value["plain"]
            for key, value in panel_config.get("metrics_description", {}).items()
        }
+        panel_id_int = panel_config["id"]
        # Convert int into str with 4 digits
        panel_id = str(panel_config["id"]).zfill(4)
        # Replace parentehsis, hyphen, slash and space with underscore
@@ -57,19 +67,28 @@ def update_analysis_config():

            # Select metrics from current gfx arch
            new_panel_config["Panel Config"]["data source"] = []
-            for data_source_config in panel_config["data source"]:
+            for data_source_index, data_source_config in enumerate(
+                panel_config["data source"]
+            ):
                data_source_config = copy.deepcopy(data_source_config)
                if "metric_table" in data_source_config:
                    data_source_config["metric_table"]["metric"] = data_source_config[
                        "metric_table"
                    ]["metric"][gfx_version]
+
+                    build_metric_id_mapping(
+                        panel_id_int,
+                        data_source_index,
+                        data_source_config["metric_table"]["metric"],
+                        gfx_version,
+                    )
                new_panel_config["Panel Config"]["data source"].append(data_source_config)
            # Write panel config to file
            filename = Path(
                TARGET_DIR.joinpath(gfx_version, f"{panel_id}_{panel_title}.yaml")
            )
            with open(filename, "w") as file:
-                file.write(AUTOGEN_TEXT)
+                file.write(get_autogen_text())
                yaml.dump(new_panel_config, file, sort_keys=False)
                print(f"File write: {filename}")
            # Calculate hash of filename
@@ -78,6 +97,56 @@ def update_analysis_config():
            ).hexdigest()


+def build_metric_id_mapping(panel_id, data_source_index, metrics, gfx_version):
+    """Record ``"<panel>.<table>.<index>" -> metric name`` for one metric table.
+
+    ``<panel>`` is ``panel_id // 100``, ``<table>`` is ``data_source_index + 1``
+    and ``<index>`` is the metric's position among the keys of ``metrics``.
+    Entries are stored in ``METRIC_ID_TO_NAME_MAP[gfx_version]``, which is
+    mutated in place.
+    """
+    for position, name in enumerate(metrics.keys()):
+        dotted_id = ".".join(
+            str(part) for part in (panel_id // 100, data_source_index + 1, position)
+        )
+        METRIC_ID_TO_NAME_MAP[gfx_version][dotted_id] = name
+
+
+def update_sets_config():
+    """Generate one ``<gfx>_sets.yaml`` per GPU arch from ``utils/unified_sets.yaml``.
+
+    Each set's per-arch metric ids are expanded to ``{metric_id: metric_name}``
+    using ``METRIC_ID_TO_NAME_MAP``, so that map must already be populated
+    (``update_analysis_config()`` fills it through ``build_metric_id_mapping``).
+    The sha256 of every written file is recorded in ``HASH_FILE_MAP``.
+
+    Raises:
+        KeyError: a set lacks an entry for an arch, or references a metric id
+            that is not in the map for that arch.
+    """
+    # parents=True so a missing intermediate "profile_configs" directory is
+    # created as well instead of failing with FileNotFoundError
+    if not SETS_TARGET_DIR.exists():
+        SETS_TARGET_DIR.mkdir(parents=True, exist_ok=True)
+        print(f"Created directory: {SETS_TARGET_DIR}")
+
+    # Read the unified sets file
+    with open(SOURCE_DIR.joinpath("unified_sets.yaml")) as file:
+        unified_sets = yaml.safe_load(file)
+
+    # Create per gfx version file
+    for gfx_version in GFX_VERSIONS:
+        id_to_name = METRIC_ID_TO_NAME_MAP[gfx_version]
+        new_sets = {"sets": []}
+
+        for sets in unified_sets["sets"]:
+            metric_entries = []
+            for metric_id in sets["metric"][gfx_version]:
+                metric_key = str(metric_id)
+                if metric_key not in id_to_name:
+                    # Name the offending set and arch; a bare KeyError on the id
+                    # alone is hard to trace back to unified_sets.yaml
+                    raise KeyError(
+                        f"Set '{sets['set_option']}' references unknown metric id "
+                        f"{metric_key} for {gfx_version}"
+                    )
+                metric_entries.append({metric_id: id_to_name[metric_key]})
+
+            # Create new set object for each set
+            new_sets["sets"].append(
+                {
+                    "title": sets["title"],
+                    "set_option": sets["set_option"],
+                    "description": sets["description"],
+                    "metric": metric_entries,
+                }
+            )
+
+        # Write gfx version sets to file
+        filename = Path(SETS_TARGET_DIR.joinpath(f"{gfx_version}_sets.yaml"))
+        with open(filename, "w") as file:
+            file.write(get_autogen_text("utils/unified_sets.yaml"))
+            yaml.dump(new_sets, file, sort_keys=False)
+            print(f"File write: {filename}")
+        # Calculate hash of filename
+        HASH_FILE_MAP[str(filename.relative_to(ROOT_DIR))] = hashlib.sha256(
+            filename.read_bytes()
+        ).hexdigest()
+
+
 def update_documentation():
    # Documentation sections
    section_panel_map = {
@@ -153,7 +222,7 @@ def update_documentation():
    # Write documentation metrics description file
    filename = Path(DOC_TARGET_DIR.joinpath("metrics_description.yaml"))
    with open(filename, "w") as file:
-        file.write(AUTOGEN_TEXT)
+        file.write(get_autogen_text())
        yaml.dump(section_metric_map, file, sort_keys=False)
        print(f"File write: {filename}")
    # Calculate hash of filename
@@ -165,12 +234,13 @@ def update_documentation():
 def update_hash():
    # Write hash file
    with open(HASH_FILE, "w") as file:
-        file.write(AUTOGEN_TEXT)
+        file.write(get_autogen_text())
        yaml.dump(HASH_FILE_MAP, file, sort_keys=False)
        print(f"File write: {HASH_FILE}")


 if __name__ == "__main__":
    update_analysis_config()
+    update_sets_config()
    update_documentation()
    update_hash()
@@ -0,0 +1,176 @@
+---
+# Pre-defined sets containing a collection of relevant metrics that can be collected in a single pass.
+# To profile customized set(s), append to this yaml file.
+
+sets:
+- title: Compute Throughput Utilization
+  set_option: compute_thruput_util
+  description: Placeholder
+  metric:
+    gfx908:
+      - 11.2.2
+      - 11.2.3
+    gfx90a:
+      - 11.2.3
+      - 11.2.4
+      - 11.2.5
+      - 11.2.6
+    gfx940:
+      - 11.2.2
+      - 11.2.3
+      - 11.2.4
+      - 11.2.5
+    gfx941:
+      - 11.2.2
+      - 11.2.3
+      - 11.2.4
+      - 11.2.5
+    gfx942:
+      - 11.2.2
+      - 11.2.3
+      - 11.2.4
+      - 11.2.5
+    gfx950:
+      - 11.2.2
+      - 11.2.3
+      - 11.2.5
+      - 11.2.6
+
+- title: Compute Throughput FLOPS
+  set_option: compute_thruput_flops
+  description: Placeholder
+  metric:
+    gfx908:
+      - 2.1.2
+      - 2.1.3
+      - 2.1.4
+      - 2.1.5
+      - 2.1.6
+    gfx90a:
+      - 2.1.2
+      - 2.1.3
+      - 2.1.4
+      - 2.1.5
+      - 2.1.6
+    gfx940:
+      - 2.1.2
+      - 2.1.3
+      - 2.1.4
+      - 2.1.5
+      - 2.1.6
+      - 2.1.7
+    gfx941:
+      - 2.1.2
+      - 2.1.3
+      - 2.1.4
+      - 2.1.5
+      - 2.1.6
+      - 2.1.7
+    gfx942:
+      - 2.1.2
+      - 2.1.3
+      - 2.1.4
+      - 2.1.5
+      - 2.1.6
+      - 2.1.7
+    gfx950:
+      - 2.1.2
+      - 2.1.3
+      - 2.1.4
+      - 2.1.5
+      - 2.1.6
+      - 2.1.8
+
+- title: Memory Throughput
+  set_option: mem_thruput
+  description: Placeholder
+  metric:
+    gfx908:
+      - 2.1.16
+      - 2.1.17
+      - 16.1.2
+      - 17.1.0
+    gfx90a:
+      - 2.1.16
+      - 2.1.17
+      - 16.1.2
+      - 17.1.0
+    gfx940:
+      - 2.1.17
+      - 2.1.18
+      - 16.1.2
+      - 17.1.0
+    gfx941:
+      - 2.1.17
+      - 2.1.18
+      - 16.1.2
+      - 17.1.0
+    gfx942:
+      - 2.1.17
+      - 2.1.18
+      - 16.1.2
+      - 17.1.0
+    gfx950:
+      - 2.1.18
+      - 2.1.19
+      - 16.1.2
+      - 17.1.0
+
+- title: Launch Stats
+  set_option: launch_stats
+  description: Placeholder
+  metric:
+    gfx908:
+      - 7.1.0
+      - 7.1.1
+      - 7.1.2
+      - 7.1.5
+      - 7.1.6
+      - 7.1.7
+      - 7.1.8
+      - 7.1.9
+    gfx90a:
+      - 7.1.0
+      - 7.1.1
+      - 7.1.2
+      - 7.1.5
+      - 7.1.6
+      - 7.1.7
+      - 7.1.8
+      - 7.1.9
+    gfx940:
+      - 7.1.0
+      - 7.1.1
+      - 7.1.2
+      - 7.1.5
+      - 7.1.6
+      - 7.1.7
+      - 7.1.8
+      - 7.1.9
+    gfx941:
+      - 7.1.0
+      - 7.1.1
+      - 7.1.2
+      - 7.1.5
+      - 7.1.6
+      - 7.1.7
+      - 7.1.8
+      - 7.1.9
+    gfx942:
+      - 7.1.0
+      - 7.1.1
+      - 7.1.2
+      - 7.1.5
+      - 7.1.6
+      - 7.1.7
+      - 7.1.8
+      - 7.1.9
+    gfx950:
+      - 7.1.0
+      - 7.1.1
+      - 7.1.2
+      - 7.1.5
+      - 7.1.6
+      - 7.1.7
+      - 7.1.8
+      - 7.1.9