Streamline --list-metrics command line option in rocprof-compute (#310)

* Remove L2 channels from --list-metrics --list-metrics moved to general options List metrics for the current architecture Filter blocks for metrics Removed test for --list-metrics in profile mode Test the options don't throw error Fixed --config-dir error Test stdout for command line options Provide path list for loading panel configs Show L2 Cache (per) channel metrics Changed command line option names Can show two levels only Removed filtering blocks Moved blocks to original position Removed filter block tests Removed filtering Formatting fix Readability enhancement Test formatting Filter L2 channels without sysinfo Show available metrics for current arch Intermediate commit Fixed tests Added argument sanitization Added list_metrics to ctest merge conflict resolution Updated test marker Updated changelog Fixed formatting * Updated docs
2025-09-08 20:21:46 +05:30
@@ -110,6 +110,14 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
    * Add ``--output-name`` analysis mode option to override the default file/folder name.
  * Replace `--save-dfs` analyze mode option with `--output-format csv`

+* Command-line options:
+  * `--list-metrics` and `--config-dir` options moved to general command-line options.
+  * `--list-metrics` option cannot be used without an argument (GPU architecture).
+  * `--list-metrics` option does not show the number of L2 channels.
+  * `--list-available-metrics` profile mode option to display the metrics available for profiling on the current GPU.
+  * `--list-available-metrics` analyze mode option to display the metrics available for analysis.
+  * `--block` option cannot be used with the `--list-metrics` or `--list-available-metrics` options.
+
 ### Resolved issues

 * Fixed not detecting memory clock issue when using amd-smi
@@ -72,13 +72,13 @@ There are three high-level GPU analysis views:
   * Visualized memory chart requires the width of the terminal output to be greater than or equal to 234 to display the whole chart properly.
   * Visualized Roofline chart is adapted to the initial terminal size only. If it is not clear, you may need to adjust the terminal size and regenerate it to check the display effect. Roofline analysis provides detailed, structured table output with measured empirical peak values for comparison.

-.. _cli-list-metrics:
+.. _cli-list-available-metrics:

-2. Use ``--list-metrics`` to generate a list of available metrics for inspection.
+2. Use ``--list-available-metrics`` to generate a list of available metrics for inspection.

   .. code-block:: shell-session

-      $ rocprof-compute analyze -p workloads/vcopy/MI200/ --list-metrics gfx90a
+      $ rocprof-compute analyze -p workloads/vcopy/MI200/ --list-available-metrics

                                       __                                       _
       _ __ ___   ___ _ __  _ __ ___  / _|       ___ ___  _ __ ___  _ __  _   _| |_ ___
@@ -359,11 +359,11 @@ The following example only collects the counters required to calculate ``Total V
   ...


-To see a list of available hardware report blocks, use the ``--list-metrics`` option.
+To see a list of available hardware report blocks, use the ``--list-available-metrics`` option.

 .. code-block:: shell-session

-   $ rocprof-compute profile --list-metrics
+   $ rocprof-compute profile --list-available-metrics

                                    __                                       _
    _ __ ___   ___ _ __  _ __ ___  / _|       ___ ___  _ __ ___  _ __  _   _| |_ ___
@@ -61,13 +61,14 @@ Common filters to customize data collection include:

 See :ref:`Filtering <filtering>` for an in-depth walkthrough.

-To view available metrics by hardware block, use the ``profile`` mode ``--list-metrics``
-option with an optional system architecture argument (inferred if not provided):
+To view available metrics by hardware block, use the ``--list-metrics``
+option with a system architecture argument, or use ``--list-available-metrics``
+to view the metrics for the current system architecture:

 .. code-block:: shell

-   $ rocprof-compute profile --list-metrics
-   $ rocprof-compute profile --list-metrics <sys_arch>
+   $ rocprof-compute --list-metrics <sys_arch>
+   $ rocprof-compute profile --list-available-metrics

 .. _basic-analyze-cli:

@@ -30,13 +30,15 @@ from pathlib import Path


 def print_avail_arch(avail_arch: list):
-    ret_str = "\t\t\tList all available metrics for analysis on specified arch:"
+    ret_str = "List all available metrics for analysis on specified arch:"
    for arch in avail_arch:
-        ret_str += "\n\t\t\t   {}".format(arch)
+        ret_str += "\n   {}".format(arch)
    return ret_str


-def add_general_group(parser, rocprof_compute_version):
+def add_general_group(
+    parser, rocprof_compute_version, supported_archs, rocprof_compute_home
+):
    general_group = parser.add_argument_group("General Options")

    general_group.add_argument(
@@ -55,6 +57,20 @@ def add_general_group(parser, rocprof_compute_version):
    general_group.add_argument(
        "-q", "--quiet", action="store_true", help="Reduce output and run quietly."
    )
+    general_group.add_argument(
+        "--list-metrics",
+        dest="list_metrics",
+        metavar="",
+        choices=supported_archs.keys(),  # ["gfx908", "gfx90a"],
+        help=print_avail_arch(supported_archs.keys()),
+    )
+    general_group.add_argument(
+        "--config-dir",
+        dest="config_dir",
+        metavar="",
+        help="Specify the directory of customized report section configs.",
+        default=rocprof_compute_home.joinpath("rocprof_compute_soc/analysis_configs/"),
+    )
    # Nowhere to load specs from in db mode
    if "database" not in parser.usage:
        general_group.add_argument(
@@ -71,7 +87,9 @@ def omniarg_parser(

    ## General Command Line Options
    ## ----------------------------
-    add_general_group(parser, rocprof_compute_version)
+    add_general_group(
+        parser, rocprof_compute_version, supported_archs, rocprof_compute_home
+    )
    parser._positionals.title = "Modes"
    parser._optionals.title = "Help"

@@ -106,7 +124,9 @@ Examples:
    )
    profile_parser._optionals.title = "Help"

-    add_general_group(profile_parser, rocprof_compute_version)
+    add_general_group(
+        profile_parser, rocprof_compute_version, supported_archs, rocprof_compute_home
+    )
    profile_group = profile_parser.add_argument_group("Profile Options")
    roofline_group = profile_parser.add_argument_group("Standalone Roofline Options")

@@ -194,6 +214,12 @@ Examples:
            return value
        raise argparse.ArgumentTypeError(f"Invalid metric id: {value}")

+    profile_group.add_argument(
+        "--list-available-metrics",
+        dest="list_available_metrics",
+        help="\t\t\tList all available metrics for analysis on current arch",
+        action="store_true",
+    )
    profile_group.add_argument(
        "-b",
        "--block",
@@ -209,16 +235,6 @@ Examples:
            "\t\t\tCan provide multiple space separated arguments."
        ),
    )
-    profile_group.add_argument(
-        "--list-metrics",
-        metavar="",
-        nargs="?",
-        const="",
-        # Argument to --list-metrics is optional
-        choices=[""] + list(supported_archs.keys()),  # ["gfx908", "gfx90a"],
-        help=print_avail_arch(supported_archs.keys()),
-    )
-
    profile_group.add_argument(
        "--list-sets",
        action="store_true",
@@ -232,13 +248,6 @@ Examples:
        "counters in a single pass.\n\t\t\tFor available sets, see --list-sets",
    )

-    profile_group.add_argument(
-        "--config-dir",
-        dest="config_dir",
-        metavar="",
-        help="\t\t\tSpecify the directory of customized report section configs.",
-        default=rocprof_compute_home.joinpath("rocprof_compute_soc/analysis_configs/"),
-    )
    profile_group.add_argument(
        "--join-type",
        metavar="",
@@ -465,7 +474,9 @@ Examples:
    )
    db_parser._optionals.title = "Help"

-    add_general_group(db_parser, rocprof_compute_version)
+    add_general_group(
+        db_parser, rocprof_compute_version, supported_archs, rocprof_compute_home
+    )
    interaction_group = db_parser.add_argument_group("Interaction Type")
    connection_group = db_parser.add_argument_group("Connection Options")

@@ -565,7 +576,9 @@ Examples:
    )
    analyze_parser._optionals.title = "Help"

-    add_general_group(analyze_parser, rocprof_compute_version)
+    add_general_group(
+        analyze_parser, rocprof_compute_version, supported_archs, rocprof_compute_home
+    )
    analyze_group = analyze_parser.add_argument_group("Analyze Options")
    analyze_advanced_group = analyze_parser.add_argument_group("Advanced Options")

@@ -585,10 +598,10 @@ Examples:
        help="\t\tList all detected kernels and kernel dispatches.",
    )
    analyze_group.add_argument(
-        "--list-metrics",
-        metavar="",
-        choices=supported_archs.keys(),  # ["gfx906", "gfx908", "gfx90a"],
-        help=print_avail_arch(supported_archs.keys()),
+        "--list-available-metrics",
+        dest="list_available_metrics",
+        help="\t\tList all available metrics for analysis on current arch",
+        action="store_true",
    )
    analyze_group.add_argument(
        "-k",
@@ -767,13 +780,6 @@ Examples:
        default=2,
        help="\t\tSpecify desired decimal precision of analysis results. (DEFAULT: 2)",
    )
-    analyze_advanced_group.add_argument(
-        "--config-dir",
-        dest="config_dir",
-        metavar="",
-        help="\t\tSpecify the directory of customized configs.",
-        default=rocprof_compute_home.joinpath("rocprof_compute_soc/analysis_configs/"),
-    )
    analyze_advanced_group.add_argument(
        "--cols",
        type=int,
@@ -85,6 +85,8 @@ class RocProfCompute:
        setattr(self.__args, "loglevel", self.__loglevel)
        set_locale_encoding()

+        self.sanitize()
+
        if self.__mode == "profile":
            self.detect_profiler()
        elif self.__mode == "analyze":
@@ -143,6 +145,21 @@ class RocProfCompute:
            self.__analyze_mode = "cli"
        return

+    def sanitize(self):
+        block = False
+        if (hasattr(self.__args, "filter_metrics") and self.__args.filter_metrics) or (
+            hasattr(self.__args, "filter_blocks") and self.__args.filter_blocks
+        ):
+            block = True
+
+        if self.__args.list_metrics is not None and block:
+            console_error("Cannot use --list-metrics with --blocks")
+        if (
+            hasattr(self.__args, "list_available_metrics")
+            and self.__args.list_available_metrics
+        ) and block:
+            console_error("Cannot use --list-available-metrics with --blocks")
+
    @demarcate
    def load_soc_specs(self, sysinfo: dict = None):
        """Load OmniSoC instance for RocProfCompute run"""
@@ -190,6 +207,15 @@ class RocProfCompute:
            if self.__args.specs:
                print(generate_machine_specs(self.__args))
                sys.exit(0)
+            elif self.__args.list_metrics is not None:
+                self.list_metrics()
+                sys.exit(0)
+            elif self.__args.config_dir:
+                parser.print_help(sys.stderr)
+                console_error(
+                    "rocprof-compute requires you to pass --list-metrics "
+                    "with --config-dir."
+                )
            parser.print_help(sys.stderr)
            console_error(
                "rocprof-compute requires you to pass a valid mode. Detected None."
@@ -225,16 +251,27 @@ class RocProfCompute:

    @demarcate
    def list_metrics(self):
-        if not self.__args.list_metrics:
-            arch = self.__mspec.gpu_arch
-        else:
-            arch = self.__args.list_metrics
+        self.load_soc_specs()
+
+        for_current_arch = False
+        if (
+            hasattr(self.__args, "list_available_metrics")
+            and self.__args.list_available_metrics
+        ):
+            for_current_arch = True
+
+        arch = (
+            self.__mspec.gpu_arch
+            if (for_current_arch or self.__args.list_metrics is None)
+            else self.__args.list_metrics
+        )
        if arch in self.__supported_archs.keys():
            ac = schema.ArchConfig()
-            ac.panel_configs = file_io.load_panel_configs([
-                self.__args.config_dir.joinpath(arch)
-            ])
-            sys_info = self.__mspec.get_class_members().iloc[0]
+            config_dir = Path(self.__args.config_dir)
+            ac.panel_configs = file_io.load_panel_configs([config_dir.joinpath(arch)])
+            sys_info = (
+                self.__mspec.get_class_members().iloc[0] if for_current_arch else None
+            )
            parser.build_dfs(archConfigs=ac, filter_metrics=[], sys_info=sys_info)
            for key, value in ac.metric_list.items():
                prefix = ""
@@ -303,7 +340,7 @@ class RocProfCompute:
        self.print_graphic()
        self.load_soc_specs()

-        if self.__args.list_metrics is not None:
+        if self.__args.list_metrics is not None or self.__args.list_available_metrics:
            self.list_metrics()
        elif self.__args.list_sets:
            self.list_sets()
@@ -555,24 +555,25 @@ def build_dfs(archConfigs, filter_metrics, sys_info):
                ):
                    # print(data_config["metric"])
                    new_metrics = {}
-                    # NB: support single placeholder for now!!
-                    p_range = data_config["metric"].pop("placeholder_range")
-                    metric, metric_expr = data_config["metric"].popitem()
-                    # print(len(data_config["metric"]))
-                    # data_config['metric'].clear()
-                    for p, r in p_range.items():
-                        # NB: We have to resolve placeholder range first if it
-                        #   is a build-in var. It will be too late to do it in
-                        #   eval_metric(). This is the only reason we need
-                        #   sys_info at this stage.
-                        var = calc_builtin_var(r, sys_info)
-                        for i in range(var):
-                            new_key = metric.replace(p, str(i))
-                            new_val = {}
-                            for k, v in metric_expr.items():
-                                new_val[k] = metric_expr[k].replace(p, str(i))
-                            # print(new_val)
-                            new_metrics[new_key] = new_val
+                    if sys_info is not None:
+                        # NB: support single placeholder for now!!
+                        p_range = data_config["metric"].pop("placeholder_range")
+                        metric, metric_expr = data_config["metric"].popitem()
+                        # print(len(data_config["metric"]))
+                        # data_config['metric'].clear()
+                        for p, r in p_range.items():
+                            # NB: We have to resolve placeholder range first if it
+                            #   is a build-in var. It will be too late to do it in
+                            #   eval_metric(). This is the only reason we need
+                            #   sys_info at this stage.
+                            var = calc_builtin_var(r, sys_info)
+                            for i in range(var):
+                                new_key = metric.replace(p, str(i))
+                                new_val = {}
+                                for k, v in metric_expr.items():
+                                    new_val[k] = metric_expr[k].replace(p, str(i))
+                                    # print(new_val)
+                                    new_metrics[new_key] = new_val

                    # print(p_range)
                    # print(new_metrics)
@@ -616,6 +617,16 @@ def build_dfs(archConfigs, filter_metrics, sys_info):
                    df = pd.DataFrame(columns=headers)

                    i = 0
+
+                    if not data_config["metric"]:
+                        data_source_idx = (
+                            str(data_config["id"] // 100)
+                            + "."
+                            + str(data_config["id"] % 100)
+                        )
+                        metric_idx = data_source_idx + "." + str(i)
+                        metric_list[data_source_idx] = data_config["title"]
+
                    for key, entries in data_config["metric"].items():
                        data_source_idx = (
                            str(data_config["id"] // 100)
@@ -120,6 +120,86 @@ def test_list_metrics_gfx908(binary_handler_analyze_rocprof_compute):
    test_utils.clean_output_dir(config["cleanup"], workload_dir)


+@pytest.mark.list_metrics
+def test_list_metrics_gfx908_with_block(binary_handler_analyze_rocprof_compute):
+    code = binary_handler_analyze_rocprof_compute([
+        "analyze",
+        "--list-metrics",
+        "gfx908",
+        "--block",
+        "1",
+    ])
+    assert code == 1
+
+    for dir in indirs:
+        workload_dir = test_utils.setup_workload_dir(dir)
+        code = binary_handler_analyze_rocprof_compute([
+            "analyze",
+            "--path",
+            workload_dir,
+            "--list-metrics",
+            "gfx908",
+            "--block",
+            "1",
+        ])
+        assert code == 1
+
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+
+@pytest.mark.list_metrics
+def test_list_available_metrics(binary_handler_analyze_rocprof_compute, capsys):
+    code = binary_handler_analyze_rocprof_compute([
+        "analyze",
+        "--list-available-metrics",
+    ])
+    assert code == 1
+
+    for dir in indirs:
+        workload_dir = test_utils.setup_workload_dir(dir)
+        code = binary_handler_analyze_rocprof_compute([
+            "analyze",
+            "--path",
+            workload_dir,
+            "--list-available-metrics",
+        ])
+        assert code == 0
+
+        # Test output
+        output = capsys.readouterr().out
+        assert "0. Top Stats" in output
+        assert "1. System Info" in output
+
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+
+@pytest.mark.list_metrics
+def test_list_available_metrics_with_block(
+    binary_handler_analyze_rocprof_compute, capsys
+):
+    code = binary_handler_analyze_rocprof_compute([
+        "analyze",
+        "--list-available-metrics",
+        "--block",
+        "1",
+    ])
+    assert code == 1
+
+    for dir in indirs:
+        workload_dir = test_utils.setup_workload_dir(dir)
+        code = binary_handler_analyze_rocprof_compute([
+            "analyze",
+            "--path",
+            workload_dir,
+            "--list-available-metrics",
+            "--block",
+            "1",
+        ])
+        assert code == 1
+
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+
@pytest.mark.filter_block
 def test_filter_block_1(binary_handler_analyze_rocprof_compute):
    for dir in indirs:
@@ -1706,7 +1706,7 @@ def test_instmix_section_global_write_kernel(binary_handler_profile_rocprof_comp

@pytest.mark.section
 def test_list_metrics(binary_handler_profile_rocprof_compute):
-    options = ["--list-metrics"]
+    options = ["--list-metrics", "gfx90a"]
    workload_dir = test_utils.get_output_dir()
    _ = binary_handler_profile_rocprof_compute(
        config, workload_dir, options, check_success=True, roof=False
@@ -1716,6 +1716,53 @@ def test_list_metrics(binary_handler_profile_rocprof_compute):
    test_utils.clean_output_dir(config["cleanup"], workload_dir)


+@pytest.mark.section
+def test_list_metrics_with_block(binary_handler_profile_rocprof_compute):
+    options = ["--list-metrics", "gfx90a", "--block", "10"]
+    workload_dir = test_utils.get_output_dir()
+    code = binary_handler_profile_rocprof_compute(
+        config, workload_dir, options, check_success=False, roof=False
+    )
+    # Should return code 1 since --block cannot be used with --list-metrics
+    assert code == 1
+    # workload dir should be empty
+    assert not os.listdir(workload_dir)
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+
+@pytest.mark.section
+def test_list_available_metrics(binary_handler_profile_rocprof_compute, capsys):
+    options = ["--list-available-metrics"]
+    workload_dir = test_utils.get_output_dir()
+    _ = binary_handler_profile_rocprof_compute(
+        config, workload_dir, options, check_success=True, roof=False
+    )
+    # workload dir should be empty
+    assert not os.listdir(workload_dir)
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+    # Test output
+    output = capsys.readouterr().out
+    assert "0 -> Top Stats" in output
+    assert "1 -> System Info" in output
+
+
+@pytest.mark.section
+def test_list_available_metrics_with_block(
+    binary_handler_profile_rocprof_compute, capsys
+):
+    options = ["--list-available-metrics", "--block", "10"]
+    workload_dir = test_utils.get_output_dir()
+    code = binary_handler_profile_rocprof_compute(
+        config, workload_dir, options, check_success=False, roof=False
+    )
+    # Should return code 1 since --block cannot be used with --list-available-metrics
+    assert code == 1
+    # workload dir should be empty
+    assert not os.listdir(workload_dir)
+    test_utils.clean_output_dir(config["cleanup"], workload_dir)
+
+
@pytest.mark.misc
 def test_comprehensive_error_paths():
    """Simplified test for error path coverage"""
@@ -9359,3 +9359,19 @@ def test_alignment_and_width():
        max_length=8,
    )
    assert pytest.approx(float(result.strip()), rel=1e-9) == value
+
+
+# =============================================================================
+# TESTS FOR MODELESS COMMAND LINE OPTIONS
+# =============================================================================
+
+
+@pytest.mark.list_metrics
+def test_list_metrics(binary_handler_analyze_rocprof_compute, capsys):
+    return_code = binary_handler_analyze_rocprof_compute(["--list-metrics", "gfx90a"])
+    assert return_code == 0
+
+    # Test output
+    output = capsys.readouterr().out
+    assert "6 -> Workgroup Manager (SPI)" in output
+    assert "5.2 -> Command processor packet processor (CPC)" in output