diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index e8f3170946..33addf4b52 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -110,6 +110,14 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Add ``--output-name`` analysis mode option to override the default file/folder name. * Replace `--save-dfs` analyze mode option with `--output-format csv` +* Command-line options: + * `--list-metrics` and `--config-dir` options moved to general command-line options. + * `--list-metrics` option cannot be used without an argument (GPU architecture). + * `--list-metrics` option does not show the number of L2 channels. + * `--list-available-metrics` profile mode option to display the metrics available for profiling on the current GPU. + * `--list-available-metrics` analyze mode option to display the metrics available for analysis. + * `--block` option cannot be used with `--list-metrics` and `--list-available-metrics` options. + ### Resolved issues * Fixed not detecting memory clock issue when using amd-smi diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst index 2c43a61a49..e0932f840b 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst @@ -72,13 +72,13 @@ There are three high-level GPU analysis views: * Visualized memory chart requires the width of the terminal output to be greater than or equal to 234 to display the whole chart properly. * Visualized Roofline chart is adapted to the initial terminal size only. If it is not clear, you may need to adjust the terminal size and regenerate it to check the display effect. Roofline analysis provides detailed, structured table output with measured empirical peak values for comparison. -.. _cli-list-metrics: +.. _cli-list-available-metrics: -2. 
Use ``--list-metrics`` to generate a list of available metrics for inspection. +2. Use ``--list-available-metrics`` to generate a list of available metrics for inspection. .. code-block:: shell-session - $ rocprof-compute analyze -p workloads/vcopy/MI200/ --list-metrics gfx90a + $ rocprof-compute analyze -p workloads/vcopy/MI200/ --list-available-metrics __ _ _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ diff --git a/projects/rocprofiler-compute/docs/how-to/profile/mode.rst b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst index 8fc65b35d2..fcf6e11827 100644 --- a/projects/rocprofiler-compute/docs/how-to/profile/mode.rst +++ b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst @@ -359,11 +359,11 @@ The following example only collects the counters required to calculate ``Total V ... -To see a list of available hardware report blocks, use the ``--list-metrics`` option. +To see a list of available hardware report blocks, use the ``--list-available-metrics`` option. .. code-block:: shell-session - $ rocprof-compute profile --list-metrics + $ rocprof-compute profile --list-available-metrics __ _ _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ diff --git a/projects/rocprofiler-compute/docs/how-to/use.rst b/projects/rocprofiler-compute/docs/how-to/use.rst index 594939521f..2c43a111ce 100644 --- a/projects/rocprofiler-compute/docs/how-to/use.rst +++ b/projects/rocprofiler-compute/docs/how-to/use.rst @@ -61,13 +61,14 @@ Common filters to customize data collection include: See :ref:`Filtering ` for an in-depth walkthrough. -To view available metrics by hardware block, use the ``profile`` mode ``--list-metrics`` -option with an optional system architecture argument (inferred if not provided): +To view available metrics by hardware block, use the ``--list-metrics`` +option with a system architecture argument or ``--list-available-metrics`` +to view the metrics for current system architecture: .. 
code-block:: shell - $ rocprof-compute profile --list-metrics - $ rocprof-compute profile --list-metrics + $ rocprof-compute --list-metrics + $ rocprof-compute profile --list-available-metrics .. _basic-analyze-cli: diff --git a/projects/rocprofiler-compute/src/argparser.py b/projects/rocprofiler-compute/src/argparser.py index 51446eae49..ac17f98227 100644 --- a/projects/rocprofiler-compute/src/argparser.py +++ b/projects/rocprofiler-compute/src/argparser.py @@ -30,13 +30,15 @@ from pathlib import Path def print_avail_arch(avail_arch: list): - ret_str = "\t\t\tList all available metrics for analysis on specified arch:" + ret_str = "List all available metrics for analysis on specified arch:" for arch in avail_arch: - ret_str += "\n\t\t\t {}".format(arch) + ret_str += "\n {}".format(arch) return ret_str -def add_general_group(parser, rocprof_compute_version): +def add_general_group( + parser, rocprof_compute_version, supported_archs, rocprof_compute_home +): general_group = parser.add_argument_group("General Options") general_group.add_argument( @@ -55,6 +57,20 @@ def add_general_group(parser, rocprof_compute_version): general_group.add_argument( "-q", "--quiet", action="store_true", help="Reduce output and run quietly." 
) + general_group.add_argument( + "--list-metrics", + dest="list_metrics", + metavar="", + choices=supported_archs.keys(), # ["gfx908", "gfx90a"], + help=print_avail_arch(supported_archs.keys()), + ) + general_group.add_argument( + "--config-dir", + dest="config_dir", + metavar="", + help="Specify the directory of customized report section configs.", + default=rocprof_compute_home.joinpath("rocprof_compute_soc/analysis_configs/"), + ) # Nowhere to load specs from in db mode if "database" not in parser.usage: general_group.add_argument( @@ -71,7 +87,9 @@ def omniarg_parser( ## General Command Line Options ## ---------------------------- - add_general_group(parser, rocprof_compute_version) + add_general_group( + parser, rocprof_compute_version, supported_archs, rocprof_compute_home + ) parser._positionals.title = "Modes" parser._optionals.title = "Help" @@ -106,7 +124,9 @@ Examples: ) profile_parser._optionals.title = "Help" - add_general_group(profile_parser, rocprof_compute_version) + add_general_group( + profile_parser, rocprof_compute_version, supported_archs, rocprof_compute_home + ) profile_group = profile_parser.add_argument_group("Profile Options") roofline_group = profile_parser.add_argument_group("Standalone Roofline Options") @@ -194,6 +214,12 @@ Examples: return value raise argparse.ArgumentTypeError(f"Invalid metric id: {value}") + profile_group.add_argument( + "--list-available-metrics", + dest="list_available_metrics", + help="\t\t\tList all available metrics for analysis on current arch", + action="store_true", + ) profile_group.add_argument( "-b", "--block", @@ -209,16 +235,6 @@ Examples: "\t\t\tCan provide multiple space separated arguments." 
), ) - profile_group.add_argument( - "--list-metrics", - metavar="", - nargs="?", - const="", - # Argument to --list-metrics is optional - choices=[""] + list(supported_archs.keys()), # ["gfx908", "gfx90a"], - help=print_avail_arch(supported_archs.keys()), - ) - profile_group.add_argument( "--list-sets", action="store_true", @@ -232,13 +248,6 @@ Examples: "counters in a single pass.\n\t\t\tFor available sets, see --list-sets", ) - profile_group.add_argument( - "--config-dir", - dest="config_dir", - metavar="", - help="\t\t\tSpecify the directory of customized report section configs.", - default=rocprof_compute_home.joinpath("rocprof_compute_soc/analysis_configs/"), - ) profile_group.add_argument( "--join-type", metavar="", @@ -465,7 +474,9 @@ Examples: ) db_parser._optionals.title = "Help" - add_general_group(db_parser, rocprof_compute_version) + add_general_group( + db_parser, rocprof_compute_version, supported_archs, rocprof_compute_home + ) interaction_group = db_parser.add_argument_group("Interaction Type") connection_group = db_parser.add_argument_group("Connection Options") @@ -565,7 +576,9 @@ Examples: ) analyze_parser._optionals.title = "Help" - add_general_group(analyze_parser, rocprof_compute_version) + add_general_group( + analyze_parser, rocprof_compute_version, supported_archs, rocprof_compute_home + ) analyze_group = analyze_parser.add_argument_group("Analyze Options") analyze_advanced_group = analyze_parser.add_argument_group("Advanced Options") @@ -585,10 +598,10 @@ Examples: help="\t\tList all detected kernels and kernel dispatches.", ) analyze_group.add_argument( - "--list-metrics", - metavar="", - choices=supported_archs.keys(), # ["gfx906", "gfx908", "gfx90a"], - help=print_avail_arch(supported_archs.keys()), + "--list-available-metrics", + dest="list_available_metrics", + help="\t\tList all available metrics for analysis on current arch", + action="store_true", ) analyze_group.add_argument( "-k", @@ -767,13 +780,6 @@ Examples: default=2, 
help="\t\tSpecify desired decimal precision of analysis results. (DEFAULT: 2)", ) - analyze_advanced_group.add_argument( - "--config-dir", - dest="config_dir", - metavar="", - help="\t\tSpecify the directory of customized configs.", - default=rocprof_compute_home.joinpath("rocprof_compute_soc/analysis_configs/"), - ) analyze_advanced_group.add_argument( "--cols", type=int, diff --git a/projects/rocprofiler-compute/src/rocprof_compute_base.py b/projects/rocprofiler-compute/src/rocprof_compute_base.py index 33877c6a2a..e75d6be2ba 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_base.py @@ -85,6 +85,8 @@ class RocProfCompute: setattr(self.__args, "loglevel", self.__loglevel) set_locale_encoding() + self.sanitize() + if self.__mode == "profile": self.detect_profiler() elif self.__mode == "analyze": @@ -143,6 +145,21 @@ class RocProfCompute: self.__analyze_mode = "cli" return + def sanitize(self): + block = False + if (hasattr(self.__args, "filter_metrics") and self.__args.filter_metrics) or ( + hasattr(self.__args, "filter_blocks") and self.__args.filter_blocks + ): + block = True + + if self.__args.list_metrics is not None and block: + console_error("Cannot use --list-metrics with --blocks") + if ( + hasattr(self.__args, "list_available_metrics") + and self.__args.list_available_metrics + ) and block: + console_error("Cannot use --list-available-metrics with --blocks") + @demarcate def load_soc_specs(self, sysinfo: dict = None): """Load OmniSoC instance for RocProfCompute run""" @@ -190,6 +207,15 @@ class RocProfCompute: if self.__args.specs: print(generate_machine_specs(self.__args)) sys.exit(0) + elif self.__args.list_metrics is not None: + self.list_metrics() + sys.exit(0) + elif self.__args.config_dir: + parser.print_help(sys.stderr) + console_error( + "rocprof-compute requires you to pass --list-metrics " + "with --config-dir." 
+ ) parser.print_help(sys.stderr) console_error( "rocprof-compute requires you to pass a valid mode. Detected None." @@ -225,16 +251,27 @@ class RocProfCompute: @demarcate def list_metrics(self): - if not self.__args.list_metrics: - arch = self.__mspec.gpu_arch - else: - arch = self.__args.list_metrics + self.load_soc_specs() + + for_current_arch = False + if ( + hasattr(self.__args, "list_available_metrics") + and self.__args.list_available_metrics + ): + for_current_arch = True + + arch = ( + self.__mspec.gpu_arch + if (for_current_arch or self.__args.list_metrics is None) + else self.__args.list_metrics + ) if arch in self.__supported_archs.keys(): ac = schema.ArchConfig() - ac.panel_configs = file_io.load_panel_configs([ - self.__args.config_dir.joinpath(arch) - ]) - sys_info = self.__mspec.get_class_members().iloc[0] + config_dir = Path(self.__args.config_dir) + ac.panel_configs = file_io.load_panel_configs([config_dir.joinpath(arch)]) + sys_info = ( + self.__mspec.get_class_members().iloc[0] if for_current_arch else None + ) parser.build_dfs(archConfigs=ac, filter_metrics=[], sys_info=sys_info) for key, value in ac.metric_list.items(): prefix = "" @@ -303,7 +340,7 @@ class RocProfCompute: self.print_graphic() self.load_soc_specs() - if self.__args.list_metrics is not None: + if self.__args.list_metrics is not None or self.__args.list_available_metrics: self.list_metrics() elif self.__args.list_sets: self.list_sets() diff --git a/projects/rocprofiler-compute/src/utils/parser.py b/projects/rocprofiler-compute/src/utils/parser.py index a2f625f206..bfc043e489 100755 --- a/projects/rocprofiler-compute/src/utils/parser.py +++ b/projects/rocprofiler-compute/src/utils/parser.py @@ -555,24 +555,25 @@ def build_dfs(archConfigs, filter_metrics, sys_info): ): # print(data_config["metric"]) new_metrics = {} - # NB: support single placeholder for now!! 
- p_range = data_config["metric"].pop("placeholder_range") - metric, metric_expr = data_config["metric"].popitem() - # print(len(data_config["metric"])) - # data_config['metric'].clear() - for p, r in p_range.items(): - # NB: We have to resolve placeholder range first if it - # is a build-in var. It will be too late to do it in - # eval_metric(). This is the only reason we need - # sys_info at this stage. - var = calc_builtin_var(r, sys_info) - for i in range(var): - new_key = metric.replace(p, str(i)) - new_val = {} - for k, v in metric_expr.items(): - new_val[k] = metric_expr[k].replace(p, str(i)) - # print(new_val) - new_metrics[new_key] = new_val + if sys_info is not None: + # NB: support single placeholder for now!! + p_range = data_config["metric"].pop("placeholder_range") + metric, metric_expr = data_config["metric"].popitem() + # print(len(data_config["metric"])) + # data_config['metric'].clear() + for p, r in p_range.items(): + # NB: We have to resolve placeholder range first if it + # is a build-in var. It will be too late to do it in + # eval_metric(). This is the only reason we need + # sys_info at this stage. + var = calc_builtin_var(r, sys_info) + for i in range(var): + new_key = metric.replace(p, str(i)) + new_val = {} + for k, v in metric_expr.items(): + new_val[k] = metric_expr[k].replace(p, str(i)) + # print(new_val) + new_metrics[new_key] = new_val # print(p_range) # print(new_metrics) @@ -616,6 +617,16 @@ def build_dfs(archConfigs, filter_metrics, sys_info): df = pd.DataFrame(columns=headers) i = 0 + + if not data_config["metric"]: + data_source_idx = ( + str(data_config["id"] // 100) + + "." + + str(data_config["id"] % 100) + ) + metric_idx = data_source_idx + "." 
+ str(i) + metric_list[data_source_idx] = data_config["title"] + for key, entries in data_config["metric"].items(): data_source_idx = ( str(data_config["id"] // 100) diff --git a/projects/rocprofiler-compute/tests/test_analyze_commands.py b/projects/rocprofiler-compute/tests/test_analyze_commands.py index e0e63868fb..9a87a3d7b2 100644 --- a/projects/rocprofiler-compute/tests/test_analyze_commands.py +++ b/projects/rocprofiler-compute/tests/test_analyze_commands.py @@ -120,6 +120,86 @@ def test_list_metrics_gfx908(binary_handler_analyze_rocprof_compute): test_utils.clean_output_dir(config["cleanup"], workload_dir) +@pytest.mark.list_metrics +def test_list_metrics_gfx908_with_block(binary_handler_analyze_rocprof_compute): + code = binary_handler_analyze_rocprof_compute([ + "analyze", + "--list-metrics", + "gfx908", + "--block", + "1", + ]) + assert code == 1 + + for dir in indirs: + workload_dir = test_utils.setup_workload_dir(dir) + code = binary_handler_analyze_rocprof_compute([ + "analyze", + "--path", + workload_dir, + "--list-metrics", + "gfx908", + "--block", + "1", + ]) + assert code == 1 + + test_utils.clean_output_dir(config["cleanup"], workload_dir) + + +@pytest.mark.list_metrics +def test_list_available_metrics(binary_handler_analyze_rocprof_compute, capsys): + code = binary_handler_analyze_rocprof_compute([ + "analyze", + "--list-available-metrics", + ]) + assert code == 1 + + for dir in indirs: + workload_dir = test_utils.setup_workload_dir(dir) + code = binary_handler_analyze_rocprof_compute([ + "analyze", + "--path", + workload_dir, + "--list-available-metrics", + ]) + assert code == 0 + + # Test output + output = capsys.readouterr().out + assert "0. Top Stats" in output + assert "1. 
System Info" in output + + test_utils.clean_output_dir(config["cleanup"], workload_dir) + + +@pytest.mark.list_metrics +def test_list_available_metrics_with_block( + binary_handler_analyze_rocprof_compute, capsys +): + code = binary_handler_analyze_rocprof_compute([ + "analyze", + "--list-available-metrics", + "--block", + "1", + ]) + assert code == 1 + + for dir in indirs: + workload_dir = test_utils.setup_workload_dir(dir) + code = binary_handler_analyze_rocprof_compute([ + "analyze", + "--path", + workload_dir, + "--list-available-metrics", + "--block", + "1", + ]) + assert code == 1 + + test_utils.clean_output_dir(config["cleanup"], workload_dir) + + @pytest.mark.filter_block def test_filter_block_1(binary_handler_analyze_rocprof_compute): for dir in indirs: diff --git a/projects/rocprofiler-compute/tests/test_profile_general.py b/projects/rocprofiler-compute/tests/test_profile_general.py index 6a4d567711..459812f9a2 100644 --- a/projects/rocprofiler-compute/tests/test_profile_general.py +++ b/projects/rocprofiler-compute/tests/test_profile_general.py @@ -1706,7 +1706,7 @@ def test_instmix_section_global_write_kernel(binary_handler_profile_rocprof_comp @pytest.mark.section def test_list_metrics(binary_handler_profile_rocprof_compute): - options = ["--list-metrics"] + options = ["--list-metrics", "gfx90a"] workload_dir = test_utils.get_output_dir() _ = binary_handler_profile_rocprof_compute( config, workload_dir, options, check_success=True, roof=False @@ -1716,6 +1716,53 @@ def test_list_metrics(binary_handler_profile_rocprof_compute): test_utils.clean_output_dir(config["cleanup"], workload_dir) +@pytest.mark.section +def test_list_metrics_with_block(binary_handler_profile_rocprof_compute): + options = ["--list-metrics", "gfx90a", "--block", "10"] + workload_dir = test_utils.get_output_dir() + code = binary_handler_profile_rocprof_compute( + config, workload_dir, options, check_success=False, roof=False + ) + # Should return code 1 since --block cannot be used 
with --list-metrics + assert code == 1 + # workload dir should be empty + assert not os.listdir(workload_dir) + test_utils.clean_output_dir(config["cleanup"], workload_dir) + + +@pytest.mark.section +def test_list_available_metrics(binary_handler_profile_rocprof_compute, capsys): + options = ["--list-available-metrics"] + workload_dir = test_utils.get_output_dir() + _ = binary_handler_profile_rocprof_compute( + config, workload_dir, options, check_success=True, roof=False + ) + # workload dir should be empty + assert not os.listdir(workload_dir) + test_utils.clean_output_dir(config["cleanup"], workload_dir) + + # Test output + output = capsys.readouterr().out + assert "0 -> Top Stats" in output + assert "1 -> System Info" in output + + +@pytest.mark.section +def test_list_available_metrics_with_block( + binary_handler_profile_rocprof_compute, capsys +): + options = ["--list-available-metrics", "--block", "10"] + workload_dir = test_utils.get_output_dir() + code = binary_handler_profile_rocprof_compute( + config, workload_dir, options, check_success=False, roof=False + ) + # Should return code 1 since --block cannot be used with --list-available-metrics + assert code == 1 + # workload dir should be empty + assert not os.listdir(workload_dir) + test_utils.clean_output_dir(config["cleanup"], workload_dir) + + @pytest.mark.misc def test_comprehensive_error_paths(): """Simplified test for error path coverage""" diff --git a/projects/rocprofiler-compute/tests/test_utils.py b/projects/rocprofiler-compute/tests/test_utils.py index 5fbcc2065e..85c58a51fe 100644 --- a/projects/rocprofiler-compute/tests/test_utils.py +++ b/projects/rocprofiler-compute/tests/test_utils.py @@ -9359,3 +9359,19 @@ def test_alignment_and_width(): max_length=8, ) assert pytest.approx(float(result.strip()), rel=1e-9) == value + + +# ============================================================================= +# TESTS FOR MODELESS COMMAND LINE OPTIONS +# 
============================================================================= + + +@pytest.mark.list_metrics +def test_list_metrics(binary_handler_analyze_rocprof_compute, capsys): + return_code = binary_handler_analyze_rocprof_compute(["--list-metrics", "gfx90a"]) + assert return_code == 0 + + # Test output + output = capsys.readouterr().out + assert "6 -> Workgroup Manager (SPI)" in output + assert "5.2 -> Command processor packet processor (CPC)" in output