diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index e4d1648c36..c36b4b297a 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -115,6 +115,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Fixed L2 read/write/atomic bandwidths on MI350 * Update metric names for better alignment between analysis configuration and documentation * Fixed an issue where accumulation counters could not be collected on AMD Instinct MI100 +* Updated Roofline plots to handle and apply kernel filtering. + ### Known issues diff --git a/projects/rocprofiler-compute/docs/how-to/profile/mode.rst b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst index c6f774d5f0..8fc65b35d2 100644 --- a/projects/rocprofiler-compute/docs/how-to/profile/mode.rst +++ b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst @@ -477,7 +477,9 @@ Standalone roofline Roofline analysis occurs on any profile mode run, provided ``--no-roof`` option is not included. You don't need to include any additional roofline-specific options for roofline analysis. If you want to focus only on roofline-specific performance data and reduce the time it takes to profile, you can use the ``--roof-only`` option. -This option limits the profiling to just the roofline performance counters. +This option checks if there is existing profiling data in the workload directory (``pmc_perf.csv`` and ``roofline.csv``): + a) If found, uses the data files with the provided arguments to create another roofline PDF output; otherwise, + b) Profile mode runs but is limited to collecting only roofline performance counters. Roofline options ---------------- @@ -494,6 +496,10 @@ Roofline options Allows you to specify a device ID to collect performance data from when running a roofline benchmark on your system. +``-k``, ``--kernel `` + Allows for kernel filtering. 
Usage is equivalent to the current ``rocprof`` + utility. See :ref:`profiling-kernel-filtering`. + ``--roofline-data-type `` Allows you to specify data types that you want plotted in the roofline PDF output(s). Selecting more than one data type will overlay the results onto the same plot. Default: FP32 diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py index 904b09890a..2bdaaece0e 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py @@ -193,6 +193,7 @@ class webui_analysis(OmniAnalyze_Base): "include_kernel_names": False, "is_standalone": False, "roofline_data_type": self.__roofline_data_type, + "kernel_filter": False, } ) roof_obj = self.get_socs()[self.arch].roofline_obj diff --git a/projects/rocprofiler-compute/src/roofline.py b/projects/rocprofiler-compute/src/roofline.py index cc7a970fb8..8af0cbc518 100644 --- a/projects/rocprofiler-compute/src/roofline.py +++ b/projects/rocprofiler-compute/src/roofline.py @@ -85,6 +85,7 @@ class Roofline: "include_kernel_names": False, "is_standalone": False, "roofline_data_type": ["FP32"], # default to FP32 + "kernel_filter": False, } ) self.__ai_data = None @@ -102,13 +103,19 @@ class Roofline: if hasattr(self.__args, "sort") and self.__args.sort != "ALL": self.__run_parameters["sort_type"] = self.__args.sort self.__run_parameters["roofline_data_type"] = self.__args.roofline_data_type + if (hasattr(self.__args, "kernel") and self.__args.kernel) or ( + hasattr(self.__args, "gpu_kernel") and self.__args.gpu_kernel + ): + self.__run_parameters["kernel_filter"] = True self.validate_parameters() def validate_parameters(self): if self.__run_parameters["include_kernel_names"] and ( not self.__run_parameters["is_standalone"] ): - console_error("--kernel-names cannot be used with --no-roof option") + 
def validate_apply_kernel_filter(self, df, path=None):
    """Restrict ``df["pmc_perf"]`` to the kernels selected on the command line.

    Nothing happens unless the ``kernel_filter`` run parameter is ``True``.

    * ``profile`` mode: keeps the rows whose ``Kernel_Name``, truncated at
      the first ``(``, is one of the names in ``args.kernel``. An error is
      reported when a requested name matches no profiled row.
    * ``analyze`` mode: reads ``pmc_kernel_top.csv`` from ``path``, selects
      the rows labelled by ``args.gpu_kernel[0]`` and keeps the ``pmc_perf``
      rows whose ``Kernel_Name`` is among the selected names.

    Args:
        df: Mapping holding a pandas DataFrame under the ``"pmc_perf"`` key.
            The entry is replaced in place with the filtered frame.
        path: Directory expected to contain ``pmc_kernel_top.csv``; only
            read in ``analyze`` mode.

    Returns:
        The same ``df`` mapping that was passed in.
    """
    if self.__run_parameters["kernel_filter"] is not True:
        return df

    if self.__args.mode == "profile":
        df_pmc = df["pmc_perf"]
        requested = set(self.__args.kernel)
        # Compare on the name without its argument list, e.g. "foo(int)" -> "foo".
        base_names = df_pmc["Kernel_Name"].map(
            lambda name: str(name).split("(")[0]
        )
        # Boolean mask instead of drop(index=<position>): positional counters
        # are only valid labels when the frame happens to carry a 0..n-1 index.
        keep = base_names.isin(requested)
        # Validate per requested name rather than by counting unique full
        # names, so one name matching several signatures (or a name repeated
        # on the command line) is not reported as missing.
        missing = requested.difference(base_names[keep])
        if missing:
            console_debug(
                "Profiled kernels: {}\n`--kernel`: {}".format(
                    df_pmc["Kernel_Name"].to_list(), self.__args.kernel
                )
            )
            console_error(
                "Roofline cannot profile - kernels requested with `--kernel` missing from profiling data!"  # noqa: E501
                "\n\tRe-profile workload in full or specify subset of available kernels using `--kernel` option."  # noqa: E501
                "\n\tComplete profiled kernels list can be found in pmc_perf file.",  # noqa: E501
                exit=True,
            )
        # Keep the structure of the mapping identical to the one passed in.
        df["pmc_perf"] = df_pmc[keep].copy()
    elif self.__args.mode == "analyze":
        top_kernels_csv = Path(path).joinpath("pmc_kernel_top.csv")
        if not top_kernels_csv.is_file():
            # NOTE(review): presumably console_error terminates the run here;
            # otherwise read_csv below raises on the missing file - confirm.
            console_error("roofline", "{} does not exist".format(top_kernels_csv))
        top_kernels = pd.read_csv(top_kernels_csv)
        selected_names = top_kernels.loc[self.__args.gpu_kernel[0], "Kernel_Name"]

        pmc_perf = df["pmc_perf"]
        df["pmc_perf"] = pmc_perf[pmc_perf["Kernel_Name"].isin(selected_names)]

    return df
@pytest.mark.misc
def test_roofline_kernel_filter(binary_handler_profile_rocprof_compute):
    """
    Test roofline multi-attempt profiling with `--kernel`.

    Runs the roofline three times against the same workload directory:
    an unfiltered run, a run filtered to a known kernel (both expected to
    succeed), and a run that additionally requests a kernel name that cannot
    match anything (expected to exit with return code 1).
    (Validity of --kernel tested in test_roofline_kernel_filter_error_handling already)
    """
    # Equality, not `in ("MI100")`: parentheses without a trailing comma give
    # a plain string, which turns `in` into a substring test.
    if soc == "MI100":
        # pytest.skip raises, so no explicit return is needed afterwards.
        pytest.skip("Skipping roofline test for MI100")

    options = [
        "--device",
        "0",
        "--roof-only",
        "--kernel-names",
    ]
    workload_dir = test_utils.get_output_dir()

    # Initial run without a kernel filter.
    binary_handler_profile_rocprof_compute(
        config, workload_dir, options, check_success=True, roof=True
    )

    # Don't clean output dir, use same workload
    options.extend(["--kernel", config["kernel_name_1"]])
    binary_handler_profile_rocprof_compute(
        config, workload_dir, options, check_success=True, roof=True
    )

    # Test nonexistent kernel on roof profile using existing profiling data
    # Since already profiled, throw error if non-existent kernel requested for roofline
    options.append("nonexistent_kernel_name_that_should_not_match_anything")
    returncode = binary_handler_profile_rocprof_compute(
        config, workload_dir, options, check_success=False, roof=True
    )
    assert returncode == 1

    test_utils.clean_output_dir(config["cleanup"], workload_dir)