Add single kernel filtering to roofline plots (#757)

* Add single kernel filtering for roofline
* Add --kernel to documentation
* Add kernel labels to roofline pdfs

Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>

* Add test cases

Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>

* Add autodetect for mode (profile or analyze) during roof validate and filter
Prevent --kernel from affecting roofline in GUI mode, although GUI mode may already be broken in the develop branch

Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>

* Add note about roof-only usage checking for existing profiling files in the dir. If roof-only is not provided, rocprof-compute currently assumes it has to profile in full regardless. Will look into this another day.

Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>

* Update CHANGELOG.md

Add line in resolved issues section to highlight that kernel filtering is now working for roofline plots

* Apply changes suggested by docs team

Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>

* Update projects/rocprofiler-compute/CHANGELOG.md

Co-authored-by: Pratik Basyal <pratik.basyal@amd.com>

---------

Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>
Co-authored-by: Pratik Basyal <pratik.basyal@amd.com>
Este commit está contenido en:
cfallows-amd
2025-08-27 13:41:07 -04:00
cometido por GitHub
padre 2e50d88fe6
commit c68ba44e72
Se han modificado 5 ficheros con 129 adiciones y 8 borrados
+2
Ver fichero
@@ -115,6 +115,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Fixed L2 read/write/atomic bandwidths on MI350
* Update metric names for better alignment between analysis configuration and documentation
* Fixed an issue where accumulation counters could not be collected on AMD Instinct MI100
* Updated Roofline plots to handle and apply kernel filtering.
### Known issues
@@ -477,7 +477,9 @@ Standalone roofline
Roofline analysis occurs on any profile mode run, provided ``--no-roof`` option is not included.
You don't need to include any additional roofline-specific options for roofline analysis.
If you want to focus only on roofline-specific performance data and reduce the time it takes to profile, you can use the ``--roof-only`` option.
This option limits the profiling to just the roofline performance counters.
This option checks whether profiling data already exists in the workload directory (``pmc_perf.csv`` and ``roofline.csv``):
a) If the data is found, it is reused with the provided arguments to create another roofline PDF output.
b) Otherwise, profile mode runs but collects only the roofline performance counters.
Roofline options
----------------
@@ -494,6 +496,10 @@ Roofline options
Allows you to specify a device ID to collect performance data from when
running a roofline benchmark on your system.
``-k``, ``--kernel <kernel-substr>``
Allows for kernel filtering. Usage is equivalent to the current ``rocprof``
utility. See :ref:`profiling-kernel-filtering`.
``--roofline-data-type <datatype>``
Allows you to specify data types that you want plotted in the roofline PDF output(s). Selecting more than one data type will overlay the results onto the same plot. Default: FP32
@@ -193,6 +193,7 @@ class webui_analysis(OmniAnalyze_Base):
"include_kernel_names": False,
"is_standalone": False,
"roofline_data_type": self.__roofline_data_type,
"kernel_filter": False,
}
)
roof_obj = self.get_socs()[self.arch].roofline_obj
+67 -5
Ver fichero
@@ -85,6 +85,7 @@ class Roofline:
"include_kernel_names": False,
"is_standalone": False,
"roofline_data_type": ["FP32"], # default to FP32
"kernel_filter": False,
}
)
self.__ai_data = None
@@ -102,13 +103,19 @@ class Roofline:
if hasattr(self.__args, "sort") and self.__args.sort != "ALL":
self.__run_parameters["sort_type"] = self.__args.sort
self.__run_parameters["roofline_data_type"] = self.__args.roofline_data_type
if (hasattr(self.__args, "kernel") and self.__args.kernel) or (
hasattr(self.__args, "gpu_kernel") and self.__args.gpu_kernel
):
self.__run_parameters["kernel_filter"] = True
self.validate_parameters()
def validate_parameters(self):
    """Warn about run-parameter combinations that have no effect.

    Reads ``include_kernel_names`` and ``is_standalone`` from the run
    parameters. When kernel names are requested for a run that is not
    standalone, a warning is logged and execution continues.
    """
    wants_kernel_names = self.__run_parameters["include_kernel_names"]
    if wants_kernel_names and not self.__run_parameters["is_standalone"]:
        # Only warn: the earlier console_error call for this combination
        # was superseded by this warning and must not run alongside it.
        console_warning(
            "--kernel-names is nonactionable when used with --no-roof option"
        )
def roof_setup(self):
# Setup the workload directory for roofline profiling.
@@ -165,6 +172,48 @@ class Roofline:
# Create the directory
Path(final_dir).mkdir(parents=True, exist_ok=True)
def validate_apply_kernel_filter(self, df, path=None):
    """Restrict ``df["pmc_perf"]`` to the kernels selected on the command line.

    Nothing is changed unless the ``kernel_filter`` run parameter is True.

    In ``profile`` mode each ``Kernel_Name`` is reduced to its base name (the
    text before the first ``(``) and kept only when that base name is one of
    the ``--kernel`` values. Every requested kernel must match at least one
    profiled row, otherwise an error is reported with ``exit=True``.

    In ``analyze`` mode the kernel names are looked up in
    ``pmc_kernel_top.csv`` under ``path`` using the first ``gpu_kernel``
    selection, and ``df["pmc_perf"]`` is reduced to the rows with those names.

    Args:
        df: Mapping that holds a pandas DataFrame under the ``"pmc_perf"`` key.
        path: Directory containing ``pmc_kernel_top.csv`` (analyze mode only).

    Returns:
        The same ``df`` mapping, with ``"pmc_perf"`` replaced by the filtered
        frame when a filter was applied.
    """
    if self.__run_parameters["kernel_filter"] is not True:
        return df

    if self.__args.mode == "profile":
        requested = set(self.__args.kernel)
        df_pmc = df["pmc_perf"]
        # Compare on the base name so argument signatures do not matter.
        base_names = df_pmc["Kernel_Name"].map(
            lambda name: str(name).split("(", 1)[0]
        )
        # A boolean mask is independent of the frame's index labels, unlike
        # dropping rows by positional offset.
        keep = base_names.isin(requested)
        df_filtered = df_pmc.loc[keep].copy()

        # Verify by base name that every requested kernel was profiled.
        # Counting unique full names would mis-handle several signatures
        # that share one base name.
        if set(base_names[keep]) != requested:
            console_debug(
                "Profiled kernels: {}\n`--kernel`: {}".format(
                    df_pmc["Kernel_Name"].to_list(), self.__args.kernel
                )
            )
            console_error(
                "Roofline cannot profile - kernels requested with `--kernel` missing from profiling data!"  # noqa: E501
                "\n\tRe-profile workload in full or specify subset of available kernels using `--kernel` option."  # noqa: E501
                "\n\tComplete profiled kernels list can be found in pmc_perf file.",  # noqa: E501
                exit=True,
            )

        # Hand back the same mapping shape that was passed in.
        df["pmc_perf"] = df_filtered

    elif self.__args.mode == "analyze":
        top_kernels_csv = Path(path).joinpath("pmc_kernel_top.csv")
        if not top_kernels_csv.is_file():
            # NOTE(review): presumably console_error aborts by default;
            # otherwise the read below fails on the missing file - confirm.
            console_error("roofline", "{} does not exist".format(top_kernels_csv))
        k_df = pd.read_csv(top_kernels_csv)
        # NOTE(review): assumes gpu_kernel[0] is a list of row labels so that
        # .loc yields a Series of names - confirm against the argument parser.
        k_df = k_df.loc[self.__args.gpu_kernel[0], "Kernel_Name"]
        df["pmc_perf"] = df["pmc_perf"][df["pmc_perf"]["Kernel_Name"].isin(k_df)]

    return df
@demarcate
def empirical_roofline(
self,
@@ -183,6 +232,10 @@ class Roofline:
console_debug(
"roofline", "Path: %s" % self.__run_parameters.get("workload_dir")
)
# Verify kernels have been profiled and filter the df
ret_df = self.validate_apply_kernel_filter(
df=ret_df, path=self.__run_parameters.get("workload_dir")
)
self.__ai_data = calc_ai_profile(
self.__mspec, self.__run_parameters.get("sort_type"), ret_df
)
@@ -192,7 +245,7 @@ class Roofline:
console_debug(msg)
ops_figure = flops_figure = None
ops_dt_list = flops_dt_list = ""
ops_dt_list = flops_dt_list = kernel_list = ""
for dt in self.__run_parameters.get("roofline_data_type", []):
gpu_arch = getattr(self.__mspec, "gpu_arch", "unknown_arch")
@@ -245,6 +298,9 @@ class Roofline:
original_kernel_names = []
else:
original_kernel_names = self.__ai_data.get("kernelNames", [])
if self.__run_parameters.get("kernel_filter", False):
for name in sorted(self.__args.kernel):
kernel_list += "_" + name
num_kernels = len(original_kernel_names)
@@ -376,18 +432,23 @@ class Roofline:
if ops_figure:
ops_figure.write_image(
self.__run_parameters["workload_dir"]
+ "/empirRoof_gpu-{}{}.pdf".format(dev_id, ops_dt_list)
+ "/empirRoof_gpu-{}{}{}.pdf".format(
dev_id, ops_dt_list, kernel_list
)
)
if flops_figure:
flops_figure.write_image(
self.__run_parameters["workload_dir"]
+ "/empirRoof_gpu-{}{}.pdf".format(dev_id, flops_dt_list)
+ "/empirRoof_gpu-{}{}{}.pdf".format(
dev_id, flops_dt_list, kernel_list
)
)
# only save a legend if kernel_names option is toggled
if self.__run_parameters["include_kernel_names"]:
self.__figure.write_image(
self.__run_parameters["workload_dir"] + "/kernelName_legend.pdf"
self.__run_parameters["workload_dir"]
+ "/kernelName_legend{}.pdf".format(kernel_list)
)
time.sleep(1)
console_log("roofline", "Empirical Roofline PDFs saved!")
@@ -697,6 +758,7 @@ class Roofline:
if profiling_config.get("format_rocprof_output") == "rocpd":
t_df["pmc_perf"] = rocpd_data.process_rocpd_csv(t_df["pmc_perf"])
t_df = self.validate_apply_kernel_filter(df=t_df, path=base_path)
self.__ai_data = calc_ai_profile(
self.__mspec, self.__run_parameters["sort_type"], t_df
)
@@ -895,9 +895,48 @@ def test_roofline_empty_kernel_names_handling(binary_handler_profile_rocprof_com
]
workload_dir = test_utils.get_output_dir()
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
config, workload_dir, options, check_success=True, roof=True
)
test_utils.clean_output_dir(config["cleanup"], workload_dir)
@pytest.mark.misc
def test_roofline_kernel_filter(binary_handler_profile_rocprof_compute):
    """
    Test roofline multi-attempt profiling with `--kernel`
    Expect to be able to re-profile from same workload if kernels are valid.
    (Validity of `--kernel` tested in test_roofline_kernel_filter_error_handling
    already)
    """
    # Compare for equality: `soc in ("MI100")` is a substring test against a
    # plain string, not tuple membership. pytest.skip raises, so no return
    # statement is needed after it.
    if soc == "MI100":
        pytest.skip("Skipping roofline test for MI100")

    options = [
        "--device",
        "0",
        "--roof-only",
        "--kernel-names",
    ]
    workload_dir = test_utils.get_output_dir()

    # First pass: roofline-only profile of the workload without a filter.
    binary_handler_profile_rocprof_compute(
        config, workload_dir, options, check_success=True, roof=True
    )

    # Don't clean output dir, use same workload
    options.extend(["--kernel", config["kernel_name_1"]])
    binary_handler_profile_rocprof_compute(
        config, workload_dir, options, check_success=True, roof=True
    )

    # Test nonexistent kernel on roof profile using existing profiling data
    # Since already profiled, throw error if non-existent kernel requested for roofline
    # (appended after `--kernel`, so it is passed as a second kernel name)
    options.append("nonexistent_kernel_name_that_should_not_match_anything")
    returncode = binary_handler_profile_rocprof_compute(
        config, workload_dir, options, check_success=False, roof=True
    )
    assert returncode == 1

    test_utils.clean_output_dir(config["cleanup"], workload_dir)
@@ -934,6 +973,10 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute):
assert True
return
# Test `--kernel` filtering outputs are present and labelled correctly
filter_kernelName = "kernelName_legend_" + config["kernel_name_1"]
filter_empirRoof = "empirRoof_gpu-0_" + config["kernel_name_1"]
plot_configurations = [
{
"options": ["--device", "0", "--roof-only", "--roofline-data-type", "FP32"],
@@ -944,8 +987,15 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute):
"expected_files": ["empirRoof_gpu-0_FP16.pdf"],
},
{
"options": ["--device", "0", "--roof-only", "--kernel-names"],
"expected_files": ["kernelName_legend.pdf"],
"options": [
"--device",
"0",
"--roof-only",
"--kernel-names",
"--kernel",
config["kernel_name_1"],
],
"expected_files": [filter_kernelName, filter_empirRoof],
},
]