Add single kernel filtering to roofline plots (#757)
* Add single kernel filtering for roofline * Add --kernel to documentation * Add kernel labels to roofline pdfs Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com> * Add test cases Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com> * Add autodetect for mode (profile or analyze) during roof validate and filter Prevent --kernel from affecting roofline in gui mode- although this may be broken in develop branch anyways Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com> * Add note about roof-only usage checking for existing profiling files in the dir. If roof-only is not provided, rocprof-compute currently assumes it has to profile in full regardless. Will look into this another day. Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com> * Update CHANGELOG.md Add line in resolved issues section to highlight that kernel filtering is now working for roofline plots * Apply changes suggested by docs team Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com> * Update projects/rocprofiler-compute/CHANGELOG.md Co-authored-by: Pratik Basyal <pratik.basyal@amd.com> --------- Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com> Co-authored-by: Pratik Basyal <pratik.basyal@amd.com>
Este commit está contenido en:
@@ -115,6 +115,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
* Fixed L2 read/write/atomic bandwidths on MI350
|
||||
* Updated metric names for better alignment between analysis configuration and documentation
|
||||
* Fixed an issue where accumulation counters could not be collected on AMD Instinct MI100
|
||||
* Updated Roofline plots to handle and apply kernel filtering.
|
||||
|
||||
|
||||
### Known issues
|
||||
|
||||
|
||||
@@ -477,7 +477,9 @@ Standalone roofline
|
||||
Roofline analysis occurs on any profile mode run, provided ``--no-roof`` option is not included.
|
||||
You don't need to include any additional roofline-specific options for roofline analysis.
|
||||
If you want to focus only on roofline-specific performance data and reduce the time it takes to profile, you can use the ``--roof-only`` option.
|
||||
This option limits the profiling to just the roofline performance counters.
|
||||
This option first checks whether the workload directory already contains profiling data (``pmc_perf.csv`` and ``roofline.csv``):
|
||||
a) If found, the existing data files are reused with the provided arguments to create another roofline PDF output; otherwise,
|
||||
b) Profile mode runs, but collection is limited to the roofline performance counters.
|
||||
|
||||
Roofline options
|
||||
----------------
|
||||
@@ -494,6 +496,10 @@ Roofline options
|
||||
Allows you to specify a device ID to collect performance data from when
|
||||
running a roofline benchmark on your system.
|
||||
|
||||
``-k``, ``--kernel <kernel-substr>``
|
||||
Allows for kernel filtering. Usage is equivalent to the current ``rocprof``
|
||||
utility. See :ref:`profiling-kernel-filtering`.
|
||||
|
||||
``--roofline-data-type <datatype>``
|
||||
Allows you to specify data types that you want plotted in the roofline PDF output(s). Selecting more than one data type will overlay the results onto the same plot. Default: FP32
|
||||
|
||||
|
||||
@@ -193,6 +193,7 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
"include_kernel_names": False,
|
||||
"is_standalone": False,
|
||||
"roofline_data_type": self.__roofline_data_type,
|
||||
"kernel_filter": False,
|
||||
}
|
||||
)
|
||||
roof_obj = self.get_socs()[self.arch].roofline_obj
|
||||
|
||||
@@ -85,6 +85,7 @@ class Roofline:
|
||||
"include_kernel_names": False,
|
||||
"is_standalone": False,
|
||||
"roofline_data_type": ["FP32"], # default to FP32
|
||||
"kernel_filter": False,
|
||||
}
|
||||
)
|
||||
self.__ai_data = None
|
||||
@@ -102,13 +103,19 @@ class Roofline:
|
||||
if hasattr(self.__args, "sort") and self.__args.sort != "ALL":
|
||||
self.__run_parameters["sort_type"] = self.__args.sort
|
||||
self.__run_parameters["roofline_data_type"] = self.__args.roofline_data_type
|
||||
if (hasattr(self.__args, "kernel") and self.__args.kernel) or (
|
||||
hasattr(self.__args, "gpu_kernel") and self.__args.gpu_kernel
|
||||
):
|
||||
self.__run_parameters["kernel_filter"] = True
|
||||
self.validate_parameters()
|
||||
|
||||
def validate_parameters(self):
|
||||
if self.__run_parameters["include_kernel_names"] and (
|
||||
not self.__run_parameters["is_standalone"]
|
||||
):
|
||||
console_error("--kernel-names cannot be used with --no-roof option")
|
||||
console_warning(
|
||||
"--kernel-names is nonactionable when used with --no-roof option"
|
||||
)
|
||||
|
||||
def roof_setup(self):
|
||||
# Setup the workload directory for roofline profiling.
|
||||
@@ -165,6 +172,48 @@ class Roofline:
|
||||
# Create the directory
|
||||
Path(final_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def validate_apply_kernel_filter(self, df, path=None):
|
||||
if self.__run_parameters["kernel_filter"] is True:
|
||||
if self.__args.mode == "profile":
|
||||
df_pmc = df["pmc_perf"]
|
||||
df_filtered = df_pmc.copy()
|
||||
df_list = (df_pmc.loc[:, "Kernel_Name"]).to_list()
|
||||
for idx in range(0, len(df_list)):
|
||||
if df_list[idx].split("(")[0] not in self.__args.kernel:
|
||||
# Drop row from dataframe if kernel has not been requested
|
||||
df_filtered.drop(index=idx, inplace=True)
|
||||
# Verify that final filtered kernel df matches the kernel list requested
|
||||
if len(df_filtered.drop_duplicates(subset=["Kernel_Name"])) != len(
|
||||
self.__args.kernel
|
||||
):
|
||||
console_debug(
|
||||
"Profiled kernels: {}\n`--kernel`: {}".format(
|
||||
df_list, self.__args.kernel
|
||||
)
|
||||
)
|
||||
console_error(
|
||||
"Roofline cannot profile - kernels requested with `--kernel` missing from profiling data!" # noqa: E501
|
||||
"\n\tRe-profile workload in full or specify subset of available kernels using `--kernel` option." # noqa: E501
|
||||
"\n\tComplete profiled kernels list can be found in pmc_perf file.", # noqa: E501
|
||||
exit=True,
|
||||
)
|
||||
# Fix df structure to resemble same df arg passed in
|
||||
df["pmc_perf"] = df_filtered
|
||||
elif self.__args.mode == "analyze":
|
||||
top_kernels_csv = Path(path).joinpath("pmc_kernel_top.csv")
|
||||
if not top_kernels_csv.is_file():
|
||||
console_error(
|
||||
"roofline", "{} does not exist".format(top_kernels_csv)
|
||||
)
|
||||
k_df = pd.read_csv(top_kernels_csv)
|
||||
k_df = k_df.loc[self.__args.gpu_kernel[0], "Kernel_Name"]
|
||||
|
||||
df["pmc_perf"] = df["pmc_perf"][
|
||||
df["pmc_perf"]["Kernel_Name"].isin(k_df)
|
||||
]
|
||||
|
||||
return df
|
||||
|
||||
@demarcate
|
||||
def empirical_roofline(
|
||||
self,
|
||||
@@ -183,6 +232,10 @@ class Roofline:
|
||||
console_debug(
|
||||
"roofline", "Path: %s" % self.__run_parameters.get("workload_dir")
|
||||
)
|
||||
# Verify kernels have been profiled and filter the df
|
||||
ret_df = self.validate_apply_kernel_filter(
|
||||
df=ret_df, path=self.__run_parameters.get("workload_dir")
|
||||
)
|
||||
self.__ai_data = calc_ai_profile(
|
||||
self.__mspec, self.__run_parameters.get("sort_type"), ret_df
|
||||
)
|
||||
@@ -192,7 +245,7 @@ class Roofline:
|
||||
console_debug(msg)
|
||||
|
||||
ops_figure = flops_figure = None
|
||||
ops_dt_list = flops_dt_list = ""
|
||||
ops_dt_list = flops_dt_list = kernel_list = ""
|
||||
|
||||
for dt in self.__run_parameters.get("roofline_data_type", []):
|
||||
gpu_arch = getattr(self.__mspec, "gpu_arch", "unknown_arch")
|
||||
@@ -245,6 +298,9 @@ class Roofline:
|
||||
original_kernel_names = []
|
||||
else:
|
||||
original_kernel_names = self.__ai_data.get("kernelNames", [])
|
||||
if self.__run_parameters.get("kernel_filter", False):
|
||||
for name in sorted(self.__args.kernel):
|
||||
kernel_list += "_" + name
|
||||
|
||||
num_kernels = len(original_kernel_names)
|
||||
|
||||
@@ -376,18 +432,23 @@ class Roofline:
|
||||
if ops_figure:
|
||||
ops_figure.write_image(
|
||||
self.__run_parameters["workload_dir"]
|
||||
+ "/empirRoof_gpu-{}{}.pdf".format(dev_id, ops_dt_list)
|
||||
+ "/empirRoof_gpu-{}{}{}.pdf".format(
|
||||
dev_id, ops_dt_list, kernel_list
|
||||
)
|
||||
)
|
||||
if flops_figure:
|
||||
flops_figure.write_image(
|
||||
self.__run_parameters["workload_dir"]
|
||||
+ "/empirRoof_gpu-{}{}.pdf".format(dev_id, flops_dt_list)
|
||||
+ "/empirRoof_gpu-{}{}{}.pdf".format(
|
||||
dev_id, flops_dt_list, kernel_list
|
||||
)
|
||||
)
|
||||
|
||||
# only save a legend if kernel_names option is toggled
|
||||
if self.__run_parameters["include_kernel_names"]:
|
||||
self.__figure.write_image(
|
||||
self.__run_parameters["workload_dir"] + "/kernelName_legend.pdf"
|
||||
self.__run_parameters["workload_dir"]
|
||||
+ "/kernelName_legend{}.pdf".format(kernel_list)
|
||||
)
|
||||
time.sleep(1)
|
||||
console_log("roofline", "Empirical Roofline PDFs saved!")
|
||||
@@ -697,6 +758,7 @@ class Roofline:
|
||||
if profiling_config.get("format_rocprof_output") == "rocpd":
|
||||
t_df["pmc_perf"] = rocpd_data.process_rocpd_csv(t_df["pmc_perf"])
|
||||
|
||||
t_df = self.validate_apply_kernel_filter(df=t_df, path=base_path)
|
||||
self.__ai_data = calc_ai_profile(
|
||||
self.__mspec, self.__run_parameters["sort_type"], t_df
|
||||
)
|
||||
|
||||
@@ -895,9 +895,48 @@ def test_roofline_empty_kernel_names_handling(binary_handler_profile_rocprof_com
|
||||
]
|
||||
workload_dir = test_utils.get_output_dir()
|
||||
|
||||
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
|
||||
config, workload_dir, options, check_success=True, roof=True
|
||||
)
|
||||
|
||||
test_utils.clean_output_dir(config["cleanup"], workload_dir)
|
||||
|
||||
|
||||
@pytest.mark.misc
|
||||
def test_roofline_kernel_filter(binary_handler_profile_rocprof_compute):
|
||||
"""
|
||||
Test roofline multi-attempt profiling with `--kernel`
|
||||
Expect to be able to re-profile from same workload if kernels are valid.
|
||||
(Validity of --kernels tested in test_roofline_kernel_filter_error_handling already)
|
||||
"""
|
||||
if soc in ("MI100"):
|
||||
pytest.skip("Skipping roofline test for MI100")
|
||||
return
|
||||
|
||||
options = [
|
||||
"--device",
|
||||
"0",
|
||||
"--roof-only",
|
||||
"--kernel-names",
|
||||
]
|
||||
workload_dir = test_utils.get_output_dir()
|
||||
|
||||
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
|
||||
config, workload_dir, options, check_success=True, roof=True
|
||||
)
|
||||
# Don't clean output dir, use same workload
|
||||
options.extend(["--kernel", config["kernel_name_1"]])
|
||||
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
|
||||
config, workload_dir, options, check_success=True, roof=True
|
||||
)
|
||||
|
||||
# Test nonexistent kernel on roof profile using existing profiling data
|
||||
# Since already profiled, throw error if non-existent kernel requested for roofline
|
||||
options.append("nonexistent_kernel_name_that_should_not_match_anything")
|
||||
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
|
||||
config, workload_dir, options, check_success=False, roof=True
|
||||
)
|
||||
assert returncode == 1
|
||||
|
||||
test_utils.clean_output_dir(config["cleanup"], workload_dir)
|
||||
|
||||
@@ -934,6 +973,10 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute):
|
||||
assert True
|
||||
return
|
||||
|
||||
# Test `--kernel` filtering outputs are present and labelled correctly
|
||||
filter_kernelName = "kernelName_legend_" + config["kernel_name_1"]
|
||||
filter_empirRoof = "empirRoof_gpu-0_" + config["kernel_name_1"]
|
||||
|
||||
plot_configurations = [
|
||||
{
|
||||
"options": ["--device", "0", "--roof-only", "--roofline-data-type", "FP32"],
|
||||
@@ -944,8 +987,15 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute):
|
||||
"expected_files": ["empirRoof_gpu-0_FP16.pdf"],
|
||||
},
|
||||
{
|
||||
"options": ["--device", "0", "--roof-only", "--kernel-names"],
|
||||
"expected_files": ["kernelName_legend.pdf"],
|
||||
"options": [
|
||||
"--device",
|
||||
"0",
|
||||
"--roof-only",
|
||||
"--kernel-names",
|
||||
"--kernel",
|
||||
config["kernel_name_1"],
|
||||
],
|
||||
"expected_files": [filter_kernelName, filter_empirRoof],
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
Referencia en una nueva incidencia
Block a user