[rocprofiler-compute] Fix kernel/dispatch filtering (#2479)

* Fix kernel/dispatch filtering in GUI

* Disallow --kernel and --dispatch filtering in analyze --gui mode since
  GUI frontend offers dropdown menu for kernel and dispatch filtering
    * Update CHANGELOG and documentation

* Gracefully handle N/A values

* Ensure workload path is valid before using it in GUI

* Ignore kernel filters if dispatch filters provided

* Add documentation for dispatch filtering overriding kernel filtering

* Fix typo

* Fix documentation

* remove unnecessary whitespace

* Address review comments

* Allow kernel/dispatch filtering with --gui

* Address review comments

* Address review comments

* Update CHANGELOG

* Fix formatting
This commit is contained in:
vedithal-amd
2026-01-20 10:02:31 -05:00
committed by GitHub
vanhempi a926660670
commit 4a5cbbfba5
7 muutettua tiedostoa jossa 33 lisäystä ja 31 poistoa
@@ -49,6 +49,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Fix issue where counter collection data was empty when profiling workloads that spawn multiple child processes
* Fix issue where dispatch filtering in a range (e.g. >2) was not working
* Fix redundant warnings for compute/memory partition not found on GPUs older than the MI 300 series by skipping partition checks
### Removed
@@ -346,6 +346,7 @@ Show System Speed-of-Light and CS_Busy blocks only
this case, ``1`` is the ID for System Speed-of-Light and ``5.1.0`` the ID for
GPU Busy Cycles metric.
Filter kernels
First, list the top kernels in your application using `--list-stats`.
@@ -562,12 +563,6 @@ WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host33static_block_size_config_providerILj256EEEjLb0ELNS3_11target_archE942EZNS3_25xorwow_generator_templateINS0_13device_systemENS3_23default_config_providerIL16rocrand_rng_type401EEEE4initEvEUlT_DpT0_E_JPN14rocrand_device13xorwow_engineEjjyyEEEvT3_DpT4_
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host23default_config_providerIL16rocrand_rng_type401EEEfLb0ELNS3_11target_archE942EZZNS3_25xorwow_generator_templateINS0_13device_systemES6_E8generateIfNS3_20uniform_distributionIfjEEEE14rocrand_statusPT_mT0_ENKUlSF_E_clISt17integral_constantIbLb0EEEEDaSF_EUlSF_DpT0_E_JPN14rocrand_device13xorwow_engineEjPfmSD_EEEvT3_DpT4_
DEBUG Calculating expressions for kernel: void bodyForce_block<256>(HIP_vector_type<float, 4u> const*, HIP_vector_type<float, 4u>*, float, int)
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host33static_block_size_config_providerILj256EEEjLb0ELNS3_11target_archE942EZNS3_25xorwow_generator_templateINS0_13device_systemENS3_23default_config_providerIL16rocrand_rng_type401EEEE4initEvEUlT_DpT0_E_JPN14rocrand_device13xorwow_engineEjjyyEEEvT3_DpT4_
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host23default_config_providerIL16rocrand_rng_type401EEEfLb0ELNS3_11target_archE942EZZNS3_25xorwow_generator_templateINS0_13device_systemES6_E8generateIfNS3_20uniform_distributionIfjEEEE14rocrand_statusPT_mT0_ENKUlSF_E_clISt17integral_constantIbLb0EEEEDaSF_EUlSF_DpT0_E_JPN14rocrand_device13xorwow_engineEjPfmSD_EEEvT3_DpT4_
DEBUG Calculating expressions for kernel: void bodyForce_block<256>(HIP_vector_type<float, 4u> const*, HIP_vector_type<float, 4u>*, float, int)
DEBUG Calculated metric values
DEBUG Calculated roofline data points
DEBUG [analysis] generating analysis
@@ -28,7 +28,9 @@ Launch the standalone GUI analyzer
----------------------------------
To launch the ROCm Compute Profiler GUI analyzer, include the ``--gui`` flag with your
desired analysis command. For example:
desired analysis command.
For example:
.. code-block:: shell-session
@@ -386,26 +386,24 @@ class OmniAnalyze_Base:
sys.exit(0)
# Ensure analysis output does not overwrite existing files
if not args.output_name:
return
if args.output_name:
if not re.match(r"^[A-Za-z0-9_-]+$", args.output_name):
console_error(
"analysis",
"Analysis output file/folder name must "
"contain only alphanumeric characters "
"or underscores (_), hyphens (-).",
)
if not re.match(r"^[A-Za-z0-9_-]+$", args.output_name):
console_error(
"analysis",
"Analysis output file/folder name must "
"contain only alphanumeric characters "
"or underscores (_), hyphens (-).",
)
path_to_check = args.output_name
if args.output_format in ("txt", "db"):
path_to_check += f".{args.output_format}"
path_to_check = args.output_name
if args.output_format in ("txt", "db"):
path_to_check += f".{args.output_format}"
if Path(path_to_check).exists():
console_error(
f"Analysis output file/folder {path_to_check} already exists. "
"Please choose a different name."
)
if Path(path_to_check).exists():
console_error(
f"Analysis output file/folder {path_to_check} already exists. "
"Please choose a different name."
)
# Check if any kernel's counters are missing due to iteration multiplexing
if (
@@ -51,7 +51,6 @@ class webui_analysis(OmniAnalyze_Base):
self.app = dash.Dash(
__name__, title=PROJECT_NAME, external_stylesheets=[dbc.themes.CYBORG]
)
self.dest_dir = str(Path(args.path[0][0]).absolute().resolve())
self.arch: Optional[str] = None
self.__hidden_sections = ["Memory Chart"]
@@ -90,6 +89,7 @@ class webui_analysis(OmniAnalyze_Base):
kernel_top_df = base_data.dfs[1]
for kernel_id in base_data.filter_kernel_ids:
filt_kernel_names.append(str(kernel_top_df.loc[kernel_id, "Kernel_Name"]))
input_filters["kernel"] = filt_kernel_names
# setup app layout
from utils.gui_components.header import get_header
@@ -338,6 +338,7 @@ class webui_analysis(OmniAnalyze_Base):
)
args = self.get_args()
self.dest_dir = str(Path(args.path[0][0]).absolute().resolve())
# create 'mega dataframe'
self._runs[self.dest_dir].raw_pmc = file_io.create_df_pmc(
@@ -61,7 +61,7 @@ def multi_bar_chart(
def create_instruction_mix_bar_chart(display_df: pd.DataFrame, df_unit: str) -> px.bar:
display_df = display_df.copy()
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "" else 0)
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0)
return px.bar(
display_df,
@@ -78,7 +78,7 @@ def create_multi_bar_charts(
display_df: pd.DataFrame, table_id: int, df_unit: str
) -> list[px.bar]:
display_df = display_df.copy()
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "" else 0)
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0)
nested_bar = multi_bar_chart(table_id, display_df)
charts = []
@@ -103,7 +103,9 @@ def create_multi_bar_charts(
def create_sol_charts(display_df: pd.DataFrame, table_id: int) -> list[px.bar]:
display_df = display_df.copy()
display_df["Avg"] = display_df["Avg"].apply(lambda x: float(x) if x != "" else 0.0)
display_df["Avg"] = display_df["Avg"].apply(
lambda x: float(x) if x != "N/A" else 0.0
)
charts = []
@@ -144,7 +146,7 @@ def create_sol_charts(display_df: pd.DataFrame, table_id: int) -> list[px.bar]:
elif table_id == 1101:
# Special formatting reference 'Pct of Peak' value
display_df["Pct of Peak"] = display_df["Pct of Peak"].apply(
lambda x: float(x) if x != "" else 0.0
lambda x: float(x) if x != "N/A" else 0.0
)
charts.append(
px.bar(
@@ -1290,6 +1290,8 @@ def apply_dispatch_filter(df: pd.DataFrame, workload: schema.Workload) -> pd.Dat
# NB: support ignoring the 1st n dispatched execution by '> n'
# The better way may be parsing python slice string
for dispatch_id in workload.filter_dispatch_ids:
if isinstance(dispatch_id, str) and ">" in dispatch_id:
dispatch_id = re.match(r"\>\s*(\d+)", dispatch_id).group(1)
if int(dispatch_id) >= len(df): # subtract 2 bc of the two header rows
console_error("analysis", f"{dispatch_id} is an invalid dispatch id.")
@@ -1297,7 +1299,7 @@ def apply_dispatch_filter(df: pd.DataFrame, workload: schema.Workload) -> pd.Dat
isinstance(workload.filter_dispatch_ids[0], str)
and ">" in workload.filter_dispatch_ids[0]
):
dispatch_match = re.match(r"\> (\d+)", workload.filter_dispatch_ids[0])
dispatch_match = re.match(r"\>\s*(\d+)", workload.filter_dispatch_ids[0])
df = df[
df[schema.PMC_PERF_FILE_PREFIX]["Dispatch_ID"]
> int(dispatch_match.group(1))