[rocprofiler-compute] Fix kernel/dispatch filtering (#2479)
* Fix kernel/dispatch filtering in GUI
* Disallow --kernel and --dispatch filtering in analyze --gui mode since
GUI frontend offers dropdown menu for kernel and dispatch filtering
* Update CHANGELOG and documentation
* Gracefully handle N/A values
* Ensure workload path is valid before using it in GUI
* Ignore kernel filters if dispatch filters provided
* Add documentation for dispatch filtering overriding kernel filtering
* Fix typo
* Fix documentation
* remove unnecessary whitespace
* Address review comments
* Allow kernel/dispatch filtering with --gui
* Address review comments
* Address review comments
* Update CHANGELOG
* Fix formatting
This commit is contained in:
@@ -49,6 +49,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
|
||||
* Fix issue where counter collection data was empty when profiling workloads which spawn multiple child processes
|
||||
|
||||
* Fix issue where dispatch filtering in a range (e.g. >2) was not working
|
||||
|
||||
* Fix redundant warnings for compute/memory partition not found for < MI 300 series GPUs by skipping partition checks
|
||||
|
||||
### Removed
|
||||
|
||||
@@ -346,6 +346,7 @@ Show System Speed-of-Light and CS_Busy blocks only
|
||||
this case, ``1`` is the ID for System Speed-of-Light and ``5.1.0`` the ID for
|
||||
GPU Busy Cycles metric.
|
||||
|
||||
|
||||
Filter kernels
|
||||
First, list the top kernels in your application using `--list-stats`.
|
||||
|
||||
@@ -562,12 +563,6 @@ WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
|
||||
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
|
||||
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
|
||||
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
|
||||
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host33static_block_size_config_providerILj256EEEjLb0ELNS3_11target_archE942EZNS3_25xorwow_generator_templateINS0_13device_systemENS3_23default_config_providerIL16rocrand_rng_type401EEEE4initEvEUlT_DpT0_E_JPN14rocrand_device13xorwow_engineEjjyyEEEvT3_DpT4_
|
||||
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host23default_config_providerIL16rocrand_rng_type401EEEfLb0ELNS3_11target_archE942EZZNS3_25xorwow_generator_templateINS0_13device_systemES6_E8generateIfNS3_20uniform_distributionIfjEEEE14rocrand_statusPT_mT0_ENKUlSF_E_clISt17integral_constantIbLb0EEEEDaSF_EUlSF_DpT0_E_JPN14rocrand_device13xorwow_engineEjPfmSD_EEEvT3_DpT4_
|
||||
DEBUG Calculating expressions for kernel: void bodyForce_block<256>(HIP_vector_type<float, 4u> const*, HIP_vector_type<float, 4u>*, float, int)
|
||||
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host33static_block_size_config_providerILj256EEEjLb0ELNS3_11target_archE942EZNS3_25xorwow_generator_templateINS0_13device_systemENS3_23default_config_providerIL16rocrand_rng_type401EEEE4initEvEUlT_DpT0_E_JPN14rocrand_device13xorwow_engineEjjyyEEEvT3_DpT4_
|
||||
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host23default_config_providerIL16rocrand_rng_type401EEEfLb0ELNS3_11target_archE942EZZNS3_25xorwow_generator_templateINS0_13device_systemES6_E8generateIfNS3_20uniform_distributionIfjEEEE14rocrand_statusPT_mT0_ENKUlSF_E_clISt17integral_constantIbLb0EEEEDaSF_EUlSF_DpT0_E_JPN14rocrand_device13xorwow_engineEjPfmSD_EEEvT3_DpT4_
|
||||
DEBUG Calculating expressions for kernel: void bodyForce_block<256>(HIP_vector_type<float, 4u> const*, HIP_vector_type<float, 4u>*, float, int)
|
||||
DEBUG Calculated metric values
|
||||
DEBUG Calculated roofline data points
|
||||
DEBUG [analysis] generating analysis
|
||||
|
||||
@@ -28,7 +28,9 @@ Launch the standalone GUI analyzer
|
||||
----------------------------------
|
||||
|
||||
To launch the ROCm Compute Profiler GUI analyzer, include the ``--gui`` flag with your
|
||||
desired analysis command. For example:
|
||||
desired analysis command.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
|
||||
@@ -386,26 +386,24 @@ class OmniAnalyze_Base:
|
||||
sys.exit(0)
|
||||
|
||||
# Ensure analysis output does not overwrite existing files
|
||||
if not args.output_name:
|
||||
return
|
||||
if args.output_name:
|
||||
if not re.match(r"^[A-Za-z0-9_-]+$", args.output_name):
|
||||
console_error(
|
||||
"analysis",
|
||||
"Analysis output file/folder name must "
|
||||
"contain only alphanumeric characters "
|
||||
"or underscores (_), hyphens (-).",
|
||||
)
|
||||
|
||||
if not re.match(r"^[A-Za-z0-9_-]+$", args.output_name):
|
||||
console_error(
|
||||
"analysis",
|
||||
"Analysis output file/folder name must "
|
||||
"contain only alphanumeric characters "
|
||||
"or underscores (_), hyphens (-).",
|
||||
)
|
||||
path_to_check = args.output_name
|
||||
if args.output_format in ("txt", "db"):
|
||||
path_to_check += f".{args.output_format}"
|
||||
|
||||
path_to_check = args.output_name
|
||||
if args.output_format in ("txt", "db"):
|
||||
path_to_check += f".{args.output_format}"
|
||||
|
||||
if Path(path_to_check).exists():
|
||||
console_error(
|
||||
f"Analysis output file/folder {path_to_check} already exists. "
|
||||
"Please choose a different name."
|
||||
)
|
||||
if Path(path_to_check).exists():
|
||||
console_error(
|
||||
f"Analysis output file/folder {path_to_check} already exists. "
|
||||
"Please choose a different name."
|
||||
)
|
||||
|
||||
# Check if any kernel's counters are missing due to iteration multiplexing
|
||||
if (
|
||||
|
||||
@@ -51,7 +51,6 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
self.app = dash.Dash(
|
||||
__name__, title=PROJECT_NAME, external_stylesheets=[dbc.themes.CYBORG]
|
||||
)
|
||||
self.dest_dir = str(Path(args.path[0][0]).absolute().resolve())
|
||||
self.arch: Optional[str] = None
|
||||
|
||||
self.__hidden_sections = ["Memory Chart"]
|
||||
@@ -90,6 +89,7 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
kernel_top_df = base_data.dfs[1]
|
||||
for kernel_id in base_data.filter_kernel_ids:
|
||||
filt_kernel_names.append(str(kernel_top_df.loc[kernel_id, "Kernel_Name"]))
|
||||
input_filters["kernel"] = filt_kernel_names
|
||||
|
||||
# setup app layout
|
||||
from utils.gui_components.header import get_header
|
||||
@@ -338,6 +338,7 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
)
|
||||
|
||||
args = self.get_args()
|
||||
self.dest_dir = str(Path(args.path[0][0]).absolute().resolve())
|
||||
|
||||
# create 'mega dataframe'
|
||||
self._runs[self.dest_dir].raw_pmc = file_io.create_df_pmc(
|
||||
|
||||
@@ -61,7 +61,7 @@ def multi_bar_chart(
|
||||
|
||||
def create_instruction_mix_bar_chart(display_df: pd.DataFrame, df_unit: str) -> px.bar:
|
||||
display_df = display_df.copy()
|
||||
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "" else 0)
|
||||
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0)
|
||||
|
||||
return px.bar(
|
||||
display_df,
|
||||
@@ -78,7 +78,7 @@ def create_multi_bar_charts(
|
||||
display_df: pd.DataFrame, table_id: int, df_unit: str
|
||||
) -> list[px.bar]:
|
||||
display_df = display_df.copy()
|
||||
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "" else 0)
|
||||
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0)
|
||||
|
||||
nested_bar = multi_bar_chart(table_id, display_df)
|
||||
charts = []
|
||||
@@ -103,7 +103,9 @@ def create_multi_bar_charts(
|
||||
|
||||
def create_sol_charts(display_df: pd.DataFrame, table_id: int) -> list[px.bar]:
|
||||
display_df = display_df.copy()
|
||||
display_df["Avg"] = display_df["Avg"].apply(lambda x: float(x) if x != "" else 0.0)
|
||||
display_df["Avg"] = display_df["Avg"].apply(
|
||||
lambda x: float(x) if x != "N/A" else 0.0
|
||||
)
|
||||
|
||||
charts = []
|
||||
|
||||
@@ -144,7 +146,7 @@ def create_sol_charts(display_df: pd.DataFrame, table_id: int) -> list[px.bar]:
|
||||
elif table_id == 1101:
|
||||
# Special formatting reference 'Pct of Peak' value
|
||||
display_df["Pct of Peak"] = display_df["Pct of Peak"].apply(
|
||||
lambda x: float(x) if x != "" else 0.0
|
||||
lambda x: float(x) if x != "N/A" else 0.0
|
||||
)
|
||||
charts.append(
|
||||
px.bar(
|
||||
|
||||
@@ -1290,6 +1290,8 @@ def apply_dispatch_filter(df: pd.DataFrame, workload: schema.Workload) -> pd.Dat
|
||||
# NB: support ignoring the 1st n dispatched execution by '> n'
|
||||
# The better way may be parsing python slice string
|
||||
for dispatch_id in workload.filter_dispatch_ids:
|
||||
if isinstance(dispatch_id, str) and ">" in dispatch_id:
|
||||
dispatch_id = re.match(r"\>\s*(\d+)", dispatch_id).group(1)
|
||||
if int(dispatch_id) >= len(df): # subtract 2 bc of the two header rows
|
||||
console_error("analysis", f"{dispatch_id} is an invalid dispatch id.")
|
||||
|
||||
@@ -1297,7 +1299,7 @@ def apply_dispatch_filter(df: pd.DataFrame, workload: schema.Workload) -> pd.Dat
|
||||
isinstance(workload.filter_dispatch_ids[0], str)
|
||||
and ">" in workload.filter_dispatch_ids[0]
|
||||
):
|
||||
dispatch_match = re.match(r"\> (\d+)", workload.filter_dispatch_ids[0])
|
||||
dispatch_match = re.match(r"\>\s*(\d+)", workload.filter_dispatch_ids[0])
|
||||
df = df[
|
||||
df[schema.PMC_PERF_FILE_PREFIX]["Dispatch_ID"]
|
||||
> int(dispatch_match.group(1))
|
||||
|
||||
Viittaa uudessa ongelmassa
Block a user