diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py index 745b7febd0..1cb97afee7 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py @@ -146,6 +146,12 @@ class webui_analysis(OmniAnalyze_Base): base_data[base_run].raw_pmc ) + if self._profiling_config["iteration_multiplexing"] is not None: + base_data[base_run].raw_pmc = self.iteration_multiplex_impute_counters( + base_data[base_run].raw_pmc, + policy=self._profiling_config["iteration_multiplexing"], + ) + # Apply filters to workload data console_debug("analysis", f"gui dispatch filter is {disp_filt}") console_debug("analysis", f"gui kernel filter is {kernel_filter}") @@ -224,6 +230,9 @@ class webui_analysis(OmniAnalyze_Base): "is_standalone": False, "roofline_data_type": self.__roofline_data_type, "kernel_filter": False, + "iteration_multiplexing": self._profiling_config[ + "iteration_multiplexing" + ], } ) roof_obj = soc[self.arch].roofline_obj diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py index 0735bb618f..401beab855 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py @@ -37,6 +37,7 @@ import yaml import config from roofline import Roofline from utils.amdsmi_interface import amdsmi_ctx, get_gpu_model, get_mem_max_clock +from utils.file_io import create_df_pmc, load_profiling_config from utils.logger import ( console_debug, console_error, @@ -45,15 +46,18 @@ from utils.logger import ( demarcate, ) from utils.mi_gpu_spec import mi_gpu_specs -from utils.parser import BUILD_IN_VARS, SUPPORTED_DENOM +from utils.parser import BUILD_IN_VARS, SUPPORTED_DENOM, apply_filters from utils.roofline_calc 
import validate_roofline_csv +from utils.schema import Workload from utils.specs import MachineSpecs from utils.utils import ( METRIC_ID_RE, add_counter_extra_config_input_yaml, convert_metric_id_to_panel_info, get_panel_alias, + impute_counters_iteration_multiplex, is_tcc_channel_counter, + merge_counters_spatial_multiplex, parse_sets_yaml, ) @@ -701,7 +705,32 @@ class OmniSoC_Base: ) return - self.roofline_obj.post_processing() + args = self.get_args() + workload = Workload() + workload.path = self.__args.path + profiling_config = load_profiling_config(workload.path) + workload.raw_pmc = create_df_pmc( + raw_data_root_dir=workload.path, + nodes=None, + spatial_multiplexing=args.spatial_multiplexing, + kernel_verbose=-1, + verbose=args.verbose, + config_dict=profiling_config, + ) + + if args.spatial_multiplexing: + workload.raw_pmc = merge_counters_spatial_multiplex(workload.raw_pmc) + + if profiling_config["iteration_multiplexing"] is not None: + workload.raw_pmc = impute_counters_iteration_multiplex( + workload.raw_pmc, + policy=profiling_config["iteration_multiplexing"], + ) + filtered_pmc = apply_filters( + workload, workload.path, is_gui=False, debug=False + ) + + self.roofline_obj.post_processing(filtered_pmc) @abstractmethod def analysis_setup(self, roofline_parameters: Optional[dict[str, Any]]) -> None: diff --git a/projects/rocprofiler-compute/src/roofline.py b/projects/rocprofiler-compute/src/roofline.py index 15fec9da7c..e515072c49 100644 --- a/projects/rocprofiler-compute/src/roofline.py +++ b/projects/rocprofiler-compute/src/roofline.py @@ -25,7 +25,6 @@ import argparse import textwrap from abc import abstractmethod -from collections import OrderedDict from pathlib import Path from typing import Any, Optional, Union @@ -36,7 +35,7 @@ import plotly.graph_objects as go from dash import dcc, html from plotly.subplots import make_subplots -from utils import file_io, rocpd_data, schema +from utils import schema from utils.logger import ( console_debug, 
console_error, @@ -96,6 +95,7 @@ class Roofline: "is_standalone": False, "roofline_data_type": ["FP32"], # default to FP32 "kernel_filter": False, + "iteration_multiplexing": None, } ) self.__ai_data: Optional[dict[str, Any]] = None @@ -116,6 +116,13 @@ class Roofline: hasattr(self.__args, "gpu_kernel") and self.__args.gpu_kernel ): self.__run_parameters["kernel_filter"] = True + if ( + hasattr(self.__args, "iteration_multiplexing") + and self.__args.iteration_multiplexing is not None + ): + self.__run_parameters["iteration_multiplexing"] = ( + self.__args.iteration_multiplexing + ) def get_args(self) -> argparse.Namespace: return self.__args @@ -286,11 +293,7 @@ class Roofline: Generate a set of empirical roofline plots given a directory containing required profiling and benchmarking data. """ - if ( - not isinstance(self.__run_parameters["workload_dir"], list) - and self.__run_parameters["workload_dir"] != None - ): - self.roof_setup() + self.roof_setup() console_debug("roofline", f"Path: {self.__run_parameters.get('workload_dir')}") @@ -300,7 +303,10 @@ class Roofline: ) self.__ai_data = calc_ai_profile( - self.__mspec, self.__run_parameters.get("sort_type"), ret_df + self.__mspec, + self.__run_parameters.get("sort_type"), + ret_df, + self.__run_parameters["iteration_multiplexing"], ) msg = "AI at each mem level:" @@ -1133,14 +1139,17 @@ class Roofline: def cli_generate_plot( self, dtype: str, - workload: Optional[schema.Workload] = None, - config: Optional[dict[str, Any]] = None, - arch_config: Optional[schema.ArchConfig] = None, + workload: schema.Workload, + config: dict[str, Any], + arch_config: schema.ArchConfig, ) -> Optional[str]: """ Plot CLI mode roofline analysis in terminal using plotext :param dtype: The datatype to be profiled + :param workload: Complete dataframe + :param config: Profiling configuration from profiling_config.yaml + :param arch_config: Archetype-specific configurations :type method: str :return: Build the current figure using 
plot.build(), or None if datatype is not valid for the architecture @@ -1200,33 +1209,13 @@ class Roofline: console_warning("roofline", "Skipping plot generation") return None - # if workload is detected, utilize Roofline yamls. - # If not, fallback to legacy calc_ai - if workload and config and arch_config: - self.__ai_data = calc_ai_analyze( - workload=workload, - mspec=self.__mspec, - sort_type=str(self.__run_parameters.get("sort_type")), - config=config, - arch_config=arch_config, - ) - - else: - pmc_perf_csv = base_path / "pmc_perf.csv" - if not pmc_perf_csv.is_file(): - console_error("roofline", f"{pmc_perf_csv} does not exist") - - t_df = OrderedDict() - t_df["pmc_perf"] = pd.read_csv(pmc_perf_csv) - - profiling_config = file_io.load_profiling_config(self.__args.path[0][0]) - if profiling_config.get("format_rocprof_output") == "rocpd": - t_df["pmc_perf"] = rocpd_data.process_rocpd_csv(t_df["pmc_perf"]) - - t_df = self.validate_apply_kernel_filter(df=t_df, path_str=str(base_path)) - self.__ai_data = calc_ai_profile( - self.__mspec, self.__run_parameters["sort_type"], t_df - ) + self.__ai_data = calc_ai_analyze( + workload=workload, + mspec=self.__mspec, + sort_type=str(self.__run_parameters.get("sort_type")), + config=config, + arch_config=arch_config, + ) self.__ceiling_data = construct_roof( roofline_parameters=self.__run_parameters, dtype=dtype @@ -1402,38 +1391,29 @@ class Roofline: return plt.build() @demarcate - def standalone_roofline(self) -> None: - if ( - not isinstance(self.__run_parameters["workload_dir"], list) - and self.__run_parameters["workload_dir"] != None - ): - self.roof_setup() + def standalone_roofline( + self, + df: dict[str, pd.DataFrame], + ) -> None: + self.roof_setup() # Change vL1D to a interpretable str, if required if "vL1D" in self.__run_parameters["mem_level"]: self.__run_parameters["mem_level"].remove("vL1D") self.__run_parameters["mem_level"].append("L1") - app_path = Path(str(self.__run_parameters["workload_dir"])) / 
"pmc_perf.csv" - if not app_path.is_file(): - console_error("roofline", f"{app_path} does not exist") - - t_df = OrderedDict() - t_df["pmc_perf"] = pd.read_csv(app_path) - - profiling_config = file_io.load_profiling_config(self.__args.path) - if profiling_config.get("format_rocprof_output") == "rocpd": - t_df["pmc_perf"] = rocpd_data.process_rocpd_csv(t_df["pmc_perf"]) - - self.empirical_roofline(ret_df=t_df) + self.empirical_roofline(ret_df=df) # NB: Currently the post_prossesing() method is the only one being used by # rocprofiler-compute, we include pre_processing() and profile() methods for # those who wish to borrow the roofline module @abstractmethod - def post_processing(self) -> None: + def post_processing( + self, + filtered_pmc: pd.DataFrame, + ) -> None: if self.__run_parameters["is_standalone"]: - self.standalone_roofline() + self.standalone_roofline(filtered_pmc) def get_dtype(self) -> list[str]: return self.__run_parameters["roofline_data_type"] diff --git a/projects/rocprofiler-compute/src/utils/file_io.py b/projects/rocprofiler-compute/src/utils/file_io.py index 740a6db566..172ddb66e1 100644 --- a/projects/rocprofiler-compute/src/utils/file_io.py +++ b/projects/rocprofiler-compute/src/utils/file_io.py @@ -251,7 +251,9 @@ def create_df_pmc( tmp_df = rocpd_data.process_rocpd_csv(tmp_df) # Demangle original KernelNames - kernel_name_shortener(tmp_df, kernel_verbose) + # Skip for Standalone Roofline with -1 to keep full kernel names + if kernel_verbose >= 0: + kernel_name_shortener(tmp_df, kernel_verbose) # NB: # Idealy, the Node column should be added out of diff --git a/projects/rocprofiler-compute/src/utils/roofline_calc.py b/projects/rocprofiler-compute/src/utils/roofline_calc.py index 14229e5cf9..277777f00b 100644 --- a/projects/rocprofiler-compute/src/utils/roofline_calc.py +++ b/projects/rocprofiler-compute/src/utils/roofline_calc.py @@ -466,7 +466,10 @@ def calc_ai_analyze( def calc_ai_profile( - mspec: MachineSpecs, sort_type: str, ret_df: 
dict[str, pd.DataFrame] + mspec: MachineSpecs, + sort_type: str, + ret_df: dict[str, pd.DataFrame], + iteration_multiplexing: Union[str, None] = None, ) -> dict[str, Union[list[list[float]], list[str]]]: """Given counter data, calculate arithmetic intensity for each kernel in the application. Leverage hard-coded equations to calculate AI values. @@ -505,6 +508,10 @@ def calc_ai_profile( next_kernel_name = df["Kernel_Name"][idx + 1] if not at_end else "" kernel_name = df["Kernel_Name"][idx] + # Skip this kernel dispatch row if any column in the row is n/a + if df.iloc[idx].isna().any(): + continue + try: total_flops += ( ( @@ -546,7 +553,8 @@ def calc_ai_profile( except KeyError as e: console_debug( "roofline", - f"{kernel_name[:35]}: Skipped total_flops at index {idx} due to {e}", + f"{kernel_name[:35]}: Skipped total_flops at index " + f"{idx} due to {e}", ) pass try: @@ -615,7 +623,8 @@ def calc_ai_profile( except KeyError as e: console_debug( "roofline", - f"{kernel_name[:35]}: Skipped L1cache_data at index {idx} due to {e}", + f"{kernel_name[:35]}: Skipped L1cache_data at index " + f"{idx} due to {e}", ) pass @@ -629,7 +638,8 @@ def calc_ai_profile( except KeyError as e: console_debug( "roofline", - f"{kernel_name[:35]}: Skipped L2cache_data at index {idx} due to {e}", + f"{kernel_name[:35]}: Skipped L2cache_data at index " + f"{idx} due to {e}", ) pass try: diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py index 6d51d972ff..e6805bba08 100644 --- a/projects/rocprofiler-compute/src/utils/utils.py +++ b/projects/rocprofiler-compute/src/utils/utils.py @@ -1502,6 +1502,7 @@ def impute_counters_iteration_multiplex( } # Collect imputed sub-groups as dataframes subgroup_dfs = [] + previous_fill_values = {} for i in range(0, len(group), subgroup_size): subgroup = group.iloc[i : i + subgroup_size] @@ -1517,7 +1518,22 @@ def impute_counters_iteration_multiplex( if fill_values: subgroup = subgroup.fillna(fill_values) + # If this is the last
subgroup and it still has missing values, + # use previous subgroup's fill values + # NOTE: This won't work if the first subgroup is itself incomplete + is_last_subgroup = (i + subgroup_size) >= len(group) + # First any() returns bool pd.Series for every column, + # second any() returns single bool + if ( + is_last_subgroup + and previous_fill_values + and subgroup.isna().any().any() + ): + # Use previous subgroup's fill values for remaining missing values + subgroup = subgroup.fillna(previous_fill_values) + subgroup_dfs.append(subgroup) + previous_fill_values = fill_values # Concatenate all subgroups for this group if subgroup_dfs: diff --git a/projects/rocprofiler-compute/tests/test_profile_general.py b/projects/rocprofiler-compute/tests/test_profile_general.py index fcaf05a130..1ea0d62759 100644 --- a/projects/rocprofiler-compute/tests/test_profile_general.py +++ b/projects/rocprofiler-compute/tests/test_profile_general.py @@ -75,6 +75,8 @@ config["COUNTER_LOGGING"] = False config["METRIC_COMPARE"] = False config["METRIC_LOGGING"] = False +arch_config = {} + num_kernels = 3 num_devices = 1 @@ -1326,6 +1328,7 @@ def test_roofline_missing_file_handling(binary_handler_profile_rocprof_compute): try: from roofline import Roofline + from utils.schema import Workload from utils.specs import generate_machine_specs class MockArgs: @@ -1337,6 +1340,7 @@ def test_roofline_missing_file_handling(binary_handler_profile_rocprof_compute): args = MockArgs() mspec = generate_machine_specs(None, None) + workload = Workload() workload_dir = test_utils.get_output_dir() @@ -1351,7 +1355,9 @@ def test_roofline_missing_file_handling(binary_handler_profile_rocprof_compute): roofline_instance = Roofline(args, mspec, run_parameters) - result = roofline_instance.cli_generate_plot("FP32") + result = roofline_instance.cli_generate_plot( + "FP32", workload, config, arch_config + ) assert result is None @@ -1378,6 +1384,7 @@ def
test_roofline_invalid_datatype_cli(binary_handler_profile_rocprof_compute): try: from roofline import Roofline + from utils.schema import Workload from utils.specs import generate_machine_specs class MockArgs: @@ -1389,6 +1396,7 @@ def test_roofline_invalid_datatype_cli(binary_handler_profile_rocprof_compute): args = MockArgs() mspec = generate_machine_specs(None, None) + workload = Workload() run_parameters = { "workload_dir": test_utils.get_output_dir(), @@ -1401,7 +1409,9 @@ def test_roofline_invalid_datatype_cli(binary_handler_profile_rocprof_compute): roofline_instance = Roofline(args, mspec, run_parameters) - result = roofline_instance.cli_generate_plot("INVALID_DATATYPE") + result = roofline_instance.cli_generate_plot( + "INVALID_DATATYPE", workload, config, arch_config + ) assert result is None