diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index 834cc9cc8d..c671e81a87 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -21,6 +21,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Added interactive metric descriptions in TUI analyze mode * users can now left click on any metric cell to view detailed descriptions in the dedicated `METRIC DESCRIPTION` tab +* Add support for analysis report output as a sqlite database using ``--output-format db`` analysis mode option + ### Changed * Add notice for change in default output format to `rocpd` in a future release @@ -100,6 +102,12 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * L1I-L2 Bandwidth * sL1D-L2 BW +* Analysis output: + * Replace `-o / --output` analyze mode option with `--output-format` and `--output-name` + * Add ``--output-format`` analysis mode option to select the output format of the analysis report. + * Add ``--output-name`` analysis mode option to override the default file/folder name. 
+ * Replace `--save-dfs` analyze mode option with `--output-format csv` + ### Resolved issues * Fixed not detecting memory clock issue when using amd-smi diff --git a/projects/rocprofiler-compute/docker/Dockerfile.doctest b/projects/rocprofiler-compute/docker/Dockerfile.doctest index 8b68b54337..b071787ac9 100644 --- a/projects/rocprofiler-compute/docker/Dockerfile.doctest +++ b/projects/rocprofiler-compute/docker/Dockerfile.doctest @@ -22,6 +22,7 @@ RUN git config --global --add safe.directory /app # Install any dependencies specified in requirements.txt # Run interactive bash shell CMD ["/bin/bash", "-c", "\ - python3 -m pip install -r docs/sphinx/requirements.txt \ + cd /app/projects/rocprofiler-compute \ + && python3 -m pip install -r docs/sphinx/requirements.txt \ && exec /bin/bash \ "] diff --git a/projects/rocprofiler-compute/docker/docker-compose.doctest.yml b/projects/rocprofiler-compute/docker/docker-compose.doctest.yml index 3ccd1b76a8..3e35543bc1 100644 --- a/projects/rocprofiler-compute/docker/docker-compose.doctest.yml +++ b/projects/rocprofiler-compute/docker/docker-compose.doctest.yml @@ -1,9 +1,9 @@ services: doctest: # service name build: - context: ../ - dockerfile: docker/Dockerfile.doctest + context: ../../../ + dockerfile: projects/rocprofiler-compute/docker/Dockerfile.doctest volumes: - - ../:/app + - ../../../:/app tty: true stdin_open: true diff --git a/projects/rocprofiler-compute/docs/conf.py b/projects/rocprofiler-compute/docs/conf.py index a52b6ed6c7..7a080b692c 100644 --- a/projects/rocprofiler-compute/docs/conf.py +++ b/projects/rocprofiler-compute/docs/conf.py @@ -213,4 +213,4 @@ extlinks = { } # Uncomment if facing rate limit exceed issue with local build -external_projects_remote_repository = "" +# external_projects_remote_repository = "" diff --git a/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_schema.png b/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_schema.png new file mode 100644 index 
0000000000..7eb4813cf4 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_schema.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_views.png b/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_views.png new file mode 100644 index 0000000000..e7302258f9 Binary files /dev/null and b/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_views.png differ diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst index 767e5c0365..2c43a61a49 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst @@ -318,10 +318,12 @@ Per-kernel roofline analysis When analyzing specific kernels, the roofline analysis provides detailed metrics for each filtered kernel: .. code-block:: shell-session + $ rocprof-compute analyze -p workloads/vcopy/MI200/ -k 0 -b 4 This generates enhanced roofline output showing per-kernel performance rates and arithmetic intensity calculations: .. code-block:: text + ================================================================================ 4. Roofline ================================================================================ @@ -372,6 +374,7 @@ Per-kernel roofline analysis Analyze multiple kernels for comparison: .. code-block:: shell-session + $ rocprof-compute analyze -p workloads/vcopy/MI200/ -k 0 1 2 -b 4 Baseline comparison @@ -384,3 +387,102 @@ Baseline comparison .. code-block:: shell rocprof-compute analyze -p workload1/path/ -k 0 -p workload2/path/ -k 1 + +Analysis output format +====================== + +Use the ``--output-format `` analyze mode option to specify the output format of the +analysis report. Supported formats are ``stdout``, ``txt``, ``csv``, and ``db``. The default output +format is ``stdout``. + +* ``stdout`` format: + * Print analysis report to the terminal. 
+ * NOTE: This option will not generate any file or folder. + +* ``txt`` format: + * Generate a file named ``rocprof_compute_<uuid>.txt`` in the current working directory. + * This file contains the entire analysis report as printed on the terminal. + * This is useful in case of searching across long analysis reports. + * NOTE: This option will disable output of analysis report to terminal. + +* ``csv`` format: + * Generate a folder named ``rocprof_compute_<uuid>`` in the current working directory. + * This folder contains multiple csv files representing the data in each metric table in the analysis report. + * This is useful for further programmatic analysis of analysis reports. + * NOTE: This will print the analysis report to the terminal as well. + +* ``db`` format: + * NOTE: This only works when provided workload paths are created using ``--format-rocprof-output rocpd`` profile mode option. + * Generate a file named ``rocprof_compute_<uuid>.db`` in the current working directory. + * This is a SQLite database file containing all the data in the analysis report structured according to :ref:`analysis database schema <analysis-database>`. + * This is useful for further programmatic analysis of analysis reports. + * NOTE: This option will disable output of analysis report to terminal. + +Default file/folder name ``rocprof_compute_<uuid>`` can be overridden using ``--output-name <name>`` analyze mode option. + +.. _analysis-database: + +Analysis database schema +======================== + +Analysis database tables + +.. image:: ../../data/analyze/analysis_data_dump_schema.png + :align: center + :alt: Analysis database tables + +Analysis database views + +.. image:: ../../data/analyze/analysis_data_dump_views.png + :align: center + :alt: Analysis database views + +Analysis database example + +.. note:: + + Some metrics cannot be calculated when corresponding counters are missing as shown in the warnings below + +.. 
note:: + + It is possible to merge the analysis data dump for multiple workload folders (resulting from multiple profiles) by repeating ``-p`` option for each workload + +.. code-block:: shell-session + + $ rocprof-compute analyze --verbose --output-format db --output-name test -p workloads/vmem/MI300X_A1 -p workloads/vmem1/MI300X_A1 + DEBUG Execution mode = analyze + + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ + | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ + | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ + |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + INFO Analysis mode = db + DEBUG [omnisoc init] + DEBUG [omnisoc init] + DEBUG [analysis] prepping to do some analysis + INFO [analysis] deriving rocprofiler-compute metrics... + WARNING Roofline ceilings not found for /app/projects/rocprofiler-compute/workloads/vmem/MI300X_A1. + WARNING Roofline ceilings not found for /app/projects/rocprofiler-compute/workloads/vmem1/MI300X_A1. + WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/vmem/MI300X_A1. + WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/vmem1/MI300X_A1. 
+ DEBUG Collected dispatch data + DEBUG Applied analysis mode filters + DEBUG Calculated dispatch data + DEBUG Collected metrics data + WARNING Failed to evaluate expression for 3.1.25 - Value: to_round(to_avg( + (pmc_df.get("TCP_TCP_LATENCY_sum") / pmc_df.get("TCP_TA_TCP_STATE_READ_sum")).where((pmc_df.get("TCP_TA_TCP_STATE_READ_sum") != 0), None)), 0) - unsupported operand type(s) for /: 'NoneType' and 'float' + WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg( + (pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float' + WARNING Failed to evaluate expression for 3.1.25 - Value: to_round(to_avg( + (pmc_df.get("TCP_TCP_LATENCY_sum") / pmc_df.get("TCP_TA_TCP_STATE_READ_sum")).where((pmc_df.get("TCP_TA_TCP_STATE_READ_sum") != 0), None)), 0) - unsupported operand type(s) for /: 'NoneType' and 'float' + WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg( + (pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float' + DEBUG Calculated metric values + DEBUG Calculated roofline data points + DEBUG [analysis] generating analysis + DEBUG SQLite database initialized with name: test.db + DEBUG Initialized database: test.db + DEBUG Completed writing database \ No newline at end of file diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/mode.rst b/projects/rocprofiler-compute/docs/how-to/analyze/mode.rst index 6ca7f4a773..dc8dc11d06 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/mode.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/mode.rst @@ -22,7 +22,7 @@ options. * :doc:`cli` * :doc:`grafana-gui` * :doc:`standalone-gui` -* :doc:`text-based user interface (TUI)` +* :doc:`tui` .. 
note:: diff --git a/projects/rocprofiler-compute/docs/how-to/profile/mode.rst b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst index 74640f6b45..e2d2758934 100644 --- a/projects/rocprofiler-compute/docs/how-to/profile/mode.rst +++ b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst @@ -24,6 +24,15 @@ Profiling with ROCm Compute Profiler yields the following benefits. * :ref:`Automate counter collection `: ROCm Compute Profiler handles all of your profiling via pre-configured input files. +* :ref:`Profiling output format <profiling-output-format>`: ROCm Compute Profiler can adjust the + output format of the underlying rocprof tool, which changes the output format of raw performance + counter data in the workload folder created during profiling. Supported output formats are + ``json``, ``csv``, and ``rocpd``. The default output format is ``csv``. + +.. note:: + + The default output format will be changed to ``rocpd`` in a future release of ROCm Compute Profiler. + * :ref:`Filtering `: Apply runtime filters to speed up the profiling process. @@ -217,6 +226,32 @@ an Instinct MI210 vs an Instinct MI250. -rw-r--r-- 1 auser agroup 650 Mar 1 15:15 sysinfo.csv -rw-r--r-- 1 auser agroup 399 Mar 1 15:15 timestamps.csv +.. _profiling-output-format: + +Profiling output format +----------------------- + +Use the ``--format-rocprof-output <format>`` profile mode option to specify the output format +of the underlying ``rocprof`` tool. The following formats are supported: + +* ``csv`` format: + * Ask underlying rocprof tool to dump raw performance counter data in csv format. + * The generated csv files across multiple runs of rocprof are processed and dumped into the workload directory as csv files. + * Multiple csv files are merged into a single pmc_perf.csv file in the workload directory. + +* ``json`` format: + * Ask underlying rocprof tool to dump raw performance counter data in json format. 
+ * The generated json files across multiple runs of rocprof are processed and dumped into the workload directory as csv files. + * Multiple csv files are merged into a single pmc_perf.csv file in the workload directory. + +* ``rocpd`` format: + * Ask underlying rocprof tool to dump raw performance counter data in rocpd format. + * Multiple ``rocpd`` database files containing counter collection data are merged into a single csv under the workload folder. + The database files are then removed. + * Use ``--retain-rocpd-output`` profile mode option to preserve the ``rocpd`` database(s) in the workload folder. + This is useful for custom analysis of profiling data. + + .. _filtering: Filtering diff --git a/projects/rocprofiler-compute/requirements.txt b/projects/rocprofiler-compute/requirements.txt index ef0de785a2..464e6c3f54 100644 --- a/projects/rocprofiler-compute/requirements.txt +++ b/projects/rocprofiler-compute/requirements.txt @@ -12,6 +12,7 @@ plotille pymongo pyyaml setuptools +sqlalchemy>=2.0.42 tabulate textual textual_plotext diff --git a/projects/rocprofiler-compute/src/argparser.py b/projects/rocprofiler-compute/src/argparser.py index d54bc0e3e5..51446eae49 100644 --- a/projects/rocprofiler-compute/src/argparser.py +++ b/projects/rocprofiler-compute/src/argparser.py @@ -633,11 +633,29 @@ Examples: help="\t\tMode of spatial multiplexing.", ) analyze_group.add_argument( - "-o", - "--output", + "--output-format", metavar="", - dest="output_file", - help="\t\tSpecify an output file to save analysis results.", + dest="output_format", + choices=["stdout", "txt", "csv", "db"], + default="stdout", + help=( + "\t\tSet the format of output file or folder containing analysis data.\n" + "\t\tBy default, file or folder created will " + "have the name rocprof_compute_.\n" + "\t\tFile or folder name can be overriden using --output-name.\n" + "\t\tDefault output format is stdout which will not " + "generate any file/folder.\n" + ), + ) + analyze_group.add_argument( + 
"--output-name", + metavar="", + dest="output_name", + help=( + "\t\tOverride the default output file name rocprof_compue_ " + "with the specified name.\n" + "\t\tThis is only applicable when --output-format txt/csv/db is used.\n" + ), ) analyze_group.add_argument( "--gui", @@ -756,12 +774,6 @@ Examples: help="\t\tSpecify the directory of customized configs.", default=rocprof_compute_home.joinpath("rocprof_compute_soc/analysis_configs/"), ) - analyze_advanced_group.add_argument( - "--save-dfs", - dest="df_file_dir", - metavar="", - help="\t\tSpecify the dirctory to save analysis dataframe csv files.", - ) analyze_advanced_group.add_argument( "--cols", type=int, diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py index 654808bea1..67d64de66f 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py @@ -24,6 +24,7 @@ ############################################################################## import copy +import re import sys import textwrap from abc import abstractmethod @@ -41,7 +42,7 @@ from utils.logger import ( console_warning, demarcate, ) -from utils.utils import is_workload_empty, merge_counters_spatial_multiplex +from utils.utils import get_uuid, is_workload_empty, merge_counters_spatial_multiplex class OmniAnalyze_Base: @@ -284,6 +285,23 @@ class OmniAnalyze_Base: print("Node list:", " ".join(nodes)) sys.exit(0) + # Ensure analysis output does not overwrite existing files + if self.__args.output_name: + if not re.match(r"^[A-Za-z0-9_-]+$", self.__args.output_name): + console_error( + "Analysis output file/folder name must " + "contain only alphanumeric characters " + "or underscores (_), hyphens (-)." 
+ ) + path_to_check = self.__args.output_name + if self.__args.output_format in ("txt", "db"): + path_to_check += f".{self.__args.output_format}" + if Path(path_to_check).exists(): + console_error( + f"Analysis output file/folder {path_to_check} already exists. " + "Please choose a different name." + ) + # ---------------------------------------------------- # Required methods to be implemented by child classes # ---------------------------------------------------- @@ -293,11 +311,13 @@ class OmniAnalyze_Base: console_debug("analysis", "prepping to do some analysis") console_log("analysis", "deriving rocprofiler-compute metrics...") # initalize output file - self._output = ( - open(self.__args.output_file, "w+") - if self.__args.output_file - else sys.stdout - ) + if self.__args.output_format == "txt": + output_filename = self.__args.output_name or f"rocprof_compute_{get_uuid()}" + output_filename += ".txt" + self._output = open(output_filename, "w+") + console_warning(f"Created file: {output_filename}") + elif self.__args.output_format == "stdout": + self._output = sys.stdout # Read profiling config self._profiling_config = file_io.load_profiling_config(self.__args.path[0][0]) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py new file mode 100644 index 0000000000..76cc7da7b4 --- /dev/null +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py @@ -0,0 +1,601 @@ +##############################################################################bl +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+##############################################################################el + +import ast +import json +import re +from pathlib import Path + +import astunparse +import pandas as pd + +import utils.analysis_orm as orm +from config import rocprof_compute_home +from rocprof_compute_analyze.analysis_base import OmniAnalyze_Base +from utils import rocpd_data +from utils.analysis_orm import Database, get_views +from utils.logger import console_debug, console_error, console_warning, demarcate +from utils.parser import ( + PC_SAMPLING_NOT_ISSUE_PREFIX, + CodeTransformer, + build_in_vars, + to_avg, + to_concat, + to_int, + to_max, + to_median, + to_min, + to_mod, + to_quantile, + to_round, + to_std, + to_sum, +) +from utils.roofline_calc import ( + CACHE_HIERARCHY, + MFMA_DATATYPES, + PEAK_OPS_DATATYPES, + SUPPORTED_DATATYPES, +) +from utils.utils import get_uuid, get_version + + +class db_analysis(OmniAnalyze_Base): + # ----------------------- + # Required child methods + # ----------------------- + @demarcate + def pre_processing(self): + """Perform any pre-processing steps prior to analysis.""" + super().pre_processing() + if self._profiling_config.get("format_rocprof_output") != "rocpd": + console_error( + "Creation of analysis database is only supported " + "for profiling data with rocpd output format." 
+ ) + self._roofline_ceilings_per_workload = self.calc_roofline_ceilings() + self._pc_sampling_data_per_workload = self.calc_pc_sampling_data() + self._pmc_df_per_workload = { + workload_path: rocpd_data.process_rocpd_csv( + pd.read_csv(Path(workload_path) / "pmc_perf.csv") + ) + for workload_path in self._runs.keys() + } + self._top_kernels_per_workload = { + workload_path: pmc_df.assign( + duration=pmc_df["End_Timestamp"] - pmc_df["Start_Timestamp"] + ) + .sort_values(by="duration", ascending=False) + .drop_duplicates("Kernel_Name")["Kernel_Name"] + .to_list() + for workload_path, pmc_df in self._pmc_df_per_workload.items() + } + console_debug("Collected dispatch data") + self._pmc_df_per_workload = self.apply_pmc_filters() + self._dispatch_data_per_workload = self.calc_dispatch_data() + self._metrics_info_data_per_workload, self._values_data_per_workload = ( + self.calc_metrics_data() + ) + self._values_data_per_workload = self.calc_expressions() + self._roofline_data_per_workload = self.calc_roofline_data() + + @demarcate + def run_analysis(self): + """Run CLI analysis.""" + super().run_analysis() + + # Initialize analysis database + # Create db uuid + if self.get_args().output_name: + db_name = f"{self.get_args().output_name}.db" + else: + db_name = f"rocprof_compute_{get_uuid()}.db" + Database.init(db_name) + console_debug(f"Initialized database: {db_name}") + + for workload_path in self._runs.keys(): + workload_obj = orm.Workload( + name=workload_path.split("/")[-2], + sub_name=workload_path.split("/")[-1], + sys_info_extdata=self._runs[workload_path].sys_info.iloc[0].to_dict(), + roofline_bench_extdata=self._roofline_ceilings_per_workload.get( + workload_path + ), + profiling_config_extdata=self._profiling_config, + ) + Database.get_session().add(workload_obj) + for pc_sample in self._pc_sampling_data_per_workload.get( + workload_path, pd.DataFrame() + ).itertuples(): + Database.get_session().add( + orm.PCsampling( + source=pc_sample.source_line, + 
instruction=pc_sample.instruction, + count=pc_sample.count, + kernel_name=pc_sample.kernel_name, + offset=pc_sample.offset, + count_issue=pc_sample.count_issued, + count_stall=pc_sample.count_stalled, + stall_reason=pc_sample.stall_reason, + workload=workload_obj, + ) + ) + for dispatch in self._dispatch_data_per_workload.get( + workload_path, pd.DataFrame() + ).itertuples(): + Database.get_session().add( + orm.Dispatch( + dispatch_id=dispatch.dispatch_id, + kernel_name=dispatch.kernel_name, + gpu_id=dispatch.gpu_id, + duration=dispatch.duration, + workload=workload_obj, + ) + ) + for metric in self._metrics_info_data_per_workload.get( + workload_path, pd.DataFrame() + ).itertuples(): + metric_obj = orm.Metric( + name=metric.name, + metric_id=metric.metric_id, + description=metric.description, + unit=metric.unit, + table_name=metric.table_name, + sub_table_name=metric.sub_table_name, + workload=workload_obj, + ) + Database.get_session().add(metric_obj) + for value in self._values_data_per_workload.get( + workload_path, pd.DataFrame() + ).itertuples(): + if value.metric_id == metric.metric_id: + Database.get_session().add( + orm.Value( + metric=metric_obj, + value_name=value.value_name, + value=value.value, + ) + ) + + for roofline_data in self._roofline_data_per_workload.get( + workload_path, pd.DataFrame() + ).itertuples(): + Database.get_session().add( + orm.RooflineData( + kernel_name=roofline_data.kernel_name, + total_flops=roofline_data.total_flops, + l1_cache_data=roofline_data.l1_cache_data, + l2_cache_data=roofline_data.l2_cache_data, + hbm_cache_data=roofline_data.hbm_cache_data, + workload=workload_obj, + ) + ) + + version = get_version(rocprof_compute_home) + Database.get_session().add( + orm.Metadata( + compute_version=version["version"], + git_version=version["sha"], + schema_version=orm.SCHEMA_VERSION, + ) + ) + + # Create views + for view_stmt in get_views(): + Database.get_session().execute(view_stmt) + + # Write database + Database.write() + 
console_debug("Completed writing database") + console_warning(f"Created file: {db_name}") + + def calc_roofline_ceilings(self): + roofline_ceilings_per_workload = dict() + + for workload_path in self._runs.keys(): + if not (Path(workload_path) / "roofline.csv").exists(): + console_warning(f"Roofline ceilings not found for {workload_path}.") + continue + + roofline_dict = ( + pd.read_csv(f"{workload_path}/roofline.csv").iloc[0].to_dict() + ) + keys = list() + for mem_level in CACHE_HIERARCHY: + keys.append(f"{mem_level}Bw") + for dtype in SUPPORTED_DATATYPES[ + self._runs[workload_path].sys_info.iloc[0]["gpu_arch"] + ]: + if dtype in PEAK_OPS_DATATYPES: + if dtype.startswith("F") or dtype.startswith("B"): + keys.append(f"{dtype}Flops") + elif dtype.startswith("I"): + keys.append(f"{dtype}Ops") + if dtype in MFMA_DATATYPES: + if dtype.startswith("F") or dtype.startswith("B"): + # FP16 -> F16 + dtype = dtype.replace("FP", "F") + keys.append(f"MFMA{dtype}Flops") + elif dtype.startswith("I"): + keys.append(f"MFMA{dtype}Ops") + roofline_ceilings_per_workload[workload_path] = { + key: roofline_dict[key] for key in keys if key in roofline_dict + } + + if roofline_ceilings_per_workload: + console_debug("Collected roofline ceilings") + return roofline_ceilings_per_workload + + def calc_pc_sampling_data(self): + pc_sampling_data_per_workload = dict() + + for workload_path in self._runs.keys(): + if not (Path(workload_path) / "ps_file_results.json").exists(): + console_warning(f"PC sampling data not found for {workload_path}.") + continue + + pc_sampling_data = json.loads( + (Path(workload_path) / "ps_file_results.json").read_text() + ) + pc_sampling_data = pc_sampling_data["rocprofiler-sdk-tool"][0] + pc_sampling_stochastic = pc_sampling_data["buffer_records"][ + "pc_sample_stochastic" + ] + pc_sampling_host_trap = pc_sampling_data["buffer_records"][ + "pc_sample_host_trap" + ] + pc_sampling_instruction = pc_sampling_data["strings"][ + "pc_sample_instructions" + ] + 
pc_sampling_comments = pc_sampling_data["strings"]["pc_sample_comments"] + pc_sampling_kernel_name_dict = { + symbol["code_object_id"]: symbol["formatted_kernel_name"] + for symbol in pc_sampling_data["kernel_symbols"] + } + + pc_df = pd.DataFrame([ + { + "inst_index": pc_sample["inst_index"], + "code_object_id": pc_sample["record"]["pc"]["code_object_id"], + "code_object_offset": pc_sample["record"]["pc"][ + "code_object_offset" + ], + "stall_reason": pc_sample["record"] + .get("snapshot", {}) + .get("stall_reason"), + "wave_issued": pc_sample["record"].get("wave_issued"), + } + for pc_sample in pc_sampling_stochastic + pc_sampling_host_trap + ]) + + def custom_aggregator(column_name): + if column_name == "count_issued": + + def aggregator(series): + return None if series.isnull().all() else series.sum() + + return aggregator + if column_name == "count_stalled": + + def aggregator(series): + if series.isnull().all(): + return None + return series.count() - series.sum() + + return aggregator + if column_name == "stall_reason": + + def aggregator(series): + if series.isnull().all(): + return None + cleaned_series = series.dropna().str[ + len(PC_SAMPLING_NOT_ISSUE_PREFIX) : + ] + return cleaned_series.value_counts().to_dict() + + return aggregator + raise ValueError(f"Unknown column name: {column_name}") + + grouped_df = ( + pc_df.groupby(["code_object_id", "code_object_offset"]) + .agg( + count=("code_object_id", "size"), + inst_index=("inst_index", "last"), + count_issued=("wave_issued", custom_aggregator("count_issued")), + count_stalled=("wave_issued", custom_aggregator("count_stalled")), + stall_reason=("stall_reason", custom_aggregator("stall_reason")), + ) + .reset_index() + ) + + grouped_df["instruction"] = grouped_df["inst_index"].apply( + lambda x: pc_sampling_instruction[x] + if x < len(pc_sampling_instruction) + else None + ) + grouped_df["source_line"] = grouped_df["inst_index"].apply( + lambda x: pc_sampling_comments[x] + if x < 
len(pc_sampling_comments) + else None + ) + grouped_df["kernel_name"] = grouped_df["code_object_id"].apply( + lambda x: pc_sampling_kernel_name_dict.get(x) + ) + grouped_df = grouped_df.rename(columns={"code_object_offset": "offset"}) + grouped_df = grouped_df.drop(columns=["code_object_id", "inst_index"]) + + pc_sampling_data_per_workload[workload_path] = grouped_df + + if pc_sampling_data_per_workload: + console_debug("Collected PC sampling data") + return pc_sampling_data_per_workload + + @staticmethod + def evaluate(name, value, pmc_df, sys_info, parse=False): + if parse: + value = re.sub( + r"\$([0-9A-Za-z_]+)", + lambda m: f'sys_info["{m.group(1)}"]', + value, + ) + ast_node = ast.parse(value) + transformer = CodeTransformer() + transformer.visit(ast_node) + value = astunparse.unparse(ast_node) + value = value.replace("raw_pmc_df", "pmc_df") + value = value.replace("pmc_df['sys_info']", "sys_info") + else: + value = value.replace("raw_pmc_df['pmc_perf']", "pmc_df") + value = re.sub( + "ammolite__([0-9A-Za-z_]+)", + lambda m: f'sys_info["{m.group(1)}"]', + value, + ) + try: + return eval( + compile(value, "", "eval"), + {}, # no globals + { + # only locals + "pmc_df": pmc_df, + "sys_info": sys_info, + "to_avg": to_avg, + "to_concat": to_concat, + "to_int": to_int, + "to_max": to_max, + "to_median": to_median, + "to_min": to_min, + "to_mod": to_mod, + "to_quantile": to_quantile, + "to_round": to_round, + "to_std": to_std, + "to_sum": to_sum, + }, + ) + except Exception as e: + console_warning(f"Failed to evaluate expression for {name}: {value} - {e}") + return None + + def calc_expressions(self): + values_data_per_workload = self._values_data_per_workload.copy() + + for workload_path in self._runs.keys(): + pmc_df = self._pmc_df_per_workload[workload_path].copy() + sys_info = self._runs[workload_path].sys_info.iloc[0].to_dict() + for key, value in self._roofline_ceilings_per_workload[ + workload_path + ].items(): + sys_info[f"{key}_empirical_peak"] = value + + 
# Calculate PER_XCD variables first + for key, value in build_in_vars.items(): + if "PER_XCD" in key: + sys_info[key] = db_analysis.evaluate( + key, value, pmc_df, sys_info, parse=True + ) + + # variable dependent on PER_XCD variables + for key, value in build_in_vars.items(): + if "PER_XCD" not in key: + sys_info[key] = db_analysis.evaluate( + key, value, pmc_df, sys_info, parse=True + ) + + # Get name and print warning + values_data_per_workload[workload_path]["value"] = values_data_per_workload[ + workload_path + ].apply( + lambda row: db_analysis.evaluate( + f"{row['metric_id']} - {row['value_name']}", + row["value"], + pmc_df, + sys_info, + ), + axis=1, + ) + + console_debug("Calculated metric values") + return values_data_per_workload + + def calc_metrics_data(self): + metrics_info_data_per_workload = dict() + values_data_per_workload = dict() + + for workload_path in self._runs.keys(): + gfx_arch = self._runs[workload_path].sys_info.iloc[0]["gpu_arch"] + # for example 201 -> Wavefront + table_names_map = dict() + for panel_config in self._arch_configs[gfx_arch].panel_configs.values(): + table_names_map[panel_config["id"]] = panel_config["title"] + for source in panel_config["data source"]: + table_names_map[list(source.values())[0]["id"]] = list( + source.values() + )[0]["title"] + # Build metric data + non_expression_columns = [ + "Metric", + "Channel", + "Unit", + "Description", + "coll_level", + "Type", + "Xfer", + "Coherency", + "Transaction", + ] + metrics_info_df = pd.DataFrame([ + { + "name": row.get("Metric") or row["Channel"].strip(), + "metric_id": metric_id, + "description": row.get("Description"), + "unit": row.get("Unit"), + "table_name": table_names_map[int(metric_id.split(".")[0]) * 100], + "sub_table_name": table_names_map[ + int(metric_id.split(".")[0]) * 100 + + int(metric_id.split(".")[1]) + ], + } + for metric_df_id, metric_df in self._arch_configs[gfx_arch].dfs.items() + if metric_df_id + != 402 # Skip roofline data points handled in 
calc_roofline_data + if set(metric_df.columns).intersection({"Metric", "Channel"}) + for metric_id, row in metric_df.iterrows() + ]) + values_df = pd.DataFrame([ + { + "metric_id": metric_id, + "value_name": value_name, + "value": row[value_name].strip(), + } + for metric_df_id, metric_df in self._arch_configs[gfx_arch].dfs.items() + if metric_df_id + != 402 # Skip roofline data points handled in calc_roofline_data + if set(metric_df.columns).intersection({"Metric", "Channel"}) + for metric_id, row in metric_df.iterrows() + for value_name in metric_df.drop( + columns=non_expression_columns, errors="ignore" + ).columns + ]) + + metrics_info_data_per_workload[workload_path] = metrics_info_df + values_data_per_workload[workload_path] = values_df + + console_debug("Collected metrics data") + return metrics_info_data_per_workload, values_data_per_workload + + def calc_dispatch_data(self): + dispatch_data_per_workload = dict() + + for workload_path in self._runs.keys(): + dispatch_df = pd.DataFrame([ + { + "dispatch_id": row.Dispatch_ID, + "kernel_name": row.Kernel_Name, + "gpu_id": row.GPU_ID, + "duration": row.End_Timestamp - row.Start_Timestamp, + } + for row in self._pmc_df_per_workload[workload_path].itertuples() + ]) + dispatch_data_per_workload[workload_path] = dispatch_df + + console_debug("Calculated dispatch data") + return dispatch_data_per_workload + + def apply_pmc_filters(self): + pmc_df_per_workload = self._pmc_df_per_workload.copy() + + for workload_path, pmc_df in pmc_df_per_workload.items(): + # Filter gpu_ids + if self._runs[workload_path].filter_gpu_ids: + pmc_df = pmc_df.loc[ + pmc_df["GPU_ID"] + .astype(str) + .isin([self._runs[workload_path].filter_gpu_ids]) + ] + # Filter kernel_ids + if self._runs[workload_path].filter_kernel_ids: + pmc_df = pmc_df.loc[ + pmc_df["Kernel_Name"].isin([ + self._top_kernels_per_workload[workload_path][id] + for id in self._runs[workload_path].filter_kernel_ids + ]) + ] + # Filter dispatch_ids + if 
self._runs[workload_path].filter_dispatch_ids: + if ">" in self._runs[workload_path].filter_dispatch_ids[0]: + m = re.match( + r"\> (\d+)", self._runs[workload_path].filter_dispatch_ids[0] + ) + pmc_df = pmc_df[pmc_df["Dispatch_ID"] > int(m.group(1))] + else: + pmc_df = pmc_df.loc[ + pmc_df["Dispatch_ID"] + .astype(str) + .isin(self._runs[workload_path].filter_dispatch_ids) + ] + pmc_df_per_workload[workload_path] = pmc_df + + console_debug("Applied analysis mode filters") + return pmc_df_per_workload + + def calc_roofline_data(self): + roofline_data_per_workload = dict() + + for workload_path in self._runs.keys(): + pmc_df = self._pmc_df_per_workload[workload_path].copy() + sys_info = self._runs[workload_path].sys_info.iloc[0].to_dict() + gfx_arch = sys_info["gpu_arch"] + roofline_data_df = self._arch_configs[gfx_arch].dfs[402] + roofline_data_expressions = dict( + zip(roofline_data_df["Metric"], roofline_data_df["Value"]) + ) + roofline_data_expressions = { + "total_flops": roofline_data_expressions["Performance (GFLOPs)"], + "l1_cache_data": roofline_data_expressions["AI L1"], + "l2_cache_data": roofline_data_expressions["AI L2"], + "hbm_cache_data": roofline_data_expressions["AI HBM"], + } + + roofline_df = pd.DataFrame([ + { + "kernel_name": kernel_name, + **{ + metric_name: db_analysis.evaluate( + metric_name, + roofline_data_expressions[metric_name], + pmc_df[pmc_df["Kernel_Name"] == kernel_name], + sys_info, + ) + for metric_name in roofline_data_expressions + }, + } + for kernel_name in self._top_kernels_per_workload[workload_path][ + : self.get_args().max_stat_num + ] + ]) + + roofline_data_per_workload[workload_path] = roofline_df + + console_debug("Calculated roofline data points") + return roofline_data_per_workload diff --git a/projects/rocprofiler-compute/src/rocprof_compute_base.py b/projects/rocprofiler-compute/src/rocprof_compute_base.py index 3faa023134..33877c6a2a 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_base.py +++ 
b/projects/rocprofiler-compute/src/rocprof_compute_base.py @@ -137,6 +137,8 @@ class RocProfCompute: self.__analyze_mode = "web_ui" elif self.__args.tui: self.__analyze_mode = "tui" + elif self.__args.output_format == "db": + self.__analyze_mode = "db" else: self.__analyze_mode = "cli" return @@ -447,6 +449,10 @@ class RocProfCompute: run_tui(self.__args, self.__supported_archs) return + elif self.__analyze_mode == "db": + from rocprof_compute_analyze.analysis_db import db_analysis + + analyzer = db_analysis(self.__args, self.__supported_archs) else: console_error("Unsupported analysis mode -> %s" % self.__analyze_mode) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/0100_system_info.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/0100_system_info.yaml index 8470ffbbe3..23d024fde3 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/0100_system_info.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/0100_system_info.yaml @@ -6,5 +6,6 @@ Panel Config: data source: - raw_csv_table: id: 101 + title: System Info source: sysinfo.csv columnwise: true diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/1500_address_processing_unit_and_data_return_path_ta_td.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/1500_address_processing_unit_and_data_return_path_ta_td.yaml index 67c3aa1dfc..4c615fb0d5 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/1500_address_processing_unit_and_data_return_path_ta_td.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/1500_address_processing_unit_and_data_return_path_ta_td.yaml @@ -206,11 +206,6 @@ Panel Config: min: MIN(((100 * TD_TC_STALL_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))) max: MAX(((100 * TD_TC_STALL_sum) / 
($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))) unit: pct - "Workgroup manager \u2192 Data-Return Stall": - avg: null - min: null - max: null - unit: pct Coalescable Instructions: avg: AVG((TD_COALESCABLE_WAVEFRONT_sum / $denom)) min: MIN((TD_COALESCABLE_WAVEFRONT_sum / $denom)) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/1600_vector_l1_data_cache.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/1600_vector_l1_data_cache.yaml index 50af33c21b..b374ea9466 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/1600_vector_l1_data_cache.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx908/1600_vector_l1_data_cache.yaml @@ -400,7 +400,7 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: Req: avg: AVG((TCP_UTCL1_REQUEST_sum / $denom)) @@ -438,5 +438,5 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: {} diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx90a/0100_system_info.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx90a/0100_system_info.yaml index 8470ffbbe3..23d024fde3 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx90a/0100_system_info.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx90a/0100_system_info.yaml @@ -6,5 +6,6 @@ Panel Config: data source: - raw_csv_table: id: 101 + title: System Info source: sysinfo.csv columnwise: true diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx90a/1600_vector_l1_data_cache.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx90a/1600_vector_l1_data_cache.yaml index 50af33c21b..b374ea9466 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx90a/1600_vector_l1_data_cache.yaml +++ 
b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx90a/1600_vector_l1_data_cache.yaml @@ -400,7 +400,7 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: Req: avg: AVG((TCP_UTCL1_REQUEST_sum / $denom)) @@ -438,5 +438,5 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: {} diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/0100_system_info.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/0100_system_info.yaml index 8470ffbbe3..23d024fde3 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/0100_system_info.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/0100_system_info.yaml @@ -6,5 +6,6 @@ Panel Config: data source: - raw_csv_table: id: 101 + title: System Info source: sysinfo.csv columnwise: true diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/0300_memory_chart.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/0300_memory_chart.yaml index 0a6510182a..03b5606ad7 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/0300_memory_chart.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/0300_memory_chart.yaml @@ -236,10 +236,6 @@ Panel Config: L2 Hit: value: ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else 0)), 0) - L2 Rd Lat: - value: null - L2 Wr Lat: - value: null Fabric_L2 Rd: value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0) Fabric_L2 Wr: diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/1600_vector_l1_data_cache.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/1600_vector_l1_data_cache.yaml index db745209b7..e5b5eb9e9c 100644 --- 
a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/1600_vector_l1_data_cache.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx940/1600_vector_l1_data_cache.yaml @@ -370,7 +370,7 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: Req: avg: AVG((TCP_UTCL1_REQUEST_sum / $denom)) @@ -408,5 +408,5 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: {} diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/0100_system_info.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/0100_system_info.yaml index 8470ffbbe3..23d024fde3 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/0100_system_info.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/0100_system_info.yaml @@ -6,5 +6,6 @@ Panel Config: data source: - raw_csv_table: id: 101 + title: System Info source: sysinfo.csv columnwise: true diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/0300_memory_chart.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/0300_memory_chart.yaml index 0a6510182a..03b5606ad7 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/0300_memory_chart.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/0300_memory_chart.yaml @@ -236,10 +236,6 @@ Panel Config: L2 Hit: value: ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else 0)), 0) - L2 Rd Lat: - value: null - L2 Wr Lat: - value: null Fabric_L2 Rd: value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0) Fabric_L2 Wr: diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/1600_vector_l1_data_cache.yaml 
b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/1600_vector_l1_data_cache.yaml index db745209b7..e5b5eb9e9c 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/1600_vector_l1_data_cache.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx941/1600_vector_l1_data_cache.yaml @@ -370,7 +370,7 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: Req: avg: AVG((TCP_UTCL1_REQUEST_sum / $denom)) @@ -408,5 +408,5 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: {} diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/0100_system_info.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/0100_system_info.yaml index 8470ffbbe3..23d024fde3 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/0100_system_info.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/0100_system_info.yaml @@ -6,5 +6,6 @@ Panel Config: data source: - raw_csv_table: id: 101 + title: System Info source: sysinfo.csv columnwise: true diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/0300_memory_chart.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/0300_memory_chart.yaml index 0a6510182a..03b5606ad7 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/0300_memory_chart.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/0300_memory_chart.yaml @@ -236,10 +236,6 @@ Panel Config: L2 Hit: value: ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else 0)), 0) - L2 Rd Lat: - value: null - L2 Wr Lat: - value: null Fabric_L2 Rd: value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0) Fabric_L2 Wr: diff --git 
a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/1600_vector_l1_data_cache.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/1600_vector_l1_data_cache.yaml index db745209b7..e5b5eb9e9c 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/1600_vector_l1_data_cache.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx942/1600_vector_l1_data_cache.yaml @@ -370,7 +370,7 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: Req: avg: AVG((TCP_UTCL1_REQUEST_sum / $denom)) @@ -408,5 +408,5 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: {} diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx950/0100_system_info.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx950/0100_system_info.yaml index 8470ffbbe3..23d024fde3 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx950/0100_system_info.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx950/0100_system_info.yaml @@ -6,5 +6,6 @@ Panel Config: data source: - raw_csv_table: id: 101 + title: System Info source: sysinfo.csv columnwise: true diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx950/1600_vector_l1_data_cache.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx950/1600_vector_l1_data_cache.yaml index f95e3fcb1f..2d8ac4d781 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx950/1600_vector_l1_data_cache.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx950/1600_vector_l1_data_cache.yaml @@ -420,7 +420,7 @@ Panel Config: avg: Avg min: Min max: Max - units: Units + units: Unit metric: Req: avg: AVG((TCP_UTCL1_REQUEST_sum / $denom)) @@ -468,7 +468,7 @@ Panel Config: avg: Avg 
min: Min max: Max - units: Units + units: Unit metric: Cache Full Stall: avg: AVG((TCP_UTCL1_STALL_INFLIGHT_MAX_sum / $denom)) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_tui/utils/tui_utils.py b/projects/rocprofiler-compute/src/rocprof_compute_tui/utils/tui_utils.py index 698778fda0..629421b793 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_tui/utils/tui_utils.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_tui/utils/tui_utils.py @@ -109,7 +109,7 @@ def process_panels_to_dataframes(args, kernel_df, archConfigs, roof_plot=None): # args.filter_metrics # args.cols # args.max_stat_num - # args.df_file_dir + # dfs file dir result_structure = {} decimal_precision = getattr(args, "decimal", 2) if args else 2 diff --git a/projects/rocprofiler-compute/src/utils/analysis_orm.py b/projects/rocprofiler-compute/src/utils/analysis_orm.py new file mode 100644 index 0000000000..23647d8133 --- /dev/null +++ b/projects/rocprofiler-compute/src/utils/analysis_orm.py @@ -0,0 +1,216 @@ +##############################################################################bl +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +##############################################################################el + +from sqlalchemy import ( + JSON, + Column, + Float, + ForeignKey, + Integer, + String, + Text, + create_engine, + func, + select, + text, +) +from sqlalchemy.orm import declarative_base, relationship, sessionmaker + +from utils.logger import console_debug, console_error + +Base = declarative_base() + +PREFIX = "compute_" +SCHEMA_VERSION = "1.0.0" + + +class Workload(Base): + __tablename__ = f"{PREFIX}workload" + + workload_id = Column(Integer, primary_key=True) + name = Column(String) + sub_name = Column(String) + sys_info_extdata = Column(JSON) + roofline_bench_extdata = Column(JSON) + profiling_config_extdata = Column(JSON) + + # Workload can have multiple dispatches + dispatches = relationship("Dispatch", back_populates="workload") + # Workload can have multiple metrics + metrics = relationship("Metric", back_populates="workload") + # Workload can have multiple roofline data points + roofline_data_points = relationship("RooflineData", back_populates="workload") + # Workload can have multiple pc_sampling values + pc_sampling_values = relationship("PCsampling", back_populates="workload") + + +class Metric(Base): + __tablename__ = f"{PREFIX}metric" + + metric_uuid = Column(Integer, primary_key=True) + workload_id = Column( + Integer, ForeignKey(f"{PREFIX}workload.workload_id"), nullable=False + ) + name = Column(String) # e.g. Wavefronts Num + metric_id = Column(String) # e.g. 4.1.3 + description = Column(Text) # e.g. Number of wavefronts + table_name = Column(String) # e.g. Wavefront + sub_table_name = Column(String) # e.g. Wavefront stats + unit = Column(String) # e.g. 
Gbps + + # Metric can have one workload + workload = relationship("Workload", back_populates="metrics") + # Metric can have multiple values + values = relationship("Value", back_populates="metric") + + +class RooflineData(Base): + __tablename__ = f"{PREFIX}roofline_data" + + roofline_uuid = Column(Integer, primary_key=True) + workload_id = Column( + Integer, ForeignKey(f"{PREFIX}workload.workload_id"), nullable=False + ) + kernel_name = Column(String) + total_flops = Column(Float) + l1_cache_data = Column(Float) + l2_cache_data = Column(Float) + hbm_cache_data = Column(Float) + + # Roofline data point can have one workload + workload = relationship("Workload", back_populates="roofline_data_points") + + +class Dispatch(Base): + __tablename__ = f"{PREFIX}dispatch" + + dispatch_uuid = Column(Integer, primary_key=True) + workload_id = Column( + Integer, ForeignKey(f"{PREFIX}workload.workload_id"), nullable=False + ) + dispatch_id = Column(Integer) + kernel_name = Column(String) + gpu_id = Column(Integer) + duration = Column(Integer) + + # Dispatch can have one workload + workload = relationship("Workload", back_populates="dispatches") + + +class PCsampling(Base): + __tablename__ = f"{PREFIX}pcsampling" + + pc_sampling_uuid = Column(Integer, primary_key=True) + workload_id = Column( + Integer, ForeignKey(f"{PREFIX}workload.workload_id"), nullable=False + ) + source = Column(String) + instruction = Column(String) + count = Column(Integer) + kernel_name = Column(String) + offset = Column(Integer) + count_issue = Column(Integer) + count_stall = Column(Integer) + stall_reason = Column(JSON) + + # PCsampling can have one workload + workload = relationship("Workload", back_populates="pc_sampling_values") + + +class Value(Base): + __tablename__ = f"{PREFIX}value" + + value_uuid = Column(Integer, primary_key=True) + metric_uuid = Column( + Integer, ForeignKey(f"{PREFIX}metric.metric_uuid"), nullable=False + ) + value_name = Column(String) # e.g. 
min, max, avg + value = Column(Float) # e.g. 123.45 + + # Value can have one metric + metric = relationship("Metric", back_populates="values") + + +class Metadata(Base): + __tablename__ = f"{PREFIX}metadata" + + id = Column(Integer, primary_key=True) + compute_version = Column(String) + git_version = Column(String) + schema_version = Column(String) + + +class Database: + _session = None + + @classmethod + def init(cls, db_name): + engine = create_engine(f"sqlite:///{db_name}") + Base.metadata.create_all(engine) + cls._session = sessionmaker(bind=engine)() + console_debug(f"SQLite database initialized with name: {db_name}") + return db_name + + @classmethod + def get_session(cls): + return cls._session + + @classmethod + def write(self): + try: + self._session.commit() + except Exception as e: + self._session.rollback() + console_error(f"Error writing analysis database: {e}") + finally: + self._session.close() + + +def get_views(): + views = { + "kernel_view": select( + Dispatch.kernel_name, + func.count(Dispatch.dispatch_id).label("dispatch_count"), + func.sum(Dispatch.duration).label("duration_sum"), + func.avg(Dispatch.duration).label("duration_mean"), + ).group_by(Dispatch.kernel_name), + "metric_view": select( + Metric.workload_id, + Metric.name, + Metric.metric_id, + Metric.description, + Metric.table_name, + Metric.sub_table_name, + Metric.unit, + Value.value_name, + Value.value, + ).join(Value, Metric.metric_uuid == Value.metric_uuid), + } + return [ + text( + f"CREATE VIEW {PREFIX}{view_name} AS " + f"{stmt.compile(compile_kwargs={'literal_binds': True})}" + ) + for view_name, stmt in views.items() + ] diff --git a/projects/rocprofiler-compute/src/utils/parser.py b/projects/rocprofiler-compute/src/utils/parser.py index e3658b43b1..192d5db774 100755 --- a/projects/rocprofiler-compute/src/utils/parser.py +++ b/projects/rocprofiler-compute/src/utils/parser.py @@ -114,6 +114,8 @@ supported_call = { "CONCAT": "to_concat", } +PC_SAMPLING_NOT_ISSUE_PREFIX = 
"ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_" + # ------------------------------------------------------------------------------ @@ -1283,9 +1285,7 @@ def search_pc_sampling_record(records): ) ) - rocp_inst_not_issued_prefix_len = len( - "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_" - ) + rocp_inst_not_issued_prefix_len = len(PC_SAMPLING_NOT_ISSUE_PREFIX) # Populate grouped_data for i, item in enumerate(records): diff --git a/projects/rocprofiler-compute/src/utils/roofline_calc.py b/projects/rocprofiler-compute/src/utils/roofline_calc.py index 7e05a8efc6..0c69976cef 100644 --- a/projects/rocprofiler-compute/src/utils/roofline_calc.py +++ b/projects/rocprofiler-compute/src/utils/roofline_calc.py @@ -104,6 +104,7 @@ SUPPORTED_DATATYPES = { PEAK_OPS_DATATYPES = ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"] MFMA_DATATYPES = ["FP4", "FP6", "FP8", "FP16", "BF16", "FP32", "FP64", "I8"] +CACHE_HIERARCHY = ["HBM", "L2", "L1", "LDS"] TOP_N = 10 @@ -164,7 +165,7 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data): graphPoints = {"hbm": [], "l2": [], "l1": [], "lds": [], "valu": [], "mfma": []} if roofline_parameters["mem_level"] == "ALL": - cacheHierarchy = ["HBM", "L2", "L1", "LDS"] + cacheHierarchy = CACHE_HIERARCHY else: cacheHierarchy = roofline_parameters["mem_level"] diff --git a/projects/rocprofiler-compute/src/utils/tty.py b/projects/rocprofiler-compute/src/utils/tty.py index f4eb9ac318..1cec9de08b 100644 --- a/projects/rocprofiler-compute/src/utils/tty.py +++ b/projects/rocprofiler-compute/src/utils/tty.py @@ -34,7 +34,7 @@ import config from utils import mem_chart, parser from utils.kernel_name_shortener import kernel_name_shortener from utils.logger import console_error, console_log, console_warning -from utils.utils import convert_metric_id_to_panel_info +from utils.utils import convert_metric_id_to_panel_info, get_uuid def string_multiple_lines(source, width, max_rows): @@ -141,6 +141,14 @@ def show_all(args, runs, 
archConfigs, output, profiling_config, roof_plot=None): else: hidden_cols = config.HIDDEN_COLUMNS_CLI + if args.output_format == "csv": + if args.output_name: + csv_dir = Path(f"{args.output_name}") + else: + csv_dir = Path(f"rocprof_compute_{get_uuid()}") + if not csv_dir.exists(): + csv_dir.mkdir() + for panel_id, panel in archConfigs.panel_configs.items(): # Skip panels that don't support baseline comparison if len(args.path) > 1 and panel_id in config.HIDDEN_SECTIONS: @@ -484,17 +492,15 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None): ): ss += table_id_str + " " + table_config["title"] + "\n" - if args.df_file_dir: - p = Path(args.df_file_dir) - if not p.exists(): - p.mkdir() - if p.is_dir(): - if "title" in table_config and table_config["title"]: - table_id_str += "_" + table_config["title"] - df.to_csv( - p.joinpath(table_id_str.replace(" ", "_") + ".csv"), - index=False, - ) + if args.output_format == "csv" and csv_dir.is_dir(): + if "title" in table_config and table_config["title"]: + table_id_str += "_" + table_config["title"] + csv_filename = str( + csv_dir.joinpath(table_id_str.replace(" ", "_") + ".csv"), + ) + df.to_csv(csv_filename, index=False) + console_warning(f"Created file: {csv_filename}") + # Only show top N kernels (as specified in --max-kernel-num) # in "Top Stats" section if type == "raw_csv_table" and ( diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py index d2d8c41df0..176d89b079 100644 --- a/projects/rocprofiler-compute/src/utils/utils.py +++ b/projects/rocprofiler-compute/src/utils/utils.py @@ -36,6 +36,7 @@ import shutil import subprocess import tempfile import time +import uuid from pathlib import Path as path from typing import Optional @@ -1640,3 +1641,7 @@ def parse_sets_yaml(arch): if set_option: sets_info[set_option] = set_item return sets_info + + +def get_uuid(length=8): + return uuid.uuid4().hex[:length] diff --git 
a/projects/rocprofiler-compute/tests/test_TCP_counters.py b/projects/rocprofiler-compute/tests/test_TCP_counters.py index 6c6bb20f99..48012a60d9 100644 --- a/projects/rocprofiler-compute/tests/test_TCP_counters.py +++ b/projects/rocprofiler-compute/tests/test_TCP_counters.py @@ -147,7 +147,8 @@ def test_L1_cache_counters( base = Path(test_utils.get_output_dir()) for app_name in app_names: - workload_dir = str(base / app_name) + workload_dir = f"{base}/{app_name}" + workload_dir_output = f"{base}_{app_name}" # 1. profile the app return_code = binary_handler_profile_rocprof_compute( @@ -167,15 +168,17 @@ def test_L1_cache_counters( workload_dir, "-b", "16.3", - "--save-dfs", - workload_dir, + "--output-format", + "csv", + "--output-name", + workload_dir_output, ]) assert return_code == 0 # 3. save results in local # FIXME: customize file name to avoid hardcode - csv_path = workload_dir + "/16.3_vL1D_cache_access_metrics.csv" + csv_path = workload_dir_output + "/16.3_vL1D_cache_access_metrics.csv" data = load_metrics(csv_path) for metric in metrics: @@ -185,6 +188,7 @@ def test_L1_cache_counters( # 4. clean local output test_utils.clean_output_dir(config["cleanup"], workload_dir) + test_utils.clean_output_dir(config["cleanup"], workload_dir_output) test_utils.clean_output_dir(config["cleanup"], base) # 5. 
check results are expected diff --git a/projects/rocprofiler-compute/tests/test_analyze_commands.py b/projects/rocprofiler-compute/tests/test_analyze_commands.py index 6d9532411f..e0e63868fb 100644 --- a/projects/rocprofiler-compute/tests/test_analyze_commands.py +++ b/projects/rocprofiler-compute/tests/test_analyze_commands.py @@ -25,6 +25,7 @@ import os import shutil +from pathlib import Path from unittest.mock import Mock import pandas as pd @@ -608,14 +609,16 @@ def test_decimal_3(binary_handler_analyze_rocprof_compute): @pytest.mark.misc def test_save_dfs(binary_handler_analyze_rocprof_compute): - output_path = "tests/workloads/vcopy/saved_analysis" + output_path = test_utils.get_output_dir() for dir in indirs: workload_dir = test_utils.setup_workload_dir(dir) code = binary_handler_analyze_rocprof_compute([ "analyze", "--path", workload_dir, - "--save-dfs", + "--output-format", + "csv", + "--output-name", output_path, ]) assert code == 0 @@ -627,6 +630,7 @@ def test_save_dfs(binary_handler_analyze_rocprof_compute): shutil.rmtree(output_path) test_utils.clean_output_dir(config["cleanup"], workload_dir) + test_utils.clean_output_dir(config["cleanup"], output_path) @pytest.mark.col @@ -860,7 +864,6 @@ def test_dependency_MI100(binary_handler_analyze_rocprof_compute): def test_parser_utility_functions(): """Test parser utility functions edge cases""" import sys - from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) @@ -969,7 +972,6 @@ def test_parser_utility_functions(): def test_parser_error_handling(): """Test parser error handling paths""" import sys - from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) @@ -1009,7 +1011,6 @@ def test_missing_file_handling(binary_handler_analyze_rocprof_compute): def test_ast_transformer_edge_cases(): """Simplified test focusing on the actual code paths""" import sys - from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) @@ 
-1051,7 +1052,6 @@ def test_ast_transformer_edge_cases(): def test_analyze_with_debug_mode(binary_handler_analyze_rocprof_compute): """Test analyze to cover debug paths in eval_metric - using direct function call""" import sys - from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) @@ -1138,7 +1138,6 @@ def test_filter_combinations_coverage(binary_handler_analyze_rocprof_compute): def test_apply_filters_direct(): """Test apply_filters function directly to cover filter branches""" import sys - from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) @@ -1213,7 +1212,6 @@ def test_missing_files_scenarios(binary_handler_analyze_rocprof_compute): def test_pc_sampling_basic_coverage(): """Test PC sampling functions with minimal data""" import sys - from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) @@ -1245,7 +1243,6 @@ def test_pc_sampling_basic_coverage(): def test_build_dfs_edge_cases(): """Test build_dfs and gen_counter_list with various configurations""" import sys - from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) @@ -1275,7 +1272,6 @@ def test_build_dfs_edge_cases(): def test_update_functions_coverage(): """Test update_denom_string and update_normUnit_string branches""" import sys - from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) diff --git a/projects/rocprofiler-compute/tests/test_profile_general.py b/projects/rocprofiler-compute/tests/test_profile_general.py index d5c9f900da..a06aefb6a1 100644 --- a/projects/rocprofiler-compute/tests/test_profile_general.py +++ b/projects/rocprofiler-compute/tests/test_profile_general.py @@ -766,6 +766,32 @@ def test_roof_rocpd(binary_handler_profile_rocprof_compute): test_utils.clean_output_dir(config["cleanup"], workload_dir) +@pytest.mark.misc +def test_analyze_rocpd( + binary_handler_profile_rocprof_compute, binary_handler_analyze_rocprof_compute +): 
+ workload_dir = test_utils.get_output_dir() + options = ["--device", "0", "--format-rocprof-output", "rocpd"] + binary_handler_profile_rocprof_compute(config, workload_dir, options, roof=True) + + db_name = "test" + code = binary_handler_analyze_rocprof_compute([ + "analyze", + "--output-format", + "db", + "--output-name", + f"{db_name}", + "--path", + workload_dir, + ]) + assert code == 0 + assert os.path.isfile(f"{db_name}.db") + + # Remove test.db + os.remove(f"{db_name}.db") + test_utils.clean_output_dir(config["cleanup"], workload_dir) + + @pytest.mark.misc def test_roofline_workload_dir_not_set_error(): """ diff --git a/projects/rocprofiler-compute/utils/autogen_hash.yaml b/projects/rocprofiler-compute/utils/autogen_hash.yaml index 6de2ea2fad..7079981108 100644 --- a/projects/rocprofiler-compute/utils/autogen_hash.yaml +++ b/projects/rocprofiler-compute/utils/autogen_hash.yaml @@ -5,12 +5,12 @@ src/rocprof_compute_soc/analysis_configs/gfx940/0000_top_stats.yaml: 401770cff80 src/rocprof_compute_soc/analysis_configs/gfx941/0000_top_stats.yaml: 401770cff804c6e51b78dff61390d8b5977598a2b09c6601ac593653e912535b src/rocprof_compute_soc/analysis_configs/gfx942/0000_top_stats.yaml: 401770cff804c6e51b78dff61390d8b5977598a2b09c6601ac593653e912535b src/rocprof_compute_soc/analysis_configs/gfx950/0000_top_stats.yaml: 401770cff804c6e51b78dff61390d8b5977598a2b09c6601ac593653e912535b -src/rocprof_compute_soc/analysis_configs/gfx908/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef -src/rocprof_compute_soc/analysis_configs/gfx90a/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef -src/rocprof_compute_soc/analysis_configs/gfx940/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef -src/rocprof_compute_soc/analysis_configs/gfx941/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef 
-src/rocprof_compute_soc/analysis_configs/gfx942/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef -src/rocprof_compute_soc/analysis_configs/gfx950/0100_system_info.yaml: 739e39e69056984c277a69c17a6866effa860f56e8b1d3ea5d625582f16228ef +src/rocprof_compute_soc/analysis_configs/gfx908/0100_system_info.yaml: b883dc360890c8d4fae49542b3362fa341598b86198cc7f2b9b9a3cf987f9576 +src/rocprof_compute_soc/analysis_configs/gfx90a/0100_system_info.yaml: b883dc360890c8d4fae49542b3362fa341598b86198cc7f2b9b9a3cf987f9576 +src/rocprof_compute_soc/analysis_configs/gfx940/0100_system_info.yaml: b883dc360890c8d4fae49542b3362fa341598b86198cc7f2b9b9a3cf987f9576 +src/rocprof_compute_soc/analysis_configs/gfx941/0100_system_info.yaml: b883dc360890c8d4fae49542b3362fa341598b86198cc7f2b9b9a3cf987f9576 +src/rocprof_compute_soc/analysis_configs/gfx942/0100_system_info.yaml: b883dc360890c8d4fae49542b3362fa341598b86198cc7f2b9b9a3cf987f9576 +src/rocprof_compute_soc/analysis_configs/gfx950/0100_system_info.yaml: b883dc360890c8d4fae49542b3362fa341598b86198cc7f2b9b9a3cf987f9576 src/rocprof_compute_soc/analysis_configs/gfx908/0200_system_speed_of_light.yaml: 2103e9d6123f473f1cb18b71c046f197b5d1d873563c4aad4933d7361255f0c1 src/rocprof_compute_soc/analysis_configs/gfx90a/0200_system_speed_of_light.yaml: e9f552ee72849dc9c4ab14fee77ecc2681f4bcf610a8649c55365ab7eea7aafc src/rocprof_compute_soc/analysis_configs/gfx940/0200_system_speed_of_light.yaml: 70716745e727d3a7e6fa706d34c346f796c241c485516da52e0c694386b3cf57 @@ -19,9 +19,9 @@ src/rocprof_compute_soc/analysis_configs/gfx942/0200_system_speed_of_light.yaml: src/rocprof_compute_soc/analysis_configs/gfx950/0200_system_speed_of_light.yaml: a2cb003c74c0a75b9fe690da4e21b46e78fdb2f3233fc4753bca9276e93d60b0 src/rocprof_compute_soc/analysis_configs/gfx908/0300_memory_chart.yaml: 190c31ddc0bc713dba8b508faf13f0630b268ed15a0d9206f30998a0a071136f src/rocprof_compute_soc/analysis_configs/gfx90a/0300_memory_chart.yaml: 
8eeb4bb544eebd59aa10b51c1149ee4d015c76073c9a35e673210d9740fbf808 -src/rocprof_compute_soc/analysis_configs/gfx940/0300_memory_chart.yaml: cff5509ac8502bad6dbd75e3058159fe429aece5d93279c66b2a6a8c887b43b6 -src/rocprof_compute_soc/analysis_configs/gfx941/0300_memory_chart.yaml: cff5509ac8502bad6dbd75e3058159fe429aece5d93279c66b2a6a8c887b43b6 -src/rocprof_compute_soc/analysis_configs/gfx942/0300_memory_chart.yaml: cff5509ac8502bad6dbd75e3058159fe429aece5d93279c66b2a6a8c887b43b6 +src/rocprof_compute_soc/analysis_configs/gfx940/0300_memory_chart.yaml: 249e9ae0445de0927827ec14d20f946a07d50d92fd56e1993bbe0c17eb65bd51 +src/rocprof_compute_soc/analysis_configs/gfx941/0300_memory_chart.yaml: 249e9ae0445de0927827ec14d20f946a07d50d92fd56e1993bbe0c17eb65bd51 +src/rocprof_compute_soc/analysis_configs/gfx942/0300_memory_chart.yaml: 249e9ae0445de0927827ec14d20f946a07d50d92fd56e1993bbe0c17eb65bd51 src/rocprof_compute_soc/analysis_configs/gfx950/0300_memory_chart.yaml: 643b31ffa43bc3613d6f90b0c23d95093d0d0aa5bc8e72d9a0fbc1b739a08b67 src/rocprof_compute_soc/analysis_configs/gfx908/0400_roofline.yaml: 6406ce67cd55064f0d2db2a3511c6536cc1625314ddb31366900fbf3c60ed523 src/rocprof_compute_soc/analysis_configs/gfx90a/0400_roofline.yaml: 100d555cf9e70b892e22f92ddd9c0a5d1f914d07077c4a8d35941e8ad62b5b30 @@ -77,18 +77,18 @@ src/rocprof_compute_soc/analysis_configs/gfx940/1400_scalar_l1_data_cache.yaml: src/rocprof_compute_soc/analysis_configs/gfx941/1400_scalar_l1_data_cache.yaml: 29fac4ea38e4a018baffc4a27a720b47078fd890c10da307655d40f693e6f0e7 src/rocprof_compute_soc/analysis_configs/gfx942/1400_scalar_l1_data_cache.yaml: 29fac4ea38e4a018baffc4a27a720b47078fd890c10da307655d40f693e6f0e7 src/rocprof_compute_soc/analysis_configs/gfx950/1400_scalar_l1_data_cache.yaml: 29fac4ea38e4a018baffc4a27a720b47078fd890c10da307655d40f693e6f0e7 -src/rocprof_compute_soc/analysis_configs/gfx908/1500_address_processing_unit_and_data_return_path_ta_td.yaml: 
633d59aba82b3a495b7ba33fa4b2ae4da638b58632bcc37ff18be87af68ce4d4 +src/rocprof_compute_soc/analysis_configs/gfx908/1500_address_processing_unit_and_data_return_path_ta_td.yaml: 1e7717fcbd3c8cdf87d593a33f350ca240c1db8f8065a778cca926da1f517088 src/rocprof_compute_soc/analysis_configs/gfx90a/1500_address_processing_unit_and_data_return_path_ta_td.yaml: 2bdb9d7b3bea1057b3baee29ba3b428b211808261063a97bc4b6b319f4a19fb3 src/rocprof_compute_soc/analysis_configs/gfx940/1500_address_processing_unit_and_data_return_path_ta_td.yaml: 3180c2f3266be0ff44e01d73d247ca43ae2ee18ecaf61765f58849e36c701b19 src/rocprof_compute_soc/analysis_configs/gfx941/1500_address_processing_unit_and_data_return_path_ta_td.yaml: 3180c2f3266be0ff44e01d73d247ca43ae2ee18ecaf61765f58849e36c701b19 src/rocprof_compute_soc/analysis_configs/gfx942/1500_address_processing_unit_and_data_return_path_ta_td.yaml: 3180c2f3266be0ff44e01d73d247ca43ae2ee18ecaf61765f58849e36c701b19 src/rocprof_compute_soc/analysis_configs/gfx950/1500_address_processing_unit_and_data_return_path_ta_td.yaml: 9e56cef5b066fb575a5c530bcf9400f1291dd8636b12c8a2244cdba1defafc9f -src/rocprof_compute_soc/analysis_configs/gfx908/1600_vector_l1_data_cache.yaml: 438d0f4a972dd341eb2485f51a47d6860fbb30a6169054cd8550b4b7226e199f -src/rocprof_compute_soc/analysis_configs/gfx90a/1600_vector_l1_data_cache.yaml: 438d0f4a972dd341eb2485f51a47d6860fbb30a6169054cd8550b4b7226e199f -src/rocprof_compute_soc/analysis_configs/gfx940/1600_vector_l1_data_cache.yaml: 6100b218f24de9f1433b39a093ed04b9bb9dfe656c5df77583c9db332c447230 -src/rocprof_compute_soc/analysis_configs/gfx941/1600_vector_l1_data_cache.yaml: 6100b218f24de9f1433b39a093ed04b9bb9dfe656c5df77583c9db332c447230 -src/rocprof_compute_soc/analysis_configs/gfx942/1600_vector_l1_data_cache.yaml: 6100b218f24de9f1433b39a093ed04b9bb9dfe656c5df77583c9db332c447230 -src/rocprof_compute_soc/analysis_configs/gfx950/1600_vector_l1_data_cache.yaml: 67054ec0a4c6ca147a5dd40cc91f0e8e81378e1affe7d479274747579ecc524a 
+src/rocprof_compute_soc/analysis_configs/gfx908/1600_vector_l1_data_cache.yaml: 360a9cd6df4e345a45f0660bc8df2003d5eb5dba2359d7e59c89933dc9fba94e +src/rocprof_compute_soc/analysis_configs/gfx90a/1600_vector_l1_data_cache.yaml: 360a9cd6df4e345a45f0660bc8df2003d5eb5dba2359d7e59c89933dc9fba94e +src/rocprof_compute_soc/analysis_configs/gfx940/1600_vector_l1_data_cache.yaml: 37c061bc9751828621a72aa6576596262b684fca7b764adbb991cd7eef58987d +src/rocprof_compute_soc/analysis_configs/gfx941/1600_vector_l1_data_cache.yaml: 37c061bc9751828621a72aa6576596262b684fca7b764adbb991cd7eef58987d +src/rocprof_compute_soc/analysis_configs/gfx942/1600_vector_l1_data_cache.yaml: 37c061bc9751828621a72aa6576596262b684fca7b764adbb991cd7eef58987d +src/rocprof_compute_soc/analysis_configs/gfx950/1600_vector_l1_data_cache.yaml: ae0388f43813302969f51a80ac58678614b993f5163083a69e1c99811d730064 src/rocprof_compute_soc/analysis_configs/gfx908/1700_l2_cache.yaml: 54ff1df4ee08206d0aa4ff9cd9f0b20cbaa3866aecb9b40a0ac5969e9e25ed20 src/rocprof_compute_soc/analysis_configs/gfx90a/1700_l2_cache.yaml: ee87b5b6cdaca98de6e5cb0d06e2e092470e0e25aac1498f8abcfc8421932ae6 src/rocprof_compute_soc/analysis_configs/gfx940/1700_l2_cache.yaml: 78f9fee5dafc83d311da1c801200c1820e16a0678dd0548fafa8a966ec6a94d5 diff --git a/projects/rocprofiler-compute/utils/unified_config.yaml b/projects/rocprofiler-compute/utils/unified_config.yaml index 4d1964dcb7..531afa847b 100644 --- a/projects/rocprofiler-compute/utils/unified_config.yaml +++ b/projects/rocprofiler-compute/utils/unified_config.yaml @@ -16,6 +16,7 @@ panels: data source: - raw_csv_table: id: 101 + title: System Info source: sysinfo.csv columnwise: true - id: 200 @@ -1878,10 +1879,6 @@ panels: L2 Hit: value: ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else 0)), 0) - L2 Rd Lat: - value: null - L2 Wr Lat: - value: null Fabric_L2 Rd: value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0) Fabric_L2 Wr: @@ -2012,10 
+2009,6 @@ panels: L2 Hit: value: ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else 0)), 0) - L2 Rd Lat: - value: null - L2 Wr Lat: - value: null Fabric_L2 Rd: value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0) Fabric_L2 Wr: @@ -2146,10 +2139,6 @@ panels: L2 Hit: value: ROUND(AVG((((100 * TCC_HIT_sum) / (TCC_HIT_sum + TCC_MISS_sum)) if ((TCC_HIT_sum + TCC_MISS_sum) != 0) else 0)), 0) - L2 Rd Lat: - value: null - L2 Wr Lat: - value: null Fabric_L2 Rd: value: ROUND(AVG((TCC_EA0_RDREQ_sum / $denom)), 0) Fabric_L2 Wr: @@ -11704,11 +11693,6 @@ panels: min: MIN(((100 * TD_TC_STALL_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))) max: MAX(((100 * TD_TC_STALL_sum) / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))) unit: pct - "Workgroup manager \u2192 Data-Return Stall": - avg: null - min: null - max: null - unit: pct Coalescable Instructions: avg: AVG((TD_COALESCABLE_WAVEFRONT_sum / $denom)) min: MIN((TD_COALESCABLE_WAVEFRONT_sum / $denom)) @@ -13338,7 +13322,7 @@ panels: avg: Avg min: Min max: Max - units: Units + units: Unit metric: gfx90a: Req: @@ -13532,7 +13516,7 @@ panels: avg: Avg min: Min max: Max - units: Units + units: Unit metric: gfx90a: {} gfx941: {}