diff --git a/projects/rocprofiler-sdk/CHANGELOG.md b/projects/rocprofiler-sdk/CHANGELOG.md index 6c4036e94d..8e7a87d8ee 100644 --- a/projects/rocprofiler-sdk/CHANGELOG.md +++ b/projects/rocprofiler-sdk/CHANGELOG.md @@ -183,6 +183,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec - support for `find_package(rocprofiler-sdk-rocpd)` - `rocprofiler-sdk-rocpd` DEB and RPM packages - Support `--version` option for `rocprofv3` +- Added `rocpd` Python package ### Changed diff --git a/projects/rocprofiler-sdk/external/cereal b/projects/rocprofiler-sdk/external/cereal index cc723c4fd6..e736e75d9d 160000 --- a/projects/rocprofiler-sdk/external/cereal +++ b/projects/rocprofiler-sdk/external/cereal @@ -1 +1 @@ -Subproject commit cc723c4fd6114fa4712ec3e8308479909ce21988 +Subproject commit e736e75d9d8cd4cc01614f21097f185dc2c6a6bc diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp index f69f47615b..896b364dad 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp @@ -226,6 +226,7 @@ ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(ROCJPEG_API, rocjpeg_api) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HIP_STREAM, hip_api) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HIP_RUNTIME_API_EXT, hip_api) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HIP_COMPILER_API_EXT, hip_api) +ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(ROCDECODE_API_EXT, rocdecode_api) ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(NONE, none) ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(HSA_CORE_API, hsa_api) diff --git a/projects/rocprofiler-sdk/source/lib/python/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/python/CMakeLists.txt index 0009129ee4..bbf5a4fe4d 100644 --- a/projects/rocprofiler-sdk/source/lib/python/CMakeLists.txt +++ 
b/projects/rocprofiler-sdk/source/lib/python/CMakeLists.txt @@ -14,3 +14,4 @@ if(NOT DEFINED ROCPROFILER_PYTHON_VERSIONS) endif() add_subdirectory(roctx) +add_subdirectory(rocpd) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/python/rocpd/CMakeLists.txt new file mode 100644 index 0000000000..a2b39fd337 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/CMakeLists.txt @@ -0,0 +1,18 @@ +# +# +# + +if(ROCPROFILER_BUILD_SQLITE3) + message( + FATAL_ERROR + "rocpd python bindings require setting ROCPROFILER_BUILD_SQLITE3=OFF to interop with Python SQLite3 module" + ) +endif() + +foreach(_PYTHON_VERSION ${ROCPROFILER_PYTHON_VERSIONS}) + rocprofiler_rocpd_python_bindings(${_PYTHON_VERSION}) +endforeach() + +add_subdirectory(source) + +rocprofiler_reset_python3_cache() diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/README.md b/projects/rocprofiler-sdk/source/lib/python/rocpd/README.md new file mode 100644 index 0000000000..fe97b2575d --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/README.md @@ -0,0 +1,93 @@ +# ROCm Profiling Data (RocPD) + +The RocPD Python package provides a scriptable API for analyzing, summarizing, filtering, and merging tracing data +collected with the ROCm profiling tools suite. + +## Background + +In the past, the ROCm profiling tools (e.g. rocprofv3, rocprofiler-systems, etc.) have directly written data to +various output formats such as CSV, JSON, Perfetto, OTF2, etc. This approach has a significant number of flaws: + +### No standardization in the CSV and JSON output formats + +The ROCm profiling group considers the standardization of the CSV and JSON output formats for all the tools +as a waste of time.
Neither of these data formats scales well when large amounts of profiling data are collected +due to the inherent overhead of parsing textual data as opposed to binary, the arcane simplicity of the +CSV format, and the (general) requirement to parse/load the entire JSON file in order to perform any meaningful +data processing. + +### Inability to unify output collected across multiple processes and nodes + +Supporting the unification of output collected across multiple processes and nodes is a difficult endeavor. +The complexity of communicating profiling information between processes, especially when the processes exist +on separate nodes connected through a network, at best, requires integration with the job launchers and/or +explicit support for the job launchers. The general expectation for profiling tools is for them to work +regardless of the user application's choice of process-level parallelism (e.g. MPI, fork, spawn, +Python multi-processing, UPC, etc.) and job scheduler (e.g. SLURM, flux, PBS/Torque, LSF, etc.). +Adding explicit integration/support for this many flavors of parallelism and job schedulers is untenable. +The most consistent aspect of multi-node jobs is a shared filesystem: it is considered a necessity for the +user experience. Without a shared filesystem, the user would be responsible for transferring the application's +input and output to/from the specific nodes the job scheduler decided to give them. Thus, the most reliable +output for in-process profiling tools is adopting the approach of generating (at least) one output file per process. + +In order to unify the output collected across multiple processes, the one-output-per-process approach +requires either (A) a post-processing step which combines the various outputs into a single output, +(B) an output format which utilizes a single "metadata" file which links together the individual +outputs, or (C) a visualizer which supports opening multiple files at once.
The ROCm profiling group +considers Option A to be the most flexible and reliable approach since Option B does require a small +amount of inter-process communication to write the "metadata" file and Option C imposes a rigid +restriction on the choice of visualizer. + +### Data filtering at the data collection stage + +In rocprofiler-systems and rocprofv3 with the direct output to Perfetto approach, if the tool collects +2 GB of tracing data per-process in a multi-node job with 16 processes, Perfetto will struggle +to visualize each individual 2 GB trace and fail to load a combined 32 GB trace. In this situation, the +user must re-run the application and collect less data -- all of that tracing data from the previous run +is effectively lost. However, if rocprofiler-systems and rocprofv3 were to adopt an intermediate output +format approach and the Perfetto visualization were generated from this intermediate output format, +the user would have a multitude of options to remedy this issue. For example, the user could filter out +data (e.g. drop HSA functions from the trace), instruct the Perfetto generator to skip adding Perfetto +debug annotations on the trace events, combine the 32 GB of data and split it into 32 separate visualizations +based on time instead of processes, etc. + +### Absence of automated analysis + +Certain formats such as Perfetto are great for visualization. However, they lack any automated analysis +of the data. For example, a flat profile is an extremely useful companion when visually analyzing a trace, +and other forms of automated analysis can quickly and easily do anomaly detection. + +## Overview + +RocPD is essentially a Python package which understands a standardized SQLite3 schema. This Python package +intends to provide a centralized place for a multitude of post-process analysis capabilities. The capabilities +include, but are not limited to, analyzing, summarizing, filtering, merging, and generating visualizations of +tracing data.
This design allows tools such as rocprofv3, rocprofiler-systems, rocprofiler-compute, etc. to +focus on minimizing overhead during data collection and adding new data collection features. These tools simply +need to write one SQL database per process which adheres to the agreed upon RocPD SQL schema and RocPD will +handle the analysis and visualization of the data. + +RocPD uses a unique approach to view multiple on-disk databases as a single-database when performing queries. +Python applications using RocPD __must__ load the on-disk databases by constructing a `rocpd.importer.RocpdImportData` +object with a list of the database filepaths or by using the `rocpd.connect` function which returns a +`rocpd.importer.RocpdImportData` object. + +### Loading Databases Example + +```python +input = ["A.db", "B.db"] +rpd_data = rocpd.connect(input) +``` + +### Executing Queries + +The `rocpd.importer.RocpdImportData` object supports all of the same functions as `sqlite3.Connection`: + +```python +for itr in rpd_data.execute("SELECT * FROM kernels"): + print(f"{itr}") + +cursor = rpd_data.cursor() +for itr in cursor.execute("SELECT * FROM top").fetchall(): + print(f"{itr}") +``` diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/__init__.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/__init__.py new file mode 100644 index 0000000000..bd8b171833 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/__init__.py @@ -0,0 +1,149 @@ +############################################################################### +# MIT License +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + +import sys +import os + +try: + import ctypes + + sqlite3lib = ctypes.CDLL("libsqlite3.so") +except Exception: + pass + +from . 
import libpyrocpd +from .importer import RocpdImportData + +__all__ = [ + "connect", + "execute", + "read_agents", + "read_nodes", + "read_processes", + "read_threads", + "write_perfetto", + "write_csv", + "write_otf2", + "RocpdImportData", +] + + +def format_path(path, tag=os.path.basename(sys.executable)): + return libpyrocpd.format_path(path, tag) + + +def connect(input, *args, **kwargs): + return RocpdImportData(input, *args, **kwargs) + + +def execute(data, *args, **kwargs): + return data.execute(*args, **kwargs) + + +def read_agents(data, condition=""): + return libpyrocpd.read_agents(data, condition) + + +def read_nodes(data, condition=""): + return libpyrocpd.read_nodes(data, condition) + + +def read_processes(data, condition=""): + return libpyrocpd.read_processes(data, condition) + + +def read_threads(data, condition=""): + return libpyrocpd.read_threads(data, condition) + + +def write_perfetto(connection, config=None, **kwargs): + """ + Write Perfetto pftrace output file + + Args: + connection (rocpd.RocpdImportData): + rocPD instance of database connection(s) + config (rocpd.output_config.output_config): + Output specification + + Returns: + bool: returns True if successful + """ + from . import output_config + + config = ( + output_config.output_config(**kwargs) + if config is None + else config.update(**kwargs) + ) + + return libpyrocpd.write_perfetto(connection, config) + + +def write_csv(connection, config=None, **kwargs): + """ + Write CSV output file(s) + + Args: + connection (rocpd.RocpdImportData): + rocPD instance of database connection(s) + config (rocpd.output_config.output_config): + Output specification + + Returns: + bool: returns True if successful + """ + from . 
import output_config + + config = ( + output_config.output_config(**kwargs) + if config is None + else config.update(**kwargs) + ) + + return libpyrocpd.write_csv(connection, config) + + +def write_otf2(connection, config=None, **kwargs): + """ + Write OTF2 output file + + Args: + connection (rocpd.RocpdImportData): + rocPD instance of database connection(s) + config (rocpd.output_config.output_config): + Output specification + + Returns: + bool: returns True if successful + """ + from . import output_config + + config = ( + output_config.output_config(**kwargs) + if config is None + else config.update(**kwargs) + ) + + return libpyrocpd.write_otf2(connection, config) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/__main__.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/__main__.py new file mode 100644 index 0000000000..a056d497dd --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/__main__.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +############################################################################### +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + +from __future__ import absolute_import + +__author__ = "Advanced Micro Devices, Inc." +__copyright__ = "Copyright 2025, Advanced Micro Devices, Inc." +__license__ = "MIT" + + +def main(argv=None, config=None): + """Main entry point for the rocpd command line tool. + + Args: + argv (list, optional): List of command line arguments. Defaults to None. + + """ + import argparse + from . import time_window + from . import output_config + from . import pftrace + from . import csv + from . import otf2 + from .importer import RocpdImportData + + convert_examples = """ + +Example usage: + + Convert 1 database, output perfetto trace + $ python3 -m rocpd convert -i db1.db --output-format pftrace + + Convert 2 databases, output perfetto trace to path and filename, reduce time window to omit the first 30% + $ python3 -m rocpd convert -i db1.db db2.db --output-format pftrace -d "./output/" -o "twoFileTraces" --start 30% --end 100% + + Convert 6 databases, output CSV and perfetto trace formats + $ python3 -m rocpd convert -i db{0..5}.db --output-format csv pftrace -d "~/output_folder/" -o "sixFileTraces" + + Convert 2 databases, output CSV, OTF2, and perfetto trace formats + $ python3 -m rocpd convert -i db{3,4}.db --output-format csv otf2 pftrace + +""" + + parser = argparse.ArgumentParser( + prog="rocpd", + description="Aggregate and/or analyze ROCm Profiling Data (rocpd)", + allow_abbrev=False, + ) + + subparsers = parser.add_subparsers(dest="command") + converter = subparsers.add_parser( + "convert", + description="Convert rocPD data into another data format", + allow_abbrev=False, + 
formatter_class=argparse.RawTextHelpFormatter, + epilog=convert_examples, + ) + + def get_output_type(val): + return val.lower().replace("perfetto", "pftrace") + + required_params = converter.add_argument_group("Required arguments") + + required_params.add_argument( + "-i", + "--input", + required=True, + type=output_config.check_file_exists, + nargs="+", + help="Input path and filename to one or more database(s), separated by spaces", + ) + required_params.add_argument( + "-f", + "--output-format", + help="For adding output format (supported formats: csv, pftrace, otf2)", + nargs="+", + default=None, + choices=("csv", "pftrace", "otf2"), + type=get_output_type, + required=True, + ) + + # add args from any sub-modules + valid_out_config_args = output_config.add_args(converter) + valid_generic_args = output_config.add_generic_args(converter) + valid_pftrace_args = pftrace.add_args(converter) + valid_csv_args = csv.add_args(converter) + valid_otf2_args = otf2.add_args(converter) + valid_time_window_args = time_window.add_args(converter) + + # parse the command line arguments + args = parser.parse_args(argv) + + # process the args + out_cfg_args = output_config.process_args(args, valid_out_config_args) + generic_out_cfg_args = output_config.process_generic_args(args, valid_generic_args) + pftrace_args = pftrace.process_args(args, valid_pftrace_args) + csv_args = csv.process_args(args, valid_csv_args) + otf2_args = otf2.process_args(args, valid_otf2_args) + window_args = time_window.process_args(args, valid_time_window_args) + + # now start processing the data. 
Import the data and merge the views + importData = RocpdImportData(args.input) + + # adjust the time window view of the data + if window_args is not None: + time_window.apply_time_window(importData, **window_args) + + all_args = { + **out_cfg_args, + **generic_out_cfg_args, + **pftrace_args, + **csv_args, + **otf2_args, + } + # setup the config args + config = ( + output_config.output_config(**all_args) + if config is None + else config.update(**all_args) + ) + + # process each requested output format + format_handlers = { + "pftrace": pftrace.write_pftrace, + "csv": csv.write_csv, + "otf2": otf2.write_otf2, + } + + for out_format in args.output_format: + if out_format in format_handlers: + print(f"Converting database(s) to {out_format} format:") + format_handlers[out_format](importData, config) + else: + print(f"Warning: Unsupported output format '{out_format}'") + + print("Done. Exiting...") + + +if __name__ == "__main__": + main() diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/csv.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/csv.py new file mode 100644 index 0000000000..0c6a5d3d9d --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/csv.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +############################################################################### +# MIT License +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + +from .importer import RocpdImportData +from .time_window import apply_time_window +from . import output_config +from . import libpyrocpd + + +def write_csv(importData, config): + return libpyrocpd.write_csv(importData, config) + + +def execute(input, config=None, window_args=None, **kwargs): + + importData = RocpdImportData(input) + + apply_time_window(importData, **window_args) + + config = ( + output_config.output_config(**kwargs) + if config is None + else config.update(**kwargs) + ) + + write_csv(importData, config) + + +def add_args(parser): + """Add csv arguments.""" + + return [] + + +def process_args(args, valid_args): + ret = {} + return ret + + +def main(argv=None): + import argparse + from .time_window import add_args as add_args_time_window + from .time_window import process_args as process_args_time_window + from .output_config import add_args as add_args_output_config + from .output_config import process_args as process_args_output_config + from .output_config import add_generic_args, process_generic_args + + parser = argparse.ArgumentParser( + description="Convert rocPD to CSV files", + allow_abbrev=False, + formatter_class=argparse.RawTextHelpFormatter, + ) + + required_params = parser.add_argument_group("Required arguments") + + required_params.add_argument( + "-i", + "--input", + required=True, + type=output_config.check_file_exists, + nargs="+", + help="Input path and filename to one or 
more database(s), separated by spaces", + ) + + valid_out_config_args = add_args_output_config(parser) + valid_generic_args = add_generic_args(parser) + valid_time_window_args = add_args_time_window(parser) + valid_csv_args = add_args(parser) + + args = parser.parse_args(argv) + + out_cfg_args = process_args_output_config(args, valid_out_config_args) + generic_out_cfg_args = process_generic_args(args, valid_generic_args) + window_args = process_args_time_window(args, valid_time_window_args) + csv_args = process_args(args, valid_csv_args) + + all_args = { + **out_cfg_args, + **generic_out_cfg_args, + **csv_args, + } + + execute(args.input, window_args=window_args, **all_args) + + +if __name__ == "__main__": + main() diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/importer.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/importer.py new file mode 100644 index 0000000000..5ad4aef1a6 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/importer.py @@ -0,0 +1,166 @@ +############################################################################### +# MIT License +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + +# +# Utility classes to simplify generating rpd files +# +# + +import sys +import sqlite3 + +from .schema import RocpdSchema +from . import libpyrocpd + +__all__ = ["RocpdImportData", "execute_statement"] + + +class RocpdImportData(libpyrocpd.RocpdImportData): + + def __init__(self, input): + if isinstance(input, RocpdImportData): + super(RocpdImportData, self).__init__(input) + self.table_info = input.table_info + else: + + if isinstance(input, sqlite3.Connection): + raise ValueError( + "RocpdImportData does not accept existing sqlite3 connections" + ) + elif isinstance(input, str): + _connection = libpyrocpd.connect(input) + _filenames = [input] + elif isinstance(input, list) and len(input) > 0 and isinstance(input[0], str): + _connection = libpyrocpd.connect(":memory:") + _filenames = input[:] + _connection.execute("PRAGMA foreign_keys = ON") + self.table_info = _create_temp_views(_connection, input) + _create_meta_views(_connection) + else: + raise ValueError( + f"input is unsupported type. Expected sqlite3.Connection, string, or (non-empty) list of strings. type={type(input).__name__}" + ) + super(RocpdImportData, self).__init__(_connection, _filenames) + + def __getattr__(self, name): + # any attribute or method not found in RocpdImportData will be looked up on self.connection + return getattr(self.connection, name) + + def __enter__(self): + # support "with RocpdImportData(...) 
as db:": + return self + + def __exit__(self, exc_type, exc, tb): + return self.connection.__exit__(exc_type, exc, tb) + + +def execute_statement(conn, statement, is_script=False): + if isinstance(conn, RocpdImportData): + _conn = conn.connection + else: + _conn = conn + + assert isinstance(_conn, sqlite3.Connection) + try: + if is_script: + return _conn.executescript(statement) + return _conn.execute(f"{statement}") + except sqlite3.Error as err: + sys.stderr.write(f"SQLite3 error: {err}\nStatement:\n\t{statement}\n") + sys.stderr.flush() + raise err + + +def _create_temp_views(connection, input): + """Create temporary unified views from multiple database files.""" + + assert isinstance(connection, sqlite3.Connection) + assert isinstance(input, list) + + # Attach each database and extract the uuid from each database + dbinfo = [] + uuids = [] + for i, inp in enumerate(input): + execute_statement(connection, f"ATTACH DATABASE '{inp}' AS db{i}") + _uuids = [ + itr[0] + for itr in execute_statement( + connection, + f"SELECT value FROM db{i}.rocpd_metadata WHERE tag='uuid'", + ).fetchall() + ] + dbinfo += [f"db{i}"] + uuids += [itr for itr in _uuids if itr not in uuids] + + # unique set of universal process identifiers + uuids = list(set(uuids)) + + all_tables = {} + for ditr in dbinfo: + # get the tables for the given attached database + tables = [ + itr[0] + for itr in execute_statement( + connection, + f"SELECT name FROM {ditr}.sqlite_master WHERE type='table' AND name LIKE 'rocpd_%'", + ).fetchall() + ] + + # loop over the tables + for itr in tables: + # loop over the UUIDs + for uitr in uuids: + # skip the tables without the UUID suffix + if f"{uitr}" not in itr: + continue + + # strip the UUID suffix to create a base table name, e.g. 
'rocpd_string_03daf93' -> 'rocpd_string' + base = itr.replace(f"{uitr}", "") + + # create a list of attached databases which have the base table name + if base not in all_tables.keys(): + all_tables[base] = [] + + # create the SELECT statement from this database + select = f"SELECT * FROM {ditr}.{base}" + + # make sure that we don't duplicate SELECT statements of same table from same attached database + if select in all_tables[base]: + continue + + # add this to list + all_tables[base] += [select] + + # create the temporary view that is a union of all the attached databases + for key, itr in all_tables.items(): + stmt = "CREATE TEMPORARY VIEW {} AS {}".format(key, " UNION ALL ".join(itr)) + execute_statement(connection, stmt) + + return all_tables + + +def _create_meta_views(connection): + schema = RocpdSchema() + sql_script = schema.views.replace("CREATE VIEW", "CREATE TEMPORARY VIEW") + execute_statement(connection, sql_script, is_script=True) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/libpyrocpd.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/libpyrocpd.cpp new file mode 100644 index 0000000000..0f62325724 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/libpyrocpd.cpp @@ -0,0 +1,795 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "libpyrocpd.hpp" +#include "lib/output/format_path.hpp" +#include "lib/python/rocpd/source/common.hpp" +#include "lib/python/rocpd/source/csv.hpp" +#include "lib/python/rocpd/source/functions.hpp" +#include "lib/python/rocpd/source/interop.hpp" +#include "lib/python/rocpd/source/otf2.hpp" +#include "lib/python/rocpd/source/perfetto.hpp" +#include "lib/python/rocpd/source/serialization/sql.hpp" +#include "lib/python/rocpd/source/sql_generator.hpp" +#include "lib/python/rocpd/source/types.hpp" + +#include "lib/common/defines.hpp" +#include "lib/common/logging.hpp" +#include "lib/common/simple_timer.hpp" +#include "lib/common/utility.hpp" +#include "lib/output/agent_info.hpp" +#include "lib/output/kernel_symbol_info.hpp" +#include "lib/output/output_config.hpp" +#include "lib/output/output_stream.hpp" +#include "lib/output/sql/common.hpp" +#include "lib/output/timestamps.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace py = ::pybind11; + +namespace rocpd +{ +template +auto +read_impl(sqlite3* conn, std::string_view conditions) +{ + auto query = std::string_view{}; + + if constexpr(std::is_same::value) + query = "rocpd_info_node"; + else if constexpr(std::is_same::value) + query = "processes"; + else if constexpr(std::is_same::value) + query = "threads"; + else if constexpr(std::is_same::value) + 
query = "regions"; + else if constexpr(std::is_same::value) + query = "kernels"; + else if constexpr(std::is_same::value) + query = "rocpd_info_agent"; + else + static_assert(rocprofiler::sdk::mpl::assert_false::value, "Unsupported read type"); + + auto data = std::vector{}; + if(conn) + { + auto ar = cereal::SQLite3InputArchive{ + conn, fmt::format("SELECT * FROM {} {}", query, conditions)}; + cereal::load(ar, data); + } + return data; +} + +template +auto +read_impl(py::object obj, // NOLINT(performance-unnecessary-value-param) + std::string_view conditions) +{ + return read_impl(rocpd::interop::get_connection(std::move(obj)), conditions); +} + +template +auto +read(ArgT&& arg, std::string_view conditions = {}) +{ + return read_impl(std::forward(arg), conditions); +} + +bool +is_sqlite3_connection(const py::object& obj) +{ + py::module_ sqlite3 = py::module_::import("sqlite3"); + py::object connection_type = sqlite3.attr("Connection"); + return py::isinstance(obj, connection_type); +} + +std::string +get_type_name(const py::object& obj) +{ + return obj.get_type().attr("__name__").cast(); +} + +struct RocpdImportData +{ + RocpdImportData() = default; + ~RocpdImportData() = default; + + RocpdImportData(const RocpdImportData&) = default; + RocpdImportData(RocpdImportData&&) noexcept = default; + RocpdImportData& operator=(const RocpdImportData&) = default; + RocpdImportData& operator=(RocpdImportData&&) noexcept = default; + + RocpdImportData(const py::object& _obj, const std::vector& _dbs) + : connection{_obj} + , databases{_dbs} + { + if(py::isinstance(_obj)) + { + connection = _obj.cast().connection; + databases = _obj.cast().databases; + } + else + { + if(!is_sqlite3_connection(_obj)) + { + auto _errmsg = fmt::format("libpyrocpd.RocpdImportData cannot be constructed " + "from provided Python object of type {} (databases: {})", + get_type_name(_obj), + fmt::join(_dbs.begin(), _dbs.end(), ", ")); + ROCP_CI_LOG(WARNING) << _errmsg; + throw py::type_error{_errmsg}; 
+ } + } + } + + size_t size() const { return (connection) ? databases.size() : 0; } + bool empty() const { return databases.empty() || !connection; } + + py::object connection = {}; + std::vector databases = {}; +}; + +struct jinja_variables +{ + py::str uuid = py::none{}; + py::str guid = py::none{}; +}; +} // namespace rocpd + +PYBIND11_MODULE(libpyrocpd, pyrocpd) +{ + // namespace sdk = ::rocprofiler::sdk; + namespace tool = ::rocprofiler::tool; + namespace common = ::rocprofiler::common; + + py::doc("ROCm Profiling Data (RocPD) Python bindings"); + + // initialize logging with control via ROCPD_LOG_LEVEL env variable + common::init_logging("ROCPD"); + + rocpd::interop::activate_gotcha_bindings(); + + if(auto _thrd_safety = sqlite3_threadsafe(); _thrd_safety == 2) + { + ROCP_INFO_IF(sqlite3_config(SQLITE_CONFIG_MULTITHREAD) == SQLITE_MISUSE) + << "sqlite3 cannot be configured to support multithreading"; + } + + pyrocpd.def( + "format_path", + [](std::string inp, const std::string& tag) { + return tool::format_path(std::move(inp), tag); + }, + "Resolve output keys in filepath"); + + py::enum_(pyrocpd, "sql_engine", "Load schema engines") + .value("sqlite3", ROCPD_SQL_ENGINE_SQLITE3); + + py::enum_(pyrocpd, "sql_schema", "Load schema kinds") + .value("rocpd_tables", ROCPD_SQL_SCHEMA_ROCPD_TABLES) + .value("rocpd_indexes", ROCPD_SQL_SCHEMA_ROCPD_INDEXES) + .value("rocpd_views", ROCPD_SQL_SCHEMA_ROCPD_VIEWS) + .value("data_views", ROCPD_SQL_SCHEMA_ROCPD_DATA_VIEWS) + .value("summary_views", ROCPD_SQL_SCHEMA_ROCPD_SUMMARY_VIEWS) + .value("marker_views", ROCPD_SQL_SCHEMA_ROCPD_MARKER_VIEWS); + + py::enum_(pyrocpd, "sql_option", "Load schema options") + .value("none", ROCPD_SQL_OPTIONS_NONE) + .value("sqlite3_pragma_foreign_keys", ROCPD_SQL_OPTIONS_SQLITE3_PRAGMA_FOREIGN_KEYS); + + py::enum_(pyrocpd, "agent_indexing", "enum.Enum") + .value("node", tool::agent_indexing::node) + .value("logical_node", tool::agent_indexing::logical_node) + .value("logical_node_type", 
tool::agent_indexing::logical_node_type); + + // demo for creating python bindings to a class + py::class_(pyrocpd, "agent") + .def_readonly("node_id", &rocpd::types::agent::node_id) + .def_readonly("logical_node_id", &rocpd::types::agent::logical_node_id) + .def_readonly("gpu_index", &rocpd::types::agent::gpu_index) + .def_readonly("name", &rocpd::types::agent::name) + .def_readonly("user_name", &rocpd::types::agent::user_name) + .def_readonly("product_name", &rocpd::types::agent::product_name); + + py::class_(pyrocpd, "node") + .def(py::init<>()) + .def_readonly("nid", &rocpd::types::node::id) + .def_readonly("hash", &rocpd::types::node::hash) + .def_readonly("machine_id", &rocpd::types::node::machine_id) + .def_readonly("hostname", &rocpd::types::node::hostname) + .def_readonly("system_name", &rocpd::types::node::system_name) + .def_readonly("system_release", &rocpd::types::node::release) + .def_readonly("system_version", &rocpd::types::node::version); + + py::class_(pyrocpd, "process") + .def(py::init<>()) + .def_readonly("nid", &rocpd::types::process::nid) + .def_readonly("machine_id", &rocpd::types::process::machine_id) + .def_readonly("hostname", &rocpd::types::process::hostname) + .def_readonly("system_name", &rocpd::types::process::system_name) + .def_readonly("system_release", &rocpd::types::process::system_release) + .def_readonly("system_version", &rocpd::types::process::system_version) + .def_readonly("ppid", &rocpd::types::process::ppid) + .def_readonly("pid", &rocpd::types::process::pid) + .def_readonly("init", &rocpd::types::process::init) + .def_readonly("start", &rocpd::types::process::start) + .def_readonly("end", &rocpd::types::process::end) + .def_readonly("fini", &rocpd::types::process::fini) + .def_readonly("command", &rocpd::types::process::command); + + py::class_(pyrocpd, "thread") + .def(py::init<>()) + .def_readonly("nid", &rocpd::types::thread::nid) + .def_readonly("machine_id", &rocpd::types::thread::machine_id) + 
.def_readonly("hostname", &rocpd::types::thread::hostname) + .def_readonly("system_name", &rocpd::types::thread::system_name) + .def_readonly("system_release", &rocpd::types::thread::system_release) + .def_readonly("system_version", &rocpd::types::thread::system_version) + .def_readonly("ppid", &rocpd::types::thread::ppid) + .def_readonly("pid", &rocpd::types::thread::pid) + .def_readonly("tid", &rocpd::types::thread::tid) + .def_readonly("start", &rocpd::types::thread::start) + .def_readonly("end", &rocpd::types::thread::end) + .def_readonly("name", &rocpd::types::thread::name); + + py::class_(pyrocpd, "output_config", "Output configuration") + .def(py::init<>()) + .def_readwrite("output_path", &tool::output_config::output_path) + .def_readwrite("output_file", &tool::output_config::output_file) + .def_readwrite("tmp_directory", &tool::output_config::tmp_directory) + .def_readwrite("csv", &tool::output_config::csv_output) + .def_readwrite("pftrace", &tool::output_config::pftrace_output) + .def_readwrite("otf2", &tool::output_config::otf2_output) + .def_readwrite("kernel_rename", &tool::output_config::kernel_rename) + .def_readwrite("agent_index_value", &tool::output_config::agent_index_value) + .def_readwrite("group_by_queue", &tool::output_config::group_by_queue) + .def_readwrite("perfetto_shmem_size_hint", &tool::output_config::perfetto_shmem_size_hint) + .def_readwrite("perfetto_buffer_size", &tool::output_config::perfetto_buffer_size) + .def_readwrite("perfetto_backend", &tool::output_config::perfetto_backend) + .def_readwrite("perfetto_buffer_fill_policy", + &tool::output_config::perfetto_buffer_fill_policy); + + py::class_(pyrocpd, "metadata") + .def("set_process_id", &tool::metadata::set_process_id) + .def("add_marker_message", &tool::metadata::add_marker_message) + // .def("add_code_object", &tool::metadata::add_code_object) + // .def("add_kernel_symbol", &tool::metadata::add_kernel_symbol) + // .def("add_host_function", &tool::metadata::add_host_function) 
+ .def("add_string_entry", &tool::metadata::add_string_entry) + .def("add_external_correlation_id", &tool::metadata::add_external_correlation_id) + .def("add_agent", + [](tool::metadata* md, const rocpd::types::agent& _agent) { + if(!md) return; + md->agents.emplace_back(_agent.base()); + md->agents_map.emplace(_agent.id, _agent.base()); + }) + .def_readwrite("process_id", &tool::metadata::process_id) + .def_readwrite("parent_process_id", &tool::metadata::parent_process_id) + .def_readwrite("process_start_ns", &tool::metadata::process_start_ns) + .def_readwrite("process_end_ns", &tool::metadata::process_end_ns) + .def_readwrite("agents", &tool::metadata::agents_map) + .def_readwrite("node_data", &tool::metadata::node_data) + .def_readwrite("att_filenames", &tool::metadata::att_filenames) + .def_readwrite("buffer_names", &tool::metadata::buffer_names) + .def_readwrite("callback_names", &tool::metadata::callback_names) + .def_readwrite("command_line", &tool::metadata::command_line); + + py::class_( + pyrocpd, "schema_jinja_variables", "Variables for jinja substitution") + .def(py::init<>()) + .def_readwrite("uuid", &rocpd::jinja_variables::uuid) + .def_readwrite("guid", &rocpd::jinja_variables::guid); + + py::class_(pyrocpd, "RocpdImportData", "RocPD database(s) instances") + .def(py::init<>()) + .def(py::init()) + .def(py::init>()) + .def_readonly("connection", &rocpd::RocpdImportData::connection) + .def_readonly("databases", &rocpd::RocpdImportData::databases); + + pyrocpd.def("load_schema", + [](rocpd_sql_engine_t engine, + rocpd_sql_schema_kind_t kind, + rocpd_sql_options_t options, + const rocpd::jinja_variables& variables) { + auto _callback = [](rocpd_sql_engine_t _engine, + rocpd_sql_schema_kind_t _kind, + rocpd_sql_options_t _options, + const rocpd_sql_schema_jinja_variables_t* _variables, + const char* _schema_path, + const char* _schema_content, + void* _user_data) -> void { + rocprofiler::common::consume_args( + _engine, _kind, _options, _variables, 
_schema_path); + auto* _data = static_cast(_user_data); + if(_data && _schema_content) *_data = std::string{_schema_content}; + }; + + auto _uuid = std::optional{}; + if(!variables.uuid.is(py::none{})) + _uuid = py::cast(variables.uuid); + + auto _guid = std::optional{}; + if(!variables.guid.is(py::none{})) + _guid = py::cast(variables.guid); + + auto _rocpd_variables = + common::init_public_api_struct(rocpd_sql_schema_jinja_variables_t{}); + if(_uuid) _rocpd_variables.uuid = _uuid->c_str(); + if(_guid) _rocpd_variables.guid = _guid->c_str(); + + auto _hints = std::vector{}; + // for(const auto& itr : schema_path_hints) + // _hints.emplace_back(itr.c_str()); + + auto _contents = std::string{}; + ROCPD_CHECK(rocpd_sql_load_schema(engine, + kind, + options, + &_rocpd_variables, + _callback, + _hints.data(), + _hints.size(), + &_contents)); + return _contents; + }); + + // NOLINTBEGIN(performance-unnecessary-value-param) + + // function which maps the python sqlite3.Connection object to the sqlite3* + // pointer + pyrocpd.def( + "connect", + [](std::string dbpath, py::args args, py::kwargs kwargs) { + // import the sqlite3 module + auto sqlite3_mod = py::module_::import("sqlite3"); + auto ret = sqlite3_mod.attr("connect")(dbpath, *args, **kwargs); + + auto* db = rocpd::interop::map_connection(ret); + // this is a no-op right now + if(db) rocpd::functions::define_for_database(db); + + return ret; + }, + "Open a database connection"); + + pyrocpd.def( + "write_perfetto", + [](rocpd::RocpdImportData& data, const tool::output_config& output_cfg) -> bool { + auto _create_agent_index = + [&output_cfg](const rocpd::types::agent& _agent) -> tool::agent_index { + auto ret_index = tool::create_agent_index( + output_cfg.agent_index_value, + _agent.node_id, // absolute index + static_cast(_agent.logical_node_id), // relative index + static_cast(_agent.logical_node_type_id), // type-relative index + std::string_view(_agent.type)); + return ret_index; + }; + // ORDER BY expression 
for kernel dispatches + constexpr auto kernels_order_by = + "agent_abs_index ASC, stream_id ASC, queue_id ASC, start ASC, end DESC"; + + constexpr auto region_order_by = "start ASC, end DESC"; + constexpr auto sample_order_by = "timestamp ASC"; + + auto perfetto_session = rocpd::output::PerfettoSession{output_cfg}; + auto sqlgen_perf = common::simple_timer{ + fmt::format("Perfetto generation from {} SQL database(s)", data.size())}; + for(auto obj : {data.connection}) + { + auto* conn = rocpd::interop::get_connection(std::move(obj)); + auto nodes = rocpd::read(conn); + + for(const auto& nitr : nodes) + { + auto agents = rocpd::read( + conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); + auto processes = rocpd::read( + conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); + + for(const auto& pitr : processes) + { + ROCP_FATAL_IF(pitr.nid != nitr.id || pitr.guid != nitr.guid) + << fmt::format("Found process with a mismatched nid/guid. process: " + "{}/{} vs. 
node: {}/{}", + pitr.nid, + pitr.guid, + nitr.id, + nitr.guid); + auto select_guid_nid_pid = [&nitr, &pitr](std::string_view tbl) { + return fmt::format("SELECT * FROM {} WHERE guid = '{}' AND nid " + "= {} AND pid = {}", + tbl, + pitr.guid, + nitr.id, + pitr.pid); + }; + + auto _sqlgen_perft = common::simple_timer{fmt::format( + "Perfetto generation from SQL for process {} (total)", pitr.pid)}; + + auto kernels = rocpd::sql_generator{ + conn, select_guid_nid_pid("kernels"), kernels_order_by}; + + auto memory_allocations = + rocpd::sql_generator{ + conn, select_guid_nid_pid("memory_allocations")}; + + auto memory_copies = rocpd::sql_generator{ + conn, select_guid_nid_pid("memory_copies")}; + + auto counters = rocpd::sql_generator{ + conn, select_guid_nid_pid("counters_collection")}; + + auto regions = rocpd::sql_generator{ + conn, select_guid_nid_pid("regions"), region_order_by}; + + auto samples = rocpd::sql_generator{ + conn, select_guid_nid_pid("samples"), sample_order_by}; + + auto threads = rocpd::sql_generator{ + conn, select_guid_nid_pid("threads")}; + + // absolute_index |-> (agent, agent_index) + auto agents_map = + std::unordered_map>{}; + + for(const auto& itr : agents) + { + auto new_index = _create_agent_index(itr); + agents_map.emplace(itr.absolute_index, std::make_pair(itr, new_index)); + } + + ROCP_TRACE << "Starting Perfetto generation from SQL for process " + << pitr.pid; + auto _sqlgen_perfw = common::simple_timer{fmt::format( + "Perfetto generation from SQL for process {} (write)", pitr.pid)}; + rocpd::output::write_perfetto(perfetto_session, + pitr, + agents_map, + threads, + regions, + samples, + kernels, + memory_copies, + memory_allocations, + counters); + } + } + } + return true; + }, + "Write pftrace output file from rocpd SQLite3 database"); + + pyrocpd.def( + "write_csv", + [](rocpd::RocpdImportData& data, const rocprofiler::tool::output_config& output_cfg) { + auto sqlgen_csv = common::simple_timer{ + fmt::format("CSV generation from {} 
SQL database(s)", data.size())}; + + if(data.empty()) return; + + auto csv_manager = rocpd::output::CsvManager{output_cfg}; + + for(auto obj : {data.connection}) + { + auto* conn = rocpd::interop::get_connection(std::move(obj)); + auto nodes = rocpd::read(conn); + + for(const auto& nitr : nodes) + { + auto agents = rocpd::read( + conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); + auto processes = rocpd::read( + conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); + + for(const auto& pitr : processes) + { + ROCP_FATAL_IF(pitr.nid != nitr.id || pitr.guid != nitr.guid) + << fmt::format("Found process with a mismatched nid/guid. process: " + "{}/{} vs. node: {}/{}", + pitr.nid, + pitr.guid, + nitr.id, + nitr.guid); + auto _sqlgen_csv = common::simple_timer{fmt::format( + "CSV generation from SQL for process {} (total)", pitr.pid)}; + + auto select_guid_nid_pid = [&nitr, &pitr](std::string_view tbl, + std::string_view + where_extra_condition = {}) { + return fmt::format( + "SELECT * FROM {} WHERE guid = '{}' AND nid = {} AND pid = {} {}", + tbl, + pitr.guid, + nitr.id, + pitr.pid, + where_extra_condition); + }; + + rocpd::output::write_agent_info_csv(csv_manager, agents); + + constexpr auto region_order_by = "start ASC, end DESC"; + + auto kernels = rocpd::sql_generator{ + conn, select_guid_nid_pid("kernels"), region_order_by}; + auto memory_copies = rocpd::sql_generator{ + conn, select_guid_nid_pid("memory_copies"), region_order_by}; + auto memory_allocations = + rocpd::sql_generator{ + conn, select_guid_nid_pid("memory_allocations"), region_order_by}; + auto hip_api_calls = rocpd::sql_generator{ + conn, + select_guid_nid_pid("regions", "AND category LIKE 'HIP_%'"), + region_order_by}; + auto hsa_api_calls = rocpd::sql_generator{ + conn, + select_guid_nid_pid("regions", "AND category LIKE 'HSA_%'"), + region_order_by}; + auto marker_api_calls = rocpd::sql_generator{ + conn, + select_guid_nid_pid("regions_and_samples", + 
"AND category LIKE 'MARKER_%'"), + region_order_by}; + auto counters_calls = rocpd::sql_generator{ + conn, select_guid_nid_pid("counters_collection"), region_order_by}; + auto scratch_memory_calls = + rocpd::sql_generator{ + conn, select_guid_nid_pid("scratch_memory"), region_order_by}; + auto rccl_calls = rocpd::sql_generator{ + conn, + select_guid_nid_pid("regions", "AND category LIKE 'RCCL_%'"), + region_order_by}; + auto rocdecode_calls = rocpd::sql_generator{ + conn, + select_guid_nid_pid("regions", "AND category LIKE 'ROCDECODE_%'"), + region_order_by}; + auto rocjpeg_calls = rocpd::sql_generator{ + conn, + select_guid_nid_pid("regions", "AND category LIKE 'ROCJPEG_%'"), + region_order_by}; + + rocpd::output::write_csvs(csv_manager, + kernels, + memory_copies, + memory_allocations, + hip_api_calls, + hsa_api_calls, + marker_api_calls, + counters_calls, + scratch_memory_calls, + rccl_calls, + rocdecode_calls, + rocjpeg_calls); + } + } + } + }, + "Write trace data to CSV files"); + + pyrocpd.def( + "write_otf2", + [](rocpd::RocpdImportData& data, const tool::output_config& output_cfg) { + auto _create_agent_index = + [&output_cfg](const rocpd::types::agent& _agent) -> tool::agent_index { + auto ret_index = tool::create_agent_index( + output_cfg.agent_index_value, + _agent.node_id, // absolute index + static_cast(_agent.logical_node_id), // relative index + static_cast(_agent.logical_node_type_id), // type-relative index + std::string_view(_agent.type)); + return ret_index; + }; + + constexpr auto kernels_order_by = + "agent_abs_index ASC, stream_id ASC, queue_id ASC, start ASC, end DESC"; + + // to initialise the OTF@ session properly we need to know: + // (1) the process with the earliest start time + // (2) find the process with the longest duration + uint64_t min_start_time = std::numeric_limits::max(); + uint64_t max_fini_time = 0; + for(auto obj : {data.connection}) + { + auto* conn = rocpd::interop::get_connection(std::move(obj)); + + // min start + 
sqlite3_stmt* _stmt_min_start; + sqlite3_prepare_v2( + conn, "SELECT MIN(start) FROM processes;", -1, &_stmt_min_start, nullptr); + uint64_t _min_start_time = std::numeric_limits::max(); + if(sqlite3_step(_stmt_min_start) == SQLITE_ROW) + { + _min_start_time = + static_cast(sqlite3_column_int64(_stmt_min_start, 0)); + } + + sqlite3_finalize(_stmt_min_start); + if(min_start_time > _min_start_time) + { + min_start_time = _min_start_time; + } + //// max fini + sqlite3_stmt* _stmt_max_fini; + sqlite3_prepare_v2( + conn, "SELECT MAX(fini) FROM processes;", -1, &_stmt_max_fini, nullptr); + uint64_t _max_fini_time = 0; + if(sqlite3_step(_stmt_max_fini) == SQLITE_ROW) + { + _max_fini_time = static_cast(sqlite3_column_int64(_stmt_max_fini, 0)); + } + + sqlite3_finalize(_stmt_max_fini); + if(max_fini_time < _max_fini_time) + { + max_fini_time = _max_fini_time; + } + } + + auto otf2_session = + rocpd::output::OTF2Session(output_cfg, min_start_time, max_fini_time); + + auto sqlgen_otf2 = common::simple_timer{ + fmt::format("OTF2 generation from {} SQL database(s)", data.size())}; + + uint16_t _process_counter = 0; + for(auto obj : {data.connection}) + { + auto* conn = rocpd::interop::get_connection(std::move(obj)); + auto nodes = rocpd::read(conn); + for(const auto& nitr : nodes) + { + auto agents = rocpd::read( + conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); + auto processes = rocpd::read( + conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); + + // absolute_index |-> (agent, agent_index) + auto agents_map = std::unordered_map{}; + + for(const auto& itr : agents) + { + const rocprofiler::tool::agent_index new_index = _create_agent_index(itr); + const std::string labeled_name = fmt::format("{}", itr.name); + agents_map.emplace( + itr.absolute_index, + rocpd::output::extended_agent{itr, new_index, labeled_name}); + } + + for(const auto& pitr : processes) + { + ROCP_FATAL_IF(pitr.nid != nitr.id || pitr.guid != nitr.guid) + << 
fmt::format("Found process with a mismatched nid/guid. process: " + "{}/{} vs. node: {}/{}", + pitr.nid, + pitr.guid, + nitr.id, + nitr.guid); + + auto select_guid_nid_pid = + [&nitr, &pitr](std::string_view tbl, + std::string_view where_extra_condition = "") { + return fmt::format("SELECT * FROM {} WHERE guid = '{}' AND " + "nid = {} AND pid = {} {}", + tbl, + pitr.guid, + nitr.id, + pitr.pid, + where_extra_condition); + }; + + constexpr auto region_order_by = "start ASC, end DESC"; + + auto _sqlgen_otf2 = common::simple_timer{fmt::format( + "OTF2 generation from SQL for process {} (total)", pitr.pid)}; + + auto kernels = rocpd::sql_generator{ + conn, select_guid_nid_pid("kernels"), kernels_order_by}; + + auto memory_allocations = + rocpd::sql_generator{ + conn, select_guid_nid_pid("memory_allocations"), region_order_by}; + + auto memory_copies = rocpd::sql_generator{ + conn, select_guid_nid_pid("memory_copies"), region_order_by}; + + auto regions = rocpd::sql_generator{ + conn, select_guid_nid_pid("regions"), region_order_by}; + + auto threads = rocpd::sql_generator{ + conn, select_guid_nid_pid("threads")}; + + ROCP_TRACE << "Starting OTF2 generation from SQL for process " << pitr.pid; + auto _sqlgen_perfw = common::simple_timer{fmt::format( + "OTF2 generation from SQL for process {} (write)", pitr.pid)}; + rocpd::output::write_otf2(otf2_session, + pitr, + _process_counter, + agents_map, + threads, + regions, + kernels, + memory_copies, + memory_allocations); + _process_counter++; + } + } + } + }, + "Write OTF2 output file from rocpd SQLite3 database"); + + // NOLINTEND(performance-unnecessary-value-param) + + // reads in all the agent info from database + pyrocpd.def( + "read_agents", + [](const rocpd::RocpdImportData& data, const std::string& conditions) { + return rocpd::read(data.connection, conditions); + }, + "Reads in the rocprofiler-sdk agents from the database"); + + pyrocpd.def( + "read_nodes", + [](const rocpd::RocpdImportData& data, const std::string& 
conditions) { + return rocpd::read(data.connection, conditions); + }, + "Reads in the node information from the database"); + + pyrocpd.def( + "read_processes", + [](const rocpd::RocpdImportData& data, const std::string& conditions) { + return rocpd::read(data.connection, conditions); + }, + "Reads in the process information from the database"); + + pyrocpd.def( + "read_threads", + [](const rocpd::RocpdImportData& data, const std::string& conditions) { + return rocpd::read(data.connection, conditions); + }, + "Reads in the thread information from the database"); +} diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/libpyrocpd.hpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/libpyrocpd.hpp new file mode 100644 index 0000000000..783e5774b3 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/libpyrocpd.hpp @@ -0,0 +1,49 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/output/generator.hpp" +#include "lib/output/metadata.hpp" +#include "lib/output/node_info.hpp" +#include "lib/output/output_config.hpp" +#include "lib/output/sql/common.hpp" +#include "lib/output/stream_info.hpp" +#include "lib/rocprofiler-sdk-tool/config.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/otf2.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/otf2.py new file mode 100644 index 0000000000..33714e8a0f --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/otf2.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +############################################################################### +# MIT License +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+###############################################################################
+
+from .importer import RocpdImportData
+from .time_window import apply_time_window
+from . import output_config
+from . import libpyrocpd
+
+
+def write_otf2(importData, config):
+    """Write OTF2 output through the native ``libpyrocpd.write_otf2`` binding.
+
+    Args:
+        importData: object forwarded unchanged as the first argument.
+        config: output configuration forwarded unchanged as the second argument.
+
+    Returns:
+        Whatever ``libpyrocpd.write_otf2`` returns.
+    """
+    return libpyrocpd.write_otf2(importData, config)
+
+
+def execute(input, config=None, window_args=None, **kwargs):
+    """Import the database(s), apply the time window, and write OTF2 output.
+
+    Args:
+        input: passed as-is to ``RocpdImportData``; ``main`` supplies the list
+            parsed from ``-i/--input``.
+        config: optional existing output configuration. When ``None`` a new
+            ``output_config.output_config`` is built from ``kwargs``; otherwise
+            ``config.update(**kwargs)`` is called and its return value is used.
+        window_args: optional mapping of keyword arguments forwarded to
+            ``apply_time_window``. ``None`` (the default) forwards none.
+        **kwargs: output configuration settings.
+    """
+    importData = RocpdImportData(input)
+
+    # ``**None`` raises TypeError, so the default must be normalised to an empty
+    # mapping before it is unpacked.
+    # NOTE(review): assumes apply_time_window may be called without keyword
+    # arguments -- confirm against time_window.py.
+    apply_time_window(importData, **(window_args or {}))
+
+    config = (
+        output_config.output_config(**kwargs)
+        if config is None
+        else config.update(**kwargs)
+    )
+
+    write_otf2(importData, config)
+
+
+def add_args(parser):
+    """Add otf2 arguments.
+
+    Nothing is registered on ``parser`` at the moment; the returned list of
+    argument names is therefore empty.
+    """
+
+    # Currently, no otf2 specific args
+
+    # otf2_options = parser.add_argument_group("OTF2 options")
+
+    # otf2_options.add_argument(
+    #     "--kernel-rename",
+    #     help="Use kernel names from debugging symbols if available",
+    #     action="store_true",
+    #     default=False,
+    # )
+
+    return []
+
+
+def process_args(args, valid_args):
+    """Collect the attributes named in ``valid_args`` that are set on ``args``.
+
+    Args:
+        args: object holding parsed arguments as attributes.
+        valid_args: iterable of attribute names to look up.
+
+    Returns:
+        dict mapping each name that exists on ``args`` with a value other than
+        ``None`` to that value.
+    """
+    ret = {}
+    for itr in valid_args:
+        if hasattr(args, itr):
+            val = getattr(args, itr)
+            if val is not None:
+                ret[itr] = val
+    return ret
+
+
+def main(argv=None):
+    """Command-line entry point: parse ``argv`` and run ``execute``.
+
+    Args:
+        argv: argument list handed to ``ArgumentParser.parse_args``.
+    """
+    import argparse
+    from .time_window import add_args as add_args_time_window
+    from .time_window import process_args as process_args_time_window
+    from .output_config import add_args as add_args_output_config
+    from .output_config import process_args as process_args_output_config
+    from .output_config import add_generic_args, process_generic_args
+
+    parser = argparse.ArgumentParser(
+        description="Convert rocPD to OTF2 format", allow_abbrev=False
+    )
+
+    required_params = parser.add_argument_group("Required arguments")
+
+    required_params.add_argument(
+        "-i",
+        "--input",
+        required=True,
+        type=output_config.check_file_exists,
+        nargs="+",
+        help="Input path and filename to one or more database(s), separated by spaces",
+    )
+
+    # Each helper registers its options and returns the attribute names that
+    # the matching process_* helper reads back after parsing.
+    valid_out_config_args = add_args_output_config(parser)
+    valid_otf2_args = add_args(parser)
+    valid_generic_args = add_generic_args(parser)
+    valid_time_window_args = add_args_time_window(parser)
+
+    args = parser.parse_args(argv)
+
+    out_cfg_args = process_args_output_config(args, valid_out_config_args)
+    generic_out_cfg_args = process_generic_args(args, valid_generic_args)
+    window_args = process_args_time_window(args, valid_time_window_args)
+    otf2_args = process_args(args, valid_otf2_args)
+
+    # Later dicts win on duplicate keys.
+    all_args = {**out_cfg_args, **otf2_args, **generic_out_cfg_args}
+
+    execute(args.input, window_args=window_args, **all_args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/output_config.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/output_config.py
new file mode 100644
index 0000000000..d0877384c1
--- /dev/null
+++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/output_config.py
@@ -0,0 +1,182 @@
+###############################################################################
+# MIT License
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + +import sys +import os +import argparse + +try: + import ctypes + + sqlite3lib = ctypes.CDLL("libsqlite3.so") +except Exception: + pass + +from . import libpyrocpd + + +__all__ = ["format_path", "output_config", "add_args", "process_args"] + + +def _generate_attribute_docs(data): + properties = [] + for key, itr in data.items(): + if not isinstance(key, str): + pass + if key.startswith("__") and key.endswith("__"): + pass + else: + properties += [key] + return "\n\t- ".join(properties) + + +class output_config(libpyrocpd.output_config): + __doc__ = f"""Output configuration + + Read/Write properties:\n\t- {_generate_attribute_docs(libpyrocpd.output_config.__dict__)} + + Example: + # folder for output data + output_dir = os.path.join(os.getcwd(), "rocpd-output") + + # create output config instance + cfg = output_config(output_path=output_dir, output_file="out") + + # using read/write properties + if cfg.output_path != output_dir: + cfg.output_path = output_dir + """ + + def __init__(self, **kwargs): + super(output_config, self).__init__() + self.update(**kwargs) + + def update(self, **kwargs): + _strict = kwargs.get("strict", True) + # _verbose = kwargs.get("log-level", "config") + for key, itr in kwargs.items(): + if hasattr(self, key): + # if _verbose in ("info", "trace", "config"): + # print(f" - output_config.{key} = {itr}") + if key == "agent_index_value": + if itr == "absolute": + setattr(self, key, 
libpyrocpd.agent_indexing.node) + elif itr == "type-relative": + setattr(self, key, libpyrocpd.agent_indexing.logical_node_type) + else: + setattr(self, key, libpyrocpd.agent_indexing.logical_node) + else: + setattr(self, key, itr) + elif _strict: + raise KeyError(f"output_config does not have {key} attribute") + return self + + +def format_path(path, tag=os.path.basename(sys.executable)): + return libpyrocpd.format_path(path, tag) + + +def check_file_exists(filename): + if not os.path.exists(filename): + raise argparse.ArgumentTypeError(f"File '{filename}' does not exist.") + return filename + + +def add_args(parser): + """Add output arguments to an existing parser.""" + + io_options = parser.add_argument_group("I/O options") + + io_options.add_argument( + "-o", + "--output-file", + help="Sets the base output file name (default base filename: `out`)", + default=os.environ.get("ROCPD_OUTPUT_NAME", "out"), + type=str, + required=False, + ) + io_options.add_argument( + "-d", + "--output-path", + help="Sets the output path where the output files will be saved (default path: `./rocpd-output-data`)", + default=os.environ.get("ROCPD_OUTPUT_PATH", "./rocpd-output-data"), + type=str, + required=False, + ) + + kernel_naming_options = parser.add_argument_group("Kernel naming options") + + kernel_naming_options.add_argument( + "--kernel-rename", + help="Use ROCTx marker names instead of kernel names", + action="store_true", + default=False, + ) + + return ["output_file", "output_path", "kernel_rename"] + + +def process_args(args, valid_args): + + ret = {} + for itr in valid_args: + if hasattr(args, itr): + val = getattr(args, itr) + if itr == "output_format": + ret[itr] = val + elif itr == "output_path" and val is not None: + ret[itr] = format_path(val) + elif val is not None: + ret[itr] = val + return ret + + +def add_generic_args(parser): + """Add generic arguments that apply to multiple output formats.""" + + generic_options = parser.add_argument_group("Generic options") + 
def process_generic_args(args, valid_args):
    """Collect the generic options that were actually set on ``args``.

    Args:
        args: object carrying parsed options as attributes.
        valid_args: iterable of attribute names to look up on ``args``.

    Returns:
        dict of ``name -> value`` for every listed name that exists on
        ``args`` with a value other than ``None``.
    """
    # A missing attribute and an attribute set to None are both skipped, so
    # a None default folds the two cases into one filter.
    looked_up = ((name, getattr(args, name, None)) for name in valid_args)
    return {name: value for name, value in looked_up if value is not None}
def execute(input, config=None, window_args=None, **kwargs):
    """Load rocpd data and write it out as a Perfetto trace.

    Args:
        input: forwarded unchanged to ``RocpdImportData``.
        config: optional existing output configuration. When ``None`` a new
            ``output_config.output_config(**kwargs)`` is built; otherwise the
            result of ``config.update(**kwargs)`` is used.
        window_args: optional mapping of keyword arguments forwarded to
            ``apply_time_window``. When ``None`` (the default) no time window
            is applied.
        **kwargs: output configuration settings.
    """
    importData = RocpdImportData(input)

    # window_args defaults to None, and unpacking None with ** raises
    # TypeError, so only apply the window when a mapping was supplied.
    if window_args is not None:
        apply_time_window(importData, **window_args)

    if config is None:
        config = output_config.output_config(**kwargs)
    else:
        config = config.update(**kwargs)

    write_pftrace(importData, config)
def main(argv=None):
    """Command-line entry point: parse the options and run the conversion.

    Args:
        argv: optional list of argument strings handed to
            ``ArgumentParser.parse_args``; ``None`` lets argparse read
            ``sys.argv``.
    """
    import argparse
    from . import time_window as _time_window
    from . import output_config as _out_cfg

    cli = argparse.ArgumentParser(
        description="Convert rocPD to Perfetto file", allow_abbrev=False
    )

    cli.add_argument_group("Required arguments").add_argument(
        "-i",
        "--input",
        required=True,
        type=_out_cfg.check_file_exists,
        nargs="+",
        help="Input path and filename to one or more database(s), separated by spaces",
    )

    # Registration order is kept: it determines the group order in --help.
    output_names = _out_cfg.add_args(cli)
    pftrace_names = add_args(cli)
    generic_names = _out_cfg.add_generic_args(cli)
    window_names = _time_window.add_args(cli)

    parsed = cli.parse_args(argv)

    output_settings = _out_cfg.process_args(parsed, output_names)
    pftrace_settings = process_args(parsed, pftrace_names)
    generic_settings = _out_cfg.process_generic_args(parsed, generic_names)
    window_settings = _time_window.process_args(parsed, window_names)

    # Later updates win on key clashes: pftrace < output config < generic.
    settings = dict(pftrace_settings)
    settings.update(output_settings)
    settings.update(generic_settings)

    execute(parsed.input, window_args=window_settings, **settings)
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + +# +# Utility Class to create the rocpd schema on an existing sqlite connection +# +# Requires a current copy of the schema in the 'schema' subdirectory +# Executes the contained sql 'scripts' to create the schema +# + +import argparse +from . import libpyrocpd + + +class RocpdSchema: + + def __init__(self, uuid="", guid=""): + + variables = libpyrocpd.schema_jinja_variables() + variables.uuid = f"{uuid}" + variables.guid = f"{guid}" + + self.tables = RocpdSchema.load_schema( + libpyrocpd.sql_engine.sqlite3, + libpyrocpd.sql_schema.rocpd_tables, + libpyrocpd.sql_option.sqlite3_pragma_foreign_keys, + variables, + ) + + self.indexes = RocpdSchema.load_schema( + libpyrocpd.sql_engine.sqlite3, + libpyrocpd.sql_schema.rocpd_indexes, + libpyrocpd.sql_option.none, + variables, + ) + + _views = [] + for itr in ["rocpd", "data", "summary", "marker"]: + _views += [ + RocpdSchema.load_schema( + libpyrocpd.sql_engine.sqlite3, + getattr(libpyrocpd.sql_schema, f"{itr}_views"), + libpyrocpd.sql_option.none, + variables, + ) + ] + self.views = "\n".join(_views) + + def write_schema(self, connection): + connection.executescript(self.tables) + connection.executescript(self.indexes) + connection.executescript(self.views) + + @staticmethod + def load_schema(engine, kind, options, variables=None, **kwargs): + + if variables is None: + variables = libpyrocpd.schema_jinja_variables() + + for itr in ["uuid", "guid"]: + _variable = 
def main(create=None):
    """Print the rocpd table schema, or write the schema into a new database.

    Args:
        create: filename of the database to create. When falsy, the table
            schema SQL is printed instead and nothing is written.
    """
    schema = RocpdSchema()

    if not create:
        print(schema.tables)
        return

    from . import connect

    # Use the `create` parameter rather than the module-global `args`, which
    # is only defined when this file is run as a script.
    print(f"Creating empty rpd: {create}")
    connection = connect(create)
    schema.write_schema(connection)
    connection.commit()
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/output/agent_info.hpp" +#include "lib/output/kernel_symbol_info.hpp" +#include "lib/output/metadata.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +namespace rocpd +{ +namespace common +{ +template +void +read_json_string(const std::string& inp, FuncT&& _func, Args&&... 
_args) +{ + using json_archive = cereal::JSONInputArchive; + + { + auto json_ss = std::stringstream{inp}; + auto ar = json_archive{json_ss}; + std::forward(_func)(ar, std::forward(_args)...); + } +} +} // namespace common +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp new file mode 100644 index 0000000000..5be3f97977 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp @@ -0,0 +1,738 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "lib/python/rocpd/source/csv.hpp" + +#include "lib/common/defines.hpp" +#include "lib/common/hasher.hpp" +#include "lib/common/mpl.hpp" +#include "lib/output/csv.hpp" +#include "lib/output/csv_output_file.hpp" +#include "lib/output/generator.hpp" +#include "lib/output/metadata.hpp" +#include "lib/output/node_info.hpp" +#include "lib/output/output_config.hpp" +#include "lib/output/output_stream.hpp" +#include "lib/output/sql/common.hpp" +#include "lib/output/stream_info.hpp" +#include "lib/rocprofiler-sdk-tool/config.hpp" + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +namespace +{ +const std::string STATS_HEADER = "\"Name\",\"Calls\",\"TotalDurationNs\"," + "\"AverageNs\",\"Percentage\",\"MinNs\",\"MaxNs\",\"StdDev\""; +const std::string API_TRACE_HEADER = + "\"Guid\",\"Domain\",\"Function\",\"Process_Id\",\"Thread_Id\"," + "\"Correlation_Id\",\"Start_Timestamp\",\"End_Timestamp\""; +} // namespace + +namespace rocpd +{ +namespace output +{ +CsvManager::CsvManager(rocprofiler::tool::output_config output_cfg) +: config{std::move(output_cfg)} +{ + if(!ensure_output_directory()) + { + ROCP_ERROR << "Failed to create csv output directory: " << config.output_path; + return; + } + + this->csv_configs = { + {CsvType::KERNEL_DISPATCH, + {"kernel_trace.csv", + "\"Guid\",\"Kind\",\"Agent_Id\",\"Queue_Id\"," + "\"Stream_Id\",\"Thread_Id\",\"Dispatch_Id\"," + "\"Kernel_Id\",\"Kernel_Name\",\"Correlation_Id\"," + "\"Start_Timestamp\",\"End_" + "Timestamp\",\"Private_Segment_Size\",\"Group_" + "Segment_Size\",\"Workgroup_Size_X\"," + "\"Workgroup_Size_Y\",\"Workgroup_Size_Z\",\"Grid_" + "Size_X\",\"Grid_Size_Y\",\"Grid_Size_" + "Z\""}}, + {CsvType::MEMORY_COPY, + {"memory_copy_trace.csv", + "\"Guid\",\"Kind\",\"Direction\",\"Stream_Id\",\"Source_Agent_Id\"," + "\"Destination_Agent_" + "Id\"," + "\"Correlation_Id\",\"Start_Timestamp\",\"End_Timestamp\""}}, + 
{CsvType::MEMORY_ALLOCATION, + {"memory_allocation_trace.csv", + "\"Guid\",\"Kind\",\"Operation\",\"Agent_Id\",\"Allocation_Size\"," + "\"Address\"," + "\"Correlation_Id\",\"Start_Timestamp\",\"End_Timestamp\""}}, + {CsvType::SCRATCH_MEMORY, + {"scratch_memory_trace.csv", + "\"Kind\",\"Operation\",\"Agent_Id\",\"Queue_Id\",\"Thread_Id\"," + "\"Alloc_Flags\",\"Start_" + "Timestamp\",\"End_Timestamp\""}}, + + {CsvType::HIP_API, {"hip_api_trace.csv", API_TRACE_HEADER}}, + {CsvType::HSA_CSV_API, {"hsa_api_trace.csv", API_TRACE_HEADER}}, + {CsvType::MARKER, {"marker_api_trace.csv", API_TRACE_HEADER}}, + {CsvType::RCCL_API, {"rccl_api_trace.csv", API_TRACE_HEADER}}, + {CsvType::ROCDECODE_API, {"rocdecode_api_trace.csv", API_TRACE_HEADER}}, + {CsvType::ROCJPEG_API, {"rocjpeg_api_trace.csv", API_TRACE_HEADER}}, + + {CsvType::COUNTER, + {"counter_trace.csv", + "\"Pid\",\"Correlation_Id\",\"Dispatch_Id\",\"Agent_Id\",\"Queue_Id\"," + "\"Process_Id\"," + "\"Thread_Id\"," + "\"Grid_Size\",\"Kernel_Id\",\"Kernel_Name\",\"Workgroup_Size\",\"LDS_" + "Block_Size\"," + "\"Scratch_Size\",\"VGPR_Count\",\"Accum_VGPR_Count\",\"SGPR_Count\"," + "\"Counter_Name\",\"Counter_Value\",\"Start_Timestamp\",\"End_" + "Timestamp\""}}, + }; +} + +bool +CsvManager::ensure_output_directory() const +{ + try + { + fs::create_directories(config.output_path); + return true; + } catch(const std::exception& e) + { + ROCP_ERROR << "Failed to create directory: " << e.what(); + return false; + } +} + +CsvManager::~CsvManager() +{ + for(auto& [type, stream] : streams) + { + if(stream.is_open()) + { + stream.flush(); + stream.close(); + } + } +} + +std::ofstream& +CsvManager::get_stream(CsvType type) +{ + return streams[type]; +} + +bool +CsvManager::has_stream(CsvType type) const +{ + return streams.count(type) != 0u && streams.at(type).is_open(); +} + +bool +CsvManager::initialize_csv_file(CsvType type) +{ + if(has_stream(type)) return true; + + if(csv_configs.find(type) == csv_configs.end()) + { + 
ROCP_ERROR << "No CSV configuration found for type: " << static_cast(type); + return false; + } + + const auto& cfg = csv_configs[type]; + + fs::path output_dir = config.output_path; + fs::path filename = + config.output_file.empty() ? cfg.filename : config.output_file + "_" + cfg.filename; + + file_paths[type] = (output_dir / filename).string(); + + auto& path = file_paths[type]; + auto& stream = streams[type]; + + stream.open(path, std::ios::out); + if(!stream.is_open()) + { + ROCP_ERROR << "Failed to open CSV output file: " << path; + return false; + } + + stream << cfg.header << '\n'; + return true; +} + +template +bool +has_any_data(const rocprofiler::tool::generator& data_gen) +{ + for(auto ditr : data_gen) + { + auto gen = data_gen.get(ditr); + if(begin(gen) != end(gen)) + { + return true; + } + } + return false; +} + +template +void +process_data_to_csv(CsvManager& csv_manager, + CsvType csv_type, + const rocprofiler::tool::generator& data_gen, + Processor process_func) +{ + if(!has_any_data(data_gen)) return; + + if(!csv_manager.initialize_csv_file(csv_type)) return; + + for(auto ditr : data_gen) + { + auto gen = data_gen.get(ditr); + for(auto it = begin(gen); it != end(gen); ++it) + { + process_func(csv_manager, csv_type, *it); + } + } +} + +void +write_kernel_csv( + CsvManager& csv_manager, + const rocprofiler::tool::generator& kernel_dispatch_gen) +{ + process_data_to_csv( + csv_manager, + CsvType::KERNEL_DISPATCH, + kernel_dispatch_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::kernel_dispatch& kernel) { + std::string kernel_identifier = cm.config.kernel_rename ? 
kernel.region : kernel.name; + + std::string agent_identifier = create_agent_index(cm.config.agent_index_value, + kernel.agent_abs_index, + kernel.agent_log_index, + kernel.agent_type_index, + std::string_view(kernel.agent_type)) + .as_string(); + + cm.write_line(type, + fmt::format("\"{}\"", kernel.guid), + fmt::format("\"{}\"", "KERNEL_DISPATCH"), + fmt::format("\"{}\"", agent_identifier), + kernel.queue_id, + kernel.stream_id, + kernel.tid, + kernel.dispatch_id, + kernel.kernel_id, + fmt::format("\"{}\"", kernel_identifier), + kernel.stack_id, + kernel.start, + kernel.end, + kernel.scratch_size, + kernel.lds_size, + kernel.workgroup_size.x, + kernel.workgroup_size.y, + kernel.workgroup_size.z, + kernel.grid_size.x, + kernel.grid_size.y, + kernel.grid_size.z); + }); +} + +void +write_memory_copy_csv( + CsvManager& csv_manager, + const rocprofiler::tool::generator& memory_copies_gen) +{ + process_data_to_csv(csv_manager, + CsvType::MEMORY_COPY, + memory_copies_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::memory_copies& mcopy) { + std::string src_agent_identifier = + create_agent_index(cm.config.agent_index_value, + mcopy.src_agent_abs_index, + mcopy.src_agent_log_index, + mcopy.src_agent_type_index, + std::string_view(mcopy.src_agent_type)) + .as_string(); + + std::string dst_agent_identifier = + create_agent_index(cm.config.agent_index_value, + mcopy.dst_agent_abs_index, + mcopy.dst_agent_log_index, + mcopy.dst_agent_type_index, + std::string_view(mcopy.dst_agent_type)) + .as_string(); + + cm.write_line(type, + fmt::format("\"{}\"", mcopy.guid), + fmt::format("\"{}\"", "MEMORY_COPY"), + fmt::format("\"{}\"", mcopy.name), + mcopy.stream_id, + fmt::format("\"{}\"", src_agent_identifier), + fmt::format("\"{}\"", dst_agent_identifier), + mcopy.stack_id, + mcopy.start, + mcopy.end); + }); +} + +void +write_memory_allocation_csv( + CsvManager& csv_manager, + const rocprofiler::tool::generator& memory_alloc_gen) +{ + process_data_to_csv( + csv_manager, + 
CsvType::MEMORY_ALLOCATION, + memory_alloc_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::memory_allocation& malloc) { + std::string operation = fmt::format("MEMORY_ALLOCATION_{}", malloc.type); + + std::string agent_identifier = create_agent_index(cm.config.agent_index_value, + malloc.agent_abs_index, + malloc.agent_log_index, + malloc.agent_type_index, + std::string_view(malloc.agent_type)) + .as_string(); + + std::string agent_id = + operation != "MEMORY_ALLOCATION_FREE" ? agent_identifier : "\"\""; + std::string address = fmt::format("\"0x{:016x}\"", malloc.address); + + cm.write_line(type, + fmt::format("\"{}\"", malloc.guid), + fmt::format("\"{}\"", "MEMORY_ALLOCATION"), + fmt::format("\"{}\"", operation), + fmt::format("\"{}\"", agent_id), + malloc.size, + address, + malloc.stack_id, + malloc.start, + malloc.end); + }); +} + +void +write_scratch_memory_csv( + CsvManager& csv_manager, + const rocprofiler::tool::generator& scratch_memory_gen) +{ + process_data_to_csv( + csv_manager, + CsvType::SCRATCH_MEMORY, + scratch_memory_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::scratch_memory& scratch_mem) { + std::string agent_identifier = + create_agent_index(cm.config.agent_index_value, + scratch_mem.agent_abs_index, + scratch_mem.agent_log_index, + scratch_mem.agent_type_index, + std::string_view(scratch_mem.agent_type)) + .as_string(); + + cm.write_line(type, + fmt::format("\"{}\"", "SCRATCH_MEMORY"), + fmt::format("\"SCRATCH_MEMORY_{}\"", scratch_mem.operation), + fmt::format("\"{}\"", agent_identifier), + scratch_mem.queue_id, + scratch_mem.tid, + scratch_mem.alloc_flags, + scratch_mem.start, + scratch_mem.end); + }); +} + +void +write_hip_api_csv(CsvManager& csv_manager, + const rocprofiler::tool::generator& hip_api_gen) +{ + process_data_to_csv(csv_manager, + CsvType::HIP_API, + hip_api_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::region& api) { + // Skip entries that are not HIP API calls + 
if(api.category.find("HIP_") != 0) return; + + cm.write_line(type, + fmt::format("\"{}\"", api.guid), + fmt::format("\"{}\"", api.category), + fmt::format("\"{}\"", api.name), + api.pid, + api.tid, + api.stack_id, + api.start, + api.end); + }); +} + +void +write_hsa_api_csv(CsvManager& csv_manager, + const rocprofiler::tool::generator& hsa_api_gen) +{ + process_data_to_csv(csv_manager, + CsvType::HSA_CSV_API, + hsa_api_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::region& api) { + // Skip entries that are not HSA API calls + if(api.category.find("HSA_") != 0) return; + + cm.write_line(type, + fmt::format("\"{}\"", api.guid), + fmt::format("\"{}\"", api.category), + fmt::format("\"{}\"", api.name), + api.pid, + api.tid, + api.stack_id, + api.start, + api.end); + }); +} + +void +write_marker_api_csv(CsvManager& csv_manager, + const rocprofiler::tool::generator& marker_api_gen) +{ + namespace tool = ::rocprofiler::tool; + + if(marker_api_gen.empty()) return; + + using marker_csv_encoder = tool::csv::csv_encoder<8>; + + auto ofs = tool::csv_output_file{csv_manager.config, + domain_type::MARKER, + marker_csv_encoder{}, + {"Guid", + "Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + + // write samples first and ignore the timestamp ordering w.r.t. 
regions for now + for(auto ditr : marker_api_gen) + { + for(const auto& record : marker_api_gen.get(ditr)) + { + auto row_ss = std::stringstream{}; + auto _name = record.name; + + if(record.has_extdata()) + { + if(auto _extdata = record.get_extdata(); !_extdata.message.empty()) + _name = _extdata.message; + } + + marker_csv_encoder::write_row(row_ss, + record.guid, + record.category, + _name, + record.pid, + record.tid, + record.stack_id, + record.start, + record.end); + + ofs << row_ss.str(); + } + } +} + +void +write_rccl_api_csv(CsvManager& csv_manager, + const rocprofiler::tool::generator& rccl_api_gen) +{ + process_data_to_csv(csv_manager, + CsvType::RCCL_API, + rccl_api_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::region& api) { + if(api.category.find("RCCL_") != 0) return; + + cm.write_line(type, + fmt::format("\"{}\"", api.guid), + fmt::format("\"{}\"", api.category), + fmt::format("\"{}\"", api.name), + api.pid, + api.tid, + api.stack_id, + api.start, + api.end); + }); +} + +void +write_rocdecode_api_csv(CsvManager& csv_manager, + const rocprofiler::tool::generator& rocdecode_api_gen) +{ + process_data_to_csv(csv_manager, + CsvType::ROCDECODE_API, + rocdecode_api_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::region& api) { + if(api.category.find("ROCDECODE_") != 0) return; + + cm.write_line(type, + fmt::format("\"{}\"", api.guid), + fmt::format("\"{}\"", api.category), + fmt::format("\"{}\"", api.name), + api.pid, + api.tid, + api.stack_id, + api.start, + api.end); + }); +} + +void +write_rocjpeg_api_csv(CsvManager& csv_manager, + const rocprofiler::tool::generator& rocjpeg_api_gen) +{ + process_data_to_csv(csv_manager, + CsvType::ROCJPEG_API, + rocjpeg_api_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::region& api) { + if(api.category.find("ROCJPEG_") != 0) return; + + cm.write_line(type, + fmt::format("\"{}\"", api.guid), + fmt::format("\"{}\"", api.category), + fmt::format("\"{}\"", api.name), + api.pid, + 
api.tid, + api.stack_id, + api.start, + api.end); + }); +} + +void +write_agent_info_csv(CsvManager& csv_manager, const std::vector& agents) +{ + if(agents.empty()) return; + + namespace tool = ::rocprofiler::tool; + using agent_info_csv_encoder = tool::csv::csv_encoder<54>; + + auto ofs = tool::csv_output_file{csv_manager.config, + "agent_info", + agent_info_csv_encoder{}, + {"Guid", + "Node_Id", + "Logical_Node_Id", + "Agent_Type", + "Cpu_Cores_Count", + "Simd_Count", + "Cpu_Core_Id_Base", + "Simd_Id_Base", + "Max_Waves_Per_Simd", + "Lds_Size_In_Kb", + "Gds_Size_In_Kb", + "Num_Gws", + "Wave_Front_Size", + "Num_Xcc", + "Cu_Count", + "Array_Count", + "Num_Shader_Banks", + "Simd_Arrays_Per_Engine", + "Cu_Per_Simd_Array", + "Simd_Per_Cu", + "Max_Slots_Scratch_Cu", + "Gfx_Target_Version", + "Vendor_Id", + "Device_Id", + "Location_Id", + "Domain", + "Drm_Render_Minor", + "Num_Sdma_Engines", + "Num_Sdma_Xgmi_Engines", + "Num_Sdma_Queues_Per_Engine", + "Num_Cp_Queues", + "Max_Engine_Clk_Ccompute", + "Max_Engine_Clk_Fcompute", + "Sdma_Fw_Version", + "Fw_Version", + "Capability", + "Cu_Per_Engine", + "Max_Waves_Per_Cu", + "Family_Id", + "Workgroup_Max_Size", + "Grid_Max_Size", + "Local_Mem_Size", + "Hive_Id", + "Gpu_Id", + "Workgroup_Max_Dim_X", + "Workgroup_Max_Dim_Y", + "Workgroup_Max_Dim_Z", + "Grid_Max_Dim_X", + "Grid_Max_Dim_Y", + "Grid_Max_Dim_Z", + "Name", + "Vendor_Name", + "Product_Name", + "Model_Name"}}; + + for(const auto& itr : agents) + { + auto row_ss = std::stringstream{}; + agent_info_csv_encoder::write_row(row_ss, + itr.guid, + itr.node_id, + itr.logical_node_id, + itr.type, + itr.cpu_cores_count, + itr.simd_count, + itr.cpu_core_id_base, + itr.simd_id_base, + itr.max_waves_per_simd, + itr.lds_size_in_kb, + itr.gds_size_in_kb, + itr.num_gws, + itr.wave_front_size, + itr.num_xcc, + itr.cu_count, + itr.array_count, + itr.num_shader_banks, + itr.simd_arrays_per_engine, + itr.cu_per_simd_array, + itr.simd_per_cu, + itr.max_slots_scratch_cu, + 
itr.gfx_target_version, + itr.vendor_id, + itr.device_id, + itr.location_id, + itr.domain, + itr.drm_render_minor, + itr.num_sdma_engines, + itr.num_sdma_xgmi_engines, + itr.num_sdma_queues_per_engine, + itr.num_cp_queues, + itr.max_engine_clk_ccompute, + itr.max_engine_clk_fcompute, + itr.sdma_fw_version.Value, + itr.fw_version.Value, + itr.capability.Value, + itr.cu_per_engine, + itr.max_waves_per_cu, + itr.family_id, + itr.workgroup_max_size, + itr.grid_max_size, + itr.local_mem_size, + itr.hive_id, + itr.gpu_id, + itr.workgroup_max_dim.x, + itr.workgroup_max_dim.y, + itr.workgroup_max_dim.z, + itr.grid_max_dim.x, + itr.grid_max_dim.y, + itr.grid_max_dim.z, + itr.name, + itr.vendor_name, + itr.product_name, + itr.model_name); + ofs << row_ss.str(); + } +} + +void +write_counters_csv(CsvManager& csv_manager, + const rocprofiler::tool::generator& counter_gen) +{ + process_data_to_csv(csv_manager, + CsvType::COUNTER, + counter_gen, + [](CsvManager& cm, CsvType type, const rocpd::types::counter& counter) { + std::string agent_identifier = + create_agent_index(cm.config.agent_index_value, + counter.agent_abs_index, + counter.agent_log_index, + counter.agent_type_index, + std::string_view(counter.agent_type)) + .as_string(); + + cm.write_line(type, + counter.guid, + counter.stack_id, + counter.dispatch_id, + fmt::format("\"{}\"", agent_identifier), + counter.queue_id, + counter.pid, + counter.tid, + counter.grid_size, + counter.kernel_id, + fmt::format("\"{}\"", counter.kernel_name), + counter.workgroup_size, + counter.lds_block_size, + counter.scratch_size, + counter.vgpr_count, + counter.accum_vgpr_count, + counter.sgpr_count, + fmt::format("\"{}\"", counter.counter_name), + counter.value, + counter.start, + counter.end); + }); +} + +void +write_csvs(CsvManager& csv_manager, + const rocprofiler::tool::generator& kernel_dispatch, + const rocprofiler::tool::generator& memory_copies, + const rocprofiler::tool::generator& memory_allocations, + const 
rocprofiler::tool::generator& hip_api_calls, + const rocprofiler::tool::generator& hsa_api_calls, + const rocprofiler::tool::generator& marker_api_calls, + const rocprofiler::tool::generator& counters_calls, + const rocprofiler::tool::generator& scratch_memory_calls, + const rocprofiler::tool::generator& rccl_calls, + const rocprofiler::tool::generator& rocdecode_calls, + const rocprofiler::tool::generator& rocjpeg_calls) +{ + rocpd::output::write_kernel_csv(csv_manager, kernel_dispatch); + rocpd::output::write_memory_copy_csv(csv_manager, memory_copies); + rocpd::output::write_memory_allocation_csv(csv_manager, memory_allocations); + rocpd::output::write_hip_api_csv(csv_manager, hip_api_calls); + rocpd::output::write_hsa_api_csv(csv_manager, hsa_api_calls); + rocpd::output::write_marker_api_csv(csv_manager, marker_api_calls); + + rocpd::output::write_counters_csv(csv_manager, counters_calls); + rocpd::output::write_scratch_memory_csv(csv_manager, scratch_memory_calls); + rocpd::output::write_rccl_api_csv(csv_manager, rccl_calls); + + rocpd::output::write_rocdecode_api_csv(csv_manager, rocdecode_calls); + rocpd::output::write_rocjpeg_api_csv(csv_manager, rocjpeg_calls); +} +} // namespace output +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.hpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.hpp new file mode 100644 index 0000000000..7cabdcaf10 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.hpp @@ -0,0 +1,120 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "lib/python/rocpd/source/types.hpp" + +#include "lib/common/defines.hpp" +#include "lib/output/generateStats.hpp" +#include "lib/output/generator.hpp" +#include "lib/output/metadata.hpp" +#include "lib/output/node_info.hpp" +#include "lib/output/output_config.hpp" +#include "lib/output/sql/common.hpp" +#include "lib/output/stream_info.hpp" +#include "lib/rocprofiler-sdk-tool/config.hpp" + +#include +#include +#include +#include +#include +#include + +namespace rocpd +{ +namespace output +{ +using rocprofiler::tool::float_type; + +struct CsvFileConfig +{ + std::string filename; + std::string header; +}; + +enum class CsvType +{ + KERNEL_DISPATCH, + MEMORY_COPY, + MEMORY_ALLOCATION, + SCRATCH_MEMORY, + HIP_API, + HSA_CSV_API, + MARKER, + COUNTER, + RCCL_API, + ROCDECODE_API, + ROCJPEG_API, +}; + +class CsvManager +{ +public: + CsvManager(rocprofiler::tool::output_config output_cfg); + ~CsvManager(); + + rocprofiler::tool::output_config config; + std::map csv_configs; + + std::ofstream& get_stream(CsvType type); + + bool has_stream(CsvType type) const; + bool initialize_csv_file(CsvType type); + + template + void write_line(CsvType type, Args&&... 
args) + { + auto& stream = get_stream(type); + if(!stream.is_open()) return; + + std::vector items; + (items.push_back(fmt::format("{}", std::forward(args))), ...); + stream << fmt::format("{}\n", fmt::join(items, ",")); + } + +private: + std::map streams; + std::map file_paths; + + bool ensure_output_directory() const; +}; + +void +write_agent_info_csv(CsvManager& csv_manager, const std::vector& agents); + +void +write_csvs(CsvManager& csv_manager, + const rocprofiler::tool::generator& kernel_dispatch, + const rocprofiler::tool::generator& memory_copies, + const rocprofiler::tool::generator& memory_allocations, + const rocprofiler::tool::generator& hip_api_calls, + const rocprofiler::tool::generator& hsa_api_calls, + const rocprofiler::tool::generator& marker_api_calls, + const rocprofiler::tool::generator& counters_calls, + const rocprofiler::tool::generator& scratch_memory_calls, + const rocprofiler::tool::generator& rccl_calls, + const rocprofiler::tool::generator& rocdecode_calls, + const rocprofiler::tool::generator& rocjpeg_calls); +} // namespace output +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/functions.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/functions.cpp new file mode 100644 index 0000000000..0d7c293168 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/functions.cpp @@ -0,0 +1,138 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "lib/python/rocpd/source/functions.hpp" + +#include "lib/common/logging.hpp" +#include "lib/common/utility.hpp" + +#include +#include + +#include +#include +#include + +#include + +namespace rocpd +{ +namespace functions +{ +namespace +{ +// Custom SQL function: rocpd_get_string(common_string_id, unique_string_id, nid, pid) +void +rocpd_get_string(sqlite3_context* context, int argc, sqlite3_value** argv) +{ + if(argc != 4) + { + ROCP_WARNING << "rocpd_get_string requires exactly 4 arguments (common_string_id, " + "unique_string_id, nid, pid)"; + sqlite3_result_null(context); + return; + } + + auto* db = static_cast(sqlite3_user_data(context)); + + // common and unique name ids passed in + auto c_name_id = sqlite3_value_int64(argv[0]); + auto u_name_id = sqlite3_value_int64(argv[1]); + + auto execute_query = [&](std::string_view _query, std::initializer_list&& _args) { + // char query[256]; + // snprintf(query, sizeof(query), "SELECT value FROM %s WHERE id = ?", table); + + sqlite3_stmt* stmt = nullptr; + + if(int rc = sqlite3_prepare_v2(db, _query.data(), -1, &stmt, nullptr); rc != SQLITE_OK) + { + ROCP_WARNING << fmt::format("SQL prepare failed: {}", sqlite3_errmsg(db)); + sqlite3_result_error(context, "SQL prepare failed", -1); + return; + } + + int64_t idx = 1; + for(auto itr : _args) + sqlite3_bind_int64(stmt, idx++, itr); + + if(auto rc = sqlite3_step(stmt); rc == SQLITE_ROW) + { + const unsigned char* result = sqlite3_column_text(stmt, 0); + sqlite3_result_text( + context, reinterpret_cast(result), -1, SQLITE_TRANSIENT); + } + else if(rc == SQLITE_DONE) + { + ROCP_WARNING << fmt::format("No row found for query '{}'", _query); + sqlite3_result_null(context); + } + else + { + ROCP_WARNING << fmt::format("SQL step failed: {}", sqlite3_errmsg(db)); + sqlite3_result_error(context, "SQL step failed", -1); + } + + sqlite3_finalize(stmt); + }; + + if(c_name_id != 0) + { + execute_query("SELECT string FROM rocpd_common_string WHERE id == ?", + 
std::initializer_list{c_name_id}); + } + else if(u_name_id != 0) + { + auto u_nid = sqlite3_value_int64(argv[2]); + auto u_pid = sqlite3_value_int64(argv[3]); + + execute_query( + "SELECT string FROM rocpd_unique_string WHERE id == ? AND nid = ? AND pid = ?", + std::initializer_list{u_name_id, u_nid, u_pid}); + } + else + { + sqlite3_result_null(context); + } +} +} // namespace + +void +define_for_database(sqlite3* conn) +{ + if(false) + { + sqlite3_create_function_v2(conn, + "rocpd_get_string", + 4, + SQLITE_UTF8, + conn, + rocpd_get_string, + nullptr, + nullptr, + nullptr); + } + + rocprofiler::common::consume_args(conn); +} +} // namespace functions +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/functions.hpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/functions.hpp new file mode 100644 index 0000000000..01b66f72f6 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/functions.hpp @@ -0,0 +1,38 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/common/defines.hpp" +#include "lib/common/logging.hpp" + +#include +#include + +namespace rocpd +{ +namespace functions +{ +void +define_for_database(sqlite3* conn); +} // namespace functions +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/interop.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/interop.cpp new file mode 100644 index 0000000000..e2bc92c4a5 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/interop.cpp @@ -0,0 +1,208 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "interop.hpp" +#include "pysqlite_Connection.h" + +#include "lib/common/defines.hpp" +#include "lib/common/logging.hpp" +#include "lib/common/static_object.hpp" +#include "lib/common/utility.hpp" +#include "lib/output/timestamps.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace rocpd +{ +namespace interop +{ +namespace +{ +namespace sdk = ::rocprofiler::sdk; +namespace common = ::rocprofiler::common; +// namespace tool = ::rocprofiler::tool; +// namespace sql = ::rocprofiler::tool::sql; + +using sqlite3_open_func_t = int (*)(const char*, sqlite3**); +using sqlite3_open_v2_func_t = int (*)(const char*, sqlite3**, int, const char*); +using sqlite3_close_func_t = int (*)(sqlite3*); + +gotcha_wrappee_handle_t orig_sqlite3_open = {}; +gotcha_wrappee_handle_t orig_sqlite3_open_v2 = {}; +gotcha_wrappee_handle_t orig_sqlite3_close = {}; +gotcha_wrappee_handle_t orig_sqlite3_close_v2 = {}; + +using sqlite_object_map_t = std::unordered_map; + +thread_local sqlite3* last_sqlite3 = nullptr; +auto sqlite_obj_mapping = sqlite_object_map_t{}; + +auto* +get_mapping() +{ + static auto*& _v = common::static_object::construct(); + return _v; +} + +void +erase_connection(sqlite3* pDb) +{ + if(!get_mapping()) return; + + if(last_sqlite3 && last_sqlite3 == pDb) last_sqlite3 = nullptr; + + auto itr = std::find_if(get_mapping()->begin(), get_mapping()->end(), [pDb](const auto& val) { + return (val.second == pDb); + }); + if(itr != get_mapping()->end()) get_mapping()->erase(itr); +} + +namespace impl +{ +int +sqlite3_open(const char* filename, /* Database filename (UTF-8) */ + sqlite3** ppDb /* OUT: SQLite db handle */ +) +{ + ROCP_TRACE << 
fmt::format("invoking {}... {}", __FUNCTION__, filename); + + auto func = reinterpret_cast(gotcha_get_wrappee(orig_sqlite3_open)); + auto ret = func(filename, ppDb); + + if(ppDb) last_sqlite3 = *ppDb; + + return ret; +} + +int +sqlite3_open_v2(const char* filename, sqlite3** ppDb, int flags, const char* zVfs) +{ + ROCP_TRACE << fmt::format("invoking {}... {}", __FUNCTION__, filename); + + auto func = reinterpret_cast(gotcha_get_wrappee(orig_sqlite3_open_v2)); + auto ret = func(filename, ppDb, flags, zVfs); + + if(ppDb) last_sqlite3 = *ppDb; + + return ret; +} + +int +sqlite3_close(sqlite3* pDb) +{ + ROCP_TRACE << fmt::format("invoking {}... ", __FUNCTION__); + + auto func = reinterpret_cast(gotcha_get_wrappee(orig_sqlite3_close)); + auto ret = func(pDb); + + erase_connection(pDb); + + return ret; +} + +int +sqlite3_close_v2(sqlite3* pDb) +{ + ROCP_TRACE << fmt::format("invoking {}... ", __FUNCTION__); + + auto func = reinterpret_cast(gotcha_get_wrappee(orig_sqlite3_close_v2)); + auto ret = func(pDb); + + erase_connection(pDb); + + return ret; +} +} // namespace impl + +auto bindings = std::array{ + gotcha_binding_t{"sqlite3_open", + reinterpret_cast(impl::sqlite3_open), + &orig_sqlite3_open}, + gotcha_binding_t{"sqlite3_close", + reinterpret_cast(impl::sqlite3_close), + &orig_sqlite3_close}, + gotcha_binding_t{"sqlite3_open_v2", + reinterpret_cast(impl::sqlite3_open_v2), + &orig_sqlite3_open_v2}, + gotcha_binding_t{"sqlite3_close_v2", + reinterpret_cast(impl::sqlite3_close_v2), + &orig_sqlite3_close_v2}, +}; +} // namespace + +void +activate_gotcha_bindings() +{ + // activate the gotcha wrappers + auto _err = gotcha_wrap(bindings.data(), bindings.size(), "rocpd.sqlite3"); + ROCP_WARNING_IF(_err != GOTCHA_SUCCESS) << "gotcha error for rocpd.sqlite3"; +} + +sqlite3* +map_connection(py::object obj) +{ + if(!get_mapping()) return nullptr; + + get_mapping()->emplace(obj.ptr(), last_sqlite3); + + auto* _ret = get_mapping()->at(obj.ptr()); + ROCP_INFO << 
"[pyrocpd][mapping] " << obj.ptr() << " <-> " << _ret; + + return get_mapping()->at(obj.ptr()); +} + +sqlite3* +get_connection(py::object&& obj) +{ + if(!obj) return nullptr; + + if(get_mapping()) + { + if(auto itr = get_mapping()->find(obj.ptr()); + itr != get_mapping()->end() && itr->second != nullptr) + { + ROCP_INFO << fmt::format( + "sqlite3 python connection ({}) mapped to sqlite3* ({}) safely " + "via gotcha capture of sqlite3_open", + sdk::utility::as_hex(obj.ptr(), 16), + sdk::utility::as_hex(itr->second, 16)); + return itr->second; + } + } + + ROCP_CI_LOG(WARNING) << fmt::format( + "sqlite3 python connection ({}) not captured by gotcha... accessing sqlite3* via " + "reinterpret_cast(PyObject*&)->db :: [unsafe]", + sdk::utility::as_hex(obj.ptr(), 16)); + + return reinterpret_cast(obj.ptr())->db; +} +} // namespace interop +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/interop.hpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/interop.hpp new file mode 100644 index 0000000000..8a80da0ec1 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/interop.hpp @@ -0,0 +1,46 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/common/defines.hpp" +#include "lib/common/logging.hpp" + +#include +#include + +namespace rocpd +{ +namespace interop +{ +namespace py = ::pybind11; + +void +activate_gotcha_bindings(); + +sqlite3* +map_connection(py::object obj); + +sqlite3* +get_connection(py::object&& obj); +} // namespace interop +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.cpp new file mode 100644 index 0000000000..ed84dfb82e --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.cpp @@ -0,0 +1,905 @@ +// MIT License +// +// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "lib/python/rocpd/source/otf2.hpp" + +#include "lib/common/defines.hpp" +#include "lib/common/filesystem.hpp" +#include "lib/common/hasher.hpp" +#include "lib/common/mpl.hpp" +#include "lib/common/units.hpp" +#include "lib/common/utility.hpp" +#include "lib/output/generator.hpp" +#include "lib/output/metadata.hpp" +#include "lib/output/node_info.hpp" +#include "lib/output/output_config.hpp" +#include "lib/output/output_stream.hpp" +#include "lib/output/sql/common.hpp" +#include "lib/output/stream_info.hpp" +#include "lib/output/timestamps.hpp" +#include "lib/rocprofiler-sdk-tool/config.hpp" + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OTF2_CHECK(result) \ + { \ + OTF2_ErrorCode ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) = result; \ + if(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) != OTF2_SUCCESS) \ + { \ + auto _err_name = OTF2_Error_GetName(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \ + auto _err_msg = \ + OTF2_Error_GetDescription(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \ + ROCP_FATAL << #result << " failed with error code " << _err_name \ + << " (code=" << ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) \ + << ") :: " << _err_msg; \ + } \ + } + +namespace rocpd +{ +namespace output +{ +namespace +{ +template +struct array_hash +{ + size_t 
operator()(const std::array& _data) const + { + constexpr size_t seed = 0x9e3779b9; + size_t _val = 0; + for(const auto& itr : _data) + _val ^= std::hash{}(itr) + seed + (_val << 6) + (_val >> 2); + return _val; + } + + template + size_t operator()(Up... _data) const + { + static_assert(sizeof...(Up) == N, "Insufficient data"); + return operator()(std::array{std::forward(_data)...}); + } +}; + +struct region_info +{ + std::string name = {}; + OTF2_RegionRole_enum region_role = OTF2_REGION_ROLE_FUNCTION; + OTF2_Paradigm_enum paradigm = OTF2_PARADIGM_HIP; +}; + +OTF2_FlushType +pre_flush(void* userData, + OTF2_FileType fileType, + OTF2_LocationRef location, + void* callerData, + bool fini); + +OTF2_TimeStamp +post_flush(void* userData, OTF2_FileType fileType, OTF2_LocationRef location); + +template +void +consume_variables(Args&&...) +{} + +using event_writer_t = OTF2_EvtWriter; +using archive_t = OTF2_Archive; +using attribute_list_t = OTF2_AttributeList; +using hash_value_t = size_t; +using hash_map_t = std::unordered_map; + +auto main_tid = rocprofiler::common::get_tid(); +archive_t* archive = nullptr; +auto flush_callbacks = OTF2_FlushCallbacks{pre_flush, post_flush}; +OTF2_GlobalDefWriter* global_def_writer = nullptr; // shared between data bases (processes) + +enum rocprofiler_location_type_t +{ + ROCPROFILER_AGENT_NO_TYPE = 0, + ROCPROFILER_AGENT_MEMORY_COPY_TYPE, + ROCPROFILER_AGENT_DISPATCH_TYPE, + ROCPROFILER_AGENT_MEMORY_ALLOC_TYPE, + ROCPROFILER_AGENT_MEMORY_DEALLOC_TYPE +}; + +struct location_base +{ + uint64_t pid = 0; + uint64_t tid = 0; + uint64_t agent_handle = 0; + uint64_t queue_id = 0; + rocprofiler_location_type_t type = ROCPROFILER_AGENT_NO_TYPE; + + location_base(pid_t _pid, + pid_t _tid, + uint64_t _agent_handle = 0, + rocprofiler_location_type_t _type = ROCPROFILER_AGENT_NO_TYPE, + uint64_t _queue_id = 0) + : pid{static_cast(_pid)} + , tid{static_cast(_tid)} + , agent_handle{_agent_handle} + , queue_id{_queue_id} + , type{_type} + {} + + auto 
hash() const + { + return array_hash{}(pid, tid, agent_handle + 1, queue_id + 1, type); + } +}; + +bool +operator<(const location_base& lhs, const location_base& rhs) +{ + return std::tie(lhs.pid, lhs.tid, lhs.agent_handle, lhs.queue_id, lhs.type) < + std::tie(rhs.pid, rhs.tid, rhs.agent_handle, rhs.queue_id, rhs.type); +} + +struct location_data : location_base +{ + location_data(pid_t _pid, + pid_t _tid, + uint64_t _agent_handle = 0, + rocprofiler_location_type_t _type = ROCPROFILER_AGENT_NO_TYPE, + uint64_t _queue_id = 0) + : location_base{_pid, _tid, _agent_handle, _type, _queue_id} + , index{++index_counter} + , event_writer{OTF2_Archive_GetEvtWriter(CHECK_NOTNULL(archive), index)} + { + CHECK_NOTNULL(event_writer); + } + + using location_base::hash; + + static uint64_t index_counter; + + uint64_t index = 0; + event_writer_t* event_writer = nullptr; + + bool operator==(const location_base& rhs) const { return (hash() == rhs.hash()); } +}; + +uint64_t location_data::index_counter = 0; + +OTF2_TimeStamp +get_time() +{ + auto _ts = rocprofiler_timestamp_t{}; + rocprofiler_get_timestamp(&_ts); + return static_cast(_ts); +} + +auto& +get_locations() +{ + static auto _v = std::vector>{}; + return _v; +} + +const location_data* +get_location(const location_base& _location, bool _init = false) +{ + for(auto& itr : get_locations()) + if(*itr == _location) return itr.get(); + + if(_init) + return get_locations() + .emplace_back(std::make_unique(_location.pid, + _location.tid, + _location.agent_handle, + _location.type, + _location.queue_id)) + .get(); + + return nullptr; +} + +event_writer_t* +get_event_writer(const location_base& _location, bool _init = false) +{ + const auto* _loc = get_location(_location, _init); + return (_loc) ? 
_loc->event_writer : nullptr; +} + +OTF2_FlushType +pre_flush(void* userData, + OTF2_FileType fileType, + OTF2_LocationRef location, + void* callerData, + bool fini) +{ + consume_variables(userData, fileType, location, callerData, fini); + return OTF2_FLUSH; +} + +OTF2_TimeStamp +post_flush(void* userData, OTF2_FileType fileType, OTF2_LocationRef location) +{ + consume_variables(userData, fileType, location); + return get_time(); +} + +template +size_t +get_hash_id(Tp&& _val) +{ + using value_type = rocprofiler::common::mpl::unqualified_type_t; + + if constexpr(!std::is_pointer::value) + return std::hash{}(std::forward(_val)); + else if constexpr(std::is_same::value || + std::is_same::value) + return get_hash_id(std::string_view{_val}); + else + return get_hash_id(*_val); +} + +template +auto +add_event(std::string_view name, + const location_base& _location, + rocprofiler_callback_phase_t _phase, + OTF2_TimeStamp _ts, + attribute_list_t* _attributes = nullptr) +{ + auto* evt_writer = get_event_writer(_location, true); + auto _hash = get_hash_id(name); + + if(_phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + OTF2_CHECK(OTF2_EvtWriter_Enter(evt_writer, _attributes, _ts, _hash)) + else if(_phase == ROCPROFILER_CALLBACK_PHASE_EXIT) + OTF2_CHECK(OTF2_EvtWriter_Leave(evt_writer, _attributes, _ts, _hash)) + else + ROCP_FATAL << "otf2::add_event phase is not enter or exit"; +} + +void +setup(const rocprofiler::tool::output_config& cfg, uint64_t min_start, uint64_t max_fini) +{ + namespace fs = rocprofiler::common::filesystem; + + auto _filename = rocprofiler::tool::get_output_filename(cfg, "results", std::string_view{}); + auto _filepath = fs::path{_filename}; + auto _name = _filepath.filename().string(); + auto _path = _filepath.parent_path().string(); + + if(fs::exists(_filepath)) fs::remove_all(_filepath); + + constexpr uint64_t evt_chunk_size = 2 * rocprofiler::common::units::MB; + constexpr uint64_t def_chunk_size = 8 * rocprofiler::common::units::MB; + + archive = 
OTF2_Archive_Open(_path.c_str(), + _name.c_str(), + OTF2_FILEMODE_WRITE, + evt_chunk_size, // event chunk size + def_chunk_size, // def chunk size + OTF2_SUBSTRATE_POSIX, + OTF2_COMPRESSION_NONE); + + OTF2_CHECK(OTF2_Archive_SetFlushCallbacks(archive, &flush_callbacks, nullptr)); + OTF2_CHECK(OTF2_Archive_SetSerialCollectiveCallbacks(archive)); + OTF2_CHECK(OTF2_Pthread_Archive_SetLockingCallbacks(archive, nullptr)); + OTF2_CHECK(OTF2_Archive_OpenEvtFiles(archive)); + + ROCP_ERROR << "Opened result file: " << _filename << ".otf2"; + + auto _timer_resolution = + rocprofiler::common::get_clock_period_ns_impl(rocprofiler::common::default_clock_id) * + std::nano::den; + auto _global_offset = min_start; + auto _max_trace_length = (max_fini - min_start); + + global_def_writer = OTF2_Archive_GetGlobalDefWriter(archive); + OTF2_CHECK(OTF2_GlobalDefWriter_WriteClockProperties( + global_def_writer, + _timer_resolution, + _global_offset, + _max_trace_length, + std::chrono::system_clock::now().time_since_epoch().count())); + + OTF2_CHECK(OTF2_GlobalDefWriter_WriteString(global_def_writer, 0, "")); + + auto add_write_string = [](size_t _hash, std::string_view _name_strv) { + static auto _existing = std::unordered_set{}; + if(_hash > 0 && _existing.count(_hash) == 0) + { + OTF2_CHECK( + OTF2_GlobalDefWriter_WriteString(global_def_writer, _hash, _name_strv.data())); + _existing.emplace(_hash); + } + }; + + auto add_write_string_val = [&add_write_string](std::string_view _name_v) { + auto _hash_v = get_hash_id(_name_v); + add_write_string(_hash_v, _name_v); + return _hash_v; + }; + + //(must be shared between processes) + auto _attr_name = std::string_view{"category"}; + auto _attr_desc = std::string_view{"tracing category"}; + + auto _attr_name_hash = add_write_string_val(_attr_name); + auto _attr_desc_hash = add_write_string_val(_attr_desc); + + OTF2_CHECK(OTF2_GlobalDefWriter_WriteAttribute( + global_def_writer, 0, _attr_name_hash, _attr_desc_hash, OTF2_TYPE_STRING)); +} + 
+void +shutdown() +{ + OTF2_CHECK(OTF2_Archive_Close(archive)); +} + +struct event_info +{ + explicit event_info(location_base&& _loc) + : m_location{output::get_location(std::forward(_loc), true)} + {} + + auto id() const { return m_location->index; } + auto hash() const { return m_location->hash(); } + const location_base* get_location() const { return m_location; } + + std::string name = {}; + uint64_t event_count = 0; + +private: + const location_data* m_location = nullptr; +}; + +attribute_list_t* +create_attribute_list_for_name(const char* name) +{ + auto* _val = OTF2_AttributeList_New(); + auto _hash = get_hash_id(name); + auto _attr_value = OTF2_AttributeValue{}; + _attr_value.stringRef = _hash; + OTF2_AttributeList_AddAttribute(_val, 0, OTF2_TYPE_STRING, _attr_value); + return _val; +} +} // namespace + +OTF2Session::OTF2Session(const tool::output_config& output_cfg, + uint64_t min_start, + uint64_t max_fini) +: config{output_cfg} +{ + setup(output_cfg, min_start, max_fini); +} + +OTF2Session::~OTF2Session() { shutdown(); } + +void +write_otf2(const OTF2Session& otf2_session, + const types::process& process, + const uint16_t tree_node_id, + const std::unordered_map& agent_data, + const tool::generator& thread_gen, + const tool::generator& api_gen, + const tool::generator& kernel_dispatch_gen, + const tool::generator& memory_copy_gen, + const tool::generator& memory_allocation_gen) +{ + const uint64_t _no_agent_handle = 0; + // std::numeric_limits::max() - 1; + const auto& ocfg = otf2_session.config; + + auto _app_ts = rocprofiler::tool::timestamps_t{process.start, process.fini}; + + auto thread_event_info = std::map{}; + auto agent_memcpy_info = + std::map>{}; // tid -> agent_handle ->evt + auto agent_memalloc_info = + std::map>{}; // // tid -> agent_handle ->evt + auto agent_dispatch_info = + std::map>>{}; // tid -> agent_handle + // -> quieue_id -> evt + auto _get_alloc_level_type_name = [](const std::string& level, + const std::string& type) -> 
std::string { + static const std::unordered_map> + name_map = { + {"REAL", + {{"ALLOC", "MEMORY_ALLOCATION_ALLOCATE"}, {"FREE", "MEMORY_ALLOCATION_FREE"}}}, + {"VIRTUAL", + {{"ALLOC", "MEMORY_ALLOCATION_VMEM_ALLOCATE"}, + {"FREE", "MEMORY_ALLOCATION_VMEM_FREE"}}}, + {"SCRATCH", {{"ALLOC", "SCRATCH_MEMORY_ALLOC"}, {"FREE", "SCRATCH_MEMORY_FREE"}}}}; + + if((name_map.count(level) != 0u) && (name_map.at(level).count(type) != 0u)) + return name_map.at(level).at(type); + + return level == "SCRATCH" || level == "REAL" || level == "VIRTUAL" ? level + "_MEMORY_NONE" + : "UNKNOWN_LEVEL"; + }; + + for(auto ditr : thread_gen) + for(const auto& itr : thread_gen.get(ditr)) + { + auto _evt_info = event_info{location_base{process.pid, itr.tid}}; + _evt_info.name = fmt::format("Thread {}", itr.tid); + thread_event_info.emplace(itr.tid, _evt_info); + } + + auto _hash_data = hash_map_t{}; + + struct evt_data + { + rocprofiler_callback_phase_t phase = ROCPROFILER_CALLBACK_PHASE_NONE; + std::string name = {}; + const location_base* location = nullptr; + uint64_t timestamp = 0; + OTF2_AttributeList* attributes = nullptr; + }; + + auto _data = std::deque{}; + auto _attr_str = std::unordered_map{}; + + // copypatse from perfetto. TODO: Move to a common place? 
+ auto get_category_string = [](std::string_view _category) { + static auto buffer_names = rocprofiler::sdk::get_buffer_tracing_names(); + auto _category_idx = ROCPROFILER_BUFFER_TRACING_NONE; + for(const auto& citr : buffer_names) + { + if(_category == citr.name) _category_idx = citr.value; + } + return rocprofiler::sdk::get_perfetto_category(_category_idx); + }; + + auto get_category_attribute = [&get_category_string, + &_attr_str](const std::string& category) -> OTF2_AttributeList* { + const auto* _perfetto_category = get_category_string(category); + _attr_str.emplace(get_hash_id(_perfetto_category), _perfetto_category); + return create_attribute_list_for_name(_perfetto_category); + }; + + for(auto ditr : api_gen) + for(const auto& itr : api_gen.get(ditr)) + { + std::string _name = itr.name; + _hash_data.emplace(get_hash_id(_name), + region_info{_name, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_HIP}); + + auto& _evt_info = thread_event_info.at(itr.tid); + _evt_info.event_count += 1; + + auto* attributes = get_category_attribute(itr.category); + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, + _name, + _evt_info.get_location(), + itr.start, + attributes}); + + if(!attributes) + { + ROCP_FATAL << "Undefined attributes for api call " << itr.name; + } + + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, + _name, + _evt_info.get_location(), + itr.end, + nullptr}); + } + + for(auto ditr : memory_copy_gen) + for(const auto& itr : memory_copy_gen.get(ditr)) + { + std::string _name = itr.name; + _hash_data.emplace( + get_hash_id(_name), + region_info{_name, OTF2_REGION_ROLE_DATA_TRANSFER, OTF2_PARADIGM_HIP}); + + auto _extended_agent = agent_data.at(itr.dst_agent_abs_index); + auto _agent_handle = _extended_agent.types_agent.id.handle; + auto _evt_info = event_info{location_base{ + process.pid, itr.tid, _agent_handle, ROCPROFILER_AGENT_MEMORY_COPY_TYPE}}; + + auto agent_index_info = _extended_agent.agent_index; + _evt_info.name = 
fmt::format("Thread {}, Copy to {} {}", + itr.tid, + agent_index_info.type, + agent_index_info.as_string("-")); + + _evt_info.event_count += 1; + + agent_memcpy_info[itr.tid].emplace(_agent_handle, _evt_info); + + const auto* _perfetto_name = + rocprofiler::sdk::perfetto_category::name; + + _attr_str.emplace(get_hash_id(_perfetto_name), _perfetto_name); + auto* _attrs = create_attribute_list_for_name(_perfetto_name); + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, + _name, + _evt_info.get_location(), + itr.start, + _attrs}); + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, + _name, + _evt_info.get_location(), + itr.end, + nullptr}); + }; + + for(auto ditr : memory_allocation_gen) + for(const auto& itr : memory_allocation_gen.get(ditr)) + { + std::string _alloc_operation = _get_alloc_level_type_name(itr.level, itr.type); + + const auto* _perfetto_name = rocprofiler::sdk::perfetto_category< + rocprofiler::sdk::category::memory_allocation>::name; + + _attr_str.emplace(get_hash_id(_perfetto_name), _perfetto_name); + auto* _attrs = create_attribute_list_for_name(_perfetto_name); + + if(itr.type == "ALLOC") + { + _hash_data.emplace( + get_hash_id(_alloc_operation), + region_info{_alloc_operation, OTF2_REGION_ROLE_ALLOCATE, OTF2_PARADIGM_HIP}); + + auto _extended_agent = agent_data.at(itr.agent_abs_index); + auto _handle = _extended_agent.types_agent.id.handle; + + auto _evt_info = event_info{location_base{ + process.pid, itr.tid, _handle, ROCPROFILER_AGENT_MEMORY_ALLOC_TYPE}}; + + auto agent_index_info = _extended_agent.agent_index; + _evt_info.name = fmt::format("Thread {}, Memory Allocate at {} {}", + itr.tid, + agent_index_info.type, + agent_index_info.as_string("-")); + + agent_memalloc_info[itr.tid].emplace(_handle, _evt_info); + _evt_info.event_count += 1; + + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, + _alloc_operation, + _evt_info.get_location(), + itr.start, + _attrs}); + 
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, + _alloc_operation, + _evt_info.get_location(), + itr.end, + nullptr}); + } + else if(itr.type == "FREE") // + { + _hash_data.emplace( + get_hash_id(_alloc_operation), + region_info{_alloc_operation, OTF2_REGION_ROLE_DEALLOCATE, OTF2_PARADIGM_HIP}); + + auto _evt_info = event_info{location_base{ + process.pid, itr.tid, _no_agent_handle, ROCPROFILER_AGENT_MEMORY_DEALLOC_TYPE}}; + _evt_info.name = fmt::format("Thread {}, Memory Deallocate (Free)", itr.tid); + + agent_memalloc_info[itr.tid].emplace(_no_agent_handle, _evt_info); + + _evt_info.event_count += 1; + + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, + _alloc_operation, + _evt_info.get_location(), + itr.start, + _attrs}); + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, + _alloc_operation, + _evt_info.get_location(), + itr.end, + nullptr}); + } + else + { + auto _evt_info = event_info{location_base{process.pid, itr.tid}}; + _evt_info.name = fmt::format("Thread {}, Memory Operation UNK", itr.tid); + _evt_info.event_count += 1; + agent_memalloc_info[itr.tid].emplace(_no_agent_handle, _evt_info); + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, + _alloc_operation, + _evt_info.get_location(), + itr.start, + _attrs}); + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, + _alloc_operation, + _evt_info.get_location(), + itr.end, + nullptr}); + } + } + + auto _queue_ids = std::map{}; + for(auto ditr : kernel_dispatch_gen) + for(const auto& itr : kernel_dispatch_gen.get(ditr)) + { + auto _name = fmt::format( + "{}", (ocfg.kernel_rename && !itr.region.empty()) ? 
itr.region : itr.name); + _hash_data.emplace(get_hash_id(_name), + region_info{_name, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_HIP}); + + const auto* _perfetto_name = rocprofiler::sdk::perfetto_category< + rocprofiler::sdk::category::kernel_dispatch>::name; + + _attr_str.emplace(get_hash_id(_perfetto_name), _perfetto_name); + auto* _attrs = create_attribute_list_for_name(_perfetto_name); + + auto _extended_agent = agent_data.at(itr.agent_abs_index); + auto _handle = _extended_agent.types_agent.id.handle; + auto agent_index_info = _extended_agent.agent_index; + + auto _evt_info = event_info{location_base{ + process.pid, itr.tid, _handle, ROCPROFILER_AGENT_DISPATCH_TYPE, itr.queue_id}}; + + if(_queue_ids.count(itr.queue_id) == 0) + { + _queue_ids.emplace(itr.queue_id, _queue_ids.size()); + } + + _evt_info.name = fmt::format("Thread {}, Compute on {} {}, Queue {}", + + itr.tid, + agent_index_info.type, + agent_index_info.as_string("-"), + _queue_ids.at(itr.queue_id)); + + _evt_info.event_count += 1; + agent_dispatch_info[itr.tid][_handle].emplace(itr.queue_id, _evt_info); + + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, + _name, + _evt_info.get_location(), + itr.start, + _attrs}); + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, + _name, + _evt_info.get_location(), + itr.end, + nullptr}); + } + + std::sort(_data.begin(), _data.end(), [](const evt_data& lhs, const evt_data& rhs) { + if(lhs.timestamp != rhs.timestamp) return (lhs.timestamp < rhs.timestamp); + if(lhs.phase != rhs.phase) return (lhs.phase > rhs.phase); + return (*lhs.location < *rhs.location); + }); + + for(const auto& itr : _data) + { + add_event(itr.name, *itr.location, itr.phase, itr.timestamp, itr.attributes); + ROCP_ERROR_IF(itr.timestamp < _app_ts.app_start_time) + << "event found with timestamp < app start time by " + << (_app_ts.app_start_time - itr.timestamp) << " nsec :: " << itr.name; + ROCP_ERROR_IF(itr.timestamp > _app_ts.app_end_time) + << "event found with 
timestamp > app end time by " + << (itr.timestamp - _app_ts.app_end_time) << " nsec :: " << itr.name; + } + + for(const auto& itr : _data) + { + if(itr.attributes) OTF2_AttributeList_Delete(itr.attributes); + } + + OTF2_CHECK(OTF2_Archive_CloseEvtFiles(archive)); + + OTF2_CHECK(OTF2_Archive_OpenDefFiles(archive)); + for(auto& itr : get_locations()) + { + OTF2_DefWriter* def_writer = OTF2_Archive_GetDefWriter(archive, itr->index); + OTF2_Archive_CloseDefWriter(archive, def_writer); + } + OTF2_CHECK(OTF2_Archive_CloseDefFiles(archive)); + + for(const auto& itr : _hash_data) + { + if(itr.first != 0) + OTF2_CHECK(OTF2_GlobalDefWriter_WriteString( + global_def_writer, itr.first, itr.second.name.c_str())); + } + + for(const auto& itr : _hash_data) + { + if(itr.first != 0) + OTF2_CHECK(OTF2_GlobalDefWriter_WriteRegion(global_def_writer, + itr.first, + itr.first, + 0, + 0, + itr.second.region_role, + itr.second.paradigm, + OTF2_REGION_FLAG_NONE, + 0, + 0, + 0)); + } + + auto add_write_string = [](size_t _hash, std::string_view _name) { + static auto _existing = std::unordered_set{}; + if(_hash > 0 && _existing.count(_hash) == 0) + { + OTF2_CHECK(OTF2_GlobalDefWriter_WriteString(global_def_writer, _hash, _name.data())); + _existing.emplace(_hash); + } + }; + + for(const auto& itr : _attr_str) + add_write_string(itr.first, itr.second); + + std::istringstream command_line(process.command); + std::string _exe_name; + command_line >> _exe_name; // Extracts characters until whitespace + _exe_name = fmt::format("{}", _exe_name); + auto _exe_hash = get_hash_id(_exe_name); + add_write_string(_exe_hash, _exe_name); + + auto _node_name = std::string{"node"}; + { + if(!process.hostname.empty()) + { + if(process.hostname.length() >= PATH_MAX) + { + _node_name = process.hostname.substr(0, PATH_MAX - 1); + } + else + { + _node_name = process.hostname; + } + } + } + _node_name = fmt::format("{}", _node_name); + auto _node_hash = get_hash_id(_node_name); + add_write_string(_node_hash, 
_node_name); + + // debug + OTF2_CHECK(OTF2_GlobalDefWriter_WriteSystemTreeNode( + global_def_writer, tree_node_id, _exe_hash, _node_hash, OTF2_UNDEFINED_SYSTEM_TREE_NODE)); + + // Process + OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocationGroup(global_def_writer, + tree_node_id, + _exe_hash, + OTF2_LOCATION_GROUP_TYPE_PROCESS, + tree_node_id, + OTF2_UNDEFINED_LOCATION_GROUP)); + + // Accelerators (must be shared between the processes) + for(const auto& [abs_idx, extended_agent] : agent_data) + { + auto _handle = extended_agent.types_agent.id.handle; + const auto _name = std::string_view{extended_agent.labeled_name}; + auto _hash = get_hash_id(_name); + + add_write_string(_hash, _name); + OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocationGroup(global_def_writer, + _handle, + _hash, + OTF2_LOCATION_GROUP_TYPE_ACCELERATOR, + tree_node_id, + OTF2_UNDEFINED_LOCATION_GROUP)); + } + + // Thread Events + for(auto& [tid, evt] : thread_event_info) + { + auto _hash = get_hash_id(evt.name); + + add_write_string(_hash, evt.name); + OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer, + evt.id(), // id + _hash, + OTF2_LOCATION_TYPE_CPU_THREAD, + 2 * evt.event_count, // # events + tree_node_id // location group + )); + } + + // Memcpy Events + for(auto& [tid, itr] : agent_memcpy_info) + { + for(auto& [agent_handle, evt] : itr) + { + auto _hash = get_hash_id(evt.name); + + add_write_string(_hash, evt.name); + OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer, + evt.id(), // id + _hash, + OTF2_LOCATION_TYPE_ACCELERATOR_STREAM, + 2 * evt.event_count, // # events + agent_handle // location group + )); + } + } + + // Memalloc Events + for(auto& [tid, itr] : agent_memalloc_info) + { + for(auto& [agent_handle, evt] : itr) + { + auto _hash = get_hash_id(evt.name); + + add_write_string(_hash, evt.name); + + OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer, + evt.id(), // id + _hash, + OTF2_LOCATION_TYPE_ACCELERATOR_STREAM, + 2 * evt.event_count, // # 
events + agent_handle // location group + )); + } + } + + // Dispatch Events + for(auto& [tid, itr] : agent_dispatch_info) + { + for(auto& [agent_handle, qitr] : itr) + { + for(auto& [queue_id, evt] : qitr) + { + auto _hash = get_hash_id(evt.name); + + add_write_string(_hash, evt.name); + + OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer, + evt.id(), // id + _hash, + OTF2_LOCATION_TYPE_ACCELERATOR_STREAM, + 2 * evt.event_count, // # events + agent_handle // location group + )); + } + } + } +} + +} // namespace output +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.hpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.hpp new file mode 100644 index 0000000000..cf0ebb0a23 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.hpp @@ -0,0 +1,71 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/python/rocpd/source/types.hpp" + +#include "lib/common/defines.hpp" +#include "lib/output/generator.hpp" +#include "lib/output/metadata.hpp" +#include "lib/output/node_info.hpp" +#include "lib/output/output_config.hpp" +#include "lib/output/sql/common.hpp" +#include "lib/output/stream_info.hpp" +#include "lib/rocprofiler-sdk-tool/config.hpp" + +#include + +namespace rocpd +{ +namespace output +{ +namespace tool = rocprofiler::tool; + +struct extended_agent +{ + const rocpd::types::agent& types_agent; + const tool::agent_index agent_index; + const std::string labeled_name; +}; + +struct OTF2Session +{ + OTF2Session(const tool::output_config& output_cfg, uint64_t min_start, uint64_t max_fini); + + ~OTF2Session(); + + const tool::output_config& config; +}; + +void +write_otf2(const OTF2Session& session, + const types::process& process, + const uint16_t tree_node_id, + const std::unordered_map& agent_data, + const tool::generator& thread_gen, + const tool::generator& api_gen, + const tool::generator& kernel_dispatch_gen, + const tool::generator& memory_copy_gen, + const tool::generator& memory_allocation_gen); +} // namespace output +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/perfetto.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/perfetto.cpp new file mode 100644 index 0000000000..9908941fbb --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/perfetto.cpp @@ -0,0 +1,966 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "lib/python/rocpd/source/perfetto.hpp" + +#include "lib/common/defines.hpp" +#include "lib/common/hasher.hpp" +#include "lib/common/mpl.hpp" +#include "lib/output/generator.hpp" +#include "lib/output/metadata.hpp" +#include "lib/output/node_info.hpp" +#include "lib/output/output_config.hpp" +#include "lib/output/output_stream.hpp" +#include "lib/output/sql/common.hpp" +#include "lib/output/stream_info.hpp" +#include "lib/rocprofiler-sdk-tool/config.hpp" + +#include + +#include +#include +#include +#include + +namespace rocpd +{ +namespace output +{ +namespace +{ +auto +to_string(rocprofiler_dim3_t v) +{ + return fmt::format("{} (x={}, y={}, z={})", (v.x * v.y * v.z), v.x, v.y, v.z); +} + +template +size_t +get_hash_id(Tp&& _val) +{ + using value_type = rocprofiler::common::mpl::unqualified_type_t; + + if constexpr(!std::is_pointer::value) + return std::hash{}(std::forward(_val)); + else if constexpr(std::is_same::value) + return get_hash_id(std::string_view{_val}); + else + return get_hash_id(*_val); +} +} // namespace + +PerfettoSession::PerfettoSession(const tool::output_config& output_cfg) +: config{output_cfg} +{ + auto args = ::perfetto::TracingInitArgs{}; + auto track_event_cfg = ::perfetto::protos::gen::TrackEventConfig{}; + auto cfg = ::perfetto::TraceConfig{}; + + // environment settings + auto shmem_size_hint = config.perfetto_shmem_size_hint; + auto buffer_size_kb = config.perfetto_buffer_size; + + auto* buffer_config = cfg.add_buffers(); + buffer_config->set_size_kb(buffer_size_kb); + + args.supports_multiple_data_source_instances = true; + // track_event_cfg.clear_disabled_categories(); + // track_event_cfg.clear_disabled_tags(); + + if(config.perfetto_buffer_fill_policy == "discard" || + config.perfetto_buffer_fill_policy.empty()) + buffer_config->set_fill_policy( + ::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD); + else if(config.perfetto_buffer_fill_policy == "ring_buffer") + buffer_config->set_fill_policy( + 
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_RING_BUFFER); + else + ROCP_FATAL << "Unsupport perfetto buffer fill policy: '" + << config.perfetto_buffer_fill_policy << "'. Supported: discard, ring_buffer"; + + auto* ds_cfg = cfg.add_data_sources()->mutable_config(); + ds_cfg->set_name("track_event"); // this MUST be track_event + ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString()); + + args.shmem_size_hint_kb = shmem_size_hint; + + if(config.perfetto_backend == "inprocess" || config.perfetto_backend.empty()) + args.backends |= ::perfetto::kInProcessBackend; + else if(config.perfetto_backend == "system") + args.backends |= ::perfetto::kSystemBackend; + else + ROCP_FATAL << "Unsupport perfetto backend: '" << config.perfetto_backend + << "'. Supported: inprocess, system"; + + ::perfetto::Tracing::Initialize(args); + ::perfetto::TrackEvent::Register(); + + tracing_session = ::perfetto::Tracing::NewTrace(); + + tracing_session->Setup(cfg); + tracing_session->StartBlocking(); +} + +PerfettoSession::~PerfettoSession() +{ + tracing_session->StopBlocking(); + auto filename = std::string{"results"}; + auto ofs = tool::get_output_stream(config, filename, ".pftrace", std::ios::binary); + + auto amount_read = std::atomic{0}; + auto is_done = std::promise{}; + auto _mtx = std::mutex{}; + auto _reader = [&ofs, &_mtx, &is_done, &amount_read]( + ::perfetto::TracingSession::ReadTraceCallbackArgs _args) { + auto _lk = std::unique_lock{_mtx}; + if(_args.data && _args.size > 0) + { + ROCP_TRACE << "Writing " << _args.size << " B to trace..."; + // Write the trace data into file + ofs.stream->write(_args.data, _args.size); + amount_read += _args.size; + } + ROCP_INFO_IF(!_args.has_more && amount_read > 0) + << "Wrote " << amount_read << " B to perfetto trace file"; + if(!_args.has_more) is_done.set_value(); + }; + + for(size_t i = 0; i < 2; ++i) + { + ROCP_TRACE << "Reading trace..."; + amount_read = 0; + is_done = std::promise{}; + 
tracing_session->ReadTrace(_reader); + is_done.get_future().wait(); + } + + ROCP_TRACE << "Destroying tracing session..."; + tracing_session.reset(); + + ROCP_TRACE << "Flushing trace output stream..."; + (*ofs.stream) << std::flush; + + ROCP_TRACE << "Destroying trace output stream..."; + ofs.close(); +} + +void +write_perfetto( + const PerfettoSession& perfetto_session, + const types::process& process, + const std::unordered_map>& + agent_data, + const tool::generator& thread_gen, + const tool::generator& region_gen, + const tool::generator& sample_gen, + const tool::generator& kernel_dispatch_gen, + const tool::generator& memory_copy_gen, + const tool::generator& memory_allocation_gen, + const tool::generator& counter_collection_gen) +{ + namespace sdk = ::rocprofiler::sdk; + namespace common = ::rocprofiler::common; + + static auto orig_process_track = ::perfetto::ProcessTrack::Current(); + static auto orig_process_desc = orig_process_track.Serialize(); + + const auto& tracing_session = perfetto_session.tracing_session; + const auto& ocfg = perfetto_session.config; + const uint64_t this_pid = process.pid; + const uint64_t this_pid_init_ns = process.init; + const uint64_t this_nid = process.nid; + auto command_line = ::rocprofiler::sdk::parse::tokenize(process.command, " "); + + auto uuid_pid = common::fnv1a_hasher::combine(this_nid, this_pid_init_ns, this_pid); + auto this_pid_track = ::perfetto::Track{uuid_pid, ::perfetto::Track{}}; + + { + auto desc = orig_process_desc; + desc.set_uuid(uuid_pid); + desc.set_parent_uuid(0); + desc.mutable_process()->set_pid(this_pid); + desc.mutable_process()->set_start_timestamp_ns(this_pid_init_ns); + + desc.mutable_process()->set_process_name(command_line.front()); + desc.mutable_process()->clear_cmdline(); + for(const auto& itr : command_line) + desc.mutable_process()->add_cmdline(itr); + + ::perfetto::TrackEvent::SetTrackDescriptor(this_pid_track, desc); + } + + auto agent_thread_ids = std::unordered_map>{}; + auto 
agent_thread_ids_alloc = std::unordered_map>{}; + auto agent_queue_ids = + std::unordered_map>{}; + auto agent_stream_ids = + std::unordered_map>{}; + auto thread_indexes = std::unordered_map{}; + + auto thread_tracks = std::unordered_map{}; + auto agent_thread_tracks = + std::unordered_map>{}; + auto agent_queue_tracks = + std::unordered_map>{}; + auto agent_stream_tracks = + std::unordered_map>{}; + + { + for(auto ditr : memory_copy_gen) + for(const auto& itr : memory_copy_gen.get(ditr)) + { + auto stream_id = rocprofiler_stream_id_t{.handle = itr.stream_id}; + agent_stream_ids[itr.dst_agent_abs_index].emplace(stream_id); + if(ocfg.group_by_queue) + { + agent_thread_ids[itr.dst_agent_abs_index].emplace(itr.tid); + } + } + } + + for(auto ditr : memory_allocation_gen) + for(const auto& itr : memory_allocation_gen.get(ditr)) + { + agent_thread_ids_alloc[itr.agent_abs_index].emplace(itr.tid); + } + + { + for(auto ditr : kernel_dispatch_gen) + for(const auto& itr : kernel_dispatch_gen.get(ditr)) + { + auto stream_id = rocprofiler_stream_id_t{.handle = itr.stream_id}; + auto queue_id = rocprofiler_queue_id_t{.handle = itr.queue_id}; + agent_stream_ids[itr.agent_abs_index].emplace(stream_id); + if(ocfg.group_by_queue) + { + agent_queue_ids[itr.agent_abs_index].emplace(queue_id); + } + } + } + + uint64_t nthrn = 0; + for(auto ditr : thread_gen) + for(const auto& itr : thread_gen.get(ditr)) + { + auto is_main_thread = (static_cast(itr.tid) == this_pid); + auto _idx = (is_main_thread) ? 
0 : ++nthrn; + thread_indexes.emplace(itr.tid, _idx); + auto _track = ::perfetto::Track{static_cast(itr.tid), this_pid_track}; + auto _desc = _track.Serialize(); + if(is_main_thread) + _desc.set_name(fmt::format("{}", ::basename(command_line.front().c_str()))); + else + _desc.set_name(fmt::format("THREAD {}", _idx)); + _desc.mutable_thread()->set_pid(this_pid); + _desc.mutable_thread()->set_tid(itr.tid); + if(is_main_thread) + _desc.mutable_thread()->set_thread_name( + fmt::format("{}", ::basename(command_line.front().c_str()))); + else + _desc.mutable_thread()->set_thread_name(fmt::format("THREAD {}", _idx)); + ::perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); + + thread_tracks.emplace(itr.tid, _track); + } + + for(const auto& [abs_index, thread_ids] : agent_thread_ids) + { + const auto _agent = agent_data.at(abs_index).first; + + for(auto titr : thread_ids) + { + auto _namess = std::stringstream{}; + _namess << "COPY to AGENT [" << _agent.logical_node_id << "] THREAD [" + << thread_indexes.at(titr) << "] "; + + if(_agent.type == "CPU") + _namess << "(CPU)"; + else if(_agent.type == "GPU") + _namess << "(GPU)"; + else + _namess << "(UNK)"; + + auto _track = ::perfetto::Track{get_hash_id(_namess.str()), this_pid_track}; + auto _desc = _track.Serialize(); + _desc.set_name(_namess.str()); + + perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); + + agent_thread_tracks[abs_index].emplace(titr, _track); + } + } + + for(const auto& [abs_index, queue_ids] : agent_queue_ids) + { + uint32_t nqueue = 0; + const auto _agent = agent_data.at(abs_index).first; + auto agent_index_info = agent_data.at(abs_index).second; + + for(auto qitr : queue_ids) + { + auto _namess = std::stringstream{}; + + _namess << "COMPUTE " << agent_index_info.label << " [" << agent_index_info.index + << "] QUEUE [" << nqueue++ << "] "; + _namess << agent_index_info.type; + + auto _track = ::perfetto::Track{get_hash_id(_namess.str()), this_pid_track}; + auto _desc = _track.Serialize(); + 
_desc.set_name(_namess.str()); + + ::perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); + + agent_queue_tracks[abs_index].emplace(qitr, _track); + } + } + + for(const auto& [abs_index, stream_ids] : agent_stream_ids) + { + const auto _agent = agent_data.at(abs_index).first; + // auto agent_index_info = agent_data.at(abs_index).second; + for(auto sitr : stream_ids) + { + const auto stream_id = sitr.handle; + + auto _name = + fmt::format("COMPUTE AGENT [{}] STREAM [{}]", _agent.logical_node_id, stream_id); + + if(_agent.type == "CPU") + _name = fmt::format("{} (CPU)", _name); + else if(_agent.type == "GPU") + _name = fmt::format("{} (GPU)", _name); + else + _name = fmt::format("{} (UNK)", _name); + + auto _track = ::perfetto::Track{get_hash_id(_name), this_pid_track}; + auto _desc = _track.Serialize(); + _desc.set_name(_name); + + ::perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); + + agent_stream_tracks[abs_index].emplace(sitr, _track); + } + } + + // Fetch counter values + auto counter_id_value = std::map{}; + auto counter_id_name = std::map{}; + for(auto ditr : counter_collection_gen) + for(const auto& record : counter_collection_gen.get(ditr)) + { + // Accumulate counters based on ID + counter_id_value[record.counter_id] += record.value; + counter_id_name[record.counter_id] = std::string{record.counter_name}; + } + + // trace events + { + auto get_category_string = [](std::string_view _category) { + static auto buffer_names = sdk::get_buffer_tracing_names(); + auto _category_idx = ROCPROFILER_BUFFER_TRACING_NONE; + for(const auto& citr : buffer_names) + { + if(_category == citr.name) _category_idx = citr.value; + } + return sdk::get_perfetto_category(_category_idx); + }; + + for(auto ditr : region_gen) + { + for(auto itr : region_gen.get(ditr)) + { + auto& track = thread_tracks.at(itr.tid); + auto _name = itr.name; + + if(itr.has_extdata()) + { + if(auto _extdata = itr.get_extdata(); !_extdata.message.empty()) + _name = _extdata.message; + } + + 
auto _category = ::perfetto::DynamicCategory{get_category_string(itr.category)}; + TRACE_EVENT_BEGIN(_category, + ::perfetto::DynamicString{_name}, + track, + itr.start, + ::perfetto::Flow::Global(itr.stack_id ^ uuid_pid), + "begin_ns", + itr.start, + "end_ns", + itr.end, + "delta_ns", + (itr.end - itr.start), + "tid", + itr.tid, + "kind", + itr.category, + "operation", + _name, + "corr_id", + itr.stack_id, + "ancestor_id", + itr.parent_stack_id, + [&](::perfetto::EventContext ctx) { (void) ctx; }); + + TRACE_EVENT_END(_category, track, itr.end); + + tracing_session->FlushBlocking(); + } + } + + for(auto ditr : sample_gen) + { + for(auto itr : sample_gen.get(ditr)) + { + auto& track = thread_tracks.at(itr.tid); + auto _name = itr.name; + + if(itr.has_extdata()) + { + if(auto _extdata = itr.get_extdata(); !_extdata.message.empty()) + _name = _extdata.message; + } + + auto _category = ::perfetto::DynamicCategory{get_category_string(itr.category)}; + TRACE_EVENT_INSTANT(_category, + ::perfetto::DynamicString{_name}, + track, + itr.timestamp, + ::perfetto::Flow::Global(itr.stack_id ^ uuid_pid), + "begin_ns", + itr.timestamp, + "end_ns", + itr.timestamp, + "delta_ns", + 0, + "tid", + itr.tid, + "kind", + itr.category, + "operation", + _name, + "corr_id", + itr.stack_id, + "ancestor_id", + itr.parent_stack_id, + [&](::perfetto::EventContext ctx) { (void) ctx; }); + + tracing_session->FlushBlocking(); + } + } + + for(auto ditr : memory_copy_gen) + { + for(auto itr : memory_copy_gen.get(ditr)) + { + ::perfetto::Track* _track = nullptr; + if(ocfg.group_by_queue) + { + _track = &agent_thread_tracks.at(itr.dst_agent_abs_index).at(itr.tid); + } + else + { + auto stream_id = rocprofiler_stream_id_t{.handle = itr.stream_id}; + _track = &agent_stream_tracks.at(itr.dst_agent_abs_index).at(stream_id); + } + + auto src_agent_index = agent_data.at(itr.src_agent_abs_index).second; + auto dst_agent_index = agent_data.at(itr.dst_agent_abs_index).second; + 
TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::DynamicString{itr.name}, + *_track, + itr.start, + ::perfetto::Flow::Global(itr.stack_id ^ uuid_pid), + "begin_ns", + itr.start, + "end_ns", + itr.end, + "delta_ns", + (itr.end - itr.start), + "kind", + itr.category, + "operation", + itr.name, + "src_agent", + src_agent_index.as_string("-"), + "dst_agent", + dst_agent_index.as_string("-"), + "copy_bytes", + itr.size, + "corr_id", + itr.stack_id, + "tid", + itr.tid); + TRACE_EVENT_END( + sdk::perfetto_category::name, *_track, itr.end); + } + tracing_session->FlushBlocking(); + } + + for(auto ditr : kernel_dispatch_gen) + { + auto gen = kernel_dispatch_gen.get(ditr); + for(auto it = begin(gen); it != end(gen); ++it) + { + auto& current = *it; + + ::perfetto::Track* _track = nullptr; + auto agent_id = current.agent_abs_index; + auto queue_id = rocprofiler_queue_id_t{.handle = current.queue_id}; + auto stream_id = rocprofiler_stream_id_t{.handle = current.stream_id}; + if(ocfg.group_by_queue) + { + _track = &agent_queue_tracks.at(agent_id).at(queue_id); + } + else + { + _track = &agent_stream_tracks.at(agent_id).at(stream_id); + } + + // Temporary fix until timestamp issues are resolved: Set timestamps to be + // halfway between ending timestamp and starting timestamp of overlapping + // kernel dispatches. Perfetto displays slices incorrectly if overlapping + // slices on the same track are not completely enveloped. 
+ auto next = std::next(it); + if(next != end(gen) && next->agent_abs_index == it->agent_abs_index && + ((ocfg.group_by_queue && next->queue_id == it->queue_id) || + (!ocfg.group_by_queue && next->stream_id == it->stream_id)) && + next->start < it->end) + { + auto start = next->start; + auto end = it->end; + auto mid = start + (end - start) / 2; + // Report changed timestamps to ROCP INFO + ROCP_INFO << fmt::format( + "Kernel ending timestamp increased by {} ns to {} ns with " + "following kernel starting timestamp decreased by {} ns to {} ns " + "due to firmware timestamp error.", + (it->end - mid), + mid, + (mid - next->start), + mid); + it->end = mid; + next->start = mid; + } + + auto agent_index = agent_data.at(current.agent_abs_index).second; + auto _name = + (ocfg.kernel_rename && !current.region.empty()) ? current.region : current.name; + TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::DynamicString{_name}, + *_track, + current.start, + ::perfetto::Flow::Global(current.stack_id ^ uuid_pid), + "begin_ns", + current.start, + "end_ns", + current.end, + "delta_ns", + (current.end - current.start), + "kind", + current.category, + "agent", + agent_index.as_string("-"), + "corr_id", + current.stack_id, + "queue", + current.queue_id, + "tid", + current.tid, + "kernel_id", + current.kernel_id, + "private_segment_size", + current.scratch_size, + "group_segment_size", + current.lds_size, + "workgroup_size", + to_string(current.workgroup_size), + "grid_size", + to_string(current.grid_size), + [&](::perfetto::EventContext ctx) { + for(auto& [counter_id, counter_value] : counter_id_value) + { + rocprofiler::sdk::add_perfetto_annotation( + ctx, counter_id_name.at(counter_id), counter_value); + } + }); + TRACE_EVENT_END(sdk::perfetto_category::name, + *_track, + current.end); + } + tracing_session->FlushBlocking(); + } + } + + // counter tracks + { + // memory copy counter track + auto mem_cpy_endpoints = std::map>{}; + auto mem_cpy_extremes = 
std::pair{std::numeric_limits::max(), + std::numeric_limits::min()}; + auto constexpr timestamp_buffer = 1000; + for(auto ditr : memory_copy_gen) + { + for(const auto& itr : memory_copy_gen.get(ditr)) + { + uint64_t _mean_timestamp = itr.start + (0.5 * (itr.end - itr.start)); + + mem_cpy_endpoints[itr.dst_agent_abs_index].emplace(itr.start - timestamp_buffer, 0); + mem_cpy_endpoints[itr.dst_agent_abs_index].emplace(itr.start, 0); + mem_cpy_endpoints[itr.dst_agent_abs_index].emplace(_mean_timestamp, 0); + mem_cpy_endpoints[itr.dst_agent_abs_index].emplace(itr.end, 0); + mem_cpy_endpoints[itr.dst_agent_abs_index].emplace(itr.end + timestamp_buffer, 0); + + mem_cpy_extremes = std::make_pair(std::min(mem_cpy_extremes.first, itr.start), + std::max(mem_cpy_extremes.second, itr.end)); + } + } + + for(auto ditr : memory_copy_gen) + { + for(const auto& itr : memory_copy_gen.get(ditr)) + { + auto mbeg = mem_cpy_endpoints.at(itr.dst_agent_abs_index).lower_bound(itr.start); + auto mend = mem_cpy_endpoints.at(itr.dst_agent_abs_index).upper_bound(itr.end); + + LOG_IF(FATAL, mbeg == mend) + << "Missing range for timestamp [" << itr.start << ", " << itr.end << "]"; + + for(auto mitr = mbeg; mitr != mend; ++mitr) + mitr->second += itr.size; + } + } + + constexpr auto bytes_multiplier = 1024; + constexpr auto extremes_endpoint_buffer = 5000; + + auto mem_cpy_tracks = std::unordered_map{}; + auto mem_cpy_cnt_names = std::vector{}; + mem_cpy_cnt_names.reserve(mem_cpy_endpoints.size()); + + for(auto& [abs_index, ts_map] : mem_cpy_endpoints) + { + mem_cpy_endpoints[abs_index].emplace(mem_cpy_extremes.first - extremes_endpoint_buffer, + 0); + mem_cpy_endpoints[abs_index].emplace(mem_cpy_extremes.second + extremes_endpoint_buffer, + 0); + + auto _track_name = std::stringstream{}; + const auto _agent = agent_data.at(abs_index).first; + auto agent_index_info = agent_data.at(abs_index).second; + _track_name << "COPY BYTES to " << agent_index_info.label << " [" + << agent_index_info.index << 
"] (" << agent_index_info.type << ")"; + + constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES; + auto& _name = mem_cpy_cnt_names.emplace_back(_track_name.str()); + mem_cpy_tracks.emplace(abs_index, + ::perfetto::CounterTrack{_name.c_str(), this_pid_track} + .set_unit(_unit) + .set_unit_multiplier(bytes_multiplier) + .set_is_incremental(false)); + } + + for(auto& mitr : mem_cpy_endpoints) + { + for(auto itr : mitr.second) + { + TRACE_COUNTER(sdk::perfetto_category::name, + mem_cpy_tracks.at(mitr.first), + itr.first, + itr.second / bytes_multiplier); + } + tracing_session->FlushBlocking(); + } + + // memory allocation counter track + struct free_memory_information + { + rocprofiler_timestamp_t start_timestamp = 0; + rocprofiler_timestamp_t end_timestamp = 0; + rocprofiler_address_t address = {.handle = 0}; + }; + + struct memory_information + { + uint64_t alloc_size = {0}; + rocprofiler_address_t address = {.handle = 0}; + bool is_alloc_op = {false}; + }; + + struct agent_and_size + { + uint64_t agent_abs_index = {}; + uint64_t size = {0}; + }; + + auto mem_alloc_endpoints = + std::unordered_map>{}; + auto mem_alloc_extremes = std::pair{ + std::numeric_limits::max(), std::numeric_limits::min()}; + auto address_to_agent_and_size = + std::unordered_map{}; + auto free_mem_info = std::vector{}; + + // Load memory allocation endpoints + for(auto ditr : memory_allocation_gen) + { + for(const auto& itr : memory_allocation_gen.get(ditr)) + { + if(itr.type == "ALLOC") + { + LOG_IF(FATAL, itr.agent_name.empty()) + << "Missing agent id for memory allocation trace"; + mem_alloc_endpoints[itr.agent_abs_index].emplace( + itr.start, + memory_information{ + itr.size, rocprofiler_address_t{.handle = itr.address}, true}); + mem_alloc_endpoints[itr.agent_abs_index].emplace( + itr.end, + memory_information{ + itr.size, rocprofiler_address_t{.handle = itr.address}, true}); + address_to_agent_and_size.emplace( + rocprofiler_address_t{.handle = itr.address}, + 
agent_and_size{itr.agent_abs_index, itr.size}); + } + else if(itr.type == "FREE") + { + // Store free memory operations in seperate vector to pair with agent + // and allocation size in following loop + free_mem_info.push_back(free_memory_information{ + itr.start, itr.end, rocprofiler_address_t{.handle = itr.address}}); + } + else + { + ROCP_CI_LOG(WARNING) << "unhandled memory allocation type " << itr.type; + } + } + } + + // Add free memory operations to the endpoint map + for(const auto& itr : free_mem_info) + { + if(address_to_agent_and_size.count(itr.address) == 0) + { + if(itr.address.handle == 0) + { + // Freeing null pointers is expected behavior and is occurs in HSA functions + // like hipStreamDestroy + ROCP_INFO << "null pointer freed due to HSA operation"; + } + else + { + // Following should not occur + ROCP_INFO << "Unpaired free operation occurred"; + } + continue; + } + auto [agent_abs_index, size] = address_to_agent_and_size[itr.address]; + mem_alloc_endpoints[agent_abs_index].emplace( + itr.start_timestamp, memory_information{size, itr.address, false}); + mem_alloc_endpoints[agent_abs_index].emplace( + itr.end_timestamp, memory_information{size, itr.address, false}); + } + // Create running sum of allocated memory + for(auto& [_, endpoint_map] : mem_alloc_endpoints) + { + if(!endpoint_map.empty()) + { + auto earliest_agent_timestamp = endpoint_map.begin()->first; + auto latest_agent_timestamp = (--endpoint_map.end())->first; + mem_alloc_extremes = + std::make_pair(std::min(mem_alloc_extremes.first, earliest_agent_timestamp), + std::max(mem_alloc_extremes.second, latest_agent_timestamp)); + } + if(endpoint_map.size() <= 1) + { + continue; + } + + auto prev = endpoint_map.begin(); + auto itr = std::next(prev); + for(; itr != endpoint_map.end(); ++itr, ++prev) + { + // If address or allocation type are different, add or subtract from running sum + if(prev->second.address != itr->second.address || + prev->second.is_alloc_op != itr->second.is_alloc_op) 
+ { + if(itr->second.is_alloc_op) + { + itr->second.alloc_size += prev->second.alloc_size; + } + else if(prev->second.alloc_size >= itr->second.alloc_size) + { + itr->second.alloc_size = prev->second.alloc_size - itr->second.alloc_size; + } + } + else + { + itr->second.alloc_size = prev->second.alloc_size; + } + } + } + + auto mem_alloc_tracks = std::unordered_map{}; + auto mem_alloc_cnt_names = std::vector{}; + mem_alloc_cnt_names.reserve(mem_alloc_endpoints.size()); + + for(auto& [abs_index, ts_map] : mem_alloc_endpoints) + { + mem_alloc_endpoints[abs_index].emplace( + mem_alloc_extremes.first - extremes_endpoint_buffer, + memory_information{0, {0}, false}); + mem_alloc_endpoints[abs_index].emplace( + mem_alloc_extremes.second + extremes_endpoint_buffer, + memory_information{0, {0}, false}); + + auto _track_name = std::stringstream{}; + + if(agent_data.find(abs_index) != agent_data.end()) + { + const auto _agent = agent_data.at(abs_index).first; + auto agent_index_info = agent_data.at(abs_index).second; + _track_name << "ALLOCATE BYTES on " << agent_index_info.label << " [" + << agent_index_info.index << "] (" << agent_index_info.type << ")"; + } + else + { + _track_name << "FREE BYTES"; + } + + constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES; + auto& _name = mem_alloc_cnt_names.emplace_back(_track_name.str()); + mem_alloc_tracks.emplace(abs_index, + ::perfetto::CounterTrack{_name.c_str(), this_pid_track} + .set_unit(_unit) + .set_unit_multiplier(bytes_multiplier) + .set_is_incremental(false)); + } + + for(auto& alloc_itr : mem_alloc_endpoints) + { + for(auto itr : alloc_itr.second) + { + TRACE_COUNTER(sdk::perfetto_category::name, + mem_alloc_tracks.at(alloc_itr.first), + itr.first, + itr.second.alloc_size / bytes_multiplier); + } + } + tracing_session->FlushBlocking(); + } + + // Create counter tracks per agent + { + auto counters_endpoints = + std::unordered_map>>{}; + + auto counters_extremes = std::pair{ + std::numeric_limits::max(), 
std::numeric_limits::min()}; + + auto constexpr timestamp_buffer = 1000; + + for(auto ditr : counter_collection_gen) + for(const auto& record : counter_collection_gen.get(ditr)) + { + // const auto& info = record.; + + const auto& start_timestamp = record.start; + const auto& end_timestamp = record.end; + + uint64_t _mean_timestamp = + start_timestamp + (0.5 * (end_timestamp - start_timestamp)); + + for(auto& [counter_id, counter_value] : counter_id_value) + { + counters_endpoints[record.agent_abs_index][counter_id].emplace( + start_timestamp - timestamp_buffer, 0); + counters_endpoints[record.agent_abs_index][counter_id].emplace(start_timestamp, + counter_value); + counters_endpoints[record.agent_abs_index][counter_id].emplace(_mean_timestamp, + counter_value); + counters_endpoints[record.agent_abs_index][counter_id].emplace(end_timestamp, + 0); + counters_endpoints[record.agent_abs_index][counter_id].emplace( + end_timestamp + timestamp_buffer, 0); + } + + counters_extremes = std::make_pair(std::min(counters_extremes.first, record.start), + std::max(counters_extremes.second, record.end)); + } + + auto counter_tracks = + std::unordered_map>{}; + + constexpr auto extremes_endpoint_buffer = 5000; + + for(auto ditr : counter_collection_gen) + { + for(const auto& record : counter_collection_gen.get(ditr)) + { + // const auto& info = record.dispatch_data.dispatch_info; + // const auto& sym = tool_metadata.get_kernel_symbol(info.kernel_id); + + // CHECK(sym != nullptr); + + auto name = record.kernel_name; + + for(auto& [counter_id, counter_value] : counter_id_value) + { + counters_endpoints[record.agent_id][counter_id].emplace( + counters_extremes.first - extremes_endpoint_buffer, 0); + counters_endpoints[record.agent_id][counter_id].emplace( + counters_extremes.second + extremes_endpoint_buffer, 0); + + const auto _agent = agent_data.at(record.agent_abs_index).first; + auto agent_index_info = agent_data.at(record.agent_abs_index).second; + auto track_name_ss = 
std::stringstream{}; + track_name_ss << agent_index_info.label << " [" << agent_index_info.index + << "] " + << "PMC " << record.counter_name; + + auto track_name = track_name_ss.str(); + + counter_tracks[record.agent_abs_index].emplace( + track_name, ::perfetto::CounterTrack{track_name.c_str(), this_pid_track}); + auto& endpoints = counters_endpoints[record.agent_id][counter_id]; + for(auto& counter_itr : endpoints) + { + TRACE_COUNTER( + sdk::perfetto_category::name, + counter_tracks[record.agent_abs_index].at(track_name), + counter_itr.first, + counter_itr.second); + } + } + } + tracing_session->FlushBlocking(); + } + } + + ::perfetto::TrackEvent::Flush(); + tracing_session->FlushBlocking(); +} +} // namespace output +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/perfetto.hpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/perfetto.hpp new file mode 100644 index 0000000000..601a83edd9 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/perfetto.hpp @@ -0,0 +1,69 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/python/rocpd/source/types.hpp" + +#include "lib/common/defines.hpp" +#include "lib/output/generator.hpp" +#include "lib/output/metadata.hpp" +#include "lib/output/node_info.hpp" +#include "lib/output/output_config.hpp" +#include "lib/output/sql/common.hpp" +#include "lib/output/stream_info.hpp" +#include "lib/rocprofiler-sdk-tool/config.hpp" + +#include + +#include + +namespace rocpd +{ +namespace output +{ +namespace tool = ::rocprofiler::tool; + +struct PerfettoSession // holds a perfetto tracing session handle plus a reference to an output_config +{ + PerfettoSession(const tool::output_config&); + ~PerfettoSession(); + + std::unique_ptr<::perfetto::TracingSession> tracing_session = {}; // owning handle; empty by default + const tool::output_config& config; // reference member: the referenced output_config must outlive this object +}; + +void +write_perfetto( + const PerfettoSession& perfetto_session, + const types::process& process, + const std::unordered_map>& + agent_data, + const tool::generator& thread_gen, + const tool::generator& region_gen, + const tool::generator& sample_gen, + const tool::generator& kernel_dispatch_gen, + const tool::generator& memory_copy_gen, + const tool::generator& memory_allocation_gen, + const tool::generator& counter_collection_gen); +} // namespace output +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/pysqlite_Connection.h b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/pysqlite_Connection.h new file mode 100644 index 0000000000..5a44bdcb22 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/pysqlite_Connection.h @@ -0,0 +1,114 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/common/defines.hpp" + +#include +#include +#include +#include +#include + +#include + +extern "C" { +struct pysqlite_state; +struct sqlite3; + +enum autocommit_mode +{ + AUTOCOMMIT_LEGACY = -1, + AUTOCOMMIT_ENABLED = 1, + AUTOCOMMIT_DISABLED = 0, +}; + +typedef struct _callback_context +{ + PyObject* callable; + PyObject* module; + pysqlite_state* state; +} callback_context; + +// this is the python sqlite3 wrapper from Python 3.12.2 +typedef struct +{ + PyObject_HEAD; + sqlite3* db; + pysqlite_state* state; + + /* the type detection mode. 
Only 0, PARSE_DECLTYPES, PARSE_COLNAMES or a + * bitwise combination thereof makes sense */ + int detect_types; + + /* NULL for autocommit, otherwise a string with the isolation level */ + const char* isolation_level; + enum autocommit_mode autocommit; + + /* 1 if a check should be performed for each API call if the connection is + * used from the same thread it was created in */ + int check_same_thread; + + int initialized; + + /* thread identification of the thread the connection was created in */ + unsigned long thread_ident; + + PyObject* statement_cache; + + /* Lists of weak references to cursors and blobs used within this connection */ + PyObject* cursors; + PyObject* blobs; + + /* Counters for how many cursors were created in the connection. May be + * reset to 0 at certain intervals */ + int created_cursors; + + PyObject* row_factory; + + /* Determines how bytestrings from SQLite are converted to Python objects: + * - PyUnicode_Type: Python Unicode objects are constructed from UTF-8 bytestrings + * - PyBytes_Type: The bytestrings are returned as-is. + * - Any custom callable: Any object returned from the callable called with the bytestring + * as single parameter. + */ + PyObject* text_factory; + + // Remember contexts used by the trace, progress, and authoriser callbacks + callback_context* trace_ctx; + callback_context* progress_ctx; + callback_context* authorizer_ctx; + + /* Exception objects: borrowed refs. 
*/ + PyObject* Warning; + PyObject* Error; + PyObject* InterfaceError; + PyObject* DatabaseError; + PyObject* DataError; + PyObject* OperationalError; + PyObject* IntegrityError; + PyObject* InternalError; + PyObject* ProgrammingError; + PyObject* NotSupportedError; +} pysqlite_Connection; +} diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/serialization/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/serialization/CMakeLists.txt new file mode 100644 index 0000000000..c2303112f8 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/serialization/CMakeLists.txt @@ -0,0 +1,11 @@ +# +# add sql common sources to output library target +# +set(libpyrocpd_source_serialization_sources sql.hpp) +set(libpyrocpd_source_serialization_headers sql.cpp) + +foreach(_PYTHON_VERSION ${ROCPROFILER_PYTHON_VERSIONS}) + rocprofiler_rocpd_python_bindings_target_sources( + ${_PYTHON_VERSION} PRIVATE ${libpyrocpd_source_serialization_sources} + ${libpyrocpd_source_serialization_headers}) +endforeach() diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/serialization/sql.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/serialization/sql.cpp new file mode 100644 index 0000000000..d10e878be5 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/serialization/sql.cpp @@ -0,0 +1,166 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "lib/python/rocpd/source/serialization/sql.hpp" +#include "lib/output/sql/common.hpp" + +namespace cereal +{ +SQLite3InputArchive::SQLite3InputArchive(sqlite3* conn, + std::string_view query, + int64_t len, + int64_t chunk_len) +: InputArchive{this} +, m_conn{conn} +, m_row_count{(len > 0) ? 
len : getRowCount(conn, query)} +, m_chunk_size{chunk_len} +, m_query{query} +, m_iterator{this} +{ + ROCP_CI_LOG_IF(ERROR, + sqlite3_prepare_v2(m_conn, m_query.c_str(), -1, &m_stmt, nullptr) != SQLITE_OK) + << "Error preparing select statement: " << sqlite3_errmsg(m_conn); + + ROCP_CI_LOG_IF(ERROR, m_stmt == nullptr) << "Error preparing statment: " << query; + + if(!m_stmt) return; + + auto col_count = sqlite3_column_count(m_stmt); + + ROCP_TRACE << " sql query: " << query; + ROCP_TRACE << " - col_count: " << col_count; + + m_column_count = sqlite3_column_count(m_stmt); + for(int64_t i = 0; i < m_column_count; ++i) + { + auto name = impl::extract_column_name(m_stmt, i); + m_column_names.emplace(name, i); + ROCP_TRACE << " - column " << i << ": " << name; + } + + if(m_chunk_size > 0) + { + auto _num_chunks = (m_row_count / m_chunk_size); + auto _chunk_modulo = (m_row_count % m_chunk_size); + + m_sizes.resize(_num_chunks, m_chunk_size); + if(_chunk_modulo > 0) m_sizes.emplace_back(_chunk_modulo); + } +} + +void +SQLite3InputArchive::set_chunk_index(size_t idx) +{ + ROCP_FATAL_IF(idx >= m_sizes.size()) << fmt::format( + "Invalid chunk index {} (>= {}) for query '{}'", idx, m_sizes.size(), m_query); + + ROCP_TRACE << fmt::format("Setting chunk index to {}. 
Current index is {}", idx, m_size_idx); + if(idx != m_size_idx) + { + auto _status = sqlite3_reset(m_stmt); + + ROCP_FATAL_IF(_status != SQLITE_OK) + << fmt::format("sqlite3_reset failed for statement '{}'", m_query); + + for(size_t i = 0; i < (idx * m_chunk_size); ++i) + ++m_iterator; + m_size_idx = idx; + } +} + +void +SQLite3InputArchive::loadBinaryValue(void* data, size_t size, const char* name) +{ + m_next_name = name; + + std::string encoded; + loadValue(encoded); + auto decoded = base64::decode(encoded); + + if(size != decoded.size()) + throw Exception("Decoded binary data size does not match specified size"); + + std::memcpy(data, decoded.data(), decoded.size()); + m_next_name = nullptr; +}; + +void +SQLite3InputArchive::loadSize(size_type& size) const +{ + if(m_sizes.empty()) + size = m_row_count; + else if(m_size_idx >= m_sizes.size()) + size = 0; + else + size = m_sizes.at(m_size_idx++); + + // ROCP_WARNING << fmt::format( + // "[SQLite3InputArchive] counted={}, chunk_size={}, size={}", m_counted, m_chunk_size, + // size); +} + +int64_t +SQLite3InputArchive::search(std::string_view _col_name) +{ + auto itr = m_column_names.find(_col_name.data()); + if(itr == m_column_names.end()) + { + auto _names = std::vector{}; + for(const auto& citr : m_column_names) + _names.emplace_back(citr.first); + auto _msg = fmt::format("SQL query '{}' does not contain a column named '{}'. 
Columns: {}", + m_query, + m_next_name, + fmt::join(_names.begin(), _names.end(), ", ")); + ROCP_WARNING << _msg; + throw Exception(_msg); + } + return itr->second; +} + +int64_t +SQLite3InputArchive::search(std::string_view _col_name, std::nothrow_t) +{ + auto itr = m_column_names.find(_col_name.data()); + if(itr == m_column_names.end()) + { + ROCP_WARNING << fmt::format( + "SQL query '{}' does not contain a column named '{}'", m_query, m_next_name); + return -1; + } + return itr->second; +} + +int64_t +SQLite3InputArchive::search() +{ + auto idx = search(m_next_name); + m_next_name = nullptr; + return idx; +} + +int64_t +SQLite3InputArchive::getRowCount(sqlite3* conn, std::string_view query) +{ + return impl::extract_row_count(conn, query); +} +} // namespace cereal diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/serialization/sql.hpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/serialization/sql.hpp new file mode 100644 index 0000000000..b948235f47 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/serialization/sql.hpp @@ -0,0 +1,341 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/common/logging.hpp" +#include "lib/common/mpl.hpp" +#include "lib/output/sql/common.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace cereal +{ +namespace impl +{ +using namespace ::rocprofiler::tool::sql; +} // namespace impl + +class SQLite3InputArchive +: public InputArchive +, public traits::TextArchive +{ +public: + SQLite3InputArchive(sqlite3* conn, + std::string_view query, + int64_t len = 0, + int64_t chunk_len = 0); + + ~SQLite3InputArchive() CEREAL_NOEXCEPT override + { + sqlite3_finalize(m_stmt); // Finalize statement + } + +public: + //! Retrieves the current node name (not tracked by this archive) + /*! @return always nullptr */ + static const char* getNodeName() { return nullptr; } + + static int64_t getRowCount(sqlite3* conn, std::string_view query); + + void startNode() { ++m_iterator; } // steps the prepared statement to the next row + void finishNode() const {} + + //! 
Sets the column name consumed by the next loadValue(Tp&) call + void setNextName(const char* name) { m_next_name = name; } + + template + void loadValue(std::string_view colname, Tp& val); + + template + void loadValue(Tp& val); + + void loadBinaryValue(void* data, size_t size, const char* name = nullptr); + void loadSize(size_type& size) const; + + int64_t search(std::string_view); + int64_t search(std::string_view, std::nothrow_t); + + void set_chunk_index(size_t idx); + +private: + int64_t search(); + +private: + class Iterator + { + public: + Iterator(SQLite3InputArchive* ar) + : m_archive{ar} + {} + + //! Advance to the next row of the archive's prepared statement + Iterator& operator++() + { + m_step_ret = sqlite3_step(m_archive->m_stmt); // result code is stored but not inspected here + return *this; + } + + private: + int m_step_ret = 0; + SQLite3InputArchive* m_archive = nullptr; + }; + + friend class Iterator; + +private: + sqlite3* m_conn = nullptr; + sqlite3_stmt* m_stmt = nullptr; + const char* m_next_name = nullptr; // non-owning; set by setNextName(), cleared by search() + int64_t m_row_count = 0; + int64_t m_column_count = 0; + int64_t m_chunk_size = 0; + std::string m_query = {}; + std::unordered_map m_column_names = {}; + Iterator m_iterator; + std::vector m_sizes = {}; + mutable size_t m_size_idx = 0; // mutable: advanced by the const loadSize() +}; + +template +void +SQLite3InputArchive::loadValue(std::string_view colname, Tp& val) +{ + auto col = search(colname); + auto ret = impl::extract_column(m_stmt, col); + if(ret) val = *ret; // val is left untouched when no value is extracted +} + +template +void +SQLite3InputArchive::loadValue(Tp& val) +{ + auto col = search(); + auto ret = impl::extract_column(m_stmt, col); + if(ret) val = *ret; // val is left untouched when no value is extracted +} + +// ###################################################################### +// SQLite3Archive prologue and epilogue functions +// 
###################################################################### + +// ###################################################################### +//! Prologue for NVPs for SQLite3 archives +/*! 
NVPs do not start or finish nodes - they just set up the names */ +template +inline void +prologue(SQLite3InputArchive&, NameValuePair const&) +{} + +// ###################################################################### +//! Epilogue for NVPs for SQLite3 archives +/*! NVPs do not start or finish nodes - they just set up the names */ +template +inline void +epilogue(SQLite3InputArchive&, NameValuePair const&) +{} + +// ###################################################################### +//! Prologue for deferred data for SQLite3 archives +/*! Do nothing for the defer wrapper */ +template +inline void +prologue(SQLite3InputArchive&, DeferredData const&) +{} + +// ###################################################################### +//! Epilogue for deferred data for SQLite3 archives +/*! Deferred data does not start or finish nodes */ +/*! Do nothing for the defer wrapper */ +template +inline void +epilogue(SQLite3InputArchive&, DeferredData const&) +{} + +// ###################################################################### +//! Prologue for SizeTags for SQLite3 archives +/*! SizeTags are strictly ignored for SQLite3, they just indicate + that the current node should be made into an array */ +template +inline void +prologue(SQLite3InputArchive&, SizeTag const&) +{} + +// ###################################################################### +//! Epilogue for SizeTags for SQLite3 archives +/*! SizeTags are strictly ignored for SQLite3 */ +template +inline void +epilogue(SQLite3InputArchive&, SizeTag const&) +{} + +// ###################################################################### +//! Prologue for all other types for SQLite3 archives (except minimal types) +/*! 
Starts a new node, named either automatically or by some NVP, + that may be given data by the type about to be archived + + Minimal types do not start or finish nodes */ +template < + class T, + traits::EnableIf< + !std::is_arithmetic::value, + !traits::has_minimal_base_class_serialization::value, + !traits::has_minimal_input_serialization::value> = traits::sfinae> +inline void +prologue(SQLite3InputArchive& ar, T const&) +{ + ar.startNode(); +} + +// ###################################################################### +//! Epilogue for all other types for SQLite3 archives (except minimal types) +/*! Finishes the node created in the prologue + + Minimal types do not start or finish nodes */ +template < + class T, + traits::EnableIf< + !std::is_arithmetic::value, + !traits::has_minimal_base_class_serialization::value, + !traits::has_minimal_input_serialization::value> = traits::sfinae> +inline void +epilogue(SQLite3InputArchive& ar, T const&) +{ + ar.finishNode(); +} + +// ###################################################################### +//! Prologue for nullptr for SQLite3 archives +inline void +prologue(SQLite3InputArchive&, std::nullptr_t const&) +{} + +// ###################################################################### +//! Epilogue for nullptr for SQLite3 archives +inline void +epilogue(SQLite3InputArchive&, std::nullptr_t const&) +{} + +// ###################################################################### +//! Prologue for arithmetic types for SQLite3 archives +template ::value> = traits::sfinae> +inline void +prologue(SQLite3InputArchive&, T const&) +{} + +// ###################################################################### +//! Epilogue for arithmetic types for SQLite3 archives +template ::value> = traits::sfinae> +inline void +epilogue(SQLite3InputArchive&, T const&) +{} + +// ###################################################################### +//! 
Prologue for strings for SQLite3 archives +template +inline void +prologue(SQLite3InputArchive&, std::basic_string const&) +{} + +// ###################################################################### +//! Epilogue for strings for SQLite3 archives +template +inline void +epilogue(SQLite3InputArchive&, std::basic_string const&) +{} + +// ###################################################################### +// Common SQLite3Archive serialization functions +// ###################################################################### +//! Loading NVP types from SQLite3 (the NVP name selects the column) +template +inline void +CEREAL_LOAD_FUNCTION_NAME(SQLite3InputArchive& ar, NameValuePair& t) +{ + ar.setNextName(t.name); + ar(t.value); +} + +//! Loading nullptr from SQLite3 +inline void +CEREAL_LOAD_FUNCTION_NAME(SQLite3InputArchive& ar, std::nullptr_t& t) +{ + ar.loadValue(t); +} + +//! Loading byte array from SQLite3 +template +inline void +CEREAL_LOAD_FUNCTION_NAME(SQLite3InputArchive& ar, std::array& t) +{ + ar.loadValue(t); +} + +template +inline void +CEREAL_LOAD_FUNCTION_NAME(SQLite3InputArchive& ar, std::array& t) +{ + ar.loadValue(t); +} + +//! Loading arithmetic from SQLite3 +template ::value> = traits::sfinae> +inline void +CEREAL_LOAD_FUNCTION_NAME(SQLite3InputArchive& ar, T& t) +{ + ar.loadValue(t); +} + +//! loading string from SQLite3 +template +inline void +CEREAL_LOAD_FUNCTION_NAME(SQLite3InputArchive& ar, std::basic_string& str) +{ + ar.loadValue(str); +} + +// ###################################################################### +//! 
Loading SizeTags from SQLite3 +template +inline void +CEREAL_LOAD_FUNCTION_NAME(SQLite3InputArchive& ar, SizeTag& st) +{ + ar.loadSize(st.size); +} +} // namespace cereal + +// register archives for polymorphic support +CEREAL_REGISTER_ARCHIVE(cereal::SQLite3InputArchive) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/sql_generator.hpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/sql_generator.hpp new file mode 100644 index 0000000000..6284857afb --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/sql_generator.hpp @@ -0,0 +1,161 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "lib/python/rocpd/source/serialization/sql.hpp" + +#include "lib/common/container/ring_buffer.hpp" +#include "lib/common/mpl.hpp" +#include "lib/common/units.hpp" +#include "lib/output/domain_type.hpp" +#include "lib/output/generator.hpp" +#include "lib/output/sql/common.hpp" + +#include + +#include +#include +#include +#include + +namespace rocpd +{ +namespace tool = ::rocprofiler::tool; + +template +struct sql_generator : public tool::generator +{ + using base_type = tool::generator; + + sql_generator(sqlite3* conn, + std::string_view query, + std::string_view order_by = {}, + int64_t chunk_size = compute_chunk_size()); + + sql_generator() = delete; + ~sql_generator() override = default; + + sql_generator(const sql_generator&) = delete; + sql_generator(sql_generator&&) noexcept = delete; + sql_generator& operator=(const sql_generator&) = delete; + sql_generator& operator=(sql_generator&&) noexcept = delete; + + std::vector get(size_t itr) const override; + + static int64_t compute_chunk_size(); + +private: + static std::string sanitize_query(std::string_view query); + +private: + using archive_t = cereal::SQLite3InputArchive; + + sqlite3* m_conn = nullptr; + std::string m_query = {}; + std::string m_order = {}; + int64_t m_chunk_size = 0; + int64_t m_num_entries = 0; + int64_t m_num_chunks = 0; + std::vector m_expected = {}; + archive_t m_archive; +}; + +template +std::string +sql_generator::sanitize_query(std::string_view query) +{ + if(auto pos = query.find(';'); pos != std::string_view::npos) + return std::string{query.substr(0, pos)}; + + return std::string{query}; +} + +template +int64_t +sql_generator::compute_chunk_size() +{ + return (16 * ::rocprofiler::common::units::get_page_size()) / sizeof(Tp); +} + +template +sql_generator::sql_generator(sqlite3* conn, + std::string_view query, + std::string_view order_by, + int64_t chunk_size) +: base_type{tool::defer_size{}} +, m_conn{conn} +, m_query{sanitize_query(query)} +, 
m_order{(order_by.empty()) ? std::string{} + : fmt::format(" ORDER BY {}", sanitize_query(order_by))} +, m_chunk_size{chunk_size} +, m_num_entries{tool::sql::extract_row_count(m_conn, sanitize_query(query))} +, m_num_chunks{(m_num_entries / m_chunk_size) + ((m_num_entries % m_chunk_size) > 0 ? 1 : 0)} +, m_archive{m_conn, fmt::format("{}{}", m_query, m_order), m_num_entries, m_chunk_size} +{ + base_type::resize(m_num_chunks); + + m_expected.resize(m_num_chunks, m_chunk_size); + if(!m_expected.empty() && (m_num_entries % m_chunk_size) > 0) + m_expected.back() = (m_num_entries % m_chunk_size); + + ROCP_TRACE << fmt::format("- Query : {}", query); + ROCP_TRACE << fmt::format(" Expected: {}", + fmt::join(m_expected.begin(), m_expected.end(), ", ")); +} + +template +std::vector +sql_generator::get(size_t idx) const +{ + auto _data = std::vector{}; + + if(idx < static_cast(m_num_chunks)) + { + // auto _offset = idx * m_chunk_size; + // auto _limit = m_chunk_size; + // auto _query = fmt::format("{}{} LIMIT {} OFFSET {};", m_query, m_order, _limit, + // _offset); + + // auto* conn = const_cast(m_conn); + // auto ar = cereal::SQLite3InputArchive{conn, _query}; + + auto& ar = const_cast(m_archive); + ar.set_chunk_index(idx); + + cereal::load(ar, _data); + + ROCP_FATAL_IF(_data.size() != m_expected.at(idx)) + << fmt::format("Unexpected SQL query result for group {}. Found {} rows. 
Expected {} " + "rows.\nQuery:\n\t{}\n# of entries: {}, chunk size: {}, # chunks: {}", + idx, + _data.size(), + m_expected.at(idx), + m_query, + m_num_entries, + m_chunk_size, + m_num_chunks); + } + + return _data; +} +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/types.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/types.cpp new file mode 100644 index 0000000000..480e39d38a --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/types.cpp @@ -0,0 +1,89 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "lib/python/rocpd/source/types.hpp" +#include "lib/python/rocpd/source/common.hpp" + +#include "lib/output/node_info.hpp" + +#include +#include + +#include + +#include + +namespace rocpd +{ +namespace types +{ +// std::string +// blob::hexdigest() const +// { +// auto _oss = std::ostringstream{}; +// for(auto itr : (*this)) +// _oss << std::hex << std::setw(2) << std::setfill('0') << +// static_cast(itr); +// return _oss.str(); +// } + +// std::string +// blob::hexliteral() const +// { +// auto _oss = std::ostringstream{}; +// _oss << "X'"; +// for(auto itr : (*this)) +// _oss << std::hex << std::setw(2) << std::setfill('0') << +// static_cast(itr); +// _oss << "'"; +// return _oss.str(); +// } + +void +agent::load_extdata() +{ + if(has_extdata()) + common::read_json_string( + extdata, [](auto& _ar, base_type& _base) { cereal::load(_ar, _base); }, base()); +} + +region::decoded_extdata +region::get_extdata() const +{ + auto _msg = decoded_extdata{}; + if(has_extdata()) + common::read_json_string( + extdata, [](auto& ar, auto& msg) { cereal::load(ar, msg); }, _msg); + return _msg; +} + +sample::decoded_extdata +sample::get_extdata() const +{ + auto _msg = decoded_extdata{}; + if(has_extdata()) + common::read_json_string( + extdata, [](auto& ar, auto& msg) { cereal::load(ar, msg); }, _msg); + return _msg; +} +} // namespace types +} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/types.hpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/types.hpp new file mode 100644 index 0000000000..165c68c36e --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/types.hpp @@ -0,0 +1,942 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "lib/output/agent_info.hpp" +#include "lib/output/node_info.hpp" + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace rocpd +{ +namespace types +{ +namespace tool = ::rocprofiler::tool; + +template +struct base_class : public BaseT +{ + using base_type = BaseT; + + auto& base() { return static_cast(*this); } + const auto& base() const { return static_cast(*this); } +}; + +using guid_t = std::string; + +// struct blob : private std::array +// { +// using base_type = std::array; + +// auto& base() { return static_cast(*this); } +// const auto& base() const { return static_cast(*this); } + +// using base_type::at; +// using base_type::operator[]; +// using base_type::data; + +// friend bool operator==(const blob& lhs, const blob& rhs) +// { +// for(size_t i = 0; i < lhs.size(); ++i) +// if(lhs.at(i) != rhs.at(i)) return false; +// return true; +// } + +// friend bool operator!=(const blob& lhs, const blob& rhs) +// { +// for(size_t i = 0; i < lhs.size(); ++i) +// if(lhs.at(i) != rhs.at(i)) return true; +// return false; +// } + +// // decltype(auto) at(size_t idx) { return base_type::at(idx); } +// // decltype(auto) at(size_t idx) const { return base_type::at(idx); } +// // decltype(auto) operator[](size_t idx) { return +// base_type::operator[](idx); } +// // decltype(auto) operator[](size_t idx) const { return +// base_type::operator[](idx); } + +// // decltype(auto) data() { return base_type::data(); } +// // decltype(auto) data() const { return base_type::data(); } + +// std::string hexdigest() const; +// std::string hexliteral() const; +// }; + +struct node : public base_class +{ + guid_t guid = {}; +}; + +// common base class for node info +struct common_node_info +{ + guid_t guid = {}; + uint64_t nid = 0; + std::string machine_id = {}; + std::string hostname = {}; + std::string system_name = {}; + std::string system_release = {}; + std::string system_version 
= {}; +}; + +struct process : public base_class +{ + pid_t ppid = 0; + pid_t pid = 0; + uint64_t init = 0; + uint64_t start = 0; + uint64_t end = 0; + uint64_t fini = 0; + std::string command = {}; +}; + +struct thread : public base_class +{ + pid_t ppid = 0; + pid_t pid = 0; + pid_t tid = 0; + uint64_t start = 0; + uint64_t end = 0; + std::string name = {}; + + bool is_main_thread() const { return (tid == pid); } +}; + +struct agent : public base_class +{ + guid_t guid = {}; + uint64_t nid = 0; + uint64_t absolute_index = 0; + std::string type = {}; + std::string user_name = {}; + std::string extdata = {}; + + bool has_extdata() const { return (extdata.length() > 2); } + void load_extdata(); +}; + +struct code_object +{ + uint64_t id = 0; + guid_t guid = {}; + uint64_t nid = 0; + uint64_t pid = 0; + uint64_t agent_abs_index = 0; + std::string uri = {}; + uint64_t load_base = 0; + uint64_t load_size = 0; + uint64_t load_delta = 0; + std::string storage_type_str = {}; + uint64_t storage_type = 0; + uint64_t memory_base = 0; + uint64_t memory_size = 0; + uint16_t code_object_size = 0; +}; + +struct kernel_symbol +{ + uint64_t id = 0; + guid_t guid = {}; + uint64_t nid = 0; + pid_t pid = 0; + uint64_t code_object_id = 0; + std::string kernel_name = {}; + std::string display_name = {}; + uint64_t kernel_id = 0; + uint64_t kernel_object = 0; + uint64_t kernarg_segment_size = 0; + uint64_t kernarg_segment_alignment = 0; + uint64_t group_segment_size = 0; + uint64_t private_segment_size = 0; + uint32_t sgpr_count = 0; + uint32_t arch_vgpr_count = 0; + uint32_t accum_vgpr_count = 0; + uint64_t kernel_symbol_size = 0; + uint64_t kernel_code_entry_byte_offset = 0; + std::string formatted_kernel_name = {}; + std::string demangled_kernel_name = {}; + std::string truncated_kernel_name = {}; + uint64_t kernel_address = 0; +}; + +struct region +{ + struct decoded_extdata + { + std::string message = {}; + }; + + uint64_t id = 0; + guid_t guid = {}; + std::string category = {}; + 
std::string name = {}; + pid_t nid = 0; + pid_t pid = 0; + pid_t tid = 0; + rocprofiler_timestamp_t start = 0; + rocprofiler_timestamp_t end = 0; + uint64_t event_id = 0; + uint64_t stack_id = 0; + uint64_t parent_stack_id = 0; + uint64_t corr_id = 0; + std::string extdata = {}; + + bool has_extdata() const { return (extdata.length() > 2); } + decoded_extdata get_extdata() const; +}; + +struct sample +{ + struct decoded_extdata + { + std::string message = {}; + }; + + uint64_t id = 0; + guid_t guid = {}; + std::string category = {}; + std::string name = {}; + pid_t nid = 0; + pid_t pid = 0; + pid_t tid = 0; + rocprofiler_timestamp_t timestamp = 0; + uint64_t event_id = 0; + uint64_t stack_id = 0; + uint64_t parent_stack_id = 0; + uint64_t corr_id = 0; + std::string extdata = {}; + + bool has_extdata() const { return (extdata.length() > 2); } + decoded_extdata get_extdata() const; +}; + +struct region_arg +{ + uint64_t id = 0; + guid_t guid = {}; + pid_t nid = 0; + pid_t pid = 0; + std::string type = {}; + std::string name = {}; + std::string value = {}; +}; + +struct kernel_dispatch +{ + uint64_t id = 0; + guid_t guid = {}; + std::string category = {}; + std::string region = {}; + std::string name = {}; + pid_t nid = 0; + pid_t pid = 0; + pid_t tid = 0; + uint64_t agent_abs_index = 0; + uint64_t agent_log_index = 0; + uint64_t agent_type_index = 0; + std::string agent_type = {}; + uint64_t code_object_id = 0; + uint64_t kernel_id = 0; + uint64_t dispatch_id = 0; + uint64_t stream_id = 0; + uint64_t queue_id = 0; + std::string queue = {}; + std::string stream = {}; + rocprofiler_timestamp_t start = 0; + rocprofiler_timestamp_t end = 0; + rocprofiler_dim3_t grid_size = {}; + rocprofiler_dim3_t workgroup_size = {}; + uint64_t lds_size = 0; + uint64_t scratch_size = 0; + uint64_t static_lds_size = 0; + uint64_t static_scratch_size = 0; + uint64_t stack_id = 0; + uint64_t parent_stack_id = 0; + uint64_t corr_id = 0; +}; + +struct memory_allocation +{ + uint64_t id = 0; 
+ guid_t guid = {}; + pid_t pid = 0; + pid_t tid = 0; + rocprofiler_timestamp_t start = 0; + rocprofiler_timestamp_t end = 0; + std::string type = {}; + std::string level = {}; + std::string agent_name = {}; + std::string category = {}; + uint64_t agent_abs_index = 0; + uint64_t agent_log_index = 0; + uint64_t agent_type_index = 0; + std::string agent_type = {}; + uint64_t address = 0; + uint64_t size = 0; + uint64_t queue_id = 0; + std::string queue_name = {}; + uint64_t stream_id = 0; + std::string stream_name = {}; + uint64_t stack_id = 0; + uint64_t parent_stack_id = 0; + uint64_t corr_id = 0; +}; + +struct memory_copies +{ + uint64_t id = 0; + guid_t guid = {}; + pid_t pid = 0; + pid_t tid = 0; + rocprofiler_timestamp_t start = 0; + rocprofiler_timestamp_t end = 0; + std::string name = {}; + std::string region_name = {}; + std::string category = {}; + uint64_t stream_id = 0; + uint64_t queue_id = 0; + std::string stream_name = {}; + std::string queue_name = {}; + uint64_t size = 0; + std::string dst_device = {}; + uint64_t dst_agent_abs_index = 0; + uint64_t dst_agent_log_index = 0; + uint64_t dst_agent_type_index = 0; + std::string dst_agent_type = {}; + uint64_t dst_address = 0; + std::string src_device = {}; + uint64_t src_agent_abs_index = 0; + uint64_t src_agent_log_index = 0; + uint64_t src_agent_type_index = 0; + std::string src_agent_type = {}; + uint64_t src_address = 0; + uint64_t stack_id = 0; + uint64_t parent_stack_id = 0; + uint64_t corr_id = 0; +}; + +struct scratch_memory +{ + guid_t guid = {}; + std::string operation = {}; + std::string category = {}; + uint64_t agent_abs_index = 0; + uint64_t agent_log_index = 0; + uint64_t agent_type_index = 0; + std::string agent_type = {}; + uint64_t queue_id = 0; + pid_t pid = 0; + pid_t tid = 0; + std::string alloc_flags = {}; + rocprofiler_timestamp_t start = 0; + rocprofiler_timestamp_t end = 0; + uint64_t size = 0; + uint64_t stack_id = 0; + uint64_t parent_stack_id = 0; + uint64_t corr_id = 0; +}; + 
+struct stats +{ + std::string name = {}; + uint64_t calls = 0; + uint64_t total_duration = 0; + double sqr = 0.0; + double average = 0.0; + double percentage = 0.0; + uint64_t min_ns = 0; + uint64_t max_ns = 0; + double variance = 0.0; + double std_dev = 0.0; +}; + +struct stats_node +{ + guid_t guid = {}; + pid_t pid = 0; + // uint64_t nid = 0; // nid is not used in stats_node + std::string name = {}; + uint64_t calls = 0; + uint64_t total_duration = 0; + double sqr = 0.0; + double average = 0.0; + double percentage = 0.0; + uint64_t min_ns = 0; + uint64_t max_ns = 0; + double variance = 0.0; + double std_dev = 0.0; +}; + +// Add this struct after the existing type definitions + +struct pmc_event +{ + uint64_t id = 0; + guid_t guid = {}; + pid_t pid = 0; + uint64_t event_id = 0; + uint64_t pmc_id = 0; + double counter_value = 0; +}; + +struct counter +{ + uint64_t id = 0; + guid_t guid = {}; + uint64_t dispatch_id = 0; + uint64_t kernel_id = 0; + uint32_t stack_id = 0; + uint64_t correlation_id = 0; + uint64_t event_id = 0; + pid_t pid = 0; + pid_t tid = 0; + uint32_t agent_id = 0; + uint64_t agent_abs_index = 0; + uint64_t agent_log_index = 0; + uint64_t agent_type_index = 0; + std::string agent_type = {}; + uint64_t queue_id = 0; + uint32_t grid_size_x = 0; + uint32_t grid_size_y = 0; + uint32_t grid_size_z = 0; + uint64_t grid_size = 0; + std::string kernel_name = {}; + std::string kernel_region = {}; + uint32_t workgroup_size_x = 0; + uint32_t workgroup_size_y = 0; + uint32_t workgroup_size_z = 0; + uint32_t workgroup_size = 0; + uint32_t lds_block_size = 0; + uint32_t scratch_size = 0; + uint32_t vgpr_count = 0; + uint32_t accum_vgpr_count = 0; + uint32_t sgpr_count = 0; + std::string counter_name = {}; + std::string counter_symbol = {}; + std::string component = {}; + std::string description = {}; + std::string block = {}; + std::string expression = {}; + std::string value_type = {}; + uint32_t counter_id = 0; + double value = 0; + rocprofiler_timestamp_t 
start = 0; + rocprofiler_timestamp_t end = 0; + bool is_constant = false; + bool is_derived = false; +}; + +struct pmc_info +{ + uint64_t id = 0; + guid_t guid = {}; + uint64_t nid = 0; + uint64_t agent_abs_index = 0; + bool is_constant = false; + bool is_derived = false; + std::string name = {}; + std::string description = {}; + std::string block = {}; + std::string expression = {}; +}; + +} // namespace types +} // namespace rocpd + +namespace cereal +{ +#define LOAD_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD)) +#define LOAD_DATA_NAMED(NAME, FIELD) ar(make_nvp(NAME, data.FIELD)) +#define LOAD_DATA_VALUE(NAME, ARG) ar(make_nvp(NAME, ARG)) + +// template +// void +// load(ArchiveT& ar, rocpd::types::blob& data) +// { +// ::cereal::load(ar, data.base()); +// } + +template +void +load(ArchiveT& ar, rocpd::types::node& data) +{ + ::cereal::load(ar, data.base()); + + LOAD_DATA_FIELD(guid); +} + +template +void +load(ArchiveT& ar, rocpd::types::common_node_info& data) +{ + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(machine_id); + LOAD_DATA_FIELD(hostname); + LOAD_DATA_FIELD(system_name); + LOAD_DATA_FIELD(system_release); + LOAD_DATA_FIELD(system_version); +} + +template +void +load(ArchiveT& ar, rocpd::types::process& data) +{ + ::cereal::load(ar, data.base()); + + LOAD_DATA_FIELD(ppid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(init); + LOAD_DATA_FIELD(start); + LOAD_DATA_FIELD(end); + LOAD_DATA_FIELD(fini); + LOAD_DATA_FIELD(command); +} + +template +void +load(ArchiveT& ar, rocpd::types::thread& data) +{ + ::cereal::load(ar, data.base()); + + LOAD_DATA_FIELD(ppid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(tid); + LOAD_DATA_FIELD(start); + LOAD_DATA_FIELD(end); + LOAD_DATA_FIELD(name); + + if(data.name.empty()) data.name = fmt::format("Thread {}", data.tid); + if(data.tid == data.pid && data.name.find("[main]") == std::string::npos) + data.name += std::string(" [main]"); +} + +template +void +load(ArchiveT& ar, rocpd::types::agent& data) 
+{ + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(absolute_index); + LOAD_DATA_FIELD(type); + LOAD_DATA_FIELD(user_name); + LOAD_DATA_FIELD(extdata); + + data.load_extdata(); +} + +template +void +load(ArchiveT& ar, rocpd::types::code_object& data) +{ + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(agent_abs_index); + LOAD_DATA_FIELD(uri); + LOAD_DATA_FIELD(load_base); + LOAD_DATA_FIELD(load_size); + LOAD_DATA_FIELD(load_delta); + LOAD_DATA_FIELD(storage_type_str); + LOAD_DATA_FIELD(storage_type); + LOAD_DATA_FIELD(memory_base); + LOAD_DATA_FIELD(memory_size); + LOAD_DATA_FIELD(code_object_size); +} + +// Add after the pmc_info serialization (around line 1005) + +template +void +load(ArchiveT& ar, rocpd::types::kernel_symbol& data) +{ + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(code_object_id); + LOAD_DATA_FIELD(kernel_name); + LOAD_DATA_FIELD(display_name); + LOAD_DATA_FIELD(kernel_id); + LOAD_DATA_FIELD(kernel_object); + LOAD_DATA_FIELD(kernarg_segment_size); + LOAD_DATA_FIELD(kernarg_segment_alignment); + LOAD_DATA_FIELD(group_segment_size); + LOAD_DATA_FIELD(private_segment_size); + LOAD_DATA_FIELD(sgpr_count); + LOAD_DATA_FIELD(arch_vgpr_count); + LOAD_DATA_FIELD(accum_vgpr_count); + LOAD_DATA_FIELD(kernel_symbol_size); + LOAD_DATA_FIELD(kernel_code_entry_byte_offset); + LOAD_DATA_FIELD(formatted_kernel_name); + LOAD_DATA_FIELD(demangled_kernel_name); + LOAD_DATA_FIELD(truncated_kernel_name); + LOAD_DATA_FIELD(kernel_address); +} + +template +void +load(ArchiveT& ar, rocpd::types::region& data) +{ + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(category); + LOAD_DATA_FIELD(name); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(tid); + LOAD_DATA_FIELD(start); + LOAD_DATA_FIELD(end); + LOAD_DATA_FIELD(event_id); + LOAD_DATA_FIELD(stack_id); + 
LOAD_DATA_FIELD(parent_stack_id); + LOAD_DATA_FIELD(corr_id); + LOAD_DATA_FIELD(extdata); +} + +template +void +load(ArchiveT& ar, rocpd::types::region::decoded_extdata& data) +{ + LOAD_DATA_FIELD(message); +} + +template +void +load(ArchiveT& ar, rocpd::types::sample& data) +{ + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(category); + LOAD_DATA_FIELD(name); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(tid); + LOAD_DATA_FIELD(timestamp); + LOAD_DATA_FIELD(event_id); + LOAD_DATA_FIELD(stack_id); + LOAD_DATA_FIELD(parent_stack_id); + LOAD_DATA_FIELD(corr_id); + LOAD_DATA_FIELD(extdata); +} + +template +void +load(ArchiveT& ar, rocpd::types::sample::decoded_extdata& data) +{ + LOAD_DATA_FIELD(message); +} + +template +void +load(ArchiveT& ar, rocpd::types::region_arg& data) +{ + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(type); + LOAD_DATA_FIELD(name); + LOAD_DATA_FIELD(value); +} + +template +void +load(ArchiveT& ar, rocpd::types::kernel_dispatch& data) +{ + auto load_dim3 = [&ar](std::string_view view, auto& _v) { + ar(make_nvp(fmt::format("{}_x", view), _v.x)); + ar(make_nvp(fmt::format("{}_y", view), _v.y)); + ar(make_nvp(fmt::format("{}_z", view), _v.z)); + }; + + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(category); + LOAD_DATA_FIELD(region); + LOAD_DATA_FIELD(name); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(tid); + LOAD_DATA_FIELD(agent_abs_index); + LOAD_DATA_FIELD(agent_log_index); + LOAD_DATA_FIELD(agent_type_index); + LOAD_DATA_FIELD(agent_type); + LOAD_DATA_FIELD(code_object_id); + LOAD_DATA_FIELD(kernel_id); + LOAD_DATA_FIELD(dispatch_id); + LOAD_DATA_FIELD(queue_id); + LOAD_DATA_FIELD(stream_id); + LOAD_DATA_FIELD(queue); + LOAD_DATA_FIELD(stream); + LOAD_DATA_FIELD(start); + LOAD_DATA_FIELD(end); + load_dim3("workgroup", data.workgroup_size); + load_dim3("grid", data.grid_size); + 
LOAD_DATA_FIELD(lds_size); + LOAD_DATA_FIELD(scratch_size); + LOAD_DATA_FIELD(static_lds_size); + LOAD_DATA_FIELD(static_scratch_size); + LOAD_DATA_FIELD(stack_id); + LOAD_DATA_FIELD(parent_stack_id); + LOAD_DATA_FIELD(corr_id); +} + +template +void +load(ArchiveT& ar, rocpd::types::memory_allocation& data) +{ + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(tid); + LOAD_DATA_FIELD(start); + LOAD_DATA_FIELD(end); + LOAD_DATA_FIELD(type); + LOAD_DATA_FIELD(level); + LOAD_DATA_FIELD(agent_name); + LOAD_DATA_FIELD(agent_abs_index); + LOAD_DATA_FIELD(agent_log_index); + LOAD_DATA_FIELD(agent_type_index); + LOAD_DATA_FIELD(agent_type); + LOAD_DATA_FIELD(address); + LOAD_DATA_FIELD(size); + LOAD_DATA_FIELD(queue_id); + LOAD_DATA_FIELD(queue_name); + LOAD_DATA_FIELD(stream_id); + LOAD_DATA_FIELD(stream_name); + LOAD_DATA_FIELD(category); + LOAD_DATA_FIELD(stack_id); + LOAD_DATA_FIELD(parent_stack_id); + LOAD_DATA_FIELD(corr_id); +} + +template +void +load(ArchiveT& ar, rocpd::types::memory_copies& data) +{ + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(tid); + LOAD_DATA_FIELD(start); + LOAD_DATA_FIELD(end); + LOAD_DATA_FIELD(name); + LOAD_DATA_FIELD(region_name); + LOAD_DATA_FIELD(stream_id); + LOAD_DATA_FIELD(queue_id); + LOAD_DATA_FIELD(stream_name); + LOAD_DATA_FIELD(queue_name); + LOAD_DATA_FIELD(size); + LOAD_DATA_FIELD(dst_device); + LOAD_DATA_FIELD(dst_agent_abs_index); + LOAD_DATA_FIELD(dst_agent_log_index); + LOAD_DATA_FIELD(dst_agent_type_index); + LOAD_DATA_FIELD(dst_agent_type); + LOAD_DATA_FIELD(dst_address); + LOAD_DATA_FIELD(src_device); + LOAD_DATA_FIELD(src_agent_abs_index); + LOAD_DATA_FIELD(src_agent_log_index); + LOAD_DATA_FIELD(src_agent_type_index); + LOAD_DATA_FIELD(src_agent_type); + LOAD_DATA_FIELD(src_address); + LOAD_DATA_FIELD(category); + LOAD_DATA_FIELD(stack_id); + LOAD_DATA_FIELD(parent_stack_id); + LOAD_DATA_FIELD(corr_id); +} + +template +void 
+load(ArchiveT& ar, rocpd::types::scratch_memory& data) +{ + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(operation); + LOAD_DATA_FIELD(agent_abs_index); + LOAD_DATA_FIELD(agent_log_index); + LOAD_DATA_FIELD(agent_type_index); + LOAD_DATA_FIELD(agent_type); + LOAD_DATA_FIELD(queue_id); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(tid); + LOAD_DATA_FIELD(alloc_flags); + LOAD_DATA_FIELD(start); + LOAD_DATA_FIELD(end); + LOAD_DATA_FIELD(size); + LOAD_DATA_FIELD(category); + LOAD_DATA_FIELD(stack_id); + LOAD_DATA_FIELD(parent_stack_id); + LOAD_DATA_FIELD(corr_id); +} + +template +void +load(ArchiveT& ar, rocpd::types::stats& data) +{ + LOAD_DATA_FIELD(name); + LOAD_DATA_FIELD(calls); + LOAD_DATA_NAMED("DURATION (nsec)", total_duration); + LOAD_DATA_NAMED("SQR (nsec)", sqr); + LOAD_DATA_NAMED("AVERAGE (nsec)", average); + LOAD_DATA_NAMED("PERCENT (INC)", percentage); + LOAD_DATA_NAMED("MIN (nsec)", min_ns); + LOAD_DATA_NAMED("MAX (nsec)", max_ns); + LOAD_DATA_NAMED("VARIANCE", variance); + LOAD_DATA_NAMED("STD_DEV", std_dev); +} + +template +void +load(ArchiveT& ar, rocpd::types::stats_node& data) +{ + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(pid); + // LOAD_DATA_FIELD(nid); // nid is not used in stats_node + LOAD_DATA_FIELD(name); + LOAD_DATA_FIELD(calls); + LOAD_DATA_NAMED("DURATION (nsec)", total_duration); + LOAD_DATA_NAMED("SQR (nsec)", sqr); + LOAD_DATA_NAMED("AVERAGE (nsec)", average); + LOAD_DATA_NAMED("PERCENT (INC)", percentage); + LOAD_DATA_NAMED("MIN (nsec)", min_ns); + LOAD_DATA_NAMED("MAX (nsec)", max_ns); + LOAD_DATA_NAMED("VARIANCE", variance); + LOAD_DATA_NAMED("STD_DEV", std_dev); +} + +// Add this inside the cereal namespace, after the existing load functions + +template +void +load(ArchiveT& ar, rocpd::types::pmc_event& data) +{ + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(event_id); + LOAD_DATA_FIELD(pmc_id); + // LOAD_DATA_FIELD(counter_value); +} + +template +void +load(ArchiveT& ar, rocpd::types::counter& 
data) +{ + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(dispatch_id); + LOAD_DATA_FIELD(kernel_id); + LOAD_DATA_FIELD(stack_id); + LOAD_DATA_FIELD(correlation_id); + LOAD_DATA_FIELD(event_id); + LOAD_DATA_FIELD(pid); + LOAD_DATA_FIELD(tid); + LOAD_DATA_FIELD(agent_id); + LOAD_DATA_FIELD(agent_abs_index); + LOAD_DATA_FIELD(agent_log_index); + LOAD_DATA_FIELD(agent_type_index); + LOAD_DATA_FIELD(agent_type); + LOAD_DATA_FIELD(queue_id); + LOAD_DATA_FIELD(grid_size_x); + LOAD_DATA_FIELD(grid_size_y); + LOAD_DATA_FIELD(grid_size_z); + LOAD_DATA_FIELD(grid_size); + LOAD_DATA_FIELD(kernel_name); + LOAD_DATA_FIELD(kernel_region); + LOAD_DATA_FIELD(workgroup_size_x); + LOAD_DATA_FIELD(workgroup_size_y); + LOAD_DATA_FIELD(workgroup_size_z); + LOAD_DATA_FIELD(workgroup_size); + LOAD_DATA_FIELD(lds_block_size); + LOAD_DATA_FIELD(scratch_size); + LOAD_DATA_FIELD(vgpr_count); + LOAD_DATA_FIELD(accum_vgpr_count); + LOAD_DATA_FIELD(sgpr_count); + LOAD_DATA_FIELD(counter_name); + LOAD_DATA_FIELD(counter_symbol); + LOAD_DATA_FIELD(component); + LOAD_DATA_FIELD(description); + LOAD_DATA_FIELD(block); + LOAD_DATA_FIELD(expression); + LOAD_DATA_FIELD(value_type); + LOAD_DATA_FIELD(counter_id); + LOAD_DATA_FIELD(value); + LOAD_DATA_FIELD(start); + LOAD_DATA_FIELD(end); + LOAD_DATA_FIELD(is_constant); + LOAD_DATA_FIELD(is_derived); +} +template +void +load(ArchiveT& ar, rocpd::types::pmc_info& data) +{ + LOAD_DATA_FIELD(id); + LOAD_DATA_FIELD(guid); + LOAD_DATA_FIELD(nid); + LOAD_DATA_FIELD(agent_abs_index); + LOAD_DATA_FIELD(is_constant); + LOAD_DATA_FIELD(is_derived); + LOAD_DATA_FIELD(name); + LOAD_DATA_FIELD(description); + LOAD_DATA_FIELD(block); + LOAD_DATA_FIELD(expression); +} + +} // namespace cereal + +#undef LOAD_DATA_FIELD +#undef LOAD_DATA_NAMED +#undef LOAD_DATA_VALUE + +// namespace fmt +// { +// template <> +// struct formatter +// { +// template +// constexpr auto parse(ParseContext& ctx) +// { +// return ctx.begin(); +// } + +// template +// auto 
format(const rocpd::types::blob& val, Ctx& ctx) const +// { +// return fmt::format_to(ctx.out(), "{}", val.hexliteral()); +// } +// }; +// } // namespace fmt diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/time_window.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/time_window.py new file mode 100644 index 0000000000..79aec8c095 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/time_window.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python3 +############################################################################### +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
###############################################################################

import argparse
import sqlite3
from argparse import ArgumentParser
from typing import Optional, Tuple, Dict, Any, List

from .importer import RocpdImportData, execute_statement


def get_marker_timestamp(
    connection: sqlite3.Connection, marker_name: str, marker_type: str = "start"
) -> float:
    """Look up the timestamp of a uniquely-named marker.

    Args:
        connection: Database connection exposing a ``markers`` relation.
        marker_name: Value matched against ``markers.name``.
        marker_type: Only used to word the error messages ("start"/"end").

    Returns:
        The ``start`` column of the single matching marker, as a float.

    Raises:
        ValueError: If no marker, or more than one marker, has that name.
    """
    # NOTE(review): the ``start`` column is read even when marker_type is
    # "end" -- confirm that an end marker is meant to close the window at its
    # own start time rather than at its ``end`` column.
    query = "SELECT start FROM markers WHERE name = ?"
    result = connection.execute(query, (marker_name,)).fetchall()

    if not result:
        raise ValueError(
            f'ERROR: {marker_type.capitalize()} marker "{marker_name}" not found'
        )
    if len(result) > 1:
        raise ValueError(
            f'ERROR: Ambiguous reference - multiple {marker_type} markers found with name "{marker_name}"'
        )

    return float(result[0][0])


def markers2timestamp(
    connection: sqlite3.Connection, start_marker: str, end_marker: str
) -> Tuple[float, float]:
    """Resolve a pair of marker names to a ``(start_time, end_time)`` tuple."""
    start_time = get_marker_timestamp(connection, start_marker, "start")
    end_time = get_marker_timestamp(connection, end_marker, "end")
    return (start_time, end_time)


def get_min_max_time(connection):
    """Return ``(min_start, max_end)`` across the timed relations.

    Both values are ``None`` when none of the queried relations has any rows,
    because SQL ``MIN``/``MAX`` over an empty set produce NULL.
    """
    min_max_query = """
        SELECT
            MIN(min_time) as min_time,
            MAX(max_time) as max_time
        FROM (
            SELECT start as min_time, end as max_time FROM regions_and_samples
            UNION ALL
            SELECT start as min_time, end as max_time FROM rocpd_kernel_dispatch
            UNION ALL
            SELECT start as min_time, end as max_time FROM rocpd_memory_allocate
            UNION ALL
            SELECT start as min_time, end as max_time FROM rocpd_memory_copy
        )"""

    min_time, max_time = execute_statement(connection, min_max_query).fetchone()
    return (min_time, max_time)


def percentages2timestamp(
    connection: sqlite3.Connection, start_time: Optional[str], end_time: Optional[str]
) -> Tuple[float, float]:
    """Convert percentage strings or absolute time strings to timestamps.

    Args:
        connection: Database connection used to query the trace bounds.
        start_time: ``"NN%"``, a numeric string, or ``None``/empty for the
            trace minimum.
        end_time: ``"NN%"``, a numeric string, or ``None``/empty for the
            trace maximum.

    Returns:
        ``(start, end)`` timestamps.

    Raises:
        ValueError: If the trace has no timing data, a percentage is outside
            0-100, or a value is neither a percentage nor a number.
    """

    min_time, max_time = get_min_max_time(connection)

    if min_time is None:
        raise ValueError(
            "ERROR: Cannot create time window - trace file contains no timing data"
        )

    def convert_time(time_str: Optional[str], is_start: bool = False) -> float:
        # A missing bound falls back to the corresponding end of the trace.
        if not time_str:
            return min_time if is_start else max_time

        if "%" in time_str:
            percentage = float(time_str.replace("%", "")) / 100.0
            if not 0 <= percentage <= 1:
                raise ValueError(
                    f"ERROR: Invalid percentage '{time_str}' - must be between '0%' and '100%'"
                )
            return min_time + ((max_time - min_time) * percentage)

        try:
            return float(time_str)
        except ValueError as err:
            # Chain the original parse failure so it stays visible in tracebacks.
            raise ValueError(
                f"ERROR: Invalid time value '{time_str}' - must be percentage (e.g., '50%') or a number (nanoseconds since epoch) "
            ) from err

    return (convert_time(start_time, True), convert_time(end_time, False))


def get_time_filter(inclusive: bool, start_time, end_time) -> str:
    """Build the SQL predicate for relations with ``start``/``end`` columns.

    ``inclusive=True`` keeps rows lying entirely inside the window
    (``start >= begin AND end <= finish``); ``inclusive=False`` keeps every
    row that overlaps the window. Bounds are truncated to integers.
    """
    _beg = int(start_time)
    _end = int(end_time)
    if inclusive:
        return f"start >= {_beg} AND end <= {_end}"
    else:
        return f"start <= {_end} AND end >= {_beg}"


def get_timestamp_filter(inclusive: bool, start_time, end_time) -> str:
    """Build the SQL predicate for relations with a single ``timestamp`` column.

    Both branches select ``begin <= timestamp <= finish``; a point event is
    either inside the window or not, so ``inclusive`` only changes the order
    in which the two comparisons are written.
    """
    _beg = int(start_time)
    _end = int(end_time)
    if inclusive:
        return f"timestamp >= {_beg} AND timestamp <= {_end}"
    else:
        return f"timestamp <= {_end} AND timestamp >= {_beg}"


def create_view(connection: sqlite3.Connection, view_name: str, query: str) -> None:
    """Drop ``view_name`` if it exists, run ``query`` to recreate it, and commit."""
    execute_statement(connection, f"DROP VIEW IF EXISTS {view_name}")
    # print(f"{query}")
    execute_statement(connection, query)
    connection.commit()


#
# Main processing functions
#
def is_using_markers(args: Dict[str, Any]) -> Optional[bool]:
    """Classify which windowing mode the arguments request.

    Returns:
        ``False`` when ``start`` or ``end`` is set (time/percentage mode),
        ``True`` when only ``start_marker``/``end_marker`` is set, and
        ``None`` when no windowing argument was supplied at all.
    """
    # Explicit times take precedence over markers when both kinds are present.
    if args.get("start") is not None or args.get("end") is not None:
        return False
    elif args.get("start_marker") is not None or args.get("end_marker") is not None:
        return True

    return None


def get_column_names(conn: RocpdImportData, table_name: str):
    """
    Use SELECT on zero rows and read cursor.description.
    """
    cursor = conn.execute(f"SELECT * FROM '{table_name}' LIMIT 0")
    return [desc[0] for desc in cursor.description]


def apply_time_window(connection: RocpdImportData, **kwargs: Any) -> RocpdImportData:
    """Shadow the timed tables with temporary views restricted to a time window.

    Keyword Args:
        start, end: Percentage or absolute time strings (time mode).
        start_marker, end_marker: Marker names (marker mode).
        inclusive: Forwarded to ``get_time_filter``/``get_timestamp_filter``;
            defaults to ``True``.

    Returns:
        The same ``connection`` object. It is returned untouched when no
        windowing argument is supplied.

    Raises:
        ValueError: If the resolved end time is not greater than the start
            time, or if the bounds cannot be resolved.
    """

    is_marker_mode = is_using_markers(kwargs)
    if is_marker_mode is None:
        return connection

    inclusive = kwargs.get("inclusive", True)

    def dump_min_max(label):
        """Print the current trace bounds and return their span (0 if empty)."""
        bounds_min, bounds_max = get_min_max_time(connection)
        if bounds_min is None or bounds_max is None:
            # MIN()/MAX() are NULL when the relations are empty, e.g. when the
            # window excluded every event; subtracting None would raise.
            print(f"# {label:>8} time bounds: no timing data")
            return 0
        # bounds_min /= 1.0e9
        # bounds_max /= 1.0e9
        delta = bounds_max - bounds_min
        print(
            f"# {label:>8} time bounds: {bounds_min} : {bounds_max} nsec (delta={delta} nsec)"
        )
        return delta

    orig_delta = dump_min_max("Initial")

    # Get start and end times
    if not is_marker_mode:
        start_time = kwargs.get("start", None)
        end_time = kwargs.get("end", None)
        start_time, end_time = percentages2timestamp(connection, start_time, end_time)
    else:
        start_marker = kwargs.get("start_marker", None)
        end_marker = kwargs.get("end_marker", None)
        start_time, end_time = markers2timestamp(connection, start_marker, end_marker)

    if not end_time > start_time:
        raise ValueError(
            f"ERROR: Invalid time range - end time ({end_time}) must be greater than start time ({start_time})"
        )

    # Sort the non-"rocpd_info_" tables by which time columns they expose.
    start_end_timed_tables = []
    timestamp_timed_tables = []

    for itr in connection.table_info.keys():
        if itr.startswith("rocpd_info_"):
            continue
        column_names = get_column_names(connection, itr)
        if "start" in column_names and "end" in column_names:
            start_end_timed_tables += [itr]
        elif "timestamp" in column_names:
            timestamp_timed_tables += [itr]

    # Restrict the scope of the tables: start/end tables first, then the
    # timestamp tables, each with the predicate matching its columns.
    # NOTE(review): each entry of connection.table_info[name] is presumably a
    # SELECT over one underlying table -- confirm against RocpdImportData.
    windowed_groups = [
        (start_end_timed_tables, get_time_filter(inclusive, start_time, end_time)),
        (
            timestamp_timed_tables,
            get_timestamp_filter(inclusive, start_time, end_time),
        ),
    ]
    for table_names, time_filter in windowed_groups:
        for table_name in table_names:
            dbs = [
                f"{itr} WHERE {time_filter}"
                for itr in connection.table_info[table_name]
            ]
            table_union = " UNION ALL ".join(dbs)
            create_view_query = f"""
                CREATE TEMPORARY VIEW {table_name} AS
                {table_union}
            """
            create_view(connection, table_name, create_view_query)

    # # Create node view
    # create_view_query = """CREATE VIEW rocpd_node AS """
    # selects = [
    #     f"SELECT rocpd_node.* FROM rocpd_node INNER JOIN {t} ON rocpd_node.id = {t}.node_id"
    #     for t in start_end_timed_tables
    # ]
    # create_view_query += " UNION ".join(selects)
    # create_view(connection, "rocpd_node", create_view_query)

    # # Create track view
    # create_view_query = """
    #     CREATE VIEW rocpd_track AS
    #     SELECT rocpd_track.* FROM rocpd_track
    #     INNER JOIN rocpd_sample ON rocpd_sample.track_id = rocpd_track.id
    # """
    # create_view(connection, "rocpd_track", create_view_query)

    upd_delta = dump_min_max("Windowed")

    # A zero-length original span would divide by zero; report no reduction.
    reduction = (1.0 - (upd_delta / orig_delta)) * 100.0 if orig_delta else 0.0
    print(f"# Time windowing reduced the duration by {reduction:6.2f}%")

    return connection


#
# Command-line interface functions
#
def add_args(parser: ArgumentParser) -> List[str]:
    """Add the time window options to ``parser``.

    Returns:
        The attribute names under which the parsed values are stored.
    """

    tw_options = parser.add_argument_group("Time window options")

    # Start time mutually exclusive group
    start_group = tw_options.add_mutually_exclusive_group(required=False)
    start_group.add_argument(
        "--start",
        type=str,
        help="Start time as percentage or in nanoseconds from trace file (e.g., '50%%' or '781470909013049')",
        default=None,
    )
    start_group.add_argument(
        "--start-marker",
        type=str,
        help="Named marker event to use as window start point",
        default=None,
    )

    # End time mutually exclusive group
    end_group = tw_options.add_mutually_exclusive_group(required=False)
    end_group.add_argument(
        "--end",
        type=str,
        help="End time in as percentage or nanoseconds from trace file (e.g., '75%%' or '3543724246381057')",
        default=None,
    )
    end_group.add_argument(
        "--end-marker",
        type=str,
        help="Named marker event to use as window end point",
        default=None,
    )

    # The help text mirrors get_time_filter: True selects events fully inside
    # the window, False selects any event overlapping it.
    tw_options.add_argument(
        "--inclusive",
        type=lambda x: x.lower() in ("true", "t", "yes", "1"),
        help="True: include events only if BOTH start and end are in window; False: include events that overlap the window (default: True)",
        default=True,
    )

    return ["start", "end", "inclusive", "start_marker", "end_marker"]


def process_args(args, valid_args):
    """Collect the attributes named in ``valid_args`` that are set (not None) on ``args``."""
    return {
        name: getattr(args, name)
        for name in valid_args
        if hasattr(args, name) and getattr(args, name) is not None
    }


def execute(input_rpd: str, **kwargs: Any) -> RocpdImportData:
    """Open ``input_rpd`` and apply the time window described by ``kwargs``."""

    importData = RocpdImportData(input_rpd)

    apply_time_window(importData, **kwargs)

    return importData


def main(argv=None) -> int:
    """Command-line entry point; returns 0 on success."""
    parser = argparse.ArgumentParser(
        description="Apply time window filtering to ROCpd database views"
    )
    parser.add_argument(
        "-i",
        "--input",
        type=str,
        required=True,
        help="Path to the input ROCpd database file",
    )

    arg_names = add_args(parser)
    args = parser.parse_args(argv)

    execute(args.input, **{arg: getattr(args, arg) for arg in arg_names})

    return 0


if __name__ == "__main__":
    main()

# diff --git a/projects/rocprofiler-sdk/source/lib/python/setup.py b/projects/rocprofiler-sdk/source/lib/python/setup.py new file mode 100644
index 0000000000..2045f65c41 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/python/setup.py @@ -0,0 +1,33 @@ +################################################################################ +# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+################################################################################ + +from setuptools import setup + +setup( + name="rocpd", + version="1.0", + description="RocmProfileData profiling format", + packages={"rocpd"}, + include_package_data=True, + python_requires=">=3.6", + zip_safe=False, +) diff --git a/projects/rocprofiler-sdk/source/lib/python/utilities.cmake b/projects/rocprofiler-sdk/source/lib/python/utilities.cmake index 5f50fc578d..b0db11f318 100644 --- a/projects/rocprofiler-sdk/source/lib/python/utilities.cmake +++ b/projects/rocprofiler-sdk/source/lib/python/utilities.cmake @@ -147,19 +147,15 @@ function(rocprofiler_rocpd_python_bindings _VERSION) set(rocpd_PYTHON_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/${rocpd_PYTHON_INSTALL_DIRECTORY}) set(rocpd_PYTHON_SOURCES - chrome_tracing.py csv.py importer.py __init__.py __main__.py output_config.py + otf2.py pftrace.py schema.py time_window.py) - set(rocpd_SCHEMA_SOURCES - schema_data/data_views.sql schema_data/marker_views.sql - schema_data/rocpd_indexes.sql schema_data/rocpd_tables.sql - schema_data/rocpd_views.sql schema_data/summary_views.sql) foreach(_SOURCE ${rocpd_PYTHON_SOURCES}) configure_file(${CMAKE_CURRENT_LIST_DIR}/${_SOURCE} @@ -167,16 +163,7 @@ function(rocprofiler_rocpd_python_bindings _VERSION) install( FILES ${rocpd_PYTHON_OUTPUT_DIRECTORY}/${_SOURCE} DESTINATION ${rocpd_PYTHON_INSTALL_DIRECTORY} - COMPONENT core) - endforeach() - - foreach(_SOURCE ${rocpd_SCHEMA_SOURCES}) - configure_file(${CMAKE_CURRENT_LIST_DIR}/${_SOURCE} - ${rocpd_PYTHON_OUTPUT_DIRECTORY}/${_SOURCE} COPYONLY) - install( - FILES ${rocpd_PYTHON_OUTPUT_DIRECTORY}/${_SOURCE} - DESTINATION ${rocpd_PYTHON_INSTALL_DIRECTORY}/schema_data - COMPONENT core) + COMPONENT rocpd) endforeach() add_library(rocprofiler-sdk-rocpd-python-bindings-${_VERSION} MODULE) @@ -201,6 +188,7 @@ function(rocprofiler_rocpd_python_bindings _VERSION) rocprofiler-sdk::rocprofiler-sdk-gotcha rocprofiler-sdk::rocprofiler-sdk-dw 
rocprofiler-sdk::rocprofiler-sdk-static-library + rocprofiler-sdk::rocprofiler-sdk-rocpd-library ${Python3_LIBRARIES}) set_target_properties( @@ -218,35 +206,5 @@ function(rocprofiler_rocpd_python_bindings _VERSION) install( TARGETS rocprofiler-sdk-rocpd-python-bindings-${_VERSION} DESTINATION ${rocpd_PYTHON_INSTALL_DIRECTORY} - COMPONENT core) -endfunction() - -function(rocprofiler_rocpd_python_packaging _VERSION) - message( - STATUS "Creating rocprofiler-sdk rocpd python packaging for python ${_VERSION}") - rocprofiler_find_python3(${_VERSION}) - - add_custom_target( - rocprofiler-sdk-rocpd-${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR} ALL - ${Python3_EXECUTABLE} - -m - pip - install - -q - -q - --prefix - ${PROJECT_BINARY_DIR} - -I - ${CMAKE_CURRENT_BINARY_DIR} - WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMENT - "Packaging rocpd for python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}..." - ) - - install( - DIRECTORY - ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}/python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR} - DESTINATION ${CMAKE_INSTALL_LIBDIR} - USE_SOURCE_PERMISSIONS - COMPONENT core) + COMPONENT rocpd) endfunction() diff --git a/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/data_views.sql b/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/data_views.sql index fff20040ae..868542c99f 100644 --- a/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/data_views.sql +++ b/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/data_views.sql @@ -8,7 +8,7 @@ SELECT CO.id, CO.guid, CO.nid, - CO.pid, + P.pid, A.absolute_index AS agent_abs_index, CO.uri, CO.load_base, @@ -22,7 +22,9 @@ SELECT FROM `rocpd_info_code_object` CO INNER JOIN `rocpd_info_agent` A ON CO.agent_id = A.id - AND CO.guid = A.guid; + AND CO.guid = A.guid + INNER JOIN `rocpd_info_process` P ON CO.pid = P.id + AND CO.guid = P.guid; CREATE VIEW IF NOT EXISTS `kernel_symbols` AS @@ -30,7 +32,7 @@ SELECT KS.id, KS.guid, KS.nid, - KS.pid, + P.pid, 
KS.code_object_id, KS.kernel_name, KS.display_name, @@ -50,7 +52,9 @@ SELECT JSON_EXTRACT(KS.extdata, '$.truncated_kernel_name') AS truncated_kernel_name, JSON_EXTRACT(KS.extdata, '$.kernel_address.handle') AS kernel_address FROM - `rocpd_info_kernel_symbol` KS; + `rocpd_info_kernel_symbol` KS + INNER JOIN `rocpd_info_process` P ON KS.pid = P.id + AND KS.guid = P.guid; -- Processes CREATE VIEW IF NOT EXISTS @@ -64,7 +68,7 @@ SELECT N.version AS system_version, P.guid, P.ppid, - P.id AS pid, + P.pid, P.init, P.start, P.end, @@ -87,8 +91,8 @@ SELECT N.version AS system_version, P.guid, P.ppid, - P.id AS pid, - T.id AS tid, + P.pid, + T.tid, T.start, T.end, T.name @@ -116,8 +120,8 @@ SELECT ) AS category, S.string AS name, R.nid, - R.pid, - R.tid, + P.pid, + T.tid, R.start, R.end, (R.end - R.start) AS duration, @@ -133,7 +137,11 @@ FROM INNER JOIN `rocpd_event` E ON E.id = R.event_id AND E.guid = R.guid INNER JOIN `rocpd_string` S ON S.id = R.name_id - AND S.guid = R.guid; + AND S.guid = R.guid + INNER JOIN `rocpd_info_process` P ON P.id = R.pid + AND P.guid = R.guid + INNER JOIN `rocpd_info_thread` T ON T.id = R.tid + AND T.guid = R.guid; CREATE VIEW IF NOT EXISTS `region_args` AS @@ -141,7 +149,7 @@ SELECT R.id, R.guid, R.nid, - R.pid, + P.pid, A.type, A.name, A.value @@ -150,7 +158,9 @@ FROM INNER JOIN `rocpd_event` E ON E.id = R.event_id AND E.guid = R.guid INNER JOIN `rocpd_arg` A ON A.event_id = E.id - AND A.guid = R.guid; + AND A.guid = R.guid + INNER JOIN `rocpd_info_process` P ON P.id = R.pid + AND P.guid = R.guid; -- -- Samples @@ -178,9 +188,10 @@ SELECT AND RS.guid = T.guid ) AS name, T.nid, - T.pid, - T.tid, + P.pid, + TH.tid, R.timestamp, + R.event_id, E.stack_id AS stack_id, E.parent_stack_id AS parent_stack_id, E.correlation_id AS corr_id, @@ -192,7 +203,74 @@ FROM INNER JOIN `rocpd_track` T ON T.id = R.track_id AND T.guid = R.guid INNER JOIN `rocpd_event` E ON E.id = R.event_id - AND E.guid = R.guid; + AND E.guid = R.guid + INNER JOIN 
`rocpd_info_process` P ON P.id = T.pid + AND P.guid = T.guid + INNER JOIN `rocpd_info_thread` TH ON TH.id = T.tid + AND TH.guid = T.guid; + +-- +-- Provides samples view with the same columns as regions view +CREATE VIEW IF NOT EXISTS + `sample_regions` AS +SELECT + R.id, + R.guid, + ( + SELECT + string + FROM + `rocpd_string` RS + WHERE + RS.id = E.category_id + AND RS.guid = E.guid + ) AS category, + ( + SELECT + string + FROM + `rocpd_string` RS + WHERE + RS.id = T.name_id + AND RS.guid = T.guid + ) AS name, + T.nid, + P.pid, + TH.tid, + R.timestamp AS start, + R.timestamp AS END, + (R.timestamp - R.timestamp) AS duration, + R.event_id, + E.stack_id AS stack_id, + E.parent_stack_id AS parent_stack_id, + E.correlation_id AS corr_id, + E.extdata AS extdata, + E.call_stack AS call_stack, + E.line_info AS line_info +FROM + `rocpd_sample` R + INNER JOIN `rocpd_track` T ON T.id = R.track_id + AND T.guid = R.guid + INNER JOIN `rocpd_event` E ON E.id = R.event_id + AND E.guid = R.guid + INNER JOIN `rocpd_info_process` P ON P.id = T.pid + AND P.guid = T.guid + INNER JOIN `rocpd_info_thread` TH ON TH.id = T.tid + AND TH.guid = T.guid; + +-- +-- Provides a unified view of the regions and samples +CREATE VIEW IF NOT EXISTS + `regions_and_samples` AS +SELECT + * +FROM + `regions` +UNION ALL +SELECT + * +FROM + `sample_regions`; -- -- Kernel information @@ -201,7 +279,7 @@ CREATE VIEW SELECT K.id, K.guid, - K.tid, + T.tid, ( SELECT string @@ -214,7 +292,7 @@ SELECT R.string AS region, S.display_name AS name, K.nid, - Q.pid, + P.pid, A.absolute_index AS agent_abs_index, A.logical_index AS agent_log_index, A.type_index AS agent_type_index, @@ -255,68 +333,11 @@ FROM LEFT JOIN `rocpd_info_stream` ST ON ST.id = K.stream_id AND ST.guid = K.guid LEFT JOIN `rocpd_info_queue` Q ON Q.id = K.queue_id - AND Q.guid = K.guid; - -CREATE VIEW - `kernels_renamed` AS -SELECT - K.id, - K.guid, - K.tid, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND 
RS.guid = E.guid - ) AS category, - R.string AS region, - R.string AS name, - K.nid, - Q.pid, - A.absolute_index AS agent_abs_index, - A.logical_index AS agent_log_index, - A.type_index AS agent_type_index, - A.type AS agent_type, - S.code_object_id AS code_object_id, - K.kernel_id, - K.dispatch_id, - K.stream_id, - K.queue_id, - Q.name AS queue, - ST.name AS stream, - K.start, - K.end, - (K.end - K.start) AS duration, - K.grid_size_x AS grid_x, - K.grid_size_y AS grid_y, - K.grid_size_z AS grid_z, - K.workgroup_size_x AS workgroup_x, - K.workgroup_size_y AS workgroup_y, - K.workgroup_size_z AS workgroup_z, - K.group_segment_size AS lds_size, - K.private_segment_size AS scratch_size, - S.group_segment_size AS static_lds_size, - S.private_segment_size AS static_scratch_size, - E.stack_id, - E.parent_stack_id, - E.correlation_id AS corr_id -FROM - `rocpd_kernel_dispatch` K - INNER JOIN `rocpd_info_agent` A ON A.id = K.agent_id - AND A.guid = K.guid - INNER JOIN `rocpd_event` E ON E.id = K.event_id - AND E.guid = K.guid - INNER JOIN `rocpd_string` R ON R.id = K.region_name_id - AND R.guid = K.guid - INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id - AND S.guid = K.guid - LEFT JOIN `rocpd_info_stream` ST ON ST.id = K.stream_id - AND ST.guid = K.guid - LEFT JOIN `rocpd_info_queue` Q ON Q.id = K.queue_id - AND Q.guid = K.guid; + AND Q.guid = K.guid + INNER JOIN `rocpd_info_process` P ON P.id = Q.pid + AND P.guid = Q.guid + INNER JOIN `rocpd_info_thread` T ON T.id = K.tid + AND T.guid = K.guid; -- -- Performance Monitoring Counters (PMC) @@ -326,7 +347,7 @@ SELECT PMC_I.id, PMC_I.guid, PMC_I.nid, - PMC_I.pid, + P.pid, A.absolute_index AS agent_abs_index, PMC_I.is_constant, PMC_I.is_derived, @@ -337,7 +358,9 @@ SELECT FROM `rocpd_info_pmc` PMC_I INNER JOIN `rocpd_info_agent` A ON PMC_I.agent_id = A.id - AND PMC_I.guid = A.guid; + AND PMC_I.guid = A.guid + INNER JOIN `rocpd_info_process` P ON P.id = PMC_I.pid + AND PMC_I.guid = P.guid; CREATE VIEW IF NOT EXISTS 
`pmc_events` AS @@ -365,7 +388,7 @@ SELECT AND KS.guid = K.guid ) AS name, K.nid, - K.pid, + P.pid, K.dispatch_id, K.start, K.end, @@ -379,7 +402,9 @@ FROM INNER JOIN `rocpd_event` E ON E.id = PMC_E.event_id AND E.guid = PMC_E.guid INNER JOIN `rocpd_kernel_dispatch` K ON K.event_id = PMC_E.event_id - AND K.guid = PMC_E.guid; + AND K.guid = PMC_E.guid + INNER JOIN `rocpd_info_process` P ON P.id = K.pid + AND P.guid = K.guid; -- events with arguments --- CREATE VIEW IF NOT EXISTS @@ -421,13 +446,15 @@ SELECT A.value AS arg_value, JSON_EXTRACT(A.extdata, '$.stream_id') AS stream_id, S.nid, - S.pid, + P.pid, S.name AS stream_name, S.extdata AS extdata FROM `rocpd_arg` A INNER JOIN `rocpd_info_stream` S ON JSON_EXTRACT(A.extdata, '$.stream_id') = S.id AND A.guid = S.guid + INNER JOIN `rocpd_info_process` P ON P.id = S.pid + AND P.guid = S.guid WHERE A.name = 'stream'; @@ -448,8 +475,8 @@ SELECT AND RS.guid = E.guid ) AS category, M.nid, - M.pid, - M.tid, + P.pid, + T.tid, M.start, M.end, (M.end - M.start) AS duration, @@ -490,7 +517,11 @@ FROM LEFT JOIN `rocpd_info_stream` ST ON ST.id = M.stream_id AND ST.guid = M.guid INNER JOIN `rocpd_event` E ON E.id = M.event_id - AND E.guid = M.guid; + AND E.guid = M.guid + INNER JOIN `rocpd_info_process` P ON P.id = M.pid + AND P.guid = M.guid + INNER JOIN `rocpd_info_thread` T ON T.id = M.tid + AND T.guid = M.guid; -- -- @@ -509,8 +540,8 @@ SELECT AND RS.guid = E.guid ) AS category, M.nid, - M.pid, - M.tid, + P.pid, + T.tid, M.start, M.end, (M.end - M.start) AS duration, @@ -539,7 +570,11 @@ FROM LEFT JOIN `rocpd_info_stream` ST ON ST.id = M.stream_id AND ST.guid = M.guid INNER JOIN `rocpd_event` E ON E.id = M.event_id - AND E.guid = M.guid; + AND E.guid = M.guid + INNER JOIN `rocpd_info_process` P ON P.id = M.pid + AND P.guid = M.guid + INNER JOIN `rocpd_info_thread` T ON T.id = M.tid + AND T.guid = M.guid; -- -- @@ -549,7 +584,7 @@ SELECT M.id, M.guid, M.nid, - M.pid, + P.pid, M.type AS operation, A.name AS agent_name,
A.absolute_index AS agent_abs_index, @@ -557,7 +592,7 @@ SELECT A.type_index AS agent_type_index, A.type AS agent_type, M.queue_id, - M.tid, + T.tid, JSON_EXTRACT(M.extdata, '$.flags') AS alloc_flags, M.start, M.end, @@ -585,6 +620,10 @@ FROM AND Q.guid = M.guid INNER JOIN `rocpd_event` E ON E.id = M.event_id AND E.guid = M.guid + INNER JOIN `rocpd_info_process` P ON P.id = M.pid + AND P.guid = M.guid + INNER JOIN `rocpd_info_thread` T ON T.id = M.tid + AND T.guid = M.guid WHERE M.level = 'SCRATCH' ORDER BY @@ -603,8 +642,8 @@ SELECT E.correlation_id, E.stack_id, E.parent_stack_id, - K.pid, - K.tid, + P.pid, + T.tid, K.agent_id, A.absolute_index AS agent_abs_index, A.logical_index AS agent_log_index, @@ -672,258 +711,12 @@ FROM AND A.guid = K.guid INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id AND S.guid = K.guid + INNER JOIN `rocpd_info_process` P ON P.id = K.pid + AND P.guid = K.guid + INNER JOIN `rocpd_info_thread` T ON T.id = K.tid + AND T.guid = K.guid GROUP BY PMC_E.guid, K.dispatch_id, PMC_I.name, K.agent_id; - --- --- RCCL API calls -CREATE VIEW IF NOT EXISTS - `rccl` AS -SELECT - R.id, - R.guid, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - S.string AS name, - R.nid, - R.pid, - R.tid, - R.start, - R.end, - (R.end - R.start) AS duration, - R.event_id, - E.stack_id, - E.parent_stack_id, - E.correlation_id AS corr_id, - E.extdata, - E.call_stack, - E.line_info -FROM - `rocpd_region` R - INNER JOIN `rocpd_event` E ON E.id = R.event_id - AND E.guid = R.guid - INNER JOIN `rocpd_string` S ON S.id = R.name_id - AND S.guid = R.guid -WHERE - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) LIKE 'RCCL_%' -ORDER BY - R.start ASC; - --- --- ROCJPEG API calls -CREATE VIEW IF NOT EXISTS - `rocjpeg` AS -SELECT - R.id, - R.guid, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = 
E.guid - ) AS category, - S.string AS name, - R.nid, - R.pid, - R.tid, - R.start, - R.end, - (R.end - R.start) AS duration, - R.event_id, - E.stack_id, - E.parent_stack_id, - E.correlation_id AS corr_id, - E.extdata, - E.call_stack, - E.line_info -FROM - `rocpd_region` R - INNER JOIN `rocpd_event` E ON E.id = R.event_id - AND E.guid = R.guid - INNER JOIN `rocpd_string` S ON S.id = R.name_id - AND S.guid = R.guid -WHERE - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) LIKE 'ROCJPEG_%' -ORDER BY - R.start ASC; - --- --- ROCDECODE API calls -CREATE VIEW IF NOT EXISTS - `rocdecode` AS -SELECT - R.id, - R.guid, - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) AS category, - S.string AS name, - R.nid, - R.pid, - R.tid, - R.start, - R.end, - (R.end - R.start) AS duration, - R.event_id, - E.stack_id, - E.parent_stack_id, - E.correlation_id AS corr_id, - E.extdata, - E.call_stack, - E.line_info -FROM - `rocpd_region` R - INNER JOIN `rocpd_event` E ON E.id = R.event_id - AND E.guid = R.guid - INNER JOIN `rocpd_string` S ON S.id = R.name_id - AND S.guid = R.guid -WHERE - ( - SELECT - string - FROM - `rocpd_string` RS - WHERE - RS.id = E.category_id - AND RS.guid = E.guid - ) LIKE 'ROCDECODE_%' -ORDER BY - R.start ASC; - --- ALL API call regions in one go -CREATE VIEW IF NOT EXISTS - `api_regions` AS -WITH - category_strings AS ( - SELECT - RS.id, - RS.guid, - RS.string AS category_string - FROM - `rocpd_string` RS - ) -SELECT - R.id, - R.guid, - CS.category_string AS category, - S.string AS name, - R.nid, - R.pid, - R.tid, - R.start, - R.end, - (R.end - R.start) AS duration, - R.event_id, - E.stack_id, - E.parent_stack_id, - E.correlation_id AS corr_id, - E.extdata, - E.call_stack, - E.line_info -FROM - `rocpd_region` R - INNER JOIN `rocpd_event` E ON E.id = R.event_id - AND E.guid = R.guid - INNER JOIN `rocpd_string` S ON S.id = R.name_id - AND S.guid = 
R.guid - INNER JOIN category_strings CS ON CS.id = E.category_id - AND CS.guid = E.guid -WHERE - CS.category_string LIKE '%HIP_%' - OR CS.category_string LIKE '%HSA_%' - OR CS.category_string LIKE 'RCCL_%' - OR CS.category_string LIKE 'ROCJPEG_%' - OR CS.category_string LIKE 'ROCDECODE_%' -ORDER BY - R.start ASC; - --- Threads participating in API calls -CREATE VIEW IF NOT EXISTS - `api_threads` AS -SELECT DISTINCT - N.id AS nid, - N.machine_id, - N.system_name, - N.hostname, - N.release AS system_release, - N.version AS system_version, - P.guid, - P.ppid, - P.id AS pid, - T.id AS tid, - T.start, - T.end, - T.name -FROM - `rocpd_info_thread` T - INNER JOIN `rocpd_info_process` P ON P.id = T.pid - AND P.guid = T.guid - INNER JOIN `rocpd_info_node` N ON N.id = T.nid - AND N.guid = T.guid - INNER JOIN ( - SELECT DISTINCT - tid, - guid - FROM - `api_regions` - UNION ALL - SELECT DISTINCT - tid, - guid - FROM - `kernels` - UNION ALL - SELECT DISTINCT - tid, - guid - FROM - `memory_copies` - UNION ALL - SELECT DISTINCT - tid, - guid - FROM - `memory_allocations` - UNION ALL - SELECT DISTINCT - tid, - guid - FROM - `markers` - ) AS activity ON activity.tid = T.id - AND activity.guid = T.guid; diff --git a/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/marker_views.sql b/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/marker_views.sql index 26ddaad298..9cd821a412 100644 --- a/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/marker_views.sql +++ b/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/marker_views.sql @@ -1,218 +1,3 @@ -- -- Views related to markers -- --- --- -CREATE VIEW - `range_markers` AS -SELECT - E.id, - E.guid, - ST.string AS category, - JSON_EXTRACT(E.extdata, '$.message') AS name, - R.start, - R.end, - (R.end - R.start) AS duration, - E.stack_id, - E.parent_stack_id, - E.correlation_id, - E.call_stack, - E.line_info, - R.pid, - R.tid, - R.nid, - ST2.string AS operation -FROM - `rocpd_event` E - INNER JOIN 
`rocpd_string` ST ON ST.id = E.category_id - AND ST.guid = E.guid - INNER JOIN `rocpd_region` R ON R.event_id = E.id - AND R.guid = E.guid - INNER JOIN `rocpd_string` ST2 ON ST2.id = R.name_id - AND ST2.guid = R.guid -WHERE - ST.string LIKE '%MARKER%' - AND JSON_VALID(E.extdata); - --- --- -CREATE VIEW - `single_markers` AS -SELECT - E.id, - E.guid, - ST.string AS category, - JSON_EXTRACT(E.extdata, '$.message') AS name, - S.timestamp AS timestamp, - E.stack_id, - E.parent_stack_id, - E.correlation_id, - E.call_stack, - E.line_info, - T.pid AS pid, - T.tid AS tid, - T.nid -FROM - `rocpd_event` E - INNER JOIN `rocpd_string` ST ON ST.id = E.category_id - AND ST.guid = E.guid - INNER JOIN `rocpd_sample` S ON S.event_id = E.id - AND S.guid = E.guid - INNER JOIN `rocpd_track` T ON T.id = S.track_id - AND S.guid = T.guid -WHERE - ST.string LIKE '%MARKER%'; - --- --- -CREATE VIEW - `markers` AS -SELECT - * -FROM - `range_markers` -UNION -SELECT - SM.id, - SM.guid, - SM.category, - SM.name, - SM.timestamp AS start, - SM.timestamp AS end, - 0 AS duration, - SM.stack_id, - SM.parent_stack_id, - SM.correlation_id, - SM.call_stack, - SM.line_info, - SM.pid, - SM.tid, - SM.nid, - 'roctxMarkA' AS operation -FROM - `single_markers` SM; - --- --- -CREATE VIEW - `range_marker_summary` AS -WITH - avg_data AS ( - SELECT - name, - AVG(duration) AS avg_duration - FROM - `range_markers` - GROUP BY - name - ), - aggregated_data AS ( - SELECT - RM.name, - COUNT(*) AS calls, - SUM(RM.duration) AS total_duration, - A.avg_duration AS average_duration, - MIN(RM.duration) AS min_duration, - MAX(RM.duration) AS max_duration, - SQRT(AVG((RM.duration - A.avg_duration) * (RM.duration - A.avg_duration))) AS std_dev_duration - FROM - `range_markers` RM - JOIN avg_data A ON RM.name = A.name - GROUP BY - RM.name - ), - total_duration AS ( - SELECT - SUM(total_duration) AS grand_total_duration - FROM - aggregated_data - ) -SELECT - AD.name AS name, - AD.calls, - AD.total_duration AS `DURATION (nsec)`, 
- AD.average_duration AS `AVERAGE (nsec)`, - (CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS `PERCENT (INC)`, - AD.min_duration AS `MIN (nsec)`, - AD.max_duration AS `MAX (nsec)`, - AD.std_dev_duration AS `STD_DEV` -FROM - aggregated_data AD - CROSS JOIN total_duration TD; - --- --- -CREATE VIEW - `single_marker_summary` AS -SELECT - SM.name, - COUNT(*) AS calls -FROM - `single_markers` SM -GROUP BY - SM.name; - --- --- Markers summary -CREATE VIEW - `marker_summary` AS -WITH - all_markers AS ( - SELECT - name, - duration - FROM - `range_markers` - UNION ALL - SELECT - name, - 0 AS duration - FROM - `single_markers` - ), - avg_data AS ( - SELECT - name, - AVG(duration) AS avg_duration - FROM - all_markers - GROUP BY - name - ), - aggregated_data AS ( - SELECT - M.name, - COUNT(*) AS calls, - SUM(M.duration) AS total_duration, - CAST(SUM(M.duration * M.duration) AS REAL) AS sqr_duration, - A.avg_duration AS average_duration, - MIN(M.duration) AS min_duration, - MAX(M.duration) AS max_duration, - AVG((M.duration - A.avg_duration) * (M.duration - A.avg_duration)) AS variance_duration, - SQRT(AVG((M.duration - A.avg_duration) * (M.duration - A.avg_duration))) AS std_dev_duration - FROM - all_markers M - JOIN avg_data A ON M.name = A.name - GROUP BY - M.name - ), - total_duration AS ( - SELECT - SUM(total_duration) AS grand_total_duration - FROM - aggregated_data - ) -SELECT - AD.name AS name, - AD.calls, - AD.total_duration AS `DURATION (nsec)`, - AD.sqr_duration AS `SQR (nsec)`, - AD.average_duration AS `AVERAGE (nsec)`, - (CAST(AD.total_duration AS REAL) / NULLIF(TD.grand_total_duration, 0)) * 100 AS `PERCENT (INC)`, - AD.min_duration AS `MIN (nsec)`, - AD.max_duration AS `MAX (nsec)`, - AD.variance_duration AS `VARIANCE`, - AD.std_dev_duration AS `STD_DEV` -FROM - aggregated_data AD - CROSS JOIN total_duration TD; diff --git a/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/rocpd_indexes.sql 
b/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/rocpd_indexes.sql index 6e9eaf9ea9..43d722ea10 100644 --- a/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/rocpd_indexes.sql +++ b/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/rocpd_indexes.sql @@ -3,25 +3,43 @@ -- -- string field --- CREATE INDEX "rocpd_string{{uuid}}_string_idx" ON "rocpd_string{{uuid}}" ("string"); +-- CREATE INDEX `rocpd_string{{uuid}}_string_idx` ON `rocpd_string{{uuid}}` ("string"); -- guid field --- CREATE INDEX "rocpd_string{{uuid}}_guid_idx" ON "rocpd_string{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_info_node{{uuid}}_guid_idx" ON "rocpd_info_node{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_info_process{{uuid}}_guid_idx" ON "rocpd_info_process{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_info_thread{{uuid}}_guid_idx" ON "rocpd_info_thread{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_info_agent{{uuid}}_guid_idx" ON "rocpd_info_agent{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_info_queue{{uuid}}_guid_idx" ON "rocpd_info_queue{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_info_stream{{uuid}}_guid_idx" ON "rocpd_info_stream{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_info_pmc{{uuid}}_guid_idx" ON "rocpd_info_pmc{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_info_code_object{{uuid}}_guid_idx" ON "rocpd_info_code_object{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_info_kernel_symbol{{uuid}}_guid_idx" ON "rocpd_info_kernel_symbol{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_track{{uuid}}_guid_idx" ON "rocpd_track{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_event{{uuid}}_guid_idx" ON "rocpd_event{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_arg{{uuid}}_guid_idx" ON "rocpd_arg{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_pmc_event{{uuid}}_guid_idx" ON "rocpd_pmc_event{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_region{{uuid}}_guid_idx" ON "rocpd_region{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_sample{{uuid}}_guid_idx" ON "rocpd_sample{{uuid}}" ("guid"); --- CREATE INDEX 
"rocpd_kernel_dispatch{{uuid}}_guid_idx" ON "rocpd_kernel_dispatch{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_memory_copy{{uuid}}_guid_idx" ON "rocpd_memory_copy{{uuid}}" ("guid"); --- CREATE INDEX "rocpd_memory_allocate{{uuid}}_guid_idx" ON "rocpd_memory_allocate{{uuid}}" ("guid"); +-- CREATE INDEX `rocpd_string{{uuid}}_guid_idx` ON `rocpd_string{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_info_node{{uuid}}_guid_idx` ON `rocpd_info_node{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_info_process{{uuid}}_guid_idx` ON `rocpd_info_process{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_info_thread{{uuid}}_guid_idx` ON `rocpd_info_thread{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_info_agent{{uuid}}_guid_idx` ON `rocpd_info_agent{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_info_queue{{uuid}}_guid_idx` ON `rocpd_info_queue{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_info_stream{{uuid}}_guid_idx` ON `rocpd_info_stream{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_info_pmc{{uuid}}_guid_idx` ON `rocpd_info_pmc{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_info_code_object{{uuid}}_guid_idx` ON `rocpd_info_code_object{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_info_kernel_symbol{{uuid}}_guid_idx` ON `rocpd_info_kernel_symbol{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_track{{uuid}}_guid_idx` ON `rocpd_track{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_event{{uuid}}_guid_idx` ON `rocpd_event{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_arg{{uuid}}_guid_idx` ON `rocpd_arg{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_pmc_event{{uuid}}_guid_idx` ON `rocpd_pmc_event{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_sample{{uuid}}_guid_idx` ON `rocpd_sample{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_kernel_dispatch{{uuid}}_guid_idx` ON `rocpd_kernel_dispatch{{uuid}}` ("id", "guid"); +-- CREATE INDEX 
`rocpd_memory_copy{{uuid}}_guid_idx` ON `rocpd_memory_copy{{uuid}}` ("id", "guid"); +-- CREATE INDEX `rocpd_memory_allocate{{uuid}}_guid_idx` ON `rocpd_memory_allocate{{uuid}}` ("id", "guid"); + +-- CREATE INDEX `rocpd_event{{uuid}}_category_idx` ON `rocpd_event{{uuid}}` ("id", "guid", "category_id"); +-- CREATE INDEX `rocpd_region{{uuid}}_event_idx` ON `rocpd_region{{uuid}}` ("id", "guid", "event_id"); +-- CREATE INDEX `rocpd_region{{uuid}}_name_idx` ON `rocpd_region{{uuid}}` ("id", "guid", "name_id"); +-- CREATE INDEX `rocpd_sample{{uuid}}_event_idx` ON `rocpd_sample{{uuid}}` ("id", "guid", "event_id"); +-- CREATE INDEX `rocpd_sample{{uuid}}_track_idx` ON `rocpd_sample{{uuid}}` ("id", "guid", "track_id"); +-- CREATE INDEX `rocpd_track{{uuid}}_name_idx` ON `rocpd_track{{uuid}}` ("id", "guid", "name_id"); + +-- CREATE INDEX `rocpd_memory_copy{{uuid}}_guid_nid_pid_idx` ON `rocpd_memory_copy{{uuid}}` ("guid", "nid", "pid"); +-- CREATE INDEX `rocpd_kernel_dispatch{{uuid}}_guid_nid_pid_idx` ON `rocpd_kernel_dispatch{{uuid}}` ("guid", "nid", "pid"); +-- CREATE INDEX `rocpd_region{{uuid}}_guid_nid_pid_idx` ON `rocpd_region{{uuid}}` ("guid", "nid", "pid"); +-- CREATE INDEX `rocpd_sample{{uuid}}_guid_nid_pid_idx` ON `rocpd_sample{{uuid}}` ("guid", "nid", "pid"); + +-- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("guid"); +-- CREATE INDEX `rocpd_region{{uuid}}_nid_idx` ON `rocpd_region{{uuid}}` ("nid"); +-- CREATE INDEX `rocpd_region{{uuid}}_pid_idx` ON `rocpd_region{{uuid}}` ("pid"); +-- CREATE INDEX `rocpd_region{{uuid}}_start_idx` ON `rocpd_region{{uuid}}` ("start"); +-- CREATE INDEX `rocpd_region{{uuid}}_end_idx` ON `rocpd_region{{uuid}}` ("end"); diff --git a/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/summary_views.sql b/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/summary_views.sql index ef040ff721..3575bf6f55 100644 --- a/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/summary_views.sql +++ 
b/projects/rocprofiler-sdk/source/share/rocprofiler-sdk-rocpd/summary_views.sql @@ -8,8 +8,8 @@ CREATE VIEW IF NOT EXISTS SELECT S.display_name AS name, COUNT(K.kernel_id) AS total_calls, - SUM(K.end - K.start) / 1000 AS total_duration, - (SUM(K.end - K.start) / COUNT(K.kernel_id)) / 1000 AS average, + SUM(K.end - K.start) / 1000.0 AS total_duration, + (SUM(K.end - K.start) / COUNT(K.kernel_id)) / 1000.0 AS average, SUM(K.end - K.start) * 100.0 / ( SELECT SUM(A.end - A.start) @@ -39,11 +39,13 @@ FROM ( SELECT agent_id, + guid, SUM(END - start) AS GpuTime FROM ( SELECT agent_id, + guid, END, start FROM @@ -51,13 +53,15 @@ FROM UNION ALL SELECT dst_agent_id AS agent_id, + guid, END, start FROM `rocpd_memory_copy` ) GROUP BY - agent_id + agent_id, + guid ) A INNER JOIN ( SELECT @@ -148,10 +152,6 @@ GROUP BY ORDER BY total_duration DESC; --- --- summaries --- --- -- Kernel summary by name CREATE VIEW `kernel_summary` AS @@ -170,12 +170,14 @@ WITH K.name, COUNT(*) AS calls, SUM(K.duration) AS total_duration, - CAST(SUM(K.duration * K.duration) AS REAL) AS sqr_duration, + SUM(CAST(K.duration AS REAL) * CAST(K.duration AS REAL)) AS sqr_duration, A.avg_duration AS average_duration, MIN(K.duration) AS min_duration, MAX(K.duration) AS max_duration, - SUM((K.duration - A.avg_duration) * (K.duration - A.avg_duration)) / (COUNT(*) - 1) AS variance_duration, - SQRT(SUM((K.duration - A.avg_duration) * (K.duration - A.avg_duration)) / (COUNT(*) - 1)) AS std_dev_duration + SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1) AS variance_duration, + SQRT( + SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1) + ) AS std_dev_duration FROM `kernels` K JOIN avg_data A ON K.name = A.name @@ -222,12 +224,14 @@ WITH K.region AS name, COUNT(*) AS calls, SUM(K.duration) AS total_duration, - CAST(SUM(K.duration * K.duration) AS REAL) AS sqr_duration, + SUM(CAST(K.duration AS 
REAL) * CAST(K.duration AS REAL)) AS sqr_duration, A.avg_duration AS average_duration, MIN(K.duration) AS min_duration, MAX(K.duration) AS max_duration, - SUM((K.duration - A.avg_duration) * (K.duration - A.avg_duration)) / (COUNT(*) - 1) AS variance_duration, - SQRT(SUM((K.duration - A.avg_duration) * (K.duration - A.avg_duration)) / (COUNT(*) - 1)) AS std_dev_duration + SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1) AS variance_duration, + SQRT( + SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1) + ) AS std_dev_duration FROM `kernels` K JOIN avg_data A ON K.region = A.region @@ -274,12 +278,18 @@ WITH MC.name, COUNT(*) AS calls, SUM(MC.duration) AS total_duration, - CAST(SUM(MC.duration * MC.duration) AS REAL) AS sqr_duration, + SUM(CAST(MC.duration AS REAL) * CAST(MC.duration AS REAL)) AS sqr_duration, A.avg_duration AS average_duration, MIN(MC.duration) AS min_duration, MAX(MC.duration) AS max_duration, - SUM((MC.duration - A.avg_duration) * (MC.duration - A.avg_duration)) / (COUNT(*) - 1) AS variance_duration, - SQRT(SUM((MC.duration - A.avg_duration) * (MC.duration - A.avg_duration)) / (COUNT(*) - 1)) AS std_dev_duration + SUM( + CAST((MC.duration - A.avg_duration) AS REAL) * CAST((MC.duration - A.avg_duration) AS REAL) + ) / (COUNT(*) - 1) AS variance_duration, + SQRT( + SUM( + CAST((MC.duration - A.avg_duration) AS REAL) * CAST((MC.duration - A.avg_duration) AS REAL) + ) / (COUNT(*) - 1) + ) AS std_dev_duration FROM `memory_copies` MC JOIN avg_data A ON MC.name = A.name @@ -326,12 +336,18 @@ WITH MA.type AS name, COUNT(*) AS calls, SUM(MA.duration) AS total_duration, - CAST(SUM(MA.duration * MA.duration) AS REAL) AS sqr_duration, + SUM(CAST(MA.duration AS REAL) * CAST(MA.duration AS REAL)) AS sqr_duration, A.avg_duration AS average_duration, MIN(MA.duration) AS min_duration, MAX(MA.duration) AS max_duration, - SUM((MA.duration - 
A.avg_duration) * (MA.duration - A.avg_duration)) / (COUNT(*) - 1) AS variance_duration, - SQRT(SUM((MA.duration - A.avg_duration) * (MA.duration - A.avg_duration)) / (COUNT(*) - 1)) AS std_dev_duration + SUM( + CAST((MA.duration - A.avg_duration) AS REAL) * CAST((MA.duration - A.avg_duration) AS REAL) + ) / (COUNT(*) - 1) AS variance_duration, + SQRT( + SUM( + CAST((MA.duration - A.avg_duration) AS REAL) * CAST((MA.duration - A.avg_duration) AS REAL) + ) / (COUNT(*) - 1) + ) AS std_dev_duration FROM `memory_allocations` MA JOIN avg_data A ON MA.type = A.name @@ -358,1032 +374,3 @@ SELECT FROM aggregated_data AD CROSS JOIN total_duration TD; - --- --- Scratch Memory summary -CREATE VIEW - `scratch_memory_summary` AS -WITH - avg_data AS ( - SELECT - operation AS name, - AVG(END - start) AS avg_duration - FROM - `scratch_memory` - GROUP BY - operation - ), - aggregated_data AS ( - SELECT - SM.operation AS name, - COUNT(*) AS calls, - SUM(SM.end - SM.start) AS total_duration, - CAST(SUM((SM.end - SM.start) * (SM.end - SM.start)) AS REAL) AS sqr_duration, - A.avg_duration AS average_duration, - MIN(SM.end - SM.start) AS min_duration, - MAX(SM.end - SM.start) AS max_duration, - SUM((SM.end - SM.start - A.avg_duration) * (SM.end - SM.start - A.avg_duration)) / (COUNT(*) - 1) AS variance_duration, - SQRT( - SUM((SM.end - SM.start - A.avg_duration) * (SM.end - SM.start - A.avg_duration)) / (COUNT(*) - 1) - ) AS std_dev_duration - FROM - `scratch_memory` SM - JOIN avg_data A ON SM.operation = A.name - GROUP BY - SM.operation - ), - total_duration AS ( - SELECT - SUM(total_duration) AS grand_total_duration - FROM - aggregated_data - ) -SELECT - AD.name AS name, - AD.calls, - AD.total_duration AS "DURATION (nsec)", - AD.sqr_duration AS "SQR (nsec)", - AD.average_duration AS "AVERAGE (nsec)", - (CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)", - AD.min_duration AS "MIN (nsec)", - AD.max_duration AS "MAX (nsec)", - AD.variance_duration AS 
"VARIANCE", - AD.std_dev_duration AS "STD_DEV" -FROM - aggregated_data AD - CROSS JOIN total_duration TD -ORDER BY - AD.total_duration DESC; - --- --- HIP summary -CREATE VIEW - `hip_api_summary` AS -WITH - avg_data AS ( - SELECT - name, - AVG(duration) AS avg_duration - FROM - `regions` - WHERE - category LIKE 'HIP_%' - GROUP BY - name - ), - aggregated_data AS ( - SELECT - R.name, - COUNT(*) AS calls, - SUM(R.duration) AS total_duration, - CAST(SUM(R.duration * R.duration) AS REAL) AS sqr_duration, - A.avg_duration AS average_duration, - MIN(R.duration) AS min_duration, - MAX(R.duration) AS max_duration, - SUM((R.duration - A.avg_duration) * (R.duration - A.avg_duration)) / (COUNT(*) - 1) AS variance_duration, - SQRT(SUM((R.duration - A.avg_duration) * (R.duration - A.avg_duration)) / (COUNT(*) - 1)) AS std_dev_duration - FROM - `regions` R - JOIN avg_data A ON R.name = A.name - WHERE - R.category LIKE 'HIP_%' - GROUP BY - R.name - ), - total_duration AS ( - SELECT - SUM(total_duration) AS grand_total_duration - FROM - aggregated_data - ) -SELECT - AD.name AS name, - AD.calls, - AD.total_duration AS "DURATION (nsec)", - AD.sqr_duration AS "SQR (nsec)", - AD.average_duration AS "AVERAGE (nsec)", - (CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)", - AD.min_duration AS "MIN (nsec)", - AD.max_duration AS "MAX (nsec)", - AD.variance_duration AS "VARIANCE", - AD.std_dev_duration AS "STD_DEV" -FROM - aggregated_data AD - CROSS JOIN total_duration TD; - --- --- HSA summary -CREATE VIEW - `hsa_api_summary` AS -WITH - avg_data AS ( - SELECT - name, - AVG(duration) AS avg_duration - FROM - `regions` - WHERE - category LIKE 'HSA_%' - GROUP BY - name - ), - aggregated_data AS ( - SELECT - R.name, - COUNT(*) AS calls, - SUM(R.duration) AS total_duration, - CAST(SUM(R.duration * R.duration) AS REAL) AS sqr_duration, - A.avg_duration AS average_duration, - MIN(R.duration) AS min_duration, - MAX(R.duration) AS max_duration, - SUM((R.duration - 
A.avg_duration) * (R.duration - A.avg_duration)) / (COUNT(*) - 1) AS variance_duration, - SQRT(SUM((R.duration - A.avg_duration) * (R.duration - A.avg_duration)) / (COUNT(*) - 1)) AS std_dev_duration - FROM - `regions` R - JOIN avg_data A ON R.name = A.name - WHERE - R.category LIKE 'HSA_%' - GROUP BY - R.name - ), - total_duration AS ( - SELECT - SUM(total_duration) AS grand_total_duration - FROM - aggregated_data - ) -SELECT - AD.name AS name, - AD.calls, - AD.total_duration AS "DURATION (nsec)", - AD.sqr_duration AS "SQR (nsec)", - AD.average_duration AS "AVERAGE (nsec)", - (CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)", - AD.min_duration AS "MIN (nsec)", - AD.max_duration AS "MAX (nsec)", - AD.variance_duration AS "VARIANCE", - AD.std_dev_duration AS "STD_DEV" -FROM - aggregated_data AD - CROSS JOIN total_duration TD; - --- --- RCCL API summary -CREATE VIEW - `rccl_summary` AS -WITH - avg_data AS ( - SELECT - name, - AVG(duration) AS avg_duration - FROM - `regions` - WHERE - category LIKE 'RCCL_%' - GROUP BY - name - ), - aggregated_data AS ( - SELECT - R.name, - COUNT(*) AS calls, - SUM(R.duration) AS total_duration, - CAST(SUM(R.duration * R.duration) AS REAL) AS sqr_duration, - A.avg_duration AS average_duration, - MIN(R.duration) AS min_duration, - MAX(R.duration) AS max_duration, - SUM((R.duration - A.avg_duration) * (R.duration - A.avg_duration)) / (COUNT(*) - 1) AS variance_duration, - SQRT(SUM((R.duration - A.avg_duration) * (R.duration - A.avg_duration)) / (COUNT(*) - 1)) AS std_dev_duration - FROM - `regions` R - JOIN avg_data A ON R.name = A.name - WHERE - R.category LIKE 'RCCL_%' - GROUP BY - R.name - ), - total_duration AS ( - SELECT - SUM(total_duration) AS grand_total_duration - FROM - aggregated_data - ) -SELECT - AD.name AS name, - AD.calls, - AD.total_duration AS "DURATION (nsec)", - AD.sqr_duration AS "SQR (nsec)", - AD.average_duration AS "AVERAGE (nsec)", - (CAST(AD.total_duration AS REAL) / 
TD.grand_total_duration) * 100 AS "PERCENT (INC)", - AD.min_duration AS "MIN (nsec)", - AD.max_duration AS "MAX (nsec)", - AD.variance_duration AS "VARIANCE", - AD.std_dev_duration AS "STD_DEV" -FROM - aggregated_data AD - CROSS JOIN total_duration TD -ORDER BY - AD.total_duration DESC; - --- --- ROCJPEG API summary -CREATE VIEW - `rocjpeg_summary` AS -WITH - avg_data AS ( - SELECT - name, - AVG(duration) AS avg_duration - FROM - `regions` - WHERE - category LIKE 'ROCJPEG_%' - GROUP BY - name - ), - aggregated_data AS ( - SELECT - R.name, - COUNT(*) AS calls, - SUM(R.duration) AS total_duration, - CAST(SUM(R.duration * R.duration) AS REAL) AS sqr_duration, - A.avg_duration AS average_duration, - MIN(R.duration) AS min_duration, - MAX(R.duration) AS max_duration, - SUM((R.duration - A.avg_duration) * (R.duration - A.avg_duration)) / (COUNT(*) - 1) AS variance_duration, - SQRT(SUM((R.duration - A.avg_duration) * (R.duration - A.avg_duration)) / (COUNT(*) - 1)) AS std_dev_duration - FROM - `regions` R - JOIN avg_data A ON R.name = A.name - WHERE - R.category LIKE 'ROCJPEG_%' - GROUP BY - R.name - ), - total_duration AS ( - SELECT - SUM(total_duration) AS grand_total_duration - FROM - aggregated_data - ) -SELECT - AD.name AS name, - AD.calls, - AD.total_duration AS "DURATION (nsec)", - AD.sqr_duration AS "SQR (nsec)", - AD.average_duration AS "AVERAGE (nsec)", - (CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)", - AD.min_duration AS "MIN (nsec)", - AD.max_duration AS "MAX (nsec)", - AD.variance_duration AS "VARIANCE", - AD.std_dev_duration AS "STD_DEV" -FROM - aggregated_data AD - CROSS JOIN total_duration TD -ORDER BY - AD.total_duration DESC; - --- --- ROCDECODE API summary -CREATE VIEW - `rocdecode_summary` AS -WITH - avg_data AS ( - SELECT - name, - AVG(duration) AS avg_duration - FROM - `regions` - WHERE - category LIKE 'ROCDECODE_%' - GROUP BY - name - ), - aggregated_data AS ( - SELECT - R.name, - COUNT(*) AS calls, - 
SUM(R.duration) AS total_duration, - CAST(SUM(R.duration * R.duration) AS REAL) AS sqr_duration, - A.avg_duration AS average_duration, - MIN(R.duration) AS min_duration, - MAX(R.duration) AS max_duration, - SUM((R.duration - A.avg_duration) * (R.duration - A.avg_duration)) / (COUNT(*) - 1) AS variance_duration, - SQRT(SUM((R.duration - A.avg_duration) * (R.duration - A.avg_duration)) / (COUNT(*) - 1)) AS std_dev_duration - FROM - `regions` R - JOIN avg_data A ON R.name = A.name - WHERE - R.category LIKE 'ROCDECODE_%' - GROUP BY - R.name - ), - total_duration AS ( - SELECT - SUM(total_duration) AS grand_total_duration - FROM - aggregated_data - ) -SELECT - AD.name AS name, - AD.calls, - AD.total_duration AS "DURATION (nsec)", - AD.sqr_duration AS "SQR (nsec)", - AD.average_duration AS "AVERAGE (nsec)", - (CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)", - AD.min_duration AS "MIN (nsec)", - AD.max_duration AS "MAX (nsec)", - AD.variance_duration AS "VARIANCE", - AD.std_dev_duration AS "STD_DEV" -FROM - aggregated_data AD - CROSS JOIN total_duration TD -ORDER BY - AD.total_duration DESC; - --- --- Domain summary -CREATE VIEW - `domain_summary` AS -WITH - kernel_times AS ( - WITH - kernel_avg AS ( - SELECT - AVG(duration) AS avg_duration - FROM - `kernels` - ) - SELECT - 'KERNEL_DISPATCH' AS domain, - COUNT(*) AS calls, - SUM(duration) AS total_duration, - CAST(SUM(duration * duration) AS REAL) AS sqr_duration, - ( - SELECT - avg_duration - FROM - kernel_avg - ) AS avg_duration, - MIN(duration) AS min_duration, - MAX(duration) AS max_duration, - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - kernel_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - kernel_avg - ) - ) - ) / (COUNT(*) - 1) AS variance_duration, - SQRT( - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - kernel_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - kernel_avg - ) - ) - ) / (COUNT(*) - 1) - ) AS std_dev_duration - 
FROM - `kernels` - ), - mcopy_times AS ( - WITH - mcopy_avg AS ( - SELECT - AVG(duration) AS avg_duration - FROM - `memory_copies` - ) - SELECT - 'MEMORY_COPY' AS domain, - COUNT(*) AS calls, - SUM(duration) AS total_duration, - CAST(SUM(duration * duration) AS REAL) AS sqr_duration, - ( - SELECT - avg_duration - FROM - mcopy_avg - ) AS avg_duration, - MIN(duration) AS min_duration, - MAX(duration) AS max_duration, - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - mcopy_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - mcopy_avg - ) - ) - ) / (COUNT(*) - 1) AS variance_duration, - SQRT( - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - mcopy_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - mcopy_avg - ) - ) - ) / (COUNT(*) - 1) - ) AS std_dev_duration - FROM - `memory_copies` - ), - malloc_times AS ( - WITH - malloc_avg AS ( - SELECT - AVG(duration) AS avg_duration - FROM - `memory_allocations` - ) - SELECT - 'MEMORY_ALLOCATION' AS domain, - COUNT(*) AS calls, - SUM(duration) AS total_duration, - CAST(SUM(duration * duration) AS REAL) AS sqr_duration, - ( - SELECT - avg_duration - FROM - malloc_avg - ) AS avg_duration, - MIN(duration) AS min_duration, - MAX(duration) AS max_duration, - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - malloc_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - malloc_avg - ) - ) - ) / (COUNT(*) - 1) AS variance_duration, - SQRT( - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - malloc_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - malloc_avg - ) - ) - ) / (COUNT(*) - 1) - ) AS std_dev_duration - FROM - `memory_allocations` - ), - scratch_times AS ( - WITH - scratch_avg AS ( - SELECT - AVG(END - start) AS avg_duration - FROM - `scratch_memory` - ) - SELECT - 'SCRATCH_MEMORY' AS domain, - COUNT(*) AS calls, - SUM(END - start) AS total_duration, - CAST(SUM((END - start) * (END - start)) AS REAL) AS sqr_duration, - ( - SELECT - avg_duration - 
FROM - scratch_avg - ) AS avg_duration, - MIN(END - start) AS min_duration, - MAX(END - start) AS max_duration, - SUM( - ( - END - start - ( - SELECT - avg_duration - FROM - scratch_avg - ) - ) * ( - END - start - ( - SELECT - avg_duration - FROM - scratch_avg - ) - ) - ) / (COUNT(*) - 1) AS variance_duration, - SQRT( - SUM( - ( - END - start - ( - SELECT - avg_duration - FROM - scratch_avg - ) - ) * ( - END - start - ( - SELECT - avg_duration - FROM - scratch_avg - ) - ) - ) / (COUNT(*) - 1) - ) AS std_dev_duration - FROM - `scratch_memory` - ), - hip_api_times AS ( - WITH - hip_avg AS ( - SELECT - AVG(duration) AS avg_duration - FROM - `regions` - WHERE - category LIKE 'HIP_%' - ) - SELECT - 'HIP_API' AS domain, - COUNT(*) AS calls, - SUM(duration) AS total_duration, - CAST(SUM(duration * duration) AS REAL) AS sqr_duration, - ( - SELECT - avg_duration - FROM - hip_avg - ) AS avg_duration, - MIN(duration) AS min_duration, - MAX(duration) AS max_duration, - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - hip_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - hip_avg - ) - ) - ) / (COUNT(*) - 1) AS variance_duration, - SQRT( - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - hip_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - hip_avg - ) - ) - ) / (COUNT(*) - 1) - ) AS std_dev_duration - FROM - `regions` - WHERE - category LIKE 'HIP_%' - ), - hsa_api_times AS ( - WITH - hsa_avg AS ( - SELECT - AVG(duration) AS avg_duration - FROM - `regions` - WHERE - category LIKE 'HSA_%' - ) - SELECT - 'HSA_API' AS domain, - COUNT(*) AS calls, - SUM(duration) AS total_duration, - CAST(SUM(duration * duration) AS REAL) AS sqr_duration, - ( - SELECT - avg_duration - FROM - hsa_avg - ) AS avg_duration, - MIN(duration) AS min_duration, - MAX(duration) AS max_duration, - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - hsa_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - hsa_avg - ) - ) - ) / (COUNT(*) - 1) AS 
variance_duration, - SQRT( - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - hsa_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - hsa_avg - ) - ) - ) / (COUNT(*) - 1) - ) AS std_dev_duration - FROM - `regions` - WHERE - category LIKE 'HSA_%' - ), - marker_times AS ( - WITH - marker_avg AS ( - SELECT - AVG(duration) AS avg_duration - FROM - `markers` - ) - SELECT - 'MARKER_API' AS domain, - COUNT(*) AS calls, - SUM(duration) AS total_duration, - CAST(SUM(duration * duration) AS REAL) AS sqr_duration, - ( - SELECT - avg_duration - FROM - marker_avg - ) AS avg_duration, - MIN(duration) AS min_duration, - MAX(duration) AS max_duration, - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - marker_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - marker_avg - ) - ) - ) / (COUNT(*) - 1) AS variance_duration, - SQRT( - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - marker_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - marker_avg - ) - ) - ) / (COUNT(*) - 1) - ) AS std_dev_duration - FROM - `markers` - ), - rccl_times AS ( - WITH - rccl_avg AS ( - SELECT - AVG(duration) AS avg_duration - FROM - `rccl` - ) - SELECT - 'RCCL_API' AS domain, - COUNT(*) AS calls, - SUM(duration) AS total_duration, - CAST(SUM(duration * duration) AS REAL) AS sqr_duration, - ( - SELECT - avg_duration - FROM - rccl_avg - ) AS avg_duration, - MIN(duration) AS min_duration, - MAX(duration) AS max_duration, - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - rccl_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - rccl_avg - ) - ) - ) / (COUNT(*) - 1) AS variance_duration, - SQRT( - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - rccl_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - rccl_avg - ) - ) - ) / (COUNT(*) - 1) - ) AS std_dev_duration - FROM - `rccl` - ), - rocdecode_times AS ( - WITH - rocdecode_avg AS ( - SELECT - AVG(duration) AS avg_duration - FROM - `rocdecode` - ) - SELECT - 
'ROCDECODE_API' AS domain, - COUNT(*) AS calls, - SUM(duration) AS total_duration, - CAST(SUM(duration * duration) AS REAL) AS sqr_duration, - ( - SELECT - avg_duration - FROM - rocdecode_avg - ) AS avg_duration, - MIN(duration) AS min_duration, - MAX(duration) AS max_duration, - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - rocdecode_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - rocdecode_avg - ) - ) - ) / (COUNT(*) - 1) AS variance_duration, - SQRT( - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - rocdecode_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - rocdecode_avg - ) - ) - ) / (COUNT(*) - 1) - ) AS std_dev_duration - FROM - `rocdecode` - ), - rocjpeg_times AS ( - WITH - rocjpeg_avg AS ( - SELECT - AVG(duration) AS avg_duration - FROM - `rocjpeg` - ) - SELECT - 'ROCJPEG_API' AS domain, - COUNT(*) AS calls, - SUM(duration) AS total_duration, - CAST(SUM(duration * duration) AS REAL) AS sqr_duration, - ( - SELECT - avg_duration - FROM - rocjpeg_avg - ) AS avg_duration, - MIN(duration) AS min_duration, - MAX(duration) AS max_duration, - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - rocjpeg_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - rocjpeg_avg - ) - ) - ) / (COUNT(*) - 1) AS variance_duration, - SQRT( - SUM( - ( - duration - ( - SELECT - avg_duration - FROM - rocjpeg_avg - ) - ) * ( - duration - ( - SELECT - avg_duration - FROM - rocjpeg_avg - ) - ) - ) / (COUNT(*) - 1) - ) AS std_dev_duration - FROM - `rocjpeg` - ), - all_domains AS ( - SELECT - * - FROM - kernel_times - UNION ALL - SELECT - * - FROM - mcopy_times - UNION ALL - SELECT - * - FROM - malloc_times - UNION ALL - SELECT - * - FROM - scratch_times - UNION ALL - SELECT - * - FROM - hip_api_times - UNION ALL - SELECT - * - FROM - hsa_api_times - UNION ALL - SELECT - * - FROM - marker_times - UNION ALL - SELECT - * - FROM - rccl_times - UNION ALL - SELECT - * - FROM - rocdecode_times - UNION ALL - SELECT - * - FROM - 
rocjpeg_times - ), - total_duration AS ( - SELECT - SUM(total_duration) AS grand_total_duration - FROM - all_domains - ) -SELECT - AD.domain AS name, - AD.calls, - AD.total_duration AS "DURATION (nsec)", - AD.sqr_duration AS "SQR (nsec)", - AD.avg_duration AS "AVERAGE (nsec)", - (CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)", - AD.min_duration AS "MIN (nsec)", - AD.max_duration AS "MAX (nsec)", - AD.variance_duration AS "VARIANCE", - AD.std_dev_duration AS "STD_DEV" -FROM - all_domains AD - CROSS JOIN total_duration TD -ORDER BY - AD.total_duration DESC; diff --git a/projects/rocprofiler-sdk/tests/CMakeLists.txt b/projects/rocprofiler-sdk/tests/CMakeLists.txt index 77f8e3109e..5dcfd44d83 100644 --- a/projects/rocprofiler-sdk/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/CMakeLists.txt @@ -78,6 +78,9 @@ add_subdirectory(openmp-tools) add_subdirectory(rocdecode) add_subdirectory(rocjpeg) +# rocpd validation tests +add_subdirectory(rocpd) + # rocprofv3 validation tests add_subdirectory(rocprofv3) diff --git a/projects/rocprofiler-sdk/tests/pytest-packages/tests/rocprofv3.py b/projects/rocprofiler-sdk/tests/pytest-packages/tests/rocprofv3.py index 5c45ef211c..600a6bb85b 100644 --- a/projects/rocprofiler-sdk/tests/pytest-packages/tests/rocprofv3.py +++ b/projects/rocprofiler-sdk/tests/pytest-packages/tests/rocprofv3.py @@ -196,7 +196,7 @@ def test_rocpd_data( view_mapping = { "hip_api": "regions", "hsa_api": "regions", - "marker_api": "markers", + "marker_api": "regions_and_samples", "rccl_api": "regions", "rocdecode_api": "regions", "rocjpeg_api": "regions", diff --git a/projects/rocprofiler-sdk/tests/rocpd/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocpd/CMakeLists.txt new file mode 100644 index 0000000000..1d365006cf --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocpd/CMakeLists.txt @@ -0,0 +1,46 @@ +# +# rocpd command-line tests +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + 
rocprofiler-sdk-tests-rocpd + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) +find_package(Python3 REQUIRED COMPONENTS Interpreter) + +set(rocpd-env + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}" + "PYTHONPATH=${rocprofiler-sdk_LIB_DIR}/python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}/site-packages" + ) + +######################################################################################### +# +# Test the --help flag works +# +######################################################################################### + +foreach(_SUBPARSER "" "-convert") + string(REPLACE "-" "" _CMD "${_SUBPARSER}") + add_test(NAME rocpd${_SUBPARSER}-help COMMAND ${Python3_EXECUTABLE} -m rocpd ${_CMD} + --help) + + set_tests_properties( + rocpd${_SUBPARSER}-help + PROPERTIES TIMEOUT 120 LABELS "integration-tests;rocpd" ENVIRONMENT + "${rocpd-env}" FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") +endforeach() + +foreach(_MODULE "csv" "pftrace" "otf2") + add_test(NAME rocpd-module-${_MODULE}-help COMMAND ${Python3_EXECUTABLE} -m + rocpd.${_MODULE} --help) + + set_tests_properties( + rocpd-module-${_MODULE}-help + PROPERTIES TIMEOUT 120 LABELS "integration-tests;rocpd" ENVIRONMENT + "${rocpd-env}" FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") +endforeach() diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt index 436ce1302f..e1699d90b6 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt @@ -44,3 +44,5 @@ add_subdirectory(agent-index) add_subdirectory(negate-aggregate-tracing-options) add_subdirectory(minimum-bytes) add_subdirectory(conversion-script) +add_subdirectory(python-bindings) +add_subdirectory(rocpd) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/CMakeLists.txt new file 
mode 100644 index 0000000000..26d75ec0f1 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/CMakeLists.txt @@ -0,0 +1,76 @@ +# +# rocprofv3 python bindings for roctx test(s) +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-tests-rocprofv3-python-binding + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) + +if(NOT Python3_EXECUTABLE) + find_package(Python3 3.6 REQUIRED COMPONENTS Interpreter) +endif() + +if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer") + set(LOG_LEVEL "warning") # info produces memory leak +else() + set(LOG_LEVEL "info") +endif() + +string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") + +set(tracing-env + "${PRELOAD_ENV}" + "PYTHONPATH=${rocprofiler-sdk_LIB_DIR}/python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}/site-packages" + ) + +rocprofiler_configure_pytest_files(CONFIG pytest.ini marker.py COPY validate.py + conftest.py) + +add_test( + NAME rocprofv3-trace-roctx-python-bindings-execute + COMMAND + $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --marker-trace --summary -u sec -d + ${CMAKE_CURRENT_BINARY_DIR}/marker-python-bindings -o out --output-format csv + json pftrace --log-level ${LOG_LEVEL} -- ${Python3_EXECUTABLE} + ${CMAKE_CURRENT_BINARY_DIR}/marker.py) + +set_tests_properties( + rocprofv3-trace-roctx-python-bindings-execute + PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT "${tracing-env}") + +add_test( + NAME rocprofv3-trace-roctx-python-bindings-validate + COMMAND + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --agent-input + ${CMAKE_CURRENT_BINARY_DIR}/marker-python-bindings/out_agent_info.csv + --marker-input + ${CMAKE_CURRENT_BINARY_DIR}/marker-python-bindings/out_marker_api_trace.csv + --json-input ${CMAKE_CURRENT_BINARY_DIR}/marker-python-bindings/out_results.json + --pftrace-input + ${CMAKE_CURRENT_BINARY_DIR}/marker-python-bindings/out_results.pftrace) + +set(VALIDATION_FILES + 
${CMAKE_CURRENT_BINARY_DIR}/marker-python-bindings/out_agent_info.csv + ${CMAKE_CURRENT_BINARY_DIR}/marker-python-bindings/out_marker_api_trace.csv + ${CMAKE_CURRENT_BINARY_DIR}/marker-python-bindings/out_results.json + ${CMAKE_CURRENT_BINARY_DIR}/marker-python-bindings/out_results.pftrace) + +set_tests_properties( + rocprofv3-trace-roctx-python-bindings-validate + PROPERTIES TIMEOUT + 45 + LABELS + "integration-tests" + DEPENDS + "rocprofv3-trace-roctx-python-bindings-execute" + DISABLED + "${TRANSPOSE_ROCTRACER_ROCTX_DISABLED}" + FAIL_REGULAR_EXPRESSION + "AssertionError" + ATTACHED_FILES_ON_FAIL + "${VALIDATION_FILES}") diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/conftest.py b/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/conftest.py new file mode 100644 index 0000000000..1495c3a66d --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/conftest.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +import csv +import pytest +import json + + +from rocprofiler_sdk.pytest_utils.dotdict import dotdict +from rocprofiler_sdk.pytest_utils import collapse_dict_list +from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader + + +def pytest_addoption(parser): + parser.addoption( + "--agent-input", + action="store", + help="Path to agent info CSV file.", + ) + parser.addoption( + "--marker-input", + action="store", + help="Path to marker API tracing CSV file.", + ) + parser.addoption( + "--json-input", + action="store", + help="Path to JSON file.", + ) + parser.addoption( + "--pftrace-input", + action="store", + help="Path to Perfetto trace file.", + ) + + +@pytest.fixture +def agent_info_input_data(request): + filename = request.config.getoption("--agent-input") + data = [] + with open(filename, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + + return data + + +@pytest.fixture +def marker_input_data(request): + filename = request.config.getoption("--marker-input") + 
data = [] + with open(filename, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + + return data + + +@pytest.fixture +def json_data(request): + filename = request.config.getoption("--json-input") + with open(filename, "r") as inp: + return dotdict(collapse_dict_list(json.load(inp))) + + +@pytest.fixture +def pftrace_data(request): + filename = request.config.getoption("--pftrace-input") + return PerfettoReader(filename).read()[0] diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/marker.py b/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/marker.py new file mode 100644 index 0000000000..3498d3e876 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/marker.py @@ -0,0 +1,59 @@ +#!@Python3_EXECUTABLE@ + +import os +import roctx +import random + +from roctx.context_decorators import RoctxRange + +_prefix = "" + + +@RoctxRange("fib") +def fib(n, nmin): + with RoctxRange(f"fib(n={n})" if n >= nmin else None): + return n if n < 2 else (fib(n - 1, nmin) + fib(n - 2, nmin)) + + +@RoctxRange("sum") +def _sum(arr): + with RoctxRange(f"sum(nelem={len(arr)})"): + return sum(arr) + + +def inefficient(n): + roctx.rangePush(f"inefficient({n})") + a = 0 + for i in range(n): + a += i + for j in range(n): + a += j + _len = a * n * n + _arr = [random.random() for _ in range(_len)] + _ret = _sum(_arr) + roctx.rangePop() + return _ret + + +def run(n): + idx = roctx.rangeStart(f"run({n})") + _ret_a = fib(n, max([n / 2, n - 10])) + _ret_b = inefficient(n) + roctx.rangeStop(idx) + return (_ret_a, _ret_b) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("-n", "--num-iterations", help="Number", type=int, default=3) + parser.add_argument("-v", "--value", help="Starting value", type=int, default=20) + args = parser.parse_args() + + _prefix = os.path.basename(__file__) + roctx.mark(f"iterations: {args.num_iterations}") + for i in 
range(args.num_iterations): + with RoctxRange("main loop"): + ans_a, ans_b = run(args.value) + print(f"[{_prefix}] [{i}] result of run({args.value}) = {ans_a}, {ans_b}\n") diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/pytest.ini b/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/pytest.ini new file mode 100644 index 0000000000..5e1e1c14a0 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/pytest.ini @@ -0,0 +1,5 @@ + +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = validate.py +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/validate.py new file mode 100644 index 0000000000..8430c38e1f --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/python-bindings/validate.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 + +import sys +import pytest +import re +import os + +from collections import Counter + + +def test_agent_info(agent_info_input_data): + logical_node_id = max([int(itr["Logical_Node_Id"]) for itr in agent_info_input_data]) + + assert logical_node_id + 1 == len(agent_info_input_data) + + for row in agent_info_input_data: + agent_type = row["Agent_Type"] + assert agent_type in ("CPU", "GPU") + if agent_type == "CPU": + assert int(row["Cpu_Cores_Count"]) > 0 + assert int(row["Simd_Count"]) == 0 + assert int(row["Max_Waves_Per_Simd"]) == 0 + else: + assert int(row["Cpu_Cores_Count"]) == 0 + assert int(row["Simd_Count"]) > 0 + assert int(row["Max_Waves_Per_Simd"]) > 0 + + +def extract_number(pattern, string): + match = re.match(pattern, string) + if match: + return int(match.group(1)) + else: + raise ValueError(f"Pattern '{pattern}' not found in '{string}'.") + + +def find_key_with_substring(data, substring): + return next((k for k in data.keys() if substring in k), None) + + +def check_tot_data(tot_data): + iteration_msg = 
find_key_with_substring(tot_data, "iterations:") + assert tot_data[iteration_msg] == 1 + + num_iterations = extract_number(r"iterations: (\d+)", iteration_msg) + assert tot_data["main loop"] == num_iterations + + if num_iterations > 0: + run_msg = find_key_with_substring(tot_data, "run") + if run_msg is not None: + value = extract_number(r"run\((\d+)\)", run_msg) + + assert tot_data[f"run({value})"] == num_iterations + + assert "fib" in tot_data.keys() + assert tot_data[f"fib(n={value})"] == num_iterations + for n in range(max((value + 1) // 2, value - 10), value + 1):  # ceil(nmin)..value: marker.py names a range for every n >= max(value / 2, value - 10) + assert f"fib(n={n})" in tot_data.keys() + + assert tot_data[f"inefficient({value})"] == num_iterations + assert tot_data["sum"] == num_iterations + sum_msg = find_key_with_substring(tot_data, "sum(nelem=") + assert tot_data[sum_msg] == num_iterations + + +def test_marker_api_trace(marker_input_data): + functions = [] + + for row in marker_input_data: + assert row["Domain"] in [ + "MARKER_CORE_API", + "MARKER_CONTROL_API", + "MARKER_NAME_API", + ] + assert int(row["Process_Id"]) > 0 + assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int( + row["Process_Id"] + ) + assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + + functions.append(row["Function"]) + + check_tot_data(Counter(functions)) + + +def test_marker_api_trace_json(json_data): + data = json_data["rocprofiler-sdk-tool"] + + def get_kind_name(kind_id): + return data.strings.buffer_records[kind_id]["kind"] + + valid_domain = ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API") + + marker_data = data.buffer_records.marker_api + + for marker in marker_data: + assert get_kind_name(marker["kind"]) in valid_domain + assert marker["thread_id"] >= data["metadata"]["pid"] + assert marker["end_timestamp"] >= marker["start_timestamp"] + + tot_data = Counter([m["value"] for m in data.strings.marker_api]) + check_tot_data(tot_data) + + +def test_perfetto_data(pftrace_data, json_data): + import rocprofiler_sdk.tests.rocprofv3 
as rocprofv3 + + rocprofv3.test_perfetto_data( + pftrace_data, json_data, ("hip", "hsa", "marker", "kernel", "memory_copy") + ) + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/CMakeLists.txt new file mode 100644 index 0000000000..77c388dfaf --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/CMakeLists.txt @@ -0,0 +1,187 @@ +# +# rocprofv3 rocpd tests +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-sdk-tests-rocprofv3-rocpd + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) +find_package(Python3 REQUIRED) # must precede tracing-env, which expands Python3_VERSION_MAJOR/MINOR + +set(tracing-env + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}" + "PYTHONPATH=${rocprofiler-sdk_LIB_DIR}/python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}/site-packages" + "OMPI_ALLOW_RUN_AS_ROOT=1" + "OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1") + +rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY conftest.py validate.py) + +find_package(MPI) + +if(MPI_FOUND) + set(MULTIPROC_IS_DISABLED OFF) + set(MULTIPROC_LAUNCHER ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 + ${MPIEXEC_PREFLAGS}) +else() + set(MULTIPROC_IS_DISABLED ON) + set(MULTIPROC_LAUNCHER) +endif() + +######################################################################################### +# +# generate rocpd database and the old-way outputs csv, otf2, perfetto to compare +# +######################################################################################### + +add_test( + NAME rocprofv3-test-rocpd-execute + COMMAND + $<TARGET_FILE:rocprofiler-sdk::rocprofv3> -d + ${CMAKE_CURRENT_BINARY_DIR}/%tag%-test -o out --output-format rocpd json + --runtime-trace --kernel-rename --output-config --pmc SQ_WAVES -- + $<TARGET_FILE:transpose> 2 500 50) + +set_tests_properties( + rocprofv3-test-rocpd-execute + PROPERTIES TIMEOUT 120 LABELS "integration-tests;rocpd" ENVIRONMENT "${tracing-env}" + 
FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}") + +add_test( + NAME rocprofv3-test-rocpd-execute-multiproc + COMMAND + ${MULTIPROC_LAUNCHER} ${Python3_EXECUTABLE} + $<TARGET_FILE:rocprofiler-sdk::rocprofv3> -d + ${CMAKE_CURRENT_BINARY_DIR}/%tag%-mp-test -o out_transpose_%rank% --output-format + rocpd json --runtime-trace --kernel-rename --output-config --pmc SQ_WAVES -- + $<TARGET_FILE:transpose> 1 250 50) + +set_tests_properties( + rocprofv3-test-rocpd-execute-multiproc + PROPERTIES TIMEOUT + 45 + LABELS + "integration-tests;rocpd" + ENVIRONMENT + "${tracing-env}" + FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}" + DISABLED + "${MULTIPROC_IS_DISABLED}") + +######################################################################################### +# +# OTF2 generation +# +######################################################################################### + +add_test( + NAME rocprofv3-test-rocpd-otf2-generation + COMMAND + ${Python3_EXECUTABLE} -m rocpd convert -f otf2 --kernel-rename -d + ${CMAKE_CURRENT_BINARY_DIR}/rocpd-output-test -i + ${CMAKE_CURRENT_BINARY_DIR}/transpose-test/out_results.db) + +set_tests_properties( + rocprofv3-test-rocpd-otf2-generation + PROPERTIES TIMEOUT 120 LABELS "integration-tests;rocpd" ENVIRONMENT "${tracing-env}" + FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}") + +add_test( + NAME rocprofv3-test-rocpd-otf2-generation-multiproc + COMMAND + ${Python3_EXECUTABLE} -m rocpd convert -f otf2 --kernel-rename -d + ${CMAKE_CURRENT_BINARY_DIR}/rocpd-output-test -o rocpd-mp-output-test -i + ${CMAKE_CURRENT_BINARY_DIR}/transpose-mp-test/out_transpose_0_results.db + ${CMAKE_CURRENT_BINARY_DIR}/transpose-mp-test/out_transpose_1_results.db) + +set_tests_properties( + rocprofv3-test-rocpd-otf2-generation-multiproc + PROPERTIES TIMEOUT + 45 + LABELS + "integration-tests;rocpd" + ENVIRONMENT + "${tracing-env}" + FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}" + DISABLED + "${MULTIPROC_IS_DISABLED}") + 
+######################################################################################### +# +# perfetto generate +# +######################################################################################### + +add_test( + NAME rocprofv3-test-rocpd-perfetto-generation + COMMAND + ${Python3_EXECUTABLE} -m rocpd convert -f pftrace --kernel-rename -d + ${CMAKE_CURRENT_BINARY_DIR}/rocpd-output-test -i + ${CMAKE_CURRENT_BINARY_DIR}/transpose-test/out_results.db) + +set_tests_properties( + rocprofv3-test-rocpd-perfetto-generation + PROPERTIES TIMEOUT 120 LABELS "integration-tests;rocpd" ENVIRONMENT "${tracing-env}" + FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}") + +add_test( + NAME rocprofv3-test-rocpd-perfetto-generation-multiproc + COMMAND + ${Python3_EXECUTABLE} -m rocpd convert -f perfetto --kernel-rename + --group-by-queue -d ${CMAKE_CURRENT_BINARY_DIR}/rocpd-output-test -o out_mp -i + ${CMAKE_CURRENT_BINARY_DIR}/transpose-mp-test/out_transpose_0_results.db + ${CMAKE_CURRENT_BINARY_DIR}/transpose-mp-test/out_transpose_1_results.db) + +set_tests_properties( + rocprofv3-test-rocpd-perfetto-generation-multiproc + PROPERTIES TIMEOUT + 45 + LABELS + "integration-tests;rocpd" + ENVIRONMENT + "${tracing-env}" + FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}" + DISABLED + "${MULTIPROC_IS_DISABLED}") + +######################################################################################### +# +# CSV generate +# +######################################################################################### + +add_test( + NAME rocprofv3-test-rocpd-csv-generation + COMMAND + ${Python3_EXECUTABLE} -m rocpd convert -f csv --kernel-rename -d + ${CMAKE_CURRENT_BINARY_DIR}/rocpd-output-test -i + ${CMAKE_CURRENT_BINARY_DIR}/transpose-test/out_results.db) + +set_tests_properties( + rocprofv3-test-rocpd-csv-generation + PROPERTIES TIMEOUT 45 LABELS "integration-tests;rocpd" ENVIRONMENT "${tracing-env}" + FAIL_REGULAR_EXPRESSION 
"${ROCPROFILER_DEFAULT_FAIL_REGEX}") + +######################################################################################### +# +# Validation +# +######################################################################################### + +add_test( + NAME rocprofv3-test-rocpd-validation + COMMAND + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input + ${CMAKE_CURRENT_BINARY_DIR}/transpose-test/out_results.json --otf2-input + ${CMAKE_CURRENT_BINARY_DIR}/rocpd-output-test/out_results.otf2 --pftrace-input + ${CMAKE_CURRENT_BINARY_DIR}/rocpd-output-test/out_results.pftrace) + +set_tests_properties( + rocprofv3-test-rocpd-validation + PROPERTIES TIMEOUT 45 LABELS "integration-tests;rocpd" ENVIRONMENT "${tracing-env}" + FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}") diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/conftest.py b/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/conftest.py new file mode 100644 index 0000000000..73faaed82e --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/conftest.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +# MIT License +# +# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import pandas as pd +import pytest +import json +import os +import io + +from rocprofiler_sdk.pytest_utils.dotdict import dotdict +from rocprofiler_sdk.pytest_utils import collapse_dict_list +from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader +from rocprofiler_sdk.pytest_utils.otf2_reader import OTF2Reader + + +def pytest_addoption(parser): + parser.addoption( + "--json-input", + action="store", + help="Path to JSON file.", + ) + parser.addoption( + "--pftrace-input", + action="store", + help="Path to Perfetto trace file.", + ) + parser.addoption( + "--otf2-input", + action="store", + help="Path to OTF2 trace file.", + ) + parser.addoption( + "--summary-input", + action="store", + help="Path to summary markdown file.", + ) + + pd.set_option("display.width", 2000) + # increase debug display of pandas dataframes + for itr in ["rows", "columns", "colwidth"]: + pd.set_option(f"display.max_{itr}", None) + + +@pytest.fixture +def json_data(request): + filename = request.config.getoption("--json-input") + with open(filename, "r") as inp: + return dotdict(collapse_dict_list(json.load(inp))) + + +@pytest.fixture +def pftrace_data(request): + filename = request.config.getoption("--pftrace-input") + return PerfettoReader(filename).read()[0] + + +@pytest.fixture +def otf2_data(request): + filename = request.config.getoption("--otf2-input") + if not os.path.exists(filename): + raise FileNotFoundError(f"{filename} does not exist")  # the input is missing, which FileExistsError would misreport + return OTF2Reader(filename).read()[0] + + 
+@pytest.fixture +def summary_data(request): + filename = request.config.getoption("--summary-input") + if not os.path.exists(filename): + raise FileNotFoundError(f"{filename} does not exist")  # the input is missing, which FileExistsError would misreport + + domains = {} + with open(filename, "r") as inp: + lines = [itr.strip() for itr in inp.readlines()] + lines = [itr for itr in lines if itr and not itr.startswith("|--")] + + def rework(x): + tmp = [itr.strip(" ") for itr in x.split("|")] + tmp = [itr.strip() for itr in tmp if len(itr.strip()) > 0] + if tmp[0] == "NAME": + tmp = [f'"{itr}"' for itr in tmp] + else: + tmp[0] = f'"{tmp[0]}"' + tmp[1] = f'"{tmp[1]}"' + return ",".join(tmp) + + def process_current_domain(_name, _list): + if _name and _list: + _list = [rework(itr) for itr in _list] + _contents = "{}\n".format("\n".join(_list)) + ifs = io.StringIO(_contents) + df = pd.read_csv(ifs) + domains[_name] = df + _name = None + _list = [] + return (None, []) + + current_name = None + current_list = [] + + for itr in lines: + if not itr.startswith("|") or itr.startswith("ROCPROFV3 "): + current_name, current_list = process_current_domain( + current_name, current_list + ) + current_name = itr.strip().strip(":").replace("ROCPROFV3 ", "", 1) + rpos = current_name.rfind(" SUMMARY") + if rpos >= 0: + current_name = current_name[:rpos] + else: + current_list += [itr] + + process_current_domain(current_name, current_list) + + return domains diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/pytest.ini b/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/pytest.ini new file mode 100644 index 0000000000..5e1e1c14a0 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/pytest.ini @@ -0,0 +1,5 @@ + +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = validate.py +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/validate.py new file mode 100644 index 0000000000..a47c56d538 --- 
/dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/rocpd/validate.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +# MIT License +# +# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import sys +import pytest + + +def test_perfetto_data(pftrace_data, json_data): + import rocprofiler_sdk.tests.rocprofv3 as rocprofv3 + + rocprofv3.test_perfetto_data( + pftrace_data, + json_data, + ("hip", "marker", "kernel", "memory_copy"), + ) + + +def test_otf2_data(otf2_data, json_data): + import rocprofiler_sdk.tests.rocprofv3 as rocprofv3 + + rocprofv3.test_otf2_data( + otf2_data, + json_data, + ("hip", "marker", "kernel", "memory_copy", "memory_allocation"), + ) + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code)