Files
rocm-systems/projects/rocprofiler-sdk/source/lib/python/rocpd/__main__.py
T
systems-assistant[bot] 061948a5ec [rocpd] Adding merge and package submodules for rocpd (#164)
* adding ROCpd database merge

* adding ROCpd database merge concatenating all tables

* update merge script

  - copy all tables from files

* fix merge format

* Add package submodule, initial POC.  Need to refine

* Minor fixes and clean up duplicated code in package.py

* Revamp metadata layout, add wildcard and .rpdb parsing

* Add auto merge & package when > 5 DBs, add examples, don't use auto_merge when using sub-commands merge & package

* - Extend package/yaml inputs to all rocpd modules
- Improve handling more corner cases for bad input files when parsing input parameters (bad yaml files, bad .rpdb folder, folders as input)
- Changed to use UUID in merged filename instead of the time, in auto-merge algorithm

* Minor text fixes for consistency between modules

* Add more wildcard support and add package, merge tests

* Make changes based on review suggestions

* Move parsing packages into importer.py, simplified adding required params to a function

* fix package test by flattening input list before processing

* Integrate merge.py changes from Jonathan to add name-collision checks, recreating indexes, foreign key check (disabled for now, due to processing time)

* Rework rocpd.<submodule>.{add_args,process_args}

- add_args function returns a functor which accepts input and args
- time_window functor returned from add_args automatically applies time windowing of input

* change merge&package limit to 1, merge should create data views

* Move files by default instead of making copies

- copying can be enabled by passing "copy=True" or --copy cmdline argument

* refactor package to make the logic cleaner, set merge limit back to 5

* Allow automerge-limit param to override limit, change default back to 1.  Tests updated to use query, much quicker

* Update --help instructions for package

---------

Co-authored-by: acanadas <acanadas@amd.com>
Co-authored-by: a-canadasruiz <Araceli.CanadasRuiz@amd.com>
Co-authored-by: Young Hui <young.hui@amd.com>
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
2025-11-12 17:07:12 -05:00

363 řádky
12 KiB
Python

#!/usr/bin/env python3
###############################################################################
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
###############################################################################
from __future__ import absolute_import
# Module metadata: authorship, copyright notice and license identifier.
__author__ = "Advanced Micro Devices, Inc."
__copyright__ = "Copyright 2025, Advanced Micro Devices, Inc."
__license__ = "MIT"
def main(argv=None, config=None):
    """Main entry point for the rocpd command line tool.

    Builds the ``rocpd`` argument parser with the ``convert``, ``merge``,
    ``package``, ``query`` and ``summary`` sub-commands, parses ``argv`` and
    dispatches to the matching rocpd sub-module.

    Args:
        argv (list, optional): List of command line arguments handed to
            ``ArgumentParser.parse_args``. Defaults to None.
        config (optional): Existing output configuration. Only consulted by
            the ``convert`` command: when None, a new
            ``output_config.output_config`` is built from the processed
            arguments; otherwise ``config.update(**all_args)`` is called and
            its result is used.

    Returns:
        0 after printing the version information (``-v/--version``);
        None in every other case.
    """
    import argparse

    # Sub-modules are imported lazily, inside the entry point.
    from . import csv
    from . import merge
    from . import otf2
    from . import output_config
    from . import package
    from . import pftrace
    from . import query
    from . import summary
    from . import time_window
    from . import version_info
    from .importer import RocpdImportData

    # Epilog texts shown verbatim (RawTextHelpFormatter) under each
    # sub-command's --help output.
    convert_examples = """
Example usage:
Convert 1 database, output perfetto trace
$ rocpd convert -i db1.db --output-format pftrace
Convert 2 databases, output perfetto trace to path and filename, reduce time window to omit the first 30%
$ rocpd convert -i db1.db db2.db --output-format pftrace -d "./output/" -o "twoFileTraces" --start 30% --end 100%
Convert 6 databases, output CSV and perfetto trace formats
$ rocpd convert -i db{0..5}.db --output-format csv pftrace -d "~/output_folder/" -o "sixFileTraces"
Convert 2 databases, output CSV, OTF2, and perfetto trace formats
$ rocpd convert -i db{3,4}.db --output-format csv otf2 pftrace
"""
    merge_examples = """
Example usage:
Merge the three databases and output to a folder called merged3DBs
$ rocpd merge -i db0.db db1.db db2.db -d merged3DBs
Merge all the databases from the node0 folder and output to the node0_output folder, with filename called largeMerged.db
$ rocpd merge -i node0/*.db -d node0_output -o largeMerged
"""
    package_examples = """
Example usage:
Index the three databases into a metadata file (index.yaml) in the current folder, just reference the databases where they are on the filesystem
$ rocpd package -i node0/db0.db node1/db1.db node2/db2.db
Package and copy/consolidate all the databases into a my_MPI_run_1.rpdb folder so it can be managed easier
$ rocpd package -i node0/db0.db node1/db1.db node2/db2.db -d my_MPI_run_1 --consolidate --copy
Package and copy/consolidate all the databases from my_MPI_run_1.rpdb folder append node5/db5.db and make new folder
$ rocpd package -i my_MPI_run_1.rpdb node5/db5.db -d my_MPI_run_1_append_5 --consolidate --copy
Use my_MPI_run_1.rpdb folder and move/consolidate node7/db7.db and re-use same .rpdb folder
$ rocpd package -i my_MPI_run_1.rpdb node7/db7.db -d my_MPI_run_1 --consolidate
"""
    query_examples = """
Example usage:
Query the first 5 rows of the 'rocpd_info_agents' view and output to console
$ rocpd query -i db0.db --query "SELECT * FROM rocpd_info_agents LIMIT 5"
Combine 4 databases and query the first 10 rows of the 'top_kernels' view and output to CSV file
$ rocpd query -i db{0..3}.db --query "SELECT * FROM top_kernels LIMIT 10" --format csv
"""
    summary_examples = """
Example usage:
Output all summaries to console and include domain summary for 1 database
$ rocpd summary -i db1.db --domain-summary
Aggregate 3 databases and output all summary files and include summary by rank/process ID, to csv file output
$ rocpd summary -i db{1..3}.db --summary-by-rank --format csv
Output all summaries to console and exclude all regions to save processing time
$ rocpd summary -i db0.db --region-categories NONE
Aggregate 2 databases and output all summary files to HTML, only include HIP and MARKER regions, include domain summary
$ rocpd summary -i db{0,1}.db --region-categories HIP MARKERS --domain-summary --format html
"""
    input_help_string = "Input path and filename to one or more database(s). Wildcards accepted, as well as .rpdb folders"

    # Top-level parser
    parser = argparse.ArgumentParser(
        prog="rocpd",
        description="Aggregate and/or analyze ROCm Profiling Data (rocpd)",
        allow_abbrev=False,
    )
    parser.add_argument(
        "-v",
        "--version",
        action="store_true",
        help="Print the version information and exit",
    )

    def add_required_args(_parser):
        """Add the mandatory -i/--input option and return its argument group."""
        _required_params = _parser.add_argument_group("Required options")
        _required_params.add_argument(
            "-i",
            "--input",
            required=True,
            type=output_config.check_file_exists,
            nargs="+",
            help=input_help_string,
        )
        return _required_params

    # Add the subparsers
    subparsers = parser.add_subparsers(dest="command")

    def add_command(name, description, epilog):
        """Register one sub-command; every sub-command shares the same parser options."""
        return subparsers.add_parser(
            name,
            description=description,
            allow_abbrev=False,
            formatter_class=argparse.RawTextHelpFormatter,
            epilog=epilog,
        )

    converter = add_command(
        "convert", "Convert rocPD data into another data format", convert_examples
    )
    merger = add_command(
        "merge", "Generate merged database from rocPD databases", merge_examples
    )
    packager = add_command(
        "package", "Package database files into .rpdb output", package_examples
    )
    query_reporter = add_command(
        "query", "Generate output on a query", query_examples
    )
    generate_summary = add_command(
        "summary", "Generate summary views from rocPD data", summary_examples
    )

    def get_output_type(val):
        """Lower-case the requested format and map 'perfetto' onto 'pftrace'."""
        return val.lower().replace("perfetto", "pftrace")

    # add required options for each subparser
    converter_required_params = add_required_args(converter)
    converter_required_params.add_argument(
        "-f",
        "--output-format",
        help="For adding output format (supported formats: csv, pftrace, otf2)",
        nargs="+",
        default=None,
        choices=("csv", "pftrace", "otf2"),
        type=get_output_type,
        required=True,
    )
    add_required_args(merger)
    add_required_args(packager)
    add_required_args(query_reporter)
    add_required_args(generate_summary)

    # Each <module>.add_args(...) call registers that module's options on the
    # subparser and returns a callable that is later invoked as
    # ``callable(import_data, args)`` and yields a mapping of keyword arguments.
    # The list literals keep the registration order of the original code.

    # converter: add args from any sub-modules
    process_converter_args = [
        output_config.add_args(converter),
        output_config.add_generic_args(converter),
        pftrace.add_args(converter),
        csv.add_args(converter),
        otf2.add_args(converter),
        time_window.add_args(converter),
    ]

    # merge: subparser args
    process_merger_args = [merge.add_args(merger)]

    # package: subparser args
    process_packager_args = [package.add_args(packager)]

    # query: subparser args
    process_query_reporter_args = [
        output_config.add_args(query_reporter),
        query.add_args(query_reporter),
        time_window.add_args(query_reporter),
    ]

    # summary: subparser args
    process_generate_summary_args = [
        output_config.add_args(generate_summary),
        summary.add_args(generate_summary),
        time_window.add_args(generate_summary),
    ]

    # parse the command line arguments
    args = parser.parse_args(argv)

    if args.version:
        for key, itr in version_info.items():
            # the individual major/minor/patch components are not printed
            if key in ("major", "minor", "patch"):
                continue
            print(f" {key:>16}: {itr}")
        return 0

    # error check the command line arguments, if no subparser command is given, print the help message
    if args.command is None:
        parser.print_help()
        return

    def load_import_data():
        """Build the rocpd import data object for the parsed input files."""
        return RocpdImportData(
            args.input,
            # fall back to the package default when the sub-command does not
            # define an automerge_limit attribute
            automerge_limit=getattr(
                args, "automerge_limit", package.IDEAL_NUMBER_OF_DATABASE_FILES
            ),
        )

    def collect_args(processors, import_data):
        """Run every argument processor and merge their results into one dict."""
        collected = {}
        for process in processors:
            collected.update(process(import_data, args))
        return collected

    # if the user requested converter, process the conversion
    if args.command == "convert":
        import_data = load_import_data()
        all_args = collect_args(process_converter_args, import_data)

        # setup the config args
        # NOTE(review): when a config is supplied, this relies on
        # config.update(...) returning the configuration object - confirm.
        config = (
            output_config.output_config(**all_args)
            if config is None
            else config.update(**all_args)
        )

        # process each requested output format
        format_handlers = {
            "pftrace": pftrace.write_pftrace,
            "csv": csv.write_csv,
            "otf2": otf2.write_otf2,
        }
        for out_format in args.output_format:
            handler = format_handlers.get(out_format)
            if handler is None:
                print(f"Warning: Unsupported output format '{out_format}'")
                continue
            print(f"Converting database(s) to {out_format} format:")
            handler(import_data, config)

    # if the user requested merge module, execute the merge
    elif args.command == "merge":
        # no import data object is constructed; the processors receive None
        merge_args = collect_args(process_merger_args, None)
        merge.execute(args.input, **merge_args)

    # if the user requested package module, package up the database
    elif args.command == "package":
        # no import data object is constructed; the processors receive None
        packager_args = collect_args(process_packager_args, None)
        package.execute(args.input, **packager_args)

    # if the user requested query module, execute the query
    elif args.command == "query":
        import_data = load_import_data()
        query_args = collect_args(process_query_reporter_args, import_data)
        query.execute(
            import_data,
            args,
            **query_args,
        )

    # if the user requested a summary, generate the views
    elif args.command == "summary":
        import_data = load_import_data()
        summary_args = collect_args(process_generate_summary_args, import_data)
        summary.generate_all_summaries(import_data, **summary_args)

    print("Done. Exiting...")
# Run the command line tool when this file is executed directly.
# The value returned by main() is not forwarded as a process exit status here.
if __name__ == "__main__":
    main()