diff --git a/projects/rocprofiler-compute/src/argparser.py b/projects/rocprofiler-compute/src/argparser.py index 90319185b9..2506aa3941 100644 --- a/projects/rocprofiler-compute/src/argparser.py +++ b/projects/rocprofiler-compute/src/argparser.py @@ -139,14 +139,14 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): ) profile_group.add_argument( "-b", - "--ipblocks", + "--block", type=str, dest="ipblocks", metavar="", nargs="+", required=False, choices=["SQ", "SQC", "TA", "TD", "TCP", "TCC", "SPI", "CPC", "CPF"], - help="\t\t\tIP block filtering:\n\t\t\t SQ\n\t\t\t SQC\n\t\t\t TA\n\t\t\t TD\n\t\t\t TCP\n\t\t\t TCC\n\t\t\t SPI\n\t\t\t CPC\n\t\t\t CPF", + help="\t\t\tHardware block filtering:\n\t\t\t SQ\n\t\t\t SQC\n\t\t\t TA\n\t\t\t TD\n\t\t\t TCP\n\t\t\t TCC\n\t\t\t SPI\n\t\t\t CPC\n\t\t\t CPF", ) result = shutil.which("rocscope") @@ -253,7 +253,7 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): required=False, default=-1, type=int, - help="\t\t\tGPU device ID. (DEFAULT: ALL)", + help="\t\t\tTarget GPU device ID. (DEFAULT: ALL)", ) roofline_group.add_argument( "--kernel-names", @@ -441,11 +441,11 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): ) analyze_group.add_argument( "-b", - "--metric", + "--block", dest="filter_metrics", metavar="", nargs="+", - help="\t\tSpecify IP block/metric id(s) from --list-metrics for filtering.", + help="\t\tSpecify hardware block/metric id(s) from --list-metrics for filtering.", ) analyze_group.add_argument( "--gpu-id", diff --git a/projects/rocprofiler-compute/src/docs-2.x/analysis.md b/projects/rocprofiler-compute/src/docs-2.x/analysis.md index 1b7b1e14ee..a3cfd26661 100644 --- a/projects/rocprofiler-compute/src/docs-2.x/analysis.md +++ b/projects/rocprofiler-compute/src/docs-2.x/analysis.md @@ -164,7 +164,7 @@ SoC = {'MI200'} 2.1.25 -> L2-Fabric Write Latency ... ``` - 2. Choose your own customized subset of metrics with `-b` (a.k.a. 
`--metric`), or build your own config following [config_template](https://github.com/AMDResearch/omniperf/blob/main/src/omniperf_analyze/configs/panel_config_template.yaml). Below shows how to generate a report containing only metric 2 (a.k.a. System Speed-of-Light). + 2. Choose your own customized subset of metrics with `-b` (a.k.a. `--block`), or build your own config following [config_template](https://github.com/AMDResearch/omniperf/blob/main/src/omniperf_analyze/configs/panel_config_template.yaml). Below shows how to generate a report containing only metric 2 (a.k.a. System Speed-of-Light). ```shell-session $ omniperf analyze -p workloads/vcopy/MI200/ -b 2 -------- diff --git a/projects/rocprofiler-compute/src/docs-2.x/getting_started.md b/projects/rocprofiler-compute/src/docs-2.x/getting_started.md index b841fb0632..23cfc9b655 100644 --- a/projects/rocprofiler-compute/src/docs-2.x/getting_started.md +++ b/projects/rocprofiler-compute/src/docs-2.x/getting_started.md @@ -14,7 +14,7 @@ To collect the default set of data for all kernels in the target application, launch, e.g.: ```shell - $ omniperf profile -n vcopy_data -- ./vcopy 1048576 256 + $ omniperf profile -n vcopy_data -- ./vcopy -n 1048576 -b 256 ``` The app runs, each kernel is launched, and profiling results are generated. By default, results are written to e.g., ./workloads/vcopy_data (configurable via the `-n` argument). To collect all requested profile information, it may be required to replay kernels multiple times. @@ -27,7 +27,7 @@ - `-k`/`--kernel` enables filtering kernels by name. - `-d`/`--dispatch` enables filtering based on dispatch ID. - - `-b`/`--ipblocks` enables collects metrics for only the specified (one or more) hardware component blocks. + - `-b`/`--block` enables collecting metrics for only the specified (one or more) hardware component blocks. 
To view available metrics by IP Block you can use the `--list-metrics` argument: ```shell diff --git a/projects/rocprofiler-compute/src/docs-2.x/introduction.md b/projects/rocprofiler-compute/src/docs-2.x/introduction.md index 6e595b926a..c3a315abfe 100644 --- a/projects/rocprofiler-compute/src/docs-2.x/introduction.md +++ b/projects/rocprofiler-compute/src/docs-2.x/introduction.md @@ -19,10 +19,11 @@ Omniperf is a kernel level profiling tool for Machine Learning/HPC workloads run ## Features The Omniperf tool performs profiling based on all available hardware counters for the target accelerator. It provides high level performance analysis features including System Speed-of-Light, Hardware block level Speed-of-Light, Memory Chart Analysis, Roofline Analysis, Baseline Comparisons, and more... - -Both command line analysis and GUI analysis are supported. + +Both command line analysis and GUI analysis are supported. Detailed Feature List: + - MI100 support - MI200 support - Standalone GUI Analyzer @@ -36,7 +37,7 @@ Detailed Feature List: - System Speed-of-Light Panel - Kernel Statistic Panel - Memory Chart Analysis Panel -- Roofline Analysis Panel (*Supported on MI200 only, Ubuntu 20.04, SLES 15 SP3 or RHEL8*) +- Roofline Analysis Panel (_Supported on MI200 only, Ubuntu 20.04, SLES 15 SP3 or RHEL8_) - Command Processor (CP) Panel - Workgroup Manager (SPI) Panel - Wavefront Launch Panel @@ -52,10 +53,9 @@ Detailed Feature List: ## Compatible SoCs -| Platform | Status | -| :------- | :------------- | -| Vega 20 (MI50/60) | No | -| MI100 | Supported | -| MI200 | Supported | -| MI300 | In development | - +| Platform | Status | +| :---------------- | :------------- | +| Vega 20 (MI50/60) | No support | +| MI100 | Supported | +| MI200 | Supported | +| MI300 | In development | diff --git a/projects/rocprofiler-compute/src/docs-2.x/profiling.md b/projects/rocprofiler-compute/src/docs-2.x/profiling.md index 0caf98dd9c..2d953b3de6 100644 --- 
a/projects/rocprofiler-compute/src/docs-2.x/profiling.md +++ b/projects/rocprofiler-compute/src/docs-2.x/profiling.md @@ -69,16 +69,16 @@ General Options: Profile Options: -n , --name Assign a name to workload. - -p , --path Specify path to save workload. + -p , --path Specify path to save workload. -k [ ...], --kernel [ ...] Kernel filtering. -d [ ...], --dispatch [ ...] Dispatch ID filtering. - -b [ ...], --ipblocks [ ...] IP block filtering: + -b [ ...], --block [ ...] Hardware block filtering: SQ SQC TA TD - TCP + TCP TCC SPI CPC @@ -100,7 +100,7 @@ Standalone Roofline Options: L2 vL1D LDS - --device GPU device ID. (DEFAULT: ALL) + --device Target GPU device ID. (DEFAULT: ALL) --kernel-names Include kernel names in roofline plot. ```