diff --git a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst index 31526a300c..89a0017671 100644 --- a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst +++ b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst @@ -264,9 +264,11 @@ Use the following command to view the available domains: .. note:: -Some settings can enable tracing for multiple domains, such as ``hip_api`` which will enable both ``hip_runtime_api`` and ``hip_compiler_api``. -And ``hsa_api`` which will enable all hsa domains, ``hsa_core_api``, ``hsa_amd_ext_api``, ``hsa_image_exit_api``, ``hsa_finalize_ext_api``. -The setting ``marker_api`` or ``roctx`` can be used to enable the roctx marker API tracing. + Some settings can enable tracing for multiple domains, such as: + + * ``hip_api`` which will enable both ``hip_runtime_api`` and ``hip_compiler_api``. + * ``hsa_api`` which will enable all hsa domains, ``hsa_core_api``, ``hsa_amd_ext_api``, ``hsa_image_ext_api``, and ``hsa_finalize_ext_api``. + * ``marker_api`` or ``roctx`` can be used to enable the roctx marker API tracing. For example, the following is a valid configuration: diff --git a/projects/rocprofiler-systems/docs/how-to/profiling-python-scripts.rst b/projects/rocprofiler-systems/docs/how-to/profiling-python-scripts.rst index 506520418d..d25d9ad74f 100644 --- a/projects/rocprofiler-systems/docs/how-to/profiling-python-scripts.rst +++ b/projects/rocprofiler-systems/docs/how-to/profiling-python-scripts.rst @@ -30,9 +30,9 @@ be the same size. .. note:: - Direct Perfetto output (using `--trace` or `ROCPROFSYS_USE_TRACE=ON`) has limited support for Artificial Intelligence (AI) and Machine Learning (ML) workloads. - Data from child threads is not captured. Instead, use ROCPD (`ROCPROFSYS_USE_ROCPD=ON`) as the output type. - For more information, see the :ref:`_rocprof_sys_rocpd_output` section. 
+ Direct Perfetto output (using ``--trace`` or ``ROCPROFSYS_USE_TRACE=ON``) has limited support for Artificial Intelligence (AI) and Machine Learning (ML) workloads. + Data from child threads is not captured. Instead, use ROCPD (``ROCPROFSYS_USE_ROCPD=ON``) as the output type. + For more information, see the :ref:`rocprof_sys_rocpd_output` section. Getting started ======================================== diff --git a/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst b/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst index baaf79cac4..29a2ad08da 100644 --- a/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst +++ b/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst @@ -74,9 +74,12 @@ about the system and the run, as follows: Metadata JSON Sample ----------------------------------------------------------------------- -.. code-block:: json +.. dropdown:: Sample JSON - { + .. code-block:: json + :linenos: + + { "rocprofiler-systems": { "metadata": { "info": { @@ -104,14 +107,8 @@ Metadata JSON Sample "USER": "rocm-dev", "CPU_FREQUENCY": 1972, "CPU_FEATURES": [ - "fpu", - "vme", - "de", - "pse", - "tsc", - "msr", - "pae", - "... etc. ..." + "fpu", "vme", "de", "pse", "tsc", "msr", "pae" + // ... more features ], "HW_CONCURRENCY": 12, "HW_PHYSICAL_CPU": 6, @@ -126,17 +123,9 @@ Metadata JSON Sample "ROCPROFSYS_ROCM_VERSION_PATCH": 1, "memory_maps_files": [ "/opt/rocm-6.3.1/lib/libhsa-amd-aqlprofile64.so.1.0.60301", - "/opt/rocm-6.3.1/lib/libhsa-runtime64.so.1.14.60301", - "/opt/rocm-6.3.1/lib/librocm_smi64.so.7.4.60301", - "/opt/rocm-6.3.1/lib/librocprofiler-register.so.0.4.0", - "/opt/rocm-6.3.1/lib/librocprofiler-sdk.so.0.5.0", - "/opt/rocm/lib/libhsa-amd-aqlprofile64.so.1", - "/opt/rocm/lib/libhsa-runtime64.so.1", - "/opt/rocm/lib/librocm_smi64.so.7", - "/opt/rocm/lib/librocprofiler-register.so.0", - "/opt/rocm/lib/librocprofiler-sdk.so.0", - "... etc. ..." 
- ], + "/opt/rocm-6.3.1/lib/libhsa-runtime64.so.1.14.60301" + // ... more files + ], "memory_maps": [ { "cereal_class_version": 0, @@ -156,12 +145,11 @@ Metadata JSON Sample "device": "", "inode": 0, "pathname": "/opt/rocm/lib/libhsa-runtime64.so.1" - }, - { - "... etc. ..." } - ], - "settings": { + // ... more mappings + ] + }, + "settings": { "cereal_class_version": 2, "ROCPROFSYS_OUTPUT_PREFIX": { "name": "output_prefix", @@ -169,15 +157,9 @@ Metadata JSON Sample "description": "Explicitly specify a prefix for all output files", "count": 1, "max_count": -1, - "cmdline": [ - "--rocprofiler-systems-output-prefix" - ], + "cmdline": ["--rocprofiler-systems-output-prefix"], "categories": [ - "filename", - "io", - "librocprof-sys", - "native", - "rocprofsys" + "filename", "io", "librocprof-sys", "native", "rocprofsys" ], "data_type": "string", "initial": "parallel-overhead-binary-rewrite/", @@ -185,21 +167,16 @@ Metadata JSON Sample "updated": "config", "enabled": true }, - { - ... etc. ... - }, + // ... more settings "command_line": [ "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/parallel-overhead.inst", - "--", - "10", - "12", - "1000" + "--", "10", "12", "1000" ], "environment": [ - ... etc . ... + // ... more environment variables ] - }, - "environment": [ + }, + "environment": [ { "key": "LD_LIBRARY_PATH", "value": "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib:/opt/rocm/lib" @@ -207,17 +184,15 @@ Metadata JSON Sample { "key": "LIBRARY_PATH", "value": "" - }, - { - etc ... } - ] - "output": { + // ... 
more environment variables + ], + "output": { "json": [ { "key": "wall_clock", "value": [ - "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.json" + "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.json" ] } ], @@ -225,7 +200,7 @@ Metadata JSON Sample { "key": "perfetto", "value": [ - "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/perfetto-trace.proto" + "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/perfetto-trace.proto" ] } ], @@ -233,13 +208,14 @@ Metadata JSON Sample { "key": "wall_clock", "value": [ - "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.txt" + "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.txt" ] } ] - }, - }, - } + } + } + } + } Configuring the ROCm Systems Profiler output ============================================ @@ -326,27 +302,23 @@ ROCm Profiling Data (rocpd) output Use the ``ROCPROFSYS_USE_ROCPD`` setting to trigger the ROCm Systems Profiler to output a SQLite3 database. The ROCm Profiling Data (or ``rocpd``) database will soon be the default output -format. To output in `rocpd` format, ROCProfiler-SDK version 1.0.0 or later is required (introduced in ROCm 7.0.0). +format. To output in ``rocpd`` format, ROCProfiler-SDK version 1.0.0 or later is required (introduced in ROCm 7.0.0). 
-Features of rocpd format ------------------------------------------------ +Features +-------------- -- **Comprehensive Data Model**: Consolidates all profiling artifacts including -execution traces, performance counters, hardware metrics, and contextual metadata -within a single SQLite3 database file (`.db` extension). -- **Standards-Compliant Access**: Supports querying through industry-standard SQL -interfaces including command-line tools (``sqlite3`` CLI), programming language -bindings (Python ``sqlite3`` module, C/C++ SQLite API), and database management -applications. -- **Advanced Analytics Integration**: Facilitates sophisticated post-processing -workflows through custom analytical scripts, automated reporting systems, and -integration with third-party visualization and analysis frameworks that provide -SQLite3 connectivity. +The features of the ``rocpd`` output format are: -Generating rocpd Output -+++++++++++++++++++++++ +* **Comprehensive Data Model**: Consolidates all profiling artifacts including execution traces, performance counters, hardware metrics, and contextual metadata within a single SQLite3 database file (``.db`` extension). -To generate profiling data in the rocpd format, add "ROCPROFSYS_USE_ROCPD=ON" to your profiling configuration. + +* **Standards-Compliant Access**: Supports querying through industry-standard SQL interfaces including command-line tools (``sqlite3`` CLI), programming language bindings (Python ``sqlite3`` module, C/C++ SQLite API), and database management applications. + +* **Advanced Analytics Integration**: Facilitates sophisticated post-processing workflows through custom analytical scripts, automated reporting systems, and integration with third-party visualization and analysis frameworks that provide SQLite3 connectivity. + +Generating rocpd output +------------------------- + +To generate profiling data in the rocpd format, add ``ROCPROFSYS_USE_ROCPD=ON`` to your profiling configuration. .. 
code-block:: shell @@ -357,15 +329,15 @@ To generate profiling data in the rocpd format, add "ROCPROFSYS_USE_ROCPD=ON" to See :doc:`configuring runtime options <./configuring-runtime-options>` for additional details on setting up the profiling configuration options. -Converting rocpd to Alternative Formats -+++++++++++++++++++++++++++++++++++++ +Converting rocpd to alternative formats +------------------------------------------ ROCm provides a Python module to convert the ``rocpd`` database to alternative output formats for specialized analysis and visualization workflows. For example, Open Trace Format 2 (OTF2), Perfetto Protocol Buffers (PFTrace), and Comma-Separated Values (CSV) tables. -See `rocpd tool documentation `_ +See :doc:`Using rocpd output format ` in the ROCProfiler-SDK documentation for additional information on these conversion tools. Native Perfetto output diff --git a/projects/rocprofiler-systems/docs/reference/development-guide.rst b/projects/rocprofiler-systems/docs/reference/development-guide.rst index 119ad83f7e..f1470ef414 100644 --- a/projects/rocprofiler-systems/docs/reference/development-guide.rst +++ b/projects/rocprofiler-systems/docs/reference/development-guide.rst @@ -16,7 +16,7 @@ Executables This section lists the ROCm Systems Profiler executables. 
rocprof-sys-avail: `source/bin/rocprof-sys-avail `_ ------------------------------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------------------------------------------------ The ``main`` routine of ``rocprof-sys-avail`` has three important sections: @@ -25,7 +25,7 @@ The ``main`` routine of ``rocprof-sys-avail`` has three important sections: * Printing hardware counters rocprof-sys-sample: `source/bin/rocprof-sys-sample `_ --------------------------------------------------------------------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------------------------------------------------------------------------------------- * Requires a command-line format of ``rocprof-sys-sample -- `` * Translates command-line options into environment variables @@ -33,7 +33,7 @@ rocprof-sys-sample: `source/bin/rocprof-sys-sample `` and a modified environment rocprof-sys-causal: `source/bin/rocprof-sys-causal `_ ---------------------------------------------------------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------------------------------------------------------------------------- When there is exactly one causal profiling configuration variant (which enables debugging), ``rocprof-sys-causal`` has a nearly identical design to ``rocprof-sys-sample`` @@ -46,7 +46,7 @@ the following actions take place for each variant: * the parent process waits for the child process to finish rocprof-sys-instrument: `source/bin/rocprof-sys-instrument `_ 
--------------------------------------------------------------------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- * Requires a command-line format of ``rocprof-sys-instrument -- `` * Allows the user to provide options specifying whether to perform runtime instrumentation, use binary rewrite, or @@ -71,31 +71,31 @@ Libraries ======================================== Common library: `source/lib/common `_ --------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------------------------------------------------------ * General header-only functionality used in multiple executables and/or libraries. * Not installed or exported outside of the build tree. Core library: `source/lib/core `_ --------------------------------------------------------------------------------------------------------------------------------- +----------------------------------------------------------------------------------------------------------------------------------------- * Static PIC library with functionality that does not depend on any components. * Not installed or exported outside of the build tree. Binary library: `source/lib/binary `_ --------------------------------------------------------------------------------------------------------------------------------- +-------------------------------------------------------------------------------------------------------------------------------------------- * Static PIC library with functionality for reading/analyzing binary info. * Mostly used by the causal profiling sections of ``librocprof-sys``. * Not installed or exported outside of the build tree. 
librocprof-sys: `source/lib/rocprof-sys `_ --------------------------------------------------------------------------------------------------------------------------------- +----------------------------------------------------------------------------------------------------------------------------------------------------- This is the main library encapsulating all the capabilities. librocprof-sys-dl: `source/lib/rocprof-sys-dl `_ ------------------------------------------------------------------------------------------------------------------------------------------ +----------------------------------------------------------------------------------------------------------------------------------------------------------------- This is a lightweight, front-end library for ``librocprof-sys`` which serves three primary purposes: @@ -106,7 +106,7 @@ This is a lightweight, front-end library for ``librocprof-sys`` which serves thr * Coordinates communication between ``librocprof-sys-user`` and ``librocprof-sys`` librocprof-sys-user: `source/lib/rocprof-sys-user `_ ------------------------------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------------------------------------------------ * Provides a set of functions and types for the users to add to their code, for example, disabling data collection globally or on a specific thread or