Reverts #1379 and properly migrates the docs --------- Co-authored-by: Matt Williams <matt.williams@amd.com>
Tento commit je obsažen v:
@@ -1,61 +0,0 @@
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# This file only contains a selection of the most common options. For a full
|
||||
# list see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
import re
|
||||
|
||||
'''
|
||||
html_theme is usually unchanged (rocm_docs_theme).
|
||||
flavor defines the site header display, select the flavor for the corresponding portals
|
||||
flavor options: rocm, rocm-docs-home, rocm-blogs, rocm-ds, instinct, ai-developer-hub, local, generic
|
||||
'''
|
||||
html_theme = "rocm_docs_theme"
|
||||
html_theme_options = {"flavor": "rocm-docs-home"}
|
||||
|
||||
|
||||
# This section turns on/off article info
|
||||
setting_all_article_info = True
|
||||
all_article_info_os = ["linux"]
|
||||
all_article_info_author = ""
|
||||
|
||||
# Dynamically extract component version
|
||||
# with open('../CMakeLists.txt', encoding='utf-8') as f:
|
||||
# pattern = r'.*\brocm_setup_version\(VERSION\s+([0-9.]+)[^0-9.]+' # Update according to each component's CMakeLists.txt
|
||||
# match = re.search(pattern,
|
||||
# f.read())
|
||||
# if not match:
|
||||
# raise ValueError("VERSION not found!")
|
||||
version_number = "1.0"
|
||||
|
||||
# for PDF output on Read the Docs
|
||||
project = "AQLprofile"
|
||||
author = "Advanced Micro Devices, Inc."
|
||||
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
|
||||
version = version_number
|
||||
release = version_number
|
||||
|
||||
external_toc_path = "./sphinx/_toc.yml" # Defines Table of Content structure definition path
|
||||
|
||||
'''
|
||||
Doxygen Settings
|
||||
Ensure Doxyfile is located at docs/doxygen.
|
||||
If the component does not need doxygen, delete this section for optimal build time
|
||||
'''
|
||||
#doxygen_root = "doxygen"
|
||||
#doxysphinx_enabled = False
|
||||
# doxygen_project = {
|
||||
# "name": "doxygen",
|
||||
# "path": "doxygen/xml",
|
||||
#}
|
||||
|
||||
# Add additional packages as needed
|
||||
extensions = [
|
||||
"rocm_docs",
|
||||
# "rocm_docs.doxygen",
|
||||
]
|
||||
|
||||
html_title = f"{project} {version_number} documentation"
|
||||
|
||||
external_projects_current_project = "AQLprofile"
|
||||
@@ -1,109 +0,0 @@
|
||||
.. meta::
|
||||
:description: A typical workflow for collecting PMC data
|
||||
:keywords: AQLprofile, ROCm, API, how-to, PMC
|
||||
|
||||
**********************************************************
|
||||
Performance Monitor Control (PMC) workflow with AQLprofile
|
||||
**********************************************************
|
||||
|
||||
This page describes a typical workflow for collecting PMC data using AQLprofile (as integrated in `ROCprofiler-SDK <https://github.com/ROCm/rocprofiler-sdk>`__).
|
||||
This workflow relies on creating a profile object, generating command packets, and iterating over output buffers:
|
||||
|
||||
1. **Intercept kernel dispatch**: The SDK intercepts kernel dispatch packets submitted to the GPU queue.
|
||||
2. **Create a profile object**: A profile/session object is created, specifying the agent (GPU), events (counters), and output buffers.
|
||||
3. **Generate command packets**: Start, stop, and read command packets are generated and injected into the queue around the kernel dispatch.
|
||||
4. **Submit packets and run the kernel**: The kernel and profiling packets are submitted to the GPU queue for execution.
|
||||
5. **Collect the output buffer**: After execution, the output buffer is read back from the GPU.
|
||||
6. **Iterate and extract the results**: The SDK iterates over the output buffer to extract and report counter results.
|
||||
|
||||
The SDK abstracts queue interception and packet management so tool developers can focus on results.
|
||||
|
||||
Key API code snippets
|
||||
=====================
|
||||
|
||||
These API snippets use the legacy interfaces from ``hsa_ven_amd_aqlprofile.h``. These are provided for understanding purposes only.
|
||||
For new development, refer to the updated APIs in ``aql_profile_v2.h``.
|
||||
|
||||
.. note::
|
||||
|
||||
The ROCprofiler-SDK is migrating to these newer interfaces in ``aql_profile_v2.h``. You should use the APIs in ``aql_profile_v2.h`` to stay up-to-date.
|
||||
|
||||
Define the events and profile
|
||||
-----------------------------
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
// Select events (counters) to collect
|
||||
hsa_ven_amd_aqlprofile_event_t events[] = {
|
||||
{ HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 2 }, // Example: SQ block, instance 0, counter 2
|
||||
{ HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 3 }
|
||||
};
|
||||
|
||||
// Create profile object
|
||||
hsa_ven_amd_aqlprofile_profile_t profile = {
|
||||
.agent = agent, // hsa_agent_t
|
||||
.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC,
|
||||
.events = events,
|
||||
.event_count = sizeof(events)/sizeof(events[0]),
|
||||
.parameters = nullptr,
|
||||
.parameter_count = 0,
|
||||
.output_buffer = {output_ptr, output_size},
|
||||
.command_buffer = {cmd_ptr, cmd_size}
|
||||
};
|
||||
|
||||
|
||||
Validate events
|
||||
---------------
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
bool valid = false;
|
||||
hsa_ven_amd_aqlprofile_validate_event(agent, &events[0], &valid);
|
||||
if (!valid) {
|
||||
// Handle invalid event
|
||||
}
|
||||
|
||||
|
||||
Generate command packets
|
||||
-------------------------
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
hsa_ext_amd_aql_pm4_packet_t start_pkt, stop_pkt, read_pkt;
|
||||
hsa_ven_amd_aqlprofile_start(&profile, &start_pkt);
|
||||
hsa_ven_amd_aqlprofile_stop(&profile, &stop_pkt);
|
||||
hsa_ven_amd_aqlprofile_read(&profile, &read_pkt);
|
||||
|
||||
|
||||
Submit packets and run the kernel
|
||||
---------------------------------
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
// Pseudocode: inject packets into HSA queue
|
||||
queue->Submit(&start_pkt);
|
||||
queue->Submit(&kernel_pkt);
|
||||
queue->Submit(&stop_pkt);
|
||||
queue->Submit(&read_pkt);
|
||||
|
||||
|
||||
Iterate and extract results
|
||||
----------------------------
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
hsa_ven_amd_aqlprofile_iterate_data(
|
||||
&profile,
|
||||
[](hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data,
|
||||
void* user_data) -> hsa_status_t {
|
||||
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) {
|
||||
printf("Event: block %d, id %d, value: %llu\n",
|
||||
info_data->pmc_data.event.block_name,
|
||||
info_data->pmc_data.event.counter_id,
|
||||
info_data->pmc_data.result);
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
nullptr
|
||||
);
|
||||
@@ -1,93 +0,0 @@
|
||||
.. meta::
|
||||
:description: A typical workflow for collecting detailed instruction-level traces
|
||||
:keywords: AQLprofile, ROCm, API, how-to, SQTT
|
||||
|
||||
***********************************************
|
||||
SQ Thread Trace (SQTT) workflow with AQLprofile
|
||||
***********************************************
|
||||
|
||||
The SQ Thread Trace workflow focuses on collecting detailed instruction-level traces.
|
||||
This workflow relies on creating a profile object, generating command packets, and iterating over output buffers:
|
||||
|
||||
1. **Intercept the kernel dispatch**: The SDK intercepts the kernel dispatch.
|
||||
2. **Create a SQTT profile object**: A profile object is created for SQTT, specifying trace parameters and output buffers.
|
||||
3. **Generate SQTT command packets**: Start, stop, and read packets for SQTT are generated and injected into the queue.
|
||||
4. **Submit packets and run the kernel**: The kernel and SQTT packets are submitted for execution.
|
||||
5. **Collect the trace buffer**: The trace output buffer is collected after execution.
|
||||
6. **Iterate and decode trace data**: The SDK iterates over the trace buffer and decodes the SQTT data for analysis.
|
||||
|
||||
The SDK abstracts queue interception and packet management so tool developers can focus on results.
|
||||
|
||||
Key API code snippets
|
||||
=====================
|
||||
|
||||
These API snippets use the legacy interfaces from ``hsa_ven_amd_aqlprofile.h``. These are provided for understanding purposes only.
|
||||
For new development, refer to the updated APIs in ``aql_profile_v2.h``.
|
||||
|
||||
In the `ROCprofiler-SDK <https://github.com/ROCm/rocprofiler-sdk>`__ codebase, these APIs are wrapped and orchestrated in the ``aql``, ``hsa``, and ``thread_trace`` folders for queue interception, packet construction, and result iteration.
|
||||
|
||||
.. note::
|
||||
|
||||
The ROCprofiler-SDK is migrating to these newer interfaces in ``aql_profile_v2.h``. You should use the APIs in ``aql_profile_v2.h`` to stay up-to-date.
|
||||
|
||||
Define parameters and profile
|
||||
------------------------------
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
hsa_ven_amd_aqlprofile_parameter_t params[] = {
|
||||
{ HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE, 0x1000000} // 16 MB buffer
|
||||
};
|
||||
|
||||
hsa_ven_amd_aqlprofile_profile_t profile = {
|
||||
.agent = agent,
|
||||
.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE,
|
||||
.events = nullptr,
|
||||
.event_count = 0,
|
||||
.parameters = params,
|
||||
.parameter_count = sizeof(params)/sizeof(params[0]),
|
||||
.output_buffer = {trace_ptr, trace_size},
|
||||
.command_buffer = {cmd_ptr, cmd_size}
|
||||
};
|
||||
|
||||
|
||||
Generate SQTT start/stop packets
|
||||
---------------------------------
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
hsa_ext_amd_aql_pm4_packet_t sqtt_start_pkt, sqtt_stop_pkt;
|
||||
hsa_ven_amd_aqlprofile_start(&profile, &sqtt_start_pkt);
|
||||
hsa_ven_amd_aqlprofile_stop(&profile, &sqtt_stop_pkt);
|
||||
|
||||
|
||||
Submit packets and run the kernel
|
||||
---------------------------------
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
queue->Submit(&sqtt_start_pkt);
|
||||
queue->Submit(&kernel_pkt);
|
||||
queue->Submit(&sqtt_stop_pkt);
|
||||
|
||||
|
||||
Iterate and decode trace data
|
||||
-----------------------------
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
hsa_ven_amd_aqlprofile_iterate_data(
|
||||
&profile,
|
||||
[](hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data,
|
||||
void* user_data) -> hsa_status_t {
|
||||
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA) {
|
||||
// info_data->trace_data.ptr, info_data->trace_data.size
|
||||
decode_trace(info_data->trace_data.ptr, info_data->trace_data.size);
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
nullptr
|
||||
);
|
||||
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
.. meta::
|
||||
:description: AQLprofile is an open source library that enables advanced GPU profiling and tracing on AMD platforms.
|
||||
:keywords: AQLprofile, ROCm, tool, Instinct, accelerator, AMD
|
||||
|
||||
.. _index:
|
||||
|
||||
************************
|
||||
AQLprofile documentation
|
||||
************************
|
||||
|
||||
The Architected Queuing Language profiling library (AQLprofile) is an
|
||||
open source library that enables advanced GPU profiling and tracing on
|
||||
AMD platforms.
|
||||
|
||||
This documentation provides a comprehensive overview of the AQLprofile library.
|
||||
|
||||
If you're new to AQLprofile, see :doc:`What is AQLprofile? <what-is-aqlprofile>`.
|
||||
|
||||
AQLprofile is open source and hosted at `AQLprofile on GitHub <https://github.com/ROCm/aqlprofile>`_.
|
||||
|
||||
.. grid:: 2
|
||||
:gutter: 3
|
||||
|
||||
.. grid-item-card:: Install
|
||||
|
||||
* :doc:`Install AQLprofile <install/aqlprofile-install>`
|
||||
|
||||
.. grid-item-card:: Examples
|
||||
|
||||
* :doc:`Performance Monitor Control (PMC) workflow <examples/pmc-workflow>`
|
||||
* :doc:`SQ Thread Trace (SQTT) workflow <examples/sqtt-workflow>`
|
||||
|
||||
.. grid-item-card:: Reference
|
||||
|
||||
* :doc:`Glossary <reference/glossary>`
|
||||
* :doc:`Supported architectures and counter blocks <reference/supported-architectures>`
|
||||
* :doc:`APIs <reference/api-list>`
|
||||
|
||||
|
||||
To contribute to the documentation, refer to
|
||||
`Contributing to ROCm <https://rocm.docs.amd.com/en/latest/contribute/contributing.html>`_.
|
||||
|
||||
You can find licensing information on the
|
||||
`Licensing <https://rocm.docs.amd.com/en/latest/about/license.html>`_ page.
|
||||
@@ -1,77 +0,0 @@
|
||||
.. meta::
|
||||
:description: AQLprofile installation process
|
||||
:keywords: AQLprofile, ROCm, install
|
||||
|
||||
******************
|
||||
Install AQLprofile
|
||||
******************
|
||||
|
||||
Learn how to build AQLprofile with a script or with CMake, then install the library with a command.
|
||||
|
||||
Prerequisites
|
||||
=============
|
||||
|
||||
Before you begin, ensure these tools and dependencies are installed:
|
||||
|
||||
* ROCm stack
|
||||
* ``rocm-llvm-dev`` (required to build tests)
|
||||
|
||||
|
||||
Build AQLprofile
|
||||
================
|
||||
|
||||
You can build AQLprofile using either the provided build script (recommended for most users) or by manually invoking CMake for custom builds.
|
||||
|
||||
|
||||
Option 1: Use the build script (Recommended)
|
||||
--------------------------------------------
|
||||
|
||||
This configures and builds the project with the default settings:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
./build.sh
|
||||
|
||||
|
||||
Option 2: Use CMake for custom builds
|
||||
-------------------------------------
|
||||
|
||||
For more control over the build process, you can set the CMake options manually:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
# Set the CMAKE_PREFIX_PATH to point to hsa-runtime includes path and hsa-runtime library path
|
||||
export CMAKE_PREFIX_PATH=<path to hsa-runtime includes>:<path to hsa-runtime library>
|
||||
# For example, if ROCm is installed at /opt/rocm:
|
||||
# export CMAKE_PREFIX_PATH=/opt/rocm/lib:/opt/rocm/include/hsa
|
||||
|
||||
export CMAKE_BUILD_TYPE=<debug|release> # release by default
|
||||
|
||||
cd /path/to/aqlprofile
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make -j
|
||||
|
||||
|
||||
Enable debug tracing (Optional)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To enable debug tracing, set this environment variable before running CMake:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
export CMAKE_DEBUG_TRACE=1
|
||||
|
||||
This enables verbose debug output of the command packets while this library executes.
|
||||
|
||||
|
||||
Install the AQLprofile libraries
|
||||
================================
|
||||
|
||||
Once your build is successful, install the AQLprofile libraries with:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
cd build
|
||||
sudo make install
|
||||
@@ -1,29 +0,0 @@
|
||||
.. meta::
|
||||
:description: The standard MIT license for AQLprofile
|
||||
:keywords: AQLprofile, ROCm, license
|
||||
|
||||
*******
|
||||
License
|
||||
*******
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -1,112 +0,0 @@
|
||||
.. meta::
|
||||
:description: A description of the APIs used with AQLprofile
|
||||
:keywords: AQLprofile, ROCm, APIs
|
||||
|
||||
AQLprofile APIs
|
||||
===============
|
||||
|
||||
Learn about the typical APIs used in AQLprofile.
|
||||
|
||||
The APIs in ``aql_profile_v2.h`` are designed for use with `ROCprofiler-SDK <https://github.com/ROCm/rocprofiler-sdk>`__, and are actively maintained and recommended for all new development.
|
||||
|
||||
.. note::
|
||||
|
||||
The APIs in ``hsa_ven_amd_aqlprofile.h`` are used by legacy tools such as ``rocprof`` and ``rocprofv2``. You should use the new ``aql_profile_v2.h`` APIs instead.
|
||||
|
||||
From header ``aql_profile_v2.h``
|
||||
--------------------------------
|
||||
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| API Name | Purpose |
|
||||
+====================================+==========================================================================================+
|
||||
| ``aqlprofile_register_agent`` | Registers an agent for profiling using basic agent info. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_register_agent_info`` | Registers an agent for profiling using extended agent info and versioning. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_get_pmc_info`` | Retrieves information about PMC profiles (for example, buffer sizes, counter data). |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_validate_pmc_event`` | Checks if a given PMC event is valid for the specified agent. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_pmc_create_packets`` | Creates AQL packets (start, stop, read) for PMC profiling and returns a handle. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_pmc_delete_packets`` | Deletes PMC profiling packets and releases associated resources. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_pmc_iterate_data`` | Iterates over PMC profiling results using a callback. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_att_create_packets`` | Creates AQL packets (start, stop) for Advanced Thread Trace (SQTT) and returns a handle. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_att_delete_packets`` | Deletes ATT profiling packets and releases associated resources. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_att_iterate_data`` | Iterates over thread trace (SQTT) results using a callback. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_iterate_event_ids`` | Iterates over all possible event coordinate IDs and names using a callback. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_iterate_event_coord`` | Iterates over all event coordinates for a given agent and event using a callback. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_att_codeobj_marker`` | Creates a marker packet for code object events in thread trace workflows. |
|
||||
+------------------------------------+------------------------------------------------------------------------------------------+
|
||||
|
||||
Callback Typedefs
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
+------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| Callback Typedef Name | Purpose |
|
||||
+==========================================+==========================================================================================+
|
||||
| ``aqlprofile_memory_alloc_callback_t`` | Callback for allocating memory buffers for profiles (PMC/ATT). |
|
||||
+------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_memory_dealloc_callback_t`` | Callback for deallocating memory buffers allocated for profiles. |
|
||||
+------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_memory_copy_t`` | Callback for copying memory (used internally by the profiler). |
|
||||
+------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_pmc_data_callback_t`` | Used with ``aqlprofile_pmc_iterate_data`` to process each PMC profiling result. |
|
||||
+------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_att_data_callback_t`` | Used with ``aqlprofile_att_iterate_data`` to process each thread trace (SQTT) result. |
|
||||
+------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_eventname_callback_t`` | Used with ``aqlprofile_iterate_event_ids`` to process event coordinate IDs and names. |
|
||||
+------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``aqlprofile_coordinate_callback_t`` | Used with ``aqlprofile_iterate_event_coord`` to process event coordinate information. |
|
||||
+------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
|
||||
From header ``hsa_ven_amd_aqlprofile.h`` (Legacy)
|
||||
-------------------------------------------------
|
||||
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| API Name | Purpose |
|
||||
+==================================================+==========================================================================================+
|
||||
| ``hsa_ven_amd_aqlprofile_validate_event`` | Checks if a given event (counter) is valid for the specified GPU agent. |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_start`` | Populates an AQL packet with commands to start profiling (PMC or SQTT). |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_stop`` | Populates an AQL packet with commands to stop profiling. |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_read`` | Populates an AQL packet with commands to read profiling results from the GPU. |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_legacy_get_pm4`` | Converts an AQL packet to a PM4 packet blob (for legacy devices). |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_att_marker`` | Inserts a marker (correlation ID) into the ATT (thread trace) buffer. |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_get_info`` | Retrieves various profile information, such as buffer sizes or collected data. |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_iterate_data`` | Iterates over the profiling output data (PMC results or SQTT trace) using a callback. |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_error_string`` | Returns a human-readable error string for the last error. |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_iterate_event_ids`` | Iterates over all possible event IDs and names for the agent. |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_iterate_event_coord`` | Iterates over all event coordinates for a given agent and event. |
|
||||
+--------------------------------------------------+------------------------------------------------------------------------------------------+
|
||||
|
||||
.. _callback-typedefs-1:
|
||||
|
||||
Callback Typedefs
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
+---------------------------------------------------+------------------------------------------------------------------------------------------------+
|
||||
| Callback Typedef Name | Purpose |
|
||||
+===================================================+================================================================================================+
|
||||
| ``hsa_ven_amd_aqlprofile_data_callback_t`` | Used with ``hsa_ven_amd_aqlprofile_iterate_data`` to process each profiling result (PMC/SQTT). |
|
||||
+---------------------------------------------------+------------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_eventname_callback_t`` | Used with ``hsa_ven_amd_aqlprofile_iterate_event_ids`` to process event IDs and names. |
|
||||
+---------------------------------------------------+------------------------------------------------------------------------------------------------+
|
||||
| ``hsa_ven_amd_aqlprofile_coordinate_callback_t`` | Used with ``hsa_ven_amd_aqlprofile_iterate_event_coord`` to process event coordinate info. |
|
||||
+---------------------------------------------------+------------------------------------------------------------------------------------------------+
|
||||
@@ -1,109 +0,0 @@
|
||||
.. meta::
|
||||
:description: Defined concepts commonly used in AQLprofile
|
||||
:keywords: AQLprofile, ROCm
|
||||
|
||||
AQLprofile glossary
|
||||
===================
|
||||
|
||||
Learn the definitions of concepts commonly used in AQLprofile.
|
||||
|
||||
Agents
|
||||
------
|
||||
|
||||
Agents represent computational devices (CPUs, GPUs) in the Heterogeneous
|
||||
System Architecture (HSA) runtime. In AQLprofile, agents are discovered
|
||||
via HSA APIs and encapsulated in the ``AgentInfo`` structure. Each agent
|
||||
contains metadata including device type, name, compute unit count, and
|
||||
memory pools.
|
||||
|
||||
Agents are enumerated using HSA API ``hsa_iterate_agents``, and their
|
||||
properties are queried via another HSA API, ``hsa_agent_get_info``.
|
||||
Agents are used to target specific GPUs for profiling, and to allocate
|
||||
resources such as command buffers and memory pools.
|
||||
|
||||
Counters and events
|
||||
-------------------
|
||||
|
||||
Performance counters are special circuits on the hardware that count
|
||||
specific GPU events (for example, cycles, instructions, cache hits). Events
|
||||
specify which counters to collect, identified by block name, block
|
||||
index, and counter ID.
|
||||
|
||||
- Events are described using ``hsa_ven_amd_aqlprofile_event_t``
|
||||
structures.
|
||||
- Events are grouped into profiles and collected during profiling
|
||||
sessions.
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
const hsa_ven_amd_aqlprofile_event_t events_arr1[] = {
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 2 /*CYCLES*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 3 /*BUSY_CYCLES*/},
|
||||
// ...
|
||||
};
|
||||
|
||||
Counter blocks
|
||||
--------------
|
||||
|
||||
Counter blocks correspond to hardware units on the GPU (for example, SQ, TCC,
|
||||
TCP). Each block exposes a set of counters/events.
|
||||
|
||||
- Block names (for example, ``HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ``) map to
|
||||
specific hardware blocks.
|
||||
- Events specify both the block and the counter within that block.
|
||||
|
||||
Command buffers
|
||||
---------------
|
||||
|
||||
Command buffers are memory regions that store AQL packets and PM4
|
||||
commands, which control GPU profiling operations. They're allocated per
|
||||
agent, and must meet alignment and size requirements dictated by the
|
||||
hardware.
|
||||
|
||||
Command packets
|
||||
---------------
|
||||
|
||||
Command packets are AQL or PM4 packets that encode profiling commands
|
||||
for the GPU. They're constructed and written into command buffers.
|
||||
|
||||
They're built using AQLprofile APIs or helper functions and submitted to
|
||||
the GPU via HSA queues.
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
bool Queue::Submit(hsa_ext_amd_aql_pm4_packet_t* packet) {
|
||||
// Write packet to queue and signal doorbell
|
||||
}
|
||||
|
||||
Output buffer
|
||||
-------------
|
||||
|
||||
Output buffers are memory regions that store outputs such as counter
|
||||
values and thread trace tokens. They're allocated using HSA memory pools
|
||||
associated with the agent.
|
||||
|
||||
Profile object
|
||||
--------------
|
||||
|
||||
The profile object encapsulates all information required to perform a
|
||||
profiling session. It's represented by the
|
||||
``hsa_ven_amd_aqlprofile_profile_t`` struct, which includes the agent,
|
||||
event type, list of events, command buffer, and additional parameters.
|
||||
|
||||
Profile objects are constructed by specifying the agent, event type
|
||||
(PMC, SQTT), events to collect, and associated buffers. They're passed
|
||||
to AQLprofile APIs to start, stop, and read profiling data.
|
||||
|
||||
.. code:: cpp
|
||||
|
||||
hsa_ven_amd_aqlprofile_profile_t *profile =
|
||||
new hsa_ven_amd_aqlprofile_profile_t{
|
||||
agent_info->dev_id,
|
||||
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC,
|
||||
events,
|
||||
num_events,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
0};
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
.. meta::
|
||||
:description: A list of the supported architectures and counter blocks used with AQLprofile
|
||||
:keywords: AQLprofile, ROCm, architectures, GFX
|
||||
|
||||
Supported architectures and counter blocks in AQLprofile
|
||||
========================================================
|
||||
|
||||
The AQLprofile library supports profiling and tracing GPU workloads
|
||||
across multiple architectures.
|
||||
|
||||
.. note::
|
||||
|
||||
The GFX version names (GFX9XX, GFX10XX, GFX11XX, GFX12XX) refer to hardware architecture families rather than individual GPUs. See `System requirements (Linux) <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html>`__ for more information.
|
||||
|
||||
Here's a summary of the counter blocks supported for each architecture:
|
||||
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| Counter | GFX9 | GFX908 | GFX90A | GFX942 | GFX10| GFX11|GFX12|
|
||||
| Block Name | | | | | | | |
|
||||
+=============+======+========+========+========+======+======+=====+
|
||||
| ATC | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| ATC_L2 | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| CHA | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| CHC | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| CPC | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| CPF | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| CPG | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GCEA | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GCR | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GDS | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GL1A | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GL1C | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GL2A | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GL2C | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GRBM | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GRBMH | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GRBM_SE | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| GUS | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| MC_VM_L2 | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| RPB | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| SDMA | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| SPI | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| SQ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| SQ_CS | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| TA | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| TCA | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| TCC | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| TCP | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
| TD | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ |
|
||||
+-------------+------+--------+--------+--------+------+------+-----+
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
defaults:
|
||||
numbered: False
|
||||
maxdepth: 6
|
||||
root: index
|
||||
subtrees:
|
||||
|
||||
- entries:
|
||||
- file: what-is-aqlprofile.rst
|
||||
|
||||
- caption: Install
|
||||
entries:
|
||||
- file: install/aqlprofile-install.rst
|
||||
title: Install AQLprofile
|
||||
|
||||
- caption: Examples
|
||||
entries:
|
||||
- file: examples/pmc-workflow.rst
|
||||
title: Performance Monitor Control workflow
|
||||
- file: examples/sqtt-workflow.rst
|
||||
title: SQ Thread Trace workflow
|
||||
|
||||
- caption: Reference
|
||||
entries:
|
||||
- file: reference/glossary.rst
|
||||
title: Glossary
|
||||
- file: reference/supported-architectures.rst
|
||||
title: Supported architectures and counter blocks
|
||||
- file: reference/api-list.rst
|
||||
title: APIs
|
||||
|
||||
- caption: About
|
||||
entries:
|
||||
- file: license.rst
|
||||
title: License
|
||||
|
||||
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
defaults:
|
||||
numbered: False
|
||||
maxdepth: 6
|
||||
root: index
|
||||
subtrees:
|
||||
|
||||
- entries:
|
||||
- file: what-is-aqlprofile.rst
|
||||
|
||||
- caption: Install
|
||||
entries:
|
||||
- file: install/aqlprofile-install.rst
|
||||
title: Install AQLprofile
|
||||
|
||||
- caption: Examples
|
||||
entries:
|
||||
- file: examples/pmc-workflow.rst
|
||||
title: Performance Monitor Control workflow
|
||||
- file: examples/sqtt-workflow.rst
|
||||
title: SQ Thread Trace workflow
|
||||
|
||||
- caption: Reference
|
||||
entries:
|
||||
- file: reference/glossary.rst
|
||||
title: Glossary
|
||||
- file: reference/supported-architectures.rst
|
||||
title: Supported architectures and counter blocks
|
||||
- file: reference/api-list.rst
|
||||
title: APIs
|
||||
|
||||
- caption: About
|
||||
entries:
|
||||
- file: license.rst
|
||||
title: License
|
||||
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
rocm-docs-core[api_reference]==1.23.0
|
||||
@@ -1,314 +0,0 @@
|
||||
#
|
||||
# This file is autogenerated by pip-compile with Python 3.10
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile requirements.in
|
||||
#
|
||||
accessible-pygments==0.0.5
|
||||
# via pydata-sphinx-theme
|
||||
alabaster==1.0.0
|
||||
# via sphinx
|
||||
asttokens==3.0.0
|
||||
# via stack-data
|
||||
attrs==25.3.0
|
||||
# via
|
||||
# jsonschema
|
||||
# jupyter-cache
|
||||
# referencing
|
||||
babel==2.17.0
|
||||
# via
|
||||
# pydata-sphinx-theme
|
||||
# sphinx
|
||||
beautifulsoup4==4.13.5
|
||||
# via pydata-sphinx-theme
|
||||
breathe==4.36.0
|
||||
# via rocm-docs-core
|
||||
certifi==2025.8.3
|
||||
# via requests
|
||||
cffi==2.0.0
|
||||
# via
|
||||
# cryptography
|
||||
# pynacl
|
||||
charset-normalizer==3.4.3
|
||||
# via requests
|
||||
click==8.2.1
|
||||
# via
|
||||
# click-log
|
||||
# doxysphinx
|
||||
# jupyter-cache
|
||||
# sphinx-external-toc
|
||||
click-log==0.4.0
|
||||
# via doxysphinx
|
||||
comm==0.2.3
|
||||
# via ipykernel
|
||||
contourpy==1.3.2
|
||||
# via matplotlib
|
||||
cryptography==45.0.7
|
||||
# via pyjwt
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
debugpy==1.8.16
|
||||
# via ipykernel
|
||||
decorator==5.2.1
|
||||
# via ipython
|
||||
docutils==0.21.2
|
||||
# via
|
||||
# myst-parser
|
||||
# pydata-sphinx-theme
|
||||
# sphinx
|
||||
doxysphinx==3.3.12
|
||||
# via rocm-docs-core
|
||||
exceptiongroup==1.3.0
|
||||
# via ipython
|
||||
executing==2.2.1
|
||||
# via stack-data
|
||||
fastjsonschema==2.21.2
|
||||
# via
|
||||
# nbformat
|
||||
# rocm-docs-core
|
||||
fonttools==4.59.2
|
||||
# via matplotlib
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.45
|
||||
# via rocm-docs-core
|
||||
greenlet==3.2.4
|
||||
# via sqlalchemy
|
||||
idna==3.10
|
||||
# via requests
|
||||
imagesize==1.4.1
|
||||
# via sphinx
|
||||
importlib-metadata==8.7.0
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
ipykernel==6.30.1
|
||||
# via myst-nb
|
||||
ipython==8.37.0
|
||||
# via
|
||||
# ipykernel
|
||||
# myst-nb
|
||||
jedi==0.19.2
|
||||
# via ipython
|
||||
jinja2==3.1.6
|
||||
# via
|
||||
# myst-parser
|
||||
# sphinx
|
||||
jsonschema==4.25.1
|
||||
# via nbformat
|
||||
jsonschema-specifications==2025.9.1
|
||||
# via jsonschema
|
||||
jupyter-cache==1.0.1
|
||||
# via myst-nb
|
||||
jupyter-client==8.6.3
|
||||
# via
|
||||
# ipykernel
|
||||
# nbclient
|
||||
jupyter-core==5.8.1
|
||||
# via
|
||||
# ipykernel
|
||||
# jupyter-client
|
||||
# nbclient
|
||||
# nbformat
|
||||
kiwisolver==1.4.9
|
||||
# via matplotlib
|
||||
libsass==0.22.0
|
||||
# via doxysphinx
|
||||
lxml==5.2.1
|
||||
# via doxysphinx
|
||||
markdown-it-py==3.0.0
|
||||
# via
|
||||
# mdit-py-plugins
|
||||
# myst-parser
|
||||
markupsafe==3.0.2
|
||||
# via jinja2
|
||||
matplotlib==3.10.6
|
||||
# via doxysphinx
|
||||
matplotlib-inline==0.1.7
|
||||
# via
|
||||
# ipykernel
|
||||
# ipython
|
||||
mdit-py-plugins==0.5.0
|
||||
# via myst-parser
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
mpire==2.10.2
|
||||
# via doxysphinx
|
||||
myst-nb==1.3.0
|
||||
# via rocm-docs-core
|
||||
myst-parser==4.0.1
|
||||
# via myst-nb
|
||||
nbclient==0.10.2
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
nbformat==5.10.4
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
# nbclient
|
||||
nest-asyncio==1.6.0
|
||||
# via ipykernel
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# contourpy
|
||||
# doxysphinx
|
||||
# matplotlib
|
||||
packaging==25.0
|
||||
# via
|
||||
# ipykernel
|
||||
# matplotlib
|
||||
# sphinx
|
||||
parso==0.8.5
|
||||
# via jedi
|
||||
pexpect==4.9.0
|
||||
# via ipython
|
||||
pillow==11.3.0
|
||||
# via matplotlib
|
||||
platformdirs==4.4.0
|
||||
# via jupyter-core
|
||||
prompt-toolkit==3.0.52
|
||||
# via ipython
|
||||
psutil==7.0.0
|
||||
# via ipykernel
|
||||
ptyprocess==0.7.0
|
||||
# via pexpect
|
||||
pure-eval==0.2.3
|
||||
# via stack-data
|
||||
pycparser==2.23
|
||||
# via cffi
|
||||
pydata-sphinx-theme==0.16.1
|
||||
# via
|
||||
# rocm-docs-core
|
||||
# sphinx-book-theme
|
||||
pygithub==2.8.1
|
||||
# via rocm-docs-core
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# accessible-pygments
|
||||
# ipython
|
||||
# mpire
|
||||
# pydata-sphinx-theme
|
||||
# sphinx
|
||||
pyjson5==1.6.9
|
||||
# via doxysphinx
|
||||
pyjwt[crypto]==2.10.1
|
||||
# via pygithub
|
||||
pynacl==1.6.0
|
||||
# via pygithub
|
||||
pyparsing==3.2.3
|
||||
# via
|
||||
# doxysphinx
|
||||
# matplotlib
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# jupyter-client
|
||||
# matplotlib
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
# myst-parser
|
||||
# rocm-docs-core
|
||||
# sphinx-external-toc
|
||||
pyzmq==27.1.0
|
||||
# via
|
||||
# ipykernel
|
||||
# jupyter-client
|
||||
referencing==0.36.2
|
||||
# via
|
||||
# jsonschema
|
||||
# jsonschema-specifications
|
||||
requests==2.32.5
|
||||
# via
|
||||
# pygithub
|
||||
# sphinx
|
||||
rocm-docs-core[api-reference]==1.23.0
|
||||
# via -r requirements.in
|
||||
rpds-py==0.27.1
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
six==1.17.0
|
||||
# via python-dateutil
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
snowballstemmer==3.0.1
|
||||
# via sphinx
|
||||
soupsieve==2.8
|
||||
# via beautifulsoup4
|
||||
sphinx==8.1.3
|
||||
# via
|
||||
# breathe
|
||||
# myst-nb
|
||||
# myst-parser
|
||||
# pydata-sphinx-theme
|
||||
# rocm-docs-core
|
||||
# sphinx-book-theme
|
||||
# sphinx-copybutton
|
||||
# sphinx-design
|
||||
# sphinx-external-toc
|
||||
# sphinx-notfound-page
|
||||
sphinx-book-theme==1.1.3
|
||||
# via rocm-docs-core
|
||||
sphinx-copybutton==0.5.2
|
||||
# via rocm-docs-core
|
||||
sphinx-design==0.6.1
|
||||
# via rocm-docs-core
|
||||
sphinx-external-toc==1.0.1
|
||||
# via rocm-docs-core
|
||||
sphinx-notfound-page==1.1.0
|
||||
# via rocm-docs-core
|
||||
sphinxcontrib-applehelp==2.0.0
|
||||
# via sphinx
|
||||
sphinxcontrib-devhelp==2.0.0
|
||||
# via sphinx
|
||||
sphinxcontrib-htmlhelp==2.1.0
|
||||
# via sphinx
|
||||
sphinxcontrib-jsmath==1.0.1
|
||||
# via sphinx
|
||||
sphinxcontrib-qthelp==2.0.0
|
||||
# via sphinx
|
||||
sphinxcontrib-serializinghtml==2.0.0
|
||||
# via sphinx
|
||||
sqlalchemy==2.0.43
|
||||
# via jupyter-cache
|
||||
stack-data==0.6.3
|
||||
# via ipython
|
||||
tabulate==0.9.0
|
||||
# via jupyter-cache
|
||||
tomli==2.2.1
|
||||
# via sphinx
|
||||
tornado==6.5.2
|
||||
# via
|
||||
# ipykernel
|
||||
# jupyter-client
|
||||
tqdm==4.67.1
|
||||
# via mpire
|
||||
traitlets==5.14.3
|
||||
# via
|
||||
# ipykernel
|
||||
# ipython
|
||||
# jupyter-client
|
||||
# jupyter-core
|
||||
# matplotlib-inline
|
||||
# nbclient
|
||||
# nbformat
|
||||
typing-extensions==4.15.0
|
||||
# via
|
||||
# beautifulsoup4
|
||||
# exceptiongroup
|
||||
# ipython
|
||||
# myst-nb
|
||||
# pydata-sphinx-theme
|
||||
# pygithub
|
||||
# referencing
|
||||
# sqlalchemy
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# pygithub
|
||||
# requests
|
||||
wcwidth==0.2.13
|
||||
# via prompt-toolkit
|
||||
zipp==3.23.0
|
||||
# via importlib-metadata
|
||||
@@ -1,60 +0,0 @@
|
||||
.. meta::
|
||||
:description: AQLprofile is an open source library that enables advanced GPU profiling and tracing on AMD platforms.
|
||||
:keywords: AQLprofile, ROCm, tool, Instinct, accelerator, AMD
|
||||
|
||||
What is AQLprofile?
|
||||
===================
|
||||
|
||||
The Architected Queuing Language profiling library (AQLprofile) is an
|
||||
open source library that enables advanced GPU profiling and tracing on
|
||||
AMD platforms. It works in conjunction with
|
||||
`ROCprofiler-SDK <https://github.com/ROCm/rocprofiler-sdk>`__ to
|
||||
support profiling methods such as `performance counters
|
||||
(PMC) <https://rocm.docs.amd.com/projects/aqlprofile/en/latest/examples/pmc-workflow.html>`__ and `SQ thread trace
|
||||
(SQTT) <https://rocm.docs.amd.com/projects/aqlprofile/en/latest/examples/sqtt-workflow.html>`__. AQLprofile provides the
|
||||
foundational mechanisms for constructing AQL packets and managing
|
||||
profiling operations across multiple AMD GPU architecture families. The
|
||||
development of AQLprofile is aligned with ROCprofiler-SDK, ensuring
|
||||
compatibility and feature support for new GPU architectures and
|
||||
profiling requirements.
|
||||
|
||||
AQLprofile builds on concepts from the Heterogeneous System Architecture
|
||||
(HSA) and the AQL, which define the foundations for GPU command
|
||||
processing and profiling on AMD platforms. For more information, see:
|
||||
|
||||
- `HSA Platform System Architecture
|
||||
Specification <http://hsafoundation.com/wp-content/uploads/2021/02/HSA-SysArch-1.2.pdf>`__
|
||||
- `HSA Runtime Programmer's Reference
|
||||
Specification <http://hsafoundation.com/wp-content/uploads/2021/02/HSA-Runtime-1.2.pdf>`__
|
||||
|
||||
Features
|
||||
--------
|
||||
|
||||
- Profiling AQL packets for GPU workloads.
|
||||
- Performance counters and SQ thread traces.
|
||||
- Support for GFX9XX, GFX10XX, GFX11XX, and GFX12XX architecture families.
|
||||
- Verbose tracing and error logging capabilities.
|
||||
- Thread trace binary data generated by AQLprofile can be decoded using
|
||||
`rocprof-trace-decoder <https://github.com/ROCm/rocprof-trace-decoder/releases>`__.
|
||||
|
||||
Who should use this library?
|
||||
----------------------------
|
||||
|
||||
- **End users**: If you want to profile AMD GPUs, use
|
||||
`ROCprofiler-SDK <https://github.com/ROCm/rocprofiler-sdk>`__ or
|
||||
tools that depend on it. You do *not* need to use AQLprofile
|
||||
directly.
|
||||
- **Developers/integrators**: If you're building profiling tools,
|
||||
custom workflows, or need to extend profiling capabilities, you may
|
||||
use AQLprofile directly as a backend.
|
||||
|
||||
How does AQLprofile fit into the ROCm profiling stack?
|
||||
------------------------------------------------------
|
||||
|
||||
Here's the typical workflow:
|
||||
|
||||
Application → ROCprofiler-SDK ⇄ **AQLprofile** ⇄ ROCprofiler-SDK → HSA/ROCR/KFD → AMD GPU hardware
|
||||
|
||||
- **AQLprofile** generates profiling command packets (AQL/PM4) tailored to the GPU architecture. It doesn't interact with hardware or drivers directly. It only produces the packets and buffer requirements requested by ``ROCprofiler-SDK``.
|
||||
|
||||
- **ROCprofiler-SDK** provides a higher-level API and user-facing tools, using AQLprofile internally. It manages profiling sessions, submits packets to the GPU via `ROCr <https://rocm.docs.amd.com/projects/ROCR-Runtime/en/latest/index.html>`__/HSA/KFD, and collects results.
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele