31fe8858d1
* rocDecode API Tracing support * Test bin file added to rocdecode. Need to add validate python methods * Added option to not make rocDecode tests * Added rocdecode and rocprofv3 tests * Added csv test * Address PR comments. Changed tests to use built-in rocstreambit decoder to remove ffmpeg dependancy. Changed cmake option to disbale tests rather than not build them. Tests work locally, but will fail until rocDecode is built with tracing enabled on CI * Add option to avoid building rocdecode tests * Added option to avoid building rocdecode bin file * Support for rocJPEG API Trace * Added newline to rocjpeg_version.h * json-tool code added, initial test/bin commit * Formatting * Resolved rocjpeg bin test compilation errors * Tests implemented. Perfetto module currently resulting in errors, so need to retest whenever it is fixed * Formatting and compilation errors * Minor fixes * Copyright year update and minor fixes * Doc update fix * Added rocjpeg csv file in data * Addresses review comments: Updated fixed Findroc.. 
and uses root directory as a hint, fixed documentation error, changed tables to use _CORE, minor style fixes * Added rocdecode and rocjpeg to CI * Removed rocdecode and rocjpeg from CI and added back build tests option * Updated Cmake Files * Added rocDecode and rocJPEG to CI * Remove cmake line added in error * Temporarily modified tests to pass if rocdecode or rocjpeg tracing are not supported for CI, cmake changes * Added find_package for test * Added back use of system rocDecode and rocJPEG, modifies system files to include prefix path * Updated no-link to include INCLUDE_DIR/roc(decode|jpeg), added comments for tests * Resolve merge conflicts and formatting * Added regex find and replace instead of include for CI * VAAPI package causing errors on Vega20 * Removed system rocjpeg and rocdecode use temporarily until cmake issues resolved * Removed workflows regex * Formatting and minor test modification * Modified test for vega20 * Update rocDecode and rocJPEG cmake and tests * Changelog * Fix merge conflict * Added back if-statements around add-tests since cmake-generator-expressions are resulting in errors when the packages are missing * Removed if found statements, replaced with TARGET:EXISTS * Skip json file for rocjpeg and rocdecode tests if not supported * Add os import --------- Co-authored-by: Kandula, Venkateshwar reddy <Venkateshwarreddy.Kandula@amd.com> Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
794 líneas
34 KiB
C
794 líneas
34 KiB
C
// MIT License
|
|
//
|
|
// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
|
|
#pragma once
|
|
|
|
#include <rocprofiler-sdk/defines.h>
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
|
|
ROCPROFILER_EXTERN_C_INIT
|
|
|
|
//--------------------------------------------------------------------------------------//
|
|
//
|
|
// ENUMERATIONS
|
|
//
|
|
//--------------------------------------------------------------------------------------//
|
|
|
|
/**
|
|
* @defgroup BASIC_DATA_TYPES Basic data types
|
|
* @brief Basic data types and typedefs
|
|
*
|
|
* @{
|
|
*/
|
|
|
|
// TODO(aelwazir): Do we need to add a null (way) for every handle?
|
|
// TODO(aelwazir): Remove API Data args from the doxygen?
|
|
// TODO(aelwazir): Not everything in bin needs to be installed bin, use libexec or share?
|
|
|
|
/**
 * @brief Status codes.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_STATUS_SUCCESS = 0,                ///< No error occurred
    ROCPROFILER_STATUS_ERROR,                      ///< Generalized error
    ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND,    ///< No valid context for given context id
    ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND,     ///< No valid buffer for given buffer id
    ROCPROFILER_STATUS_ERROR_KIND_NOT_FOUND,       ///< Kind identifier is invalid
    ROCPROFILER_STATUS_ERROR_OPERATION_NOT_FOUND,  ///< Operation identifier is invalid for domain
    ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND,     ///< No valid thread for given thread id
    ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND,      ///< Agent identifier not found
    ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND,    ///< Counter identifier does not exist
    ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR,        ///< Generalized context error
    ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID,      ///< Context configuration is not valid
    ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_STARTED,  ///< Context was not started (e.g., atomic swap
                                                   ///< into active array failed)
    ROCPROFILER_STATUS_ERROR_CONTEXT_CONFLICT,  ///< Context operation failed due to a conflict with
                                                ///< another context
    ROCPROFILER_STATUS_ERROR_CONTEXT_ID_NOT_ZERO,  ///< Context ID is not initialized to zero
    ROCPROFILER_STATUS_ERROR_BUFFER_BUSY,  ///< Buffer operation failed because it is currently busy
                                           ///< handling another request (e.g. flushing)
    ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED,  ///< Service has already been configured
                                                          ///< in context
    ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED,  ///< Function call is not valid outside of
                                                    ///< rocprofiler configuration (i.e.
                                                    ///< function called post-initialization)
    ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED,   ///< Function is not implemented
    ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI,  ///< Data structure provided by user is incompatible
                                                ///< with current version of rocprofiler
    ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT,  ///< Function invoked with one or more invalid
                                                ///< arguments
    ROCPROFILER_STATUS_ERROR_METRIC_NOT_VALID_FOR_AGENT,  ///< Invalid metric supplied to agent.
    ROCPROFILER_STATUS_ERROR_FINALIZED,       ///< Invalid because rocprofiler has been finalized
    ROCPROFILER_STATUS_ERROR_HSA_NOT_LOADED,  ///< Call requires HSA to be loaded before performed
    ROCPROFILER_STATUS_ERROR_DIM_NOT_FOUND,   ///< Dimension is not found for counter
    ROCPROFILER_STATUS_ERROR_PROFILE_COUNTER_NOT_FOUND,  ///< Profile could not find counter for GPU
                                                         ///< agent
    ROCPROFILER_STATUS_ERROR_AST_GENERATION_FAILED,  ///< AST could not be generated correctly
    ROCPROFILER_STATUS_ERROR_AST_NOT_FOUND,          ///< AST was not found
    ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD,  ///< Event coordinate was not found by AQL profile
    ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL,  ///< A service depends on a newer version of KFD
                                                   ///< (amdgpu kernel driver). Check logs for
                                                   ///< services that report incompatibility
    ROCPROFILER_STATUS_ERROR_OUT_OF_RESOURCES,  ///< The given resources are
                                                ///< insufficient to complete operation
    ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND,  ///< Could not find the counter profile
    ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT,  ///< Cannot enable both agent and dispatch
                                                       ///< counting in the same context.
    ROCPROFILER_STATUS_INTERNAL_NO_AGENT_CONTEXT,  ///< No agent context found, may not be an error
    ROCPROFILER_STATUS_ERROR_SAMPLE_RATE_EXCEEDED,  ///< Sample rate exceeded
    ROCPROFILER_STATUS_ERROR_NO_PROFILE_QUEUE,      ///< Profile queue creation failed
    ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS,  ///< No hardware counters were specified
    ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH,  ///< Agent mismatch between profile and context.
    ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE,   ///< The service is not available.
                                              ///< Please refer to API functions that return this
                                              ///< status code for more information.
    ROCPROFILER_STATUS_ERROR_EXCEEDS_HW_LIMIT,  ///< Exceeds hardware limits for collection.
    ROCPROFILER_STATUS_ERROR_AGENT_ARCH_NOT_SUPPORTED,  ///< Agent HW architecture not supported.
    ROCPROFILER_STATUS_ERROR_PERMISSION_DENIED,         ///< Permission denied.
    ROCPROFILER_STATUS_LAST,
} rocprofiler_status_t;
|
|
|
|
/**
 * @brief Buffer record categories. This enumeration type is encoded in @ref
 * rocprofiler_record_header_t category field
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_BUFFER_CATEGORY_NONE = 0,
    ROCPROFILER_BUFFER_CATEGORY_TRACING,
    ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING,
    ROCPROFILER_BUFFER_CATEGORY_COUNTERS,
    ROCPROFILER_BUFFER_CATEGORY_LAST,
} rocprofiler_buffer_category_t;
|
|
|
|
/**
 * @brief Agent type.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_AGENT_TYPE_NONE = 0,  ///< Agent type is unknown
    ROCPROFILER_AGENT_TYPE_CPU,       ///< Agent type is a CPU
    ROCPROFILER_AGENT_TYPE_GPU,       ///< Agent type is a GPU
    ROCPROFILER_AGENT_TYPE_LAST,
} rocprofiler_agent_type_t;
|
|
|
|
/**
 * @brief Service Callback Phase.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_CALLBACK_PHASE_NONE = 0,  ///< Callback has no phase
    ROCPROFILER_CALLBACK_PHASE_ENTER,     ///< Callback invoked prior to function execution
    ROCPROFILER_CALLBACK_PHASE_LOAD =
        ROCPROFILER_CALLBACK_PHASE_ENTER,  ///< Callback invoked prior to code object loading
    ROCPROFILER_CALLBACK_PHASE_EXIT,       ///< Callback invoked after function execution
    ROCPROFILER_CALLBACK_PHASE_UNLOAD =
        ROCPROFILER_CALLBACK_PHASE_EXIT,  ///< Callback invoked prior to code object unloading
    ROCPROFILER_CALLBACK_PHASE_LAST,
} rocprofiler_callback_phase_t;
|
|
|
|
/**
 * @brief Service Callback Tracing Kind. @see rocprofiler_configure_callback_tracing_service.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_CALLBACK_TRACING_NONE = 0,
    ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API,       ///< @see ::rocprofiler_hsa_core_api_id_t
    ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API,    ///< @see ::rocprofiler_hsa_amd_ext_api_id_t
    ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API,  ///< @see ::rocprofiler_hsa_image_ext_api_id_t
    ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API,  ///< @see
                                                        ///< ::rocprofiler_hsa_finalize_ext_api_id_t
    ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API,   ///< @see ::rocprofiler_hip_runtime_api_id_t
    ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,  ///< @see ::rocprofiler_hip_compiler_api_id_t
    ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,   ///< @see ::rocprofiler_marker_core_api_id_t
    ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,  ///< @see
                                                      ///< ::rocprofiler_marker_control_api_id_t
    ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API,  ///< @see ::rocprofiler_marker_name_api_id_t
    ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT,      ///< @see ::rocprofiler_code_object_operation_t
    ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY,  ///< @see ::rocprofiler_scratch_memory_operation_t
    ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH,  ///< Callbacks for kernel dispatches
    ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY,      ///< @see ::rocprofiler_memory_copy_operation_t
    ROCPROFILER_CALLBACK_TRACING_RCCL_API,         ///< RCCL tracing
    ROCPROFILER_CALLBACK_TRACING_OMPT,             ///< @see ::rocprofiler_ompt_operation_t
    ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION,  ///< @see
                                                     ///< ::rocprofiler_memory_allocation_operation_t
    ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION,  ///< Callback notifying that a runtime
                                                          ///< library has been initialized
    ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API,  ///< rocDecode API Tracing
    ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API,    ///< rocJPEG API Tracing
    ROCPROFILER_CALLBACK_TRACING_LAST,
} rocprofiler_callback_tracing_kind_t;
|
|
|
|
/**
 * @brief Service Buffer Tracing Kind. @see rocprofiler_configure_buffer_tracing_service.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_BUFFER_TRACING_NONE = 0,
    ROCPROFILER_BUFFER_TRACING_HSA_CORE_API,       ///< @see ::rocprofiler_hsa_core_api_id_t
    ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API,    ///< @see ::rocprofiler_hsa_amd_ext_api_id_t
    ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API,  ///< @see ::rocprofiler_hsa_image_ext_api_id_t
    ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API,  ///< @see
                                                      ///< ::rocprofiler_hsa_finalize_ext_api_id_t
    ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API,     ///< @see ::rocprofiler_hip_runtime_api_id_t
    ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,    ///< @see ::rocprofiler_hip_compiler_api_id_t
    ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API,     ///< @see ::rocprofiler_marker_core_api_id_t
    ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API,  ///< @see ::rocprofiler_marker_control_api_id_t
    ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API,     ///< @see ::rocprofiler_marker_name_api_id_t
    ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,         ///< @see ::rocprofiler_memory_copy_operation_t
    ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH,     ///< Buffer kernel dispatch info
    ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,      ///< Buffer page migration info
    ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY,      ///< Buffer scratch memory reclamation info
    ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT,  ///< Correlation ID is no longer in use
    ROCPROFILER_BUFFER_TRACING_RCCL_API,                   ///< RCCL tracing
    ROCPROFILER_BUFFER_TRACING_OMPT,                       ///< @see ::rocprofiler_ompt_operation_t
    ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION,  ///< @see
                                                   ///< ::rocprofiler_memory_allocation_operation_t
    ROCPROFILER_BUFFER_TRACING_RUNTIME_INITIALIZATION,  ///< Record indicating a runtime library has
                                                        ///< been initialized. @see
                                                        ///< ::rocprofiler_runtime_initialization_operation_t
    ROCPROFILER_BUFFER_TRACING_ROCDECODE_API,  ///< rocDecode tracing
    ROCPROFILER_BUFFER_TRACING_ROCJPEG_API,    ///< rocJPEG tracing
    ROCPROFILER_BUFFER_TRACING_LAST,
} rocprofiler_buffer_tracing_kind_t;
|
|
|
|
/**
 * @brief ROCProfiler Code Object Tracer Operations.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_CODE_OBJECT_NONE = 0,  ///< Unknown code object operation
    ROCPROFILER_CODE_OBJECT_LOAD,      ///< Code object containing kernel symbols
    ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER,  ///< Kernel symbols - Device
    ROCPROFILER_CODE_OBJECT_HOST_KERNEL_SYMBOL_REGISTER,    ///< Kernel symbols - Host
    ROCPROFILER_CODE_OBJECT_LAST,
} rocprofiler_code_object_operation_t;
|
|
|
|
/**
 * @brief Memory Copy Operations.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_MEMORY_COPY_NONE = 0,          ///< Unknown memory copy direction
    ROCPROFILER_MEMORY_COPY_HOST_TO_HOST,      ///< Memory copy from host to host
    ROCPROFILER_MEMORY_COPY_HOST_TO_DEVICE,    ///< Memory copy from host to device
    ROCPROFILER_MEMORY_COPY_DEVICE_TO_HOST,    ///< Memory copy from device to host
    ROCPROFILER_MEMORY_COPY_DEVICE_TO_DEVICE,  ///< Memory copy from device to device
    ROCPROFILER_MEMORY_COPY_LAST,
} rocprofiler_memory_copy_operation_t;
|
|
|
|
/**
 * @brief Memory Allocation Operation.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_MEMORY_ALLOCATION_NONE = 0,       ///< Unknown memory allocation function
    ROCPROFILER_MEMORY_ALLOCATION_ALLOCATE,       ///< Allocate memory function
    ROCPROFILER_MEMORY_ALLOCATION_VMEM_ALLOCATE,  ///< Allocate vmem memory handle
    ROCPROFILER_MEMORY_ALLOCATION_FREE,           ///< Free memory function
    ROCPROFILER_MEMORY_ALLOCATION_VMEM_FREE,      ///< Release vmem memory handle
    ROCPROFILER_MEMORY_ALLOCATION_LAST,
} rocprofiler_memory_allocation_operation_t;
|
|
|
|
/**
 * @brief ROCProfiler Kernel Dispatch Tracing Operation Types.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_KERNEL_DISPATCH_NONE    = 0,  ///< Unknown kernel dispatch operation
    ROCPROFILER_KERNEL_DISPATCH_ENQUEUE = 1,
    ROCPROFILER_KERNEL_DISPATCH_COMPLETE,
    ROCPROFILER_KERNEL_DISPATCH_LAST,

    /// @var ROCPROFILER_KERNEL_DISPATCH_ENQUEUE
    /// @brief Invoke callback prior to a kernel being enqueued and after the kernel has been
    /// enqueued. When the phase is ::ROCPROFILER_CALLBACK_PHASE_ENTER, this is an opportunity to
    /// push an external correlation id and/or modify the active contexts before a kernel is
    /// launched. Any active contexts containing services related to a kernel dispatch (kernel
    /// tracing, counter collection, etc.) will be captured after this callback and attached to the
    /// kernel. These captured contexts will be considered "active" when the kernel completes even
    /// if the context was stopped before the kernel completes -- this contract is designed to
    /// ensure that tools do not have to delay stopping a context because of an async operation in
    /// order to get the data they requested when the async operation was started. When the phase is
    /// ::ROCPROFILER_CALLBACK_PHASE_EXIT, the active contexts for the kernel dispatch have been
    /// captured and it is safe to disable those contexts without affecting the delivery of the
    /// requested data when the kernel completes. It is important to note that, even if the context
    /// associated with the kernel dispatch callback tracing service is disabled in between the
    /// enter and exit phase, the exit phase callback is still delivered but that context will not
    /// be captured when the kernel is enqueued and therefore will not provide a
    /// ::ROCPROFILER_KERNEL_DISPATCH_COMPLETE callback. Furthermore, it should be
    /// noted that if a tool encodes information into the `::rocprofiler_user_data_t` output
    /// parameter in ::rocprofiler_callback_tracing_cb_t, that same value will be delivered in the
    /// exit phase and in the ::ROCPROFILER_KERNEL_DISPATCH_COMPLETE callback. In
    /// other words, any modifications to that user data value in the exit phase will not be
    /// reflected in the ::ROCPROFILER_KERNEL_DISPATCH_COMPLETE callback because a
    /// copy of that user data struct is attached to the kernel, not a reference to the user data
    /// struct.
    ///
    /// @var ROCPROFILER_KERNEL_DISPATCH_COMPLETE
    /// @brief Invoke callback after a kernel has completed and the HSA runtime has processed the
    /// signal indicating that the kernel has completed. The latter half of this statement is
    /// important. There is no guarantee that these callbacks are invoked in any order related to
    /// when the kernels were dispatched, i.e. even if kernel A is launched and fully executed
    /// before kernel B is launched, it is entirely possible that the HSA runtime ends up processing
    /// the signal associated with kernel B before processing the signal associated with kernel A --
    /// resulting in rocprofiler-sdk invoking this operation callback for kernel B before invoking
    /// the callback for kernel A.
} rocprofiler_kernel_dispatch_operation_t;
|
|
|
|
/**
 * @brief PC Sampling Method.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_PC_SAMPLING_METHOD_NONE = 0,    ///< Unknown sampling type
    ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC,  ///< Stochastic sampling (MI300+)
    ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,   ///< Interval sampling (MI200+)
    ROCPROFILER_PC_SAMPLING_METHOD_LAST,
} rocprofiler_pc_sampling_method_t;
|
|
|
|
/**
 * @brief PC Sampling Unit.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_PC_SAMPLING_UNIT_NONE = 0,      ///< Sample interval has unspecified units
    ROCPROFILER_PC_SAMPLING_UNIT_INSTRUCTIONS,  ///< Sample interval is in instructions
    ROCPROFILER_PC_SAMPLING_UNIT_CYCLES,        ///< Sample interval is in cycles
    ROCPROFILER_PC_SAMPLING_UNIT_TIME,          ///< Sample interval is in nanoseconds
    ROCPROFILER_PC_SAMPLING_UNIT_LAST,
} rocprofiler_pc_sampling_unit_t;
|
|
|
|
/**
 * @brief Actions when Buffer is full.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_BUFFER_POLICY_NONE = 0,  ///< No policy has been set
    ROCPROFILER_BUFFER_POLICY_DISCARD,   ///< Drop records when buffer is full
    ROCPROFILER_BUFFER_POLICY_LOSSLESS,  ///< Block when buffer is full
    ROCPROFILER_BUFFER_POLICY_LAST,
} rocprofiler_buffer_policy_t;
|
|
|
|
/**
 * @brief Page migration event.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_PAGE_MIGRATION_NONE = 0,  ///< Unknown event
    ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START,
    ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END,
    ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START,
    ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END,
    ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION,
    ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE,
    ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU,
    ROCPROFILER_PAGE_MIGRATION_DROPPED_EVENT,
    ROCPROFILER_PAGE_MIGRATION_LAST,
} rocprofiler_page_migration_operation_t;
|
|
|
|
/**
 * @brief Scratch event kind
 */
typedef enum
{
    ROCPROFILER_SCRATCH_MEMORY_NONE = 0,       ///< Unknown scratch operation
    ROCPROFILER_SCRATCH_MEMORY_ALLOC,          ///< Scratch memory allocation event
    ROCPROFILER_SCRATCH_MEMORY_FREE,           ///< Scratch memory free event
    ROCPROFILER_SCRATCH_MEMORY_ASYNC_RECLAIM,  ///< Scratch memory asynchronously reclaimed
    ROCPROFILER_SCRATCH_MEMORY_LAST,
} rocprofiler_scratch_memory_operation_t;
|
|
|
|
/**
 * @brief Enumeration for specifying runtime libraries supported by rocprofiler. This enumeration is
 * used for thread creation callbacks. @see INTERNAL_THREADING.
 */
typedef enum
{
    ROCPROFILER_LIBRARY           = (1 << 0),
    ROCPROFILER_HSA_LIBRARY       = (1 << 1),
    ROCPROFILER_HIP_LIBRARY       = (1 << 2),
    ROCPROFILER_MARKER_LIBRARY    = (1 << 3),
    ROCPROFILER_RCCL_LIBRARY      = (1 << 4),
    ROCPROFILER_ROCDECODE_LIBRARY = (1 << 5),
    ROCPROFILER_ROCJPEG_LIBRARY   = (1 << 6),
    ROCPROFILER_LIBRARY_LAST      = ROCPROFILER_ROCJPEG_LIBRARY,
} rocprofiler_runtime_library_t;
|
|
|
|
/**
 * @brief Enumeration for specifying intercept tables supported by rocprofiler. This enumeration is
 * used for intercept tables. @see INTERCEPT_TABLE.
 */
typedef enum
{
    ROCPROFILER_HSA_TABLE            = (1 << 0),
    ROCPROFILER_HIP_RUNTIME_TABLE    = (1 << 1),
    ROCPROFILER_HIP_COMPILER_TABLE   = (1 << 2),
    ROCPROFILER_MARKER_CORE_TABLE    = (1 << 3),
    ROCPROFILER_MARKER_CONTROL_TABLE = (1 << 4),
    ROCPROFILER_MARKER_NAME_TABLE    = (1 << 5),
    ROCPROFILER_RCCL_TABLE           = (1 << 6),
    ROCPROFILER_ROCDECODE_TABLE      = (1 << 7),
    ROCPROFILER_ROCJPEG_TABLE        = (1 << 8),
    ROCPROFILER_TABLE_LAST           = ROCPROFILER_ROCJPEG_TABLE,
} rocprofiler_intercept_table_t;
|
|
|
|
/**
 * @brief ROCProfiler Runtime Initialization Tracer Operations.
 */
typedef enum  // NOLINT(performance-enum-size)
{
    ROCPROFILER_RUNTIME_INITIALIZATION_NONE = 0,   ///< Unknown runtime initialization
    ROCPROFILER_RUNTIME_INITIALIZATION_HSA,        ///< Application loaded HSA runtime
    ROCPROFILER_RUNTIME_INITIALIZATION_HIP,        ///< Application loaded HIP runtime
    ROCPROFILER_RUNTIME_INITIALIZATION_MARKER,     ///< Application loaded Marker (ROCTx) runtime
    ROCPROFILER_RUNTIME_INITIALIZATION_RCCL,       ///< Application loaded RCCL runtime
    ROCPROFILER_RUNTIME_INITIALIZATION_ROCDECODE,  ///< Application loaded rocDecode runtime
    ROCPROFILER_RUNTIME_INITIALIZATION_ROCJPEG,    ///< Application loaded rocJPEG runtime
    ROCPROFILER_RUNTIME_INITIALIZATION_LAST,
} rocprofiler_runtime_initialization_operation_t;
|
|
|
|
/**
 * @brief Enumeration for specifying the counter info struct version you want.
 */
typedef enum
{
    ROCPROFILER_COUNTER_INFO_VERSION_NONE,
    ROCPROFILER_COUNTER_INFO_VERSION_0,  ///< @see ::rocprofiler_counter_info_v0_t
    ROCPROFILER_COUNTER_INFO_VERSION_LAST,
} rocprofiler_counter_info_version_id_t;
|
|
|
|
/**
 * @brief Enumeration for distinguishing different buffer record kinds within the
 * ::ROCPROFILER_BUFFER_CATEGORY_COUNTERS category
 */
typedef enum
{
    ROCPROFILER_COUNTER_RECORD_NONE = 0,
    ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER,  ///< ::rocprofiler_dispatch_counting_service_record_t
    ROCPROFILER_COUNTER_RECORD_VALUE,
    ROCPROFILER_COUNTER_RECORD_LAST,

    /// @var ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER
    /// @brief Indicates the payload type is of type
    /// ::rocprofiler_dispatch_counting_service_record_t
} rocprofiler_counter_record_kind_t;
|
|
|
|
/**
 * @brief Enumeration of flags that can be used with some counter api calls
 */
typedef enum
{
    ROCPROFILER_COUNTER_FLAG_NONE = 0,
    ROCPROFILER_COUNTER_FLAG_ASYNC,  ///< Do not wait for completion before returning.
    ROCPROFILER_COUNTER_FLAG_APPEND_DEFINITION,  ///< Append the counter definition to the system
                                                 ///< provided counter definition file.
    ROCPROFILER_COUNTER_FLAG_LAST,
} rocprofiler_counter_flag_t;
|
|
|
|
/**
 * @brief Enumeration for distinguishing different buffer record kinds within the
 * ::ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING category
 */
typedef enum
{
    ROCPROFILER_PC_SAMPLING_RECORD_NONE = 0,
    ROCPROFILER_PC_SAMPLING_RECORD_HOST_TRAP_V0_SAMPLE,  ///< ::rocprofiler_pc_sampling_record_host_trap_v0_t
    ROCPROFILER_PC_SAMPLING_RECORD_STOCHASTIC_V0_SAMPLE,  ///< for the future use
    ROCPROFILER_PC_SAMPLING_RECORD_LAST,
} rocprofiler_pc_sampling_record_kind_t;
|
|
|
|
//--------------------------------------------------------------------------------------//
|
|
//
|
|
// ALIASES
|
|
//
|
|
//--------------------------------------------------------------------------------------//
|
|
|
|
/**
 * @brief ROCProfiler Timestamp.
 */
typedef uint64_t rocprofiler_timestamp_t;
|
|
|
|
/**
 * @brief Thread ID. Value will be equivalent to `syscall(__NR_gettid)`
 */
typedef uint64_t rocprofiler_thread_id_t;
|
|
|
|
/**
 * @brief Tracing Operation ID. Depending on the kind, operations can be determined.
 * If the value is equal to zero that means all operations will be considered
 * for tracing. Detailed API tracing operations can be found at associated header file
 * for that particular operation. i.e: For ROCProfiler enumeration of HSA AMD Extended API tracing
 * operations, look at source/include/rocprofiler-sdk/hsa/amd_ext_api_id.h
 */
typedef int32_t rocprofiler_tracing_operation_t;
|
|
|
|
/**
 * @brief Kernel identifier type
 *
 */
typedef uint64_t rocprofiler_kernel_id_t;
|
|
|
|
/**
 * @brief Sequence identifier type
 *
 */
typedef uint64_t rocprofiler_dispatch_id_t;
|
|
|
|
/**
 * @brief Unique record id encoding both the counter
 *        and dimensional values (positions) for the record.
 */
typedef uint64_t rocprofiler_counter_instance_id_t;
|
|
|
|
/**
 * @brief A dimension for counter instances. Some example
 *        dimensions include XCC, SM (Shader), etc. This
 *        value represents the dimension being described
 *        or queried about.
 */
typedef uint64_t rocprofiler_counter_dimension_id_t;
|
|
|
|
//--------------------------------------------------------------------------------------//
|
|
//
|
|
// UNIONS
|
|
//
|
|
//--------------------------------------------------------------------------------------//
|
|
|
|
/**
 * @brief User-assignable data type
 *
 */
typedef union rocprofiler_user_data_t
{
    uint64_t value;  ///< usage example: set to process id, thread id, etc.
    void*    ptr;    ///< usage example: set to address of data allocation
} rocprofiler_user_data_t;
|
|
|
|
/**
 * @brief Stores memory address for profiling
 *
 */
typedef union rocprofiler_address_t
{
    uint64_t handle;  ///< usage example: store address in uint64_t format
    void*    ptr;     ///< usage example: generic form of address
} rocprofiler_address_t;
|
|
|
|
/**
 * @brief Stores UUID for devices.
 *
 */
typedef union rocprofiler_uuid_t
{
    uint64_t value;  ///< numerical value
    void*    bytes;  ///< uuid in hexadecimal
} rocprofiler_uuid_t;
|
|
|
|
//--------------------------------------------------------------------------------------//
|
|
//
|
|
// STRUCTS
|
|
//
|
|
//--------------------------------------------------------------------------------------//
|
|
|
|
/**
 * @brief Context ID.
 */
typedef struct
{
    uint64_t handle;
} rocprofiler_context_id_t;
|
|
|
|
/**
 * @brief Queue ID.
 */
typedef struct
{
    uint64_t handle;
} rocprofiler_queue_id_t;
|
|
|
|
/**
|
|
* @brief ROCProfiler Record Correlation ID.
|
|
*/
|
|
typedef struct
|
|
{
|
|
uint64_t internal;
|
|
rocprofiler_user_data_t external;
|
|
} rocprofiler_correlation_id_t;
|
|
|
|
/**
 * @brief The NULL value of an internal correlation ID.
 */
#define ROCPROFILER_CORRELATION_ID_INTERNAL_NONE ROCPROFILER_UINT64_C(0)
|
|
|
|
/**
 * @struct rocprofiler_buffer_id_t
 * @brief Buffer ID.
 */
typedef struct
{
    uint64_t handle;
} rocprofiler_buffer_id_t;
|
|
|
|
/**
 * @brief Agent Identifier
 */
typedef struct
{
    uint64_t handle;
} rocprofiler_agent_id_t;
|
|
|
|
/**
 * @brief Counter ID.
 */
typedef struct
{
    uint64_t handle;
} rocprofiler_counter_id_t;
|
|
|
|
/**
 * @brief Profile Configurations
 * @see rocprofiler_create_profile_config for how to create.
 */
typedef struct
{
    uint64_t handle;  // Opaque handle
} rocprofiler_profile_config_id_t;
|
|
|
|
/**
 * @brief Multi-dimensional struct of data used to describe GPU workgroup and grid sizes
 */
typedef struct rocprofiler_dim3_t
{
    uint32_t x;
    uint32_t y;
    uint32_t z;
} rocprofiler_dim3_t;
|
|
|
|
/**
|
|
* @brief Tracing record
|
|
*
|
|
*/
|
|
typedef struct rocprofiler_callback_tracing_record_t
|
|
{
|
|
rocprofiler_context_id_t context_id;
|
|
rocprofiler_thread_id_t thread_id;
|
|
rocprofiler_correlation_id_t correlation_id;
|
|
rocprofiler_callback_tracing_kind_t kind;
|
|
rocprofiler_tracing_operation_t operation;
|
|
rocprofiler_callback_phase_t phase;
|
|
void* payload;
|
|
} rocprofiler_callback_tracing_record_t;
|
|
|
|
/**
 * @brief Generic record with type identifier(s) and a pointer to data. This data type is used with
 * buffered data.
 *
 * @code{.cpp}
 *  void
 *  tool_tracing_callback(rocprofiler_record_header_t** headers,
 *                        size_t                        num_headers)
 *  {
 *      for(size_t i = 0; i < num_headers; ++i)
 *      {
 *          rocprofiler_record_header_t* header = headers[i];
 *
 *          if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
 *             header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API)
 *          {
 *              // cast to rocprofiler_buffer_tracing_hsa_api_record_t which
 *              // is type associated with this category + kind
 *              auto* record =
 *                  static_cast<rocprofiler_buffer_tracing_hsa_api_record_t*>(header->payload);
 *
 *              // trivial test
 *              assert(record->start_timestamp <= record->end_timestamp);
 *          }
 *      }
 *  }
 *
 * @endcode
 */
typedef struct
{
    union
    {
        struct
        {
            uint32_t category;  ///< rocprofiler_buffer_category_t
            uint32_t kind;      ///< domain
        };
        uint64_t hash;  ///< generic identifier. You can compute this via: `uint64_t hash = category
                        ///< | ((uint64_t)(kind) << 32)`, or by calling
                        ///< rocprofiler_record_header_compute_hash
    };
    void* payload;
} rocprofiler_record_header_t;
|
|
|
|
/**
 * @brief Function for computing the unsigned 64-bit hash value in @ref rocprofiler_record_header_t
 * from a category and kind (two unsigned 32-bit values)
 *
 * The category occupies the low 32 bits of the result and the kind occupies the high 32 bits.
 *
 * @param [in] category a value from @ref rocprofiler_buffer_category_t
 * @param [in] kind depending on the category, this is the domain value, e.g., @ref
 * rocprofiler_buffer_tracing_kind_t value
 * @return uint64_t hash value of category and kind
 */
static inline uint64_t
rocprofiler_record_header_compute_hash(uint32_t category, uint32_t kind)
{
    // widen kind before shifting: shifting a 32-bit value by 32 bits is undefined behavior
    return (((uint64_t) kind) << 32) | ((uint64_t) category);
}
|
|
|
|
/**
|
|
* @brief ROCProfiler kernel dispatch information
|
|
*
|
|
*/
|
|
typedef struct rocprofiler_kernel_dispatch_info_t
|
|
{
|
|
uint64_t size; ///< Size of this struct (minus reserved padding)
|
|
rocprofiler_agent_id_t agent_id; ///< Agent ID where kernel is launched
|
|
rocprofiler_queue_id_t queue_id; ///< Queue ID where kernel packet is enqueued
|
|
rocprofiler_kernel_id_t kernel_id; ///< Kernel identifier
|
|
rocprofiler_dispatch_id_t dispatch_id; ///< unique id for each dispatch
|
|
uint32_t private_segment_size; ///< runtime private memory segment size
|
|
uint32_t group_segment_size; ///< runtime group memory segment size
|
|
rocprofiler_dim3_t workgroup_size; ///< runtime workgroup size (grid * threads)
|
|
rocprofiler_dim3_t grid_size; ///< runtime grid size
|
|
uint8_t reserved_padding[56]; // reserved for extensions w/o ABI break
|
|
} rocprofiler_kernel_dispatch_info_t;
|
|
|
|
/**
|
|
* @brief Details for the dimension, including its size, for a counter record.
|
|
*/
|
|
typedef struct
|
|
{
|
|
const char* name;
|
|
size_t instance_size;
|
|
rocprofiler_counter_dimension_id_t id;
|
|
|
|
/// @var id
|
|
/// @brief Id for this dimension used by @ref rocprofiler_query_record_dimension_position
|
|
} rocprofiler_record_dimension_info_t;
|
|
|
|
/**
|
|
* @brief ROCProfiler Profile Counting Counter Record per instance.
|
|
*/
|
|
typedef struct
|
|
{
|
|
rocprofiler_counter_instance_id_t id; ///< counter identifier
|
|
double counter_value; ///< counter value
|
|
rocprofiler_dispatch_id_t dispatch_id;
|
|
rocprofiler_user_data_t user_data;
|
|
rocprofiler_agent_id_t agent_id;
|
|
|
|
/// @var dispatch_id
|
|
/// @brief A value greater than zero indicates that this counter record is associated with a
|
|
/// specific dispatch.
|
|
///
|
|
/// This value can be mapped to a dispatch via the `dispatch_info` field (@see
|
|
/// ::rocprofiler_kernel_dispatch_info_t) of a ::rocprofiler_dispatch_counting_service_data_t
|
|
/// instance (provided during callback for profile config) or a
|
|
/// ::rocprofiler_dispatch_counting_service_record_t records (which will be insert into the
|
|
/// buffer prior to the associated ::rocprofiler_record_counter_t records).
|
|
} rocprofiler_record_counter_t;
|
|
|
|
/**
|
|
* @brief Counter info struct version 0
|
|
*/
|
|
typedef struct
|
|
{
|
|
rocprofiler_counter_id_t id; ///< Id of this counter
|
|
const char* name; ///< Name of the counter
|
|
const char* description; ///< Description of the counter
|
|
const char* block; ///< Block of the counter (non-derived only)
|
|
const char* expression; ///< Counter expression (derived counters only)
|
|
uint8_t is_constant : 1; ///< If this counter is HW constant
|
|
uint8_t is_derived : 1; ///< If this counter is a derived counter
|
|
} rocprofiler_counter_info_v0_t;
|
|
|
|
/**
|
|
* @brief ROCProfiler SPM Record.
|
|
*
|
|
*/
|
|
typedef struct
|
|
{
|
|
/**
|
|
* Counters, including identifiers to get counter information and Counters
|
|
* values
|
|
*/
|
|
rocprofiler_record_counter_t* counters;
|
|
uint64_t counters_count;
|
|
} rocprofiler_spm_record_t;
|
|
|
|
/** @} */
|
|
|
|
ROCPROFILER_EXTERN_C_FINI
|
|
|
|
ROCPROFILER_CXX_CODE(
|
|
static_assert(sizeof(rocprofiler_kernel_dispatch_info_t) == 128,
|
|
"Increasing the size of the kernel dispatch info is not permitted");)
|