// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#pragma once

// NOTE(review): the header names of the three #include directives below are missing in
// this copy of the file (the "<...>" targets appear to have been stripped). They are left
// untouched here; restore them from the upstream header before building.
#include
#include
#include

ROCPROFILER_EXTERN_C_INIT

/**
 * @defgroup PC_SAMPLING_SERVICE PC Sampling
 * @brief Enabling PC (Program Counter) Sampling for GPU Activity
 * @{
 */

/**
 * @brief Function used to configure the PC sampling service on the GPU agent with @p agent_id.
 *
 * Prerequisites are the following:
 * - The client must create a context and supply its @p context_id. By using this context,
 *   the client can start/stop PC sampling on the agent. For more information,
 *   please @see rocprofiler_start_context/rocprofiler_stop_context.
 * - The user must create a buffer and supply its @p buffer_id. Rocprofiler-SDK uses the buffer
 *   to deliver the PC samples to the client. For more information about the data delivery,
 *   please @see rocprofiler_create_buffer and @see rocprofiler_buffer_tracing_cb_t.
 *
 * Before calling this function, we recommend querying PC sampling configurations
 * supported by the GPU agent via the @see rocprofiler_query_pc_sampling_agent_configurations.
 * The client chooses the @p method, @p unit, and @p interval to match one of the
 * available configurations. Note that the @p interval must belong to the range of values
 * [available_config.min_interval, available_config.max_interval],
 * where available_config is the instance of the @see rocprofiler_pc_sampling_configuration_s
 * supported/available at the moment.
 *
 * Rocprofiler-SDK checks whether the requested configuration is actually supported
 * at the moment of calling this function. If the answer is yes, it returns
 * the @see ROCPROFILER_STATUS_SUCCESS. Otherwise, it notifies the client about the
 * rejection reason via the returned status code. For more information
 * about the status codes, please @see rocprofiler_status_t.
 *
 * There are a few constraints a client's code needs to be aware of.
 *
 * Constraint1: A GPU agent can be configured to support at most one running PC sampling
 * configuration at any time, which implies some of the consequences described below.
 * After the tool configures the PC sampling with one of the available configurations,
 * rocprofiler-SDK guarantees that this configuration will be valid for the tool's
 * lifetime. The tool can start and stop the configured PC sampling service whenever convenient.
 *
 * Constraint2: Since the same GPU agent can be used by multiple processes concurrently,
 * Rocprofiler-SDK cannot guarantee the exclusive access to the PC sampling capability.
 * The consequence is the following scenario. The tool TA that belongs to the process PA,
 * calls the @see rocprofiler_query_pc_sampling_agent_configurations that returns the
 * two supported configurations CA and CB by the agent. Then the tool TB of the process PB,
 * configures the PC sampling on the same agent by using the configuration CB.
 * Subsequently, the TA tries configuring the CA on the agent, and it fails.
 * To point out that this case happened, we introduce a special status code
 * @see ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE.
 * When this status code is observed by the tool TA, it queries all available configurations again
 * by calling @see rocprofiler_query_pc_sampling_agent_configurations,
 * that returns only CB this time. The tool TA can choose CB, so that both
 * TA and TB use the PC sampling capability in the separate processes.
 * Both TA and TB receive samples generated by the kernels launched by the
 * corresponding processes PA and PB, respectively.
 *
 * Constraint3: Rocprofiler-SDK allows only one context to contain the configured PC sampling
 * service within the process, which implies that at most one of the loaded tools can use PC
 * sampling. One context can contain multiple PC sampling services configured for different GPU
 * agents.
 *
 * Constraint4: PC sampling feature is not available within the ROCgdb.
 *
 * Constraint5: PC sampling service cannot be used simultaneously with
 * counter collection service.
 *
 * @param [in] context_id - id of the context used for starting/stopping PC sampling service
 * @param [in] agent_id   - id of the agent on which caller tries using PC sampling capability
 * @param [in] method     - the type of PC sampling the caller tries to use on the agent.
 * @param [in] unit       - The unit appropriate to the PC sampling type/method.
 * @param [in] interval   - frequency at which PC samples are generated
 * @param [in] buffer_id  - id of the buffer used for delivering PC samples
 * @param [in] flags      - for future use
 * @return ::rocprofiler_status_t
 * @retval ::ROCPROFILER_STATUS_SUCCESS PC sampling service configured successfully
 * @retval ::ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE One of the scenarios is present:
 * 1. PC sampling is already configured with configuration different than requested,
 * 2. PC sampling is requested from a process that runs within the ROCgdb.
 * 3. HSA runtime does not support PC sampling.
 * @retval ::ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL the amdgpu driver installed on the system
 * does not support the PC sampling feature
 * @retval ::ROCPROFILER_STATUS_ERROR a general error caused by the amdgpu driver
 * @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_CONFLICT counter collection service already
 * setup in the context
 */
rocprofiler_status_t
rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t         context_id,
                                          rocprofiler_agent_id_t           agent_id,
                                          rocprofiler_pc_sampling_method_t method,
                                          rocprofiler_pc_sampling_unit_t   unit,
                                          uint64_t                         interval,
                                          rocprofiler_buffer_id_t          buffer_id,
                                          int                              flags) ROCPROFILER_API;

/**
 * @brief PC sampling configuration supported by a GPU agent.
 */
typedef struct
{
    uint64_t                         size;  ///< Size of this struct
    rocprofiler_pc_sampling_method_t method;
    rocprofiler_pc_sampling_unit_t   unit;
    size_t                           min_interval;
    size_t                           max_interval;
    uint64_t                         flags;  ///< for future use

    /// @var method
    /// @brief Sampling method supported by the GPU agent.
    /// Currently, it can take one of the following two values:
    /// - ::ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP: a background host kernel thread
    /// periodically interrupts waves execution on the GPU to generate PC samples
    /// - ::ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC: performance monitoring hardware
    /// on the GPU periodically interrupts waves to generate PC samples.
    /// @var unit
    /// @brief A unit used to specify the interval of the @ref method for samples generation.
    /// @var min_interval
    /// @brief the smallest interval accepted for @ref method, i.e. the highest possible
    /// frequency for generating samples using @ref method.
    /// @var max_interval
    /// @brief the largest interval accepted for @ref method, i.e. the lowest possible
    /// frequency for generating samples using @ref method.
} rocprofiler_pc_sampling_configuration_t;

/**
 * @brief Rocprofiler SDK's callback function to deliver the list of available PC
 * sampling configurations upon the call to the
 * @ref rocprofiler_query_pc_sampling_agent_configurations.
 *
 * @param[in] configs - The array of PC sampling configurations supported by the agent
 * at the moment of invoking @ref rocprofiler_query_pc_sampling_agent_configurations.
 * @param[in] num_config - The number of configurations contained in the underlying array
 * @p configs.
 * In case the GPU agent does not support PC sampling, the value is 0.
 * @param[in] user_data - client's private data passed via
 * @ref rocprofiler_query_pc_sampling_agent_configurations
 * @return ::rocprofiler_status_t
 */
typedef rocprofiler_status_t (*rocprofiler_available_pc_sampling_configurations_cb_t)(
    const rocprofiler_pc_sampling_configuration_t* configs,
    size_t                                         num_config,
    void*                                          user_data);

/**
 * @brief Query PC Sampling Configuration.
 *
 * Lists PC sampling configurations a GPU agent with @p agent_id supports at the moment
 * of invoking the function. Delivers configurations via @p cb.
 * In case the PC sampling is configured on the GPU agent, the @p cb delivers information
 * about the active PC sampling configuration.
 * In case the GPU agent does not support PC sampling capability,
 * the @p cb delivers no PC sampling configurations.
 *
 * Both @p cb and @p user_data are declared non-null via ROCPROFILER_NONNULL(2, 3).
 *
 * @param [in] agent_id  - id of the agent for which available configurations will be listed
 * @param [in] cb        - User callback that delivers the available PC sampling configurations
 * @param [in] user_data - passed to the @p cb
 * @return ::rocprofiler_status_t
 * @retval ::ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE One of the scenarios is present:
 * 1. PC sampling is requested from a process that runs within the ROCgdb.
 * 2. HSA runtime does not support PC sampling.
 * @retval ::ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL the amdgpu driver installed on the system
 * does not support the PC sampling feature.
 * @retval ::ROCPROFILER_STATUS_ERROR a general error caused by the amdgpu driver
 * @retval ::ROCPROFILER_STATUS_SUCCESS @p cb successfully finished
 */
rocprofiler_status_t
rocprofiler_query_pc_sampling_agent_configurations(
    rocprofiler_agent_id_t                                agent_id,
    rocprofiler_available_pc_sampling_configurations_cb_t cb,
    void* user_data) ROCPROFILER_API ROCPROFILER_NONNULL(2, 3);

/**
 * @brief Information about the GPU part where the wave was executing
 * at the moment of sampling.
 *
 * The bit-field widths below add up to 64 bits.
 */
typedef struct rocprofiler_pc_sampling_hw_id_v0_t
{
    uint64_t chiplet : 6;       ///< chiplet index (3 bits allocated by the ROCr runtime)
    uint64_t wave_id : 7;       ///< wave slot index
    uint64_t simd_id : 2;       ///< SIMD index
    uint64_t pipe_id : 4;       ///< pipe index
    uint64_t cu_or_wgp_id : 4;  ///< Index of compute unit on GFX9 or workgroup processor on other
                                ///< architectures
    uint64_t shader_array_id : 1;   ///< Shader array index
    uint64_t shader_engine_id : 5;  ///< Shader engine index
    uint64_t workgroup_id : 7;  ///< thread_group index on GFX9, and workgroup index on GFX10+
    uint64_t vm_id : 6;         ///< virtual memory ID
    uint64_t queue_id : 4;      ///< queue id
    uint64_t microengine_id : 2;  ///< ACE (microengine) index
    uint64_t reserved0 : 16;      ///< Reserved for future use
} rocprofiler_pc_sampling_hw_id_v0_t;

/**
 * @brief Sampled program counter.
 */
typedef struct
{
    uint64_t code_object_id;
    uint64_t code_object_offset;

    /// @var code_object_id
    /// @brief id of the loaded code object instance that contains sampled PC.
    /// This field holds the value ::ROCPROFILER_CODE_OBJECT_ID_NONE
    /// if the code object cannot be determined
    /// (e.g., sampled PC belongs to code generated by self modifying code).
    /// @var code_object_offset
    /// @brief If @ref code_object_id is different than ::ROCPROFILER_CODE_OBJECT_ID_NONE,
    /// then this field contains the offset of the sampled PC relative to the
    /// ::rocprofiler_callback_tracing_code_object_load_data_t::load_base
    /// of the code object instance with @ref code_object_id.
    /// To calculate the original virtual address of the sampled PC, one can add the value
    /// of this field to the ::rocprofiler_callback_tracing_code_object_load_data_t::load_base.
    /// The value of @ref code_object_offset matches
    /// the virtual address of the sampled instruction (PC), only if the
    /// @ref code_object_id is equal to the ::ROCPROFILER_CODE_OBJECT_ID_NONE.
} rocprofiler_pc_t;

// TODO: The definition of this struct might change over time.
/**
 * @brief ROCProfiler Host-Trap PC Sampling Record.
 */
typedef struct rocprofiler_pc_sampling_record_host_trap_v0_t
{
    uint64_t                           size;   ///< Size of this struct
    rocprofiler_pc_sampling_hw_id_v0_t hw_id;  ///< @see ::rocprofiler_pc_sampling_hw_id_v0_t
    rocprofiler_pc_t                   pc;     ///< information about sampled program counter
    uint64_t                           exec_mask;    ///< active SIMD lanes when sampled
    uint64_t                           timestamp;    ///< timestamp when sample is generated
    uint64_t                           dispatch_id;  ///< originating kernel dispatch ID
    rocprofiler_correlation_id_t correlation_id;  ///< API launch call id that matches dispatch ID
    // NOTE(review): the comment on workgroup_id describes "wave coordinates within the
    // workgroup", while the field is named workgroup_id — confirm which coordinates are meant.
    rocprofiler_dim3_t workgroup_id;       ///< wave coordinates within the workgroup
    uint32_t           wave_in_group : 8;  ///< wave position within the workgroup (0-31)
    uint32_t           reserved0 : 24;     ///< Reserved for future use (pads the bit-field to 32
                                           ///< bits together with wave_in_group)
} rocprofiler_pc_sampling_record_host_trap_v0_t;

/** @} */

ROCPROFILER_EXTERN_C_FINI