From 4c4b3a3e95d3dc098569b301daa73b694fd76a5e Mon Sep 17 00:00:00 2001 From: Swati Rawat <120587655+SwRaw@users.noreply.github.com> Date: Tue, 18 Nov 2025 22:29:37 +0530 Subject: [PATCH] Fix the broken sample GitHub link (#1828) --- .../docs/api-reference/thread_trace.rst | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/projects/rocprofiler-sdk/source/docs/api-reference/thread_trace.rst b/projects/rocprofiler-sdk/source/docs/api-reference/thread_trace.rst index 6531fb9fdd..a543c731d9 100644 --- a/projects/rocprofiler-sdk/source/docs/api-reference/thread_trace.rst +++ b/projects/rocprofiler-sdk/source/docs/api-reference/thread_trace.rst @@ -22,7 +22,7 @@ ROCprofiler-SDK provides wrapper APIs for the ROCprof Trace Decoder, a library t Thread trace service API ------------------------------------ -This section describes how to use the ROCprofiler-SDK thread trace API to configure and use the thread trace service. For fully functional examples, see `Samples `_. +This section describes how to use the ROCprofiler-SDK thread trace API to configure and use the thread trace service. For fully functional examples, see `Samples `_. tool_init() setup ++++++++++++++++++ @@ -60,7 +60,7 @@ Here are the steps to set up ``tool_init()`` for thread trace: for(size_t i = 0; i < _num_agents; ++i) { auto* agent = static_cast(_agents[i]); - if(agent->type == ROCPROFILER_AGENT_TYPE_GPU) + if(agent->type == ROCPROFILER_AGENT_TYPE_GPU) agent_v->emplace_back(agent->id); } return ROCPROFILER_STATUS_SUCCESS; @@ -74,21 +74,21 @@ Here are the steps to set up ``tool_init()`` for thread trace: .. 
code-block:: cpp std::vector params{}; - + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK, 0xF}); - + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU, 0}); - + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT, 0xF}); - + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE, 1u<<30}); // 1 GB - + The configuration parameters are described here: - ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK: Configures the Shader Engine (SE) mask, which determines the SEs to be traced. This is a bitmask where each bit corresponds to an SE. For MI3xx, each hex digit corresponds to an XCD. It's highly recommended to trace only one SE at a time to avoid data loss. - ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU: Configures the target Compute Unit (CU) or WGP. Instruction tracing can only operate on a single CU or WGP at a time. The same target is used for all SEs in ``ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK``. - + - ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT: Configures SIMD selection. For gfx9, this is a bitmask where each bit corresponds to a SIMD lane. For example, 0xF selects all SIMD lanes in the ``target_cu``. For gfx10, gfx11, and gfx12, this selects a single SIMD ID to trace. Results are taken modulo 4 for compatibility with gfx9, so 0xF selects SIMD3 of the target WGP. - ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE: Configures the buffer size. This buffer is shared among all SEs specified in ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK. Specifying a larger buffer size has minimal side effects other than increased VRAM usage. 
@@ -139,12 +139,12 @@ To enable selective thread trace based on specific kernel dispatches, use the di rocprofiler_user_data_t* dispatch_userdata) { // Trace only the desired kernels - if(target_kernel_id == kernel_id) + if(target_kernel_id == kernel_id) return ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP; - + return ROCPROFILER_THREAD_TRACE_CONTROL_NONE; } - + // Configure dispatch-based thread trace for(auto agent_id : agents) { @@ -169,7 +169,7 @@ To start the context after all services are configured, use: auto status = rocprofiler_start_context(ctx); // Run your application workload here. - + To stop the context to end data collection for device-wide thread trace, use: .. code-block:: cpp @@ -189,7 +189,7 @@ To decode the raw thread trace data, create and initialize a Trace Decoder: .. code-block:: cpp rocprofiler_thread_trace_decoder_id_t decoder{}; - + // Create the Trace Decoder with the path to the decoder library ROCPROFILER_CALL( rocprofiler_thread_trace_decoder_create(&decoder, "/opt/rocm/lib"), @@ -229,7 +229,7 @@ To properly decode instruction addresses, track the code object information: if(data->storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE) return; auto* memorybase = reinterpret_cast(data->memory_base); - + // Register code object with Trace Decoder ROCPROFILER_CALL( rocprofiler_thread_trace_decoder_codeobj_load( @@ -294,7 +294,7 @@ The trace decoder provides decoded information through a callback: } break; } - + // Handle other record types as needed } } @@ -307,13 +307,13 @@ The Trace Decoder provides important information about the quality and comprehen - ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST This event indicates that part of the trace data was dropped due to either hardware bandwidth limitations or buffer overflows. Receiving this event implies that portions of your trace might be missing or unreliable, which can affect the accuracy of any analysis based on the trace data. 
- + **Possible causes:** - The trace buffer size was too small for the workload - Memory bandwidth was exceeded - + **Recommended actions:** - Increase buffer sizes if possible @@ -327,7 +327,7 @@ The Trace Decoder provides important information about the quality and comprehen - ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE This event indicates that the Trace Decoder was unable to find the PC (Program Counter) address for one or more traced instructions. Affected instructions will have their "pc" field set to zero. - + **Possible causes:** - The trace was started in the middle of a kernel execution: