ATT Doc updates. Fix trace-decode return error. (#406)

* Doc updates. Some cleanup.

* Formatting

---------

Co-authored-by: Giovanni Baraldi <gbaraldi@amd.com>
This commit is contained in:
Baraldi, Giovanni
2025-05-22 21:50:45 +02:00
zatwierdzone przez GitHub
rodzic e587a8b23a
commit 3cb0c87f53
5 zmienionych plików z 151 dodań i 75 usunięć
@@ -28,21 +28,24 @@
ROCPROFILER_EXTERN_C_INIT
/**
* @defgroup THREAD_TRACE Thread Trace Decoding
* @brief Provides API calls to decode thread trace data
*
* @defgroup THREAD_TRACE Thread Trace Service
* @brief ROCprof-trace-decoder wrapper. Provides API calls to decode thread trace shader data.
* @{
*/
/**
* @brief Handle containing a loaded rocprof-trace-decoder and a decoder state.
*/
typedef struct rocprofiler_thread_trace_decoder_handle_t
{
uint64_t handle; ///< Identifies the decoder instance; filled in by ::rocprofiler_thread_trace_decoder_create.
} rocprofiler_thread_trace_decoder_handle_t;
/**
* @brief Initializes Trace Decoder library
* @brief Initializes Trace Decoder library with a library search path
* @param[out] handle Handle to created decoder instance.
* @param[in] path Path to trace decoder library location (e.g. /opt/rocm/lib).
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE Library not found
* @retval ::ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI Library found but version not supported
* @retval ::ROCPROFILER_STATUS_SUCCESS Handle created
@@ -52,7 +55,7 @@ rocprofiler_thread_trace_decoder_create(rocprofiler_thread_trace_decoder_handle_
const char* path) ROCPROFILER_API ROCPROFILER_NONNULL(1, 2);
/**
* @brief Deletes handle created by rocprofiler_thread_trace_decoder_create
* @brief Deletes handle created by ::rocprofiler_thread_trace_decoder_create
* @param[in] handle Handle to destroy
*/
void
@@ -60,12 +63,21 @@ rocprofiler_thread_trace_decoder_destroy(rocprofiler_thread_trace_decoder_handle
ROCPROFILER_API;
/**
* @brief Loads a code object binary to match with Thread Trace
* @brief Loads a code object binary to match with Thread Trace.
* The size, data and load_* are reported by rocprofiler-sdk's code object tracing service.
* Used for the decoder library to know what code objects to look into when decoding shader data.
* Not all application code objects are required to be reported here, only the ones containing code
* executed at the time the shader data was collected by thread_trace services.
* If a code object not reported here is encountered while decoding shader data, a record of type
* INFO_STITCH_INCOMPLETE will be generated and instructions will not be reported with a PC address.
*
* @param[in] handle Handle to decoder instance.
* @param[in] load_id Code object load ID.
* @param[in] load_addr Code object load address.
* @param[in] load_size Code object load size.
* @param[in] data Code object binary data. Must be at least load_size bytes.
* @param[in] data Code object binary data.
* @param[in] size Code object binary data size.
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_ERROR Unable to load code object.
* @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT Invalid handle
* @retval ::ROCPROFILER_STATUS_SUCCESS Code object loaded
@@ -79,9 +91,10 @@ rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_h
uint64_t size) ROCPROFILER_API ROCPROFILER_NONNULL(5);
/**
* @brief Unloads a code object binary
* @brief Unloads a code object binary.
* @param[in] handle Handle to decoder instance.
* @param[in] load_id Code object load ID to remove.
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_ERROR Code object not loaded.
* @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT Invalid handle
* @retval ::ROCPROFILER_STATUS_SUCCESS Code object unloaded
@@ -91,8 +104,8 @@ rocprofiler_thread_trace_decoder_codeobj_unload(rocprofiler_thread_trace_decoder
uint64_t load_id) ROCPROFILER_API;
/**
* @brief Callback for rocprofiler to return traces back to rocprofiler.
* @param[in] trace_type_id One of rocprofiler_thread_trace_decoder_record_type_t
* @brief Callback for rocprof-trace-decoder to return decoder traces back to user.
* @param[in] record_type_id One of ::rocprofiler_thread_trace_decoder_record_type_t
* @param[in] trace_events A pointer to sequence of events, of size trace_size.
* @param[in] trace_size The number of events in the trace.
* @param[in] userdata Arbitrary data pointer to be sent back to the user via callback.
@@ -104,12 +117,22 @@ typedef void (*rocprofiler_thread_trace_decoder_callback_t)(
void* userdata);
/**
* @brief Iterate over all event coordinates for a given agent_t and event_t.
* @param[in] se_data_callback Callback to return shader engine data from.
* @brief Decodes shader data returned by ::rocprofiler_thread_trace_shader_data_callback_t.
* Use ::rocprofiler_thread_trace_decoder_codeobj_load to add references to loaded code objects
* during the trace.
* A ::rocprofiler_thread_trace_decoder_callback_t returns decoded data back to user. The first
* record is always of type ::ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP.
*
* @param[in] handle Decoder handle
* @param[in] callback Decoded trace data returned to user.
* @param[in] data Thread trace binary data.
* @param[in] size Thread trace binary size.
* @param[in] userdata Userdata passed back to caller via callback.
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT invalid argument
* @retval ::ROCPROFILER_STATUS_ERROR_AGENT_ARCH_NOT_SUPPORTED arch not supported
* @retval ::ROCPROFILER_STATUS_ERROR generic error
* @retval ::ROCPROFILER_STATUS_SUCCESS on success
*/
rocprofiler_status_t
rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle,
@@ -119,7 +142,8 @@ rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle,
void* userdata) ROCPROFILER_API ROCPROFILER_NONNULL(2, 3);
/**
* @brief Returns the description of a rocprofiler_thread_trace_decoder_info_t record.
* @brief Returns the string description of a ::rocprofiler_thread_trace_decoder_info_t record.
* @param[in] handle Decoder handle
* @param[in] info The decoder info received
* @return Null-terminated string describing "info".
*/
@@ -128,4 +152,6 @@ rocprofiler_thread_trace_decoder_info_string(rocprofiler_thread_trace_decoder_ha
rocprofiler_thread_trace_decoder_info_t info)
ROCPROFILER_API;
/** @} */
ROCPROFILER_EXTERN_C_FINI
@@ -25,6 +25,16 @@
#include <stddef.h>
#include <stdint.h>
/**
* @defgroup THREAD_TRACE Thread Trace Service
* @brief ROCprof-trace-decoder defined types. All timestamp values are in shader clock units.
*
* @{
*/
/**
* @brief Describes the type of info received.
*/
typedef enum rocprofiler_thread_trace_decoder_info_t
{
ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE = 0,
@@ -33,35 +43,47 @@ typedef enum rocprofiler_thread_trace_decoder_info_t
ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST
} rocprofiler_thread_trace_decoder_info_t;
/**
* @brief Describes a PC address.
*/
typedef struct rocprofiler_thread_trace_decoder_pc_t
{
size_t addr;
size_t marker_id;
size_t addr; ///< Memory address (marker_id == 0), or ELF vaddr (marker_id != 0).
size_t marker_id; ///< Code object load ID. Zero if no code object was found.
} rocprofiler_thread_trace_decoder_pc_t;
/**
* @brief Describes four performance counter values.
*/
typedef struct rocprofiler_thread_trace_decoder_perfevent_t
{
int64_t time;
uint16_t events0;
uint16_t events1;
uint16_t events2;
uint16_t events3;
uint8_t CU;
uint8_t bank;
int64_t time; ///< Shader clock timestamp in which these counters were read.
uint16_t events0; ///< Counter0 (bank==0) or Counter4 (bank==1).
uint16_t events1; ///< Counter1 (bank==0) or Counter5 (bank==1).
uint16_t events2; ///< Counter2 (bank==0) or Counter6 (bank==1).
uint16_t events3; ///< Counter3 (bank==0) or Counter7 (bank==1).
uint8_t CU; ///< Shader compute unit ID these counters were collected from.
uint8_t bank; ///< Selects counter group [0,3] or [4,7]
} rocprofiler_thread_trace_decoder_perfevent_t;
/**
* @brief Describes an occupancy event (wave started or wave ended).
*/
typedef struct rocprofiler_thread_trace_decoder_occupancy_t
{
rocprofiler_thread_trace_decoder_pc_t pc;
uint64_t time;
uint8_t se;
uint8_t cu;
uint8_t simd;
uint8_t slot;
uint32_t start : 1;
rocprofiler_thread_trace_decoder_pc_t pc; ///< Wave start address (kernel entry point)
uint64_t time; ///< Timestamp of event
uint8_t reserved; ///< Reserved
uint8_t cu; ///< Compute unit ID (gfx9) or WGP ID (gfx10+).
uint8_t simd; ///< SIMD ID [0,3] within compute unit
uint8_t slot; ///< Wave slot ID within SIMD
uint32_t start : 1; ///< 1 if wave_start, 0 if a wave_end
uint32_t _rsvd : 31;
} rocprofiler_thread_trace_decoder_occupancy_t;
/**
* @brief Wave state type.
*/
typedef enum rocprofiler_thread_trace_decoder_wstate_type_t
{
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY = 0,
@@ -72,71 +94,92 @@ typedef enum rocprofiler_thread_trace_decoder_wstate_type_t
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST,
} rocprofiler_thread_trace_decoder_wstate_type_t;
/**
* @brief A wave state change event.
*/
typedef struct rocprofiler_thread_trace_decoder_wave_state_t
{
int32_t type; // One of rocprofiler_thread_trace_decoder_wstate_type_t
int32_t duration;
int32_t type; ///< One of ::rocprofiler_thread_trace_decoder_wstate_type_t
int32_t duration; ///< state duration in cycles
} rocprofiler_thread_trace_decoder_wave_state_t;
/**
* @brief Instruction type.
*/
typedef enum rocprofiler_thread_trace_decoder_inst_category_t
{
ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE = 0,
ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM,
ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU,
ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM,
ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT,
ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS,
ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU,
ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP,
ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT,
ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED,
ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT,
ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE,
ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH,
ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM, ///< Scalar memory op
ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU, ///< Scalar ALU op
ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM, ///< Vector memory op
ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT, ///< Flat addressing vmem or lds
ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS, ///< Local Data Share op
ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU, ///< Vector ALU op
ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP, ///< Branch taken
ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT, ///< Branch not taken
ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED, ///< Internal operation
ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT, ///< Wave context switch
ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE, ///< MSG types
ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH, ///< Raytrace op
ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST
} rocprofiler_thread_trace_decoder_inst_category_t;
/**
* @brief Describes an instruction execution event.
*
* The duration is measured as stall+issue time (gfx9) or stall+execution time (gfx10+).
* Time + duration marks the issue (gfx9) or execution (gfx10+) completion time.
* Time + stall marks the successful issue time.
* Duration - stall is the issue time (gfx9) or execution time (gfx10+).
*/
typedef struct rocprofiler_thread_trace_decoder_inst_t
{
uint32_t category : 8; // One of rocprofiler_thread_trace_decoder_inst_category_t
uint32_t stall : 24;
int32_t duration;
int64_t time;
uint32_t category : 8; ///< One of rocprofiler_thread_trace_decoder_inst_category_t
uint32_t stall : 24; ///< Stall duration, in clock cycles.
int32_t duration; ///< Total instruction duration, in clock cycles.
int64_t time; ///< When the wave first attempted to execute this instruction.
rocprofiler_thread_trace_decoder_pc_t pc;
} rocprofiler_thread_trace_decoder_inst_t;
/**
* @brief Struct describing a wave during its lifetime.
* This record is only generated for waves executing in the target_cu and target_simd, selected by
* ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU and ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT
*
* instructions_array contains a time-ordered list of all (traced) instructions by the wave.
*/
typedef struct rocprofiler_thread_trace_decoder_wave_t
{
uint8_t cu;
uint8_t simd;
uint8_t wave_id;
uint8_t contexts;
uint8_t cu; ///< CU id (gfx9) or wgp id (gfx10+). This is always the target_cu.
uint8_t simd; ///< SIMD ID [0,3].
uint8_t wave_id; ///< Wave slot ID within SIMD.
uint8_t contexts; ///< Counts how many CWSR events have occurred during the wave lifetime.
uint32_t _rsvd1;
uint32_t _rsvd2;
uint32_t _rsvd3;
int64_t begin_time;
int64_t end_time;
int64_t begin_time; ///< Wave begin time. Should match occupancy event wave start.
int64_t end_time; ///< Wave end time. Should match occupancy event wave end.
size_t timeline_size;
size_t instructions_size;
rocprofiler_thread_trace_decoder_wave_state_t* timeline_array;
rocprofiler_thread_trace_decoder_inst_t* instructions_array;
size_t timeline_size; ///< timeline_array size
size_t instructions_size; ///< instructions_array size
rocprofiler_thread_trace_decoder_wave_state_t* timeline_array; ///< wave state change events
rocprofiler_thread_trace_decoder_inst_t* instructions_array; ///< Instructions executed
} rocprofiler_thread_trace_decoder_wave_t;
/**
* @brief Defines the type of payload received by rocprofiler_thread_trace_decoder_callback_t
*/
typedef enum rocprofiler_thread_trace_decoder_record_type_t
{
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP =
0, // Record is size_t representing the gfxip_major
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY, // Record is pointer to
// rocprofiler_thread_trace_decoder_occupancy_t
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT, // Record is pointer to
// rocprofiler_thread_trace_decoder_perfevent_t
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE, // Record is pointer to
// rocprofiler_thread_trace_decoder_wave_t
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO, // Record is pointer to
// rocprofiler_thread_trace_decoder_info_t
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG, // Debug
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0, ///< Record is gfxip_major, type size_t
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY, ///< rocprofiler_thread_trace_decoder_occupancy_t*
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT, ///< rocprofiler_thread_trace_decoder_perfevent_t*
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE, ///< rocprofiler_thread_trace_decoder_wave_t*
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO, ///< rocprofiler_thread_trace_decoder_info_t*
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG, ///< Debug
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST
} rocprofiler_thread_trace_decoder_record_type_t;
/** @} */
+2 -2
Wyświetl plik
@@ -22,11 +22,11 @@
#pragma once
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>
#include "lib/att-tool/util.hpp"
#include "lib/common/filesystem.hpp"
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>
#include <algorithm>
#include <fstream>
#include <iostream>
@@ -62,11 +62,11 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id,
else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY)
{
for(size_t i = 0; i < trace_size; i++)
tool.config.occupancy.push_back(reinterpret_cast<const occupancy_t*>(trace_events)[i]);
tool.config.occupancy.push_back(static_cast<const occupancy_t*>(trace_events)[i]);
}
else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT)
{
PerfcounterFile(tool.config, reinterpret_cast<perfevent_t*>(trace_events), trace_size);
PerfcounterFile(tool.config, static_cast<perfevent_t*>(trace_events), trace_size);
}
if(trace_id != ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE) return;
@@ -74,12 +74,12 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id,
bool bInvalid = false;
for(size_t wave_n = 0; wave_n < trace_size; wave_n++)
{
auto& wave = reinterpret_cast<wave_t*>(trace_events)[wave_n];
int64_t prev_inst_time = wave.begin_time;
const auto& wave = static_cast<const wave_t*>(trace_events)[wave_n];
int64_t prev_inst_time = wave.begin_time;
for(size_t j = 0; j < wave.instructions_size; j++)
{
auto& inst = wave.instructions_array[j];
const auto& inst = wave.instructions_array[j];
if(inst.pc.marker_id == 0 && inst.pc.addr == 0) continue;
try
@@ -236,6 +236,13 @@ rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle,
const char* statustr = decoder->dl->att_status_fn(status);
if(statustr == nullptr) statustr = "Unknown error";
ROCP_ERROR << "Callback failed with status " << status << ": " << statustr;
if(status == ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT)
return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
else if(status == ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_SHADER_DATA)
return ROCPROFILER_STATUS_ERROR_AGENT_ARCH_NOT_SUPPORTED;
else
return ROCPROFILER_STATUS_ERROR;
}
return ROCPROFILER_STATUS_SUCCESS;