diff --git a/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder.h b/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder.h index 24946fd474..746e21278b 100644 --- a/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder.h +++ b/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder.h @@ -28,21 +28,24 @@ ROCPROFILER_EXTERN_C_INIT /** - * @defgroup THREAD_TRACE Thread Trace Decoding - * @brief Provides API calls to decode thread trace data - * + * @defgroup THREAD_TRACE Thread Trace Service + * @brief ROCprof-trace-decoder wrapper. Provides API calls to decode thread trace shader data. * @{ */ +/** + * @brief Handle containing a loaded rocprof-trace-decoder and a decoder state. + */ typedef struct rocprofiler_thread_trace_decoder_handle_t { uint64_t handle; } rocprofiler_thread_trace_decoder_handle_t; /** - * @brief Initializes Trace Decoder library + * @brief Initializes Trace Decoder library with a library search path * @param[out] handle Handle to created decoder instance. * @param[in] path Path to trace decoder library location (e.g. /opt/rocm/lib). 
+ * @return ::rocprofiler_status_t * @retval ::ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE Library not found * @retval ::ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI Library found but version not supported * @retval ::ROCPROFILER_STATUS_SUCCESS Handle created @@ -52,7 +55,7 @@ rocprofiler_thread_trace_decoder_create(rocprofiler_thread_trace_decoder_handle_ const char* path) ROCPROFILER_API ROCPROFILER_NONNULL(1, 2); /** - * @brief Deletes handle created by rocprofiler_thread_trace_decoder_create + * @brief Deletes handle created by ::rocprofiler_thread_trace_decoder_create * @param[in] handle Handle to destroy */ void @@ -60,12 +63,21 @@ rocprofiler_thread_trace_decoder_destroy(rocprofiler_thread_trace_decoder_handle ROCPROFILER_API; /** - * @brief Loads a code object binary to match with Thread Trace + * @brief Loads a code object binary to match with Thread Trace. + * The size, data and load_* are reported by rocprofiler-sdk's code object tracing service. + * Used for the decoder library to know what code objects to look into when decoding shader data. + * Not all application code objects are required to be reported here, only the ones containing code + * executed at the time the shader data was collected by thread_trace services. + * If a code object not reported here is encountered while decoding shader data, a record of type + * INFO_STITCH_INCOMPLETE will be generated and instructions will not be reported with a PC address. + * * @param[in] handle Handle to decoder instance. * @param[in] load_id Code object load ID. * @param[in] load_addr Code object load address. * @param[in] load_size Code object load size. - * @param[in] data Code object binary data. Must be at least load_size bytes. + * @param[in] data Code object binary data. + * @param[in] size Code object binary data size. + * @return ::rocprofiler_status_t * @retval ::ROCPROFILER_STATUS_ERROR Unable to load code object. 
* @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT Invalid handle * @retval ::ROCPROFILER_STATUS_SUCCESS Code object loaded @@ -79,9 +91,10 @@ rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_h uint64_t size) ROCPROFILER_API ROCPROFILER_NONNULL(5); /** - * @brief Unloads a code object binary + * @brief Unloads a code object binary. * @param[in] handle Handle to decoder instance. * @param[in] load_id Code object load ID to remove. + * @return ::rocprofiler_status_t * @retval ::ROCPROFILER_STATUS_ERROR Code object not loaded. * @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT Invalid handle * @retval ::ROCPROFILER_STATUS_SUCCESS Code object unloaded @@ -91,8 +104,8 @@ rocprofiler_thread_trace_decoder_codeobj_unload(rocprofiler_thread_trace_decoder uint64_t load_id) ROCPROFILER_API; /** - * @brief Callback for rocprofiler to return traces back to rocprofiler. - * @param[in] trace_type_id One of rocprofiler_thread_trace_decoder_record_type_t + * @brief Callback for rocprof-trace-decoder to return decoder traces back to user. + * @param[in] record_type_id One of ::rocprofiler_thread_trace_decoder_record_type_t * @param[in] trace_events A pointer to sequence of events, of size trace_size. * @param[in] trace_size The number of events in the trace. * @param[in] userdata Arbitrary data pointer to be sent back to the user via callback. @@ -104,12 +117,22 @@ typedef void (*rocprofiler_thread_trace_decoder_callback_t)( void* userdata); /** - * @brief Iterate over all event coordinates for a given agent_t and event_t. - * @param[in] se_data_callback Callback to return shader engine data from. + * @brief Decodes shader data returned by ::rocprofiler_thread_trace_shader_data_callback_t. + * Use ::rocprofiler_thread_trace_decoder_codeobj_load to add references to loaded code objects + * during the trace. + * A ::rocprofiler_thread_trace_decoder_callback_t returns decoded data back to user. 
The first + * record is always of type ::ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP. + * + * @param[in] handle Decoder handle * @param[in] callback Decoded trace data returned to user. * @param[in] data Thread trace binary data. * @param[in] size Thread trace binary size. * @param[in] userdata Userdata passed back to caller via callback. + * @return ::rocprofiler_status_t + * @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT invalid argument + * @retval ::ROCPROFILER_STATUS_ERROR_AGENT_ARCH_NOT_SUPPORTED arch not supported + * @retval ::ROCPROFILER_STATUS_ERROR generic error + * @retval ::ROCPROFILER_STATUS_SUCCESS on success */ rocprofiler_status_t rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle, @@ -119,7 +142,8 @@ rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle, void* userdata) ROCPROFILER_API ROCPROFILER_NONNULL(2, 3); /** - * @brief Returns the description of a rocprofiler_thread_trace_decoder_info_t record. + * @brief Returns the string description of a ::rocprofiler_thread_trace_decoder_info_t record. + * @param[in] handle Decoder handle * @param[in] info The decoder info received * @retval null terminated string as description of "info". */ @@ -128,4 +152,6 @@ rocprofiler_thread_trace_decoder_info_string(rocprofiler_thread_trace_decoder_ha rocprofiler_thread_trace_decoder_info_t info) ROCPROFILER_API; +/** @} */ + ROCPROFILER_EXTERN_C_FINI diff --git a/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h b/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h index 4a5d656c9c..bf530a849a 100644 --- a/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h +++ b/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h @@ -25,6 +25,16 @@ #include #include +/** + * @defgroup THREAD_TRACE Thread Trace Service + * @brief ROCprof-trace-decoder defined types. All timestamp values are in shader clock units. 
+ * + * @{ + */ + +/** + * @brief Describes the type of info received. + */ typedef enum rocprofiler_thread_trace_decoder_info_t { ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE = 0, @@ -33,35 +43,47 @@ typedef enum rocprofiler_thread_trace_decoder_info_t ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST } rocprofiler_thread_trace_decoder_info_t; +/** + * @brief Describes a PC address. + */ typedef struct rocprofiler_thread_trace_decoder_pc_t { - size_t addr; - size_t marker_id; + size_t addr; ///< Memory address (marker_id == 0), or ELF vaddr (marker_id != 0). + size_t marker_id; ///< Code object load ID. Zero if no code object was found. } rocprofiler_thread_trace_decoder_pc_t; +/** + * @brief Describes four performance counter values. + */ typedef struct rocprofiler_thread_trace_decoder_perfevent_t { - int64_t time; - uint16_t events0; - uint16_t events1; - uint16_t events2; - uint16_t events3; - uint8_t CU; - uint8_t bank; + int64_t time; ///< Shader clock timestamp in which these counters were read. + uint16_t events0; ///< Counter0 (bank==0) or Counter4 (bank==1). + uint16_t events1; ///< Counter1 (bank==0) or Counter5 (bank==1). + uint16_t events2; ///< Counter2 (bank==0) or Counter6 (bank==1). + uint16_t events3; ///< Counter3 (bank==0) or Counter7 (bank==1). + uint8_t CU; ///< Shader compute unit ID these counters were collected from. + uint8_t bank; ///< Selects counter group [0,3] or [4,7] } rocprofiler_thread_trace_decoder_perfevent_t; +/** + * @brief Describes an occupancy event (wave started or wave ended). + */ typedef struct rocprofiler_thread_trace_decoder_occupancy_t { - rocprofiler_thread_trace_decoder_pc_t pc; - uint64_t time; - uint8_t se; - uint8_t cu; - uint8_t simd; - uint8_t slot; - uint32_t start : 1; + rocprofiler_thread_trace_decoder_pc_t pc; ///< Wave start address (kernel entry point) + uint64_t time; ///< Timestamp of event + uint8_t reserved; ///< Reserved + uint8_t cu; ///< Compute unit ID (gfx9) or WGP ID (gfx10+). 
+ uint8_t simd; ///< SIMD ID [0,3] within compute unit + uint8_t slot; ///< Wave slot ID within SIMD + uint32_t start : 1; ///< 1 if wave_start, 0 if a wave_end uint32_t _rsvd : 31; } rocprofiler_thread_trace_decoder_occupancy_t; +/** + * @brief Wave state type. + */ typedef enum rocprofiler_thread_trace_decoder_wstate_type_t { ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY = 0, @@ -72,71 +94,92 @@ typedef enum rocprofiler_thread_trace_decoder_wstate_type_t ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST, } rocprofiler_thread_trace_decoder_wstate_type_t; +/** + * @brief A wave state change event. + */ typedef struct rocprofiler_thread_trace_decoder_wave_state_t { - int32_t type; // One of rocprofiler_thread_trace_decoder_waveslot_state_type_t - int32_t duration; + int32_t type; ///< one of rocprofiler_thread_trace_decoder_wstate_type_t + int32_t duration; ///< state duration in cycles } rocprofiler_thread_trace_decoder_wave_state_t; +/** + * @brief Instruction type. + */ typedef enum rocprofiler_thread_trace_decoder_inst_category_t { ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE = 0, - ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM, - ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU, - ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM, - ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT, - ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS, - ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU, - ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP, - ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT, - ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED, - ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT, - ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE, - ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH, + ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM, ///< Scalar memory op + ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU, ///< Scalar ALU op + ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM, ///< Vector memory op + ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT, ///< Flat addressing vmem or lds + ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS, ///< Local Data 
Share op + ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU, ///< Vector ALU op + ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP, ///< Branch taken + ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT, ///< Branch not taken + ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED, ///< Internal operation + ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT, ///< Wave context switch + ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE, ///< MSG types + ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH, ///< Raytrace op ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST } rocprofiler_thread_trace_decoder_inst_category_t; +/** + * @brief Describes an instruction execution event. + * + * The duration is measured as stall+issue time (gfx9) or stall+execution time (gfx10+). + * Time + duration marks the issue (gfx9) or execution (gfx10+) completion time. + * Time + stall marks the successful issue time. + * Duration - stall is the issue time (gfx9) or execution time (gfx10+). + */ typedef struct rocprofiler_thread_trace_decoder_inst_t { - uint32_t category : 8; // One of rocprofiler_thread_trace_decoder_inst_category_t - uint32_t stall : 24; - int32_t duration; - int64_t time; + uint32_t category : 8; ///< One of rocprofiler_thread_trace_decoder_inst_category_t + uint32_t stall : 24; ///< Stall duration, in clock cycles. + int32_t duration; ///< Total instruction duration, in clock cycles. + int64_t time; ///< When the wave first attempted to execute this instruction. rocprofiler_thread_trace_decoder_pc_t pc; } rocprofiler_thread_trace_decoder_inst_t; +/** + * @brief Struct describing a wave during its lifetime. + * This record is only generated for waves executing in the target_cu and target_simd, selected by + * ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU and ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT + * + * instructions_array contains a time-ordered list of all (traced) instructions by the wave. 
+ */ typedef struct rocprofiler_thread_trace_decoder_wave_t { - uint8_t cu; - uint8_t simd; - uint8_t wave_id; - uint8_t contexts; + uint8_t cu; ///< CU id (gfx9) or wgp id (gfx10+). This is always the target_cu. + uint8_t simd; ///< SIMD ID [0,3]. + uint8_t wave_id; ///< Wave slot ID within SIMD. + uint8_t contexts; ///< Counts how many CWSR events have occurred during the wave lifetime. uint32_t _rsvd1; uint32_t _rsvd2; uint32_t _rsvd3; - int64_t begin_time; - int64_t end_time; + int64_t begin_time; ///< Wave begin time. Should match occupancy event wave start. + int64_t end_time; ///< Wave end time. Should match occupancy event wave end. - size_t timeline_size; - size_t instructions_size; - rocprofiler_thread_trace_decoder_wave_state_t* timeline_array; - rocprofiler_thread_trace_decoder_inst_t* instructions_array; + size_t timeline_size; ///< timeline_array size + size_t instructions_size; ///< instructions_array size + rocprofiler_thread_trace_decoder_wave_state_t* timeline_array; ///< wave state change events + rocprofiler_thread_trace_decoder_inst_t* instructions_array; ///< Instructions executed } rocprofiler_thread_trace_decoder_wave_t; +/** + * @brief Defines the type of payload received by rocprofiler_thread_trace_decoder_callback_t + */ typedef enum rocprofiler_thread_trace_decoder_record_type_t { - ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = - 0, // Record is size_t representing the gfxip_major - ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY, // Record is pointer to - // rocprofiler_thread_trace_decoder_occupancy_t - ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT, // Record is pointer to - // rocprofiler_thread_trace_decoder_perfevent_t - ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE, // Record is pointer to - // rocprofiler_thread_trace_decoder_wave_t - ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO, // Record is pointer to - // rocprofiler_thread_trace_decoder_info_t - ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG, // Debug + 
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0, ///< Record is gfxip_major, type size_t + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY, ///< rocprofiler_thread_trace_decoder_occupancy_t* + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT, ///< rocprofiler_thread_trace_decoder_perfevent_t* + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE, ///< rocprofiler_thread_trace_decoder_wave_t* + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO, ///< rocprofiler_thread_trace_decoder_info_t* + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG, ///< Debug ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST } rocprofiler_thread_trace_decoder_record_type_t; + +/** @} */ diff --git a/source/lib/att-tool/att_lib_wrapper.hpp b/source/lib/att-tool/att_lib_wrapper.hpp index ebc2ac8ab0..58e3f6deb6 100644 --- a/source/lib/att-tool/att_lib_wrapper.hpp +++ b/source/lib/att-tool/att_lib_wrapper.hpp @@ -22,11 +22,11 @@ #pragma once -#include - #include "lib/att-tool/util.hpp" #include "lib/common/filesystem.hpp" +#include + #include #include #include diff --git a/source/lib/att-tool/profile_interface.cpp b/source/lib/att-tool/profile_interface.cpp index 7ba62e45c1..f20348617d 100644 --- a/source/lib/att-tool/profile_interface.cpp +++ b/source/lib/att-tool/profile_interface.cpp @@ -62,11 +62,11 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id, else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY) { for(size_t i = 0; i < trace_size; i++) - tool.config.occupancy.push_back(reinterpret_cast(trace_events)[i]); + tool.config.occupancy.push_back(static_cast(trace_events)[i]); } else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT) { - PerfcounterFile(tool.config, reinterpret_cast(trace_events), trace_size); + PerfcounterFile(tool.config, static_cast(trace_events), trace_size); } if(trace_id != ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE) return; @@ -74,12 +74,12 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id, bool 
bInvalid = false; for(size_t wave_n = 0; wave_n < trace_size; wave_n++) { - auto& wave = reinterpret_cast(trace_events)[wave_n]; - int64_t prev_inst_time = wave.begin_time; + const auto& wave = static_cast(trace_events)[wave_n]; + int64_t prev_inst_time = wave.begin_time; for(size_t j = 0; j < wave.instructions_size; j++) { - auto& inst = wave.instructions_array[j]; + const auto& inst = wave.instructions_array[j]; if(inst.pc.marker_id == 0 && inst.pc.addr == 0) continue; try diff --git a/source/lib/rocprofiler-sdk/thread_trace/decode.cpp b/source/lib/rocprofiler-sdk/thread_trace/decode.cpp index 63917bee57..16ef7bfd15 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/decode.cpp +++ b/source/lib/rocprofiler-sdk/thread_trace/decode.cpp @@ -236,6 +236,13 @@ rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle, const char* statustr = decoder->dl->att_status_fn(status); if(statustr == nullptr) statustr = "Unknown error"; ROCP_ERROR << "Callback failed with status " << status << ": " << statustr; + + if(status == ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT) + return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + else if(status == ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_SHADER_DATA) + return ROCPROFILER_STATUS_ERROR_AGENT_ARCH_NOT_SUPPORTED; + else + return ROCPROFILER_STATUS_ERROR; } return ROCPROFILER_STATUS_SUCCESS;