diff --git a/README.md b/README.md index 2da1b74b66..fa7903e211 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ ROCProfiler-SDK is AMD’s new and improved tooling infrastructure, providing a - Dispatch Counter Collection - Device Counter Collection - PC Sampling (Host Trap) +- Thread trace and ROCprof trace decoder (SQTT, ATT). ## API Trace Support diff --git a/source/bin/rocprofv3.py b/source/bin/rocprofv3.py index 4d1769271e..006f29acc3 100755 --- a/source/bin/rocprofv3.py +++ b/source/bin/rocprofv3.py @@ -24,7 +24,6 @@ import argparse import os -import re import subprocess import textwrap import sys @@ -105,74 +104,30 @@ def strtobool(val): raise ValueError(f"invalid truth value {val} (type={val_type})") -def search_path(path_list): - supported_option = [] - lib_att_pattern = r"libatt_decoder_(trace|debug|testing1|testing2)\.so" - file_list = [] - - for path in path_list: - for root, dirs, files in os.walk(path, topdown=True): - file_list.extend(files) - break - for itr in file_list: - _match = re.match(lib_att_pattern, itr) - if _match: - lst = re.findall("trace|debug|testing1|testing2", itr) - supported_option.extend(lst) - return set(supported_option) - - def check_att_capability(args): ROCPROFV3_DIR = os.path.dirname(os.path.realpath(__file__)) ROCM_DIR = os.path.dirname(ROCPROFV3_DIR) ld_library_paths = [] - for itr in os.environ.get("LD_LIBRARY_PATH", "").split(":") + [f"{ROCM_DIR}/lib"]: - # don't add duplicates - if itr not in ld_library_paths: - ld_library_paths += [itr] - tmp_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False) - tmp_parser.add_argument( - "--att-library-path", - default=os.environ.get( - "ROCPROF_ATT_LIBRARY_PATH", ":".join(ld_library_paths) - ).split(":"), - nargs="+", - type=str, - required=False, - ) + if args.att_library_path: + ld_library_paths.extend(args.att_library_path) + else: + for itr in os.environ.get("LD_LIBRARY_PATH", "").split(":") + [f"{ROCM_DIR}/lib"]: + # don't add duplicates + if itr not in 
ld_library_paths: + ld_library_paths += [itr] - tmp_parser.add_argument( - "-i", - "--input", - default=None, - type=str, - required=False, - ) + lib_att_name = "librocprof-trace-decoder.so" - att_args, _ = tmp_parser.parse_known_args(args) + for path in ld_library_paths: + for root, dirs, files in os.walk(path, topdown=True): + for itr in files: + if lib_att_name in itr: + args.att_library_path = root + return True - support = search_path(att_args.att_library_path) - support_input = {} - if att_args.input: - # If index of a pass in input file is a key in the support_input dict, then that pass has att-library-path arg - args_list = parse_input(att_args.input) - for index, itr in enumerate(args_list): - if itr.att_library_path: - library_path = ( - itr.att_library_path.split(":") - if isinstance(itr.att_library_path, str) - else itr.att_library_path - ) - _support = search_path(library_path) - # If the att-library-path in the input file for a pass is valid, then the value of index key in the dict, - # support_input, is updated to that valid path - # If the att-library-path in the input file for a pass is invalid, then the value of index key in the dict, - # support_input, is empty - support_input[index] = set(_support) if support else [] - - return (att_args.att_library_path, set(support), support_input) + return False class booleanArgAction(argparse.Action): @@ -687,72 +642,66 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins app_args = args[(idx + 1) :] break - default_att_lib_path, att_support_args, att_support_inp = check_att_capability( - rocp_args - ) - - choice_list = [] - for keys, values in att_support_inp.items(): - choice_list.extend(values) - if att_support_args: - choice_list.extend(list(att_support_args)) - - # remove duplicates - choice_list = list(set(choice_list)) - att_options = parser.add_argument_group("Advanced Thread Trace (ATT) options") add_parser_bool_argument( att_options, 
"--advanced-thread-trace", "--att", - help="Enable ATT", + help="Enables thread trace", ) att_options.add_argument( "--att-library-path", - help="Search path(s) to decoder library/libraries", - default=default_att_lib_path if not att_support_inp else None, + help="Search path to decoder library.", + default=None, nargs="+", ) att_options.add_argument( "--att-target-cu", - help="ATT target compute unit", + help="Target compute unit ID (or WGP). Default 1", default=None, ) att_options.add_argument( "--att-simd-select", - help="Select ATT SIMD", + help="Bitmask of SIMDs to enable (gfx9) or SIMD ID (gfx10+). Default 0xF", default=None, type=str, ) att_options.add_argument( "--att-buffer-size", - help="Buffer Size", + help="Thread trace buffer size. Default 96MB", default=None, type=str, ) att_options.add_argument( "--att-shader-engine-mask", - help="att shader engine mask", + help="Bitmask of shader engines to enable. Default 0x1", default=None, type=str, ) att_options.add_argument( "--att-perfcounters", - help="Set performance counters, and optionally their mask. gfx9 only.", + help="(gfx9) List of performance counters, and optionally their SIMD mask.", default=None, type=str.upper, ) att_options.add_argument( "--att-perfcounter-ctrl", - help="Integer in [0,32] range specifying collection period. gfx9 only.", + help="(gfx9) Integer in [1,32] range specifying collection period. 0 = disabled.", + default=None, + type=int, + ) + + att_options.add_argument( + "--att-activity", + help="(gfx9) Collect HW activity counters. Integer in [1,16] range specifying collection period. 
Recommended: 8", default=None, type=int, ) @@ -761,10 +710,10 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins att_options, "--att-serialize-all", default=False, - help="Serialize all kernels", + help="Serialize all kernels, not just the traced ones.", ) - return (parser.parse_args(rocp_args), app_args, att_support_args, att_support_inp) + return (parser.parse_args(rocp_args), app_args) def parse_yaml(yaml_file): @@ -1496,12 +1445,17 @@ def run(app_args, args, **kwargs): args.att_serialize_all, overwrite=True, ) - if args.att_library_path: + if check_att_capability(args): update_env( "ROCPROF_ATT_LIBRARY_PATH", - ":".join(args.att_library_path), + args.att_library_path, overwrite=True, ) + else: + fatal_error( + f"rocprof-trace-decoder library path not found in {args.att_library_path}" + ) + if args.att_perfcounters: if args.pmc: fatal_error("ATT perfcounters cannot be enabled with PMC") @@ -1520,6 +1474,24 @@ def run(app_args, args, **kwargs): args.att_perfcounter_ctrl, overwrite=True, ) + if args.att_activity: + if args.pmc: + fatal_error("ATT activity cannot be enabled with PMC") + elif args.att_perfcounters or args.att_perfcounter_ctrl: + fatal_error( + "ATT activity cannot be enabled with att-perfcounters or att-perfcounter-ctrl." 
+ ) + else: + update_env( + "ROCPROF_ATT_PARAM_PERFCOUNTER_CTRL", + args.att_activity, + overwrite=True, + ) + update_env( + "ROCPROF_ATT_PARAM_PERFCOUNTERS", + "SQ_BUSY_CU_CYCLES SQ_VALU_MFMA_BUSY_CYCLES SQ_ACTIVE_INST_VALU SQ_ACTIVE_INST_LDS SQ_ACTIVE_INST_VMEM SQ_ACTIVE_INST_FLAT SQ_ACTIVE_INST_SCA SQ_ACTIVE_INST_MISC", + overwrite=True, + ) if args.log_level in ("info", "trace", "env", "config"): log_config(app_env) @@ -1547,45 +1519,9 @@ def run(app_args, args, **kwargs): return exit_code -def check_att_path_parse_method(args, index, support_att_input, att_parse_supported): - - if not att_parse_supported: - if index not in support_att_input.keys(): - fatal_error( - f"Advanced_thread_trace enabled but no decoder library found in cmdline/env paths and att_library_path not set for pass-{index + 1}" - ) - elif not support_att_input[index]: - fatal_error( - f"Advanced_thread_trace enabled but no decoder library found in att_library_path for pass-{index + 1}" - ) - else: - if args.att_parse and args.att_parse not in support_att_input[index]: - fatal_error( - f"Advanced_thread_trace enabled but decoder library for requested parse method not found in att_library_path for pass-{index + 1}" - ) - else: - if index in support_att_input.keys() and not support_att_input[index]: - fatal_error( - f"Advanced_thread_trace enabled but no decoder library found in att_library_path for pass-{index + 1}" - ) - - elif index not in support_att_input.keys(): - if args.att_parse and args.att_parse not in att_parse_supported: - fatal_error( - "Advanced_thread_trace enabled but decoder library for requested parse method not found" - ) - else: - if args.att_parse and args.att_parse not in support_att_input[index]: - fatal_error( - f"Advanced_thread_trace enabled but decoder library for requested parse method not found for pass-{index + 1}" - ) - - def main(argv=None): - # att_parse_supported is valid path for decoder in env or commandline arg - # support_att_input is a dict, where key is a 
pass index with value being a valid decoder path - cmd_args, app_args, att_parse_supported, support_att_input = parse_arguments(argv) + cmd_args, app_args = parse_arguments(argv) inp_args = ( parse_input(cmd_args.input) if getattr(cmd_args, "input") else [dotdict({})] ) @@ -1595,16 +1531,10 @@ def main(argv=None): pass_idx = None if has_set_attr(args, "pmc") and len(args.pmc) > 0: pass_idx = 1 - if args.advanced_thread_trace: - check_att_path_parse_method(args, 0, support_att_input, att_parse_supported) run(app_args, args, pass_id=pass_idx) else: for idx, itr in enumerate(inp_args): args = get_args(cmd_args, itr) - if args.advanced_thread_trace: - check_att_path_parse_method( - args, idx, support_att_input, att_parse_supported - ) run( app_args, args, diff --git a/source/include/rocprofiler-sdk/cxx/codeobj/code_printing.hpp b/source/include/rocprofiler-sdk/cxx/codeobj/code_printing.hpp index 2bbe7344f4..0ff1a6ada0 100644 --- a/source/include/rocprofiler-sdk/cxx/codeobj/code_printing.hpp +++ b/source/include/rocprofiler-sdk/cxx/codeobj/code_printing.hpp @@ -365,11 +365,19 @@ public: table.insert({ptr->begin(), ptr->size(), id}); } - virtual bool removeDecoder(marker_id_t id, uint64_t load_addr) + bool removeDecoder(marker_id_t id, uint64_t load_addr) { return table.remove(load_addr) && this->Super::removeDecoderbyId(id); } + bool removeDecoder(marker_id_t id) + { + uint64_t addr = 0; + if(decoders.find(id) != decoders.end()) addr = decoders.at(id)->begin(); + + return removeDecoder(id, addr); + } + std::unique_ptr get(uint64_t vaddr) { auto addr_range = table.find_codeobj_in_range(vaddr); diff --git a/source/include/rocprofiler-sdk/cxx/enum_string.hpp b/source/include/rocprofiler-sdk/cxx/enum_string.hpp index 5504a017b7..6f550aa8f5 100644 --- a/source/include/rocprofiler-sdk/cxx/enum_string.hpp +++ b/source/include/rocprofiler-sdk/cxx/enum_string.hpp @@ -1111,8 +1111,8 @@ ROCPROFILER_ENUM_INFO(rocprofiler_page_migration_queue_suspend_trigger_t, ROCPRO 
ROCPROFILER_ENUM_INFO(rocprofiler_page_migration_unmap_from_gpu_trigger_t, ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE, ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST, false); ROCPROFILER_ENUM_INFO(rocprofiler_external_correlation_id_request_kind_t, ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE, ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST, false); -ROCPROFILER_ENUM_INFO(rocprofiler_att_parameter_type_t, ROCPROFILER_ATT_PARAMETER_TARGET_CU, ROCPROFILER_ATT_PARAMETER_LAST, false); -// ROCPROFILER_ENUM_INFO(rocprofiler_att_control_flags_t, ROCPROFILER_ATT_CONTROL_NONE, details::compute_bitset_sequence_range(), true); +ROCPROFILER_ENUM_INFO(rocprofiler_thread_trace_parameter_type_t, ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU, ROCPROFILER_THREAD_TRACE_PARAMETER_LAST, false); +// ROCPROFILER_ENUM_INFO(rocprofiler_thread_trace_control_flags_t, ROCPROFILER_THREAD_TRACE_CONTROL_NONE, details::compute_bitset_sequence_range(), true); ROCPROFILER_ENUM_INFO(rocprofiler_agent_version_t, ROCPROFILER_AGENT_INFO_VERSION_NONE, ROCPROFILER_AGENT_INFO_VERSION_LAST, false); @@ -1441,18 +1441,18 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCJPEG_API); static_assert(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST == 18); -// rocprofiler_att_parameter_type_t -ROCPROFILER_ENUM_LABEL(ROCPROFILER_ATT_PARAMETER_TARGET_CU); -ROCPROFILER_ENUM_LABEL(ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK); -ROCPROFILER_ENUM_LABEL(ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE); -ROCPROFILER_ENUM_LABEL(ROCPROFILER_ATT_PARAMETER_SIMD_SELECT); -ROCPROFILER_ENUM_LABEL(ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL); -ROCPROFILER_ENUM_LABEL(ROCPROFILER_ATT_PARAMETER_PERFCOUNTER); -ROCPROFILER_ENUM_LABEL(ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL); -static_assert(ROCPROFILER_ATT_PARAMETER_LAST == 7); +// rocprofiler_thread_trace_parameter_type_t 
+ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL); +static_assert(ROCPROFILER_THREAD_TRACE_PARAMETER_LAST == 7); -ROCPROFILER_ENUM_LABEL(ROCPROFILER_ATT_CONTROL_NONE); -ROCPROFILER_ENUM_LABEL(ROCPROFILER_ATT_CONTROL_START_AND_STOP); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_CONTROL_NONE); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP); // rocprofiler_agent_version_t ROCPROFILER_ENUM_LABEL(ROCPROFILER_AGENT_INFO_VERSION_NONE); diff --git a/source/include/rocprofiler-sdk/experimental/thread-trace/CMakeLists.txt b/source/include/rocprofiler-sdk/experimental/thread-trace/CMakeLists.txt index 1ee64d90f5..fa50bd6dd3 100644 --- a/source/include/rocprofiler-sdk/experimental/thread-trace/CMakeLists.txt +++ b/source/include/rocprofiler-sdk/experimental/thread-trace/CMakeLists.txt @@ -1,4 +1,5 @@ -set(ROCPROFILER_EXPERIMENTAL_THREAD_TRACE_HEADER_FILES core.h agent.h dispatch.h) +set(ROCPROFILER_EXPERIMENTAL_THREAD_TRACE_HEADER_FILES + core.h agent.h dispatch.h trace_decoder_types.h trace_decoder.h) install( FILES ${ROCPROFILER_EXPERIMENTAL_THREAD_TRACE_HEADER_FILES} diff --git a/source/include/rocprofiler-sdk/experimental/thread-trace/agent.h b/source/include/rocprofiler-sdk/experimental/thread-trace/agent.h index dd3b666b12..24bebfbab0 100644 --- a/source/include/rocprofiler-sdk/experimental/thread-trace/agent.h +++ b/source/include/rocprofiler-sdk/experimental/thread-trace/agent.h @@ -54,16 +54,17 @@ ROCPROFILER_EXTERN_C_INIT * @retval 
ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED for configuration locked * @retval ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID for conflicting configurations in the same ctx * @retval ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND for invalid context id - * @retval ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT for invalid rocprofiler_att_parameter_t + * @retval ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT for invalid + * rocprofiler_thread_trace_parameter_t */ rocprofiler_status_t rocprofiler_configure_device_thread_trace_service( - rocprofiler_context_id_t context_id, - rocprofiler_agent_id_t agent_id, - rocprofiler_att_parameter_t* parameters, - size_t num_parameters, - rocprofiler_att_shader_data_callback_t shader_callback, - rocprofiler_user_data_t callback_userdata) ROCPROFILER_API; + rocprofiler_context_id_t context_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_thread_trace_parameter_t* parameters, + size_t num_parameters, + rocprofiler_thread_trace_shader_data_callback_t shader_callback, + rocprofiler_user_data_t callback_userdata) ROCPROFILER_API; /** @} */ diff --git a/source/include/rocprofiler-sdk/experimental/thread-trace/core.h b/source/include/rocprofiler-sdk/experimental/thread-trace/core.h index 9f49b4a27e..d870de5a3a 100644 --- a/source/include/rocprofiler-sdk/experimental/thread-trace/core.h +++ b/source/include/rocprofiler-sdk/experimental/thread-trace/core.h @@ -37,28 +37,29 @@ ROCPROFILER_EXTERN_C_INIT */ /** - * @brief Types of ATT parameters + * @brief Types of Thread Trace parameters * */ -typedef enum rocprofiler_att_parameter_type_t +typedef enum rocprofiler_thread_trace_parameter_type_t { - ROCPROFILER_ATT_PARAMETER_TARGET_CU = 0, ///< Select the Target CU or WGP - ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, ///< Bitmask of shader engines. 
- ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, ///< Size of combined GPU buffer for ATT - ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, ///< Bitmask (GFX9) or ID (Navi) of SIMDs - ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, ///< Period [1,32] or disable (0) perfmon - ROCPROFILER_ATT_PARAMETER_PERFCOUNTER, ///< Perfmon ID and SIMD mask - ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, ///< Serializes kernels not under thread trace - ROCPROFILER_ATT_PARAMETER_LAST -} rocprofiler_att_parameter_type_t; + ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU = 0, ///< Select the Target CU or WGP + ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK, ///< Bitmask of shader engines. + ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE, ///< Size of combined GPU buffer for ATT + ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT, ///< Bitmask (GFX9) or ID (Navi) of SIMDs + ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL, ///< Period [1,32] or disable (0) perfmon + ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER, ///< Perfmon ID and SIMD mask + ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL, ///< Serializes kernels not under thread + ///< trace + ROCPROFILER_THREAD_TRACE_PARAMETER_LAST +} rocprofiler_thread_trace_parameter_type_t; /** - * @brief ATT parameter specification + * @brief Thread Trace parameter specification * */ -typedef struct rocprofiler_att_parameter_t +typedef struct rocprofiler_thread_trace_parameter_t { - rocprofiler_att_parameter_type_t type; + rocprofiler_thread_trace_parameter_type_t type; union { uint64_t value; @@ -68,7 +69,7 @@ typedef struct rocprofiler_att_parameter_t uint64_t simd_mask : 4; }; }; -} rocprofiler_att_parameter_t; +} rocprofiler_thread_trace_parameter_t; /** * @brief Callback to be triggered every time some ATT data is generated by the device @@ -76,13 +77,13 @@ typedef struct rocprofiler_att_parameter_t * @param [in] shader_engine_id ID of shader engine, as enabled by SE_MASK * @param [in] data Pointer to the buffer containing the ATT data * @param [in] 
data_size Number of bytes in "data" - * @param [in] userdata Passed back to user from rocprofiler_att_dispatch_callback_t() + * @param [in] userdata Passed back to user from rocprofiler_thread_trace_dispatch_callback_t() */ -typedef void (*rocprofiler_att_shader_data_callback_t)(rocprofiler_agent_id_t agent, - int64_t shader_engine_id, - void* data, - size_t data_size, - rocprofiler_user_data_t userdata); +typedef void (*rocprofiler_thread_trace_shader_data_callback_t)(rocprofiler_agent_id_t agent, + int64_t shader_engine_id, + void* data, + size_t data_size, + rocprofiler_user_data_t userdata); /** @} */ diff --git a/source/include/rocprofiler-sdk/experimental/thread-trace/dispatch.h b/source/include/rocprofiler-sdk/experimental/thread-trace/dispatch.h index 2ee1a63691..d0aa752d07 100644 --- a/source/include/rocprofiler-sdk/experimental/thread-trace/dispatch.h +++ b/source/include/rocprofiler-sdk/experimental/thread-trace/dispatch.h @@ -37,11 +37,11 @@ ROCPROFILER_EXTERN_C_INIT * @{ */ -typedef enum rocprofiler_att_control_flags_t +typedef enum rocprofiler_thread_trace_control_flags_t { - ROCPROFILER_ATT_CONTROL_NONE = 0, - ROCPROFILER_ATT_CONTROL_START_AND_STOP = 3 -} rocprofiler_att_control_flags_t; + ROCPROFILER_THREAD_TRACE_CONTROL_NONE = 0, + ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP = 3 +} rocprofiler_thread_trace_control_flags_t; /** * @brief Callback to be triggered every kernel dispatch, indicating to start and/or stop ATT @@ -54,7 +54,7 @@ typedef enum rocprofiler_att_control_flags_t * rocprofiler_configure_dispatch_thread_trace_service. 
* @param [out] userdata_shader Userdata to be passed in shader_callback */ -typedef rocprofiler_att_control_flags_t (*rocprofiler_att_dispatch_callback_t)( +typedef rocprofiler_thread_trace_control_flags_t (*rocprofiler_thread_trace_dispatch_callback_t)( rocprofiler_agent_id_t agent_id, rocprofiler_queue_id_t queue_id, rocprofiler_async_correlation_id_t correlation_id, @@ -64,14 +64,14 @@ typedef rocprofiler_att_control_flags_t (*rocprofiler_att_dispatch_callback_t)( rocprofiler_user_data_t* userdata_shader); /** - * @brief Enables the advanced thread trace service for dispatch-based tracing. + * @brief Enables the thread trace service for dispatch-based tracing. * The tool has an option to enable/disable thread trace on every dispatch callback. * This service serializes all traced kernels, and optionally all non-traced kernels. * @param [in] context_id id of the context used for start/stop thread_trace. * @param [in] agent_id rocprofiler_agent_id_t to configure thread trace. * @param [in] parameters List of ATT-specific parameters. * @param [in] num_parameters Number of parameters. Zero is allowed. - * @param [in] dispatch_callback Control fn which decides when ATT starts/stop collecting. + * @param [in] dispatch_callback Control fn which decides when TT starts/stop collecting. * @param [in] shader_callback Callback fn where the collected data will be sent to. * @param [in] callback_userdata Passed back to user in dispatch_callback. 
* @return ::rocprofiler_status_t @@ -79,18 +79,19 @@ typedef rocprofiler_att_control_flags_t (*rocprofiler_att_dispatch_callback_t)( * @retval ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED for configuration locked * @retval ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID for conflicting configurations in the same ctx * @retval ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND for invalid context id - * @retval ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT for invalid rocprofiler_att_parameter_t + * @retval ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT for invalid + * rocprofiler_thread_trace_parameter_t * @retval ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED if already configured */ rocprofiler_status_t rocprofiler_configure_dispatch_thread_trace_service( - rocprofiler_context_id_t context_id, - rocprofiler_agent_id_t agent_id, - rocprofiler_att_parameter_t* parameters, - size_t num_parameters, - rocprofiler_att_dispatch_callback_t dispatch_callback, - rocprofiler_att_shader_data_callback_t shader_callback, - void* callback_userdata) ROCPROFILER_API; + rocprofiler_context_id_t context_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_thread_trace_parameter_t* parameters, + size_t num_parameters, + rocprofiler_thread_trace_dispatch_callback_t dispatch_callback, + rocprofiler_thread_trace_shader_data_callback_t shader_callback, + void* callback_userdata) ROCPROFILER_API; /** @} */ diff --git a/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder.h b/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder.h new file mode 100644 index 0000000000..24946fd474 --- /dev/null +++ b/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder.h @@ -0,0 +1,131 @@ +// MIT License +// +// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +ROCPROFILER_EXTERN_C_INIT + +/** + * @defgroup THREAD_TRACE Thread Trace Decoding + * @brief Provides API calls to decode thread trace data + * + * @{ + */ + +typedef struct rocprofiler_thread_trace_decoder_handle_t +{ + uint64_t handle; +} rocprofiler_thread_trace_decoder_handle_t; + +/** + * @brief Initializes Trace Decoder library + * @param[out] handle Handle to created decoder instance. + * @param[in] path Path to trace decoder library location (e.g. /opt/rocm/lib). 
+ * @retval ::ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE Library not found + * @retval ::ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI Library found but version not supported + * @retval ::ROCPROFILER_STATUS_SUCCESS Handle created + */ +rocprofiler_status_t +rocprofiler_thread_trace_decoder_create(rocprofiler_thread_trace_decoder_handle_t* handle, + const char* path) ROCPROFILER_API ROCPROFILER_NONNULL(1, 2); + +/** + * @brief Deletes handle created by rocprofiler_thread_trace_decoder_create + * @param[in] handle Handle to destroy + */ +void +rocprofiler_thread_trace_decoder_destroy(rocprofiler_thread_trace_decoder_handle_t handle) + ROCPROFILER_API; + +/** + * @brief Loads a code object binary to match with Thread Trace + * @param[in] handle Handle to decoder instance. + * @param[in] load_id Code object load ID. + * @param[in] load_addr Code object load address. + * @param[in] load_size Code object load size. + * @param[in] data Code object binary data. Must be at least load_size bytes. + * @retval ::ROCPROFILER_STATUS_ERROR Unable to load code object. + * @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT Invalid handle + * @retval ::ROCPROFILER_STATUS_SUCCESS Code object loaded + */ +rocprofiler_status_t +rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_handle_t handle, + uint64_t load_id, + uint64_t load_addr, + uint64_t load_size, + const void* data, + uint64_t size) ROCPROFILER_API ROCPROFILER_NONNULL(5); + +/** + * @brief Unloads a code object binary + * @param[in] handle Handle to decoder instance. + * @param[in] load_id Code object load ID to remove. + * @retval ::ROCPROFILER_STATUS_ERROR Code object not loaded. 
+ * @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT Invalid handle + * @retval ::ROCPROFILER_STATUS_SUCCESS Code object unloaded + */ +rocprofiler_status_t +rocprofiler_thread_trace_decoder_codeobj_unload(rocprofiler_thread_trace_decoder_handle_t handle, + uint64_t load_id) ROCPROFILER_API; + +/** + * @brief Callback for rocprofiler to return decoded traces back to the user. + * @param[in] record_type_id One of rocprofiler_thread_trace_decoder_record_type_t + * @param[in] trace_events A pointer to sequence of events, of size trace_size. + * @param[in] trace_size The number of events in the trace. + * @param[in] userdata Arbitrary data pointer to be sent back to the user via callback. + */ +typedef void (*rocprofiler_thread_trace_decoder_callback_t)( + rocprofiler_thread_trace_decoder_record_type_t record_type_id, + void* trace_events, + uint64_t trace_size, + void* userdata); + +/** + * @brief Decodes thread trace binary data and returns the decoded records via callback. + * @param[in] handle Handle to decoder instance. + * @param[in] callback Decoded trace data returned to user. + * @param[in] data Thread trace binary data. + * @param[in] size Thread trace binary size. + * @param[in] userdata Userdata passed back to caller via callback. + */ +rocprofiler_status_t +rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle, + rocprofiler_thread_trace_decoder_callback_t callback, + void* data, + uint64_t size, + void* userdata) ROCPROFILER_API ROCPROFILER_NONNULL(2, 3); + +/** + * @brief Returns the description of a rocprofiler_thread_trace_decoder_info_t record. + * @param[in] info The decoder info received + * @retval null terminated string as description of "info". 
+ */ +const char* +rocprofiler_thread_trace_decoder_info_string(rocprofiler_thread_trace_decoder_handle_t handle, + rocprofiler_thread_trace_decoder_info_t info) + ROCPROFILER_API; + +ROCPROFILER_EXTERN_C_FINI diff --git a/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h b/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h new file mode 100644 index 0000000000..4a5d656c9c --- /dev/null +++ b/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h @@ -0,0 +1,142 @@ +// MIT License +// +// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include +#include + +typedef enum rocprofiler_thread_trace_decoder_info_t +{ + ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE = 0, + ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST, + ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE, + ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST +} rocprofiler_thread_trace_decoder_info_t; + +typedef struct rocprofiler_thread_trace_decoder_pc_t +{ + size_t addr; + size_t marker_id; +} rocprofiler_thread_trace_decoder_pc_t; + +typedef struct rocprofiler_thread_trace_decoder_perfevent_t +{ + int64_t time; + uint16_t events0; + uint16_t events1; + uint16_t events2; + uint16_t events3; + uint8_t CU; + uint8_t bank; +} rocprofiler_thread_trace_decoder_perfevent_t; + +typedef struct rocprofiler_thread_trace_decoder_occupancy_t +{ + rocprofiler_thread_trace_decoder_pc_t pc; + uint64_t time; + uint8_t se; + uint8_t cu; + uint8_t simd; + uint8_t slot; + uint32_t start : 1; + uint32_t _rsvd : 31; +} rocprofiler_thread_trace_decoder_occupancy_t; + +typedef enum rocprofiler_thread_trace_decoder_wstate_type_t +{ + ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY = 0, + ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_IDLE, + ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EXEC, + ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_WAIT, + ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_STALL, + ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST, +} rocprofiler_thread_trace_decoder_wstate_type_t; + +typedef struct rocprofiler_thread_trace_decoder_wave_state_t +{ + int32_t type; // One of rocprofiler_thread_trace_decoder_wstate_type_t + int32_t duration; +} rocprofiler_thread_trace_decoder_wave_state_t; + +typedef enum rocprofiler_thread_trace_decoder_inst_category_t +{ + ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE = 0, + ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM, + ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU, + ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM, + ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT, + ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS, + 
ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU, + ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP, + ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT, + ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED, + ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT, + ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE, + ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH, + ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST +} rocprofiler_thread_trace_decoder_inst_category_t; + +typedef struct rocprofiler_thread_trace_decoder_inst_t +{ + uint32_t category : 8; // One of rocprofiler_thread_trace_decoder_inst_category_t + uint32_t stall : 24; + int32_t duration; + int64_t time; + rocprofiler_thread_trace_decoder_pc_t pc; +} rocprofiler_thread_trace_decoder_inst_t; + +typedef struct rocprofiler_thread_trace_decoder_wave_t +{ + uint8_t cu; + uint8_t simd; + uint8_t wave_id; + uint8_t contexts; + + uint32_t _rsvd1; + uint32_t _rsvd2; + uint32_t _rsvd3; + + int64_t begin_time; + int64_t end_time; + + size_t timeline_size; + size_t instructions_size; + rocprofiler_thread_trace_decoder_wave_state_t* timeline_array; + rocprofiler_thread_trace_decoder_inst_t* instructions_array; +} rocprofiler_thread_trace_decoder_wave_t; + +typedef enum rocprofiler_thread_trace_decoder_record_type_t +{ + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = + 0, // Record is size_t representing the gfxip_major + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY, // Record is pointer to + // rocprofiler_thread_trace_decoder_occupancy_t + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT, // Record is pointer to + // rocprofiler_thread_trace_decoder_perfevent_t + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE, // Record is pointer to + // rocprofiler_thread_trace_decoder_wave_t + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO, // Record is pointer to + // rocprofiler_thread_trace_decoder_info_t + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG, // Debug + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST +} rocprofiler_thread_trace_decoder_record_type_t; diff --git 
a/source/include/rocprofiler-sdk/experimental/thread_trace.h b/source/include/rocprofiler-sdk/experimental/thread_trace.h index 15e748e89b..1856473c26 100644 --- a/source/include/rocprofiler-sdk/experimental/thread_trace.h +++ b/source/include/rocprofiler-sdk/experimental/thread_trace.h @@ -25,3 +25,4 @@ #include #include #include +#include diff --git a/source/lib/CMakeLists.txt b/source/lib/CMakeLists.txt index 88607e47c4..2833dc6943 100644 --- a/source/lib/CMakeLists.txt +++ b/source/lib/CMakeLists.txt @@ -1,6 +1,8 @@ # # # +rocprofiler_activate_clang_tidy() + set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "core") add_subdirectory(common) add_subdirectory(output) diff --git a/source/lib/att-tool/CMakeLists.txt b/source/lib/att-tool/CMakeLists.txt index 81b6522a6c..9f1f21b97b 100644 --- a/source/lib/att-tool/CMakeLists.txt +++ b/source/lib/att-tool/CMakeLists.txt @@ -1,28 +1,32 @@ # # ATT decoder wrapper library for use by the rocprofv3 tool # -set(ATT_TOOL_SOURCE_FILES - waitcnt/analysis.cpp - waitcnt/gfx9.cpp - waitcnt/gfx10.cpp - waitcnt/gfx12.cpp - att_lib_wrapper.cpp - wave.cpp - code.cpp - filenames.cpp - occupancy.cpp - wstates.cpp - perfcounter.cpp - profile_interface.cpp - dl.cpp) +rocprofiler_activate_clang_tidy() + +set(ATT_TOOL_SOURCE_FILES att_lib_wrapper.cpp code.cpp filenames.cpp occupancy.cpp + perfcounter.cpp profile_interface.cpp wave.cpp wstates.cpp) + +set(ATT_TOOL_HEADER_FILES + att_lib_wrapper.hpp + code.hpp + filenames.hpp + occupancy.hpp + outputfile.hpp + perfcounter.hpp + profile_interface.hpp + util.hpp + wave.hpp + wstates.hpp) add_library(rocprofiler-sdk-att-parser STATIC) add_library(rocprofiler-sdk::rocprofiler-sdk-att-parser ALIAS rocprofiler-sdk-att-parser) -target_sources(rocprofiler-sdk-att-parser PRIVATE ${ATT_TOOL_SOURCE_FILES}) +target_sources(rocprofiler-sdk-att-parser PRIVATE ${ATT_TOOL_SOURCE_FILES} + ${ATT_TOOL_HEADER_FILES}) target_link_libraries( rocprofiler-sdk-att-parser - PRIVATE rocprofiler-sdk::rocprofiler-sdk-headers + 
PRIVATE rocprofiler-sdk::rocprofiler-sdk-shared-library + rocprofiler-sdk::rocprofiler-sdk-headers rocprofiler-sdk::rocprofiler-sdk-json rocprofiler-sdk::rocprofiler-sdk-common-library rocprofiler-sdk::rocprofiler-sdk-amd-comgr @@ -30,7 +34,3 @@ target_link_libraries( rocprofiler-sdk::rocprofiler-sdk-elf) add_subdirectory(waitcnt) - -if(ROCPROFILER_BUILD_TESTS) - add_subdirectory(tests) -endif() diff --git a/source/lib/att-tool/att_decoder.h b/source/lib/att-tool/att_decoder.h deleted file mode 100644 index ec37e17fe7..0000000000 --- a/source/lib/att-tool/att_decoder.h +++ /dev/null @@ -1,171 +0,0 @@ -// MIT License -// -// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#pragma once - -#ifdef __cplusplus -# include -# include -extern "C" { -#else -# include -# include -#endif - -typedef enum -{ - ROCPROFILER_ATT_DECODER_STATUS_SUCCESS = 0, - ROCPROFILER_ATT_DECODER_STATUS_ERROR, - ROCPROFILER_ATT_DECODER_STATUS_ERROR_OUT_OF_RESOURCES, - ROCPROFILER_ATT_DECODER_STATUS_ERROR_INVALID_ARGUMENT, - ROCPROFILER_ATT_DECODER_STATUS_ERROR_INVALID_SHADER_DATA, - ROCPROFILER_ATT_DECODER_STATUS_LAST -} rocprofiler_att_decoder_status_t; - -typedef enum -{ - ROCPROFILER_ATT_DECODER_INFO_NONE = 0, - ROCPROFILER_ATT_DECODER_INFO_DATA_LOST, - ROCPROFILER_ATT_DECODER_INFO_STITCH_INCOMPLETE, - ROCPROFILER_ATT_DECODER_INFO_LAST -} rocprofiler_att_decoder_info_t; - -typedef enum -{ - ROCPROFILER_ATT_DECODER_TYPE_GFXIP = 0, - ROCPROFILER_ATT_DECODER_TYPE_OCCUPANCY, - ROCPROFILER_ATT_DECODER_TYPE_PERFEVENT, - ROCPROFILER_ATT_DECODER_TYPE_WAVE, - ROCPROFILER_ATT_DECODER_TYPE_INFO, - ROCPROFILER_ATT_DECODER_TYPE_DEBUG, - ROCPROFILER_ATT_DECODER_TYPE_LAST -} rocprofiler_att_decoder_record_type_t; - -typedef struct -{ - size_t addr; - size_t marker_id; -} pcinfo_t; - -typedef struct -{ - pcinfo_t pc; - uint64_t time; - uint8_t se; - uint8_t cu; - uint8_t simd; - uint8_t slot; - uint32_t start : 1; - uint32_t _rsvd : 31; -} att_occupancy_info_v2_t; - -typedef struct -{ - int32_t type; - int32_t duration; -} att_wave_state_t; - -typedef struct -{ - uint32_t category : 8; - uint32_t stall : 24; - int32_t duration; - int64_t time; - pcinfo_t pc; -} att_wave_instruction_t; - -typedef enum -{ - ATT_WAVE_STATE_EMPTY = 0, - ATT_WAVE_STATE_LAST = 5 -} att_waveslot_state_t; - -typedef enum -{ - ATT_INST_NONE = 0, - ATT_INST_LAST = 15, -} att_wave_inst_category_t; - -typedef struct -{ - uint8_t cu; - uint8_t simd; - uint8_t wave_id; - uint8_t contexts; - - uint32_t _rsvd; - size_t traceID; - - int64_t begin_time; - int64_t end_time; - - size_t timeline_size; - size_t instructions_size; - att_wave_state_t* timeline_array; - att_wave_instruction_t* instructions_array; 
-} att_wave_data_t; - -typedef struct att_perfevent_t -{ - int64_t time; - uint16_t events0; - uint16_t events1; - uint16_t events2; - uint16_t events3; - uint8_t CU; - uint8_t bank; -} att_perfevent_t; - -typedef rocprofiler_att_decoder_status_t (*rocprofiler_att_decoder_isa_callback_t)( - char* instruction, - uint64_t* memory_size, - uint64_t* size, - pcinfo_t address, - void* userdata); - -typedef rocprofiler_att_decoder_status_t (*rocprofiler_att_decoder_trace_callback_t)( - rocprofiler_att_decoder_record_type_t record_type_id, - int shader_engine_id, - void* trace_events, - uint64_t trace_size, - void* userdata); - -typedef uint64_t (*rocprofiler_att_decoder_se_data_callback_t)(int* shader_engine_id, - uint8_t** buffer, - uint64_t* buffer_size, - void* userdata); - -rocprofiler_att_decoder_status_t -rocprofiler_att_decoder_parse_data(rocprofiler_att_decoder_se_data_callback_t se_data_callback, - rocprofiler_att_decoder_trace_callback_t trace_callback, - rocprofiler_att_decoder_isa_callback_t isa_callback, - void* userdata); - -const char* -rocprofiler_att_decoder_get_info_string(rocprofiler_att_decoder_info_t info); - -const char* -rocprofiler_att_decoder_get_status_string(rocprofiler_att_decoder_status_t status); - -#ifdef __cplusplus -} -#endif diff --git a/source/lib/att-tool/att_lib_wrapper.cpp b/source/lib/att-tool/att_lib_wrapper.cpp index f345e4b6b1..e2bf7842c0 100644 --- a/source/lib/att-tool/att_lib_wrapper.cpp +++ b/source/lib/att-tool/att_lib_wrapper.cpp @@ -21,7 +21,6 @@ // SOFTWARE. 
#include "att_lib_wrapper.hpp" -#include "dl.hpp" #include "filenames.hpp" #include "occupancy.hpp" #include "profile_interface.hpp" @@ -42,39 +41,64 @@ namespace rocprofiler { namespace att_wrapper { -auto -get_lib_names() -{ - std::vector> lib_names = { - {tool_att_capability_t::ATT_CAPABILITIES_DEBUG, "libatt_decoder_debug.so"}, - {tool_att_capability_t::ATT_CAPABILITIES_TRACE, "libatt_decoder_trace.so"}, - {tool_att_capability_t::ATT_CAPABILITIES_TESTING1, "libatt_decoder_testing1.so"}, - {tool_att_capability_t::ATT_CAPABILITIES_TESTING2, "libatt_decoder_testing2.so"}, - }; - return lib_names; -} - -ATTFileMgr::ATTFileMgr(Fspath _dir, std::shared_ptr
_dl, std::vector _counters) +ATTFileMgr::ATTFileMgr(Fspath _dir, + std::vector _counters, + rocprofiler_thread_trace_decoder_handle_t _decoder) : dir(std::move(_dir)) -, dl(std::move(_dl)) +, decoder(_decoder) { rocprofiler::common::filesystem::create_directories(dir); table = std::make_shared(); codefile = std::make_shared(dir, table); filenames = std::make_shared(dir); - for(size_t i = 0; i < ATT_WAVE_STATE_LAST; i++) + for(size_t i = 0; i < ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST; i++) wstates.at(i) = std::make_shared(i, dir); filenames->perfcounters = std::move(_counters); } -ATTFileMgr::~ATTFileMgr() { OccupancyFile::OccupancyFile(dir, table, occupancy); } +ATTFileMgr::~ATTFileMgr() +{ + for(auto id : codeobjs_to_delete) + { + auto status = rocprofiler_thread_trace_decoder_codeobj_unload(decoder, id); + ROCP_ERROR_IF(status != ROCPROFILER_STATUS_SUCCESS) << "unable to delete codeobj " << id; + } + + OccupancyFile::OccupancyFile(dir, table, occupancy); +} void -ATTFileMgr::parseShader(int se_id, const std::vector& data) +ATTFileMgr::addDecoder(const char* filepath, uint64_t id, uint64_t load_addr, uint64_t memsize) +{ + if(filepath == nullptr) return; + + std::vector buffer{}; + + { + std::ifstream file(filepath, std::ios::in | std::ios::binary); + + if(!file.is_open()) throw std::runtime_error("Invalid file " + std::string(filepath)); + + file.seekg(0, file.end); + buffer.resize(file.tellg()); + file.seekg(0, file.beg); + file.read(buffer.data(), buffer.size()); + } + + auto status = rocprofiler_thread_trace_decoder_codeobj_load( + decoder, id, load_addr, memsize, buffer.data(), buffer.size()); + ROCP_ERROR_IF(status != ROCPROFILER_STATUS_SUCCESS) << "Unable to load codeobj: " << filepath; + + codeobjs_to_delete.push_back(id); + table->addDecoder(buffer.data(), buffer.size(), id, load_addr, memsize); +} + +void +ATTFileMgr::parseShader(int se_id, std::vector& data) { WaveConfig config(se_id, filenames, codefile, wstates); - ToolData tooldata(data, config, 
dl); + ToolData tooldata(data, config, decoder); if(!config.occupancy.empty()) occupancy.emplace(se_id, std::move(config.occupancy)); @@ -96,31 +120,14 @@ get_shader_id(const std::string& name) return std::stoi(std::string(stripped.substr(se_number_pos + 1))); } -std::vector -query_att_decode_capability() +ATTDecoder::ATTDecoder(const std::string& path) { - auto ret = std::vector{}; - - for(auto& [cap, libname] : get_lib_names()) - { - if(DL(libname).handle != nullptr) ret.push_back(cap); - } - - return ret; -} - -ATTDecoder::ATTDecoder(tool_att_capability_t capability) -{ - for(auto& [cap, libname] : get_lib_names()) - { - if(cap == capability) - { - dl = std::make_shared
(libname); - return; - } - } + auto status = rocprofiler_thread_trace_decoder_create(&decoder, path.c_str()); + ROCP_FATAL_IF(status != ROCPROFILER_STATUS_SUCCESS) << "Error loading decoder: " << status; }; +ATTDecoder::~ATTDecoder() { rocprofiler_thread_trace_decoder_destroy(decoder); } + void ATTDecoder::parse(const Fspath& input_dir, const Fspath& output_dir, @@ -135,7 +142,7 @@ ATTDecoder::parse(const Fspath& input_dir, return std::tolower(c); }); - ATTFileMgr mgr(output_dir, dl, counters_names); + ATTFileMgr mgr(output_dir, counters_names, decoder); for(const auto& file : codeobj_files) { @@ -149,7 +156,7 @@ ATTDecoder::parse(const Fspath& input_dir, try { - mgr.table->addDecoder((input_dir / file.name).c_str(), file.id, file.addr, file.size); + mgr.addDecoder((input_dir / file.name).c_str(), file.id, file.addr, file.size); } catch(std::exception& e) { ROCP_ERROR << file.id << ':' << file.name << " - " << e.what(); @@ -191,8 +198,7 @@ ATTDecoder::parse(const Fspath& input_dir, bool ATTDecoder::valid() const { - return dl && (dl->att_parse_data_fn != nullptr) && (dl->att_info_fn != nullptr) && - (dl->att_status_fn != nullptr); + return decoder.handle != 0; } } // namespace att_wrapper diff --git a/source/lib/att-tool/att_lib_wrapper.hpp b/source/lib/att-tool/att_lib_wrapper.hpp index c8b4edf600..ebc2ac8ab0 100644 --- a/source/lib/att-tool/att_lib_wrapper.hpp +++ b/source/lib/att-tool/att_lib_wrapper.hpp @@ -22,6 +22,8 @@ #pragma once +#include + #include "lib/att-tool/util.hpp" #include "lib/common/filesystem.hpp" @@ -45,25 +47,11 @@ struct CodeobjLoadInfo size_t size{0}; }; -enum tool_att_capability_t -{ - ATT_CAPABILITIES_TESTING1 = 0, // used for code coverage testing - ATT_CAPABILITIES_TESTING2, // used for code coverage testing - ATT_CAPABILITIES_TRACE, // used for all outputs - ATT_CAPABILITIES_DEBUG, - ATT_CAPABILITIES_LAST = ATT_CAPABILITIES_DEBUG, -}; - -/** - * Query decoder library capability. Returns list of supported capabilities. 
- */ -std::vector -query_att_decode_capability(); - class ATTDecoder { public: - ATTDecoder(tool_att_capability_t cap); + ATTDecoder(const std::string& path); + ~ATTDecoder(); /** * Parse a list of att files @@ -83,7 +71,7 @@ public: bool valid() const; protected: - std::shared_ptr dl{nullptr}; + rocprofiler_thread_trace_decoder_handle_t decoder{}; }; class ATTFileMgr @@ -91,20 +79,26 @@ class ATTFileMgr using AddressTable = rocprofiler::sdk::codeobj::disassembly::CodeobjAddressTranslate; public: - ATTFileMgr(Fspath _dir, std::shared_ptr
_dl, std::vector _counters); + ATTFileMgr(Fspath _dir, + std::vector _counters, + rocprofiler_thread_trace_decoder_handle_t _decoder); ~ATTFileMgr(); - void parseShader(int se_id, const std::vector& data); + void addDecoder(const char* filepath, uint64_t id, uint64_t load_addr, uint64_t memsize); + + void parseShader(int se_id, std::vector& data); Fspath dir{}; - std::shared_ptr dl{nullptr}; - std::shared_ptr codefile{nullptr}; - std::shared_ptr filenames{nullptr}; - std::shared_ptr table{nullptr}; + std::shared_ptr codefile{nullptr}; + std::shared_ptr filenames{nullptr}; + std::shared_ptr table{nullptr}; + std::map> occupancy{}; + std::vector codeobjs_to_delete{}; + rocprofiler_thread_trace_decoder_handle_t decoder{}; - std::map> occupancy; - std::array, ATT_WAVE_STATE_LAST> wstates; + std::array, ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST> + wstates; }; } // namespace att_wrapper diff --git a/source/lib/att-tool/code.hpp b/source/lib/att-tool/code.hpp index 82775e5045..246fa098e1 100644 --- a/source/lib/att-tool/code.hpp +++ b/source/lib/att-tool/code.hpp @@ -27,7 +27,6 @@ #include #include #include -#include "att_decoder.h" #include "util.hpp" namespace rocprofiler diff --git a/source/lib/att-tool/counters.cpp b/source/lib/att-tool/counters.cpp deleted file mode 100644 index a6f53314e1..0000000000 --- a/source/lib/att-tool/counters.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// MIT License -// -// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#include "counters.hpp" -#include "outputfile.hpp" - -#include -#include -#include -#include -#include "util.hpp" - -namespace rocprofiler -{ -namespace att_wrapper -{ -CountersFile::CountersFile(const Fspath& _dir, const std::vector& _names) -: dir(_dir) -, names(_names) -{} - -void -CountersFile::AddShaderEngine(int se, const att_perfevent_t* events, size_t num_events) -{ - if(!num_events || !GlobalDefs::get().has_format("json")) return; - - nlohmann::json js; - - for(size_t i = 0; i < num_events; i++) - { - auto& ev = events[i]; - js.emplace_back({ev.time, ev.events0, ev.events1, ev.events2, ev.events3, ev.CU, ev.bank}); - } - - auto filename = dir / ("se" + std::to_string(se) + "_perfcounter.json"); - - OutputFile(filename) << nlohmann::json{"data", js}; - shaders.emplace_back(filename); -} - -CountersFile::~CountersFile() -{ - nlohmann::json counters_names; - for(auto& name : names) - counters_names.emplace_back(name); - - nlohmann::json perfcounter_filenames; - for(auto& name : shaders) - perfcounter_filenames.emplace_back(name); - - nlohmann::json js; - js["counters"] = counters_names; - js["shaders"] = perfcounter_filenames; - - OutputFile(dir / "graph_options.json") << nlohmann::json{"data", js}; -} - -} // namespace att_wrapper -} // namespace rocprofiler diff --git a/source/lib/att-tool/dl.cpp b/source/lib/att-tool/dl.cpp deleted file mode 100644 index fc52214594..0000000000 --- a/source/lib/att-tool/dl.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// MIT License -// -// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#include "lib/att-tool/dl.hpp" -#include "lib/common/environment.hpp" -#include "lib/common/filesystem.hpp" -#include "lib/common/logging.hpp" - -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace rocprofiler -{ -namespace att_wrapper -{ -namespace fs = ::rocprofiler::common::filesystem; - -fs::path -get_search_path(std::string_view path_name) -{ - if(fs::exists(path_name)) return fs::path(path_name); - return ""; -} - -DL::DL(const char* libname) -{ - auto paths = rocprofiler::common::get_env("ROCPROF_ATT_LIBRARY_PATH", ""); - if(paths.empty()) return; - auto path_set = rocprofiler::sdk::parse::tokenize(paths, ":"); - - for(auto&& name : path_set) - { - handle = dlopen((get_search_path(name) / libname).string().c_str(), RTLD_LAZY | RTLD_LOCAL); - if(handle) break; - } - if(!handle) return; - - att_parse_data_fn = - reinterpret_cast(dlsym(handle, "rocprofiler_att_decoder_parse_data")); - att_info_fn = - reinterpret_cast(dlsym(handle, "rocprofiler_att_decoder_get_info_string")); - att_status_fn = - reinterpret_cast(dlsym(handle, "rocprofiler_att_decoder_get_status_string")); -}; - -DL::~DL() -{ - if(handle) dlclose(handle); -} - -} // namespace att_wrapper -} // namespace rocprofiler diff --git a/source/lib/att-tool/filenames.hpp b/source/lib/att-tool/filenames.hpp index 0849097042..341dc5c9bd 100644 --- a/source/lib/att-tool/filenames.hpp +++ b/source/lib/att-tool/filenames.hpp @@ -26,7 +26,6 @@ #include #include -#include "att_decoder.h" #include "util.hpp" namespace rocprofiler diff --git a/source/lib/att-tool/occupancy.cpp b/source/lib/att-tool/occupancy.cpp index 190755c4a4..8de1cccb2d 100644 --- a/source/lib/att-tool/occupancy.cpp +++ b/source/lib/att-tool/occupancy.cpp @@ -52,9 +52,9 @@ get_kernel_id(pcinfo_t pc) namespace OccupancyFile { void -OccupancyFile(const Fspath& dir, - std::shared_ptr& table, - const std::map>& occ) +OccupancyFile(const Fspath& dir, + std::shared_ptr& table, + const std::map>& occ) { 
if(!GlobalDefs::get().has_format("json")) return; nlohmann::json jocc; diff --git a/source/lib/att-tool/occupancy.hpp b/source/lib/att-tool/occupancy.hpp index 8891fa30dc..0715c10c79 100644 --- a/source/lib/att-tool/occupancy.hpp +++ b/source/lib/att-tool/occupancy.hpp @@ -37,9 +37,9 @@ namespace OccupancyFile using AddressTable = rocprofiler::sdk::codeobj::disassembly::CodeobjAddressTranslate; void -OccupancyFile(const Fspath& dir, - std::shared_ptr& table, - const std::map>& occ); +OccupancyFile(const Fspath& dir, + std::shared_ptr& table, + const std::map>& occ); }; // namespace OccupancyFile } // namespace att_wrapper diff --git a/source/lib/att-tool/perfcounter.cpp b/source/lib/att-tool/perfcounter.cpp index 9682dce406..45dd8f24ea 100644 --- a/source/lib/att-tool/perfcounter.cpp +++ b/source/lib/att-tool/perfcounter.cpp @@ -34,7 +34,7 @@ namespace rocprofiler namespace att_wrapper { void -PerfcounterFile(WaveConfig& config, const att_perfevent_t* events, size_t event_count) +PerfcounterFile(WaveConfig& config, const perfevent_t* events, size_t event_count) { nlohmann::json data; for(size_t i = 0; i < event_count; i++) diff --git a/source/lib/att-tool/perfcounter.hpp b/source/lib/att-tool/perfcounter.hpp index 1ceb13e71d..15faf840c9 100644 --- a/source/lib/att-tool/perfcounter.hpp +++ b/source/lib/att-tool/perfcounter.hpp @@ -29,6 +29,6 @@ namespace rocprofiler namespace att_wrapper { void -PerfcounterFile(class WaveConfig& config, const att_perfevent_t* events, size_t event_count); +PerfcounterFile(class WaveConfig& config, const perfevent_t* events, size_t event_count); } // namespace att_wrapper } // namespace rocprofiler diff --git a/source/lib/att-tool/profile_interface.cpp b/source/lib/att-tool/profile_interface.cpp index a6fde4bcc5..7ba62e45c1 100644 --- a/source/lib/att-tool/profile_interface.cpp +++ b/source/lib/att-tool/profile_interface.cpp @@ -26,10 +26,10 @@ #endif #include "profile_interface.hpp" -#include "att_decoder.h" -#include "dl.hpp" 
#include "perfcounter.hpp" +#include + #include #include #include @@ -38,66 +38,49 @@ namespace rocprofiler { namespace att_wrapper { -struct trace_data_t -{ - int64_t id{0}; - uint8_t* data{nullptr}; - uint64_t size{0}; - ToolData* tool{nullptr}; -}; - -rocprofiler_att_decoder_status_t -get_trace_data(rocprofiler_att_decoder_record_type_t trace_id, - int /* shader_id */, - void* trace_events, - size_t trace_size, - void* userdata) +void +get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id, + void* trace_events, + size_t trace_size, + void* userdata) { C_API_BEGIN CHECK_NOTNULL(userdata); - trace_data_t& trace_data = *reinterpret_cast(userdata); - CHECK_NOTNULL(trace_data.tool); - ToolData& tool = *trace_data.tool; + ToolData& tool = *static_cast(userdata); - if(trace_id == ROCPROFILER_ATT_DECODER_TYPE_INFO) + if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO) { - auto* infos = (rocprofiler_att_decoder_info_t*) trace_events; + auto* infos = (rocprofiler_thread_trace_decoder_info_t*) trace_events; for(size_t i = 0; i < trace_size; i++) - ROCP_WARNING << tool.dl->att_info_fn(infos[i]); + ROCP_WARNING << rocprofiler_thread_trace_decoder_info_string(tool.decoder, infos[i]); } - else if(trace_id == ROCPROFILER_ATT_DECODER_TYPE_GFXIP) + else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP) { tool.config.filemgr->gfxip = reinterpret_cast(trace_events); } - else if(trace_id == ROCPROFILER_ATT_DECODER_TYPE_OCCUPANCY) + else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY) { for(size_t i = 0; i < trace_size; i++) - tool.config.occupancy.push_back( - reinterpret_cast(trace_events)[i]); + tool.config.occupancy.push_back(reinterpret_cast(trace_events)[i]); } - else if(trace_id == ROCPROFILER_ATT_DECODER_TYPE_PERFEVENT) + else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT) { - PerfcounterFile(tool.config, reinterpret_cast(trace_events), trace_size); + PerfcounterFile(tool.config, 
reinterpret_cast(trace_events), trace_size); } - if(trace_id != ROCPROFILER_ATT_DECODER_TYPE_WAVE) return ROCPROFILER_ATT_DECODER_STATUS_SUCCESS; + if(trace_id != ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE) return; bool bInvalid = false; for(size_t wave_n = 0; wave_n < trace_size; wave_n++) { - auto& wave = reinterpret_cast(trace_events)[wave_n]; + auto& wave = reinterpret_cast(trace_events)[wave_n]; int64_t prev_inst_time = wave.begin_time; - WaveFile(tool.config, wave); - for(size_t j = 0; j < wave.instructions_size; j++) { auto& inst = wave.instructions_array[j]; - if(inst.pc.marker_id == 0 && inst.pc.addr == 0) - continue; - else if(inst.category >= att_wave_inst_category_t::ATT_INST_LAST) - continue; + if(inst.pc.marker_id == 0 && inst.pc.addr == 0) continue; try { @@ -112,81 +95,24 @@ get_trace_data(rocprofiler_att_decoder_record_type_t trace_id, } prev_inst_time = std::max(prev_inst_time, inst.time + inst.duration); } + + WaveFile(tool.config, wave); } if(bInvalid) ROCP_WARNING << "Could not fetch some instructions!"; - return ROCPROFILER_ATT_DECODER_STATUS_SUCCESS; - C_API_END - - return ROCPROFILER_ATT_DECODER_STATUS_ERROR; } -uint64_t -copy_trace_data(int* seid, uint8_t** buffer, uint64_t* buffer_size, void* userdata) -{ - trace_data_t& data = *reinterpret_cast(userdata); - *seid = data.id; - *buffer_size = data.size; - *buffer = data.data; - data.size = 0; - return *buffer_size; -} - -rocprofiler_att_decoder_status_t -isa_callback(char* isa_instruction, - uint64_t* isa_memory_size, - uint64_t* isa_size, - pcinfo_t pc, - void* userdata) -{ - C_API_BEGIN - CHECK_NOTNULL(userdata); - trace_data_t& trace_data = *reinterpret_cast(userdata); - CHECK_NOTNULL(trace_data.tool); - ToolData& tool = *trace_data.tool; - - std::shared_ptr instruction{nullptr}; - - try - { - CodeLine& line = tool.get(pc); - instruction = line.code_line; - } catch(std::exception& e) - { - ROCP_WARNING << pc.marker_id << ":" << pc.addr << ' ' << e.what(); - return 
ROCPROFILER_ATT_DECODER_STATUS_ERROR; - } - - if(!instruction.get()) return ROCPROFILER_ATT_DECODER_STATUS_ERROR_INVALID_ARGUMENT; - - { - size_t tmp_isa_size = *isa_size; - *isa_size = instruction->inst.size(); - - if(*isa_size > tmp_isa_size) return ROCPROFILER_ATT_DECODER_STATUS_ERROR_OUT_OF_RESOURCES; - } - - memcpy(isa_instruction, instruction->inst.data(), *isa_size); - *isa_memory_size = instruction->size; - - C_API_END - return ROCPROFILER_ATT_DECODER_STATUS_SUCCESS; -} - -ToolData::ToolData(const std::vector& _data, WaveConfig& _config, std::shared_ptr
_dl) +ToolData::ToolData(std::vector& _data, + WaveConfig& _config, + rocprofiler_thread_trace_decoder_handle_t _decoder) : cfile(_config.code) , config(_config) -, dl(std::move(_dl)) +, decoder(_decoder) { - trace_data_t data{.id = config.shader_engine, - .data = (uint8_t*) _data.data(), - .size = _data.size(), - .tool = this}; - - auto status = dl->att_parse_data_fn(copy_trace_data, get_trace_data, isa_callback, &data); - if(status != ROCPROFILER_ATT_DECODER_STATUS_SUCCESS) - ROCP_ERROR << "Callback failed with status " << dl->att_status_fn(status); + auto status = + rocprofiler_trace_decode(decoder, get_trace_data, _data.data(), _data.size(), this); + ROCP_ERROR_IF(status != ROCPROFILER_STATUS_SUCCESS) << ": " << status; } ToolData::~ToolData() = default; diff --git a/source/lib/att-tool/profile_interface.hpp b/source/lib/att-tool/profile_interface.hpp index c241c30c28..dc08f1c28d 100644 --- a/source/lib/att-tool/profile_interface.hpp +++ b/source/lib/att-tool/profile_interface.hpp @@ -26,6 +26,8 @@ #endif #pragma once +#include + #include #include #include @@ -35,7 +37,6 @@ #include #include #include -#include "att_decoder.h" #include "code.hpp" #include "wave.hpp" @@ -59,17 +60,20 @@ using SymbolInfo = rocprofiler::sdk::codeobj::disassembly::SymbolInfo; struct ToolData { - ToolData(const std::vector& data, WaveConfig& config, std::shared_ptr _dl); + ToolData(std::vector& data, + WaveConfig& config, + rocprofiler_thread_trace_decoder_handle_t decoder); ~ToolData(); CodeLine& get(pcinfo_t pc); std::shared_ptr cfile{}; WaveConfig& config; - std::shared_ptr
dl{}; std::vector shader_data{}; size_t num_waves = 0; + + rocprofiler_thread_trace_decoder_handle_t decoder{}; }; } // namespace att_wrapper diff --git a/source/lib/att-tool/tests/CMakeLists.txt b/source/lib/att-tool/tests/CMakeLists.txt deleted file mode 100644 index 08755e0438..0000000000 --- a/source/lib/att-tool/tests/CMakeLists.txt +++ /dev/null @@ -1,58 +0,0 @@ -# -# -# - -rocprofiler_deactivate_clang_tidy() - -add_executable(att-parser-tool-v3) -target_sources(att-parser-tool-v3 PRIVATE standalone_tool_main.cpp) -target_link_libraries( - att-parser-tool-v3 - PRIVATE rocprofiler-sdk::rocprofiler-sdk-att-parser - rocprofiler-sdk::rocprofiler-sdk-json - rocprofiler-sdk::rocprofiler-sdk-common-library - rocprofiler-sdk::rocprofiler-sdk-dw) - -add_executable(att-decoder-test) -target_sources(att-decoder-test PRIVATE att_decoder_test.cpp) -target_link_libraries( - att-decoder-test - PRIVATE rocprofiler-sdk::rocprofiler-sdk-att-parser - rocprofiler-sdk::rocprofiler-sdk-json - rocprofiler-sdk::rocprofiler-sdk-common-library - rocprofiler-sdk::rocprofiler-sdk-glog - rocprofiler-sdk::rocprofiler-sdk-static-library - rocprofiler-sdk::rocprofiler-sdk-dw - GTest::gtest - GTest::gtest_main) - -add_library(att_decoder_testing1 SHARED) -add_library(rocprofiler-sdk::att-decoder-testing1 ALIAS att_decoder_testing1) -target_sources(att_decoder_testing1 PRIVATE dummy_decoder.cpp) -set_target_properties(att_decoder_testing1 PROPERTIES VERSION ${PROJECT_VERSION} - SOVERSION ${PROJECT_VERSION_MAJOR}) - -add_library(att_decoder_testing2 SHARED) -add_library(rocprofiler-sdk::att-decoder-testing2 ALIAS att_decoder_testing2) -target_sources(att_decoder_testing2 PRIVATE dummy_decoder.cpp) -set_target_properties( - att_decoder_testing2 - PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib/att - VERSION ${PROJECT_VERSION} - SOVERSION ${PROJECT_VERSION_MAJOR}) - -if(NOT ROCPROFILER_MEMCHECK) - set(IS_MEMCHECK OFF) -else() - set(IS_MEMCHECK ON) -endif() - -set(env-att-lib 
"ROCPROF_ATT_LIBRARY_PATH=${PROJECT_BINARY_DIR}/lib") -gtest_add_tests( - TARGET att-decoder-test - SOURCES att_decoder_test.cpp - TEST_LIST att-decoder-test_TESTS - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -set_tests_properties( - ${att-decoder-test_TESTS} PROPERTIES ENVIRONMENT "${env-att-lib}" TIMEOUT 10 LABELS - "unittests" DISABLED "${IS_MEMCHECK}") diff --git a/source/lib/att-tool/tests/att_decoder_test.cpp b/source/lib/att-tool/tests/att_decoder_test.cpp deleted file mode 100644 index 05f6018e4a..0000000000 --- a/source/lib/att-tool/tests/att_decoder_test.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// MIT License -// -// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#include "lib/att-tool/att_lib_wrapper.hpp" -#include "lib/att-tool/code.hpp" -#include "lib/att-tool/outputfile.hpp" -#include "lib/common/logging.hpp" -#include "lib/rocprofiler-sdk/registration.hpp" - -#include -#include - -namespace rocprofiler -{ -namespace att_wrapper -{ -class ATTDecoderTest : public ATTDecoder -{ -public: - ATTDecoderTest() - : ATTDecoder(rocprofiler::att_wrapper::tool_att_capability_t::ATT_CAPABILITIES_TESTING1) - { - rocprofiler::att_wrapper::OutputFile::Enabled() = false; - GlobalDefs::get().output_formats = "json,csv"; - registration::init_logging(); - }; - - void test_parse() - { - ATTFileMgr mgr("out/", dl, {}); - - auto append_isa = [&](const char* line) { - // matches addresses in dummy_decoder.cpp - pcinfo_t pc{}; - pc.addr = mgr.codefile->isa_map.size() * 8; - pc.marker_id = 1; - - auto code = std::make_unique(); - code->code_line = std::make_shared(); - code->code_line->inst = line; - code->line_number = mgr.codefile->isa_map.size(); - - mgr.codefile->isa_map.emplace(pc, std::move(code)); - }; - - mgr.codefile->kernel_names[pcinfo_t{}] = KernelName{"_Kernel", "Kernel"}; - - append_isa("s_load_"); - append_isa("s_store_"); - append_isa("s_waitcnt vmcnt(0) lgkmcnt(0)"); - - std::vector dummy_data; - dummy_data.resize(128); - - mgr.parseShader(0, dummy_data); - mgr.parseShader(1, dummy_data); - } -}; - -TEST(att_decoder_test, dlopen) -{ - registration::init_logging(); - auto query = query_att_decode_capability(); - ROCP_FATAL_IF(query.empty()) << "No decoder capability available!"; -} - -TEST(att_decoder_test, filewrite) -{ - ATTDecoderTest decoder; - ROCP_FATAL_IF(!decoder.valid()) << "Failed to initialize decoder library!"; - - decoder.test_parse(); -} - -TEST(att_decoder_test, warn_failures) -{ - std::vector codeobjs; - codeobjs.resize(5); - codeobjs.at(0).name = "memory://unknown"; - codeobjs.at(1).name = "memory://unknown&offset=123&size=123"; - codeobjs.at(2).name = "file://nofile"; - codeobjs.at(3).name = 
"file://nofile&offset=123&size=123"; - codeobjs.at(4).name = "myfile123.out"; - - std::vector att_files; - att_files.emplace_back("file123.att"); - - ATTDecoderTest decoder; - ROCP_FATAL_IF(!decoder.valid()) << "Failed to initialize decoder library!"; - - decoder.parse(".", ".", att_files, codeobjs, {}, "csv,json"); -} - -TEST(att_decoder_test, code_write) -{ - registration::init_logging(); - rocprofiler::att_wrapper::OutputFile::Enabled() = false; - GlobalDefs::get().output_formats = "json,csv"; - - CodeFile file{}; - - pcinfo_t addr{}; - addr.marker_id = 0; - addr.addr = 0x1000; - file.kernel_names[addr] = KernelName{"_Kernel", "Kernel"}; - - for(size_t i = 0; i < 4; i++) - { - auto line = std::make_unique(); - line->line_number = i; - - line->code_line = std::make_shared(); - line->code_line->inst = "v_add"; - file.isa_map[addr] = std::move(line); - } -} - -}; // namespace att_wrapper -}; // namespace rocprofiler diff --git a/source/lib/att-tool/tests/dummy_decoder.cpp b/source/lib/att-tool/tests/dummy_decoder.cpp deleted file mode 100644 index 43071cd5d9..0000000000 --- a/source/lib/att-tool/tests/dummy_decoder.cpp +++ /dev/null @@ -1,154 +0,0 @@ -// MIT License -// -// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "../att_decoder.h" - -#include -#include - -__attribute__((visibility("default"))) rocprofiler_att_decoder_status_t -rocprofiler_att_decoder_parse_data(rocprofiler_att_decoder_se_data_callback_t se_data_callback, - rocprofiler_att_decoder_trace_callback_t trace_callback, - rocprofiler_att_decoder_isa_callback_t isa_callback, - void* userdata) -{ - const int gfxip = 9; - - trace_callback(ROCPROFILER_ATT_DECODER_TYPE_GFXIP, - 0, - reinterpret_cast(gfxip), // NOLINT(performance-no-int-to-ptr) - 0, - userdata); - { - std::vector infos{}; - for(size_t i = 1; i < ROCPROFILER_ATT_DECODER_INFO_LAST; i++) - infos.emplace_back(static_cast(i)); - - trace_callback(ROCPROFILER_ATT_DECODER_TYPE_INFO, 0, infos.data(), infos.size(), userdata); - } - { - uint64_t memory_size = 0, size = 16; - std::array inst; - - isa_callback(inst.data(), &memory_size, &size, pcinfo_t{0, 0}, userdata); - } - { - int se_id = 0; - uint8_t* buffer = nullptr; - size_t buffer_size = 0; - - while(se_data_callback(&se_id, &buffer, &buffer_size, userdata) != 0u) - {}; - } - - { - std::vector vec; - att_occupancy_info_v2_t occ{}; - occ.cu = occ.se = occ.simd = occ.slot = 1; - occ.pc.marker_id = 0; - occ.pc.addr = 0; - - occ.time = 0; - occ.start = 1; - vec.push_back(occ); - occ.simd = 0; - vec.push_back(occ); - - occ.time = 1024; - occ.start = 0; - vec.push_back(occ); - occ.simd = 1; - vec.push_back(occ); - - trace_callback(ROCPROFILER_ATT_DECODER_TYPE_OCCUPANCY, 0, vec.data(), vec.size(), userdata); - } - - { - 
std::vector waves{}; - - att_wave_data_t wave{}; - wave.cu = wave.simd = wave.wave_id = wave.traceID = 1; - - wave.begin_time = 0; - wave.end_time = 1024; - - std::vector states; - for(int j = 0; j < 2; j++) - for(int i = 1; i < ATT_WAVE_STATE_LAST; i++) - states.emplace_back(att_wave_state_t{i, 128}); - - std::vector insts; - for(int i = 1; i < ATT_INST_LAST; i++) - { - att_wave_instruction_t inst{}; - inst.category = i; - inst.duration = 48; - inst.time = i * 64 - 32; - inst.pc.marker_id = 1; - inst.pc.addr = 8 * i; - insts.emplace_back(inst); - } - - wave.instructions_array = insts.data(); - wave.instructions_size = insts.size(); - wave.timeline_array = states.data(); - wave.timeline_size = states.size(); - - waves.push_back(wave); - wave.simd = 2; - waves.push_back(wave); - - trace_callback(ROCPROFILER_ATT_DECODER_TYPE_WAVE, 0, waves.data(), waves.size(), userdata); - } - - { - std::vector vec{}; - att_perfevent_t perf{.events0 = 1, .CU = 1, .bank = 1}; - vec.push_back(perf); - vec.push_back(perf); - trace_callback(ROCPROFILER_ATT_DECODER_TYPE_PERFEVENT, 0, vec.data(), vec.size(), userdata); - } - - return ROCPROFILER_ATT_DECODER_STATUS_SUCCESS; -} - -__attribute__((visibility("default"))) const char* -rocprofiler_att_decoder_get_info_string(rocprofiler_att_decoder_info_t info) -{ - return std::vector{"ROCPROFILER_ATT_DECODER_INFO_NONE", - "ROCPROFILER_ATT_DECODER_INFO_DATA_LOST", - "ROCPROFILER_ATT_DECODER_INFO_STITCH_INCOMPLETE", - "ROCPROFILER_ATT_DECODER_INFO_LAST"} - .at((size_t) info); -} - -__attribute__((visibility("default"))) const char* -rocprofiler_att_decoder_get_status_string(rocprofiler_att_decoder_status_t status) -{ - return std::vector{"ROCPROFILER_ATT_DECODER_STATUS_SUCCESS", - "ROCPROFILER_ATT_DECODER_STATUS_ERROR", - "ROCPROFILER_ATT_DECODER_STATUS_ERROR_OUT_OF_RESOURCES", - "ROCPROFILER_ATT_DECODER_STATUS_ERROR_INVALID_ARGUMENT", - "ROCPROFILER_ATT_DECODER_STATUS_ERROR_INVALID_SHADER_DATA", - "ROCPROFILER_ATT_DECODER_STATUS_LAST"} - 
.at((size_t) status); -} diff --git a/source/lib/att-tool/tests/standalone_tool_main.cpp b/source/lib/att-tool/tests/standalone_tool_main.cpp deleted file mode 100644 index 8164024e70..0000000000 --- a/source/lib/att-tool/tests/standalone_tool_main.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// MIT License -// -// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#include "lib/att-tool/att_lib_wrapper.hpp" -#include "lib/att-tool/outputfile.hpp" -#include "lib/common/logging.hpp" -#include "lib/rocprofiler-sdk/registration.hpp" - -#include -#include -#include - -using rocprofiler::att_wrapper::ATTDecoder; -using Fspath = rocprofiler::att_wrapper::Fspath; - -int -main(int argc, char** argv) -{ - if(argc < 2 || std::string_view(argv[1]) == "--help" || std::string_view(argv[1]) == "-h") - { - std::cout - << "Usage: att-parser-tool json_filepath [output_dir] [output_formats]\nParameters:\n" - << "\tjson_filepath: Path of rocprofv3's generated results.json\n" - << "\toutput_dir: Optional output directory. Default: json_filepath parent dir\n" - << "\toutput_formats: json, perfetto, csv. Default: all\n" - << std::endl; - exit(0); - } - - static auto flag = std::once_flag{}; - std::call_once(flag, []() { - auto logging_cfg = rocprofiler::common::logging_config{.install_failure_handler = true}; - rocprofiler::common::init_logging("ROCPROF", logging_cfg); - FLAGS_colorlogtostderr = true; - }); - - auto cap = rocprofiler::att_wrapper::tool_att_capability_t::ATT_CAPABILITIES_TRACE; - { - auto query = rocprofiler::att_wrapper::query_att_decode_capability(); - ROCP_FATAL_IF(query.empty()) << "No decoder capability available!"; - - for(auto& avail_cap : query) - cap = std::max(cap, avail_cap); - } - - ATTDecoder decoder(cap); - ROCP_FATAL_IF(!decoder.valid()) << "Failed to initialize decoder library!"; - - auto get_run_number = [](const std::string& path) -> int { - auto get_filename = [](const std::string& _path) -> std::string { - return Fspath(_path).filename().c_str(); - }; - - auto name = get_filename(path); - auto run_pos = name.rfind('_'); - auto extension_pos = name.rfind(".att"); - - if(extension_pos == std::string::npos || extension_pos <= run_pos) throw std::exception(); - - return std::stoi(name.substr(run_pos + 1, extension_pos - run_pos)); - }; - - Fspath input_path = rocprofiler::common::filesystem::absolute(argv[1]); - 
- std::string formats = "json,csv"; - if(argc >= 4) formats = argv[3]; - - Fspath output_path = input_path.parent_path(); - if(argc >= 3) output_path = argv[2]; - - nlohmann::json sdk_json; - { - nlohmann::json full_json; - std::ifstream ifile(input_path); - ROCP_FATAL_IF(!ifile.is_open()) << "Failed to open json file!"; - ifile >> full_json; - sdk_json = full_json["rocprofiler-sdk-tool"][0]; - } - - std::unordered_map> all_runs; - - for(auto& file : sdk_json["strings"]["att_filenames"]) - { - try - { - int n = get_run_number(file); - if(all_runs.find(n) == all_runs.end()) all_runs[n] = {}; - - all_runs[n].push_back(file); - - } catch(std::exception&) - { - ROCP_WARNING << "Invalid ATT filename " << file; - } - } - - for(auto& [run_number, att_filenames] : all_runs) - { - std::vector att_files{}; - std::vector codeobj_files{}; - - std::vector snapshot_files{}; - for(auto elem : sdk_json["strings"]["code_object_snapshot_filenames"]) - snapshot_files.push_back(elem); - - for(auto& codeobj : sdk_json["code_objects"]) - if(!std::string{codeobj["uri"]}.empty()) - { - std::string filename = codeobj["uri"]; - size_t id = size_t(codeobj["code_object_id"]); - if(filename.empty()) continue; - - try - { - filename = snapshot_files.at(id); - } catch(...) - { - ROCP_WARNING << "codeobject id " << id << " not found " << filename; - } - - try - { - codeobj_files.push_back({filename, - id, - size_t(codeobj["load_delta"]), - size_t(codeobj["load_size"])}); - } catch(std::exception& e) - { - ROCP_WARNING << "Could not load " << filename << ": " << e.what(); - } catch(std::string& r) - { - ROCP_WARNING << "Could not load " << filename << ": " << r; - } catch(...) 
- { - ROCP_WARNING << "Could not load " << filename; - } - } - - std::string run_name = input_path.filename().c_str(); - std::string ui_name = run_name.substr(0, run_name.find(".json")); - auto output_dir = output_path / ("ui_output_" + ui_name + std::to_string(run_number)); - decoder.parse( - input_path.parent_path(), output_dir, att_filenames, codeobj_files, {}, formats); - } - - ROCP_INFO << "Finalizing ATT Tool"; - - return 0; -} diff --git a/source/lib/att-tool/util.hpp b/source/lib/att-tool/util.hpp index d106d490f1..a0220f608a 100644 --- a/source/lib/att-tool/util.hpp +++ b/source/lib/att-tool/util.hpp @@ -27,14 +27,20 @@ #define TOOL_VERSION_REV 0 #define TOOL_VERSION "3.0.0" +#include #include -#include "att_decoder.h" #include "lib/common/logging.hpp" #include #include #include +using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t; +using occupancy_t = rocprofiler_thread_trace_decoder_occupancy_t; +using wave_t = rocprofiler_thread_trace_decoder_wave_t; +using perfevent_t = rocprofiler_thread_trace_decoder_perfevent_t; +using wave_instruction_t = rocprofiler_thread_trace_decoder_inst_t; + template <> struct std::hash { diff --git a/source/lib/att-tool/waitcnt/CMakeLists.txt b/source/lib/att-tool/waitcnt/CMakeLists.txt index 35828f8c25..2cc6293a02 100644 --- a/source/lib/att-tool/waitcnt/CMakeLists.txt +++ b/source/lib/att-tool/waitcnt/CMakeLists.txt @@ -1,6 +1,13 @@ # # ATT waitcnt ISA # +set(ATT_TOOL_WAITCNT_SOURCE_FILES analysis.cpp gfx9.cpp gfx10.cpp gfx12.cpp) + +set(ATT_TOOL_WAITCNT_HEADER_FILES analysis.hpp) + +target_sources(rocprofiler-sdk-att-parser PRIVATE ${ATT_TOOL_WAITCNT_SOURCE_FILES} + ${ATT_TOOL_WAITCNT_HEADER_FILES}) + if(ROCPROFILER_BUILD_TESTS) add_subdirectory(tests) endif() diff --git a/source/lib/att-tool/waitcnt/analysis.hpp b/source/lib/att-tool/waitcnt/analysis.hpp index b5d14f931d..a1f3011c65 100644 --- a/source/lib/att-tool/waitcnt/analysis.hpp +++ b/source/lib/att-tool/waitcnt/analysis.hpp @@ -22,7 +22,6 @@ #pragma once 
-#include "lib/att-tool/att_decoder.h" #include "lib/att-tool/att_lib_wrapper.hpp" #include "lib/att-tool/code.hpp" @@ -44,7 +43,7 @@ struct LineWaitcnt struct WaitcntList { using isa_map_t = std::map>; - using wave_t = att_wave_data_t; + using wave_t = rocprofiler_thread_trace_decoder_wave_t; WaitcntList(int gfxip, const wave_t& wave, isa_map_t& isa_map) { diff --git a/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp b/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp index ac509a85e6..c56125d535 100644 --- a/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp +++ b/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp @@ -76,20 +76,19 @@ TEST(att_decoder_waitcnt_test, gfx9) append_isa(20, "s_waitcnt vmcnt( 0) lgkmcnt(0x0)"); // some weird strings append_isa(21, "invalid"); - std::vector insts{}; + std::vector insts{}; for(size_t j = 0; j < LOOP_CNT; j++) { for(size_t i = 0; i < isa_map.size(); i++) { - att_wave_instruction_t inst{}; + wave_instruction_t inst{}; inst.pc.addr = i; insts.push_back(inst); } } WaitcntList::wave_t wave{}; - wave.traceID = 1; wave.instructions_array = insts.data(); wave.instructions_size = insts.size(); @@ -170,16 +169,15 @@ TEST(att_decoder_waitcnt_test, gfx10) append_isa(26, "s_waitcnt lgkmcnt 0"); append_isa(27, "invalid"); - std::vector insts{}; + std::vector insts{}; for(size_t i = 0; i < isa_map.size(); i++) { - att_wave_instruction_t inst{}; + wave_instruction_t inst{}; inst.pc.addr = i; insts.push_back(inst); } WaitcntList::wave_t wave{}; - wave.traceID = 2; wave.instructions_array = insts.data(); wave.instructions_size = insts.size(); @@ -291,16 +289,15 @@ TEST(att_decoder_waitcnt_test, gfx12) append_isa(42, "s_wait_idle"); append_isa(43, "invalid"); - std::vector insts{}; + std::vector insts{}; for(size_t i = 0; i < isa_map.size(); i++) { - att_wave_instruction_t inst{}; + wave_instruction_t inst{}; inst.pc.addr = i; insts.push_back(inst); } WaitcntList::wave_t wave{}; - 
wave.traceID = 3; wave.instructions_array = insts.data(); wave.instructions_size = insts.size(); @@ -342,11 +339,11 @@ TEST(att_decoder_waitcnt_test, fail_conditions) WaitcntList::isa_map_t isa_map{}; - std::vector insts{}; + std::vector insts{}; for(size_t i = 0; i < 10; i++) { - att_wave_instruction_t inst{}; + wave_instruction_t inst{}; inst.pc.addr = i; insts.push_back(inst); } diff --git a/source/lib/att-tool/wave.cpp b/source/lib/att-tool/wave.cpp index 977795dcd1..dd7bc05544 100644 --- a/source/lib/att-tool/wave.cpp +++ b/source/lib/att-tool/wave.cpp @@ -32,7 +32,7 @@ namespace rocprofiler { namespace att_wrapper { -WaveFile::WaveFile(WaveConfig& config, const att_wave_data_t& wave) +WaveFile::WaveFile(WaveConfig& config, const wave_t& wave) { ROCP_WARNING_IF(wave.contexts != 0u) << "Wave had " << wave.contexts << " context save-restores"; diff --git a/source/lib/att-tool/wave.hpp b/source/lib/att-tool/wave.hpp index 6233a148dc..6c427b5824 100644 --- a/source/lib/att-tool/wave.hpp +++ b/source/lib/att-tool/wave.hpp @@ -22,7 +22,6 @@ #pragma once -#include "att_decoder.h" #include "code.hpp" #include "filenames.hpp" #include "waitcnt/analysis.hpp" @@ -43,8 +42,9 @@ constexpr size_t SIMD_SIZE = 32; class WaveConfig { - using WavestateArray = std::array, ATT_WAVE_STATE_LAST>; - using SIMD = std::array; + using WavestateArray = + std::array, ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST>; + using SIMD = std::array; public: WaveConfig(int se_id, @@ -64,14 +64,14 @@ public: std::shared_ptr code; std::shared_ptr filemgr; - std::map kernel_names{}; - std::vector occupancy{}; + std::map kernel_names{}; + std::vector occupancy{}; }; class WaveFile { public: - WaveFile(WaveConfig& config, const att_wave_data_t& wave); + WaveFile(WaveConfig& config, const wave_t& wave); Fspath filename{}; }; diff --git a/source/lib/rocprofiler-sdk-tool/config.hpp b/source/lib/rocprofiler-sdk-tool/config.hpp index 57e14475ce..a253439542 100644 --- 
a/source/lib/rocprofiler-sdk-tool/config.hpp +++ b/source/lib/rocprofiler-sdk-tool/config.hpp @@ -149,7 +149,7 @@ struct config : output_config std::string pc_sampling_method = get_env("ROCPROF_PC_SAMPLING_METHOD", "none"); std::string pc_sampling_unit = get_env("ROCPROF_PC_SAMPLING_UNIT", "none"); std::string extra_counters_contents = get_env("ROCPROF_EXTRA_COUNTERS_CONTENTS", ""); - std::string att_capability = get_env("ROCPROF_ATT_CAPABILITY", "trace"); + std::string att_library_path = get_env("ROCPROF_ATT_LIBRARY_PATH", ""); std::unordered_set kernel_filter_range = {}; std::vector> counters = {}; @@ -244,7 +244,7 @@ config::save(ArchiveT& ar) const CFG_SERIALIZE_MEMBER(att_param_buffer_size); CFG_SERIALIZE_MEMBER(att_param_simd_select); CFG_SERIALIZE_MEMBER(att_param_target_cu); - CFG_SERIALIZE_MEMBER(att_capability); + CFG_SERIALIZE_MEMBER(att_library_path); CFG_SERIALIZE_MEMBER(att_param_perfcounters); CFG_SERIALIZE_MEMBER(att_param_perf_ctrl); diff --git a/source/lib/rocprofiler-sdk-tool/tool.cpp b/source/lib/rocprofiler-sdk-tool/tool.cpp index 1cf9fe83d5..322183cc4b 100644 --- a/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -1266,11 +1266,11 @@ get_instruction_index(rocprofiler_pc_t pc) } // namespace -std::vector +std::vector get_att_perfcounter_params(rocprofiler_agent_id_t agent, std::vector& att_perf_counters) { - std::vector _data{}; + std::vector _data{}; if(att_perf_counters.empty()) return _data; static const auto agent_counter_info = get_agent_counter_info(); @@ -1283,8 +1283,8 @@ get_att_perfcounter_params(rocprofiler_agent_id_t agen { if(std::string_view(counter_info_.name) != att_perf_counter.counter_name) continue; - auto param = rocprofiler_att_parameter_t{}; - param.type = ROCPROFILER_ATT_PARAMETER_PERFCOUNTER; + auto param = rocprofiler_thread_trace_parameter_t{}; + param.type = ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER; param.counter_id = counter_info_.id; param.simd_mask = 
att_perf_counter.simd_mask; _data.emplace_back(param); @@ -1392,7 +1392,7 @@ att_shader_data_callback(rocprofiler_agent_id_t agent, tool_metadata->att_filenames[dispatch_id].second.emplace_back(output_filename); } -rocprofiler_att_control_flags_t +rocprofiler_thread_trace_control_flags_t att_dispatch_callback(rocprofiler_agent_id_t /* agent_id */, rocprofiler_queue_id_t /* queue_id */, rocprofiler_async_correlation_id_t /* correlation_id */, @@ -1406,8 +1406,8 @@ att_dispatch_callback(rocprofiler_agent_id_t /* agent_id */, userdata_shader->value = dispatch_id; if(is_targeted_kernel(kernel_id, kernel_iteration)) - return ROCPROFILER_ATT_CONTROL_START_AND_STOP; - return ROCPROFILER_ATT_CONTROL_NONE; + return ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP; + return ROCPROFILER_THREAD_TRACE_CONTROL_NONE; } void @@ -1650,17 +1650,17 @@ struct tracing_callbacks_t , counter_record{dummy_counter_record_callback} {} - const rocprofiler_callback_tracing_cb_t code_object_tracing = nullptr; - const rocprofiler_callback_tracing_cb_t cntrl_tracing = nullptr; - const rocprofiler_callback_tracing_cb_t kernel_rename = nullptr; - const rocprofiler_callback_tracing_cb_t hip_stream = nullptr; - const rocprofiler_callback_tracing_cb_t callback_tracing = nullptr; - const rocprofiler_buffer_tracing_cb_t buffered_tracing = nullptr; - const rocprofiler_buffer_tracing_cb_t pc_sampling = nullptr; - const rocprofiler_att_dispatch_callback_t att_dispatch = nullptr; - const rocprofiler_att_shader_data_callback_t att_shader_data = nullptr; - const rocprofiler_dispatch_counting_service_cb_t counter_dispatch = nullptr; - const rocprofiler_dispatch_counting_record_cb_t counter_record = nullptr; + const rocprofiler_callback_tracing_cb_t code_object_tracing = nullptr; + const rocprofiler_callback_tracing_cb_t cntrl_tracing = nullptr; + const rocprofiler_callback_tracing_cb_t kernel_rename = nullptr; + const rocprofiler_callback_tracing_cb_t hip_stream = nullptr; + const 
rocprofiler_callback_tracing_cb_t callback_tracing = nullptr; + const rocprofiler_buffer_tracing_cb_t buffered_tracing = nullptr; + const rocprofiler_buffer_tracing_cb_t pc_sampling = nullptr; + const rocprofiler_thread_trace_dispatch_callback_t att_dispatch = nullptr; + const rocprofiler_thread_trace_shader_data_callback_t att_shader_data = nullptr; + const rocprofiler_dispatch_counting_service_cb_t counter_dispatch = nullptr; + const rocprofiler_dispatch_counting_record_cb_t counter_record = nullptr; }; auto @@ -1901,7 +1901,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) if(tool::get_config().advanced_thread_trace) { - auto global_parameters = std::vector{}; + auto global_parameters = std::vector{}; uint64_t target_cu = tool::get_config().att_param_target_cu; uint64_t simd_select = tool::get_config().att_param_simd_select; uint64_t buffer_sz = tool::get_config().att_param_buffer_size; @@ -1910,17 +1910,19 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) auto& att_perf = tool::get_config().att_param_perfcounters; bool att_serialize_all = tool::get_config().att_serialize_all; - global_parameters.push_back({ROCPROFILER_ATT_PARAMETER_TARGET_CU, {target_cu}}); - global_parameters.push_back({ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, {simd_select}}); - global_parameters.push_back({ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, {buffer_sz}}); - global_parameters.push_back({ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, {shader_mask}}); + global_parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU, {target_cu}}); global_parameters.push_back( - {ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, {static_cast(att_serialize_all)}}); + {ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT, {simd_select}}); + global_parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE, {buffer_sz}}); + global_parameters.push_back( + {ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK, {shader_mask}}); + 
global_parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL, + {static_cast(att_serialize_all)}}); if(perfcounter_ctrl != 0 && !att_perf.empty()) { global_parameters.push_back( - {ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, {perfcounter_ctrl}}); + {ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL, {perfcounter_ctrl}}); } else if(perfcounter_ctrl != 0 || !att_perf.empty()) { @@ -2335,8 +2337,7 @@ tool_fini(void* /*tool_data*/) generate_output(rocjpeg_output, outdata, contributions, cleanups); generate_output(pc_sampling_stochastic_output, outdata, contributions, cleanups); - if(tool::get_config().advanced_thread_trace && !tool::get_config().att_capability.empty() && - !tool_metadata->att_filenames.empty()) + if(tool::get_config().advanced_thread_trace && !tool_metadata->att_filenames.empty()) { outdata.num_output += 1; } @@ -2443,19 +2444,9 @@ tool_fini(void* /*tool_data*/) if(tool::get_config().advanced_thread_trace) { - const std::unordered_map - tool_att_capability_map = { - {"testing1", rocprofiler::att_wrapper::ATT_CAPABILITIES_TESTING1}, - {"testing2", rocprofiler::att_wrapper::ATT_CAPABILITIES_TESTING2}, - {"trace", rocprofiler::att_wrapper::ATT_CAPABILITIES_TRACE}, - {"debug", rocprofiler::att_wrapper::ATT_CAPABILITIES_DEBUG}}; + auto decoder = rocprofiler::att_wrapper::ATTDecoder(tool::get_config().att_library_path); + ROCP_FATAL_IF(!decoder.valid()) << "Decoder library not found!"; - ROCP_FATAL_IF(tool::get_config().att_capability.empty()) - << "Provide the decoder parser method as input"; - - auto att_capability_value = tool_att_capability_map.at(tool::get_config().att_capability); - auto decoder = rocprofiler::att_wrapper::ATTDecoder(att_capability_value); - ROCP_FATAL_IF(!decoder.valid()) << "Decoder library not found at ROCPROF_ATT_LIBRARY_PATH"; auto codeobj = tool_metadata->get_code_object_load_info(); auto output_path = tool::format_path(tool::get_config().output_path); diff --git 
a/source/lib/rocprofiler-sdk/aql/packet_construct.hpp b/source/lib/rocprofiler-sdk/aql/packet_construct.hpp index 37b437316a..035879293b 100644 --- a/source/lib/rocprofiler-sdk/aql/packet_construct.hpp +++ b/source/lib/rocprofiler-sdk/aql/packet_construct.hpp @@ -26,7 +26,7 @@ #include "lib/rocprofiler-sdk/aql/helpers.hpp" #include "lib/rocprofiler-sdk/counters/metrics.hpp" #include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" -#include "lib/rocprofiler-sdk/thread_trace/att_core.hpp" +#include "lib/rocprofiler-sdk/thread_trace/core.hpp" #include diff --git a/source/lib/rocprofiler-sdk/context/context.cpp b/source/lib/rocprofiler-sdk/context/context.cpp index c2455b06d9..36a7d2d6b9 100644 --- a/source/lib/rocprofiler-sdk/context/context.cpp +++ b/source/lib/rocprofiler-sdk/context/context.cpp @@ -29,7 +29,7 @@ #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/counters/core.hpp" #include "lib/rocprofiler-sdk/pc_sampling/service.hpp" -#include "lib/rocprofiler-sdk/thread_trace/att_core.hpp" +#include "lib/rocprofiler-sdk/thread_trace/core.hpp" #include #include diff --git a/source/lib/rocprofiler-sdk/context/context.hpp b/source/lib/rocprofiler-sdk/context/context.hpp index aeb790d45e..5e99d575ca 100644 --- a/source/lib/rocprofiler-sdk/context/context.hpp +++ b/source/lib/rocprofiler-sdk/context/context.hpp @@ -30,7 +30,7 @@ #include "lib/rocprofiler-sdk/counters/device_counting.hpp" #include "lib/rocprofiler-sdk/external_correlation.hpp" #include "lib/rocprofiler-sdk/pc_sampling/types.hpp" -#include "lib/rocprofiler-sdk/thread_trace/att_core.hpp" +#include "lib/rocprofiler-sdk/thread_trace/core.hpp" #include #include diff --git a/source/lib/rocprofiler-sdk/tests/enum_string.cpp b/source/lib/rocprofiler-sdk/tests/enum_string.cpp index 03e6054f08..0ef5de57a3 100644 --- a/source/lib/rocprofiler-sdk/tests/enum_string.cpp +++ b/source/lib/rocprofiler-sdk/tests/enum_string.cpp @@ -182,9 +182,9 @@ TEST(enum_string, fwd_h) 
TEST_STR(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_CORE_API); TEST_STR(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API); - // rocprofiler_att_parameter_type_t - TEST_STR(ROCPROFILER_ATT_PARAMETER_TARGET_CU); - TEST_STR(ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL); + // rocprofiler_thread_trace_parameter_type_t + TEST_STR(ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU); + TEST_STR(ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL); } TEST(enum_string, hip_api_id) @@ -386,7 +386,8 @@ TEST(enum_string, unsuported) using namespace std::string_view_literals; static_assert(!details::rocprofiler_enum_info::supported); - static_assert(!details::rocprofiler_enum_info::supported); + static_assert( + !details::rocprofiler_enum_info::supported); TEST_API_ID_STR(TEST_ENUM_VALUE, V1); TEST_API_ID_STR(TEST_ENUM_VALUE, V3); diff --git a/source/lib/rocprofiler-sdk/thread_trace/CMakeLists.txt b/source/lib/rocprofiler-sdk/thread_trace/CMakeLists.txt index b7e1eb107b..262c7ea5ce 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk/thread_trace/CMakeLists.txt @@ -1,5 +1,7 @@ -set(ROCPROFILER_LIB_THREAD_TRACE_SOURCES att_core.cpp att_service.cpp code_object.cpp) -set(ROCPROFILER_LIB_THREAD_TRACE_HEADERS att_core.hpp code_object.hpp) +set(ROCPROFILER_LIB_THREAD_TRACE_SOURCES core.cpp service.cpp code_object.cpp decode.cpp + dl.cpp) +set(ROCPROFILER_LIB_THREAD_TRACE_HEADERS core.hpp code_object.hpp dl.hpp + trace_decoder_api.h) target_sources( rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_THREAD_TRACE_SOURCES} ${ROCPROFILER_LIB_THREAD_TRACE_HEADERS}) diff --git a/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp b/source/lib/rocprofiler-sdk/thread_trace/core.cpp similarity index 98% rename from source/lib/rocprofiler-sdk/thread_trace/att_core.cpp rename to source/lib/rocprofiler-sdk/thread_trace/core.cpp index 39bb11aa3f..16360ad5b2 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp +++ 
b/source/lib/rocprofiler-sdk/thread_trace/core.cpp @@ -62,9 +62,9 @@ constexpr uint64_t MAX_BUFFER_SIZE = std::numeric_limits::max(); // aq struct cbdata_t { - rocprofiler_agent_id_t agent; - rocprofiler_att_shader_data_callback_t cb_fn; - const rocprofiler_user_data_t* userdata; + rocprofiler_agent_id_t agent; + rocprofiler_thread_trace_shader_data_callback_t cb_fn; + const rocprofiler_user_data_t* userdata; }; common::Synchronized> client; @@ -349,7 +349,8 @@ DispatchThreadTracer::pre_kernel_call(const hsa::Queue& queue, parameters.callback_userdata.ptr, user_data); - if(control_flags == ROCPROFILER_ATT_CONTROL_NONE) return {nullptr, parameters.bSerialize}; + if(control_flags == ROCPROFILER_THREAD_TRACE_CONTROL_NONE) + return {nullptr, parameters.bSerialize}; auto packet = agent.get_control(true); post_move_data.fetch_add(1); diff --git a/source/lib/rocprofiler-sdk/thread_trace/att_core.hpp b/source/lib/rocprofiler-sdk/thread_trace/core.hpp similarity index 95% rename from source/lib/rocprofiler-sdk/thread_trace/att_core.hpp rename to source/lib/rocprofiler-sdk/thread_trace/core.hpp index 39d13de16a..ec480295a3 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/att_core.hpp +++ b/source/lib/rocprofiler-sdk/thread_trace/core.hpp @@ -56,10 +56,10 @@ namespace thread_trace { struct thread_trace_parameter_pack { - rocprofiler_context_id_t context_id{0}; - rocprofiler_att_dispatch_callback_t dispatch_cb_fn{nullptr}; - rocprofiler_att_shader_data_callback_t shader_cb_fn{nullptr}; - rocprofiler_user_data_t callback_userdata{}; + rocprofiler_context_id_t context_id{0}; + rocprofiler_thread_trace_dispatch_callback_t dispatch_cb_fn{nullptr}; + rocprofiler_thread_trace_shader_data_callback_t shader_cb_fn{nullptr}; + rocprofiler_user_data_t callback_userdata{}; // Parameters uint8_t target_cu = 1; diff --git a/source/lib/rocprofiler-sdk/thread_trace/decode.cpp b/source/lib/rocprofiler-sdk/thread_trace/decode.cpp new file mode 100644 index 0000000000..63917bee57 --- 
/dev/null +++ b/source/lib/rocprofiler-sdk/thread_trace/decode.cpp @@ -0,0 +1,253 @@ +// MIT License +// +// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "lib/common/static_object.hpp" +#include "lib/rocprofiler-sdk/aql/helpers.hpp" +#include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" +#include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/thread_trace/dl.hpp" + +#include +#include + +#include + +#include + +namespace +{ +using DL = rocprofiler::thread_trace::DL; +using AddressTable = rocprofiler::sdk::codeobj::disassembly::CodeobjAddressTranslate; + +class DecoderInstance +{ +public: + DecoderInstance(std::unique_ptr
_dl) + : dl(std::move(_dl)) + {} + + std::unique_ptr
dl{nullptr}; + AddressTable table{}; +}; + +std::mutex map_mut; + +auto& +get_dlopens() +{ + static auto*& _v = rocprofiler::common::static_object< + std::unordered_map>>::construct(); + return *CHECK_NOTNULL(_v); +} + +std::shared_ptr +get_dl(rocprofiler_thread_trace_decoder_handle_t handle) +{ + auto lk = std::unique_lock{map_mut}; + auto it = get_dlopens().find(handle.handle); + if(it == get_dlopens().end()) return nullptr; + + return it->second; +} +} // namespace + +extern "C" { +rocprofiler_status_t +rocprofiler_thread_trace_decoder_create(rocprofiler_thread_trace_decoder_handle_t* handle, + const char* path) +{ + auto dl = std::make_unique
(path); + if(dl->handle == nullptr) return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE; + if(!dl->valid()) return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI; + + auto lk = std::unique_lock{map_mut}; + static uint64_t count = 1; + + auto instance = std::make_shared(std::move(dl)); + + handle->handle = count++; + get_dlopens()[handle->handle] = std::move(instance); + + return ROCPROFILER_STATUS_SUCCESS; +} + +void +rocprofiler_thread_trace_decoder_destroy(rocprofiler_thread_trace_decoder_handle_t handle) +{ + auto lk = std::unique_lock{map_mut}; + get_dlopens().erase(handle.handle); +} + +rocprofiler_status_t +rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_handle_t handle, + uint64_t load_id, + uint64_t load_addr, + uint64_t load_size, + const void* data, + uint64_t size) +{ + auto decoder = get_dl(handle); + if(decoder == nullptr) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + + try + { + decoder->table.addDecoder(data, size, load_id, load_addr, load_size); + } catch(...) 
+ { + return ROCPROFILER_STATUS_ERROR; + } + return ROCPROFILER_STATUS_SUCCESS; +} + +rocprofiler_status_t +rocprofiler_thread_trace_decoder_codeobj_unload(rocprofiler_thread_trace_decoder_handle_t handle, + uint64_t load_id) +{ + auto decoder = get_dl(handle); /* unknown handle -> INVALID_ARGUMENT */ + if(decoder == nullptr) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + + try + { + if(decoder->table.removeDecoder(load_id)) return ROCPROFILER_STATUS_SUCCESS; + } catch(...) /* same as codeobj_load: no exception of any type may leave this extern "C" entry point */ + {} + + return ROCPROFILER_STATUS_ERROR; /* removeDecoder() returned false or threw */ +} +} + +namespace +{ +using Instruction = rocprofiler::sdk::codeobj::disassembly::Instruction; +using SymbolInfo = rocprofiler::sdk::codeobj::disassembly::SymbolInfo; + +struct trace_data_t +{ + uint8_t* data{nullptr}; + uint64_t size{0}; + std::shared_ptr decoder{nullptr}; + + rocprofiler_thread_trace_decoder_callback_t cb{nullptr}; + void* userdata{nullptr}; +}; + +uint64_t +copy_trace_data(uint8_t** buffer, uint64_t* buffer_size, void* userdata) +{ + trace_data_t& data = *reinterpret_cast(userdata); + *buffer_size = data.size; + *buffer = data.data; + data.size = 0; + return *buffer_size; +} + +rocprofiler_thread_trace_decoder_status_t +isa_callback(char* isa_instruction, + uint64_t* isa_memory_size, + uint64_t* isa_size, + rocprofiler_thread_trace_decoder_pc_t pc, + void* userdata) +{ + ROCP_FATAL_IF(userdata == nullptr) << "Userdata is null!"; + auto& table = static_cast(userdata)->decoder->table; + + std::unique_ptr instruction{nullptr}; + + try + { + instruction = table.get(pc.marker_id, pc.addr); + } catch(std::exception& e) + { + ROCP_WARNING << pc.marker_id << ":" << pc.addr << ' ' << e.what(); + return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR; + } + + if(!instruction) return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT; + + { + size_t tmp_isa_size = *isa_size; + *isa_size = instruction->inst.size(); + + if(*isa_size > tmp_isa_size) + return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES; + } + + memcpy(isa_instruction, 
instruction->inst.data(), *isa_size); + *isa_memory_size = instruction->size; + + return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS; +} + +rocprofiler_thread_trace_decoder_status_t +trace_callback(rocprofiler_thread_trace_decoder_record_type_t record_type_id, + void* trace_events, + uint64_t trace_size, + void* userdata) +{ + ROCP_FATAL_IF(userdata == nullptr) << "Userdata is null!"; + auto* trace_data = static_cast(userdata); + trace_data->cb(record_type_id, trace_events, trace_size, trace_data->userdata); + return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS; +} + +} // namespace + +extern "C" { +rocprofiler_status_t +rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle, + rocprofiler_thread_trace_decoder_callback_t user_callback, + void* data, + uint64_t size, + void* userdata) +{ + auto decoder = get_dl(handle); /* unknown handle -> INVALID_ARGUMENT */ + if(decoder == nullptr) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + + trace_data_t cbdata{.data = static_cast(data), + .size = size, + .decoder = decoder, + .cb = user_callback, + .userdata = userdata}; + + auto status = + decoder->dl->att_parse_data_fn(copy_trace_data, trace_callback, isa_callback, &cbdata); + if(status != ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS) + { + const char* statustr = decoder->dl->att_status_fn(status); + if(statustr == nullptr) statustr = "Unknown error"; + ROCP_ERROR << "Callback failed with status " << status << ": " << statustr; + return ROCPROFILER_STATUS_ERROR; /* report the decoder failure to the caller instead of masking it as success */ + } + return ROCPROFILER_STATUS_SUCCESS; +} + +const char* +rocprofiler_thread_trace_decoder_info_string(rocprofiler_thread_trace_decoder_handle_t handle, + rocprofiler_thread_trace_decoder_info_t info) +{ + auto decoder = get_dl(handle); + if(decoder == nullptr) return nullptr; + + return decoder->dl->att_info_fn(info); +} +} diff --git a/source/lib/att-tool/counters.hpp b/source/lib/rocprofiler-sdk/thread_trace/dl.cpp similarity index 60% rename from source/lib/att-tool/counters.hpp rename to source/lib/rocprofiler-sdk/thread_trace/dl.cpp index 
df705c5a1b..52b9fb2f43 100644 --- a/source/lib/att-tool/counters.hpp +++ b/source/lib/rocprofiler-sdk/thread_trace/dl.cpp @@ -20,31 +20,37 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -#pragma once +#include "lib/rocprofiler-sdk/thread_trace/dl.hpp" +#include "lib/common/filesystem.hpp" -#include "att_lib_wrapper.hpp" - -#include -#include -#include "util.hpp" +#include +#include +#include namespace rocprofiler { -namespace att_wrapper +namespace thread_trace { -class CountersFile +DL::DL(const char* libpath) { -public: - CountersFile(const Fspath& dir, const std::vector& names); - ~CountersFile(); + if(libpath == nullptr) return; - void AddShaderEngine(int se, const att_perfevent_t* events, size_t num_events); + auto path = common::filesystem::path(libpath) / "librocprof-trace-decoder.so"; -private: - const Fspath dir; - std::vector names{}; - std::vector shaders{}; + handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL); + if(!handle) return; + + att_parse_data_fn = + reinterpret_cast(dlsym(handle, "rocprof_trace_decoder_parse_data")); + att_info_fn = reinterpret_cast(dlsym(handle, "rocprof_trace_decoder_get_info_string")); + att_status_fn = + reinterpret_cast(dlsym(handle, "rocprof_trace_decoder_get_status_string")); }; -} // namespace att_wrapper +DL::~DL() +{ + if(handle) dlclose(handle); +} + +} // namespace thread_trace } // namespace rocprofiler diff --git a/source/lib/att-tool/dl.hpp b/source/lib/rocprofiler-sdk/thread_trace/dl.hpp similarity index 71% rename from source/lib/att-tool/dl.hpp rename to source/lib/rocprofiler-sdk/thread_trace/dl.hpp index 4e5a25ee1c..897d2ccbcb 100644 --- a/source/lib/att-tool/dl.hpp +++ b/source/lib/rocprofiler-sdk/thread_trace/dl.hpp @@ -22,23 +22,31 @@ #pragma once -#include "att_decoder.h" +#include "lib/rocprofiler-sdk/thread_trace/trace_decoder_api.h" #include namespace rocprofiler { -namespace att_wrapper +namespace thread_trace { class DL { - using ParseFn = 
decltype(rocprofiler_att_decoder_parse_data); - using InfoFn = decltype(rocprofiler_att_decoder_get_info_string); - using StatusFn = decltype(rocprofiler_att_decoder_get_status_string); + using ParseFn = decltype(rocprof_trace_decoder_parse_data); + using InfoFn = decltype(rocprof_trace_decoder_get_info_string); + using StatusFn = decltype(rocprof_trace_decoder_get_status_string); public: - DL(const char* libname); + DL(const char* libpath); ~DL(); + DL(DL&) = delete; + DL(DL&& other) = delete; + + bool valid() const + { + return handle != nullptr && att_parse_data_fn != nullptr && att_info_fn != nullptr && + att_status_fn != nullptr; + }; ParseFn* att_parse_data_fn = nullptr; InfoFn* att_info_fn = nullptr; @@ -46,5 +54,5 @@ public: void* handle = nullptr; }; -} // namespace att_wrapper +} // namespace thread_trace } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/thread_trace/att_service.cpp b/source/lib/rocprofiler-sdk/thread_trace/service.cpp similarity index 65% rename from source/lib/rocprofiler-sdk/thread_trace/att_service.cpp rename to source/lib/rocprofiler-sdk/thread_trace/service.cpp index 3a2bd2aeea..1fcf62636c 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/att_service.cpp +++ b/source/lib/rocprofiler-sdk/thread_trace/service.cpp @@ -37,13 +37,13 @@ using DeviceThreadTracer = rocprofiler::thread_trace::DeviceThreadTracer; extern "C" { rocprofiler_status_t rocprofiler_configure_dispatch_thread_trace_service( - rocprofiler_context_id_t context_id, - rocprofiler_agent_id_t agent_id, - rocprofiler_att_parameter_t* parameters, - size_t num_parameters, - rocprofiler_att_dispatch_callback_t dispatch_callback, - rocprofiler_att_shader_data_callback_t shader_callback, - void* callback_userdata) + rocprofiler_context_id_t context_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_thread_trace_parameter_t* parameters, + size_t num_parameters, + rocprofiler_thread_trace_dispatch_callback_t dispatch_callback, + 
rocprofiler_thread_trace_shader_data_callback_t shader_callback, + void* callback_userdata) { ROCP_TRACE << "Configuring Dispatch ATT for agent " << agent_id.handle; @@ -69,30 +69,37 @@ rocprofiler_configure_dispatch_thread_trace_service( auto id_map = rocprofiler::counters::getPerfCountersIdMap(); for(size_t p = 0; p < num_parameters; p++) { - const rocprofiler_att_parameter_t& param = parameters[p]; - if(param.type > ROCPROFILER_ATT_PARAMETER_LAST) + const rocprofiler_thread_trace_parameter_t& param = parameters[p]; + if(param.type > ROCPROFILER_THREAD_TRACE_PARAMETER_LAST) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; switch(param.type) { - case ROCPROFILER_ATT_PARAMETER_TARGET_CU: pack.target_cu = param.value; break; - case ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK: + case ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU: pack.target_cu = param.value; break; + case ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK: pack.shader_engine_mask = param.value; break; - case ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE: pack.buffer_size = param.value; break; - case ROCPROFILER_ATT_PARAMETER_SIMD_SELECT: pack.simd_select = param.value; break; - case ROCPROFILER_ATT_PARAMETER_PERFCOUNTER: + case ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE: + pack.buffer_size = param.value; + break; + case ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT: + pack.simd_select = param.value; + break; + case ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER: { auto event_it = id_map.find(param.counter_id.handle); if(event_it != id_map.end()) pack.perfcounters.push_back({event_it->second, param.simd_mask}); } break; - case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL: + case ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL: pack.perfcounter_ctrl = param.value; break; - case ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL: pack.bSerialize = param.value != 0; break; - case ROCPROFILER_ATT_PARAMETER_LAST: return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + case 
ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL: + pack.bSerialize = param.value != 0; + break; + case ROCPROFILER_THREAD_TRACE_PARAMETER_LAST: + return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; } } @@ -104,12 +111,12 @@ rocprofiler_configure_dispatch_thread_trace_service( rocprofiler_status_t rocprofiler_configure_device_thread_trace_service( - rocprofiler_context_id_t context_id, - rocprofiler_agent_id_t agent_id, - rocprofiler_att_parameter_t* parameters, - size_t num_parameters, - rocprofiler_att_shader_data_callback_t shader_callback, - rocprofiler_user_data_t userdata) + rocprofiler_context_id_t context_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_thread_trace_parameter_t* parameters, + size_t num_parameters, + rocprofiler_thread_trace_shader_data_callback_t shader_callback, + rocprofiler_user_data_t userdata) { ROCP_TRACE << "Configuring Device ATT for agent " << agent_id.handle; @@ -131,32 +138,37 @@ rocprofiler_configure_device_thread_trace_service( auto id_map = rocprofiler::counters::getPerfCountersIdMap(); for(size_t p = 0; p < num_parameters; p++) { - const rocprofiler_att_parameter_t& param = parameters[p]; - if(param.type > ROCPROFILER_ATT_PARAMETER_LAST) + const rocprofiler_thread_trace_parameter_t& param = parameters[p]; + if(param.type > ROCPROFILER_THREAD_TRACE_PARAMETER_LAST) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; switch(param.type) { - case ROCPROFILER_ATT_PARAMETER_TARGET_CU: pack.target_cu = param.value; break; - case ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK: + case ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU: pack.target_cu = param.value; break; + case ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK: pack.shader_engine_mask = param.value; break; - case ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE: pack.buffer_size = param.value; break; - case ROCPROFILER_ATT_PARAMETER_SIMD_SELECT: pack.simd_select = param.value; break; - case ROCPROFILER_ATT_PARAMETER_PERFCOUNTER: + case 
ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE: + pack.buffer_size = param.value; + break; + case ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT: + pack.simd_select = param.value; + break; + case ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER: { auto event_it = id_map.find(param.counter_id.handle); if(event_it != id_map.end()) pack.perfcounters.push_back({event_it->second, param.simd_mask}); } break; - case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL: + case ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL: pack.perfcounter_ctrl = param.value; break; - case ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL: + case ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL: if(param.value != 0) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; break; - case ROCPROFILER_ATT_PARAMETER_LAST: return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + case ROCPROFILER_THREAD_TRACE_PARAMETER_LAST: + return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; } } diff --git a/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp b/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp index 7f0243ddce..f2e00e8eea 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp +++ b/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp @@ -31,7 +31,7 @@ #include "lib/rocprofiler-sdk/hsa/queue.hpp" #include "lib/rocprofiler-sdk/hsa/queue_controller.hpp" #include "lib/rocprofiler-sdk/registration.hpp" -#include "lib/rocprofiler-sdk/thread_trace/att_core.hpp" +#include "lib/rocprofiler-sdk/thread_trace/core.hpp" #include #include @@ -134,11 +134,11 @@ TEST(thread_trace, configure_test) rocprofiler_context_id_t ctx{0}; ROCPROFILER_CALL(rocprofiler_create_context(&ctx), "context creation failed"); - std::vector params; - params.push_back({ROCPROFILER_ATT_PARAMETER_TARGET_CU, {1}}); - params.push_back({ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, {0xF}}); - params.push_back({ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, {0x1000000}}); - 
params.push_back({ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, {0xF}}); + std::vector params; + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU, {1}}); + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK, {0xF}}); + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE, {0x1000000}}); + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT, {0xF}}); auto agents = hsa::get_queue_controller()->get_supported_agents(); ASSERT_GT(agents.size(), 0); @@ -156,7 +156,7 @@ TEST(thread_trace, configure_test) rocprofiler_kernel_id_t, rocprofiler_dispatch_id_t, void*, - rocprofiler_user_data_t*) { return ROCPROFILER_ATT_CONTROL_NONE; }, + rocprofiler_user_data_t*) { return ROCPROFILER_THREAD_TRACE_CONTROL_NONE; }, [](rocprofiler_agent_id_t, int64_t, void*, size_t, rocprofiler_user_data_t) {}, nullptr); } @@ -180,17 +180,17 @@ TEST(thread_trace, perfcounters_configure_test) // Only GFX9 SQ Block counters are supported std::vector> perf_counters = { {"SQ_WAVES", 0x1}, {"SQ_WAVES", 0x2}, {"SQ_WAVES", 0x2}, {"GRBM_COUNT", 0x3}}; - std::set> expected; - std::vector params; - params.push_back({ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, {1}}); + std::set> expected; + std::vector params; + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL, {1}}); auto metrics = rocprofiler::counters::getMetricsForAgent("gfx90a"); for(auto& [counter_name, simd_mask] : perf_counters) for(auto& metric : metrics) if(metric.name() == counter_name) { - rocprofiler_att_parameter_t att_param; - att_param.type = ROCPROFILER_ATT_PARAMETER_PERFCOUNTER; + rocprofiler_thread_trace_parameter_t att_param; + att_param.type = ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER; att_param.counter_id = rocprofiler_counter_id_t{.handle = metric.id()}; att_param.simd_mask = simd_mask; params.push_back(att_param); @@ -213,7 +213,7 @@ TEST(thread_trace, perfcounters_configure_test) rocprofiler_kernel_id_t, rocprofiler_dispatch_id_t, void*, - 
rocprofiler_user_data_t*) { return ROCPROFILER_ATT_CONTROL_NONE; }, + rocprofiler_user_data_t*) { return ROCPROFILER_THREAD_TRACE_CONTROL_NONE; }, [](rocprofiler_agent_id_t, int64_t, void*, size_t, rocprofiler_user_data_t) {}, nullptr); } @@ -273,18 +273,18 @@ query_available_agents(rocprofiler_agent_version_t /* version */, const auto* agent = static_cast(agents[idx]); if(agent->type != ROCPROFILER_AGENT_TYPE_GPU) continue; - std::vector params; - params.push_back({ROCPROFILER_ATT_PARAMETER_TARGET_CU, {1}}); - params.push_back({ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, {0xF}}); - params.push_back({ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, {0x1000000}}); - params.push_back({ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, {0xF}}); - params.push_back({ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, {1}}); + std::vector params; + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU, {1}}); + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK, {0xF}}); + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE, {0x1000000}}); + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT, {0xF}}); + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL, {1}}); { auto metrics = rocprofiler::counters::getMetricsForAgent("gfx90a"); - rocprofiler_att_parameter_t att_param; - att_param.type = ROCPROFILER_ATT_PARAMETER_PERFCOUNTER; + rocprofiler_thread_trace_parameter_t att_param; + att_param.type = ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER; att_param.simd_mask = 0xF; for(auto& metric : metrics) if(metric.name() == "SQ_WAVES") rocprofiler_counter_id_t{.handle = metric.id()}; diff --git a/source/lib/rocprofiler-sdk/thread_trace/trace_decoder_api.h b/source/lib/rocprofiler-sdk/thread_trace/trace_decoder_api.h new file mode 100644 index 0000000000..9c17e0c041 --- /dev/null +++ b/source/lib/rocprofiler-sdk/thread_trace/trace_decoder_api.h @@ -0,0 +1,131 @@ +// MIT License +// +// Copyright (c) 2024-2025 Advanced Micro 
Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum +{ + ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS = 0, + ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR, + ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES, + ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT, + ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_SHADER_DATA, + ROCPROFILER_THREAD_TRACE_DECODER_STATUS_LAST +} rocprofiler_thread_trace_decoder_status_t; + +/** + * @brief Callback for the decoder to return traces back to rocprofiler. + * @param[in] record_type_id One of rocprofiler_thread_trace_decoder_record_type_t + * @param[in] trace_events A pointer to sequence of events, of size trace_size. + * @param[in] trace_size The number of events in the trace. 
+ * @param[in] userdata Arbitrary data pointer to be sent back to the user via callback. + */ +typedef rocprofiler_thread_trace_decoder_status_t (*rocprof_trace_decoder_trace_callback_t)( + rocprofiler_thread_trace_decoder_record_type_t record_type_id, + void* trace_events, + uint64_t trace_size, + void* userdata); + +/** + * @brief Callback for rocprofiler to return ISA to decoder. + * The callback must copy the requested instruction into @p instruction, + * while obeying the max length passed by the caller in @p size. + * If the caller's length is insufficient, then the callback writes the required size to @p size + * and returns ::ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES. + * If the callback returns _SUCCESS, @p size must be written with the bytes used. + * @param[out] instruction Where to copy the ISA line to. + * @param[out] memory_size (Auto) The number of bytes to next instruction. 0 for custom ISA. + * @param[inout] size Size of returned ISA string. + * @param[in] address The code object ID and offset from base vaddr. + * If marker_id == 0, this parameter is raw virtual address with no codeobj ID information. + * @param[in] userdata Arbitrary data pointer to be sent back to the user via callback. + * @retval ::ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS on success. + * @retval ::ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR on generic error. + * @retval ::ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT for invalid address. + * @retval ::ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES for insufficient + * size. + */ +typedef rocprofiler_thread_trace_decoder_status_t (*rocprof_trace_decoder_isa_callback_t)( + char* instruction, + uint64_t* memory_size, + uint64_t* size, + rocprofiler_thread_trace_decoder_pc_t address, + void* userdata); + +/** + * @brief Callback for the decoder to retrieve Shader Engine data. Return zero to end parsing. 
+ * @param[out] buffer The buffer to fill up with SE data. + * @param[out] buffer_size The space available in the buffer. + * @param[in] userdata Arbitrary data pointer to be sent back to the user via callback. + * @returns Number of bytes remaining. + * @retval 0 if no more SE data is available. Parsing will stop. + * @retval buffer_size if the buffer does not hold enough data. + * @retval 0 < ret < buffer_size for partially filled buffer, and call ends. + */ +typedef uint64_t (*rocprof_trace_decoder_se_data_callback_t)(uint8_t** buffer, + uint64_t* buffer_size, + void* userdata); + +/** + * @brief Parses thread trace data. + * @param[in] se_data_callback Callback to return shader engine data from. + * @param[in] trace_callback Callback where the trace data is returned to. + * @param[in] isa_callback Callback to return ISA lines. + * @param[in] userdata Userdata passed back to caller via callback. + */ +rocprofiler_thread_trace_decoder_status_t +rocprof_trace_decoder_parse_data(rocprof_trace_decoder_se_data_callback_t se_data_callback, + rocprof_trace_decoder_trace_callback_t trace_callback, + rocprof_trace_decoder_isa_callback_t isa_callback, + void* userdata); + +/** + * @brief Returns the description of a rocprofiler_thread_trace_decoder_info_t record. + * @param[in] info The decoder info received + * @retval null terminated string as description of "info". 
+ */ +const char* +rocprof_trace_decoder_get_info_string(rocprofiler_thread_trace_decoder_info_t info); + +const char* +rocprof_trace_decoder_get_status_string(rocprofiler_thread_trace_decoder_status_t status); + +typedef void (*rocprofiler_thread_trace_decoder_debug_callback_t)(int64_t time, + const char* type, + const char* info, + void* userdata); + +rocprofiler_thread_trace_decoder_status_t +rocprof_trace_decoder_dump_data(const char* data, + size_t data_size, + rocprofiler_thread_trace_decoder_debug_callback_t cb, + void* userdata); + +#ifdef __cplusplus +} +#endif diff --git a/tests/rocprofv3/CMakeLists.txt b/tests/rocprofv3/CMakeLists.txt index 6273c8a09d..436ce1302f 100644 --- a/tests/rocprofv3/CMakeLists.txt +++ b/tests/rocprofv3/CMakeLists.txt @@ -38,9 +38,7 @@ add_subdirectory(pc-sampling) add_subdirectory(collection-period) add_subdirectory(rocdecode-trace) add_subdirectory(rocjpeg-trace) -if(TARGET att_decoder_testing1) - add_subdirectory(advanced-thread-trace) -endif() +add_subdirectory(advanced-thread-trace) add_subdirectory(hip-stream-display) add_subdirectory(agent-index) add_subdirectory(negate-aggregate-tracing-options) diff --git a/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt b/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt index e65ece9d67..da3094bcec 100644 --- a/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt +++ b/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt @@ -48,7 +48,7 @@ find_library( HINTS ${LIB_PATH_LOC} PATHS ${ROCM_PATH} PATH_SUFFIXES lib - NAMES att_decoder_trace) + NAMES rocprof-trace-decoder) if(attdecoder) set(IS_DISABLED OFF) @@ -62,7 +62,7 @@ add_test( 1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD --att-buffer-size 0x6000000 --att-simd-select 0x3 --att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o out --output-format json - ${PRELOAD_ARGS} --att-library-path ${LIB_PATH_LOC} -- + ${PRELOAD_ARGS} --att-library-path ${LIB_PATH_LOC} 
--att-activity 8 -- $) set_tests_properties( @@ -164,7 +164,7 @@ add_test( set_tests_properties( rocprofv3-test-att-yaml-input-will-fail - PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED ${IS_DISABLED}) + PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED True) add_test(NAME rocprofv3-test-att-library-path-cmd-line-will-fail COMMAND $ --att --att-library-path . @@ -172,7 +172,7 @@ add_test(NAME rocprofv3-test-att-library-path-cmd-line-will-fail set_tests_properties( rocprofv3-test-att-library-path-cmd-line-will-fail - PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED ${IS_DISABLED}) + PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED True) add_test(NAME rocprofv3-test-att-library-path-env-var-will-fail COMMAND $ --att --log-level env --echo @@ -189,7 +189,7 @@ set_tests_properties( WILL_FAIL ON DISABLED - ${IS_DISABLED}) + True) # # Uses ATT and Counter Collection at the same time @@ -205,3 +205,45 @@ set_tests_properties( rocprofv3-test-hsa-multiqueue-att-plus-pmc-execute PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} ENVIRONMENT ${LIB_PATH_ENV}) + +# Check for conflict PMC + activity +add_test( + NAME rocprofv3-test-hsa-multiqueue-att-activity-pmc-will-fail + COMMAND + $ --log-level env --pmc SQ_WAVES + --advanced-thread-trace -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o + out --att-activity 8 ${PRELOAD_ARGS} -- $) + +set_tests_properties( + rocprofv3-test-hsa-multiqueue-att-activity-pmc-will-fail + PROPERTIES TIMEOUT + 45 + LABELS + "integration-tests" + DISABLED + ${IS_DISABLED} + ENVIRONMENT + ${LIB_PATH_ENV} + WILL_FAIL + ON) + +# Check for conflict Perfcounters + activity +add_test( + NAME rocprofv3-test-hsa-multiqueue-att-activity-perf-will-fail + COMMAND + $ --log-level env --att + --att-perfcounter-ctrl 8 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o + out --att-activity 8 ${PRELOAD_ARGS} -- $) + +set_tests_properties( + 
rocprofv3-test-hsa-multiqueue-att-activity-perf-will-fail + PROPERTIES TIMEOUT + 45 + LABELS + "integration-tests" + DISABLED + ${IS_DISABLED} + ENVIRONMENT + ${LIB_PATH_ENV} + WILL_FAIL + ON) diff --git a/tests/thread-trace/agent.cpp b/tests/thread-trace/agent.cpp index 3e45cd8f2d..4d57af5945 100644 --- a/tests/thread-trace/agent.cpp +++ b/tests/thread-trace/agent.cpp @@ -65,8 +65,8 @@ dispatch_tracing_callback(rocprofiler_callback_tracing_record_t record, if(var) return atoi(var); return def; }; - static int begin_dispatch = get_int_var("ROCPROFILER_ATT_BEGIN", 1); - static int end_dispatch = get_int_var("ROCPROFILER_ATT_END", 4); + static int begin_dispatch = get_int_var("ROCPROFILER_THREAD_TRACE_BEGIN", 1); + static int end_dispatch = get_int_var("ROCPROFILER_THREAD_TRACE_END", 4); static std::atomic isprofiling{false}; static std::mutex mut; @@ -115,12 +115,12 @@ query_available_agents(rocprofiler_agent_version_t /* version */, const auto* agent = static_cast(agents[idx]); if(agent->type != ROCPROFILER_AGENT_TYPE_GPU) continue; - std::vector parameters; - parameters.push_back({ROCPROFILER_ATT_PARAMETER_TARGET_CU, 1}); - parameters.push_back({ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, 0xF}); - parameters.push_back({ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, 0x6000000}); - parameters.push_back({ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, 0x11}); - parameters.push_back({ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, 0}); + std::vector parameters; + parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU, 1}); + parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT, 0xF}); + parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE, 0x6000000}); + parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK, 0x11}); + parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL, 0}); ROCPROFILER_CALL( rocprofiler_configure_device_thread_trace_service(agent_ctx, diff --git a/tests/thread-trace/multi_dispatch.cpp 
b/tests/thread-trace/multi_dispatch.cpp index 09c27b7ddf..bf47704b5e 100644 --- a/tests/thread-trace/multi_dispatch.cpp +++ b/tests/thread-trace/multi_dispatch.cpp @@ -36,7 +36,7 @@ namespace Multi { rocprofiler_client_id_t* client_id = nullptr; -rocprofiler_att_control_flags_t +rocprofiler_thread_trace_control_flags_t dispatch_callback(rocprofiler_agent_id_t /* agent */, rocprofiler_queue_id_t /* queue_id */, rocprofiler_async_correlation_id_t /* correlation_id */, @@ -46,12 +46,12 @@ dispatch_callback(rocprofiler_agent_id_t /* agent */, rocprofiler_user_data_t* dispatch_userdata) { static std::atomic count{0}; - if(count.fetch_add(1) > NUM_KERNELS) return ROCPROFILER_ATT_CONTROL_NONE; + if(count.fetch_add(1) > NUM_KERNELS) return ROCPROFILER_THREAD_TRACE_CONTROL_NONE; assert(userdata && "Dispatch callback passed null!"); dispatch_userdata->ptr = userdata; - return ROCPROFILER_ATT_CONTROL_START_AND_STOP; + return ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP; } int @@ -70,8 +70,8 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data) tool_data), "code object tracing service configure"); - std::vector params{}; - params.push_back({ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, 1}); + std::vector params{}; + params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL, 1}); std::vector agents{}; diff --git a/tests/thread-trace/single_dispatch.cpp b/tests/thread-trace/single_dispatch.cpp index 8ad0ec0298..4feb8b44d1 100644 --- a/tests/thread-trace/single_dispatch.cpp +++ b/tests/thread-trace/single_dispatch.cpp @@ -35,7 +35,7 @@ namespace Single { rocprofiler_client_id_t* client_id = nullptr; -rocprofiler_att_control_flags_t +rocprofiler_thread_trace_control_flags_t dispatch_callback(rocprofiler_agent_id_t /* agent */, rocprofiler_queue_id_t /* queue_id */, rocprofiler_async_correlation_id_t /* correlation_id */, @@ -55,16 +55,16 @@ dispatch_callback(rocprofiler_agent_id_t /* agent */, { auto& kernel_name = 
tool.kernel_id_to_kernel_name.at(kernel_id); if(kernel_name.find(desired_func_name) == std::string::npos) - return ROCPROFILER_ATT_CONTROL_NONE; + return ROCPROFILER_THREAD_TRACE_CONTROL_NONE; - return ROCPROFILER_ATT_CONTROL_START_AND_STOP; + return ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP; } catch(...) { std::cerr << "Could not find kernel id: " << kernel_id << std::endl; } C_API_END - return ROCPROFILER_ATT_CONTROL_NONE; + return ROCPROFILER_THREAD_TRACE_CONTROL_NONE; } int