diff --git a/source/include/rocprofiler-sdk/buffer_tracing.h b/source/include/rocprofiler-sdk/buffer_tracing.h index e241fdf6e7..2bffc42c18 100644 --- a/source/include/rocprofiler-sdk/buffer_tracing.h +++ b/source/include/rocprofiler-sdk/buffer_tracing.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -84,7 +85,30 @@ typedef struct } rocprofiler_buffer_tracing_hip_api_record_t; /** - * @brief Additional trace data for OMPT target routines + * @brief ROCProfiler Buffer HIP API Tracer Record including the function arguments and return value. + */ +typedef struct +{ + uint64_t size; ///< size of this struct + rocprofiler_buffer_tracing_kind_t kind; + rocprofiler_tracing_operation_t operation; + rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record + rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds + rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds + rocprofiler_thread_id_t thread_id; ///< id for thread generating this record + rocprofiler_hip_api_args_t args; ///< arguments of function call + rocprofiler_hip_api_retval_t retval; ///< return value of function call + + /// @var kind + /// @brief ::ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT or + /// ::ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT + /// @var operation + /// @brief Specification of the API function, e.g., ::rocprofiler_hip_runtime_api_id_t or + /// ::rocprofiler_hip_compiler_api_id_t +} rocprofiler_buffer_tracing_hip_api_ext_record_t; + +/** + * @brief Additional trace data for OpenMP target routines */ typedef struct rocprofiler_buffer_tracing_ompt_target_t @@ -494,6 +518,53 @@ rocprofiler_iterate_buffer_tracing_kind_operations( rocprofiler_buffer_tracing_kind_operation_cb_t callback, void* data) ROCPROFILER_API ROCPROFILER_NONNULL(2); +/** + * @brief Callback function for iterating over the function arguments to a traced function. + * This function will be invoked for each argument. 
+ * @see rocprofiler_iterate_buffer_tracing_record_args + * + * @param [in] kind domain + * @param [in] operation associated domain operation + * @param [in] arg_number the argument number, starting at zero + * @param [in] arg_value_addr the address of the argument stored by rocprofiler. + * @param [in] arg_indirection_count the total number of indirection levels for the argument, e.g. + * int == 0, int* == 1, int** == 2 + * @param [in] arg_type the typeid name of the argument (not demangled) + * @param [in] arg_name the name of the argument in the prototype (or rocprofiler union) + * @param [in] arg_value_str conversion of the argument to a string, e.g. operator<< overload + * @param [in] data user data + */ +typedef int (*rocprofiler_buffer_tracing_operation_args_cb_t)( + rocprofiler_buffer_tracing_kind_t kind, + rocprofiler_tracing_operation_t operation, + uint32_t arg_number, + const void* const arg_value_addr, + int32_t arg_indirection_count, + const char* arg_type, + const char* arg_name, + const char* arg_value_str, + void* data); + +/** + * @brief Iterates over all the arguments for the traced function (when available). This is + * particularly useful when tools want to annotate traces with the function arguments. See + * @example samples/api_buffer_tracing/client.cpp for a usage example. + * + * In contrast to ::rocprofiler_iterate_callback_tracing_kind_operation_args, this function + * cannot dereference pointer arguments since there is a high probability that the pointer + * address references the stack and the buffer tracing record is delivered after the + * stack variables of the corresponding function have been destroyed. 
+ * + * @param[in] record Buffer record + * @param[in] callback The callback function which will be invoked for each argument + * @param[in] user_data Data to be passed to each invocation of the callback + */ +rocprofiler_status_t +rocprofiler_iterate_buffer_tracing_record_args( + rocprofiler_record_header_t record, + rocprofiler_buffer_tracing_operation_args_cb_t callback, + void* user_data) ROCPROFILER_API ROCPROFILER_NONNULL(2); + /** @} */ ROCPROFILER_EXTERN_C_FINI diff --git a/source/include/rocprofiler-sdk/cxx/serialization.hpp b/source/include/rocprofiler-sdk/cxx/serialization.hpp index 9a8723746b..398c71a570 100644 --- a/source/include/rocprofiler-sdk/cxx/serialization.hpp +++ b/source/include/rocprofiler-sdk/cxx/serialization.hpp @@ -85,8 +85,69 @@ # define ROCPROFILER_SDK_CEREAL_NAMESPACE_END } // namespace cereal #endif +namespace rocprofiler +{ +namespace sdk +{ +namespace serialization +{ +struct buffer_tracing_args +{ + std::string type = {}; + std::string name = {}; + std::string value = {}; +}; + +template +auto +get_buffer_tracing_args(Tp& data) +{ + auto populate_args_array = [](rocprofiler_buffer_tracing_kind_t /*kind*/, + rocprofiler_tracing_operation_t /*operation*/, + uint32_t arg_number, + const void* const /*arg_value_addr*/, + int32_t /*arg_indirection_count*/, + const char* arg_type, + const char* arg_name, + const char* arg_value_str, + void* cb_data) -> int { + if(!cb_data) return 1; + + auto* vec = static_cast*>(cb_data); + auto sz = std::max(arg_number + 1, vec->size()); + vec->resize(sz, buffer_tracing_args{}); + vec->at(arg_number) = buffer_tracing_args{arg_type, arg_name, arg_value_str}; + + return 0; + }; + + auto ret = std::vector{}; + auto record = rocprofiler_record_header_t{}; + record.hash = + rocprofiler_record_header_compute_hash(ROCPROFILER_BUFFER_CATEGORY_TRACING, data.kind); + record.payload = &data; + + rocprofiler_iterate_buffer_tracing_record_args(record, populate_args_array, &ret); + + return ret; +} +} // namespace 
serialization +} // namespace sdk +} // namespace rocprofiler + ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN +namespace sdk = ::rocprofiler::sdk; + +template +void +save(ArchiveT& ar, const sdk::serialization::buffer_tracing_args& data) +{ + ROCP_SDK_SAVE_DATA_FIELD(type); + ROCP_SDK_SAVE_DATA_FIELD(name); + ROCP_SDK_SAVE_DATA_FIELD(value); +} + template void save(ArchiveT& ar, rocprofiler_context_id_t data) @@ -316,7 +377,7 @@ template void save(ArchiveT& ar, rocprofiler_hip_api_retval_t data) { - ROCP_SDK_SAVE_DATA_FIELD(hipError_t_retval); + ROCP_SDK_SAVE_DATA_FIELD(uint64_t_retval); } template @@ -513,6 +574,16 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_hip_api_record_t data) save_buffer_tracing_api_record(ar, data); } +template +void +save(ArchiveT& ar, rocprofiler_buffer_tracing_hip_api_ext_record_t data) +{ + save_buffer_tracing_api_record(ar, data); + auto args = sdk::serialization::get_buffer_tracing_args(data); + ROCP_SDK_SAVE_VALUE("args", args); + ROCP_SDK_SAVE_DATA_FIELD(retval); +} + template void save(ArchiveT& ar, rocprofiler_buffer_tracing_marker_api_record_t data) diff --git a/source/include/rocprofiler-sdk/fwd.h b/source/include/rocprofiler-sdk/fwd.h index b5a634b7df..e60e411974 100644 --- a/source/include/rocprofiler-sdk/fwd.h +++ b/source/include/rocprofiler-sdk/fwd.h @@ -214,7 +214,16 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, ///< rocDecode tracing ROCPROFILER_BUFFER_TRACING_ROCJPEG_API, ///< rocJPEG tracing ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API, ///< Display HIP Stream + ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT, + ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT, ROCPROFILER_BUFFER_TRACING_LAST, + + /// @var ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT + /// @brief Similar to ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API except the buffer record + /// contains the function argument(s) and return value + /// @var ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT + /// @brief Similar to 
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API except the buffer record + /// contains the function argument(s) and return value } rocprofiler_buffer_tracing_kind_t; /** diff --git a/source/include/rocprofiler-sdk/hip/api_args.h b/source/include/rocprofiler-sdk/hip/api_args.h index ea0af4ec08..0a3657d82d 100644 --- a/source/include/rocprofiler-sdk/hip/api_args.h +++ b/source/include/rocprofiler-sdk/hip/api_args.h @@ -35,6 +35,8 @@ #include #include +#include + ROCPROFILER_EXTERN_C_INIT // Empty struct has a size of 0 in C but size of 1 in C++. @@ -52,6 +54,7 @@ typedef union rocprofiler_hip_api_retval_t ~rocprofiler_hip_api_retval_t() = default; #endif + uint64_t uint64_t_retval; int int_retval; const char* const_charp_retval; hipError_t hipError_t_retval; diff --git a/source/lib/output/buffered_output.hpp b/source/lib/output/buffered_output.hpp index 0cb4ecfa58..90de2d4aa7 100644 --- a/source/lib/output/buffered_output.hpp +++ b/source/lib/output/buffered_output.hpp @@ -138,7 +138,7 @@ buffered_output::destroy() } using hip_buffered_output_t = - buffered_output; + buffered_output; using hsa_buffered_output_t = buffered_output; using marker_buffered_output_t = diff --git a/source/lib/output/generateCSV.cpp b/source/lib/output/generateCSV.cpp index cb75b9b550..78581d86c0 100644 --- a/source/lib/output/generateCSV.cpp +++ b/source/lib/output/generateCSV.cpp @@ -316,10 +316,10 @@ generate_csv(const output_config& cfg, } void -generate_csv(const output_config& cfg, - const metadata& tool_metadata, - const generator& data, - const stats_entry_t& stats) +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) { if(data.empty()) return; diff --git a/source/lib/output/generateCSV.hpp b/source/lib/output/generateCSV.hpp index b1c6d6e3e8..29df079987 100644 --- a/source/lib/output/generateCSV.hpp +++ b/source/lib/output/generateCSV.hpp @@ -46,10 +46,10 @@ generate_csv(const output_config& cfg, const 
stats_entry_t& stats); void -generate_csv(const output_config& cfg, - const metadata& tool_metadata, - const generator& data, - const stats_entry_t& stats); +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); void generate_csv(const output_config& cfg, diff --git a/source/lib/output/generateJSON.cpp b/source/lib/output/generateJSON.cpp index f87870ea43..7bc279ba34 100644 --- a/source/lib/output/generateJSON.cpp +++ b/source/lib/output/generateJSON.cpp @@ -187,7 +187,7 @@ write_json(json_output& json_ar, const output_config& /*cfg*/, const metadata& /*tool_metadata*/, const domain_stats_vec_t& domain_stats, - generator&& hip_api_gen, + generator&& hip_api_gen, generator hsa_api_gen, generator kernel_dispatch_gen, generator memory_copy_gen, diff --git a/source/lib/output/generateJSON.hpp b/source/lib/output/generateJSON.hpp index 1b9343e9c4..b9db97a3c9 100644 --- a/source/lib/output/generateJSON.hpp +++ b/source/lib/output/generateJSON.hpp @@ -85,7 +85,7 @@ write_json(json_output& j const output_config& cfg, const metadata& tool_metadata, const domain_stats_vec_t& domain_stats, - generator&& hip_api_gen, + generator&& hip_api_gen, generator hsa_api_gen, generator kernel_dispatch_gen, generator memory_copy_gen, diff --git a/source/lib/output/generateOTF2.cpp b/source/lib/output/generateOTF2.cpp index 1628f8f529..737663e7c9 100644 --- a/source/lib/output/generateOTF2.cpp +++ b/source/lib/output/generateOTF2.cpp @@ -360,7 +360,7 @@ write_otf2( const metadata& tool_metadata, uint64_t pid, const std::vector& agent_data, - std::deque* hip_api_data, + std::deque* hip_api_data, std::deque* hsa_api_data, std::deque* kernel_dispatch_data, std::deque* memory_copy_data, diff --git a/source/lib/output/generateOTF2.hpp b/source/lib/output/generateOTF2.hpp index f43945d5ec..aa65dea91e 100644 --- a/source/lib/output/generateOTF2.hpp +++ b/source/lib/output/generateOTF2.hpp @@ -40,7 +40,7 @@ write_otf2( 
const metadata& tool_metadata, uint64_t pid, const std::vector& agent_data, - std::deque* hip_api_data, + std::deque* hip_api_data, std::deque* hsa_api_data, std::deque* kernel_dispatch_data, std::deque* memory_copy_data, diff --git a/source/lib/output/generatePerfetto.cpp b/source/lib/output/generatePerfetto.cpp index aeb356796d..296d69a3e2 100644 --- a/source/lib/output/generatePerfetto.cpp +++ b/source/lib/output/generatePerfetto.cpp @@ -68,7 +68,7 @@ write_perfetto( const output_config& ocfg, const metadata& tool_metadata, std::vector agent_data, - const generator& hip_api_gen, + const generator& hip_api_gen, const generator& hsa_api_gen, const generator& kernel_dispatch_gen, const generator& memory_copy_gen, diff --git a/source/lib/output/generatePerfetto.hpp b/source/lib/output/generatePerfetto.hpp index fbb1d66cf3..cf26ef3433 100644 --- a/source/lib/output/generatePerfetto.hpp +++ b/source/lib/output/generatePerfetto.hpp @@ -40,7 +40,7 @@ write_perfetto( const output_config& cfg, const metadata& tool_metadata, std::vector agent_data, - const generator& hip_api_gen, + const generator& hip_api_gen, const generator& hsa_api_gen, const generator& kernel_dispatch_gen, const generator& memory_copy_gen, diff --git a/source/lib/output/generateStats.cpp b/source/lib/output/generateStats.cpp index 541409fd56..98f80783b5 100644 --- a/source/lib/output/generateStats.cpp +++ b/source/lib/output/generateStats.cpp @@ -83,8 +83,8 @@ generate_stats(const output_config& /*cfg*/, stats_entry_t generate_stats(const output_config& /*cfg*/, - const metadata& tool_metadata, - const generator& data) + const metadata& tool_metadata, + const generator& data) { auto hip_stats = stats_map_t{}; for(auto ditr : data) diff --git a/source/lib/output/generateStats.hpp b/source/lib/output/generateStats.hpp index 386f94fe67..ef7ae20aba 100644 --- a/source/lib/output/generateStats.hpp +++ b/source/lib/output/generateStats.hpp @@ -37,9 +37,9 @@ generate_stats(const output_config& cfg, const 
generator& data); stats_entry_t -generate_stats(const output_config& cfg, - const metadata& tool_metadata, - const generator& data); +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); stats_entry_t generate_stats(const output_config& cfg, diff --git a/source/lib/rocprofiler-sdk-tool/tool.cpp b/source/lib/rocprofiler-sdk-tool/tool.cpp index 87352c623a..0d3ffb66ff 100644 --- a/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -541,12 +541,12 @@ hip_stream_display_callback(rocprofiler_callback_tracing_record_t record, // STREAM_HANDLE_CREATE and DESTROY are no-ops if(record.operation == ROCPROFILER_HIP_STREAM_CREATE) { - ROCP_INFO + ROCP_TRACE << "Entered hip_stream_display_callback function for ROCPROFILER_HIP_STREAM_CREATE"; } else if(record.operation == ROCPROFILER_HIP_STREAM_DESTROY) { - ROCP_INFO + ROCP_TRACE << "Entered hip_stream_display_callback function for ROCPROFILER_HIP_STREAM_DESTROY"; } else if(record.operation == ROCPROFILER_HIP_STREAM_SET) @@ -554,15 +554,15 @@ hip_stream_display_callback(rocprofiler_callback_tracing_record_t record, // Push the stream ID onto the stream stack when before underlying HIP function is called if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) { - ROCP_INFO << "Entered hip_stream_display_callback function for " - "ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_ENTER"; + ROCP_TRACE << "Entered hip_stream_display_callback function for " + "ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_ENTER"; rocprofiler::tool::stream::push_stream_id(stream_id); } // Pop stream ID off of stream stack after underlying HIP function is completed else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) { - ROCP_INFO << "Entered hip_stream_display_callback function for " - "ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_EXIT"; + ROCP_TRACE << "Entered hip_stream_display_callback function for " + 
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_EXIT"; rocprofiler::tool::stream::pop_stream_id(); } } @@ -948,11 +948,11 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, tool::write_ring_buffer(*record, domain_type::SCRATCH_MEMORY); } - else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API || - header->kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API) + else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT || + header->kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT) { auto* record = - static_cast(header->payload); + static_cast(header->payload); tool::write_ring_buffer(*record, domain_type::HIP); } @@ -979,10 +979,18 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, } else { - ROCP_FATAL << fmt::format( - "unsupported category + kind: {} + {}", header->category, header->kind); + ROCP_CI_LOG(WARNING) << fmt::format( + "unsupported ROCPROFILER_BUFFER_CATEGORY_TRACING kind: {} :: {}", + header->kind, + tool_metadata->get_kind_name( + static_cast(header->kind))); } } + else + { + ROCP_CI_LOG(WARNING) << fmt::format( + "unsupported category + kind: {} + {}", header->category, header->kind); + } } } @@ -1682,7 +1690,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) { ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( get_client_ctx(), - ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API, + ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT, nullptr, 0, get_buffers().hip_api_trace), @@ -1693,7 +1701,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) { ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( get_client_ctx(), - ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, + ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT, nullptr, 0, get_buffers().hip_api_trace), diff --git a/source/lib/rocprofiler-sdk/buffer_tracing.cpp b/source/lib/rocprofiler-sdk/buffer_tracing.cpp index 73b358a278..dbee75341b 100644 --- 
a/source/lib/rocprofiler-sdk/buffer_tracing.cpp +++ b/source/lib/rocprofiler-sdk/buffer_tracing.cpp @@ -39,6 +39,7 @@ #include "lib/rocprofiler-sdk/rocjpeg/rocjpeg.hpp" #include "lib/rocprofiler-sdk/runtime_initialization.hpp" +#include #include #include #include @@ -99,6 +100,8 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(RUNTIME_INITIALIZATION) ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCDECODE_API) ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCJPEG_API) ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_STREAM_API) +ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_RUNTIME_API_EXT) +ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_COMPILER_API_EXT) template std::pair @@ -263,11 +266,13 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_ break; } case ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API: + case ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT: { val = rocprofiler::hip::name_by_id(operation); break; } case ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API: + case ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT: { val = rocprofiler::hip::name_by_id(operation); break; @@ -410,11 +415,13 @@ rocprofiler_iterate_buffer_tracing_kind_operations( break; } case ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API: + case ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT: { ops = rocprofiler::hip::get_ids(); break; } case ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API: + case ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT: { ops = rocprofiler::hip::get_ids(); break; @@ -467,6 +474,54 @@ rocprofiler_iterate_buffer_tracing_kind_operations( } return ROCPROFILER_STATUS_SUCCESS; } + +rocprofiler_status_t +rocprofiler_iterate_buffer_tracing_record_args( + rocprofiler_record_header_t record, + rocprofiler_buffer_tracing_operation_args_cb_t callback, + void* user_data) +{ + switch(record.kind) + { + case ROCPROFILER_BUFFER_TRACING_NONE: + case ROCPROFILER_BUFFER_TRACING_LAST: + { + return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + } + case ROCPROFILER_BUFFER_TRACING_HSA_CORE_API: + case 
ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API: + case ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API: + case ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API: + case ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API: + case ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API: + case ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API: + case ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY: + case ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH: + case ROCPROFILER_BUFFER_TRACING_MEMORY_COPY: + case ROCPROFILER_BUFFER_TRACING_RCCL_API: + { + return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; + } + case ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT: + { + auto* _payload = + static_cast(record.payload); + rocprofiler::hip::iterate_args( + _payload->operation, _payload->args, callback, user_data); + return ROCPROFILER_STATUS_SUCCESS; + } + case ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT: + { + auto* _payload = + static_cast(record.payload); + rocprofiler::hip::iterate_args( + _payload->operation, _payload->args, callback, user_data); + return ROCPROFILER_STATUS_SUCCESS; + } + } + + return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; +} } #undef RETURN_STATUS_ON_FAIL diff --git a/source/lib/rocprofiler-sdk/callback_tracing.cpp b/source/lib/rocprofiler-sdk/callback_tracing.cpp index 9e019cb718..153055d0d4 100644 --- a/source/lib/rocprofiler-sdk/callback_tracing.cpp +++ b/source/lib/rocprofiler-sdk/callback_tracing.cpp @@ -548,7 +548,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args( { rocprofiler::hip::iterate_args( record.operation, - *static_cast(record.payload), + static_cast(record.payload)->args, callback, max_deref, user_data); @@ -558,7 +558,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args( { rocprofiler::hip::iterate_args( record.operation, - *static_cast(record.payload), + static_cast(record.payload)->args, callback, max_deref, user_data); diff --git a/source/lib/rocprofiler-sdk/context/context.hpp b/source/lib/rocprofiler-sdk/context/context.hpp index 13075e0ce5..e43557526d 
100644 --- a/source/lib/rocprofiler-sdk/context/context.hpp +++ b/source/lib/rocprofiler-sdk/context/context.hpp @@ -135,6 +135,13 @@ struct context template bool is_tracing(KindT _kind) const; + + /** @brief true if at least one of the given kinds is being traced (false if none given) */ + template + bool is_tracing_one_of(Args... _args) const + { + return (is_tracing(_args) || ...); + } }; // set the client index needs to be called before allocate_context() diff --git a/source/lib/rocprofiler-sdk/hip/defines.hpp b/source/lib/rocprofiler-sdk/hip/defines.hpp index c54cab3834..0ed510bb98 100644 --- a/source/lib/rocprofiler-sdk/hip/defines.hpp +++ b/source/lib/rocprofiler-sdk/hip/defines.hpp @@ -93,9 +93,13 @@ return &base_type::functor; \ } \ \ - static std::vector as_arg_addr(callback_data_type) { return std::vector{}; } \ + static std::vector as_arg_addr(rocprofiler_hip_api_args_t) \ + { \ + return std::vector{}; \ + } \ \ - static std::vector as_arg_list(callback_data_type, int32_t) \ + static std::vector as_arg_list(rocprofiler_hip_api_args_t, \ + int32_t) \ { \ return {}; \ } \ @@ -177,17 +181,16 @@ return common::mpl::function_args_t{}; \ } \ \ - static std::vector as_arg_addr(callback_data_type trace_data) \ + static std::vector as_arg_addr(rocprofiler_hip_api_args_t args) \ { \ return std::vector{ \ - GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)}; \ + GET_ADDR_MEMBER_FIELDS(get_api_data_args(args), __VA_ARGS__)}; \ } \ \ - static auto as_arg_list(callback_data_type trace_data, int32_t max_deref) \ + static auto as_arg_list(rocprofiler_hip_api_args_t args, int32_t max_deref) \ { \ return utils::stringize( \ - max_deref, \ - GET_NAMED_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)); \ + max_deref, GET_NAMED_MEMBER_FIELDS(get_api_data_args(args), __VA_ARGS__)); \ } \ }; \ } \ diff --git a/source/lib/rocprofiler-sdk/hip/hip.cpp b/source/lib/rocprofiler-sdk/hip/hip.cpp index e38c4bbe55..b0b4f027c7 100644 --- a/source/lib/rocprofiler-sdk/hip/hip.cpp +++ b/source/lib/rocprofiler-sdk/hip/hip.cpp @@ -22,11 
+22,14 @@ #include "lib/rocprofiler-sdk/hip/hip.hpp" #include "lib/common/defines.hpp" +#include "lib/common/logging.hpp" +#include "lib/common/mpl.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/hip/utils.hpp" #include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/tracing/fwd.hpp" #include "lib/rocprofiler-sdk/tracing/tracing.hpp" #include @@ -185,6 +188,7 @@ hip_api_impl::functor(Args... args) using info_type = hip_api_info; using callback_api_data_t = typename hip_domain_info::callback_data_type; using buffered_api_data_t = typename hip_domain_info::buffered_data_type; + using buffered_ext_data_t = typename hip_domain_info::buffered_ext_data_type; constexpr auto external_corr_id_domain_idx = hip_domain_info::external_correlation_id_domain_idx; @@ -202,6 +206,7 @@ hip_api_impl::functor(Args... args) auto thr_id = common::get_tid(); auto callback_contexts = tracing::callback_context_data_vec_t{}; auto buffered_contexts = tracing::buffered_context_data_vec_t{}; + auto extended_contexts = tracing::buffered_context_data_vec_t{}; auto external_corr_ids = tracing::external_correlation_id_map_t{}; tracing::populate_contexts(info_type::callback_domain_idx, @@ -211,7 +216,12 @@ hip_api_impl::functor(Args... args) buffered_contexts, external_corr_ids); - if(callback_contexts.empty() && buffered_contexts.empty()) + tracing::populate_contexts(info_type::buffered_ext_domain_idx, + info_type::operation_idx, + extended_contexts, + external_corr_ids); + + if(callback_contexts.empty() && buffered_contexts.empty() && extended_contexts.empty()) { [[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward(args)...); if constexpr(!std::is_void::value) @@ -221,6 +231,7 @@ hip_api_impl::functor(Args... 
args) } auto buffer_record = common::init_public_api_struct(buffered_api_data_t{}); + auto extended_record = common::init_public_api_struct(buffered_ext_data_t{}); auto tracer_data = common::init_public_api_struct(callback_api_data_t{}); auto* corr_id = tracing::correlation_service::construct(ref_count); auto internal_corr_id = corr_id->internal; @@ -232,12 +243,16 @@ hip_api_impl::functor(Args... args) info_type::operation_idx, internal_corr_id); - // invoke the callbacks - if(!callback_contexts.empty()) + // set the arguments + if(!callback_contexts.empty() || !extended_contexts.empty()) { set_data_args(info_type::get_api_data_args(tracer_data.args), convert_arg_type(std::forward(args))...); + } + // invoke the callbacks + if(!callback_contexts.empty()) + { tracing::execute_phase_enter_callbacks(callback_contexts, thr_id, internal_corr_id, @@ -253,7 +268,7 @@ hip_api_impl::functor(Args... args) external_corr_ids, thr_id, external_corr_id_domain_idx); // record the start timestamp as close to the function call as possible - if(!buffered_contexts.empty()) + if(!buffered_contexts.empty() || !extended_contexts.empty()) { buffer_record.start_timestamp = common::timestamp_ns(); } @@ -264,15 +279,18 @@ hip_api_impl::functor(Args... args) auto _ret = exec(info_type::get_table_func(), std::forward(args)...); // record the end timestamp as close to the function call as possible - if(!buffered_contexts.empty()) + if(!buffered_contexts.empty() || !extended_contexts.empty()) { buffer_record.end_timestamp = common::timestamp_ns(); } - if(!callback_contexts.empty()) + if(!callback_contexts.empty() || !extended_contexts.empty()) { set_data_retval(tracer_data.retval, _ret); + } + if(!callback_contexts.empty()) + { tracing::execute_phase_exit_callbacks(callback_contexts, external_corr_ids, info_type::callback_domain_idx, @@ -292,6 +310,23 @@ hip_api_impl::functor(Args... 
args) buffer_record); } + if(!extended_contexts.empty()) + { + extended_record.start_timestamp = buffer_record.start_timestamp; + extended_record.end_timestamp = buffer_record.end_timestamp; + extended_record.args = tracer_data.args; + extended_record.retval = tracer_data.retval; + + tracing::execute_buffer_record_emplace(extended_contexts, + thr_id, + internal_corr_id, + external_corr_ids, + ancestor_corr_id, + info_type::buffered_ext_domain_idx, + info_type::operation_idx, + extended_record); + } + // decrement the reference count after usage in the callback/buffers corr_id->sub_ref_count(); @@ -360,13 +395,13 @@ get_names(std::vector& _name_list, std::index_sequence(_name_list, std::index_sequence{}); } -template +template void -iterate_args(const uint32_t id, - const DataT& data, - rocprofiler_callback_tracing_operation_args_cb_t func, - int32_t max_deref, - void* user_data, +iterate_args(const uint32_t id, + const DataT& data, + FuncT func, + int32_t max_deref, + void* user_data, std::index_sequence) { if(OpIdx == id) @@ -376,16 +411,42 @@ iterate_args(const uint32_t id, auto&& arg_addr = info_type::as_arg_addr(data); for(size_t i = 0; i < std::min(arg_list.size(), arg_addr.size()); ++i) { - auto ret = func(info_type::callback_domain_idx, // kind - id, // operation - i, // arg_number - arg_addr.at(i), // arg_value_addr - arg_list.at(i).indirection_level, // indirection - arg_list.at(i).type, // arg_type - arg_list.at(i).name, // arg_name - arg_list.at(i).value.c_str(), // arg_value_str - arg_list.at(i).dereference_count, // num deref in str - user_data); + using return_type = typename common::mpl::function_traits::result_type; + + auto ret = return_type{}; + if constexpr(std::is_same::value) + { + ret = func(info_type::callback_domain_idx, // kind + id, // operation + i, // arg_number + arg_addr.at(i), // arg_value_addr + arg_list.at(i).indirection_level, // indirection + arg_list.at(i).type, // arg_type + arg_list.at(i).name, // arg_name + 
arg_list.at(i).value.c_str(), // arg_value_str + arg_list.at(i).dereference_count, // num deref in str + user_data); + } + else if constexpr(std::is_same::value) + { + ret = func(info_type::buffered_ext_domain_idx, // kind + id, // operation + i, // arg_number + arg_addr.at(i), // arg_value_addr + arg_list.at(i).indirection_level, // indirection + arg_list.at(i).type, // arg_type + arg_list.at(i).name, // arg_name + arg_list.at(i).value.c_str(), // arg_value_str + user_data); + } + else + { + static_assert(common::mpl::assert_false::value, + "Error! unsupported callback type"); + } + if(ret != 0) break; } return; @@ -398,6 +459,7 @@ iterate_args(const uint32_t id, bool should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain, rocprofiler_buffer_tracing_kind_t _buffered_domain, + rocprofiler_buffer_tracing_kind_t _buffered_ext_domain, int _operation) { // we loop over all the *registered* contexts and see if any of them, at any point in time, @@ -415,6 +477,11 @@ should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain, if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_domain) && itr->buffered_tracer->domains(_buffered_domain, _operation)) return true; + + // if there is a buffered tracer enabled for the given domain and op, we need to wrap + if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_ext_domain) && + itr->buffered_tracer->domains(_buffered_ext_domain, _operation)) + return true; } return false; } @@ -473,8 +540,10 @@ update_table(Tp* _orig, std::integral_constant) if(_info.offset() >= _orig->size) return; // check to see if there are any contexts which enable this operation in the HIP API domain - if(!should_wrap_functor( - _info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx)) + if(!should_wrap_functor(_info.callback_domain_idx, + _info.buffered_domain_idx, + _info.buffered_ext_domain_idx, + _info.operation_idx)) return; ROCP_TRACE << "updating table entry for " << 
_info.name; @@ -546,11 +615,11 @@ get_names() template void -iterate_args(uint32_t id, - const rocprofiler_callback_tracing_hip_api_data_t& data, - rocprofiler_callback_tracing_operation_args_cb_t callback, - int32_t max_deref, - void* user_data) +iterate_args(uint32_t id, + const rocprofiler_hip_api_args_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + int32_t max_deref, + void* user_data) { if(callback) iterate_args(id, @@ -561,6 +630,22 @@ iterate_args(uint32_t id, std::make_index_sequence::last>{}); } +template +void +iterate_args(uint32_t id, + const rocprofiler_hip_api_args_t& data, + rocprofiler_buffer_tracing_operation_args_cb_t callback, + void* user_data) +{ + if(callback) + iterate_args(id, + data, + callback, + 0, + user_data, + std::make_index_sequence::last>{}); +} + template void copy_table(TableT* _orig, uint64_t _tbl_instance) @@ -580,8 +665,9 @@ update_table(TableT* _orig) update_table(_orig, std::make_index_sequence::last>{}); } -using hip_api_data_t = rocprofiler_callback_tracing_hip_api_data_t; +using hip_api_data_t = rocprofiler_hip_api_args_t; using hip_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t; +using hip_op_args_bf_t = rocprofiler_buffer_tracing_operation_args_cb_t; #define INSTANTIATE_HIP_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \ template void copy_table(TABLE_TYPE * _tbl, uint64_t _instv); \ @@ -591,7 +677,8 @@ using hip_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t; template std::vector get_ids(); \ template std::vector get_names(); \ template void iterate_args( \ - uint32_t, const hip_api_data_t&, hip_op_args_cb_t, int32_t, void*); + uint32_t, const hip_api_data_t&, hip_op_args_cb_t, int32_t, void*); \ + template void iterate_args(uint32_t, const hip_api_data_t&, hip_op_args_bf_t, void*); INSTANTIATE_HIP_TABLE_FUNC(hip_runtime_api_table_t, ROCPROFILER_HIP_TABLE_ID_Runtime) INSTANTIATE_HIP_TABLE_FUNC(hip_compiler_api_table_t, ROCPROFILER_HIP_TABLE_ID_Compiler) diff --git 
a/source/lib/rocprofiler-sdk/hip/hip.def.cpp b/source/lib/rocprofiler-sdk/hip/hip.def.cpp index cd836ac768..eaa70a4ab9 100644 --- a/source/lib/rocprofiler-sdk/hip/hip.def.cpp +++ b/source/lib/rocprofiler-sdk/hip/hip.def.cpp @@ -36,20 +36,22 @@ namespace hip template <> struct hip_domain_info { - using args_type = rocprofiler_hip_api_args_t; - using retval_type = rocprofiler_hip_api_retval_t; - using callback_data_type = rocprofiler_callback_tracing_hip_api_data_t; - using buffered_data_type = rocprofiler_buffer_tracing_hip_api_record_t; + using args_type = rocprofiler_hip_api_args_t; + using retval_type = rocprofiler_hip_api_retval_t; + using callback_data_type = rocprofiler_callback_tracing_hip_api_data_t; + using buffered_data_type = rocprofiler_buffer_tracing_hip_api_record_t; + using buffered_ext_data_type = rocprofiler_buffer_tracing_hip_api_ext_record_t; }; template <> struct hip_domain_info : hip_domain_info { - static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API; - static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API; - static constexpr auto none = ROCPROFILER_HIP_RUNTIME_API_ID_NONE; - static constexpr auto last = ROCPROFILER_HIP_RUNTIME_API_ID_LAST; + static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API; + static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API; + static constexpr auto buffered_ext_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT; + static constexpr auto none = ROCPROFILER_HIP_RUNTIME_API_ID_NONE; + static constexpr auto last = ROCPROFILER_HIP_RUNTIME_API_ID_LAST; static constexpr auto external_correlation_id_domain_idx = ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_RUNTIME_API; }; @@ -58,10 +60,11 @@ template <> struct hip_domain_info : hip_domain_info { - static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API; - static constexpr auto buffered_domain_idx = 
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API; - static constexpr auto none = ROCPROFILER_HIP_COMPILER_API_ID_NONE; - static constexpr auto last = ROCPROFILER_HIP_COMPILER_API_ID_LAST; + static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API; + static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API; + static constexpr auto buffered_ext_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT; + static constexpr auto none = ROCPROFILER_HIP_COMPILER_API_ID_NONE; + static constexpr auto last = ROCPROFILER_HIP_COMPILER_API_ID_LAST; static constexpr auto external_correlation_id_domain_idx = ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_COMPILER_API; }; diff --git a/source/lib/rocprofiler-sdk/hip/hip.hpp b/source/lib/rocprofiler-sdk/hip/hip.hpp index af44bc4a1a..40f7964384 100644 --- a/source/lib/rocprofiler-sdk/hip/hip.hpp +++ b/source/lib/rocprofiler-sdk/hip/hip.hpp @@ -91,11 +91,18 @@ get_ids(); template void -iterate_args(uint32_t id, - const rocprofiler_callback_tracing_hip_api_data_t& data, - rocprofiler_callback_tracing_operation_args_cb_t callback, - int32_t max_deref, - void* user_data); +iterate_args(uint32_t id, + const rocprofiler_hip_api_args_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + int32_t max_deref, + void* user_data); + +template +void +iterate_args(uint32_t id, + const rocprofiler_hip_api_args_t& data, + rocprofiler_buffer_tracing_operation_args_cb_t callback, + void* user_data); template void diff --git a/source/lib/rocprofiler-sdk/hip/stream.cpp b/source/lib/rocprofiler-sdk/hip/stream.cpp index 900bba0e9e..c78d36d961 100644 --- a/source/lib/rocprofiler-sdk/hip/stream.cpp +++ b/source/lib/rocprofiler-sdk/hip/stream.cpp @@ -34,12 +34,14 @@ #include "lib/rocprofiler-sdk/hsa/queue_controller.hpp" #include "lib/rocprofiler-sdk/registration.hpp" #include "lib/rocprofiler-sdk/tracing/tracing.hpp" +#include "rocprofiler-sdk/hip/compiler_api_id.h" #include 
#include #include #include #include +#include #include #include @@ -79,16 +81,26 @@ add_stream(hipStream_t stream) { return get_stream_map()->wlock( [](stream_map_t& _data, hipStream_t _stream) { - if(_data.count(_stream) == 0) + static uint64_t idx_offset = 0; + + auto idx = _data.size() + idx_offset; + ROCP_INFO << fmt::format( + "hipStream_t={} :: id={}.handle={}{}", static_cast(_stream), '{', idx, '}'); + + ROCP_CI_LOG_IF(WARNING, idx == 0 && _stream != nullptr) + << "null hip stream does not have index 0"; + + if(!_data.emplace(_stream, rocprofiler_stream_id_t{.handle = idx}).second) { - auto idx = _data.size(); - ROCP_INFO << fmt::format("hipStream_t={} :: id={}.handle={}{}", - static_cast(_stream), - '{', - idx, - '}'); - _data.emplace(_stream, rocprofiler_stream_id_t{.handle = idx}); + idx_offset += 1; + auto _existing = _data.at(_stream); + ROCP_INFO << "existing hipStream_t (" + << sdk::utility::as_hex(static_cast(_stream)) + << ") reallocated. rocprofiler_stream_id_t{.handle = " << _existing.handle + << "} -> rocprofiler_stream_id_t{.handle = " << idx << "}"; + _data.at(_stream) = rocprofiler_stream_id_t{.handle = idx}; } + return _data.at(_stream); }, stream); @@ -353,13 +365,16 @@ enable_stream_stack() for(const auto& itr : context::get_registered_contexts()) { - if(itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY) || - itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API) || - itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API) || - itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API) || - itr->is_tracing(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) || - itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API) || - itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API)) + if(itr->is_tracing_one_of(ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY, + ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API, + ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, + ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API, + 
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, + ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API, + ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, + ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API, + ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT, + ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT)) return true; } @@ -371,8 +386,9 @@ enable_compiler_stream_stack() { for(const auto& itr : context::get_registered_contexts()) { - if(itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API) || - itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API)) + if(itr->is_tracing_one_of(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, + ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, + ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT)) return true; } @@ -399,6 +415,11 @@ update_table(Tp* _orig, std::integral_constant) ROCP_TRACE << "updating table entry for " << _info.name; constexpr auto num_args = function_args_type::size(); + constexpr auto is_hip_pop_call_config_func = + std::is_same::value && + (static_cast(info_type::operation_idx) == + ROCPROFILER_HIP_COMPILER_API_ID___hipPopCallConfiguration); if constexpr(common::mpl::is_one_of::value) { @@ -430,14 +451,17 @@ update_table(Tp* _orig, std::integral_constant) ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamDestroy); if constexpr(is_hip_destroy_func) { + ROCP_INFO << _info.name << " has been designated as a stream destroy function"; _func = create_destroy_functor(_func); } else { + ROCP_INFO << _info.name << " has been designated as a stream set function"; _func = create_read_functor(_func); } } - else if constexpr(common::mpl::is_one_of::value) + else if constexpr(common::mpl::is_one_of::value && + !is_hip_pop_call_config_func) { constexpr auto stream_idx = common::mpl::index_of::value; @@ -456,6 +480,8 @@ update_table(Tp* _orig, std::integral_constant) if(!enable_compiler_stream_stack()) return; } + ROCP_INFO << _info.name << " has been designated as a stream create function"; + // 1. 
get the sub-table containing the function pointer in original table // 2. get reference to function pointer in sub-table in original table // 3. update function pointer with wrapper @@ -464,6 +490,9 @@ update_table(Tp* _orig, std::integral_constant) _func = create_write_functor(_func); } } + + // suppress unused-but-set-parameter warning + common::consume_args(_orig); } template diff --git a/source/lib/rocprofiler-sdk/hsa/async_copy.cpp b/source/lib/rocprofiler-sdk/hsa/async_copy.cpp index f9205cddec..50e7da6bb9 100644 --- a/source/lib/rocprofiler-sdk/hsa/async_copy.cpp +++ b/source/lib/rocprofiler-sdk/hsa/async_copy.cpp @@ -168,6 +168,11 @@ struct async_copy_data buffered_data_t get_buffered_record(const context_t* _ctx, timestamp_t _beg = 0, timestamp_t _end = 0) const; + + auto get_lock() { return std::make_unique>(m_mtx); } + +private: + std::mutex m_mtx = {}; }; async_copy_data::callback_data_t @@ -347,12 +352,22 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg) auto ts = common::timestamp_ns(); auto* _data = static_cast(arg); + auto _lk = _data->get_lock(); auto copy_time = hsa_amd_profiling_async_copy_time_t{}; auto copy_time_status = get_amd_ext_table()->hsa_amd_profiling_get_async_copy_time_fn( _data->rocp_signal, ©_time); auto _profile_time = tracing::profiling_time{copy_time_status, copy_time.start, copy_time.end}; + // we need to decrement this reference count at the end of the functions + auto* _corr_id = _data->correlation_id; + auto _dtor = common::scope_destructor{[&_lk, &_data, &_corr_id]() { + _lk.reset(); // reset the unique_ptr so the lock is released + delete _data; + + if(_corr_id) _corr_id->sub_ref_count(); + }}; + if(_profile_time.status == HSA_STATUS_SUCCESS) { _profile_time = tracing::adjust_profiling_time( @@ -375,8 +390,6 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg) // get the contexts that were active when the signal was created const auto& tracing_data = _data->tracing_data; - // we need to 
decrement this reference count at the end of the functions - auto* _corr_id = _data->correlation_id; if(_profile_time.status == HSA_STATUS_SUCCESS && !tracing_data.empty()) { @@ -431,9 +444,6 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg) } ROCP_HSA_TABLE_CALL(ERROR, get_core_table()->hsa_signal_destroy_fn(_data->rocp_signal)); - delete _data; - - if(_corr_id) _corr_id->sub_ref_count(); return false; } @@ -619,6 +629,7 @@ async_copy_impl(Args... args) _data->tracing_data = std::move(tracing_data); } + auto _lk = _data->get_lock(); auto& tracing_data = _data->tracing_data; // at this point, we want to install our own signal handler diff --git a/source/lib/rocprofiler-sdk/tracing/tracing.hpp b/source/lib/rocprofiler-sdk/tracing/tracing.hpp index 8fb6e55084..ffa451f266 100644 --- a/source/lib/rocprofiler-sdk/tracing/tracing.hpp +++ b/source/lib/rocprofiler-sdk/tracing/tracing.hpp @@ -23,6 +23,7 @@ #pragma once #include "lib/common/mpl.hpp" +#include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/context/correlation_id.hpp" @@ -99,6 +100,57 @@ populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx, } } +template +inline void +populate_contexts(DomainIdx domain_idx, + rocprofiler_tracing_operation_t operation_idx, + ContextContainerT& contexts, + external_correlation_id_map_t& extern_corr_ids, + ClearContainersT = ClearContainersT{}) +{ + if constexpr(ClearContainersT::value) + { + contexts.clear(); + extern_corr_ids.clear(); + } + + const auto minimal_context_filter = [](const context_t* ctx) { + return (ctx->callback_tracer || ctx->buffered_tracer); + }; + + for(const auto* itr : context::get_active_contexts(minimal_context_filter)) + { + if(!itr) continue; + + if constexpr(std::is_same::value) + { + // if the given domain + op is not enabled, skip this context + if(context_filter(itr, domain_idx, operation_idx)) + { + 
contexts.emplace_back( + callback_context_data{itr, rocprofiler_callback_tracing_record_t{}}); + extern_corr_ids.emplace(itr, empty_user_data); + } + } + else if constexpr(std::is_same::value) + { + // if the given domain + op is not enabled, skip this context + if(context_filter(itr, domain_idx, operation_idx)) + { + contexts.emplace_back(buffered_context_data{itr}); + extern_corr_ids.emplace(itr, empty_user_data); + } + } + else + { + static_assert(common::mpl::assert_false::value, + "Error! invalid domain type"); + } + } +} + template inline void populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx, @@ -350,6 +402,8 @@ execute_buffer_record_emplace(const buffered_context_data_vec_t& buffered_cont buffer_v->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING, domain, record_v); } } + + common::consume_args(ancestor_corr_id); } } // namespace tracing } // namespace rocprofiler diff --git a/tests/rocprofv3/aborted-app/validate.py b/tests/rocprofv3/aborted-app/validate.py index 7c0b3f5b99..7c15b30c7e 100644 --- a/tests/rocprofv3/aborted-app/validate.py +++ b/tests/rocprofv3/aborted-app/validate.py @@ -91,7 +91,10 @@ def test_hip_api_trace_json(json_data): def get_kind_name(kind_id): return data["strings"]["buffer_records"][kind_id]["kind"] - valid_domain_names = ("HIP_RUNTIME_API",) + valid_domain_names = ( + "HIP_RUNTIME_API", + "HIP_RUNTIME_API_EXT", + ) hip_api_data = data["buffer_records"]["hip_api"] diff --git a/tests/rocprofv3/kernel-rename/validate.py b/tests/rocprofv3/kernel-rename/validate.py index c46845c3bb..e6521d9b2d 100644 --- a/tests/rocprofv3/kernel-rename/validate.py +++ b/tests/rocprofv3/kernel-rename/validate.py @@ -61,7 +61,10 @@ def test_hip_api_trace(json_data): def get_kind_name(kind_id): return data["strings"]["buffer_records"][kind_id]["kind"] - valid_domain_names = ("HIP_RUNTIME_API",) + valid_domain_names = ( + "HIP_RUNTIME_API", + "HIP_RUNTIME_API_EXT", + ) hip_api_data = data["buffer_records"]["hip_api"] diff --git 
a/tests/rocprofv3/summary/validate.py b/tests/rocprofv3/summary/validate.py index 94ecb3d498..d1112f89f1 100644 --- a/tests/rocprofv3/summary/validate.py +++ b/tests/rocprofv3/summary/validate.py @@ -37,7 +37,10 @@ def test_hip_api_trace(json_data): def get_kind_name(kind_id): return data["strings"]["buffer_records"][kind_id]["kind"] - valid_domain_names = ("HIP_RUNTIME_API",) + valid_domain_names = ( + "HIP_RUNTIME_API", + "HIP_RUNTIME_API_EXT", + ) hip_api_data = data["buffer_records"]["hip_api"] diff --git a/tests/rocprofv3/tracing-hip-in-libraries/validate.py b/tests/rocprofv3/tracing-hip-in-libraries/validate.py index 46ed1ab4cf..eeb8305e15 100644 --- a/tests/rocprofv3/tracing-hip-in-libraries/validate.py +++ b/tests/rocprofv3/tracing-hip-in-libraries/validate.py @@ -89,6 +89,8 @@ def test_api_trace( assert row["Domain"] in [ "HIP_RUNTIME_API", "HIP_COMPILER_API", + "HIP_RUNTIME_API_EXT", + "HIP_COMPILER_API_EXT", ] assert int(row["Process_Id"]) > 0 assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int( @@ -210,6 +212,8 @@ def test_api_trace_json(json_data): valid_hip_domain = [ "HIP_RUNTIME_API", "HIP_COMPILER_API", + "HIP_RUNTIME_API_EXT", + "HIP_COMPILER_API_EXT", ] valid_marker_domain = [