diff --git a/source/include/rocprofiler-sdk/hip/api_args.h b/source/include/rocprofiler-sdk/hip/api_args.h index 54a04d41f9..6210c61b79 100644 --- a/source/include/rocprofiler-sdk/hip/api_args.h +++ b/source/include/rocprofiler-sdk/hip/api_args.h @@ -23,6 +23,7 @@ #pragma once #include +#include #include #include @@ -43,8 +44,13 @@ typedef struct rocprofiler_hip_api_no_args char empty; } rocprofiler_hip_api_no_args; -typedef union rocprofiler_hip_api_retval_u +typedef union rocprofiler_hip_api_retval_t { +#ifdef __cplusplus + rocprofiler_hip_api_retval_t() = default; + ~rocprofiler_hip_api_retval_t() = default; +#endif + int int_retval; const char* const_charp_retval; hipError_t hipError_t_retval; @@ -52,8 +58,15 @@ typedef union rocprofiler_hip_api_retval_u void** voidpp_retval; } rocprofiler_hip_api_retval_t; -typedef union rocprofiler_hip_api_args_u +// NOTE: dim3 value arguments replaced with rocprofiler_dim3_t because dim3 has a non-trivial +// destructor +typedef union rocprofiler_hip_api_args_t { +#ifdef __cplusplus + rocprofiler_hip_api_args_t() = default; + ~rocprofiler_hip_api_args_t() = default; +#endif + // compiler struct { @@ -64,10 +77,10 @@ typedef union rocprofiler_hip_api_args_u } __hipPopCallConfiguration; struct { - dim3 gridDim; - dim3 blockDim; - size_t sharedMem; - hipStream_t stream; + rocprofiler_dim3_t gridDim; + rocprofiler_dim3_t blockDim; + size_t sharedMem; + hipStream_t stream; } __hipPushCallConfiguration; struct { @@ -207,10 +220,10 @@ typedef union rocprofiler_hip_api_args_u } hipChooseDeviceR0000; struct { - dim3 gridDim; - dim3 blockDim; - size_t sharedMem; - hipStream_t stream; + rocprofiler_dim3_t gridDim; + rocprofiler_dim3_t blockDim; + size_t sharedMem; + hipStream_t stream; } hipConfigureCall; struct { @@ -558,15 +571,15 @@ typedef union rocprofiler_hip_api_args_u } hipExtGetLinkTypeAndHopCount; struct { - const void* function_address; - dim3 numBlocks; - dim3 dimBlocks; - void** args; - size_t sharedMemBytes; - hipStream_t stream; - hipEvent_t startEvent; - hipEvent_t stopEvent; - int flags; + const void* function_address; + rocprofiler_dim3_t numBlocks; + rocprofiler_dim3_t dimBlocks; + void** args; + size_t sharedMemBytes; + hipStream_t stream; + hipEvent_t startEvent; + hipEvent_t stopEvent; + int flags; } hipExtLaunchKernel; struct { @@ -1311,12 +1324,12 @@ typedef union rocprofiler_hip_api_args_u } hipLaunchByPtr; struct { - const void* func; - dim3 gridDim; - dim3 blockDimX; - void** kernelParams; - unsigned int sharedMemBytes; - hipStream_t stream; + const void* func; + rocprofiler_dim3_t gridDim; + rocprofiler_dim3_t blockDimX; + void** kernelParams; + unsigned int sharedMemBytes; + hipStream_t stream; } hipLaunchCooperativeKernel; struct { @@ -1332,12 +1345,12 @@ typedef union rocprofiler_hip_api_args_u } hipLaunchHostFunc; struct { - const void* function_address; - dim3 numBlocks; - dim3 dimBlocks; - void** args; - size_t sharedMemBytes; - hipStream_t stream; + const void* function_address; + rocprofiler_dim3_t numBlocks; + rocprofiler_dim3_t dimBlocks; + void** args; + size_t sharedMemBytes; + hipStream_t stream; } hipLaunchKernel; struct { @@ -2699,21 +2712,21 @@ typedef union rocprofiler_hip_api_args_u } hipEventRecord_spt; struct { - const void* func; - dim3 gridDim; - dim3 blockDim; - void** kernelParams; - uint32_t sharedMemBytes; - hipStream_t stream; + const void* func; + rocprofiler_dim3_t gridDim; + rocprofiler_dim3_t blockDim; + void** kernelParams; + uint32_t sharedMemBytes; + hipStream_t stream; } hipLaunchCooperativeKernel_spt; struct { - const void* function_address; - dim3 numBlocks; - dim3 dimBlocks; - void** args; - size_t sharedMemBytes; - hipStream_t stream; + const void* function_address; + rocprofiler_dim3_t numBlocks; + rocprofiler_dim3_t dimBlocks; + void** args; + size_t sharedMemBytes; + hipStream_t stream; } hipLaunchKernel_spt; struct { diff --git a/source/include/rocprofiler-sdk/hsa/api_args.h b/source/include/rocprofiler-sdk/hsa/api_args.h index 8b1ac140af..bda5410963 100644 --- a/source/include/rocprofiler-sdk/hsa/api_args.h +++ b/source/include/rocprofiler-sdk/hsa/api_args.h @@ -41,7 +41,7 @@ typedef struct rocprofiler_hsa_api_no_args char empty; } rocprofiler_hsa_api_no_args; -typedef union rocprofiler_hsa_api_retval_u +typedef union rocprofiler_hsa_api_retval_t { uint64_t uint64_t_retval; uint32_t uint32_t_retval; @@ -67,7 +67,7 @@ typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t* queue, hsa_agent_t agent, void* data); -typedef union rocprofiler_hsa_api_args_u +typedef union rocprofiler_hsa_api_args_t { // block: CoreApi API struct diff --git a/source/include/rocprofiler-sdk/marker/api_args.h b/source/include/rocprofiler-sdk/marker/api_args.h index 0fd32cc353..bfb52d02b9 100644 --- a/source/include/rocprofiler-sdk/marker/api_args.h +++ b/source/include/rocprofiler-sdk/marker/api_args.h @@ -40,14 +40,14 @@ typedef struct rocprofiler_marker_api_no_args char empty; } rocprofiler_marker_api_no_args; -typedef union rocprofiler_marker_api_retval_u +typedef union rocprofiler_marker_api_retval_t { int32_t int32_t_retval; int64_t int64_t_retval; roctx_range_id_t roctx_range_id_t_retval; } rocprofiler_marker_api_retval_t; -typedef union rocprofiler_marker_api_args_u +typedef union rocprofiler_marker_api_args_t { struct { diff --git a/source/lib/common/container/small_vector.hpp b/source/lib/common/container/small_vector.hpp index bb0c56ddcd..3db7c157d8 100644 --- a/source/lib/common/container/small_vector.hpp +++ b/source/lib/common/container/small_vector.hpp @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -345,6 +346,17 @@ public: return begin()[idx]; } + reference at(size_type idx) + { + if(idx >= size()) throw std::out_of_range{"small_vector::at"}; + return begin()[idx]; + } + const_reference at(size_type idx) const + { + if(idx >= size()) throw std::out_of_range{"small_vector::at"}; + return begin()[idx]; + } + reference front() { assert(!empty()); diff --git a/source/lib/common/mpl.hpp b/source/lib/common/mpl.hpp index 3187688e33..151041a01e 100644 --- a/source/lib/common/mpl.hpp +++ b/source/lib/common/mpl.hpp @@ -147,6 +147,15 @@ template struct indirection_level : indirection_level_impl_n>>, 0> {}; + +template +struct unqualified_type +{ + using type = std::remove_reference_t>>; +}; + +template +using unqualified_type_t = typename unqualified_type::type; } // namespace mpl } // namespace common } // namespace rocprofiler diff --git a/source/lib/common/stringize_arg.hpp b/source/lib/common/stringize_arg.hpp index f252bf9832..911afa44e9 100644 --- a/source/lib/common/stringize_arg.hpp +++ b/source/lib/common/stringize_arg.hpp @@ -22,6 +22,7 @@ #pragma once +#include "lib/common/container/small_vector.hpp" #include "lib/common/mpl.hpp" #include @@ -40,10 +41,14 @@ struct stringified_argument int32_t indirection_level = 0; int32_t dereference_count = 0; const char* type = nullptr; - std::string name = {}; + const char* name = nullptr; std::string value = {}; }; +template +using stringified_argument_array_t = + container::small_vector(N, 6)>; + template auto stringize_arg_impl(const Tp& _v, const int32_t max_deref, int32_t& deref_cnt, FuncT&& impl) @@ -114,7 +119,7 @@ stringize_arg(int32_t max_deref, const std::pair& arg, FuncT&& auto _arg = common::stringified_argument{}; _arg.indirection_level = mpl::indirection_level::value; _arg.type = typeid(Tp).name(); - _arg.name = std::string{arg.first}; + _arg.name = arg.first; _arg.value = stringize_arg_impl( arg.second, max_deref, _arg.dereference_count, std::forward(impl)); return _arg; diff --git a/source/lib/common/utility.hpp b/source/lib/common/utility.hpp index 162af7e840..500b0e28e3 100644 --- a/source/lib/common/utility.hpp +++ b/source/lib/common/utility.hpp @@ -107,26 +107,33 @@ get_val(Container& map, const Key& key) return (pos != map.end() ? &pos->second : nullptr); } +template +constexpr void +assert_public_data_type_properties() +{ + static_assert(std::is_standard_layout::value, + "public data type struct should have a standard layout"); + static_assert(std::is_trivial::value, "public data type should be trivial"); + static_assert(std::is_default_constructible::value, + "public data type struct should be default constructible"); + static_assert(std::is_trivially_copy_constructible::value, + "public data type struct should be trivially copy constructible"); + static_assert(std::is_trivially_move_constructible::value, + "public data type struct should be trivially move constructible"); + static_assert(std::is_trivially_copy_assignable::value, + "public data type struct should be trivially move assignable"); + static_assert(std::is_trivially_move_assignable::value, + "public data type struct should be trivially move assignable"); + static_assert(std::is_trivially_copyable::value, + "public data type struct should be trivially move assignable"); +} + template constexpr void assert_public_api_struct_properties() { + assert_public_data_type_properties(); static_assert(std::is_class::value, "this is not a public API struct"); - static_assert(std::is_standard_layout::value, - "public API struct should have a standard layout"); - static_assert(std::is_trivially_default_constructible::value, - "public API struct should be trivially default constructible"); - static_assert(std::is_trivially_copy_constructible::value, - "public API struct should be trivially copy constructible"); - static_assert(std::is_trivially_move_constructible::value, - "public API struct should be trivially move constructible"); - static_assert(std::is_trivially_copy_assignable::value, - "public API struct should be trivially move assignable"); - static_assert(std::is_trivially_move_assignable::value, - "public API struct should be trivially move assignable"); - static_assert(std::is_trivially_copyable::value, - "public API struct should be trivially move assignable"); - static_assert(std::is_trivial::value, "public API struct should be trivial"); static_assert(offsetof(Tp, size) == 0, "public API struct should have a size field first"); static_assert(sizeof(std::declval().size) == sizeof(uint64_t), "public API struct size field should be 64 bits"); diff --git a/source/lib/rocprofiler-sdk/hip/hip.cpp b/source/lib/rocprofiler-sdk/hip/hip.cpp index 4cd08d15a8..efbb0f66ee 100644 --- a/source/lib/rocprofiler-sdk/hip/hip.cpp +++ b/source/lib/rocprofiler-sdk/hip/hip.cpp @@ -109,6 +109,21 @@ set_data_retval(DataT& _data, Tp _val) static_assert(std::is_empty::value, "Error! unsupported return type"); } } + +template +decltype(auto) +convert_arg_type(Tp&& val) +{ + using data_type = common::mpl::unqualified_type_t; + if constexpr(std::is_same::value) + { + return rocprofiler_dim3_t{val.x, val.y, val.z}; + } + else + { + return std::forward(val); + } +} } // namespace hip_api_table_t& @@ -195,8 +210,6 @@ populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx, { if(!itr) continue; - // if(itr->pc_sampler) has_pc_sampling = true; - if(itr->callback_tracer) { // if the given domain + op is not enabled, skip this context @@ -228,7 +241,6 @@ hip_api_impl::functor(Args&&... args) auto thr_id = common::get_tid(); auto callback_contexts = std::vector{}; auto buffered_contexts = std::vector{}; - auto has_pc_sampling = false; populate_contexts(info_type::callback_domain_idx, info_type::buffered_domain_idx, @@ -245,11 +257,11 @@ hip_api_impl::functor(Args&&... args) return 0; } - auto ref_count = (has_pc_sampling) ? 4 : 2; - auto buffer_record = common::init_public_api_struct(buffered_api_data_t{}); - auto tracer_data = callback_api_data_t{.size = sizeof(callback_api_data_t)}; - auto* corr_id = correlation_service::construct(ref_count); - auto internal_corr_id = corr_id->internal; + constexpr auto ref_count = 2; + auto buffer_record = common::init_public_api_struct(buffered_api_data_t{}); + auto tracer_data = common::init_public_api_struct(callback_api_data_t{}); + auto* corr_id = correlation_service::construct(ref_count); + auto internal_corr_id = corr_id->internal; // construct the buffered info before the callback so the callbacks are as closely wrapped // around the function call as possible @@ -264,12 +276,12 @@ hip_api_impl::functor(Args&&... args) } tracer_data.size = sizeof(callback_api_data_t); - set_data_args(info_type::get_api_data_args(tracer_data.args), std::forward(args)...); // invoke the callbacks if(!callback_contexts.empty()) { - set_data_args(info_type::get_api_data_args(tracer_data.args), std::forward(args)...); + set_data_args(info_type::get_api_data_args(tracer_data.args), + convert_arg_type(std::forward(args))...); for(auto& itr : callback_contexts) { @@ -455,7 +467,7 @@ iterate_args(const uint32_t id, arg_addr.at(i), // arg_value_addr arg_list.at(i).indirection_level, // indirection arg_list.at(i).type, // arg_type - arg_list.at(i).name.c_str(), // arg_name + arg_list.at(i).name, // arg_name arg_list.at(i).value.c_str(), // arg_value_str arg_list.at(i).dereference_count, // num deref in str user_data); diff --git a/source/lib/rocprofiler-sdk/hip/utils.hpp b/source/lib/rocprofiler-sdk/hip/utils.hpp index 0ee23cc97c..fa305e2a15 100644 --- a/source/lib/rocprofiler-sdk/hip/utils.hpp +++ b/source/lib/rocprofiler-sdk/hip/utils.hpp @@ -72,9 +72,29 @@ template auto stringize(int32_t max_deref, Args... args) { - return std::vector{common::stringize_arg( + using array_type = common::stringified_argument_array_t; + return array_type{common::stringize_arg( max_deref, args, [](const auto& _v) { return stringize_impl(_v); })...}; } } // namespace utils } // namespace hip } // namespace rocprofiler + +namespace fmt +{ +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(const rocprofiler_dim3_t& v, Ctx& ctx) const + { + return fmt::format_to(ctx.out(), "{}z={}, y={}, x={}{}", '{', v.z, v.y, v.x, '}'); + } +}; +} // namespace fmt diff --git a/source/lib/rocprofiler-sdk/hsa/hsa.cpp b/source/lib/rocprofiler-sdk/hsa/hsa.cpp index 969d3bd901..b07a2df00c 100644 --- a/source/lib/rocprofiler-sdk/hsa/hsa.cpp +++ b/source/lib/rocprofiler-sdk/hsa/hsa.cpp @@ -167,7 +167,8 @@ get_table() .core_ = get_core_table(), .amd_ext_ = get_amd_ext_table(), .finalizer_ext_ = get_fini_ext_table(), - .image_ext_ = get_img_ext_table()}; + .image_ext_ = get_img_ext_table(), + .tools_ = nullptr}; return tbl; } @@ -246,8 +247,6 @@ populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx, { if(!itr) continue; - // if(itr->pc_sampler) has_pc_sampling = true; - if(itr->callback_tracer) { // if the given domain + op is not enabled, skip this context @@ -286,7 +285,6 @@ hsa_api_impl::functor(Args&&... args) auto thr_id = common::get_tid(); auto callback_contexts = std::vector{}; auto buffered_contexts = std::vector{}; - auto has_pc_sampling = false; populate_contexts(info_type::callback_domain_idx, info_type::buffered_domain_idx, @@ -303,11 +301,11 @@ hsa_api_impl::functor(Args&&... args) return HSA_STATUS_SUCCESS; } - auto ref_count = (has_pc_sampling) ? 4 : 2; - auto buffer_record = common::init_public_api_struct(buffer_hsa_api_record_t{}); - auto tracer_data = common::init_public_api_struct(callback_hsa_api_data_t{}); - auto* corr_id = correlation_service::construct(ref_count); - auto internal_corr_id = corr_id->internal; + constexpr auto ref_count = 2; + auto buffer_record = common::init_public_api_struct(buffer_hsa_api_record_t{}); + auto tracer_data = common::init_public_api_struct(callback_hsa_api_data_t{}); + auto* corr_id = correlation_service::construct(ref_count); + auto internal_corr_id = corr_id->internal; // construct the buffered info before the callback so the callbacks are as closely wrapped // around the function call as possible @@ -513,7 +511,7 @@ iterate_args(const uint32_t id, arg_addr.at(i), // arg_value_addr arg_list.at(i).indirection_level, // indirection arg_list.at(i).type, // arg_type - arg_list.at(i).name.c_str(), // arg_name + arg_list.at(i).name, // arg_name arg_list.at(i).value.c_str(), // arg_value_str arg_list.at(i).dereference_count, // num deref in str user_data); diff --git a/source/lib/rocprofiler-sdk/hsa/utils.hpp b/source/lib/rocprofiler-sdk/hsa/utils.hpp index 914a74ce35..b86a3bb1cc 100644 --- a/source/lib/rocprofiler-sdk/hsa/utils.hpp +++ b/source/lib/rocprofiler-sdk/hsa/utils.hpp @@ -75,7 +75,8 @@ template auto stringize(int32_t max_deref, Args... args) { - return std::vector{common::stringize_arg( + using array_type = common::stringified_argument_array_t; + return array_type{common::stringize_arg( max_deref, args, [](const auto& _v) { return stringize_impl(_v); })...}; } diff --git a/source/lib/rocprofiler-sdk/marker/marker.cpp b/source/lib/rocprofiler-sdk/marker/marker.cpp index aac6da1f19..8466875aed 100644 --- a/source/lib/rocprofiler-sdk/marker/marker.cpp +++ b/source/lib/rocprofiler-sdk/marker/marker.cpp @@ -131,8 +131,7 @@ roctx_api_impl::exec(FuncT&& _func, Args&&... args) namespace { -using correlation_service = context::correlation_tracing_service; -using buffer_marker_api_record_t = rocprofiler_buffer_tracing_marker_api_record_t; +using correlation_service = context::correlation_tracing_service; struct callback_context_data { @@ -162,8 +161,6 @@ populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx, { if(!itr) continue; - // if(itr->pc_sampler) has_pc_sampling = true; - if(itr->callback_tracer) { // if the given domain + op is not enabled, skip this context @@ -190,6 +187,7 @@ roctx_api_impl::functor(Args&&... args) { using info_type = roctx_api_info; using callback_api_data_t = typename roctx_domain_info::callback_data_type; + using buffered_api_data_t = typename roctx_domain_info::buffer_data_type; auto thr_id = common::get_tid(); auto callback_contexts = std::vector{}; @@ -211,8 +209,8 @@ roctx_api_impl::functor(Args&&... args) } auto ref_count = 2; - auto buffer_record = common::init_public_api_struct(buffer_marker_api_record_t{}); - auto tracer_data = callback_api_data_t{.size = sizeof(callback_api_data_t)}; + auto buffer_record = common::init_public_api_struct(buffered_api_data_t{}); + auto tracer_data = common::init_public_api_struct(callback_api_data_t{}); auto* corr_id = correlation_service::construct(ref_count); auto internal_corr_id = corr_id->internal; @@ -420,7 +418,7 @@ iterate_args(const uint32_t id, arg_addr.at(i), // arg_value_addr arg_list.at(i).indirection_level, // indirection arg_list.at(i).type, // arg_type - arg_list.at(i).name.c_str(), // arg_name + arg_list.at(i).name, // arg_name arg_list.at(i).value.c_str(), // arg_value_str arg_list.at(i).dereference_count, // num deref in str user_data); diff --git a/source/lib/rocprofiler-sdk/marker/utils.hpp b/source/lib/rocprofiler-sdk/marker/utils.hpp index 404d92ab48..37a26ac470 100644 --- a/source/lib/rocprofiler-sdk/marker/utils.hpp +++ b/source/lib/rocprofiler-sdk/marker/utils.hpp @@ -66,7 +66,8 @@ template auto stringize(int32_t max_deref, Args... args) { - return std::vector{common::stringize_arg( + using array_type = common::stringified_argument_array_t; + return array_type{common::stringize_arg( max_deref, args, [](const auto& _v) { return stringize_impl(_v); })...}; } diff --git a/tests/tools/json-tool.cpp b/tests/tools/json-tool.cpp index 7dcc94b6cb..f99da438e7 100644 --- a/tests/tools/json-tool.cpp +++ b/tests/tools/json-tool.cpp @@ -1521,7 +1521,8 @@ write_perfetto() auto _args = callback_arg_array_t{}; auto ritr = std::find_if( hsa_api_cb_records.begin(), hsa_api_cb_records.end(), [&itr](const auto& citr) { - return (citr.record.correlation_id.internal == itr.correlation_id.internal); + return (citr.record.correlation_id.internal == itr.correlation_id.internal && + !citr.args.empty()); }); if(ritr != hsa_api_cb_records.end()) _args = ritr->args; @@ -1559,7 +1560,8 @@ write_perfetto() auto _args = callback_arg_array_t{}; auto ritr = std::find_if( hip_api_cb_records.begin(), hip_api_cb_records.end(), [&itr](const auto& citr) { - return (citr.record.correlation_id.internal == itr.correlation_id.internal); + return (citr.record.correlation_id.internal == itr.correlation_id.internal && + !citr.args.empty()); }); if(ritr != hip_api_cb_records.end()) _args = ritr->args;