Use small_vector for API iterate_args (#597)

* Use small_vector for API iterate_args

- replace dim3 value arguments with rocprofiler_dim3_t
  - dim3 has a non-trivial destructor
- common::mpl::unqualified_type
- common::stringified_argument_array_t<N> alias
- assert_public_data_type_properties()
- common::container::small_vector<T>::at function
- stringize returns small_vector<stringified_argument>
  - stack allocated vector
- remove has_pc_sampling condition (HSA, HIP)
  - this will be handled in queue interception

* Misc tweaks
Этот коммит содержится в:
Jonathan R. Madsen
2024-03-13 07:36:55 -05:00
коммит произвёл GitHub
родитель 2a262235db
Коммит 8591ed1c96
14 изменённых файлов: 175 добавлений и 97 удалений
+56 -43
Просмотреть файл
@@ -23,6 +23,7 @@
#pragma once
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/version.h>
#include <hip/hip_runtime.h>
@@ -43,8 +44,13 @@ typedef struct rocprofiler_hip_api_no_args
char empty;
} rocprofiler_hip_api_no_args;
typedef union rocprofiler_hip_api_retval_u
typedef union rocprofiler_hip_api_retval_t
{
#ifdef __cplusplus
rocprofiler_hip_api_retval_t() = default;
~rocprofiler_hip_api_retval_t() = default;
#endif
int int_retval;
const char* const_charp_retval;
hipError_t hipError_t_retval;
@@ -52,8 +58,15 @@ typedef union rocprofiler_hip_api_retval_u
void** voidpp_retval;
} rocprofiler_hip_api_retval_t;
typedef union rocprofiler_hip_api_args_u
// NOTE: dim3 value arguments replaced with rocprofiler_dim3_t because dim3 has a non-trivial
// destructor
typedef union rocprofiler_hip_api_args_t
{
#ifdef __cplusplus
rocprofiler_hip_api_args_t() = default;
~rocprofiler_hip_api_args_t() = default;
#endif
// compiler
struct
{
@@ -64,10 +77,10 @@ typedef union rocprofiler_hip_api_args_u
} __hipPopCallConfiguration;
struct
{
dim3 gridDim;
dim3 blockDim;
size_t sharedMem;
hipStream_t stream;
rocprofiler_dim3_t gridDim;
rocprofiler_dim3_t blockDim;
size_t sharedMem;
hipStream_t stream;
} __hipPushCallConfiguration;
struct
{
@@ -207,10 +220,10 @@ typedef union rocprofiler_hip_api_args_u
} hipChooseDeviceR0000;
struct
{
dim3 gridDim;
dim3 blockDim;
size_t sharedMem;
hipStream_t stream;
rocprofiler_dim3_t gridDim;
rocprofiler_dim3_t blockDim;
size_t sharedMem;
hipStream_t stream;
} hipConfigureCall;
struct
{
@@ -558,15 +571,15 @@ typedef union rocprofiler_hip_api_args_u
} hipExtGetLinkTypeAndHopCount;
struct
{
const void* function_address;
dim3 numBlocks;
dim3 dimBlocks;
void** args;
size_t sharedMemBytes;
hipStream_t stream;
hipEvent_t startEvent;
hipEvent_t stopEvent;
int flags;
const void* function_address;
rocprofiler_dim3_t numBlocks;
rocprofiler_dim3_t dimBlocks;
void** args;
size_t sharedMemBytes;
hipStream_t stream;
hipEvent_t startEvent;
hipEvent_t stopEvent;
int flags;
} hipExtLaunchKernel;
struct
{
@@ -1311,12 +1324,12 @@ typedef union rocprofiler_hip_api_args_u
} hipLaunchByPtr;
struct
{
const void* func;
dim3 gridDim;
dim3 blockDimX;
void** kernelParams;
unsigned int sharedMemBytes;
hipStream_t stream;
const void* func;
rocprofiler_dim3_t gridDim;
rocprofiler_dim3_t blockDimX;
void** kernelParams;
unsigned int sharedMemBytes;
hipStream_t stream;
} hipLaunchCooperativeKernel;
struct
{
@@ -1332,12 +1345,12 @@ typedef union rocprofiler_hip_api_args_u
} hipLaunchHostFunc;
struct
{
const void* function_address;
dim3 numBlocks;
dim3 dimBlocks;
void** args;
size_t sharedMemBytes;
hipStream_t stream;
const void* function_address;
rocprofiler_dim3_t numBlocks;
rocprofiler_dim3_t dimBlocks;
void** args;
size_t sharedMemBytes;
hipStream_t stream;
} hipLaunchKernel;
struct
{
@@ -2699,21 +2712,21 @@ typedef union rocprofiler_hip_api_args_u
} hipEventRecord_spt;
struct
{
const void* func;
dim3 gridDim;
dim3 blockDim;
void** kernelParams;
uint32_t sharedMemBytes;
hipStream_t stream;
const void* func;
rocprofiler_dim3_t gridDim;
rocprofiler_dim3_t blockDim;
void** kernelParams;
uint32_t sharedMemBytes;
hipStream_t stream;
} hipLaunchCooperativeKernel_spt;
struct
{
const void* function_address;
dim3 numBlocks;
dim3 dimBlocks;
void** args;
size_t sharedMemBytes;
hipStream_t stream;
const void* function_address;
rocprofiler_dim3_t numBlocks;
rocprofiler_dim3_t dimBlocks;
void** args;
size_t sharedMemBytes;
hipStream_t stream;
} hipLaunchKernel_spt;
struct
{
+2 -2
Просмотреть файл
@@ -41,7 +41,7 @@ typedef struct rocprofiler_hsa_api_no_args
char empty;
} rocprofiler_hsa_api_no_args;
typedef union rocprofiler_hsa_api_retval_u
typedef union rocprofiler_hsa_api_retval_t
{
uint64_t uint64_t_retval;
uint32_t uint32_t_retval;
@@ -67,7 +67,7 @@ typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t* queue,
hsa_agent_t agent,
void* data);
typedef union rocprofiler_hsa_api_args_u
typedef union rocprofiler_hsa_api_args_t
{
// block: CoreApi API
struct
+2 -2
Просмотреть файл
@@ -40,14 +40,14 @@ typedef struct rocprofiler_marker_api_no_args
char empty;
} rocprofiler_marker_api_no_args;
typedef union rocprofiler_marker_api_retval_u
typedef union rocprofiler_marker_api_retval_t
{
int32_t int32_t_retval;
int64_t int64_t_retval;
roctx_range_id_t roctx_range_id_t_retval;
} rocprofiler_marker_api_retval_t;
typedef union rocprofiler_marker_api_args_u
typedef union rocprofiler_marker_api_args_t
{
struct
{
+12
Просмотреть файл
@@ -50,6 +50,7 @@
#include <limits>
#include <memory>
#include <new>
#include <stdexcept>
#include <type_traits>
#include <utility>
@@ -345,6 +346,17 @@ public:
return begin()[idx];
}
// Bounds-checked mutable element access.
// Returns a reference to the element at position idx, or throws
// std::out_of_range when idx is not smaller than size().
reference at(size_type idx)
{
    if(idx < size()) return begin()[idx];
    throw std::out_of_range{"small_vector::at"};
}
// Bounds-checked read-only element access.
// Returns a const reference to the element at position idx, or throws
// std::out_of_range when idx is not smaller than size().
const_reference at(size_type idx) const
{
    if(idx < size()) return begin()[idx];
    throw std::out_of_range{"small_vector::at"};
}
reference front()
{
assert(!empty());
+9
Просмотреть файл
@@ -147,6 +147,15 @@ template <typename Tp>
struct indirection_level
: indirection_level_impl_n<std::remove_cv_t<std::remove_reference_t<std::decay_t<Tp>>>, 0>
{};
// Maps Tp to its decayed form: references and top-level const/volatile are
// stripped, array types become pointers to their element type, and function
// types become function pointers. std::decay already yields a type that is
// neither a reference nor top-level cv-qualified, so no further stripping is
// required to obtain the same result.
template <typename Tp>
struct unqualified_type
{
    using type = std::decay_t<Tp>;
};

// Shorthand for typename unqualified_type<Tp>::type.
template <typename Tp>
using unqualified_type_t = typename unqualified_type<Tp>::type;
} // namespace mpl
} // namespace common
} // namespace rocprofiler
+7 -2
Просмотреть файл
@@ -22,6 +22,7 @@
#pragma once
#include "lib/common/container/small_vector.hpp"
#include "lib/common/mpl.hpp"
#include <fmt/core.h>
@@ -40,10 +41,14 @@ struct stringified_argument
int32_t indirection_level = 0;
int32_t dereference_count = 0;
const char* type = nullptr;
std::string name = {};
const char* name = nullptr;
std::string value = {};
};
template <size_t N>
using stringified_argument_array_t =
container::small_vector<stringified_argument, std::min<size_t>(N, 6)>;
template <typename Tp, typename FuncT>
auto
stringize_arg_impl(const Tp& _v, const int32_t max_deref, int32_t& deref_cnt, FuncT&& impl)
@@ -114,7 +119,7 @@ stringize_arg(int32_t max_deref, const std::pair<const char*, Tp>& arg, FuncT&&
auto _arg = common::stringified_argument{};
_arg.indirection_level = mpl::indirection_level<Tp>::value;
_arg.type = typeid(Tp).name();
_arg.name = std::string{arg.first};
_arg.name = arg.first;
_arg.value = stringize_arg_impl(
arg.second, max_deref, _arg.dereference_count, std::forward<FuncT>(impl));
return _arg;
+22 -15
Просмотреть файл
@@ -107,26 +107,33 @@ get_val(Container& map, const Key& key)
return (pos != map.end() ? &pos->second : nullptr);
}
// Compile-time validation that Tp satisfies the properties expected of a
// public data type: standard layout, trivial, default constructible, and
// trivially copy/move constructible, copy/move assignable, and copyable.
// Instantiating this function with a type that violates any property fails
// the build with a message naming the violated property. The function has no
// runtime effect.
template <typename Tp>
constexpr void
assert_public_data_type_properties()
{
    static_assert(std::is_standard_layout<Tp>::value,
                  "public data type struct should have a standard layout");
    static_assert(std::is_trivial<Tp>::value, "public data type should be trivial");
    static_assert(std::is_default_constructible<Tp>::value,
                  "public data type struct should be default constructible");
    static_assert(std::is_trivially_copy_constructible<Tp>::value,
                  "public data type struct should be trivially copy constructible");
    static_assert(std::is_trivially_move_constructible<Tp>::value,
                  "public data type struct should be trivially move constructible");
    // each diagnostic names the trait it actually checks so a failure points
    // at the right property
    static_assert(std::is_trivially_copy_assignable<Tp>::value,
                  "public data type struct should be trivially copy assignable");
    static_assert(std::is_trivially_move_assignable<Tp>::value,
                  "public data type struct should be trivially move assignable");
    static_assert(std::is_trivially_copyable<Tp>::value,
                  "public data type struct should be trivially copyable");
}
template <typename Tp>
constexpr void
assert_public_api_struct_properties()
{
assert_public_data_type_properties<Tp>();
static_assert(std::is_class<Tp>::value, "this is not a public API struct");
static_assert(std::is_standard_layout<Tp>::value,
"public API struct should have a standard layout");
static_assert(std::is_trivially_default_constructible<Tp>::value,
"public API struct should be trivially default constructible");
static_assert(std::is_trivially_copy_constructible<Tp>::value,
"public API struct should be trivially copy constructible");
static_assert(std::is_trivially_move_constructible<Tp>::value,
"public API struct should be trivially move constructible");
static_assert(std::is_trivially_copy_assignable<Tp>::value,
"public API struct should be trivially move assignable");
static_assert(std::is_trivially_move_assignable<Tp>::value,
"public API struct should be trivially move assignable");
static_assert(std::is_trivially_copyable<Tp>::value,
"public API struct should be trivially move assignable");
static_assert(std::is_trivial<Tp>::value, "public API struct should be trivial");
static_assert(offsetof(Tp, size) == 0, "public API struct should have a size field first");
static_assert(sizeof(std::declval<Tp>().size) == sizeof(uint64_t),
"public API struct size field should be 64 bits");
+23 -11
Просмотреть файл
@@ -109,6 +109,21 @@ set_data_retval(DataT& _data, Tp _val)
static_assert(std::is_empty<Tp>::value, "Error! unsupported return type");
}
}
template <typename Tp>
decltype(auto)
convert_arg_type(Tp&& val)
{
using data_type = common::mpl::unqualified_type_t<Tp>;
if constexpr(std::is_same<data_type, dim3>::value)
{
return rocprofiler_dim3_t{val.x, val.y, val.z};
}
else
{
return std::forward<Tp>(val);
}
}
} // namespace
hip_api_table_t&
@@ -195,8 +210,6 @@ populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx,
{
if(!itr) continue;
// if(itr->pc_sampler) has_pc_sampling = true;
if(itr->callback_tracer)
{
// if the given domain + op is not enabled, skip this context
@@ -228,7 +241,6 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
auto thr_id = common::get_tid();
auto callback_contexts = std::vector<callback_context_data>{};
auto buffered_contexts = std::vector<buffered_context_data>{};
auto has_pc_sampling = false;
populate_contexts(info_type::callback_domain_idx,
info_type::buffered_domain_idx,
@@ -245,11 +257,11 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
return 0;
}
auto ref_count = (has_pc_sampling) ? 4 : 2;
auto buffer_record = common::init_public_api_struct(buffered_api_data_t{});
auto tracer_data = callback_api_data_t{.size = sizeof(callback_api_data_t)};
auto* corr_id = correlation_service::construct(ref_count);
auto internal_corr_id = corr_id->internal;
constexpr auto ref_count = 2;
auto buffer_record = common::init_public_api_struct(buffered_api_data_t{});
auto tracer_data = common::init_public_api_struct(callback_api_data_t{});
auto* corr_id = correlation_service::construct(ref_count);
auto internal_corr_id = corr_id->internal;
// construct the buffered info before the callback so the callbacks are as closely wrapped
// around the function call as possible
@@ -264,12 +276,12 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
}
tracer_data.size = sizeof(callback_api_data_t);
set_data_args(info_type::get_api_data_args(tracer_data.args), std::forward<Args>(args)...);
// invoke the callbacks
if(!callback_contexts.empty())
{
set_data_args(info_type::get_api_data_args(tracer_data.args), std::forward<Args>(args)...);
set_data_args(info_type::get_api_data_args(tracer_data.args),
convert_arg_type(std::forward<Args>(args))...);
for(auto& itr : callback_contexts)
{
@@ -455,7 +467,7 @@ iterate_args(const uint32_t id,
arg_addr.at(i), // arg_value_addr
arg_list.at(i).indirection_level, // indirection
arg_list.at(i).type, // arg_type
arg_list.at(i).name.c_str(), // arg_name
arg_list.at(i).name, // arg_name
arg_list.at(i).value.c_str(), // arg_value_str
arg_list.at(i).dereference_count, // num deref in str
user_data);
+21 -1
Просмотреть файл
@@ -72,9 +72,29 @@ template <typename... Args>
auto
stringize(int32_t max_deref, Args... args)
{
return std::vector<common::stringified_argument>{common::stringize_arg(
using array_type = common::stringified_argument_array_t<sizeof...(Args)>;
return array_type{common::stringize_arg(
max_deref, args, [](const auto& _v) { return stringize_impl(_v); })...};
}
} // namespace utils
} // namespace hip
} // namespace rocprofiler
namespace fmt
{
template <>
struct formatter<rocprofiler_dim3_t>
{
template <typename ParseContext>
constexpr auto parse(ParseContext& ctx)
{
return ctx.begin();
}
template <typename Ctx>
auto format(const rocprofiler_dim3_t& v, Ctx& ctx) const
{
return fmt::format_to(ctx.out(), "{}z={}, y={}, x={}{}", '{', v.z, v.y, v.x, '}');
}
};
} // namespace fmt
+8 -10
Просмотреть файл
@@ -167,7 +167,8 @@ get_table()
.core_ = get_core_table(),
.amd_ext_ = get_amd_ext_table(),
.finalizer_ext_ = get_fini_ext_table(),
.image_ext_ = get_img_ext_table()};
.image_ext_ = get_img_ext_table(),
.tools_ = nullptr};
return tbl;
}
@@ -246,8 +247,6 @@ populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx,
{
if(!itr) continue;
// if(itr->pc_sampler) has_pc_sampling = true;
if(itr->callback_tracer)
{
// if the given domain + op is not enabled, skip this context
@@ -286,7 +285,6 @@ hsa_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
auto thr_id = common::get_tid();
auto callback_contexts = std::vector<callback_context_data>{};
auto buffered_contexts = std::vector<buffered_context_data>{};
auto has_pc_sampling = false;
populate_contexts(info_type::callback_domain_idx,
info_type::buffered_domain_idx,
@@ -303,11 +301,11 @@ hsa_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
return HSA_STATUS_SUCCESS;
}
auto ref_count = (has_pc_sampling) ? 4 : 2;
auto buffer_record = common::init_public_api_struct(buffer_hsa_api_record_t{});
auto tracer_data = common::init_public_api_struct(callback_hsa_api_data_t{});
auto* corr_id = correlation_service::construct(ref_count);
auto internal_corr_id = corr_id->internal;
constexpr auto ref_count = 2;
auto buffer_record = common::init_public_api_struct(buffer_hsa_api_record_t{});
auto tracer_data = common::init_public_api_struct(callback_hsa_api_data_t{});
auto* corr_id = correlation_service::construct(ref_count);
auto internal_corr_id = corr_id->internal;
// construct the buffered info before the callback so the callbacks are as closely wrapped
// around the function call as possible
@@ -513,7 +511,7 @@ iterate_args(const uint32_t id,
arg_addr.at(i), // arg_value_addr
arg_list.at(i).indirection_level, // indirection
arg_list.at(i).type, // arg_type
arg_list.at(i).name.c_str(), // arg_name
arg_list.at(i).name, // arg_name
arg_list.at(i).value.c_str(), // arg_value_str
arg_list.at(i).dereference_count, // num deref in str
user_data);
+2 -1
Просмотреть файл
@@ -75,7 +75,8 @@ template <typename... Args>
auto
stringize(int32_t max_deref, Args... args)
{
return std::vector<common::stringified_argument>{common::stringize_arg(
using array_type = common::stringified_argument_array_t<sizeof...(Args)>;
return array_type{common::stringize_arg(
max_deref, args, [](const auto& _v) { return stringize_impl(_v); })...};
}
+5 -7
Просмотреть файл
@@ -131,8 +131,7 @@ roctx_api_impl<TableIdx, OpIdx>::exec(FuncT&& _func, Args&&... args)
namespace
{
using correlation_service = context::correlation_tracing_service;
using buffer_marker_api_record_t = rocprofiler_buffer_tracing_marker_api_record_t;
using correlation_service = context::correlation_tracing_service;
struct callback_context_data
{
@@ -162,8 +161,6 @@ populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx,
{
if(!itr) continue;
// if(itr->pc_sampler) has_pc_sampling = true;
if(itr->callback_tracer)
{
// if the given domain + op is not enabled, skip this context
@@ -190,6 +187,7 @@ roctx_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
{
using info_type = roctx_api_info<TableIdx, OpIdx>;
using callback_api_data_t = typename roctx_domain_info<TableIdx>::callback_data_type;
using buffered_api_data_t = typename roctx_domain_info<TableIdx>::buffer_data_type;
auto thr_id = common::get_tid();
auto callback_contexts = std::vector<callback_context_data>{};
@@ -211,8 +209,8 @@ roctx_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
}
auto ref_count = 2;
auto buffer_record = common::init_public_api_struct(buffer_marker_api_record_t{});
auto tracer_data = callback_api_data_t{.size = sizeof(callback_api_data_t)};
auto buffer_record = common::init_public_api_struct(buffered_api_data_t{});
auto tracer_data = common::init_public_api_struct(callback_api_data_t{});
auto* corr_id = correlation_service::construct(ref_count);
auto internal_corr_id = corr_id->internal;
@@ -420,7 +418,7 @@ iterate_args(const uint32_t id,
arg_addr.at(i), // arg_value_addr
arg_list.at(i).indirection_level, // indirection
arg_list.at(i).type, // arg_type
arg_list.at(i).name.c_str(), // arg_name
arg_list.at(i).name, // arg_name
arg_list.at(i).value.c_str(), // arg_value_str
arg_list.at(i).dereference_count, // num deref in str
user_data);
+2 -1
Просмотреть файл
@@ -66,7 +66,8 @@ template <typename... Args>
auto
stringize(int32_t max_deref, Args... args)
{
return std::vector<common::stringified_argument>{common::stringize_arg(
using array_type = common::stringified_argument_array_t<sizeof...(Args)>;
return array_type{common::stringize_arg(
max_deref, args, [](const auto& _v) { return stringize_impl(_v); })...};
}
+4 -2
Просмотреть файл
@@ -1521,7 +1521,8 @@ write_perfetto()
auto _args = callback_arg_array_t{};
auto ritr = std::find_if(
hsa_api_cb_records.begin(), hsa_api_cb_records.end(), [&itr](const auto& citr) {
return (citr.record.correlation_id.internal == itr.correlation_id.internal);
return (citr.record.correlation_id.internal == itr.correlation_id.internal &&
!citr.args.empty());
});
if(ritr != hsa_api_cb_records.end()) _args = ritr->args;
@@ -1559,7 +1560,8 @@ write_perfetto()
auto _args = callback_arg_array_t{};
auto ritr = std::find_if(
hip_api_cb_records.begin(), hip_api_cb_records.end(), [&itr](const auto& citr) {
return (citr.record.correlation_id.internal == itr.correlation_id.internal);
return (citr.record.correlation_id.internal == itr.correlation_id.internal &&
!citr.args.empty());
});
if(ritr != hip_api_cb_records.end()) _args = ritr->args;