Async memory copy callback tracing + memory copy size (#791)
* Async memory copy tracing update - rocprofiler_buffer_tracing_memory_copy_record_t: thread_id and bytes - support ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY - init_public_api_struct can fully construct * Testing for callback async copy tracing
Este commit está contenido en:
cometido por
GitHub
padre
edb1883a05
commit
12c836f95f
@@ -156,10 +156,12 @@ typedef struct
|
||||
rocprofiler_buffer_tracing_kind_t kind;
|
||||
rocprofiler_memory_copy_operation_t operation;
|
||||
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
|
||||
rocprofiler_thread_id_t thread_id; ///< id for thread that triggered copy
|
||||
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
|
||||
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
|
||||
rocprofiler_agent_id_t dst_agent_id; ///< destination agent of copy
|
||||
rocprofiler_agent_id_t src_agent_id; ///< source agent of copy
|
||||
uint64_t bytes; ///< bytes copied
|
||||
|
||||
/// @var kind
|
||||
/// @brief ::ROCPROFILER_BUFFER_TRACING_MEMORY_COPY
|
||||
|
||||
@@ -168,6 +168,22 @@ typedef struct rocprofiler_callback_tracing_kernel_dispatch_data_t
|
||||
rocprofiler_kernel_dispatch_info_t dispatch_info; ///< Dispatch info
|
||||
} rocprofiler_callback_tracing_kernel_dispatch_data_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Memory Copy Callback Tracer Record.
|
||||
*
|
||||
* The timestamps in this record will only be non-zero in the ::ROCPROFILER_CALLBACK_PHASE_EXIT
|
||||
* callback
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint64_t size; ///< size of this struct
|
||||
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
|
||||
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
|
||||
rocprofiler_agent_id_t dst_agent_id; ///< destination agent of copy
|
||||
rocprofiler_agent_id_t src_agent_id; ///< source agent of copy
|
||||
uint64_t bytes; ///< bytes copied
|
||||
} rocprofiler_callback_tracing_memory_copy_data_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Scratch Memory Callback Data.
|
||||
*/
|
||||
|
||||
@@ -155,6 +155,7 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ///< @see ::rocprofiler_code_object_operation_t
|
||||
ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY, ///< @see ::rocprofiler_scratch_memory_operation_t
|
||||
ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH, ///< Callbacks for kernel dispatches
|
||||
ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY, ///< @see ::rocprofiler_memory_copy_operation_t
|
||||
ROCPROFILER_CALLBACK_TRACING_LAST,
|
||||
} rocprofiler_callback_tracing_kind_t;
|
||||
|
||||
@@ -613,11 +614,11 @@ rocprofiler_record_header_compute_hash(uint32_t category, uint32_t kind)
|
||||
*/
|
||||
typedef struct rocprofiler_kernel_dispatch_info_t
|
||||
{
|
||||
uint64_t size; ///< Size of this struct
|
||||
rocprofiler_agent_id_t agent_id; ///< Agent ID where kernel is launched
|
||||
rocprofiler_queue_id_t queue_id; ///< Queue ID where kernel packet is enqueued
|
||||
rocprofiler_kernel_id_t kernel_id; ///< Kernel identifier
|
||||
rocprofiler_dispatch_id_t dispatch_id; ///< unique id for each dispatch
|
||||
uint64_t size; ///< Size of this struct (minus reserved padding)
|
||||
rocprofiler_agent_id_t agent_id; ///< Agent ID where kernel is launched
|
||||
rocprofiler_queue_id_t queue_id; ///< Queue ID where kernel packet is enqueued
|
||||
rocprofiler_kernel_id_t kernel_id; ///< Kernel identifier
|
||||
rocprofiler_dispatch_id_t dispatch_id; ///< unique id for each dispatch
|
||||
uint32_t private_segment_size; ///< runtime private memory segment size
|
||||
uint32_t group_segment_size; ///< runtime group memory segment size
|
||||
rocprofiler_dim3_t workgroup_size; ///< runtime workgroup size (grid * threads)
|
||||
|
||||
@@ -170,25 +170,35 @@ compute_runtime_sizeof()
|
||||
return compute_runtime_sizeof<Tp>(0);
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
template <typename Tp, typename... Args>
|
||||
decltype(auto)
|
||||
init_public_api_struct(Tp&& val)
|
||||
init_public_api_struct(Tp&& val, Args&&... args)
|
||||
{
|
||||
assert_public_api_struct_properties<Tp>();
|
||||
|
||||
::memset(&val, 0, sizeof(Tp));
|
||||
val.size = compute_runtime_sizeof<Tp>();
|
||||
|
||||
if constexpr(sizeof...(Args) == 0)
|
||||
val.size = compute_runtime_sizeof<Tp>();
|
||||
else
|
||||
val = {compute_runtime_sizeof<Tp>(), std::forward<Args>(args)...};
|
||||
|
||||
return std::forward<Tp>(val);
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
template <typename Tp, typename... Args>
|
||||
Tp&
|
||||
init_public_api_struct(Tp& val)
|
||||
init_public_api_struct(Tp& val, Args&&... args)
|
||||
{
|
||||
assert_public_api_struct_properties<Tp>();
|
||||
|
||||
::memset(&val, 0, sizeof(Tp));
|
||||
val.size = compute_runtime_sizeof<Tp>();
|
||||
|
||||
if constexpr(sizeof...(Args) == 0)
|
||||
val.size = compute_runtime_sizeof<Tp>();
|
||||
else
|
||||
val = {compute_runtime_sizeof<Tp>(), std::forward<Args>(args)...};
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/domain.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/hip.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/code_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/scratch_memory.hpp"
|
||||
@@ -79,6 +80,7 @@ ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MARKER_NAME_API)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(CODE_OBJECT)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(SCRATCH_MEMORY)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(KERNEL_DISPATCH)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MEMORY_COPY)
|
||||
|
||||
template <size_t Idx, size_t... Tail>
|
||||
std::pair<const char*, size_t>
|
||||
@@ -218,6 +220,12 @@ rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_trac
|
||||
case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH:
|
||||
{
|
||||
val = rocprofiler::kernel_dispatch::name_by_id(operation);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY:
|
||||
{
|
||||
val = rocprofiler::hsa::async_copy::name_by_id(operation);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -323,6 +331,11 @@ rocprofiler_iterate_callback_tracing_kind_operations(
|
||||
ops = rocprofiler::kernel_dispatch::get_ids();
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY:
|
||||
{
|
||||
ops = rocprofiler::hsa::async_copy::get_ids();
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
for(const auto& itr : ops)
|
||||
@@ -451,6 +464,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args(
|
||||
case ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY:
|
||||
case ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT:
|
||||
case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH:
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY:
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
@@ -32,7 +32,11 @@
|
||||
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/utils.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
#include "lib/rocprofiler-sdk/tracing/fwd.hpp"
|
||||
#include "lib/rocprofiler-sdk/tracing/tracing.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/external_correlation.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hsa/api_id.h>
|
||||
#include <rocprofiler-sdk/hsa/table_id.h>
|
||||
@@ -146,8 +150,13 @@ get_names(std::vector<const char*>& _name_list, std::index_sequence<Idx...>)
|
||||
bool
|
||||
context_filter(const context::context* ctx)
|
||||
{
|
||||
return (ctx->buffered_tracer &&
|
||||
(ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)));
|
||||
auto has_buffered = (ctx->buffered_tracer &&
|
||||
(ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)));
|
||||
|
||||
auto has_callback = (ctx->callback_tracer &&
|
||||
(ctx->callback_tracer->domains(ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY)));
|
||||
|
||||
return (has_buffered || has_callback);
|
||||
}
|
||||
|
||||
constexpr auto null_rocp_agent_id =
|
||||
@@ -155,17 +164,58 @@ constexpr auto null_rocp_agent_id =
|
||||
|
||||
struct async_copy_data
|
||||
{
|
||||
hsa_signal_t orig_signal = {};
|
||||
hsa_signal_t rocp_signal = {};
|
||||
rocprofiler_thread_id_t tid = common::get_tid();
|
||||
rocprofiler_agent_id_t dst_agent = null_rocp_agent_id;
|
||||
rocprofiler_agent_id_t src_agent = null_rocp_agent_id;
|
||||
rocprofiler_memory_copy_operation_t direction = ROCPROFILER_MEMORY_COPY_NONE;
|
||||
context::correlation_id* correlation_id = nullptr;
|
||||
context::context_array_t contexts = {};
|
||||
external_corr_id_map_t extern_corr_ids = {};
|
||||
using timestamp_t = rocprofiler_timestamp_t;
|
||||
using callback_data_t = rocprofiler_callback_tracing_memory_copy_data_t;
|
||||
using buffered_data_t = rocprofiler_buffer_tracing_memory_copy_record_t;
|
||||
|
||||
hsa_signal_t orig_signal = {};
|
||||
hsa_signal_t rocp_signal = {};
|
||||
rocprofiler_thread_id_t tid = common::get_tid();
|
||||
rocprofiler_agent_id_t dst_agent = null_rocp_agent_id;
|
||||
rocprofiler_agent_id_t src_agent = null_rocp_agent_id;
|
||||
rocprofiler_memory_copy_operation_t direction = ROCPROFILER_MEMORY_COPY_NONE;
|
||||
uint64_t bytes_copied = 0;
|
||||
context::correlation_id* correlation_id = nullptr;
|
||||
tracing::tracing_data tracing_data = {};
|
||||
|
||||
callback_data_t get_callback_data(timestamp_t _beg = 0, timestamp_t _end = 0) const;
|
||||
buffered_data_t get_buffered_record(const context_t* _ctx,
|
||||
timestamp_t _beg = 0,
|
||||
timestamp_t _end = 0) const;
|
||||
};
|
||||
|
||||
async_copy_data::callback_data_t
|
||||
async_copy_data::get_callback_data(timestamp_t _beg, timestamp_t _end) const
|
||||
{
|
||||
LOG_IF(FATAL, direction == ROCPROFILER_MEMORY_COPY_NONE) << "direction has not been set";
|
||||
|
||||
return common::init_public_api_struct(
|
||||
callback_data_t{}, _beg, _end, dst_agent, src_agent, bytes_copied);
|
||||
}
|
||||
|
||||
async_copy_data::buffered_data_t
|
||||
async_copy_data::get_buffered_record(const context_t* _ctx,
|
||||
timestamp_t _beg,
|
||||
timestamp_t _end) const
|
||||
{
|
||||
LOG_IF(FATAL, direction == ROCPROFILER_MEMORY_COPY_NONE) << "direction has not been set";
|
||||
|
||||
auto _external_corr_id =
|
||||
(_ctx) ? tracing_data.external_correlation_ids.at(_ctx) : context::null_user_data;
|
||||
auto _corr_id = rocprofiler_correlation_id_t{correlation_id->internal, _external_corr_id};
|
||||
|
||||
return common::init_public_api_struct(buffered_data_t{},
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
direction,
|
||||
_corr_id,
|
||||
correlation_id->thread_idx,
|
||||
_beg,
|
||||
_end,
|
||||
dst_agent,
|
||||
src_agent,
|
||||
bytes_copied);
|
||||
}
|
||||
|
||||
struct active_signals
|
||||
{
|
||||
active_signals();
|
||||
@@ -316,48 +366,34 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg)
|
||||
<< copy_time.end << ") was less than the start time (" << copy_time.start << ")";
|
||||
|
||||
// get the contexts that were active when the signal was created
|
||||
const auto& ctxs = _data->contexts;
|
||||
const auto& tracing_data = _data->tracing_data;
|
||||
// we need to decrement this reference count at the end of the functions
|
||||
auto* _corr_id = _data->correlation_id;
|
||||
|
||||
if(copy_time_status == HSA_STATUS_SUCCESS && !ctxs.empty())
|
||||
if(copy_time_status == HSA_STATUS_SUCCESS && !tracing_data.empty())
|
||||
{
|
||||
const auto& _extern_corr_ids = _data->extern_corr_ids;
|
||||
|
||||
for(const auto* itr : ctxs)
|
||||
if(!_data->tracing_data.callback_contexts.empty())
|
||||
{
|
||||
auto* _buffer = buffer::get_buffer(
|
||||
itr->buffered_tracer->buffer_data.at(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY));
|
||||
auto _tracer_data = _data->get_callback_data(copy_time.start, copy_time.end);
|
||||
|
||||
// go ahead and create the correlation id value since we expect at least one of these
|
||||
// domains will require it
|
||||
auto _corr_id_v =
|
||||
rocprofiler_correlation_id_t{.internal = 0, .external = context::null_user_data};
|
||||
if(_corr_id)
|
||||
{
|
||||
_corr_id_v.internal = _corr_id->internal;
|
||||
_corr_id_v.external = _extern_corr_ids.at(itr);
|
||||
}
|
||||
tracing::execute_phase_exit_callbacks(_data->tracing_data.callback_contexts,
|
||||
_data->tracing_data.external_correlation_ids,
|
||||
ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY,
|
||||
_data->direction,
|
||||
_tracer_data);
|
||||
}
|
||||
|
||||
if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY))
|
||||
{
|
||||
if(copy_time_status == HSA_STATUS_SUCCESS)
|
||||
{
|
||||
auto record = rocprofiler_buffer_tracing_memory_copy_record_t{
|
||||
sizeof(rocprofiler_buffer_tracing_memory_copy_record_t),
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
_data->direction,
|
||||
_corr_id_v,
|
||||
copy_time.start * sysclock_period,
|
||||
copy_time.end * sysclock_period,
|
||||
_data->dst_agent,
|
||||
_data->src_agent};
|
||||
if(!_data->tracing_data.buffered_contexts.empty())
|
||||
{
|
||||
auto record = _data->get_buffered_record(nullptr, copy_time.start, copy_time.end);
|
||||
|
||||
CHECK_NOTNULL(_buffer)->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING,
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
record);
|
||||
}
|
||||
}
|
||||
tracing::execute_buffer_record_emplace(_data->tracing_data.buffered_contexts,
|
||||
_data->tid,
|
||||
_data->correlation_id->internal,
|
||||
_data->tracing_data.external_correlation_ids,
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
_data->direction,
|
||||
record);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -412,18 +448,19 @@ template <size_t Idx>
|
||||
struct arg_indices;
|
||||
|
||||
#define HSA_ASYNC_COPY_DEFINE_ARG_INDICES( \
|
||||
ENUM_ID, DST_AGENT_IDX, SRC_AGENT_IDX, COMPLETION_SIGNAL_IDX) \
|
||||
ENUM_ID, DST_AGENT_IDX, SRC_AGENT_IDX, COMPLETION_SIGNAL_IDX, COPY_SIZE_IDX) \
|
||||
template <> \
|
||||
struct arg_indices<ENUM_ID> \
|
||||
{ \
|
||||
static constexpr auto dst_agent_idx = DST_AGENT_IDX; \
|
||||
static constexpr auto src_agent_idx = SRC_AGENT_IDX; \
|
||||
static constexpr auto completion_signal_idx = COMPLETION_SIGNAL_IDX; \
|
||||
static constexpr auto copy_size_idx = COPY_SIZE_IDX; \
|
||||
};
|
||||
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_id, 1, 3, 7)
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_on_engine_id, 1, 3, 7)
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_rect_id, 5, 5, 9)
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_id, 1, 3, 7, 4)
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_on_engine_id, 1, 3, 7, 4)
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_rect_id, 5, 5, 9, 4)
|
||||
|
||||
template <typename FuncT, typename ArgsT, size_t... Idx>
|
||||
decltype(auto)
|
||||
@@ -432,24 +469,33 @@ invoke(FuncT&& _func, ArgsT&& _args, std::index_sequence<Idx...>)
|
||||
return std::forward<FuncT>(_func)(std::get<Idx>(_args)...);
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
uint64_t compute_copy_bytes(Tp);
|
||||
|
||||
template <>
|
||||
uint64_t
|
||||
compute_copy_bytes(size_t val)
|
||||
{
|
||||
return val;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint64_t
|
||||
compute_copy_bytes(const hsa_dim3_t* val)
|
||||
{
|
||||
return (val) ? (val->x * val->y * val->z) : 0;
|
||||
}
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx, typename... Args>
|
||||
hsa_status_t
|
||||
async_copy_impl(Args... args)
|
||||
{
|
||||
using meta_type = hsa_api_meta<TableIdx, OpIdx>;
|
||||
|
||||
constexpr auto N = sizeof...(Args);
|
||||
constexpr auto N = sizeof...(Args);
|
||||
constexpr auto copy_size_idx = arg_indices<OpIdx>::copy_size_idx;
|
||||
|
||||
auto&& _tied_args = std::tie(args...);
|
||||
auto ctxs = context::get_active_contexts(context_filter);
|
||||
|
||||
// no active contexts so just execute original
|
||||
if(ctxs.empty())
|
||||
{
|
||||
return invoke(get_next_dispatch<TableIdx, OpIdx>(),
|
||||
std::move(_tied_args),
|
||||
std::make_index_sequence<N>{});
|
||||
}
|
||||
|
||||
// determine the direction of the memory copy
|
||||
auto _direction = ROCPROFILER_MEMORY_COPY_NONE;
|
||||
@@ -515,30 +561,35 @@ async_copy_impl(Args... args)
|
||||
}
|
||||
}
|
||||
|
||||
// remove any contexts which do not wish to trace this memory copy direction
|
||||
ctxs.erase(std::remove_if(ctxs.begin(),
|
||||
ctxs.end(),
|
||||
[_direction](const context_t* ctx) {
|
||||
return !ctx->buffered_tracer->domains(
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, _direction);
|
||||
}),
|
||||
ctxs.end());
|
||||
async_copy_data* _data = nullptr;
|
||||
|
||||
// if no contexts remain, execute as usual
|
||||
if(ctxs.empty())
|
||||
{
|
||||
return invoke(get_next_dispatch<TableIdx, OpIdx>(),
|
||||
std::move(_tied_args),
|
||||
std::make_index_sequence<N>{});
|
||||
auto tracing_data = tracing::tracing_data{};
|
||||
|
||||
tracing::populate_contexts(ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY,
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
_direction,
|
||||
tracing_data);
|
||||
// if no contexts are tracing memory copies for this direction, execute as usual
|
||||
if(tracing_data.empty())
|
||||
{
|
||||
return invoke(get_next_dispatch<TableIdx, OpIdx>(),
|
||||
std::move(_tied_args),
|
||||
std::make_index_sequence<N>{});
|
||||
}
|
||||
|
||||
_data = new async_copy_data{};
|
||||
_data->tracing_data = std::move(tracing_data);
|
||||
}
|
||||
|
||||
auto& tracing_data = _data->tracing_data;
|
||||
|
||||
// at this point, we want to install our own signal handler
|
||||
auto* _data = new async_copy_data{};
|
||||
_data->tid = common::get_tid();
|
||||
_data->dst_agent = _dst_agent_id;
|
||||
_data->src_agent = _src_agent_id;
|
||||
_data->direction = _direction;
|
||||
_data->contexts = ctxs; // avoid using move in case code below accidentally uses ctxs
|
||||
_data->tid = common::get_tid();
|
||||
_data->dst_agent = _dst_agent_id;
|
||||
_data->src_agent = _src_agent_id;
|
||||
_data->direction = _direction;
|
||||
_data->bytes_copied = compute_copy_bytes(std::get<copy_size_idx>(_tied_args));
|
||||
|
||||
constexpr auto completion_signal_idx = arg_indices<OpIdx>::completion_signal_idx;
|
||||
auto& _completion_signal = std::get<completion_signal_idx>(_tied_args);
|
||||
@@ -607,22 +658,33 @@ async_copy_impl(Args... args)
|
||||
}
|
||||
}};
|
||||
|
||||
auto& extern_corr_ids = _data->extern_corr_ids;
|
||||
extern_corr_ids.reserve(_data->contexts.size()); // reserve for performance
|
||||
for(const auto* ctx : _data->contexts)
|
||||
extern_corr_ids.emplace(ctx,
|
||||
ctx->correlation_tracer.external_correlator.get(
|
||||
_data->tid,
|
||||
ctx,
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY,
|
||||
_direction,
|
||||
_data->correlation_id->internal));
|
||||
auto thr_id = _data->correlation_id->thread_idx;
|
||||
tracing::populate_external_correlation_ids(tracing_data.external_correlation_ids,
|
||||
thr_id,
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY,
|
||||
_direction,
|
||||
_data->correlation_id->internal);
|
||||
|
||||
if(!tracing_data.callback_contexts.empty())
|
||||
{
|
||||
auto _tracer_data = _data->get_callback_data();
|
||||
|
||||
tracing::execute_phase_enter_callbacks(tracing_data.callback_contexts,
|
||||
thr_id,
|
||||
_data->correlation_id->internal,
|
||||
tracing_data.external_correlation_ids,
|
||||
ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY,
|
||||
_direction,
|
||||
_tracer_data);
|
||||
}
|
||||
|
||||
_data->orig_signal = _completion_signal;
|
||||
_completion_signal = _data->rocp_signal;
|
||||
|
||||
LOG(INFO) << "Memcpy Original Signal " << std::hex << _data->orig_signal.handle << std::dec
|
||||
<< ": " << original_value << " | Replacement Signal: " << std::hex
|
||||
<< _completion_signal.handle << std::dec << ": 1";
|
||||
|
||||
CHECK_NOTNULL(get_active_signals())->fetch_add(1);
|
||||
|
||||
return invoke(
|
||||
|
||||
@@ -69,6 +69,8 @@ struct tracing_data
|
||||
callback_context_data_vec_t callback_contexts = {};
|
||||
buffered_context_data_vec_t buffered_contexts = {};
|
||||
external_correlation_id_map_t external_correlation_ids = {};
|
||||
|
||||
bool empty() const { return (callback_contexts.empty() && buffered_contexts.empty()); }
|
||||
};
|
||||
} // namespace tracing
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
@@ -17,4 +19,4 @@ def pytest_addoption(parser):
|
||||
def input_data(request):
|
||||
filename = request.config.getoption("--input")
|
||||
with open(filename, "r") as inp:
|
||||
return json.load(inp)
|
||||
return dotdict(json.load(inp))
|
||||
|
||||
@@ -9,7 +9,7 @@ def node_exists(name, data, min_len=1):
|
||||
assert name in data
|
||||
assert data[name] is not None
|
||||
if isinstance(data[name], (list, tuple, dict, set)):
|
||||
assert len(data[name]) >= min_len
|
||||
assert len(data[name]) >= min_len, f"{name}:\n{data}"
|
||||
|
||||
|
||||
def test_data_structure(input_data):
|
||||
@@ -38,10 +38,11 @@ def test_data_structure(input_data):
|
||||
node_exists("hip_api_traces", sdk_data["callback_records"], 0)
|
||||
node_exists("marker_api_traces", sdk_data["callback_records"])
|
||||
node_exists("kernel_dispatches", sdk_data["callback_records"])
|
||||
node_exists("memory_copies", sdk_data["callback_records"], 24)
|
||||
|
||||
node_exists("names", sdk_data["buffer_records"])
|
||||
node_exists("kernel_dispatches", sdk_data["buffer_records"])
|
||||
node_exists("memory_copies", sdk_data["buffer_records"], 4)
|
||||
node_exists("memory_copies", sdk_data["buffer_records"], 12)
|
||||
node_exists("hsa_api_traces", sdk_data["buffer_records"])
|
||||
node_exists("hip_api_traces", sdk_data["buffer_records"], 0)
|
||||
node_exists("marker_api_traces", sdk_data["buffer_records"])
|
||||
@@ -171,6 +172,10 @@ def test_external_correlation_ids(input_data):
|
||||
assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}"
|
||||
assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}"
|
||||
|
||||
for itr in sdk_data["callback_records"][titr]:
|
||||
assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}"
|
||||
assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}"
|
||||
|
||||
|
||||
def test_kernel_ids(input_data):
|
||||
data = input_data
|
||||
@@ -205,7 +210,7 @@ def test_kernel_dispatch_ids(input_data):
|
||||
num_dispatches = len(sdk_data["buffer_records"]["kernel_dispatches"])
|
||||
num_cb_dispatches = len(sdk_data["callback_records"]["kernel_dispatches"])
|
||||
|
||||
assert num_cb_dispatches == (2 * num_dispatches)
|
||||
assert num_cb_dispatches == (3 * num_dispatches)
|
||||
|
||||
bf_seq_ids = []
|
||||
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
|
||||
@@ -218,7 +223,7 @@ def test_kernel_dispatch_ids(input_data):
|
||||
bf_seq_ids = sorted(bf_seq_ids)
|
||||
cb_seq_ids = sorted(cb_seq_ids)
|
||||
|
||||
assert (2 * len(bf_seq_ids)) == len(cb_seq_ids)
|
||||
assert (3 * len(bf_seq_ids)) == len(cb_seq_ids)
|
||||
|
||||
assert bf_seq_ids[0] == cb_seq_ids[0]
|
||||
assert bf_seq_ids[-1] == cb_seq_ids[-1]
|
||||
@@ -230,7 +235,7 @@ def test_kernel_dispatch_ids(input_data):
|
||||
cb_seq_ids_uniq = get_uniq(cb_seq_ids)
|
||||
|
||||
assert bf_seq_ids == bf_seq_ids_uniq
|
||||
assert len(cb_seq_ids) == (2 * len(cb_seq_ids_uniq))
|
||||
assert len(cb_seq_ids) == (3 * len(cb_seq_ids_uniq))
|
||||
assert len(bf_seq_ids) == num_dispatches
|
||||
assert len(bf_seq_ids_uniq) == num_dispatches
|
||||
assert len(cb_seq_ids_uniq) == num_dispatches
|
||||
@@ -249,17 +254,39 @@ def test_async_copy_direction(input_data):
|
||||
async_dir_cnt = dict([(idx, 0) for idx in range(0, 5)])
|
||||
for itr in sdk_data["buffer_records"]["memory_copies"]:
|
||||
op_id = itr["operation"]
|
||||
assert op_id > 1
|
||||
assert op_id < 4
|
||||
assert op_id > 1, f"{itr}"
|
||||
assert op_id < 4, f"{itr}"
|
||||
async_dir_cnt[op_id] += 1
|
||||
|
||||
for itr in sdk_data["callback_records"]["memory_copies"]:
|
||||
op_id = itr.operation
|
||||
assert op_id > 1, f"{itr}"
|
||||
assert op_id < 4, f"{itr}"
|
||||
async_dir_cnt[op_id] += 1
|
||||
|
||||
phase = itr.phase
|
||||
pitr = itr.payload
|
||||
|
||||
assert phase is not None, f"{itr}"
|
||||
assert pitr is not None, f"{itr}"
|
||||
|
||||
if phase == 1:
|
||||
assert pitr.start_timestamp == 0, f"{itr}"
|
||||
assert pitr.end_timestamp == 0, f"{itr}"
|
||||
elif phase == 2:
|
||||
assert pitr.start_timestamp > 0, f"{itr}"
|
||||
assert pitr.end_timestamp > 0, f"{itr}"
|
||||
assert pitr.end_timestamp >= pitr.start_timestamp, f"{itr}"
|
||||
else:
|
||||
assert phase == 1 or phase == 2, f"{itr}"
|
||||
|
||||
# in the transpose test which generates the input file,
|
||||
# two threads and the main thread (so three threads total)
|
||||
# each perform one H2D + one D2H memory copy
|
||||
assert async_dir_cnt[0] == 0
|
||||
assert async_dir_cnt[1] == 0
|
||||
assert async_dir_cnt[2] == 6
|
||||
assert async_dir_cnt[3] == 6
|
||||
assert async_dir_cnt[2] == 6 * 3
|
||||
assert async_dir_cnt[3] == 6 * 3
|
||||
assert async_dir_cnt[4] == 0
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
|
||||
namespace std
|
||||
{
|
||||
template <typename Tp>
|
||||
struct hash;
|
||||
|
||||
template <>
|
||||
struct hash<rocprofiler_agent_id_t>
|
||||
{
|
||||
size_t operator()(rocprofiler_agent_id_t id) const { return id.handle; }
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
inline bool
|
||||
operator==(rocprofiler_agent_id_t lhs, rocprofiler_agent_id_t rhs)
|
||||
{
|
||||
return (lhs.handle == rhs.handle);
|
||||
}
|
||||
@@ -300,6 +300,18 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_kernel_dispatch_data_t data)
|
||||
SAVE_DATA_FIELD(dispatch_info);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_callback_tracing_memory_copy_data_t data)
|
||||
{
|
||||
SAVE_DATA_FIELD(size);
|
||||
SAVE_DATA_FIELD(start_timestamp);
|
||||
SAVE_DATA_FIELD(end_timestamp);
|
||||
SAVE_DATA_FIELD(dst_agent_id);
|
||||
SAVE_DATA_FIELD(src_agent_id);
|
||||
SAVE_DATA_FIELD(bytes);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_profile_counting_dispatch_data_t data)
|
||||
@@ -395,11 +407,13 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_copy_record_t data)
|
||||
SAVE_DATA_FIELD(size);
|
||||
SAVE_DATA_FIELD(kind);
|
||||
SAVE_DATA_FIELD(operation);
|
||||
SAVE_DATA_FIELD(thread_id);
|
||||
SAVE_DATA_FIELD(correlation_id);
|
||||
SAVE_DATA_FIELD(start_timestamp);
|
||||
SAVE_DATA_FIELD(end_timestamp);
|
||||
SAVE_DATA_FIELD(dst_agent_id);
|
||||
SAVE_DATA_FIELD(src_agent_id);
|
||||
SAVE_DATA_FIELD(bytes);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
|
||||
@@ -265,7 +265,7 @@ def test_kernel_dispatch_ids(input_data):
|
||||
num_dispatches = len(sdk_data["buffer_records"]["kernel_dispatches"])
|
||||
num_cb_dispatches = len(sdk_data["callback_records"]["kernel_dispatches"])
|
||||
|
||||
assert num_cb_dispatches == (2 * num_dispatches)
|
||||
assert num_cb_dispatches == (3 * num_dispatches)
|
||||
|
||||
bf_seq_ids = []
|
||||
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
|
||||
@@ -278,7 +278,7 @@ def test_kernel_dispatch_ids(input_data):
|
||||
bf_seq_ids = sorted(bf_seq_ids)
|
||||
cb_seq_ids = sorted(cb_seq_ids)
|
||||
|
||||
assert (2 * len(bf_seq_ids)) == len(cb_seq_ids)
|
||||
assert (3 * len(bf_seq_ids)) == len(cb_seq_ids)
|
||||
|
||||
assert bf_seq_ids[0] == cb_seq_ids[0]
|
||||
assert bf_seq_ids[-1] == cb_seq_ids[-1]
|
||||
@@ -290,7 +290,7 @@ def test_kernel_dispatch_ids(input_data):
|
||||
cb_seq_ids_uniq = get_uniq(cb_seq_ids)
|
||||
|
||||
assert bf_seq_ids == bf_seq_ids_uniq
|
||||
assert len(cb_seq_ids) == (2 * len(cb_seq_ids_uniq))
|
||||
assert len(cb_seq_ids) == (3 * len(cb_seq_ids_uniq))
|
||||
assert len(bf_seq_ids) == num_dispatches
|
||||
assert len(bf_seq_ids_uniq) == num_dispatches
|
||||
assert len(cb_seq_ids_uniq) == num_dispatches
|
||||
|
||||
+72
-58
@@ -33,6 +33,7 @@
|
||||
|
||||
#include "common/defines.hpp"
|
||||
#include "common/filesystem.hpp"
|
||||
#include "common/hash.hpp"
|
||||
#include "common/perfetto.hpp"
|
||||
#include "common/serialization.hpp"
|
||||
|
||||
@@ -240,20 +241,6 @@ rocprofiler_client_finalize_t client_fini_func = nullptr;
|
||||
callback_name_info
|
||||
get_callback_tracing_names()
|
||||
{
|
||||
static const auto supported = std::unordered_set<rocprofiler_callback_tracing_kind_t>{
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY,
|
||||
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT,
|
||||
};
|
||||
|
||||
auto cb_name_info = callback_name_info{};
|
||||
//
|
||||
// callback for each kind operation
|
||||
@@ -262,14 +249,11 @@ get_callback_tracing_names()
|
||||
[](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) {
|
||||
auto* name_info_v = static_cast<callback_name_info*>(data_v);
|
||||
|
||||
if(supported.count(kindv) > 0)
|
||||
{
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name(
|
||||
kindv, operation, &name, nullptr),
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->operation_names[kindv][operation] = name;
|
||||
}
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name(
|
||||
kindv, operation, &name, nullptr),
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->operation_names[kindv][operation] = name;
|
||||
return 0;
|
||||
};
|
||||
|
||||
@@ -284,12 +268,8 @@ get_callback_tracing_names()
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->kind_names[kind] = name;
|
||||
|
||||
if(supported.count(kind) > 0)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations(
|
||||
kind, tracing_kind_operation_cb, static_cast<void*>(data)),
|
||||
"iterating buffer tracing kind operations");
|
||||
}
|
||||
rocprofiler_iterate_callback_tracing_kind_operations(
|
||||
kind, tracing_kind_operation_cb, static_cast<void*>(data));
|
||||
return 0;
|
||||
};
|
||||
|
||||
@@ -326,14 +306,11 @@ get_buffer_tracing_names()
|
||||
[](rocprofiler_buffer_tracing_kind_t kindv, uint32_t operation, void* data_v) {
|
||||
auto* name_info_v = static_cast<buffer_name_info*>(data_v);
|
||||
|
||||
if(supported.count(kindv) > 0)
|
||||
{
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_operation_name(
|
||||
kindv, operation, &name, nullptr),
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->operation_names[kindv][operation] = name;
|
||||
}
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_operation_name(
|
||||
kindv, operation, &name, nullptr),
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->operation_names[kindv][operation] = name;
|
||||
return 0;
|
||||
};
|
||||
|
||||
@@ -348,12 +325,8 @@ get_buffer_tracing_names()
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->kind_names[kind] = name;
|
||||
|
||||
if(supported.count(kind) > 0)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_buffer_tracing_kind_operations(
|
||||
kind, tracing_kind_operation_cb, static_cast<void*>(data)),
|
||||
"iterating buffer tracing kind operations");
|
||||
}
|
||||
rocprofiler_iterate_buffer_tracing_kind_operations(
|
||||
kind, tracing_kind_operation_cb, static_cast<void*>(data));
|
||||
return 0;
|
||||
};
|
||||
|
||||
@@ -517,6 +490,21 @@ struct kernel_dispatch_callback_record_t
|
||||
}
|
||||
};
|
||||
|
||||
struct memory_copy_callback_record_t
|
||||
{
|
||||
uint64_t timestamp = 0;
|
||||
rocprofiler_callback_tracing_record_t record = {};
|
||||
rocprofiler_callback_tracing_memory_copy_data_t payload = {};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
ar(cereal::make_nvp("timestamp", timestamp));
|
||||
cereal::save(ar, record);
|
||||
ar(cereal::make_nvp("payload", payload));
|
||||
}
|
||||
};
|
||||
|
||||
struct scratch_memory_callback_record_t
|
||||
{
|
||||
uint64_t timestamp = 0;
|
||||
@@ -608,6 +596,7 @@ auto counter_collection_bf_records = std::deque<profile_counting_record>{};
|
||||
auto hip_api_cb_records = std::deque<hip_api_callback_record_t>{};
|
||||
auto scratch_memory_cb_records = std::deque<scratch_memory_callback_record_t>{};
|
||||
auto kernel_dispatch_cb_records = std::deque<kernel_dispatch_callback_record_t>{};
|
||||
auto memory_copy_cb_records = std::deque<memory_copy_callback_record_t>{};
|
||||
|
||||
int
|
||||
set_external_correlation_id(rocprofiler_thread_id_t thr_id,
|
||||
@@ -812,6 +801,14 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
kernel_dispatch_cb_records.emplace_back(
|
||||
kernel_dispatch_callback_record_t{ts, record, *data});
|
||||
}
|
||||
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY)
|
||||
{
|
||||
auto* data = static_cast<rocprofiler_callback_tracing_memory_copy_data_t*>(record.payload);
|
||||
|
||||
static auto _mutex = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_mutex};
|
||||
memory_copy_cb_records.emplace_back(memory_copy_callback_record_t{ts, record, *data});
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error{"unsupported callback kind"};
|
||||
@@ -822,7 +819,7 @@ auto hsa_api_bf_records = std::deque<rocprofiler_buffer_tracing_hsa_api_
|
||||
auto marker_api_bf_records = std::deque<rocprofiler_buffer_tracing_marker_api_record_t>{};
|
||||
auto hip_api_bf_records = std::deque<rocprofiler_buffer_tracing_hip_api_record_t>{};
|
||||
auto kernel_dispatch_bf_records = std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>{};
|
||||
auto memory_copy_records = std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>{};
|
||||
auto memory_copy_bf_records = std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>{};
|
||||
auto scratch_memory_records = std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>{};
|
||||
auto page_migration_records = std::deque<rocprofiler_buffer_tracing_page_migration_record_t>{};
|
||||
auto corr_id_retire_records =
|
||||
@@ -900,7 +897,7 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_memory_copy_record_t*>(header->payload);
|
||||
|
||||
memory_copy_records.emplace_back(*record);
|
||||
memory_copy_bf_records.emplace_back(*record);
|
||||
}
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY)
|
||||
{
|
||||
@@ -1006,7 +1003,8 @@ rocprofiler_context_id_t code_object_ctx = {};
|
||||
rocprofiler_context_id_t hsa_api_buffered_ctx = {};
|
||||
rocprofiler_context_id_t hip_api_buffered_ctx = {};
|
||||
rocprofiler_context_id_t marker_api_buffered_ctx = {};
|
||||
rocprofiler_context_id_t memory_copy_ctx = {};
|
||||
rocprofiler_context_id_t memory_copy_callback_ctx = {};
|
||||
rocprofiler_context_id_t memory_copy_buffered_ctx = {};
|
||||
rocprofiler_context_id_t counter_collection_ctx = {};
|
||||
rocprofiler_context_id_t scratch_memory_ctx = {};
|
||||
rocprofiler_context_id_t corr_id_retire_ctx = {};
|
||||
@@ -1030,11 +1028,12 @@ auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
|
||||
{"MARKER_API_CALLBACK", &marker_api_callback_ctx},
|
||||
{"CODE_OBJECT", &code_object_ctx},
|
||||
{"KERNEL_DISPATCH_CALLBACK", &kernel_dispatch_callback_ctx},
|
||||
{"MEMORY_COPY_CALLBACK", &memory_copy_callback_ctx},
|
||||
{"HSA_API_BUFFERED", &hsa_api_buffered_ctx},
|
||||
{"HIP_API_BUFFERED", &hip_api_buffered_ctx},
|
||||
{"MARKER_API_BUFFERED", &marker_api_buffered_ctx},
|
||||
{"KERNEL_DISPATCH_BUFFERED", &kernel_dispatch_buffered_ctx},
|
||||
{"MEMORY_COPY", &memory_copy_ctx},
|
||||
{"MEMORY_COPY_BUFFERED", &memory_copy_buffered_ctx},
|
||||
{"PAGE_MIGRATION", &page_migration_ctx},
|
||||
{"COUNTER_COLLECTION", &counter_collection_ctx},
|
||||
{"SCRATCH_MEMORY", &scratch_memory_ctx},
|
||||
@@ -1051,7 +1050,8 @@ auto buffers = std::array<rocprofiler_buffer_id_t*, 9>{&hsa_api_buffered_buffer,
|
||||
&counter_collection_buffer,
|
||||
&corr_id_retire_buffer};
|
||||
|
||||
auto agents = std::vector<rocprofiler_agent_t>{};
|
||||
auto agents = std::vector<rocprofiler_agent_t>{};
|
||||
auto agents_map = std::unordered_map<rocprofiler_agent_id_t, rocprofiler_agent_t>{};
|
||||
|
||||
rocprofiler_timestamp_t init_time = 0;
|
||||
rocprofiler_timestamp_t fini_time = 0;
|
||||
@@ -1085,6 +1085,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
const_cast<void*>(static_cast<const void*>(&agents))),
|
||||
"query available agents");
|
||||
|
||||
for(auto itr : agents)
|
||||
agents_map.emplace(itr.id, itr);
|
||||
|
||||
auto* call_stack_v = static_cast<call_stack_t*>(tool_data);
|
||||
|
||||
call_stack_v->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""});
|
||||
@@ -1154,8 +1157,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
nullptr),
|
||||
"marker name api tracing service configure");
|
||||
|
||||
auto kernel_dispatch_cb_ops =
|
||||
make_array<rocprofiler_tracing_operation_t>(ROCPROFILER_KERNEL_DISPATCH_ENQUEUE);
|
||||
auto kernel_dispatch_cb_ops = make_array<rocprofiler_tracing_operation_t>(
|
||||
ROCPROFILER_KERNEL_DISPATCH_ENQUEUE, ROCPROFILER_KERNEL_DISPATCH_COMPLETE);
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service(kernel_dispatch_callback_ctx,
|
||||
@@ -1166,6 +1169,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
nullptr),
|
||||
"kernel dispatch callback tracing service configure");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service(memory_copy_callback_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY,
|
||||
nullptr,
|
||||
0,
|
||||
tool_tracing_callback,
|
||||
nullptr),
|
||||
"memory copy callback tracing service configure");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service(scratch_memory_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY,
|
||||
@@ -1214,7 +1226,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
&kernel_dispatch_buffer),
|
||||
"buffer creation");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(memory_copy_ctx,
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(memory_copy_buffered_ctx,
|
||||
buffer_size,
|
||||
watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
@@ -1310,7 +1322,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
"buffer tracing service for kernel dispatch configure");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_buffer_tracing_service(memory_copy_ctx,
|
||||
rocprofiler_configure_buffer_tracing_service(memory_copy_buffered_ctx,
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
nullptr,
|
||||
0,
|
||||
@@ -1495,8 +1507,9 @@ tool_fini(void* tool_data)
|
||||
<< ", marker_api_callback_records=" << marker_api_cb_records.size()
|
||||
<< ", scratch_memory_callback_records=" << scratch_memory_cb_records.size()
|
||||
<< ", kernel_dispatch_callback_records=" << kernel_dispatch_cb_records.size()
|
||||
<< ", memory_copy_callback_records=" << memory_copy_cb_records.size()
|
||||
<< ", kernel_dispatch_bf_records=" << kernel_dispatch_bf_records.size()
|
||||
<< ", memory_copy_records=" << memory_copy_records.size()
|
||||
<< ", memory_copy_bf_records=" << memory_copy_bf_records.size()
|
||||
<< ", scratch_memory_records=" << scratch_memory_records.size()
|
||||
<< ", page_migration=" << page_migration_records.size()
|
||||
<< ", hsa_api_bf_records=" << hsa_api_bf_records.size()
|
||||
@@ -1593,6 +1606,7 @@ write_json(call_stack_t* _call_stack)
|
||||
json_ar(cereal::make_nvp("marker_api_traces", marker_api_cb_records));
|
||||
json_ar(cereal::make_nvp("scratch_memory_traces", scratch_memory_cb_records));
|
||||
json_ar(cereal::make_nvp("kernel_dispatches", kernel_dispatch_cb_records));
|
||||
json_ar(cereal::make_nvp("memory_copies", memory_copy_cb_records));
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
std::cerr << "[" << getpid() << "][" << __FUNCTION__
|
||||
@@ -1607,7 +1621,7 @@ write_json(call_stack_t* _call_stack)
|
||||
{
|
||||
json_ar(cereal::make_nvp("names", buffer_name_info));
|
||||
json_ar(cereal::make_nvp("kernel_dispatches", kernel_dispatch_bf_records));
|
||||
json_ar(cereal::make_nvp("memory_copies", memory_copy_records));
|
||||
json_ar(cereal::make_nvp("memory_copies", memory_copy_bf_records));
|
||||
json_ar(cereal::make_nvp("scratch_memory_traces", scratch_memory_records));
|
||||
json_ar(cereal::make_nvp("page_migration", page_migration_records));
|
||||
json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_bf_records));
|
||||
@@ -1682,7 +1696,7 @@ write_perfetto()
|
||||
for(auto itr : marker_api_bf_records)
|
||||
tids.emplace(itr.thread_id);
|
||||
|
||||
for(auto itr : memory_copy_records)
|
||||
for(auto itr : memory_copy_bf_records)
|
||||
{
|
||||
agent_ids.emplace(itr.dst_agent_id.handle);
|
||||
agent_ids.emplace(itr.src_agent_id.handle);
|
||||
@@ -1853,7 +1867,7 @@ write_perfetto()
|
||||
itr.end_timestamp);
|
||||
}
|
||||
|
||||
for(auto itr : memory_copy_records)
|
||||
for(auto itr : memory_copy_bf_records)
|
||||
{
|
||||
auto& name = buffer_name_info.operation_names.at(itr.kind).at(itr.operation);
|
||||
auto& track = agent_tracks.at(itr.dst_agent_id.handle);
|
||||
@@ -1870,9 +1884,9 @@ write_perfetto()
|
||||
"operation",
|
||||
itr.operation,
|
||||
"src_agent",
|
||||
itr.src_agent_id.handle,
|
||||
agents_map.at(itr.src_agent_id).logical_node_id,
|
||||
"dst_agent",
|
||||
itr.dst_agent_id.handle);
|
||||
agents_map.at(itr.dst_agent_id).logical_node_id);
|
||||
TRACE_EVENT_END(rocprofiler::trait::name<rocprofiler::category::memory_copy>::value,
|
||||
track,
|
||||
itr.end_timestamp,
|
||||
@@ -1913,7 +1927,7 @@ write_perfetto()
|
||||
"kind",
|
||||
itr.kind,
|
||||
"agent",
|
||||
info.agent_id.handle,
|
||||
agents_map.at(info.agent_id).logical_node_id,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal,
|
||||
"queue",
|
||||
|
||||
Referencia en una nueva incidencia
Block a user