[SDK] Memory copy src and dst addresses (#282)
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
[ROCm/rocprofiler-sdk commit: 66e9dc54e9]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
798e13d4c2
Коммит
50ed966153
@@ -235,6 +235,8 @@ typedef struct
|
||||
rocprofiler_agent_id_t dst_agent_id; ///< destination agent of copy
|
||||
rocprofiler_agent_id_t src_agent_id; ///< source agent of copy
|
||||
uint64_t bytes; ///< bytes copied
|
||||
rocprofiler_address_t dst_address; ///< destination address
|
||||
rocprofiler_address_t src_address; ///< source address
|
||||
|
||||
/// @var kind
|
||||
/// @brief ::ROCPROFILER_BUFFER_TRACING_MEMORY_COPY
|
||||
@@ -258,6 +260,7 @@ typedef struct
|
||||
rocprofiler_agent_id_t agent_id; ///< agent information for memory allocation
|
||||
rocprofiler_address_t address; ///< starting address for memory allocation
|
||||
uint64_t allocation_size; ///< size for memory allocation
|
||||
|
||||
/// @var kind
|
||||
/// @brief ::ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION
|
||||
/// @var operation
|
||||
|
||||
@@ -291,6 +291,8 @@ typedef struct
|
||||
rocprofiler_agent_id_t dst_agent_id; ///< destination agent of copy
|
||||
rocprofiler_agent_id_t src_agent_id; ///< source agent of copy
|
||||
uint64_t bytes; ///< bytes copied
|
||||
rocprofiler_address_t dst_address; ///< destination address
|
||||
rocprofiler_address_t src_address; ///< source address
|
||||
} rocprofiler_callback_tracing_memory_copy_data_t;
|
||||
|
||||
/**
|
||||
|
||||
@@ -553,8 +553,9 @@ typedef union rocprofiler_user_data_t
|
||||
*/
|
||||
typedef union rocprofiler_address_t
|
||||
{
|
||||
uint64_t handle; ///< usage example: store address in uint64_t format
|
||||
void* ptr; ///< usage example: generic form of address
|
||||
uint64_t handle; ///< compatability
|
||||
uint64_t value; ///< usage example: store address in uint64_t format
|
||||
const void* ptr; ///< usage example: generic form of address
|
||||
} rocprofiler_address_t;
|
||||
|
||||
/**
|
||||
@@ -742,11 +743,19 @@ typedef struct rocprofiler_kernel_dispatch_info_t
|
||||
rocprofiler_queue_id_t queue_id; ///< Queue ID where kernel packet is enqueued
|
||||
rocprofiler_kernel_id_t kernel_id; ///< Kernel identifier
|
||||
rocprofiler_dispatch_id_t dispatch_id; ///< unique id for each dispatch
|
||||
uint32_t private_segment_size; ///< runtime private memory segment size
|
||||
uint32_t group_segment_size; ///< runtime group memory segment size
|
||||
uint32_t private_segment_size;
|
||||
uint32_t group_segment_size;
|
||||
rocprofiler_dim3_t workgroup_size; ///< runtime workgroup size (grid * threads)
|
||||
rocprofiler_dim3_t grid_size; ///< runtime grid size
|
||||
uint8_t reserved_padding[56]; // reserved for extensions w/o ABI break
|
||||
|
||||
/// @var group_segment_size
|
||||
/// @brief Runtime group memory segment size. Size of group segment memory (static + runtime)
|
||||
/// required by the kernel (per work-group), in bytes. AKA: LDS size
|
||||
///
|
||||
/// @var private_segment_size
|
||||
/// @brief Runtime private memory segment size. Size of private, spill, and arg segment memory
|
||||
/// (static + runtime) required by this kernel (per work-item), in bytes. AKA: scratch size
|
||||
} rocprofiler_kernel_dispatch_info_t;
|
||||
|
||||
/**
|
||||
|
||||
@@ -156,6 +156,8 @@ struct async_copy_data
|
||||
rocprofiler_thread_id_t tid = common::get_tid();
|
||||
rocprofiler_agent_id_t dst_agent = null_rocp_agent_id;
|
||||
rocprofiler_agent_id_t src_agent = null_rocp_agent_id;
|
||||
rocprofiler_address_t dst_address = {.value = 0};
|
||||
rocprofiler_address_t src_address = {.value = 0};
|
||||
rocprofiler_memory_copy_operation_t direction = ROCPROFILER_MEMORY_COPY_NONE;
|
||||
uint64_t bytes_copied = 0;
|
||||
uint64_t start_ts = 0;
|
||||
@@ -173,8 +175,14 @@ async_copy_data::get_callback_data(timestamp_t _beg, timestamp_t _end) const
|
||||
{
|
||||
ROCP_FATAL_IF(direction == ROCPROFILER_MEMORY_COPY_NONE) << "direction has not been set";
|
||||
|
||||
return common::init_public_api_struct(
|
||||
callback_data_t{}, _beg, _end, dst_agent, src_agent, bytes_copied);
|
||||
return common::init_public_api_struct(callback_data_t{},
|
||||
_beg,
|
||||
_end,
|
||||
dst_agent,
|
||||
src_agent,
|
||||
bytes_copied,
|
||||
dst_address,
|
||||
src_address);
|
||||
}
|
||||
|
||||
async_copy_data::buffered_data_t
|
||||
@@ -197,7 +205,9 @@ async_copy_data::get_buffered_record(const context_t* _ctx,
|
||||
_end,
|
||||
dst_agent,
|
||||
src_agent,
|
||||
bytes_copied);
|
||||
bytes_copied,
|
||||
dst_address,
|
||||
src_address);
|
||||
}
|
||||
|
||||
struct active_signals
|
||||
@@ -446,8 +456,13 @@ get_next_dispatch()
|
||||
template <size_t Idx>
|
||||
struct arg_indices;
|
||||
|
||||
#define HSA_ASYNC_COPY_DEFINE_ARG_INDICES( \
|
||||
ENUM_ID, DST_AGENT_IDX, SRC_AGENT_IDX, COMPLETION_SIGNAL_IDX, COPY_SIZE_IDX) \
|
||||
#define HSA_ASYNC_COPY_DEFINE_ARG_INDICES(ENUM_ID, \
|
||||
DST_AGENT_IDX, \
|
||||
SRC_AGENT_IDX, \
|
||||
COMPLETION_SIGNAL_IDX, \
|
||||
COPY_SIZE_IDX, \
|
||||
DST_ADDR_IDX, \
|
||||
SRC_ADDR_IDX) \
|
||||
template <> \
|
||||
struct arg_indices<ENUM_ID> \
|
||||
{ \
|
||||
@@ -455,11 +470,13 @@ struct arg_indices;
|
||||
static constexpr auto src_agent_idx = SRC_AGENT_IDX; \
|
||||
static constexpr auto completion_signal_idx = COMPLETION_SIGNAL_IDX; \
|
||||
static constexpr auto copy_size_idx = COPY_SIZE_IDX; \
|
||||
static constexpr auto dst_address_idx = DST_ADDR_IDX; \
|
||||
static constexpr auto src_address_idx = SRC_ADDR_IDX; \
|
||||
};
|
||||
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_id, 1, 3, 7, 4)
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_on_engine_id, 1, 3, 7, 4)
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_rect_id, 5, 5, 9, 4)
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_id, 1, 3, 7, 4, 0, 2)
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_on_engine_id, 1, 3, 7, 4, 0, 2)
|
||||
HSA_ASYNC_COPY_DEFINE_ARG_INDICES(async_copy_rect_id, 5, 5, 9, 4, 0, 2)
|
||||
|
||||
template <typename FuncT, typename ArgsT, size_t... Idx>
|
||||
decltype(auto)
|
||||
@@ -471,6 +488,10 @@ invoke(FuncT&& _func, ArgsT&& _args, std::index_sequence<Idx...>)
|
||||
template <typename Tp>
|
||||
uint64_t compute_copy_bytes(Tp);
|
||||
|
||||
template <typename Tp>
|
||||
rocprofiler_address_t
|
||||
compute_address(const Tp*);
|
||||
|
||||
template <>
|
||||
uint64_t
|
||||
compute_copy_bytes(size_t val)
|
||||
@@ -485,6 +506,20 @@ compute_copy_bytes(const hsa_dim3_t* val)
|
||||
return (val) ? (val->x * val->y * val->z) : 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
rocprofiler_address_t
|
||||
compute_address(const void* val)
|
||||
{
|
||||
return rocprofiler_address_t{.ptr = val};
|
||||
}
|
||||
|
||||
template <>
|
||||
rocprofiler_address_t
|
||||
compute_address(const hsa_pitched_ptr_t* val)
|
||||
{
|
||||
return rocprofiler_address_t{.ptr = val->base};
|
||||
}
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx, typename... Args>
|
||||
hsa_status_t
|
||||
async_copy_impl(Args... args)
|
||||
@@ -493,6 +528,8 @@ async_copy_impl(Args... args)
|
||||
|
||||
constexpr auto N = sizeof...(Args);
|
||||
constexpr auto copy_size_idx = arg_indices<OpIdx>::copy_size_idx;
|
||||
constexpr auto dst_addr_idx = arg_indices<OpIdx>::dst_address_idx;
|
||||
constexpr auto src_addr_idx = arg_indices<OpIdx>::src_address_idx;
|
||||
|
||||
auto&& _tied_args = std::tie(args...);
|
||||
|
||||
@@ -589,6 +626,8 @@ async_copy_impl(Args... args)
|
||||
_data->src_agent = _src_agent_id;
|
||||
_data->direction = _direction;
|
||||
_data->bytes_copied = compute_copy_bytes(std::get<copy_size_idx>(_tied_args));
|
||||
_data->dst_address = compute_address(std::get<dst_addr_idx>(_tied_args));
|
||||
_data->src_address = compute_address(std::get<src_addr_idx>(_tied_args));
|
||||
|
||||
constexpr auto completion_signal_idx = arg_indices<OpIdx>::completion_signal_idx;
|
||||
auto& _completion_signal = std::get<completion_signal_idx>(_tied_args);
|
||||
|
||||
Ссылка в новой задаче
Block a user