Этот коммит содержится в:
lloginov-amd
2026-01-19 16:24:30 +01:00
коммит произвёл GitHub
родитель 1ac805cb35
Коммит e49b501e9a
10 изменённых файлов: 398 добавлений и 55 удалений
+1 -2
Просмотреть файл
@@ -367,7 +367,6 @@ config_settings(const std::shared_ptr<settings>& _config)
_skip_domains.emplace("kernel_dispatch");
_skip_domains.emplace("page_migration");
_skip_domains.emplace("scratch_memory");
_add_operation_settings(
"MARKER_API", callback_tracing_info[ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API],
@@ -652,7 +651,7 @@ get_backtrace_operations(rocprofiler_callback_tracing_kind_t kindv)
{
if(callback_operation_option_names.count(kindv) == 0)
{
LOG_CRITICAL("callback_operation_operation_names does not have value for {}",
LOG_CRITICAL("callback_operation_option_names does not have value for {}",
static_cast<int>(kindv));
::rocprofsys::set_state(::rocprofsys::State::Finalized);
std::abort();
+1 -1
Просмотреть файл
@@ -41,7 +41,7 @@ using storage_parser_t =
storage_parser<type_identifier_t, kernel_dispatch_sample, memory_copy_sample,
memory_allocate_sample, region_sample, in_time_sample,
pmc_event_with_sample, amd_smi_sample, cpu_freq_sample,
backtrace_region_sample>;
backtrace_region_sample, scratch_memory_sample>;
using buffer_storage_t = buffer_storage<flush_worker_factory_t, type_identifier_t>;
+66
Просмотреть файл
@@ -540,6 +540,72 @@ perfetto_processor_t::handle([[maybe_unused]] const kernel_dispatch_sample& _kds
#endif
}
/// Emit perfetto output for one cached scratch memory sample.
///
/// When built against ROCm >= 7.0.2 an allocation-size counter value is
/// recorded for ALLOC operations; in all ROCm builds a begin/end slice named
/// after the operation is pushed onto the per-thread scratch memory events
/// track. Without ROCm support the function is a no-op.
void
perfetto_processor_t::handle([[maybe_unused]] const scratch_memory_sample& _sms)
{
#if ROCPROFSYS_USE_ROCM > 0
    // Thread lookup: the sequential thread index is used in track names.
    const auto& _thread_info = thread_info::get(_sms.thread_id, SystemTID);
    const auto  _sequent_tid = _thread_info->index_data->sequent_value;

    auto _device_index =
        m_agent_manager.get_agent_by_handle(_sms.agent_id_handle).device_type_index;

    // Operation name registered for this (kind, operation) pair.
    auto _op_name = std::string{ m_metadata.get_buffer_name_info().at(
        static_cast<rocprofiler_buffer_tracing_kind_t>(_sms.kind),
        static_cast<rocprofiler_tracing_operation_t>(_sms.operation)) };

    auto _start_ns    = _sms.start_timestamp;
    auto _stop_ns     = _sms.end_timestamp;
    auto _correlation = _sms.correlation_id_internal;
    auto _stream      = _sms.stream_handle;
    auto _queue       = _sms.queue_id_handle;

    // Scratch memory samples from SDK versions prior to 7.0.2 do not include
    // allocation_size field, so counter tracks are not needed
#    if ROCPROFSYS_ROCM_VERSION >= 70002
    using scratch_counter_track =
        perfetto_counter_track<rocprofiler_buffer_tracing_scratch_memory_record_t>;

    // The counter track is created once per device index, on first use.
    if(!scratch_counter_track::exists(_device_index))
    {
        auto _counter_track_name = JOIN("", "GPU Scratch Memory [", _device_index,
                                        "] Thread ", _sequent_tid);
        scratch_counter_track::emplace(_device_index, _counter_track_name, "bytes");
    }

    // Only allocation events contribute a counter value.
    if(_sms.operation == ROCPROFILER_SCRATCH_MEMORY_ALLOC)
    {
        TRACE_COUNTER("rocm_scratch_memory",
                      scratch_counter_track::at(_device_index, 0), _start_ns,
                      _sms.allocation_size);
    }
#    endif

    auto _events_track_name = [&]() {
        return JOIN("", "GPU Scratch Memory Events Thread ", _sequent_tid);
    };

    const auto _events_track = tracing::get_perfetto_track(
        category::rocm_scratch_memory{}, _events_track_name);

    // Annotations are attached only when enabled for this processor.
    auto _annotate = [&](::perfetto::EventContext ctx) {
        if(m_use_annotations)
        {
            annotate_perfetto(ctx, { { "begin_ns", _start_ns },
                                     { "end_ns", _stop_ns },
                                     { "corr_id", _correlation },
                                     { "stream_id", _stream },
                                     { "queue", _queue },
                                     { "allocation_size", _sms.allocation_size },
                                     { "agent_id", _device_index },
                                     { "operation", _op_name },
                                     { "flags", _sms.flags } });
        }
    };

    tracing::push_perfetto(category::rocm_scratch_memory{}, _op_name.c_str(),
                           _events_track, _start_ns,
                           ::perfetto::Flow::ProcessScoped(_correlation), _annotate);
    tracing::pop_perfetto(category::rocm_scratch_memory{}, "", _events_track,
                          _stop_ns);
#endif
}
void
perfetto_processor_t::handle([[maybe_unused]] const memory_copy_sample& _mcs)
{
+1
Просмотреть файл
@@ -56,6 +56,7 @@ public:
void finalize_processing();
void handle(const kernel_dispatch_sample& sample);
void handle(const scratch_memory_sample& sample);
void handle(const memory_copy_sample& sample);
void handle(const memory_allocate_sample& sample);
void handle(const region_sample& sample);
+76 -44
Просмотреть файл
@@ -66,6 +66,37 @@ get_handle_from_code_object(
# endif
}
#endif
#if ROCPROFSYS_USE_ROCM > 0
using memory_operation = std::string;
using memory_type = std::string;
std::pair<memory_operation, memory_type>
parse_memory_operation_name(std::string_view memory_operation_name)
{
static const std::unordered_map<std::string_view,
std::pair<memory_operation, memory_type>>
parsing_map{
{ "MEMORY_ALLOCATION_NONE", { "NONE", "REAL" } },
{ "MEMORY_ALLOCATION_ALLOCATE", { "ALLOC", "REAL" } },
{ "MEMORY_ALLOCATION_VMEM_ALLOCATE", { "ALLOC", "VIRTUAL" } },
{ "MEMORY_ALLOCATION_FREE", { "FREE", "REAL" } },
{ "MEMORY_ALLOCATION_VMEM_FREE", { "FREE", "VIRTUAL" } },
{ "SCRATCH_MEMORY_NONE", { "NONE", "SCRATCH" } },
{ "SCRATCH_MEMORY_ALLOC", { "ALLOC", "SCRATCH" } },
{ "SCRATCH_MEMORY_FREE", { "FREE", "SCRATCH" } },
{ "SCRATCH_MEMORY_ASYNC_RECLAIM", { "ASYNC_RECLAIM", "SCRATCH" } },
};
auto item = parsing_map.find(memory_operation_name);
if(item == parsing_map.end())
{
LOG_WARNING("Unknown memory operation name: {}", memory_operation_name);
return { "UNKNOWN", "UNKNOWN" };
}
return item->second;
}
#endif
} // namespace
void
@@ -110,6 +141,46 @@ rocpd_processor_t::handle([[maybe_unused]] const kernel_dispatch_sample& _kds)
#endif
}
/// Store one cached scratch memory sample in the rocpd database.
///
/// An event row is inserted for the sample's correlation ids, then a memory
/// allocation row is inserted carrying the parsed operation / memory type,
/// the timestamps, the allocation size and a JSON string with the flags.
/// Without ROCm support the function is a no-op.
void
rocpd_processor_t::handle([[maybe_unused]] const scratch_memory_sample& _sms)
{
#if ROCPROFSYS_USE_ROCM > 0
    auto& _node    = node_info::get_instance();
    auto  _process = m_metadata->get_process_info();

    // Operation name registered for this (kind, operation) pair.
    const auto* _operation_name = m_metadata->get_buffer_name_info().at(
        static_cast<rocprofiler_buffer_tracing_kind_t>(_sms.kind),
        static_cast<rocprofiler_tracing_operation_t>(_sms.operation));

    // Primary keys of the rows this sample refers to.
    auto _agent_key =
        m_agent_manager->get_agent_by_handle(_sms.agent_id_handle).base_id;
    auto _thread_key = m_data_processor->map_thread_id_to_primary_key(_sms.thread_id);
    auto _category_key = m_data_processor->insert_string(
        trait::name<category::rocm_scratch_memory>::value);

    auto _stack        = _sms.correlation_id_internal;
    auto _parent_stack = _sms.correlation_id_ancestor;
    // Correlation id and address are both recorded as 0 for scratch samples.
    auto _correlation = 0;
    auto _address     = 0;

    auto _event_key = m_data_processor->insert_event(_category_key, _stack,
                                                     _parent_stack, _correlation);

    auto [_operation_str, _memory_type_str] =
        parse_memory_operation_name(_operation_name);
    auto _extdata_json = JOIN("", "{\"flags\": ", _sms.flags, "}");

    m_data_processor->insert_memory_alloc(
        _node.id, _process.pid, _thread_key, _agent_key, _operation_str.c_str(),
        _memory_type_str.c_str(), _sms.start_timestamp, _sms.end_timestamp, _address,
        _sms.allocation_size, _sms.queue_id_handle, _sms.stream_handle, _event_key,
        _extdata_json.c_str());
#endif
}
void
rocpd_processor_t::handle([[maybe_unused]] const memory_copy_sample& _mcs)
{
@@ -153,46 +224,6 @@ void
rocpd_processor_t::handle([[maybe_unused]] const memory_allocate_sample& _mas)
{
#if ROCPROFSYS_USE_ROCM > 0 && (ROCPROFILER_VERSION >= 600)
static auto memtype_to_db =
[](std::string_view memory_type) -> std::pair<std::string, std::string> {
constexpr auto MEMORY_PREFIX = std::string_view{ "MEMORY_ALLOCATION_" };
constexpr auto SCRATCH_PREFIX = std::string_view{ "SCRATCH_MEMORY_" };
constexpr auto VMEM_PREFIX = std::string_view{ "VMEM_" };
constexpr auto ASYNC_PREFIX = std::string_view{ "ASYNC_" };
std::string _type;
std::string _level;
if(memory_type.find(MEMORY_PREFIX) == 0)
{
_type = memory_type.substr(MEMORY_PREFIX.length());
if(_type.find(VMEM_PREFIX) == 0)
{
_type = _type.substr(VMEM_PREFIX.length());
_level = "VIRTUAL";
}
else
{
_level = "REAL";
}
}
else if(memory_type.find(SCRATCH_PREFIX) == 0)
{
_type = memory_type.substr(SCRATCH_PREFIX.length());
_level = "SCRATCH";
if(memory_type.find(ASYNC_PREFIX) == 0)
{
_type = memory_type.substr(ASYNC_PREFIX.length()); // RECLAIM
}
}
if(_type == "ALLOCATE")
{
_type = "ALLOC";
}
return std::make_pair(_type, _level);
};
auto& n_info = node_info::get_instance();
auto process = m_metadata->get_process_info();
auto thread_primary_key =
@@ -210,7 +241,7 @@ rocpd_processor_t::handle([[maybe_unused]] const memory_allocate_sample& _mas)
static_cast<rocprofiler_buffer_tracing_kind_t>(_mas.kind),
static_cast<rocprofiler_tracing_operation_t>(_mas.operation));
auto [type, level] = memtype_to_db(_name);
auto [memory_operation, memory_type] = parse_memory_operation_name(_name);
auto stack_id = _mas.correlation_id_internal;
auto parent_stack_id = _mas.correlation_id_ancestor;
@@ -224,9 +255,10 @@ rocpd_processor_t::handle([[maybe_unused]] const memory_allocate_sample& _mas)
category_primary_key, stack_id, parent_stack_id, correlation_id);
m_data_processor->insert_memory_alloc(
n_info.id, process.pid, thread_primary_key, agent_primary_key, type.c_str(),
level.c_str(), _mas.start_timestamp, _mas.end_timestamp, _mas.address_value,
_mas.allocation_size, queue_id, _mas.stream_handle, event_primary_key);
n_info.id, process.pid, thread_primary_key, agent_primary_key,
memory_operation.c_str(), memory_type.c_str(), _mas.start_timestamp,
_mas.end_timestamp, _mas.address_value, _mas.allocation_size, queue_id,
_mas.stream_handle, event_primary_key);
}
#endif
}
+1
Просмотреть файл
@@ -45,6 +45,7 @@ public:
void finalize_processing();
void handle(const kernel_dispatch_sample& sample);
void handle(const scratch_memory_sample& sample);
void handle(const memory_copy_sample& sample);
void handle(const memory_allocate_sample& sample);
void handle(const region_sample& sample);
+18
Просмотреть файл
@@ -43,6 +43,11 @@ struct processor_t
static_cast<T*>(this)->handle(sample);
}
/// Forward a scratch memory sample to the handle() overload of T.
void handle(const scratch_memory_sample& sample)
{
    auto* _derived = static_cast<T*>(this);
    _derived->handle(sample);
}
void handle(const memory_copy_sample& sample)
{
static_cast<T*>(this)->handle(sample);
@@ -84,6 +89,7 @@ protected:
struct processor_view_t
{
using kernel_dispatch_fn_t = void (*)(void*, const kernel_dispatch_sample&) noexcept;
using scratch_memory_fn_t = void (*)(void*, const scratch_memory_sample&) noexcept;
using memory_copy_fn_t = void (*)(void*, const memory_copy_sample&) noexcept;
#if(ROCPROFILER_VERSION >= 600)
using memory_allocate_fn_t = void (*)(void*, const memory_allocate_sample&) noexcept;
@@ -101,6 +107,7 @@ struct processor_view_t
struct vtable_t
{
kernel_dispatch_fn_t handle_kernel_dispatch;
scratch_memory_fn_t handle_scratch_memory;
memory_copy_fn_t handle_memory_copy;
#if(ROCPROFILER_VERSION >= 600)
memory_allocate_fn_t handle_memory_allocate;
@@ -134,6 +141,11 @@ struct processor_view_t
m_vtable->handle_kernel_dispatch(m_object, sample);
}
/// Dispatch a scratch memory sample through the vtable entry for the
/// referenced object.
ROCPROFSYS_INLINE void handle(const scratch_memory_sample& sample) const noexcept
{
    const auto _scratch_memory_handler = m_vtable->handle_scratch_memory;
    _scratch_memory_handler(m_object, sample);
}
ROCPROFSYS_INLINE void handle(const memory_copy_sample& sample) const noexcept
{
m_vtable->handle_memory_copy(m_object, sample);
@@ -194,6 +206,9 @@ private:
+[](void* obj, const kernel_dispatch_sample& sample) noexcept {
static_cast<T*>(obj)->handle(sample);
},
+[](void* obj, const scratch_memory_sample& sample) noexcept {
static_cast<T*>(obj)->handle(sample);
},
+[](void* obj, const memory_copy_sample& sample) noexcept {
static_cast<T*>(obj)->handle(sample);
},
@@ -275,6 +290,9 @@ struct sample_processor_t
case type_identifier_t::kernel_dispatch:
handle_sample(static_cast<const kernel_dispatch_sample&>(sample));
break;
case type_identifier_t::scratch_memory:
handle_sample(static_cast<const scratch_memory_sample&>(sample));
break;
case type_identifier_t::memory_copy:
handle_sample(static_cast<const memory_copy_sample&>(sample));
break;
+78
Просмотреть файл
@@ -46,6 +46,7 @@ enum class type_identifier_t : uint32_t
amd_smi_sample = 0x0006,
cpu_freq_sample = 0x0007,
backtrace_region_sample = 0x0008,
scratch_memory = 0x0009,
fragmented_space = 0xFFFF
};
@@ -148,6 +149,83 @@ get_size(const kernel_dispatch_sample& item)
item.grid_size_z, static_cast<uint64_t>(item.stream_handle));
}
/// Cacheable record of a single scratch memory tracing event.
///
/// Every field has a default member initializer, so a default-constructed
/// sample (for example the one created before deserialization fills it in)
/// holds zeros instead of indeterminate values.
struct scratch_memory_sample : cacheable_t
{
    static constexpr type_identifier_t type_identifier =
        type_identifier_t::scratch_memory;

    /// Creates a sample with all fields set to zero.
    scratch_memory_sample() = default;

    /// Creates a sample from explicit values; arguments are stored unchanged
    /// in the fields of the same name.
    scratch_memory_sample(uint64_t _start_timestamp, uint64_t _end_timestamp,
                          uint64_t _thread_id, uint64_t _agent_id_handle,
                          uint64_t _queue_id_handle, int32_t _kind, int32_t _operation,
                          int32_t _flags, uint64_t _allocation_size,
                          uint64_t _correlation_id_internal,
                          uint64_t _correlation_id_ancestor, size_t _stream_handle)
    : start_timestamp(_start_timestamp)
    , end_timestamp(_end_timestamp)
    , thread_id(_thread_id)
    , agent_id_handle(_agent_id_handle)
    , queue_id_handle(_queue_id_handle)
    , kind(_kind)
    , operation(_operation)
    , flags(_flags)
    , allocation_size(_allocation_size)
    , correlation_id_internal(_correlation_id_internal)
    , correlation_id_ancestor(_correlation_id_ancestor)
    , stream_handle(_stream_handle)
    {}

    // NOTE: member order is also the constructor initialization order; keep
    // the two in sync when adding fields.
    uint64_t start_timestamp         = 0;  ///< event begin timestamp
    uint64_t end_timestamp           = 0;  ///< event end timestamp
    uint64_t thread_id               = 0;  ///< id of the reporting thread
    uint64_t agent_id_handle         = 0;  ///< handle of the agent
    uint64_t queue_id_handle         = 0;  ///< handle of the queue
    int32_t  kind                    = 0;  ///< tracing kind stored as an integer
    int32_t  operation               = 0;  ///< tracing operation stored as an integer
    int32_t  flags                   = 0;  ///< scratch flags stored as an integer
    uint64_t allocation_size         = 0;  ///< allocation size; 0 when not provided
    uint64_t correlation_id_internal = 0;  ///< internal correlation id
    uint64_t correlation_id_ancestor = 0;  ///< ancestor (parent) correlation id
    size_t   stream_handle           = 0;  ///< stream handle; 0 when none was given
};
/// Write every field of a scratch memory sample into `buffer`, in declaration
/// order; the stream handle is widened to uint64_t before being stored.
template <>
inline void
serialize(uint8_t* buffer, const scratch_memory_sample& item)
{
    const auto _stream_handle_u64 = static_cast<uint64_t>(item.stream_handle);

    utility::store_value(buffer, item.start_timestamp, item.end_timestamp,
                         item.thread_id, item.agent_id_handle, item.queue_id_handle,
                         item.kind, item.operation, item.flags, item.allocation_size,
                         item.correlation_id_internal, item.correlation_id_ancestor,
                         _stream_handle_u64);
}
/// Read a scratch memory sample from `buffer`, with the fields in the same
/// order in which serialize() stores them. The stream handle is parsed as a
/// uint64_t and then assigned to the size_t member.
template <>
inline scratch_memory_sample
deserialize(uint8_t*& buffer)
{
    scratch_memory_sample _sample;
    uint64_t              _stream_handle_u64;

    utility::parse_value(buffer, _sample.start_timestamp, _sample.end_timestamp,
                         _sample.thread_id, _sample.agent_id_handle,
                         _sample.queue_id_handle, _sample.kind, _sample.operation,
                         _sample.flags, _sample.allocation_size,
                         _sample.correlation_id_internal,
                         _sample.correlation_id_ancestor, _stream_handle_u64);

    _sample.stream_handle = static_cast<size_t>(_stream_handle_u64);
    return _sample;
}
/// Size reported by utility::get_size for the fields of a scratch memory
/// sample, using the same field list (stream handle widened to uint64_t) as
/// serialize().
template <>
inline size_t
get_size(const scratch_memory_sample& item)
{
    const auto _stream_handle_u64 = static_cast<uint64_t>(item.stream_handle);

    return utility::get_size(item.start_timestamp, item.end_timestamp, item.thread_id,
                             item.agent_id_handle, item.queue_id_handle, item.kind,
                             item.operation, item.flags, item.allocation_size,
                             item.correlation_id_internal,
                             item.correlation_id_ancestor, _stream_handle_u64);
}
struct memory_copy_sample : cacheable_t
{
static constexpr type_identifier_t type_identifier = type_identifier_t::memory_copy;
+151 -1
Просмотреть файл
@@ -567,6 +567,18 @@ get_mem_alloc_address(
}
#endif
/// Allocation size carried by a scratch memory record.
///
/// @return record.allocation_size when built with ROCm support and
///         ROCPROFSYS_ROCM_VERSION >= 70002; otherwise 0.
uint64_t
get_scratch_mem_alloc_size(
    [[maybe_unused]] const rocprofiler_buffer_tracing_scratch_memory_record_t& record)
{
#if(ROCPROFSYS_USE_ROCM > 0 && ROCPROFSYS_ROCM_VERSION >= 70002)
    const uint64_t _allocation_size = record.allocation_size;
#else
    // Scratch memory samples from SDK versions prior to 7.0.2 do not include
    // allocation_size
    const uint64_t _allocation_size = 0;
#endif
    return _allocation_size;
}
void
cache_region(const rocprofiler_callback_tracing_record_t* record,
const rocprofiler_timestamp_t start_timestamp,
@@ -615,13 +627,26 @@ cache_kernel_dispatch(rocprofiler_buffer_tracing_kernel_dispatch_record_t* recor
record->dispatch_info.grid_size.z, stream_handle });
}
/// Register the stream with the metadata registry and store a
/// scratch_memory_sample built from the record in the trace cache buffer.
///
/// @param record         Scratch memory tracing record to cache.
/// @param stream_handle  Handle of the associated stream.
void
cache_scratch_memory(rocprofiler_buffer_tracing_scratch_memory_record_t* record,
                     uint64_t                                            stream_handle)
{
    trace_cache::get_metadata_registry().add_stream(stream_handle);

    // Enum-typed fields are cached as plain 32-bit integers.
    const auto _kind      = static_cast<int32_t>(record->kind);
    const auto _operation = static_cast<int32_t>(record->operation);
    const auto _flags     = static_cast<int32_t>(record->flags);

    const auto _allocation_size = get_scratch_mem_alloc_size(*record);
    const auto _parent_stack_id = get_parent_stack_id(record->correlation_id);

    trace_cache::get_buffer_storage().store(trace_cache::scratch_memory_sample{
        record->start_timestamp, record->end_timestamp, record->thread_id,
        record->agent_id.handle, record->queue_id.handle, _kind, _operation, _flags,
        _allocation_size, record->correlation_id.internal, _parent_stack_id,
        stream_handle });
}
void
cache_memory_copy(rocprofiler_buffer_tracing_memory_copy_record_t* record,
uint64_t stream_handle)
{
trace_cache::get_metadata_registry().add_stream(stream_handle);
trace_cache::get_buffer_storage().store(trace_cache::memory_copy_sample{
record->start_timestamp, record->end_timestamp, record->thread_id,
record->dst_agent_id.handle, record->src_agent_id.handle,
static_cast<int32_t>(record->kind), static_cast<int32_t>(record->operation),
@@ -1759,6 +1784,120 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
}
}
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_scratch_memory_record_t*>(
header->payload);
bool _group_by_queue = _default_group_by_queue;
const auto* agent = tool_data->get_gpu_tool_agent(record->agent_id);
auto device_id = static_cast<uint32_t>(agent->device_id);
const auto& t_info = thread_info::get(record->thread_id, SystemTID);
auto thread_id_sequent = t_info->index_data->sequent_value;
auto _corr_id = record->correlation_id.internal;
auto _beg_ns = record->start_timestamp;
auto _end_ns = record->end_timestamp;
auto _name =
tool_data->buffered_tracing_info.at(record->kind, record->operation);
auto _stream_id = get_stream_id(record).handle;
if(_stream_id == 0)
{
// Scratch memory event is not associated with a HIP stream
_group_by_queue = true;
}
{
auto track_name = JOIN("", "GPU Scratch Memory [", device_id,
"] Thread ", record->thread_id);
cache_category<category::rocm_scratch_memory>();
cache_add_thread_info(record->thread_id);
cache_add_track(track_name.c_str(), record->thread_id);
cache_scratch_memory(record, _stream_id);
}
if(get_use_timemory())
{
auto _bundle = kernel_dispatch_bundle_t{ _name };
_bundle.push(thread_id_sequent).start().stop();
_bundle.get([_beg_ns, _end_ns](tim::component::wall_clock* _wc) {
_wc->set_value(_end_ns - _beg_ns);
_wc->set_accum(_end_ns - _beg_ns);
});
_bundle.pop();
}
if(get_use_perfetto())
{
// Scratch memory samples from SDK versions prior to 7.0.2 do not include
// allocation_size field, so counter tracks are not needed
#if(ROCPROFSYS_USE_ROCM > 0 && ROCPROFSYS_ROCM_VERSION >= 70002)
using counter_track = perfetto_counter_track<
rocprofiler_buffer_tracing_scratch_memory_record_t>;
if(!counter_track::exists(device_id))
{
auto track_name_alloc_size =
JOIN("", "GPU Scratch Memory [", device_id, "] (S) Thread ",
thread_id_sequent);
counter_track::emplace(device_id, track_name_alloc_size, "bytes");
}
if(record->operation == ROCPROFILER_SCRATCH_MEMORY_ALLOC)
{
TRACE_COUNTER("rocm_scratch_memory",
counter_track::at(device_id, 0), _beg_ns,
record->allocation_size);
}
#endif
auto add_perfetto_annotations = [&](::perfetto::EventContext ctx) {
if(config::get_perfetto_annotations())
{
tracing::add_perfetto_annotation(ctx, "begin_ns", _beg_ns);
tracing::add_perfetto_annotation(ctx, "end_ns", _end_ns);
tracing::add_perfetto_annotation(ctx, "corr_id", _corr_id);
tracing::add_perfetto_annotation(ctx, "stream_id",
_stream_id);
}
};
if(_group_by_queue)
{
auto track_name_events = [&]() {
return JOIN("", "GPU Scratch Memory (S) Events Thread ",
thread_id_sequent);
};
const auto _track = tracing::get_perfetto_track(
category::rocm_scratch_memory{}, track_name_events);
tracing::push_perfetto(category::rocm_scratch_memory{},
_name.data(), _track, _beg_ns,
::perfetto::Flow::ProcessScoped(_corr_id),
add_perfetto_annotations);
tracing::pop_perfetto(category::rocm_scratch_memory{}, "", _track,
_end_ns);
}
else
{
const auto _track = tracing::get_perfetto_track(
category::rocm_hip_stream{}, _track_desc_stream, _stream_id);
tracing::push_perfetto(category::rocm_hip_stream{}, _name.data(),
_track, _beg_ns,
::perfetto::Flow::ProcessScoped(_corr_id),
add_perfetto_annotations);
tracing::pop_perfetto(category::rocm_hip_stream{}, "", _track,
_end_ns);
}
}
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
{
auto* record =
@@ -2249,6 +2388,17 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
_data->primary_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, nullptr, 0,
_data->memory_copy_buffer));
}
if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY) > 0)
{
ROCPROFILER_CALL(rocprofiler_create_buffer(
_data->primary_ctx, buffer_size, watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS, tool_tracing_buffered, tool_data,
&_data->scratch_memory_buffer));
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
_data->primary_ctx, ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, nullptr, 0,
_data->scratch_memory_buffer));
}
#if(ROCPROFILER_VERSION >= 600)
if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION) > 0)
+5 -7
Просмотреть файл
@@ -122,7 +122,7 @@ using backtrace_operation_map_t =
struct client_data
{
static constexpr size_t num_buffers = 4;
static constexpr size_t num_buffers = 5;
static constexpr size_t num_contexts = 2;
using buffer_name_info_t = rocprofiler::sdk::buffer_name_info_t<std::string_view>;
@@ -138,6 +138,7 @@ struct client_data
rocprofiler_context_id_t primary_ctx = { 0 };
rocprofiler_context_id_t counter_ctx = { 0 };
rocprofiler_buffer_id_t kernel_dispatch_buffer = { 0 };
rocprofiler_buffer_id_t scratch_memory_buffer = { 0 };
rocprofiler_buffer_id_t memory_copy_buffer = { 0 };
rocprofiler_buffer_id_t memory_alloc_buffer = { 0 };
rocprofiler_buffer_id_t counter_collection_buffer = { 0 };
@@ -179,12 +180,9 @@ client_data::get_contexts() const
inline client_data::buffer_id_vec_t
client_data::get_buffers() const
{
return buffer_id_vec_t{
kernel_dispatch_buffer,
memory_copy_buffer,
memory_alloc_buffer,
counter_collection_buffer,
};
return buffer_id_vec_t{ kernel_dispatch_buffer, scratch_memory_buffer,
memory_copy_buffer, memory_alloc_buffer,
counter_collection_buffer };
}
inline const rocprofsys_agent_t*