Contexts update + buffer flushing + cleanup (#338)

* Update lib/rocprofiler-sdk/context/context.*

- get_registered_contexts functions (local copy)

* Update lib/rocprofiler-sdk/hsa/{queue,queue_controller}.cpp

- remove ROCPROFILER_BUFFER_TRACING_MEMORY_COPY code

* Update tests/kernel-tracing/kernel-tracing.cpp

- move stop() and flush() in tool_fini to before reporting of sizes of data collected

* Update lib/rocprofiler-sdk/hsa/hsa.*

- remove stale set_callback / activity_functor_t code

* Update lib/rocprofiler-sdk/buffer.cpp

- full wait instead of returning busy when buffer is busy
- use task_group::join instead of task_group::wait to fully wait for tasks to finish (bug fix)

* Update lib/rocprofiler-sdk/agent.cpp

- support agent mapping for CPU agents

* Remove direct access to vector of registered contexts

[ROCm/rocprofiler-sdk commit: 199f0b5421]
Bu işleme şunda yer alıyor:
Jonathan R. Madsen
2024-01-03 04:26:46 -06:00
işlemeyi yapan: GitHub
ebeveyn 56fea9f08b
işleme 2ccb156030
15 değiştirilmiş dosya ile 168 ekleme ve 168 silme
+20 -4
Dosyayı Görüntüle
@@ -642,6 +642,19 @@ get_agent_caches()
static auto _v = std::vector<hsa::AgentCache>{};
return _v;
}
/// Pairs one rocprofiler agent with the HSA agent handle it was matched to.
///
/// Both members carry default initializers, so a default-constructed pair
/// holds a null rocprofiler agent and a value-initialized HSA handle.
struct agent_pair
{
    /// rocprofiler-side agent description (null until assigned)
    const rocprofiler_agent_t* rocp_agent{nullptr};
    /// HSA-side agent handle (value-initialized until assigned)
    hsa_agent_t hsa_agent{};
};
/// Accessor for the list of rocprofiler-agent / HSA-agent pairs.
///
/// The list is a function-local static, so every call yields a reference to
/// the same vector instance.
auto&
get_agent_mapping()
{
    static std::vector<agent_pair> agent_pairs{};
    return agent_pairs;
}
} // namespace
std::vector<const rocprofiler_agent_t*>
@@ -677,6 +690,8 @@ construct_agent_cache(::HsaApiTable* table)
<< "Found " << rocp_agents.size() << " rocprofiler agents and " << hsa_agents.size()
<< " HSA agents";
get_agent_mapping().reserve(get_agent_mapping().size() + rocp_agents.size());
auto hsa_agent_node_map = std::unordered_map<uint32_t, hsa_agent_t>{};
for(const auto& itr : hsa_agents)
{
@@ -704,6 +719,7 @@ construct_agent_cache(::HsaApiTable* table)
if(ritr->node_id == node_id)
{
agent_map.emplace(ritr->node_id, std::make_tuple(ritr, hitr));
get_agent_mapping().emplace_back(agent_pair{ritr, hitr});
break;
}
}
@@ -798,9 +814,9 @@ construct_agent_cache(::HsaApiTable* table)
std::optional<hsa_agent_t>
get_hsa_agent(const rocprofiler_agent_t* agent)
{
for(const auto& itr : get_agent_caches())
for(const auto& itr : get_agent_mapping())
{
if(itr == agent) return itr.get_hsa_agent();
if(itr.rocp_agent->id.handle == agent->id.handle) return itr.hsa_agent;
}
return std::nullopt;
@@ -809,9 +825,9 @@ get_hsa_agent(const rocprofiler_agent_t* agent)
const rocprofiler_agent_t*
get_rocprofiler_agent(hsa_agent_t agent)
{
for(const auto& itr : get_agent_caches())
for(const auto& itr : get_agent_mapping())
{
if(itr == agent) return itr.get_rocp_agent();
if(itr.hsa_agent.handle == agent.handle) return itr.rocp_agent;
}
return nullptr;
+10 -2
Dosyayı Görüntüle
@@ -144,7 +144,15 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
if(wait && task_group) task_group->wait();
// buffer is currently being flushed or destroyed
if(buff->syncer.test_and_set()) return ROCPROFILER_STATUS_ERROR_BUFFER_BUSY;
if(buff->syncer.test_and_set())
{
if(!wait) return ROCPROFILER_STATUS_ERROR_BUFFER_BUSY;
while(buff->syncer.test_and_set())
{
std::this_thread::yield();
std::this_thread::sleep_for(std::chrono::milliseconds{10});
}
}
auto idx = buff->buffer_idx++;
@@ -187,7 +195,7 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
if(task_group)
{
task_group->exec(_task);
if(wait) task_group->wait();
if(wait) task_group->join();
}
else
{
+1 -6
Dosyayı Görüntüle
@@ -91,12 +91,7 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t c
if(rocprofiler::registration::get_init_status() > -1)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
if(context_id.handle >= rocprofiler::context::get_registered_contexts().size())
{
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
}
auto& ctx = rocprofiler::context::get_registered_contexts().at(context_id.handle);
auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id);
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
+1 -6
Dosyayı Görüntüle
@@ -88,12 +88,7 @@ rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t
if(rocprofiler::registration::get_init_status() > -1)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
if(context_id.handle >= rocprofiler::context::get_registered_contexts().size())
{
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
}
auto& ctx = rocprofiler::context::get_registered_contexts().at(context_id.handle);
auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id);
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
+1 -1
Dosyayı Görüntüle
@@ -107,7 +107,7 @@ rocprofiler_context_is_valid(rocprofiler_context_id_t context_id, int* status)
{
if(itr && itr->context_idx == context_id.handle)
{
auto _ret = rocprofiler::context::validate_context(itr.get());
auto _ret = rocprofiler::context::validate_context(itr);
*status = (_ret == ROCPROFILER_STATUS_SUCCESS) ? 1 : 0;
return _ret;
}
+75 -26
Dosyayı Görüntüle
@@ -41,6 +41,7 @@
#include <memory>
#include <mutex>
#include <optional>
#include <random>
namespace rocprofiler
{
@@ -49,6 +50,11 @@ namespace context
namespace
{
using reserve_size_t = common::container::reserve_size;
using unique_context_vec_t =
common::container::stable_vector<allocator::unique_static_ptr_t<context>, 8>;
using active_context_vec_t = common::container::stable_vector<std::atomic<const context*>, 8>;
constexpr auto invalid_client_idx = std::numeric_limits<uint32_t>::max();
auto&
get_contexts_mutex()
@@ -57,7 +63,17 @@ get_contexts_mutex()
return _v;
}
constexpr auto invalid_client_idx = std::numeric_limits<uint32_t>::max();
/// Returns the offset applied to context identifiers.
///
/// The value is drawn once, on first call, from a uniform distribution over
/// [max(uint8_t), max(uint16_t)] using a Mersenne Twister seeded from
/// std::random_device; every later call returns the same cached value.
///
/// \return the cached offset, always within [255, 65535]
uint64_t
get_contexts_offset()
{
    static const uint64_t offset = [] {
        constexpr uint64_t lower_bound = std::numeric_limits<uint8_t>::max();
        constexpr uint64_t upper_bound = std::numeric_limits<uint16_t>::max();

        std::random_device                      seed_source{};
        std::mt19937                            engine{seed_source()};
        std::uniform_int_distribution<uint64_t> distribution{lower_bound, upper_bound};
        return distribution(engine);
    }();
    return offset;
}
auto&
get_client_index()
@@ -66,6 +82,13 @@ get_client_index()
return _v;
}
/// Accessor for the container holding every registered context.
///
/// The container is a function-local static constructed on first use with a
/// reserve size equal to the container's chunk size; all calls return a
/// reference to that same instance.
unique_context_vec_t&
get_registered_contexts_impl()
{
    static unique_context_vec_t registered{reserve_size_t{unique_context_vec_t::chunk_size}};
    return registered;
}
auto&
get_num_active_contexts()
{
@@ -136,11 +159,31 @@ pop_latest_correlation_id(const correlation_id* val)
if(get_latest_correlation_id_impl() == val) get_latest_correlation_id_impl() = nullptr;
}
unique_context_vec_t&
get_registered_contexts()
context_array_t&
get_registered_contexts(context_array_t& data, context_filter_t filter)
{
static auto _v = unique_context_vec_t{reserve_size_t{unique_context_vec_t::chunk_size}};
return _v;
data.clear();
auto num_ctx = get_registered_contexts_impl().size();
if(num_ctx <= 0) return data;
data.reserve(num_ctx);
for(auto& itr : get_registered_contexts_impl())
{
const auto* ctx = itr.get();
if(ctx)
{
if(!filter || (filter && filter(ctx))) data.emplace_back(ctx);
}
}
return data;
}
/// Convenience overload that returns the registered contexts by value.
///
/// Creates an empty array and delegates to the overload that fills a
/// caller-provided array, passing \p filter through unchanged.
///
/// \param filter predicate handed to the filling overload
/// \return the array populated by the filling overload
context_array_t
get_registered_contexts(context_filter_t filter)
{
    context_array_t selected{};
    get_registered_contexts(selected, filter);
    return selected;
}
context_array_t&
@@ -209,13 +252,13 @@ allocate_context()
auto _lk = std::unique_lock<std::mutex>{get_contexts_mutex()};
// initial context identifier number
auto _idx = get_registered_contexts().size();
auto _idx = get_registered_contexts_impl().size() + get_contexts_offset();
// make space in registered
get_registered_contexts().emplace_back(nullptr);
get_registered_contexts_impl().emplace_back(nullptr);
// create an entry in the registered
auto& _cfg_v = get_registered_contexts().back();
auto& _cfg_v = get_registered_contexts_impl().back();
_cfg_v = allocator::make_unique_static<context>();
auto* _cfg = _cfg_v.get();
// ...
@@ -233,35 +276,41 @@ allocate_context()
return rocprofiler_context_id_t{_idx};
}
/// Looks up a registered context by its public identifier.
///
/// The identifier's handle is expected to be a storage index plus the value of
/// get_contexts_offset(); the offset is subtracted to recover the index.
///
/// \param id identifier of the context to find
/// \return nullptr when the handle is smaller than the offset or the recovered
///         index is past the end of the registered contexts; otherwise the
///         pointer stored in that slot
context*
get_mutable_registered_context(rocprofiler_context_id_t id)
{
    const auto offset = get_contexts_offset();

    // handles below the offset cannot map to any slot
    if(id.handle < offset) return nullptr;

    const auto slot       = id.handle - offset;
    auto&      registered = get_registered_contexts_impl();

    if(slot < registered.size())
    {
        return registered.at(slot).get();
    }
    return nullptr;
}
/// Read-only lookup of a registered context.
///
/// Forwards to get_mutable_registered_context() and exposes the result as a
/// pointer-to-const.
///
/// \param id identifier of the context to find
/// \return the pointer produced by get_mutable_registered_context() (may be null)
const context*
get_registered_context(rocprofiler_context_id_t id)
{
    const context* found = get_mutable_registered_context(id);
    return found;
}
/// Checks whether a context pointer is usable.
///
/// Only the null check is active. Checks for a missing buffer
/// (ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND) or a missing filter
/// (ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND) are currently disabled.
///
/// \param cfg context to validate; may be null
/// \return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND when \p cfg is null,
///         ROCPROFILER_STATUS_SUCCESS otherwise
rocprofiler_status_t
validate_context(const context* cfg)
{
    if(cfg == nullptr)
    {
        return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
    }
    return ROCPROFILER_STATUS_SUCCESS;
}
rocprofiler_status_t
start_context(rocprofiler_context_id_t context_id)
{
if(context_id.handle >= get_registered_contexts().size())
{
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
}
if(context_id.handle < get_contexts_offset()) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
context* cfg = get_registered_contexts().at(context_id.handle).get();
if(!cfg)
{
if((context_id.handle - get_contexts_offset()) >= get_registered_contexts_impl().size())
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
}
const auto* cfg = get_registered_context(context_id);
if(!cfg) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
if(validate_context(cfg) != ROCPROFILER_STATUS_SUCCESS)
{
return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
}
auto current_contexts = context_array_t{};
for(const auto* itr : get_active_contexts(current_contexts))
@@ -277,7 +326,7 @@ start_context(rocprofiler_context_id_t context_id)
}
}
uint64_t rocp_tot_contexts = get_registered_contexts().size();
uint64_t rocp_tot_contexts = get_registered_contexts_impl().size();
auto idx = rocp_tot_contexts;
auto& active_contexts = get_active_contexts_impl();
{
@@ -310,7 +359,7 @@ start_context(rocprofiler_context_id_t context_id)
// atomic swap the pointer into the "active" array used internally
const context* _expected = nullptr;
bool success = active_contexts.at(idx).compare_exchange_strong(
_expected, get_registered_contexts().at(context_id.handle).get());
_expected, get_registered_context(context_id));
if(!success)
{
@@ -369,7 +418,7 @@ deactivate_client_contexts(rocprofiler_client_id_t client_id)
void
deregister_client_contexts(rocprofiler_client_id_t client_id)
{
for(auto& itr : get_registered_contexts())
for(auto& itr : get_registered_contexts_impl())
{
if(itr->client_idx == client_id.handle)
{
+19 -10
Dosyayı Görüntüle
@@ -174,21 +174,24 @@ start_context(rocprofiler_context_id_t id);
/// \brief disable the context.
rocprofiler_status_t stop_context(rocprofiler_context_id_t);
using unique_context_vec_t =
common::container::stable_vector<allocator::unique_static_ptr_t<context>, 8>;
using active_context_vec_t = common::container::stable_vector<std::atomic<const context*>, 8>;
using context_array_t = common::container::small_vector<const context*>;
using context_array_t = common::container::small_vector<const context*>;
unique_context_vec_t&
get_registered_contexts();
context*
get_mutable_registered_context(rocprofiler_context_id_t id);
const context*
get_registered_context(rocprofiler_context_id_t id);
using context_filter_t = bool (*)(const context*);
inline bool
default_context_filter(const context* val)
{
return (val != nullptr);
}
default_context_filter(const context* val);
context_array_t&
get_registered_contexts(context_array_t& data, context_filter_t filter = default_context_filter);
context_array_t
get_registered_contexts(context_filter_t filter = default_context_filter);
context_array_t&
get_active_contexts(context_array_t& data, context_filter_t filter = default_context_filter);
@@ -200,5 +203,11 @@ void deactivate_client_contexts(rocprofiler_client_id_t);
// should only be called if the client failed to initialize
void deregister_client_contexts(rocprofiler_client_id_t);
/// Default predicate for the context query functions: accepts any non-null context.
///
/// \param val context pointer to test
/// \return true when \p val is not null, false otherwise
inline bool
default_context_filter(const context* val)
{
    const bool is_present = (val != nullptr);
    return is_present;
}
} // namespace context
} // namespace rocprofiler
+11 -9
Dosyayı Görüntüle
@@ -70,7 +70,10 @@ public:
rocprofiler_profile_counting_dispatch_callback_t callback,
void* callback_args)
{
auto& ctx = *rocprofiler::context::get_registered_contexts().at(context_id.handle);
auto* ctx_p = rocprofiler::context::get_mutable_registered_context(context_id);
if(!ctx_p) return false;
auto& ctx = *ctx_p;
if(!ctx.counter_collection)
{
@@ -81,12 +84,11 @@ public:
auto& cb = *ctx.counter_collection->callbacks.emplace_back(
std::make_shared<counter_callback_info>());
cb.user_cb = callback;
cb.callback_args = callback_args;
cb.context = context_id;
cb.buffer = buffer;
cb.internal_context =
rocprofiler::context::get_registered_contexts().at(context_id.handle).get();
cb.user_cb = callback;
cb.callback_args = callback_args;
cb.context = context_id;
cb.buffer = buffer;
cb.internal_context = ctx_p;
return true;
}
@@ -309,7 +311,7 @@ completed_cb(const std::shared_ptr<counter_callback_info>& info,
}
void
start_context(context::context* ctx)
start_context(const context::context* ctx)
{
if(!ctx || !ctx->counter_collection) return;
@@ -346,7 +348,7 @@ start_context(context::context* ctx)
}
void
stop_context(context::context* ctx)
stop_context(const context::context* ctx)
{
if(!ctx || !ctx->counter_collection) return;
+2 -2
Dosyayı Görüntüle
@@ -103,9 +103,9 @@ configure_buffered_dispatch(rocprofiler_context_id_t con
void* callback_args);
void
start_context(context::context*);
start_context(const context::context*);
void
stop_context(context::context*);
stop_context(const context::context*);
} // namespace counters
} // namespace rocprofiler
+9 -19
Dosyayı Görüntüle
@@ -152,16 +152,11 @@ rocprofiler_push_external_correlation_id(rocprofiler_context_id_t context,
static uint64_t pid_v = getpid();
if(tid < pid_v) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
for(auto& itr : rocprofiler::context::get_registered_contexts())
{
if(itr->context_idx == context.handle)
{
itr->correlation_tracer.external_correlator.push(tid, external_correlation_id);
return ROCPROFILER_STATUS_SUCCESS;
}
}
auto* ctx = rocprofiler::context::get_mutable_registered_context(context);
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
ctx->correlation_tracer.external_correlator.push(tid, external_correlation_id);
return ROCPROFILER_STATUS_SUCCESS;
}
rocprofiler_status_t
@@ -173,16 +168,11 @@ rocprofiler_pop_external_correlation_id(rocprofiler_context_id_t context,
static uint64_t pid_v = getpid();
if(tid < pid_v) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
for(auto& itr : rocprofiler::context::get_registered_contexts())
{
if(itr->context_idx == context.handle)
{
auto former = itr->correlation_tracer.external_correlator.pop(tid);
if(external_correlation_id) *external_correlation_id = former;
return ROCPROFILER_STATUS_SUCCESS;
}
}
auto* ctx = rocprofiler::context::get_mutable_registered_context(context);
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
auto former = ctx->correlation_tracer.external_correlator.pop(tid);
if(external_correlation_id) *external_correlation_id = former;
return ROCPROFILER_STATUS_SUCCESS;
}
}
+10 -15
Dosyayı Görüntüle
@@ -48,8 +48,6 @@ namespace hsa
{
namespace
{
std::atomic<activity_functor_t> report_activity = {};
struct null_type
{};
@@ -460,13 +458,14 @@ get_names(std::vector<const char*>& _name_list, std::index_sequence<Idx...>)
}
bool
should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain,
should_wrap_functor(const context::context_array_t& _contexts,
rocprofiler_callback_tracing_kind_t _callback_domain,
rocprofiler_buffer_tracing_kind_t _buffered_domain,
int _operation)
{
// we loop over all the *registered* contexts and see if any of them, at any point in time,
// might require callback or buffered API tracing
for(const auto& itr : context::get_registered_contexts())
for(const auto& itr : _contexts)
{
if(!itr) continue;
@@ -487,10 +486,12 @@ template <size_t... Idx>
void
update_table(hsa_api_table_t* _orig, std::index_sequence<Idx...>)
{
auto _update = [](hsa_api_table_t* _orig_v, auto _info) {
auto _update = [](hsa_api_table_t* _orig_v, const auto& _contexts_v, auto _info) {
// check to see if there are any contexts which enable this operation in the HSA API domain
if(!should_wrap_functor(
_info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx))
if(!should_wrap_functor(_contexts_v,
_info.callback_domain_idx,
_info.buffered_domain_idx,
_info.operation_idx))
return;
// 1. get the sub-table containing the function pointer
@@ -501,7 +502,8 @@ update_table(hsa_api_table_t* _orig, std::index_sequence<Idx...>)
_func = _info.get_functor(_func);
};
(_update(_orig, hsa_api_info<Idx>{}), ...);
auto _contexts = context::get_registered_contexts();
(_update(_orig, _contexts, hsa_api_info<Idx>{}), ...);
}
} // namespace
@@ -547,13 +549,6 @@ get_names()
return _data;
}
void
set_callback(activity_functor_t _func)
{
auto&& _v = report_activity.load();
report_activity.compare_exchange_strong(_v, _func);
}
void
update_table(hsa_api_table_t* _orig)
{
+3 -10
Dosyayı Görüntüle
@@ -31,10 +31,6 @@ namespace rocprofiler
{
namespace hsa
{
using activity_functor_t = int (*)(rocprofiler_callback_tracing_kind_t domain,
uint32_t operation_id,
void* data);
using hsa_api_table_t = HsaApiTable;
hsa_api_table_t&
@@ -43,6 +39,9 @@ get_table();
template <size_t Idx>
struct hsa_table_lookup;
template <size_t Idx>
struct hsa_api_info;
template <size_t Idx>
struct hsa_api_impl
{
@@ -56,9 +55,6 @@ struct hsa_api_impl
static auto functor(Args&&... args);
};
template <size_t Idx>
struct hsa_api_info;
const char*
name_by_id(uint32_t id);
@@ -77,9 +73,6 @@ get_names();
std::vector<uint32_t>
get_ids();
void
set_callback(activity_functor_t _func);
void
update_table(hsa_api_table_t* _orig);
} // namespace hsa
+1 -53
Dosyayı Görüntüle
@@ -80,8 +80,7 @@ bool
context_filter(const context::context* ctx)
{
return (ctx->buffered_tracer &&
(ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) ||
ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)));
(ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH)));
}
bool
@@ -120,23 +119,6 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data)
<< " returned dispatch times where the end time (" << dispatch_time.end
<< ") was less than the start time (" << dispatch_time.start << ")";
// try to extract the async copy time. this will return HSA_STATUS_ERROR if there
// is not an async copy agent associated with the signal so we just predicate
// putting something into the buffer based on whether or not
// hsa_amd_profiling_get_async_copy_time returns HSA_STATUS_SUCCESS.
auto copy_time = hsa_amd_profiling_async_copy_time_t{};
auto copy_time_status =
queue_info_session.queue.ext_api().hsa_amd_profiling_get_async_copy_time_fn(_signal,
&copy_time);
// if we encounter this in CI, it will cause test to fail
ROCP_CI_LOG_IF(ERROR,
copy_time_status == HSA_STATUS_SUCCESS && copy_time.end < copy_time.start)
<< "hsa_amd_profiling_get_async_copy_time for kernel_id=" << _kern_id
<< " on rocprofiler_agent=" << _rocp_agent->id.handle
<< " returned async times where the end time (" << copy_time.end
<< ") was less than the start time (" << copy_time.start << ")";
for(const auto* itr : ctxs)
{
auto* _buffer = buffer::get_buffer(
@@ -181,26 +163,6 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data)
record);
}
}
if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY))
{
if(copy_time_status == HSA_STATUS_SUCCESS)
{
auto record = rocprofiler_buffer_tracing_memory_copy_record_t{
sizeof(rocprofiler_buffer_tracing_memory_copy_record_t),
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
_corr_id_v,
copy_time.start,
copy_time.end,
_rocp_agent->id,
_queue_id,
_kern_id};
CHECK_NOTNULL(_buffer)->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
record);
}
}
}
}
@@ -522,20 +484,6 @@ Queue::Queue(const AgentCache& agent,
_ext_api.hsa_amd_queue_intercept_register_fn(_intercept_queue, WriteInterceptor, this))
<< "Could not register interceptor";
bool enable_async_copy = false;
for(const auto& itr : context::get_registered_contexts())
{
if(itr->buffered_tracer &&
itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY))
enable_async_copy = true;
}
if(enable_async_copy)
{
LOG_IF(FATAL, _ext_api.hsa_amd_profiling_async_copy_enable_fn(true) != HSA_STATUS_SUCCESS)
<< "Could not enable async copy timing";
}
*queue = _intercept_queue;
}
+1 -2
Dosyayı Görüntüle
@@ -186,8 +186,7 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
}
else if(itr->buffered_tracer)
{
if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) ||
itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY))
if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH))
{
enable_intercepter = true;
break;
+4 -3
Dosyayı Görüntüle
@@ -642,6 +642,9 @@ tool_fini(void* tool_data)
static auto _once = std::atomic_flag{ATOMIC_FLAG_INIT};
if(_once.test_and_set()) return;
stop();
flush();
std::cerr << "[" << getpid() << "][" << __FUNCTION__
<< "] Finalizing... agents=" << agents.size()
<< ", code_object_callback_records=" << code_object_records.size()
@@ -652,9 +655,6 @@ tool_fini(void* tool_data)
<< ", hsa_api_bf_records=" << hsa_api_bf_records.size() << " ...\n"
<< std::flush;
stop();
flush();
auto* _call_stack = static_cast<call_stack_t*>(tool_data);
if(_call_stack)
{
@@ -799,6 +799,7 @@ flush()
{
for(auto* itr : buffers)
{
if(!itr) continue;
auto status = rocprofiler_flush_buffer(*itr);
if(status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY)
{