From 199f0b54218b668effb8772bfceefe3c682c2016 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 3 Jan 2024 04:26:46 -0600 Subject: [PATCH] Contexts update + buffer flushing + cleanup (#338) * Update lib/rocprofiler-sdk/context/context.* - get_registered_contexts functions (local copy) * Update lib/rocprofiler-sdk/hsa/{queue,queue_controller}.cpp - remove ROCPROFILER_BUFFER_TRACING_MEMORY_COPY code * Update tests/kernel-tracing/kernel-tracing.cpp - move stop() and flush() in tool_fini to before reporting of sizes of data collected * Update lib/rocprofiler-sdk/hsa/hsa.* - remove stale set_callback / activity_functor_t code * Update lib/rocprofiler-sdk/buffer.cpp - full wait instead of returning busy when buffer is busy - use task_group::join instead of task_group::wait to fully wait for tasks to finish (bug fix) * Update lib/rocprofiler-sdk/agent.cpp - support agent mapping for CPU agents * Remove direct access to vector of registered contexts --- source/lib/rocprofiler-sdk/agent.cpp | 24 ++++- source/lib/rocprofiler-sdk/buffer.cpp | 12 ++- source/lib/rocprofiler-sdk/buffer_tracing.cpp | 7 +- .../lib/rocprofiler-sdk/callback_tracing.cpp | 7 +- source/lib/rocprofiler-sdk/context.cpp | 2 +- .../lib/rocprofiler-sdk/context/context.cpp | 101 +++++++++++++----- .../lib/rocprofiler-sdk/context/context.hpp | 29 +++-- source/lib/rocprofiler-sdk/counters/core.cpp | 20 ++-- source/lib/rocprofiler-sdk/counters/core.hpp | 4 +- .../rocprofiler-sdk/external_correlation.cpp | 28 ++--- source/lib/rocprofiler-sdk/hsa/hsa.cpp | 25 ++--- source/lib/rocprofiler-sdk/hsa/hsa.hpp | 13 +-- source/lib/rocprofiler-sdk/hsa/queue.cpp | 54 +--------- .../rocprofiler-sdk/hsa/queue_controller.cpp | 3 +- tests/kernel-tracing/kernel-tracing.cpp | 7 +- 15 files changed, 168 insertions(+), 168 deletions(-) diff --git a/source/lib/rocprofiler-sdk/agent.cpp b/source/lib/rocprofiler-sdk/agent.cpp index a737615e9e..7a39f1b6b6 100644 --- a/source/lib/rocprofiler-sdk/agent.cpp +++ 
b/source/lib/rocprofiler-sdk/agent.cpp @@ -642,6 +642,19 @@ get_agent_caches() static auto _v = std::vector{}; return _v; } + +struct agent_pair +{ + const rocprofiler_agent_t* rocp_agent = nullptr; + hsa_agent_t hsa_agent = {}; +}; + +auto& +get_agent_mapping() +{ + static auto _v = std::vector{}; + return _v; +} } // namespace std::vector @@ -677,6 +690,8 @@ construct_agent_cache(::HsaApiTable* table) << "Found " << rocp_agents.size() << " rocprofiler agents and " << hsa_agents.size() << " HSA agents"; + get_agent_mapping().reserve(get_agent_mapping().size() + rocp_agents.size()); + auto hsa_agent_node_map = std::unordered_map{}; for(const auto& itr : hsa_agents) { @@ -704,6 +719,7 @@ construct_agent_cache(::HsaApiTable* table) if(ritr->node_id == node_id) { agent_map.emplace(ritr->node_id, std::make_tuple(ritr, hitr)); + get_agent_mapping().emplace_back(agent_pair{ritr, hitr}); break; } } @@ -798,9 +814,9 @@ construct_agent_cache(::HsaApiTable* table) std::optional get_hsa_agent(const rocprofiler_agent_t* agent) { - for(const auto& itr : get_agent_caches()) + for(const auto& itr : get_agent_mapping()) { - if(itr == agent) return itr.get_hsa_agent(); + if(itr.rocp_agent->id.handle == agent->id.handle) return itr.hsa_agent; } return std::nullopt; @@ -809,9 +825,9 @@ get_hsa_agent(const rocprofiler_agent_t* agent) const rocprofiler_agent_t* get_rocprofiler_agent(hsa_agent_t agent) { - for(const auto& itr : get_agent_caches()) + for(const auto& itr : get_agent_mapping()) { - if(itr == agent) return itr.get_rocp_agent(); + if(itr.hsa_agent.handle == agent.handle) return itr.rocp_agent; } return nullptr; diff --git a/source/lib/rocprofiler-sdk/buffer.cpp b/source/lib/rocprofiler-sdk/buffer.cpp index b8770430cd..62282d9400 100644 --- a/source/lib/rocprofiler-sdk/buffer.cpp +++ b/source/lib/rocprofiler-sdk/buffer.cpp @@ -144,7 +144,15 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait) if(wait && task_group) task_group->wait(); // buffer is currently being flushed or 
destroyed - if(buff->syncer.test_and_set()) return ROCPROFILER_STATUS_ERROR_BUFFER_BUSY; + if(buff->syncer.test_and_set()) + { + if(!wait) return ROCPROFILER_STATUS_ERROR_BUFFER_BUSY; + while(buff->syncer.test_and_set()) + { + std::this_thread::yield(); + std::this_thread::sleep_for(std::chrono::milliseconds{10}); + } + } auto idx = buff->buffer_idx++; @@ -187,7 +195,7 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait) if(task_group) { task_group->exec(_task); - if(wait) task_group->wait(); + if(wait) task_group->join(); } else { diff --git a/source/lib/rocprofiler-sdk/buffer_tracing.cpp b/source/lib/rocprofiler-sdk/buffer_tracing.cpp index 717f1e5051..cc59cfbd22 100644 --- a/source/lib/rocprofiler-sdk/buffer_tracing.cpp +++ b/source/lib/rocprofiler-sdk/buffer_tracing.cpp @@ -91,12 +91,7 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t c if(rocprofiler::registration::get_init_status() > -1) return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED; - if(context_id.handle >= rocprofiler::context::get_registered_contexts().size()) - { - return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; - } - - auto& ctx = rocprofiler::context::get_registered_contexts().at(context_id.handle); + auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id); if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; diff --git a/source/lib/rocprofiler-sdk/callback_tracing.cpp b/source/lib/rocprofiler-sdk/callback_tracing.cpp index 50d2124f64..8dea70a86f 100644 --- a/source/lib/rocprofiler-sdk/callback_tracing.cpp +++ b/source/lib/rocprofiler-sdk/callback_tracing.cpp @@ -88,12 +88,7 @@ rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t if(rocprofiler::registration::get_init_status() > -1) return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED; - if(context_id.handle >= rocprofiler::context::get_registered_contexts().size()) - { - return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; - } - - auto& ctx = 
rocprofiler::context::get_registered_contexts().at(context_id.handle); + auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id); if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; diff --git a/source/lib/rocprofiler-sdk/context.cpp b/source/lib/rocprofiler-sdk/context.cpp index cd30b74135..aad941f54e 100644 --- a/source/lib/rocprofiler-sdk/context.cpp +++ b/source/lib/rocprofiler-sdk/context.cpp @@ -107,7 +107,7 @@ rocprofiler_context_is_valid(rocprofiler_context_id_t context_id, int* status) { if(itr && itr->context_idx == context_id.handle) { - auto _ret = rocprofiler::context::validate_context(itr.get()); + auto _ret = rocprofiler::context::validate_context(itr); *status = (_ret == ROCPROFILER_STATUS_SUCCESS) ? 1 : 0; return _ret; } diff --git a/source/lib/rocprofiler-sdk/context/context.cpp b/source/lib/rocprofiler-sdk/context/context.cpp index 7427b061b3..ae63ab60e8 100644 --- a/source/lib/rocprofiler-sdk/context/context.cpp +++ b/source/lib/rocprofiler-sdk/context/context.cpp @@ -41,6 +41,7 @@ #include #include #include +#include namespace rocprofiler { @@ -49,6 +50,11 @@ namespace context namespace { using reserve_size_t = common::container::reserve_size; +using unique_context_vec_t = + common::container::stable_vector, 8>; +using active_context_vec_t = common::container::stable_vector, 8>; + +constexpr auto invalid_client_idx = std::numeric_limits::max(); auto& get_contexts_mutex() @@ -57,7 +63,17 @@ get_contexts_mutex() return _v; } -constexpr auto invalid_client_idx = std::numeric_limits::max(); +uint64_t +get_contexts_offset() +{ + static uint64_t _v = []() { + auto gen = std::mt19937{std::random_device{}()}; + auto rng = std::uniform_int_distribution{std::numeric_limits::max(), + std::numeric_limits::max()}; + return rng(gen); + }(); + return _v; +} auto& get_client_index() @@ -66,6 +82,13 @@ get_client_index() return _v; } +unique_context_vec_t& +get_registered_contexts_impl() +{ + static auto _v = 
unique_context_vec_t{reserve_size_t{unique_context_vec_t::chunk_size}}; + return _v; +} + auto& get_num_active_contexts() { @@ -136,11 +159,31 @@ pop_latest_correlation_id(const correlation_id* val) if(get_latest_correlation_id_impl() == val) get_latest_correlation_id_impl() = nullptr; } -unique_context_vec_t& -get_registered_contexts() +context_array_t& +get_registered_contexts(context_array_t& data, context_filter_t filter) { - static auto _v = unique_context_vec_t{reserve_size_t{unique_context_vec_t::chunk_size}}; - return _v; + data.clear(); + auto num_ctx = get_registered_contexts_impl().size(); + if(num_ctx <= 0) return data; + + data.reserve(num_ctx); + for(auto& itr : get_registered_contexts_impl()) + { + const auto* ctx = itr.get(); + if(ctx) + { + if(!filter || (filter && filter(ctx))) data.emplace_back(ctx); + } + } + return data; +} + +context_array_t +get_registered_contexts(context_filter_t filter) +{ + auto data = context_array_t{}; + get_registered_contexts(data, filter); + return data; } context_array_t& @@ -209,13 +252,13 @@ allocate_context() auto _lk = std::unique_lock{get_contexts_mutex()}; // initial context identifier number - auto _idx = get_registered_contexts().size(); + auto _idx = get_registered_contexts_impl().size() + get_contexts_offset(); // make space in registered - get_registered_contexts().emplace_back(nullptr); + get_registered_contexts_impl().emplace_back(nullptr); // create an entry in the registered - auto& _cfg_v = get_registered_contexts().back(); + auto& _cfg_v = get_registered_contexts_impl().back(); _cfg_v = allocator::make_unique_static(); auto* _cfg = _cfg_v.get(); // ... 
@@ -233,35 +276,41 @@ allocate_context() return rocprofiler_context_id_t{_idx}; } +context* +get_mutable_registered_context(rocprofiler_context_id_t id) +{ + if(id.handle < get_contexts_offset()) return nullptr; + auto _idx = id.handle - get_contexts_offset(); + if(_idx >= get_registered_contexts_impl().size()) return nullptr; + return get_registered_contexts_impl().at(_idx).get(); +} + +const context* +get_registered_context(rocprofiler_context_id_t id) +{ + return get_mutable_registered_context(id); +} + rocprofiler_status_t validate_context(const context* cfg) { - // if(cfg->buffer == nullptr) return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND; - - // if(cfg->filter == nullptr) return ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND; - return (cfg) ? ROCPROFILER_STATUS_SUCCESS : ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; } rocprofiler_status_t start_context(rocprofiler_context_id_t context_id) { - if(context_id.handle >= get_registered_contexts().size()) - { - return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; - } + if(context_id.handle < get_contexts_offset()) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; - context* cfg = get_registered_contexts().at(context_id.handle).get(); - - if(!cfg) - { + if((context_id.handle - get_contexts_offset()) >= get_registered_contexts_impl().size()) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; - } + + const auto* cfg = get_registered_context(context_id); + + if(!cfg) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; if(validate_context(cfg) != ROCPROFILER_STATUS_SUCCESS) - { return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID; - } auto current_contexts = context_array_t{}; for(const auto* itr : get_active_contexts(current_contexts)) @@ -277,7 +326,7 @@ start_context(rocprofiler_context_id_t context_id) } } - uint64_t rocp_tot_contexts = get_registered_contexts().size(); + uint64_t rocp_tot_contexts = get_registered_contexts_impl().size(); auto idx = rocp_tot_contexts; auto& active_contexts = get_active_contexts_impl(); { @@ 
-310,7 +359,7 @@ start_context(rocprofiler_context_id_t context_id) // atomic swap the pointer into the "active" array used internally const context* _expected = nullptr; bool success = active_contexts.at(idx).compare_exchange_strong( - _expected, get_registered_contexts().at(context_id.handle).get()); + _expected, get_registered_context(context_id)); if(!success) { @@ -369,7 +418,7 @@ deactivate_client_contexts(rocprofiler_client_id_t client_id) void deregister_client_contexts(rocprofiler_client_id_t client_id) { - for(auto& itr : get_registered_contexts()) + for(auto& itr : get_registered_contexts_impl()) { if(itr->client_idx == client_id.handle) { diff --git a/source/lib/rocprofiler-sdk/context/context.hpp b/source/lib/rocprofiler-sdk/context/context.hpp index d7a4016abd..b8ad1293bb 100644 --- a/source/lib/rocprofiler-sdk/context/context.hpp +++ b/source/lib/rocprofiler-sdk/context/context.hpp @@ -174,21 +174,24 @@ start_context(rocprofiler_context_id_t id); /// \brief disable the contexturation. 
rocprofiler_status_t stop_context(rocprofiler_context_id_t); -using unique_context_vec_t = - common::container::stable_vector, 8>; -using active_context_vec_t = common::container::stable_vector, 8>; -using context_array_t = common::container::small_vector; +using context_array_t = common::container::small_vector; -unique_context_vec_t& -get_registered_contexts(); +context* +get_mutable_registered_context(rocprofiler_context_id_t id); + +const context* +get_registered_context(rocprofiler_context_id_t id); using context_filter_t = bool (*)(const context*); inline bool -default_context_filter(const context* val) -{ - return (val != nullptr); -} +default_context_filter(const context* val); + +context_array_t& +get_registered_contexts(context_array_t& data, context_filter_t filter = default_context_filter); + +context_array_t +get_registered_contexts(context_filter_t filter = default_context_filter); context_array_t& get_active_contexts(context_array_t& data, context_filter_t filter = default_context_filter); @@ -200,5 +203,11 @@ void deactivate_client_contexts(rocprofiler_client_id_t); // should only be called if the client failed to initialize void deregister_client_contexts(rocprofiler_client_id_t); + +inline bool +default_context_filter(const context* val) +{ + return (val != nullptr); +} } // namespace context } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/counters/core.cpp b/source/lib/rocprofiler-sdk/counters/core.cpp index fd0e1fbef1..dd7cc25c93 100644 --- a/source/lib/rocprofiler-sdk/counters/core.cpp +++ b/source/lib/rocprofiler-sdk/counters/core.cpp @@ -70,7 +70,10 @@ public: rocprofiler_profile_counting_dispatch_callback_t callback, void* callback_args) { - auto& ctx = *rocprofiler::context::get_registered_contexts().at(context_id.handle); + auto* ctx_p = rocprofiler::context::get_mutable_registered_context(context_id); + if(!ctx_p) return false; + + auto& ctx = *ctx_p; if(!ctx.counter_collection) { @@ -81,12 +84,11 @@ public: auto& cb = 
*ctx.counter_collection->callbacks.emplace_back( std::make_shared()); - cb.user_cb = callback; - cb.callback_args = callback_args; - cb.context = context_id; - cb.buffer = buffer; - cb.internal_context = - rocprofiler::context::get_registered_contexts().at(context_id.handle).get(); + cb.user_cb = callback; + cb.callback_args = callback_args; + cb.context = context_id; + cb.buffer = buffer; + cb.internal_context = ctx_p; return true; } @@ -309,7 +311,7 @@ completed_cb(const std::shared_ptr& info, } void -start_context(context::context* ctx) +start_context(const context::context* ctx) { if(!ctx || !ctx->counter_collection) return; @@ -346,7 +348,7 @@ start_context(context::context* ctx) } void -stop_context(context::context* ctx) +stop_context(const context::context* ctx) { if(!ctx || !ctx->counter_collection) return; diff --git a/source/lib/rocprofiler-sdk/counters/core.hpp b/source/lib/rocprofiler-sdk/counters/core.hpp index f0a597318b..7c70087c5c 100644 --- a/source/lib/rocprofiler-sdk/counters/core.hpp +++ b/source/lib/rocprofiler-sdk/counters/core.hpp @@ -103,9 +103,9 @@ configure_buffered_dispatch(rocprofiler_context_id_t con void* callback_args); void -start_context(context::context*); +start_context(const context::context*); void -stop_context(context::context*); +stop_context(const context::context*); } // namespace counters } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/external_correlation.cpp b/source/lib/rocprofiler-sdk/external_correlation.cpp index 091757f34f..c2b3601e61 100644 --- a/source/lib/rocprofiler-sdk/external_correlation.cpp +++ b/source/lib/rocprofiler-sdk/external_correlation.cpp @@ -152,16 +152,11 @@ rocprofiler_push_external_correlation_id(rocprofiler_context_id_t context, static uint64_t pid_v = getpid(); if(tid < pid_v) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; - for(auto& itr : rocprofiler::context::get_registered_contexts()) - { - if(itr->context_idx == context.handle) - { - 
itr->correlation_tracer.external_correlator.push(tid, external_correlation_id); - return ROCPROFILER_STATUS_SUCCESS; - } - } + auto* ctx = rocprofiler::context::get_mutable_registered_context(context); + if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; - return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; + ctx->correlation_tracer.external_correlator.push(tid, external_correlation_id); + return ROCPROFILER_STATUS_SUCCESS; } rocprofiler_status_t @@ -173,16 +168,11 @@ rocprofiler_pop_external_correlation_id(rocprofiler_context_id_t context, static uint64_t pid_v = getpid(); if(tid < pid_v) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; - for(auto& itr : rocprofiler::context::get_registered_contexts()) - { - if(itr->context_idx == context.handle) - { - auto former = itr->correlation_tracer.external_correlator.pop(tid); - if(external_correlation_id) *external_correlation_id = former; - return ROCPROFILER_STATUS_SUCCESS; - } - } + auto* ctx = rocprofiler::context::get_mutable_registered_context(context); + if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; - return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; + auto former = ctx->correlation_tracer.external_correlator.pop(tid); + if(external_correlation_id) *external_correlation_id = former; + return ROCPROFILER_STATUS_SUCCESS; } } diff --git a/source/lib/rocprofiler-sdk/hsa/hsa.cpp b/source/lib/rocprofiler-sdk/hsa/hsa.cpp index 52cf6330d6..8b94092739 100644 --- a/source/lib/rocprofiler-sdk/hsa/hsa.cpp +++ b/source/lib/rocprofiler-sdk/hsa/hsa.cpp @@ -48,8 +48,6 @@ namespace hsa { namespace { -std::atomic report_activity = {}; - struct null_type {}; @@ -460,13 +458,14 @@ get_names(std::vector& _name_list, std::index_sequence) } bool -should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain, +should_wrap_functor(const context::context_array_t& _contexts, + rocprofiler_callback_tracing_kind_t _callback_domain, rocprofiler_buffer_tracing_kind_t _buffered_domain, int _operation) { // we 
loop over all the *registered* contexts and see if any of them, at any point in time, // might require callback or buffered API tracing - for(const auto& itr : context::get_registered_contexts()) + for(const auto& itr : _contexts) { if(!itr) continue; @@ -487,10 +486,12 @@ template void update_table(hsa_api_table_t* _orig, std::index_sequence) { - auto _update = [](hsa_api_table_t* _orig_v, auto _info) { + auto _update = [](hsa_api_table_t* _orig_v, const auto& _contexts_v, auto _info) { // check to see if there are any contexts which enable this operation in the HSA API domain - if(!should_wrap_functor( - _info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx)) + if(!should_wrap_functor(_contexts_v, + _info.callback_domain_idx, + _info.buffered_domain_idx, + _info.operation_idx)) return; // 1. get the sub-table containing the function pointer @@ -501,7 +502,8 @@ update_table(hsa_api_table_t* _orig, std::index_sequence) _func = _info.get_functor(_func); }; - (_update(_orig, hsa_api_info{}), ...); + auto _contexts = context::get_registered_contexts(); + (_update(_orig, _contexts, hsa_api_info{}), ...); } } // namespace @@ -547,13 +549,6 @@ get_names() return _data; } -void -set_callback(activity_functor_t _func) -{ - auto&& _v = report_activity.load(); - report_activity.compare_exchange_strong(_v, _func); -} - void update_table(hsa_api_table_t* _orig) { diff --git a/source/lib/rocprofiler-sdk/hsa/hsa.hpp b/source/lib/rocprofiler-sdk/hsa/hsa.hpp index 18d7af320e..ff005daf4f 100644 --- a/source/lib/rocprofiler-sdk/hsa/hsa.hpp +++ b/source/lib/rocprofiler-sdk/hsa/hsa.hpp @@ -31,10 +31,6 @@ namespace rocprofiler { namespace hsa { -using activity_functor_t = int (*)(rocprofiler_callback_tracing_kind_t domain, - uint32_t operation_id, - void* data); - using hsa_api_table_t = HsaApiTable; hsa_api_table_t& @@ -43,6 +39,9 @@ get_table(); template struct hsa_table_lookup; +template +struct hsa_api_info; + template struct hsa_api_impl { @@ -56,9 +55,6 @@ 
struct hsa_api_impl static auto functor(Args&&... args); }; -template -struct hsa_api_info; - const char* name_by_id(uint32_t id); @@ -77,9 +73,6 @@ get_names(); std::vector get_ids(); -void -set_callback(activity_functor_t _func); - void update_table(hsa_api_table_t* _orig); } // namespace hsa diff --git a/source/lib/rocprofiler-sdk/hsa/queue.cpp b/source/lib/rocprofiler-sdk/hsa/queue.cpp index 23d1b22b54..7a8d93df0f 100644 --- a/source/lib/rocprofiler-sdk/hsa/queue.cpp +++ b/source/lib/rocprofiler-sdk/hsa/queue.cpp @@ -80,8 +80,7 @@ bool context_filter(const context::context* ctx) { return (ctx->buffered_tracer && - (ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) || - ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY))); + (ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH))); } bool @@ -120,23 +119,6 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data) << " returned dispatch times where the end time (" << dispatch_time.end << ") was less than the start time (" << dispatch_time.start << ")"; - // try to extract the async copy time. this will return HSA_STATUS_ERROR if there - // is not an async copy agent associated with the signal so we just predicate - // putting something into the buffer based on whether or not - // hsa_amd_profiling_get_async_copy_time returns HSA_STATUS_SUCCESS. 
- auto copy_time = hsa_amd_profiling_async_copy_time_t{}; - auto copy_time_status = - queue_info_session.queue.ext_api().hsa_amd_profiling_get_async_copy_time_fn(_signal, - &copy_time); - - // if we encounter this in CI, it will cause test to fail - ROCP_CI_LOG_IF(ERROR, - copy_time_status == HSA_STATUS_SUCCESS && copy_time.end < copy_time.start) - << "hsa_amd_profiling_get_async_copy_time for kernel_id=" << _kern_id - << " on rocprofiler_agent=" << _rocp_agent->id.handle - << " returned async times where the end time (" << copy_time.end - << ") was less than the start time (" << copy_time.start << ")"; - for(const auto* itr : ctxs) { auto* _buffer = buffer::get_buffer( @@ -181,26 +163,6 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data) record); } } - - if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)) - { - if(copy_time_status == HSA_STATUS_SUCCESS) - { - auto record = rocprofiler_buffer_tracing_memory_copy_record_t{ - sizeof(rocprofiler_buffer_tracing_memory_copy_record_t), - ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, - _corr_id_v, - copy_time.start, - copy_time.end, - _rocp_agent->id, - _queue_id, - _kern_id}; - - CHECK_NOTNULL(_buffer)->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING, - ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, - record); - } - } } } @@ -522,20 +484,6 @@ Queue::Queue(const AgentCache& agent, _ext_api.hsa_amd_queue_intercept_register_fn(_intercept_queue, WriteInterceptor, this)) << "Could not register interceptor"; - bool enable_async_copy = false; - for(const auto& itr : context::get_registered_contexts()) - { - if(itr->buffered_tracer && - itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)) - enable_async_copy = true; - } - - if(enable_async_copy) - { - LOG_IF(FATAL, _ext_api.hsa_amd_profiling_async_copy_enable_fn(true) != HSA_STATUS_SUCCESS) - << "Could not enable async copy timing"; - } - *queue = _intercept_queue; } diff --git a/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp 
b/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp index 118b0c40fc..b445d45d4e 100644 --- a/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp +++ b/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp @@ -186,8 +186,7 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table) } else if(itr->buffered_tracer) { - if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) || - itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)) + if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH)) { enable_intercepter = true; break; diff --git a/tests/kernel-tracing/kernel-tracing.cpp b/tests/kernel-tracing/kernel-tracing.cpp index e12fb88f76..a7ca50ae54 100644 --- a/tests/kernel-tracing/kernel-tracing.cpp +++ b/tests/kernel-tracing/kernel-tracing.cpp @@ -642,6 +642,9 @@ tool_fini(void* tool_data) static auto _once = std::atomic_flag{ATOMIC_FLAG_INIT}; if(_once.test_and_set()) return; + stop(); + flush(); + std::cerr << "[" << getpid() << "][" << __FUNCTION__ << "] Finalizing... agents=" << agents.size() << ", code_object_callback_records=" << code_object_records.size() @@ -652,9 +655,6 @@ tool_fini(void* tool_data) << ", hsa_api_bf_records=" << hsa_api_bf_records.size() << " ...\n" << std::flush; - stop(); - flush(); - auto* _call_stack = static_cast(tool_data); if(_call_stack) { @@ -799,6 +799,7 @@ flush() { for(auto* itr : buffers) { + if(!itr) continue; auto status = rocprofiler_flush_buffer(*itr); if(status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY) {