Contexts update + buffer flushing + cleanup (#338)
* Update lib/rocprofiler-sdk/context/context.*
- get_registered_contexts functions (local copy)
* Update lib/rocprofiler-sdk/hsa/{queue,queue_controller}.cpp
- remove ROCPROFILER_BUFFER_TRACING_MEMORY_COPY code
* Update tests/kernel-tracing/kernel-tracing.cpp
- move stop() and flush() in tool_fini to before reporting of sizes of data collected
* Update lib/rocprofiler-sdk/hsa/hsa.*
- remove stale set_callback / activity_functor_t code
* Update lib/rocprofiler-sdk/buffer.cpp
- full wait instead of returning busy when buffer is busy
- use task_group::join instead of task_group::wait to fully wait for tasks to finish (bug fix)
* Update lib/rocprofiler-sdk/agent.cpp
- support agent mapping for CPU agents
* Remove direct access to vector of registered contexts
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
c5e45803e9
Коммит
199f0b5421
@@ -642,6 +642,19 @@ get_agent_caches()
|
||||
static auto _v = std::vector<hsa::AgentCache>{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
// Pairs a rocprofiler agent record with an HSA agent handle.
// Entries are appended by construct_agent_cache() and searched by get_hsa_agent() and
// get_rocprofiler_agent().
struct agent_pair
|
||||
{
|
||||
// pointer to the rocprofiler-side agent; null when default-constructed
const rocprofiler_agent_t* rocp_agent = nullptr;
|
||||
// HSA agent handle for the same agent; value-initialized when default-constructed
hsa_agent_t hsa_agent = {};
|
||||
};
|
||||
|
||||
// Accessor for the list of agent_pair entries.
// The vector is a function-local static: it is created on the first call and the same
// instance is returned by reference on every call, so callers can both read and append.
auto&
|
||||
get_agent_mapping()
|
||||
{
|
||||
static auto _v = std::vector<agent_pair>{};
|
||||
return _v;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::vector<const rocprofiler_agent_t*>
|
||||
@@ -677,6 +690,8 @@ construct_agent_cache(::HsaApiTable* table)
|
||||
<< "Found " << rocp_agents.size() << " rocprofiler agents and " << hsa_agents.size()
|
||||
<< " HSA agents";
|
||||
|
||||
get_agent_mapping().reserve(get_agent_mapping().size() + rocp_agents.size());
|
||||
|
||||
auto hsa_agent_node_map = std::unordered_map<uint32_t, hsa_agent_t>{};
|
||||
for(const auto& itr : hsa_agents)
|
||||
{
|
||||
@@ -704,6 +719,7 @@ construct_agent_cache(::HsaApiTable* table)
|
||||
if(ritr->node_id == node_id)
|
||||
{
|
||||
agent_map.emplace(ritr->node_id, std::make_tuple(ritr, hitr));
|
||||
get_agent_mapping().emplace_back(agent_pair{ritr, hitr});
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -798,9 +814,9 @@ construct_agent_cache(::HsaApiTable* table)
|
||||
std::optional<hsa_agent_t>
|
||||
get_hsa_agent(const rocprofiler_agent_t* agent)
|
||||
{
|
||||
for(const auto& itr : get_agent_caches())
|
||||
for(const auto& itr : get_agent_mapping())
|
||||
{
|
||||
if(itr == agent) return itr.get_hsa_agent();
|
||||
if(itr.rocp_agent->id.handle == agent->id.handle) return itr.hsa_agent;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
@@ -809,9 +825,9 @@ get_hsa_agent(const rocprofiler_agent_t* agent)
|
||||
const rocprofiler_agent_t*
|
||||
get_rocprofiler_agent(hsa_agent_t agent)
|
||||
{
|
||||
for(const auto& itr : get_agent_caches())
|
||||
for(const auto& itr : get_agent_mapping())
|
||||
{
|
||||
if(itr == agent) return itr.get_rocp_agent();
|
||||
if(itr.hsa_agent.handle == agent.handle) return itr.rocp_agent;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
|
||||
@@ -144,7 +144,15 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
|
||||
if(wait && task_group) task_group->wait();
|
||||
|
||||
// buffer is currently being flushed or destroyed
|
||||
if(buff->syncer.test_and_set()) return ROCPROFILER_STATUS_ERROR_BUFFER_BUSY;
|
||||
if(buff->syncer.test_and_set())
|
||||
{
|
||||
if(!wait) return ROCPROFILER_STATUS_ERROR_BUFFER_BUSY;
|
||||
while(buff->syncer.test_and_set())
|
||||
{
|
||||
std::this_thread::yield();
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds{10});
|
||||
}
|
||||
}
|
||||
|
||||
auto idx = buff->buffer_idx++;
|
||||
|
||||
@@ -187,7 +195,7 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
|
||||
if(task_group)
|
||||
{
|
||||
task_group->exec(_task);
|
||||
if(wait) task_group->wait();
|
||||
if(wait) task_group->join();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -91,12 +91,7 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t c
|
||||
if(rocprofiler::registration::get_init_status() > -1)
|
||||
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
|
||||
|
||||
if(context_id.handle >= rocprofiler::context::get_registered_contexts().size())
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
}
|
||||
|
||||
auto& ctx = rocprofiler::context::get_registered_contexts().at(context_id.handle);
|
||||
auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id);
|
||||
|
||||
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
|
||||
|
||||
@@ -88,12 +88,7 @@ rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t
|
||||
if(rocprofiler::registration::get_init_status() > -1)
|
||||
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
|
||||
|
||||
if(context_id.handle >= rocprofiler::context::get_registered_contexts().size())
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
}
|
||||
|
||||
auto& ctx = rocprofiler::context::get_registered_contexts().at(context_id.handle);
|
||||
auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id);
|
||||
|
||||
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
|
||||
|
||||
@@ -107,7 +107,7 @@ rocprofiler_context_is_valid(rocprofiler_context_id_t context_id, int* status)
|
||||
{
|
||||
if(itr && itr->context_idx == context_id.handle)
|
||||
{
|
||||
auto _ret = rocprofiler::context::validate_context(itr.get());
|
||||
auto _ret = rocprofiler::context::validate_context(itr);
|
||||
*status = (_ret == ROCPROFILER_STATUS_SUCCESS) ? 1 : 0;
|
||||
return _ret;
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <random>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
@@ -49,6 +50,11 @@ namespace context
|
||||
namespace
|
||||
{
|
||||
using reserve_size_t = common::container::reserve_size;
|
||||
using unique_context_vec_t =
|
||||
common::container::stable_vector<allocator::unique_static_ptr_t<context>, 8>;
|
||||
using active_context_vec_t = common::container::stable_vector<std::atomic<const context*>, 8>;
|
||||
|
||||
constexpr auto invalid_client_idx = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
auto&
|
||||
get_contexts_mutex()
|
||||
@@ -57,7 +63,17 @@ get_contexts_mutex()
|
||||
return _v;
|
||||
}
|
||||
|
||||
constexpr auto invalid_client_idx = std::numeric_limits<uint32_t>::max();
|
||||
// Returns the offset that separates a public context id from its index in the
// registered-contexts container (allocate_context() adds it when creating the id,
// get_mutable_registered_context() subtracts it when looking a context up).
// The value is drawn once, on the first call, uniformly from
// [numeric_limits<uint8_t>::max(), numeric_limits<uint16_t>::max()] and cached in a
// function-local static, so every later call returns the same number.
// NOTE(review): presumably randomized so that handles are not plain small indices -- confirm.
uint64_t
|
||||
get_contexts_offset()
|
||||
{
|
||||
// immediately-invoked lambda: runs only when the static is initialized
static uint64_t _v = []() {
|
||||
// Mersenne Twister engine seeded from std::random_device
auto gen = std::mt19937{std::random_device{}()};
|
||||
// both bounds of the distribution are inclusive
auto rng = std::uniform_int_distribution<uint64_t>{std::numeric_limits<uint8_t>::max(),
|
||||
std::numeric_limits<uint16_t>::max()};
|
||||
return rng(gen);
|
||||
}();
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_client_index()
|
||||
@@ -66,6 +82,13 @@ get_client_index()
|
||||
return _v;
|
||||
}
|
||||
|
||||
// Accessor for the container that stores the unique_static_ptr_t<context> entry of every
// registered context. The container is a function-local static constructed on first call
// with a reserve size of one chunk (unique_context_vec_t::chunk_size); the same instance is
// returned by mutable reference on every call.
unique_context_vec_t&
|
||||
get_registered_contexts_impl()
|
||||
{
|
||||
static auto _v = unique_context_vec_t{reserve_size_t{unique_context_vec_t::chunk_size}};
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_num_active_contexts()
|
||||
{
|
||||
@@ -136,11 +159,31 @@ pop_latest_correlation_id(const correlation_id* val)
|
||||
if(get_latest_correlation_id_impl() == val) get_latest_correlation_id_impl() = nullptr;
|
||||
}
|
||||
|
||||
unique_context_vec_t&
|
||||
get_registered_contexts()
|
||||
context_array_t&
|
||||
get_registered_contexts(context_array_t& data, context_filter_t filter)
|
||||
{
|
||||
static auto _v = unique_context_vec_t{reserve_size_t{unique_context_vec_t::chunk_size}};
|
||||
return _v;
|
||||
data.clear();
|
||||
auto num_ctx = get_registered_contexts_impl().size();
|
||||
if(num_ctx <= 0) return data;
|
||||
|
||||
data.reserve(num_ctx);
|
||||
for(auto& itr : get_registered_contexts_impl())
|
||||
{
|
||||
const auto* ctx = itr.get();
|
||||
if(ctx)
|
||||
{
|
||||
if(!filter || (filter && filter(ctx))) data.emplace_back(ctx);
|
||||
}
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
// Convenience overload: creates an empty context_array_t, fills it through the
// get_registered_contexts(data, filter) overload, and returns the array by value.
// \param filter predicate forwarded unchanged to the two-argument overload
context_array_t
|
||||
get_registered_contexts(context_filter_t filter)
|
||||
{
|
||||
auto data = context_array_t{};
|
||||
get_registered_contexts(data, filter);
|
||||
return data;
|
||||
}
|
||||
|
||||
context_array_t&
|
||||
@@ -209,13 +252,13 @@ allocate_context()
|
||||
auto _lk = std::unique_lock<std::mutex>{get_contexts_mutex()};
|
||||
|
||||
// initial context identifier number
|
||||
auto _idx = get_registered_contexts().size();
|
||||
auto _idx = get_registered_contexts_impl().size() + get_contexts_offset();
|
||||
|
||||
// make space in registered
|
||||
get_registered_contexts().emplace_back(nullptr);
|
||||
get_registered_contexts_impl().emplace_back(nullptr);
|
||||
|
||||
// create an entry in the registered
|
||||
auto& _cfg_v = get_registered_contexts().back();
|
||||
auto& _cfg_v = get_registered_contexts_impl().back();
|
||||
_cfg_v = allocator::make_unique_static<context>();
|
||||
auto* _cfg = _cfg_v.get();
|
||||
// ...
|
||||
@@ -233,35 +276,41 @@ allocate_context()
|
||||
return rocprofiler_context_id_t{_idx};
|
||||
}
|
||||
|
||||
// Looks up a registered context by its public id and returns a mutable pointer to it.
// \param id context id whose handle is (container index + get_contexts_offset())
// \return the stored context pointer, or nullptr when the handle is smaller than the offset
//         or the resulting index is past the end of the registered-contexts container
context*
|
||||
get_mutable_registered_context(rocprofiler_context_id_t id)
|
||||
{
|
||||
// reject handles below the offset first so the unsigned subtraction below cannot wrap
if(id.handle < get_contexts_offset()) return nullptr;
|
||||
auto _idx = id.handle - get_contexts_offset();
|
||||
if(_idx >= get_registered_contexts_impl().size()) return nullptr;
|
||||
// returns whatever pointer the slot holds (get() on the stored smart pointer)
return get_registered_contexts_impl().at(_idx).get();
|
||||
}
|
||||
|
||||
// Read-only variant of get_mutable_registered_context(): performs the same lookup and
// returns the result as a pointer-to-const (nullptr when the lookup fails).
const context*
|
||||
get_registered_context(rocprofiler_context_id_t id)
|
||||
{
|
||||
return get_mutable_registered_context(id);
|
||||
}
|
||||
|
||||
// Validates a context pointer.
// Currently the only active check is that the pointer is non-null; the buffer and filter
// checks below are commented out and therefore not performed.
// \return ROCPROFILER_STATUS_SUCCESS for a non-null pointer,
//         ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND otherwise
rocprofiler_status_t
|
||||
validate_context(const context* cfg)
|
||||
{
|
||||
// if(cfg->buffer == nullptr) return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
|
||||
|
||||
// if(cfg->filter == nullptr) return ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND;
|
||||
|
||||
return (cfg) ? ROCPROFILER_STATUS_SUCCESS : ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
start_context(rocprofiler_context_id_t context_id)
|
||||
{
|
||||
if(context_id.handle >= get_registered_contexts().size())
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
}
|
||||
if(context_id.handle < get_contexts_offset()) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
|
||||
context* cfg = get_registered_contexts().at(context_id.handle).get();
|
||||
|
||||
if(!cfg)
|
||||
{
|
||||
if((context_id.handle - get_contexts_offset()) >= get_registered_contexts_impl().size())
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
}
|
||||
|
||||
const auto* cfg = get_registered_context(context_id);
|
||||
|
||||
if(!cfg) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
|
||||
if(validate_context(cfg) != ROCPROFILER_STATUS_SUCCESS)
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
|
||||
}
|
||||
|
||||
auto current_contexts = context_array_t{};
|
||||
for(const auto* itr : get_active_contexts(current_contexts))
|
||||
@@ -277,7 +326,7 @@ start_context(rocprofiler_context_id_t context_id)
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t rocp_tot_contexts = get_registered_contexts().size();
|
||||
uint64_t rocp_tot_contexts = get_registered_contexts_impl().size();
|
||||
auto idx = rocp_tot_contexts;
|
||||
auto& active_contexts = get_active_contexts_impl();
|
||||
{
|
||||
@@ -310,7 +359,7 @@ start_context(rocprofiler_context_id_t context_id)
|
||||
// atomic swap the pointer into the "active" array used internally
|
||||
const context* _expected = nullptr;
|
||||
bool success = active_contexts.at(idx).compare_exchange_strong(
|
||||
_expected, get_registered_contexts().at(context_id.handle).get());
|
||||
_expected, get_registered_context(context_id));
|
||||
|
||||
if(!success)
|
||||
{
|
||||
@@ -369,7 +418,7 @@ deactivate_client_contexts(rocprofiler_client_id_t client_id)
|
||||
void
|
||||
deregister_client_contexts(rocprofiler_client_id_t client_id)
|
||||
{
|
||||
for(auto& itr : get_registered_contexts())
|
||||
for(auto& itr : get_registered_contexts_impl())
|
||||
{
|
||||
if(itr->client_idx == client_id.handle)
|
||||
{
|
||||
|
||||
@@ -174,21 +174,24 @@ start_context(rocprofiler_context_id_t id);
|
||||
/// \brief disable the context.
|
||||
rocprofiler_status_t stop_context(rocprofiler_context_id_t);
|
||||
|
||||
using unique_context_vec_t =
|
||||
common::container::stable_vector<allocator::unique_static_ptr_t<context>, 8>;
|
||||
using active_context_vec_t = common::container::stable_vector<std::atomic<const context*>, 8>;
|
||||
using context_array_t = common::container::small_vector<const context*>;
|
||||
using context_array_t = common::container::small_vector<const context*>;
|
||||
|
||||
unique_context_vec_t&
|
||||
get_registered_contexts();
|
||||
context*
|
||||
get_mutable_registered_context(rocprofiler_context_id_t id);
|
||||
|
||||
const context*
|
||||
get_registered_context(rocprofiler_context_id_t id);
|
||||
|
||||
using context_filter_t = bool (*)(const context*);
|
||||
|
||||
inline bool
|
||||
default_context_filter(const context* val)
|
||||
{
|
||||
return (val != nullptr);
|
||||
}
|
||||
default_context_filter(const context* val);
|
||||
|
||||
context_array_t&
|
||||
get_registered_contexts(context_array_t& data, context_filter_t filter = default_context_filter);
|
||||
|
||||
context_array_t
|
||||
get_registered_contexts(context_filter_t filter = default_context_filter);
|
||||
|
||||
context_array_t&
|
||||
get_active_contexts(context_array_t& data, context_filter_t filter = default_context_filter);
|
||||
@@ -200,5 +203,11 @@ void deactivate_client_contexts(rocprofiler_client_id_t);
|
||||
|
||||
// should only be called if the client failed to initialize
|
||||
void deregister_client_contexts(rocprofiler_client_id_t);
|
||||
|
||||
// Default predicate for the context_filter_t parameters of get_registered_contexts() and
// get_active_contexts(): accepts every non-null context pointer.
// \return true when val is not nullptr
inline bool
|
||||
default_context_filter(const context* val)
|
||||
{
|
||||
return (val != nullptr);
|
||||
}
|
||||
} // namespace context
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -70,7 +70,10 @@ public:
|
||||
rocprofiler_profile_counting_dispatch_callback_t callback,
|
||||
void* callback_args)
|
||||
{
|
||||
auto& ctx = *rocprofiler::context::get_registered_contexts().at(context_id.handle);
|
||||
auto* ctx_p = rocprofiler::context::get_mutable_registered_context(context_id);
|
||||
if(!ctx_p) return false;
|
||||
|
||||
auto& ctx = *ctx_p;
|
||||
|
||||
if(!ctx.counter_collection)
|
||||
{
|
||||
@@ -81,12 +84,11 @@ public:
|
||||
auto& cb = *ctx.counter_collection->callbacks.emplace_back(
|
||||
std::make_shared<counter_callback_info>());
|
||||
|
||||
cb.user_cb = callback;
|
||||
cb.callback_args = callback_args;
|
||||
cb.context = context_id;
|
||||
cb.buffer = buffer;
|
||||
cb.internal_context =
|
||||
rocprofiler::context::get_registered_contexts().at(context_id.handle).get();
|
||||
cb.user_cb = callback;
|
||||
cb.callback_args = callback_args;
|
||||
cb.context = context_id;
|
||||
cb.buffer = buffer;
|
||||
cb.internal_context = ctx_p;
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -309,7 +311,7 @@ completed_cb(const std::shared_ptr<counter_callback_info>& info,
|
||||
}
|
||||
|
||||
void
|
||||
start_context(context::context* ctx)
|
||||
start_context(const context::context* ctx)
|
||||
{
|
||||
if(!ctx || !ctx->counter_collection) return;
|
||||
|
||||
@@ -346,7 +348,7 @@ start_context(context::context* ctx)
|
||||
}
|
||||
|
||||
void
|
||||
stop_context(context::context* ctx)
|
||||
stop_context(const context::context* ctx)
|
||||
{
|
||||
if(!ctx || !ctx->counter_collection) return;
|
||||
|
||||
|
||||
@@ -103,9 +103,9 @@ configure_buffered_dispatch(rocprofiler_context_id_t con
|
||||
void* callback_args);
|
||||
|
||||
void
|
||||
start_context(context::context*);
|
||||
start_context(const context::context*);
|
||||
|
||||
void
|
||||
stop_context(context::context*);
|
||||
stop_context(const context::context*);
|
||||
} // namespace counters
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -152,16 +152,11 @@ rocprofiler_push_external_correlation_id(rocprofiler_context_id_t context,
|
||||
static uint64_t pid_v = getpid();
|
||||
if(tid < pid_v) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
for(auto& itr : rocprofiler::context::get_registered_contexts())
|
||||
{
|
||||
if(itr->context_idx == context.handle)
|
||||
{
|
||||
itr->correlation_tracer.external_correlator.push(tid, external_correlation_id);
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
auto* ctx = rocprofiler::context::get_mutable_registered_context(context);
|
||||
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
ctx->correlation_tracer.external_correlator.push(tid, external_correlation_id);
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
@@ -173,16 +168,11 @@ rocprofiler_pop_external_correlation_id(rocprofiler_context_id_t context,
|
||||
static uint64_t pid_v = getpid();
|
||||
if(tid < pid_v) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
for(auto& itr : rocprofiler::context::get_registered_contexts())
|
||||
{
|
||||
if(itr->context_idx == context.handle)
|
||||
{
|
||||
auto former = itr->correlation_tracer.external_correlator.pop(tid);
|
||||
if(external_correlation_id) *external_correlation_id = former;
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
auto* ctx = rocprofiler::context::get_mutable_registered_context(context);
|
||||
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
auto former = ctx->correlation_tracer.external_correlator.pop(tid);
|
||||
if(external_correlation_id) *external_correlation_id = former;
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,8 +48,6 @@ namespace hsa
|
||||
{
|
||||
namespace
|
||||
{
|
||||
std::atomic<activity_functor_t> report_activity = {};
|
||||
|
||||
// Empty type with no members; its uses lie outside the lines visible here.
struct null_type
|
||||
{};
|
||||
|
||||
@@ -460,13 +458,14 @@ get_names(std::vector<const char*>& _name_list, std::index_sequence<Idx...>)
|
||||
}
|
||||
|
||||
bool
|
||||
should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain,
|
||||
should_wrap_functor(const context::context_array_t& _contexts,
|
||||
rocprofiler_callback_tracing_kind_t _callback_domain,
|
||||
rocprofiler_buffer_tracing_kind_t _buffered_domain,
|
||||
int _operation)
|
||||
{
|
||||
// we loop over all the *registered* contexts and see if any of them, at any point in time,
|
||||
// might require callback or buffered API tracing
|
||||
for(const auto& itr : context::get_registered_contexts())
|
||||
for(const auto& itr : _contexts)
|
||||
{
|
||||
if(!itr) continue;
|
||||
|
||||
@@ -487,10 +486,12 @@ template <size_t... Idx>
|
||||
void
|
||||
update_table(hsa_api_table_t* _orig, std::index_sequence<Idx...>)
|
||||
{
|
||||
auto _update = [](hsa_api_table_t* _orig_v, auto _info) {
|
||||
auto _update = [](hsa_api_table_t* _orig_v, const auto& _contexts_v, auto _info) {
|
||||
// check to see if there are any contexts which enable this operation in the HSA API domain
|
||||
if(!should_wrap_functor(
|
||||
_info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx))
|
||||
if(!should_wrap_functor(_contexts_v,
|
||||
_info.callback_domain_idx,
|
||||
_info.buffered_domain_idx,
|
||||
_info.operation_idx))
|
||||
return;
|
||||
|
||||
// 1. get the sub-table containing the function pointer
|
||||
@@ -501,7 +502,8 @@ update_table(hsa_api_table_t* _orig, std::index_sequence<Idx...>)
|
||||
_func = _info.get_functor(_func);
|
||||
};
|
||||
|
||||
(_update(_orig, hsa_api_info<Idx>{}), ...);
|
||||
auto _contexts = context::get_registered_contexts();
|
||||
(_update(_orig, _contexts, hsa_api_info<Idx>{}), ...);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -547,13 +549,6 @@ get_names()
|
||||
return _data;
|
||||
}
|
||||
|
||||
// Installs _func as the activity callback held in the atomic report_activity.
// The current value is loaded and then used as the expected value of a strong
// compare-exchange. If report_activity is changed between the load and the exchange, the
// exchange fails and _func is not stored; the result of the exchange is not checked.
void
|
||||
set_callback(activity_functor_t _func)
|
||||
{
|
||||
// snapshot of the current callback, used as the "expected" argument below
auto&& _v = report_activity.load();
|
||||
report_activity.compare_exchange_strong(_v, _func);
|
||||
}
|
||||
|
||||
void
|
||||
update_table(hsa_api_table_t* _orig)
|
||||
{
|
||||
|
||||
@@ -31,10 +31,6 @@ namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
using activity_functor_t = int (*)(rocprofiler_callback_tracing_kind_t domain,
|
||||
uint32_t operation_id,
|
||||
void* data);
|
||||
|
||||
using hsa_api_table_t = HsaApiTable;
|
||||
|
||||
hsa_api_table_t&
|
||||
@@ -43,6 +39,9 @@ get_table();
|
||||
template <size_t Idx>
|
||||
struct hsa_table_lookup;
|
||||
|
||||
template <size_t Idx>
|
||||
struct hsa_api_info;
|
||||
|
||||
template <size_t Idx>
|
||||
struct hsa_api_impl
|
||||
{
|
||||
@@ -56,9 +55,6 @@ struct hsa_api_impl
|
||||
static auto functor(Args&&... args);
|
||||
};
|
||||
|
||||
template <size_t Idx>
|
||||
struct hsa_api_info;
|
||||
|
||||
const char*
|
||||
name_by_id(uint32_t id);
|
||||
|
||||
@@ -77,9 +73,6 @@ get_names();
|
||||
std::vector<uint32_t>
|
||||
get_ids();
|
||||
|
||||
void
|
||||
set_callback(activity_functor_t _func);
|
||||
|
||||
void
|
||||
update_table(hsa_api_table_t* _orig);
|
||||
} // namespace hsa
|
||||
|
||||
@@ -80,8 +80,7 @@ bool
|
||||
context_filter(const context::context* ctx)
|
||||
{
|
||||
return (ctx->buffered_tracer &&
|
||||
(ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) ||
|
||||
ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)));
|
||||
(ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH)));
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -120,23 +119,6 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data)
|
||||
<< " returned dispatch times where the end time (" << dispatch_time.end
|
||||
<< ") was less than the start time (" << dispatch_time.start << ")";
|
||||
|
||||
// try to extract the async copy time. this will return HSA_STATUS_ERROR if there
|
||||
// is not an async copy agent associated with the signal so we just predicate
|
||||
// putting something into the buffer based on whether or not
|
||||
// hsa_amd_profiling_get_async_copy_time returns HSA_STATUS_SUCCESS.
|
||||
auto copy_time = hsa_amd_profiling_async_copy_time_t{};
|
||||
auto copy_time_status =
|
||||
queue_info_session.queue.ext_api().hsa_amd_profiling_get_async_copy_time_fn(_signal,
|
||||
&copy_time);
|
||||
|
||||
// if we encounter this in CI, it will cause test to fail
|
||||
ROCP_CI_LOG_IF(ERROR,
|
||||
copy_time_status == HSA_STATUS_SUCCESS && copy_time.end < copy_time.start)
|
||||
<< "hsa_amd_profiling_get_async_copy_time for kernel_id=" << _kern_id
|
||||
<< " on rocprofiler_agent=" << _rocp_agent->id.handle
|
||||
<< " returned async times where the end time (" << copy_time.end
|
||||
<< ") was less than the start time (" << copy_time.start << ")";
|
||||
|
||||
for(const auto* itr : ctxs)
|
||||
{
|
||||
auto* _buffer = buffer::get_buffer(
|
||||
@@ -181,26 +163,6 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data)
|
||||
record);
|
||||
}
|
||||
}
|
||||
|
||||
if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY))
|
||||
{
|
||||
if(copy_time_status == HSA_STATUS_SUCCESS)
|
||||
{
|
||||
auto record = rocprofiler_buffer_tracing_memory_copy_record_t{
|
||||
sizeof(rocprofiler_buffer_tracing_memory_copy_record_t),
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
_corr_id_v,
|
||||
copy_time.start,
|
||||
copy_time.end,
|
||||
_rocp_agent->id,
|
||||
_queue_id,
|
||||
_kern_id};
|
||||
|
||||
CHECK_NOTNULL(_buffer)->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING,
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
record);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -522,20 +484,6 @@ Queue::Queue(const AgentCache& agent,
|
||||
_ext_api.hsa_amd_queue_intercept_register_fn(_intercept_queue, WriteInterceptor, this))
|
||||
<< "Could not register interceptor";
|
||||
|
||||
bool enable_async_copy = false;
|
||||
for(const auto& itr : context::get_registered_contexts())
|
||||
{
|
||||
if(itr->buffered_tracer &&
|
||||
itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY))
|
||||
enable_async_copy = true;
|
||||
}
|
||||
|
||||
if(enable_async_copy)
|
||||
{
|
||||
LOG_IF(FATAL, _ext_api.hsa_amd_profiling_async_copy_enable_fn(true) != HSA_STATUS_SUCCESS)
|
||||
<< "Could not enable async copy timing";
|
||||
}
|
||||
|
||||
*queue = _intercept_queue;
|
||||
}
|
||||
|
||||
|
||||
@@ -186,8 +186,7 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
|
||||
}
|
||||
else if(itr->buffered_tracer)
|
||||
{
|
||||
if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) ||
|
||||
itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY))
|
||||
if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH))
|
||||
{
|
||||
enable_intercepter = true;
|
||||
break;
|
||||
|
||||
@@ -642,6 +642,9 @@ tool_fini(void* tool_data)
|
||||
static auto _once = std::atomic_flag{ATOMIC_FLAG_INIT};
|
||||
if(_once.test_and_set()) return;
|
||||
|
||||
stop();
|
||||
flush();
|
||||
|
||||
std::cerr << "[" << getpid() << "][" << __FUNCTION__
|
||||
<< "] Finalizing... agents=" << agents.size()
|
||||
<< ", code_object_callback_records=" << code_object_records.size()
|
||||
@@ -652,9 +655,6 @@ tool_fini(void* tool_data)
|
||||
<< ", hsa_api_bf_records=" << hsa_api_bf_records.size() << " ...\n"
|
||||
<< std::flush;
|
||||
|
||||
stop();
|
||||
flush();
|
||||
|
||||
auto* _call_stack = static_cast<call_stack_t*>(tool_data);
|
||||
if(_call_stack)
|
||||
{
|
||||
@@ -799,6 +799,7 @@ flush()
|
||||
{
|
||||
for(auto* itr : buffers)
|
||||
{
|
||||
if(!itr) continue;
|
||||
auto status = rocprofiler_flush_buffer(*itr);
|
||||
if(status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY)
|
||||
{
|
||||
|
||||
Ссылка в новой задаче
Block a user