[SDK] Update finalization and correlation ID retirement (#281)
* Update finalization and correlation ID retirement
- directly invoke finalize if only one client
- correlation_id_finalize
* Address PR comments
* Improve logging for correlation_id_finalize
* Fix correlation ID handling in memory allocation service
* Fix clang-tidy issues in hsa-memory-allocation test exe
---------
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
[ROCm/rocprofiler-sdk commit: 91f7f42104]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
5c782d91b9
Коммит
70b787fb41
+28
-7
@@ -21,6 +21,7 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/buffer.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
@@ -148,22 +149,20 @@ pop_latest_correlation_id(correlation_id* val)
|
||||
{
|
||||
if(!val)
|
||||
{
|
||||
ROCP_ERROR << "passed nullptr to correlation id";
|
||||
ROCP_CI_LOG(ERROR) << "passed nullptr to correlation id";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto& stack = get_latest_correlation_id_impl();
|
||||
if(stack.empty())
|
||||
{
|
||||
ROCP_ERROR << "empty thread-local correlation id stack";
|
||||
ROCP_CI_LOG(ERROR) << "empty thread-local correlation id stack";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if(stack.back() != val)
|
||||
{
|
||||
ROCP_ERROR << "pop_latest_correlation_id is happening out of order for " << val->internal
|
||||
<< ". top of stack is " << stack.back()->internal;
|
||||
}
|
||||
ROCP_CI_LOG_IF(ERROR, get_latest_correlation_id_impl().back() != val)
|
||||
<< "pop_latest_correlation_id is happening out of order for " << val->internal
|
||||
<< ". top of stack is " << get_latest_correlation_id_impl().back()->internal;
|
||||
|
||||
stack.pop_back();
|
||||
|
||||
@@ -200,5 +199,27 @@ dump_correlation_stack(const char* s)
|
||||
printf("%s", info.str().c_str());
|
||||
}
|
||||
|
||||
void
|
||||
correlation_id_finalize()
|
||||
{
|
||||
if(!get_correlation_id_map()) return;
|
||||
|
||||
get_correlation_id_map()->rlock([](const auto& data) {
|
||||
uint64_t ndangling = 0;
|
||||
for(const auto& itr : data)
|
||||
{
|
||||
if(itr && itr->get_ref_count() > 0)
|
||||
{
|
||||
++ndangling;
|
||||
ROCP_WARNING << "retiring dangling correlation ID " << itr->internal
|
||||
<< " from thread " << itr->thread_idx
|
||||
<< " :: remaining reference count: " << itr->get_ref_count();
|
||||
while(itr && itr->get_ref_count() > 0 && itr->sub_ref_count() > 1)
|
||||
{}
|
||||
}
|
||||
}
|
||||
ROCP_CI_LOG_IF(INFO, ndangling > 0) << "retired dangling correlation IDs: " << ndangling;
|
||||
});
|
||||
}
|
||||
} // namespace context
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -94,6 +94,9 @@ push_correlation_id(correlation_id*);
|
||||
void
|
||||
dump_correlation_stack(const char*);
|
||||
|
||||
void
|
||||
correlation_id_finalize();
|
||||
|
||||
/// permits tools opportunity to modify the correlation id based on the domain, op, and
|
||||
/// the rocprofiler generated correlation id
|
||||
struct correlation_tracing_service
|
||||
|
||||
+22
-4
@@ -490,14 +490,20 @@ memory_allocation_impl(Args... args)
|
||||
_data.func = rocprofiler_enum;
|
||||
_data.correlation_id = context::get_latest_correlation_id();
|
||||
|
||||
bool _constructed_corr_id = false;
|
||||
if(!_data.correlation_id)
|
||||
{
|
||||
constexpr auto ref_count = 1;
|
||||
_data.correlation_id = context::correlation_tracing_service::construct(ref_count);
|
||||
_constructed_corr_id = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// increase the reference count to prevent this correlation ID from being retired by another
|
||||
// service
|
||||
_data.correlation_id->add_ref_count();
|
||||
}
|
||||
|
||||
// increase the reference count to denote that this correlation id is being used in a kernel
|
||||
_data.correlation_id->add_ref_count();
|
||||
auto thr_id = _data.correlation_id->thread_idx;
|
||||
tracing::populate_external_correlation_ids(
|
||||
tracing_data.external_correlation_ids,
|
||||
@@ -563,6 +569,9 @@ memory_allocation_impl(Args... args)
|
||||
|
||||
// decrement the reference count after usage in the callback/buffers
|
||||
_data.correlation_id->sub_ref_count();
|
||||
|
||||
if(_constructed_corr_id) context::pop_latest_correlation_id(_data.correlation_id);
|
||||
|
||||
return _ret;
|
||||
}
|
||||
|
||||
@@ -605,14 +614,20 @@ memory_free_impl(Args... args)
|
||||
_data.correlation_id = context::get_latest_correlation_id();
|
||||
_data.address = handle_starting_addr(std::get<address_idx>(_tied_args));
|
||||
|
||||
bool _constructed_corr_id = false;
|
||||
if(!_data.correlation_id)
|
||||
{
|
||||
constexpr auto ref_count = 1;
|
||||
_data.correlation_id = context::correlation_tracing_service::construct(ref_count);
|
||||
_constructed_corr_id = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// increase the reference count to prevent this correlation ID from being retired by another
|
||||
// service
|
||||
_data.correlation_id->add_ref_count();
|
||||
}
|
||||
|
||||
// increase the reference count to denote that this correlation id is being used in a kernel
|
||||
_data.correlation_id->add_ref_count();
|
||||
auto thr_id = _data.correlation_id->thread_idx;
|
||||
tracing::populate_external_correlation_ids(
|
||||
tracing_data.external_correlation_ids,
|
||||
@@ -672,6 +687,9 @@ memory_free_impl(Args... args)
|
||||
|
||||
// decrement the reference count after usage in the callback/buffers
|
||||
_data.correlation_id->sub_ref_count();
|
||||
|
||||
if(_constructed_corr_id) context::pop_latest_correlation_id(_data.correlation_id);
|
||||
|
||||
return _ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/code_object/code_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/hip.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/stream.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp"
|
||||
@@ -56,6 +57,7 @@
|
||||
#include <rocprofiler-sdk/hsa.h>
|
||||
#include <rocprofiler-sdk/marker.h>
|
||||
#include <rocprofiler-sdk/ompt.h>
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
#include <rocprofiler-sdk/version.h>
|
||||
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
@@ -537,13 +539,17 @@ invoke_client_initializers()
|
||||
|
||||
if(!get_clients()) return false;
|
||||
|
||||
// if there is only one client, just fully finalize
|
||||
rocprofiler_client_finalize_t client_fini_func =
|
||||
(get_clients()->size() == 1) ? [](rocprofiler_client_id_t) -> void { finalize(); }
|
||||
: &invoke_client_finalizer;
|
||||
|
||||
for(auto& itr : *get_clients())
|
||||
{
|
||||
if(itr && itr->configure_result && itr->configure_result->initialize)
|
||||
{
|
||||
context::push_client(itr->internal_client_id.handle);
|
||||
itr->configure_result->initialize(&invoke_client_finalizer,
|
||||
itr->configure_result->tool_data);
|
||||
itr->configure_result->initialize(client_fini_func, itr->configure_result->tool_data);
|
||||
context::pop_client(itr->internal_client_id.handle);
|
||||
// set to nullptr so initialize only gets called once
|
||||
itr->configure_result->initialize = nullptr;
|
||||
@@ -722,6 +728,7 @@ finalize()
|
||||
pc_sampling::code_object::finalize();
|
||||
#endif
|
||||
code_object::finalize();
|
||||
context::correlation_id_finalize();
|
||||
if(get_init_status() > 0)
|
||||
{
|
||||
invoke_client_finalizers();
|
||||
|
||||
+6
-6
@@ -125,7 +125,7 @@ get_agent_list()
|
||||
std::vector<hsa_agent_t> agents(num_agents);
|
||||
|
||||
// Get the agent list
|
||||
hsa_agent_t* agent_iter = &agents[0];
|
||||
hsa_agent_t* agent_iter = agents.data();
|
||||
status = hsa_iterate_agents(get_agents, &agent_iter);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
@@ -182,7 +182,7 @@ call_hsa_memory_allocate(const size_t i, const size_t base_size, hsa_agent_t age
|
||||
}
|
||||
// Allocate memory to hold region list of an agent
|
||||
std::vector<hsa_region_t> region_list(num_regions);
|
||||
hsa_region_t* ptr_reg = ®ion_list[0];
|
||||
hsa_region_t* ptr_reg = region_list.data();
|
||||
status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_reg);
|
||||
RET_IF_HSA_ERR(status)
|
||||
auto address_vec = std::vector<void*>{};
|
||||
@@ -190,7 +190,7 @@ call_hsa_memory_allocate(const size_t i, const size_t base_size, hsa_agent_t age
|
||||
|
||||
for(size_t j = 0; j < i; ++j)
|
||||
{
|
||||
void* addr = 0;
|
||||
void* addr = nullptr;
|
||||
|
||||
status = hsa_memory_allocate(region_list[0], base_size, &addr);
|
||||
RET_IF_HSA_ERR(status)
|
||||
@@ -218,7 +218,7 @@ call_hsa_memory_pool_allocate(const size_t i, const size_t base_size, hsa_agent_
|
||||
}
|
||||
// Allocate memory to hold region list of an agent
|
||||
std::vector<hsa_amd_memory_pool_t> memory_pool_list(num_pools);
|
||||
hsa_amd_memory_pool_t* ptr_memory_pool = &memory_pool_list[0];
|
||||
hsa_amd_memory_pool_t* ptr_memory_pool = memory_pool_list.data();
|
||||
status = hsa_amd_agent_iterate_memory_pools(agent, callback_get_memory_pools, &ptr_memory_pool);
|
||||
RET_IF_HSA_ERR(status)
|
||||
auto address_vec = std::vector<void*>{};
|
||||
@@ -226,7 +226,7 @@ call_hsa_memory_pool_allocate(const size_t i, const size_t base_size, hsa_agent_
|
||||
|
||||
for(size_t j = 0; j < i; ++j)
|
||||
{
|
||||
void* addr = 0;
|
||||
void* addr = nullptr;
|
||||
uint32_t flags = 0;
|
||||
|
||||
status = hsa_amd_memory_pool_allocate(memory_pool_list[0], base_size, flags, &addr);
|
||||
@@ -255,7 +255,7 @@ call_hsa_vmem_allocate(const size_t i, hsa_agent_t agent)
|
||||
}
|
||||
// Allocate memory to hold region list of an agent
|
||||
std::vector<hsa_amd_memory_pool_t> memory_pool_list(num_pools);
|
||||
hsa_amd_memory_pool_t* ptr_memory_pool = &memory_pool_list[0];
|
||||
hsa_amd_memory_pool_t* ptr_memory_pool = memory_pool_list.data();
|
||||
status = hsa_amd_agent_iterate_memory_pools(agent, callback_get_memory_pools, &ptr_memory_pool);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user