[SDK] Update finalization and correlation ID retirement (#281)

* Update finalization and correlation ID retirement

- directly invoke finalize if only one client
- correlation_id_finalize

* Address PR comments

* Improve logging for correlation_id_finalize

* Fix correlation ID handling in memory allocation service

* Fix clang-tidy issues in hsa-memory-allocation test exe

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>

[ROCm/rocprofiler-sdk commit: 91f7f42104]
Этот коммит содержится в:
Madsen, Jonathan
2025-03-20 16:59:23 -05:00
коммит произвёл GitHub
родитель 5c782d91b9
Коммит 70b787fb41
5 изменённых файлов: 68 добавлений и 19 удалений
+28 -7
Просмотреть файл
@@ -21,6 +21,7 @@
// SOFTWARE.
#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
#include "lib/common/logging.hpp"
#include "lib/common/static_object.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
@@ -148,22 +149,20 @@ pop_latest_correlation_id(correlation_id* val)
{
if(!val)
{
ROCP_ERROR << "passed nullptr to correlation id";
ROCP_CI_LOG(ERROR) << "passed nullptr to correlation id";
return nullptr;
}
auto& stack = get_latest_correlation_id_impl();
if(stack.empty())
{
ROCP_ERROR << "empty thread-local correlation id stack";
ROCP_CI_LOG(ERROR) << "empty thread-local correlation id stack";
return nullptr;
}
if(stack.back() != val)
{
ROCP_ERROR << "pop_latest_correlation_id is happening out of order for " << val->internal
<< ". top of stack is " << stack.back()->internal;
}
ROCP_CI_LOG_IF(ERROR, get_latest_correlation_id_impl().back() != val)
<< "pop_latest_correlation_id is happening out of order for " << val->internal
<< ". top of stack is " << get_latest_correlation_id_impl().back()->internal;
stack.pop_back();
@@ -200,5 +199,27 @@ dump_correlation_stack(const char* s)
printf("%s", info.str().c_str());
}
void
correlation_id_finalize()
{
if(!get_correlation_id_map()) return;
get_correlation_id_map()->rlock([](const auto& data) {
uint64_t ndangling = 0;
for(const auto& itr : data)
{
if(itr && itr->get_ref_count() > 0)
{
++ndangling;
ROCP_WARNING << "retiring dangling correlation ID " << itr->internal
<< " from thread " << itr->thread_idx
<< " :: remaining reference count: " << itr->get_ref_count();
while(itr && itr->get_ref_count() > 0 && itr->sub_ref_count() > 1)
{}
}
}
ROCP_CI_LOG_IF(INFO, ndangling > 0) << "retired dangling correlation IDs: " << ndangling;
});
}
} // namespace context
} // namespace rocprofiler
+3
Просмотреть файл
@@ -94,6 +94,9 @@ push_correlation_id(correlation_id*);
void
dump_correlation_stack(const char*);
void
correlation_id_finalize();
/// permits tools opportunity to modify the correlation id based on the domain, op, and
/// the rocprofiler generated correlation id
struct correlation_tracing_service
+22 -4
Просмотреть файл
@@ -490,14 +490,20 @@ memory_allocation_impl(Args... args)
_data.func = rocprofiler_enum;
_data.correlation_id = context::get_latest_correlation_id();
bool _constructed_corr_id = false;
if(!_data.correlation_id)
{
constexpr auto ref_count = 1;
_data.correlation_id = context::correlation_tracing_service::construct(ref_count);
_constructed_corr_id = true;
}
else
{
// increase the reference count to prevent this correlation ID from being retired by another
// service
_data.correlation_id->add_ref_count();
}
// increase the reference count to denote that this correlation id is being used in a kernel
_data.correlation_id->add_ref_count();
auto thr_id = _data.correlation_id->thread_idx;
tracing::populate_external_correlation_ids(
tracing_data.external_correlation_ids,
@@ -563,6 +569,9 @@ memory_allocation_impl(Args... args)
// decrement the reference count after usage in the callback/buffers
_data.correlation_id->sub_ref_count();
if(_constructed_corr_id) context::pop_latest_correlation_id(_data.correlation_id);
return _ret;
}
@@ -605,14 +614,20 @@ memory_free_impl(Args... args)
_data.correlation_id = context::get_latest_correlation_id();
_data.address = handle_starting_addr(std::get<address_idx>(_tied_args));
bool _constructed_corr_id = false;
if(!_data.correlation_id)
{
constexpr auto ref_count = 1;
_data.correlation_id = context::correlation_tracing_service::construct(ref_count);
_constructed_corr_id = true;
}
else
{
// increase the reference count to prevent this correlation ID from being retired by another
// service
_data.correlation_id->add_ref_count();
}
// increase the reference count to denote that this correlation id is being used in a kernel
_data.correlation_id->add_ref_count();
auto thr_id = _data.correlation_id->thread_idx;
tracing::populate_external_correlation_ids(
tracing_data.external_correlation_ids,
@@ -672,6 +687,9 @@ memory_free_impl(Args... args)
// decrement the reference count after usage in the callback/buffers
_data.correlation_id->sub_ref_count();
if(_constructed_corr_id) context::pop_latest_correlation_id(_data.correlation_id);
return _ret;
}
+9 -2
Просмотреть файл
@@ -31,6 +31,7 @@
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/code_object/code_object.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
#include "lib/rocprofiler-sdk/hip/hip.hpp"
#include "lib/rocprofiler-sdk/hip/stream.hpp"
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp"
@@ -56,6 +57,7 @@
#include <rocprofiler-sdk/hsa.h>
#include <rocprofiler-sdk/marker.h>
#include <rocprofiler-sdk/ompt.h>
#include <rocprofiler-sdk/registration.h>
#include <rocprofiler-sdk/version.h>
#include <hsa/hsa_api_trace.h>
@@ -537,13 +539,17 @@ invoke_client_initializers()
if(!get_clients()) return false;
// if there is only one client, just fully finalize
rocprofiler_client_finalize_t client_fini_func =
(get_clients()->size() == 1) ? [](rocprofiler_client_id_t) -> void { finalize(); }
: &invoke_client_finalizer;
for(auto& itr : *get_clients())
{
if(itr && itr->configure_result && itr->configure_result->initialize)
{
context::push_client(itr->internal_client_id.handle);
itr->configure_result->initialize(&invoke_client_finalizer,
itr->configure_result->tool_data);
itr->configure_result->initialize(client_fini_func, itr->configure_result->tool_data);
context::pop_client(itr->internal_client_id.handle);
// set to nullptr so initialize only gets called once
itr->configure_result->initialize = nullptr;
@@ -722,6 +728,7 @@ finalize()
pc_sampling::code_object::finalize();
#endif
code_object::finalize();
context::correlation_id_finalize();
if(get_init_status() > 0)
{
invoke_client_finalizers();
+6 -6
Просмотреть файл
@@ -125,7 +125,7 @@ get_agent_list()
std::vector<hsa_agent_t> agents(num_agents);
// Get the agent list
hsa_agent_t* agent_iter = &agents[0];
hsa_agent_t* agent_iter = agents.data();
status = hsa_iterate_agents(get_agents, &agent_iter);
RET_IF_HSA_ERR(status)
@@ -182,7 +182,7 @@ call_hsa_memory_allocate(const size_t i, const size_t base_size, hsa_agent_t age
}
// Allocate memory to hold region list of an agent
std::vector<hsa_region_t> region_list(num_regions);
hsa_region_t* ptr_reg = &region_list[0];
hsa_region_t* ptr_reg = region_list.data();
status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_reg);
RET_IF_HSA_ERR(status)
auto address_vec = std::vector<void*>{};
@@ -190,7 +190,7 @@ call_hsa_memory_allocate(const size_t i, const size_t base_size, hsa_agent_t age
for(size_t j = 0; j < i; ++j)
{
void* addr = 0;
void* addr = nullptr;
status = hsa_memory_allocate(region_list[0], base_size, &addr);
RET_IF_HSA_ERR(status)
@@ -218,7 +218,7 @@ call_hsa_memory_pool_allocate(const size_t i, const size_t base_size, hsa_agent_
}
// Allocate memory to hold region list of an agent
std::vector<hsa_amd_memory_pool_t> memory_pool_list(num_pools);
hsa_amd_memory_pool_t* ptr_memory_pool = &memory_pool_list[0];
hsa_amd_memory_pool_t* ptr_memory_pool = memory_pool_list.data();
status = hsa_amd_agent_iterate_memory_pools(agent, callback_get_memory_pools, &ptr_memory_pool);
RET_IF_HSA_ERR(status)
auto address_vec = std::vector<void*>{};
@@ -226,7 +226,7 @@ call_hsa_memory_pool_allocate(const size_t i, const size_t base_size, hsa_agent_
for(size_t j = 0; j < i; ++j)
{
void* addr = 0;
void* addr = nullptr;
uint32_t flags = 0;
status = hsa_amd_memory_pool_allocate(memory_pool_list[0], base_size, flags, &addr);
@@ -255,7 +255,7 @@ call_hsa_vmem_allocate(const size_t i, hsa_agent_t agent)
}
// Allocate memory to hold region list of an agent
std::vector<hsa_amd_memory_pool_t> memory_pool_list(num_pools);
hsa_amd_memory_pool_t* ptr_memory_pool = &memory_pool_list[0];
hsa_amd_memory_pool_t* ptr_memory_pool = memory_pool_list.data();
status = hsa_amd_agent_iterate_memory_pools(agent, callback_get_memory_pools, &ptr_memory_pool);
RET_IF_HSA_ERR(status)