diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/correlation_id.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/correlation_id.cpp index e4e4750eaf..1b73a30fd2 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/correlation_id.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/correlation_id.cpp @@ -21,6 +21,7 @@ // SOFTWARE. #include "lib/rocprofiler-sdk/context/correlation_id.hpp" +#include "lib/common/logging.hpp" #include "lib/common/static_object.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" @@ -148,22 +149,20 @@ pop_latest_correlation_id(correlation_id* val) { if(!val) { - ROCP_ERROR << "passed nullptr to correlation id"; + ROCP_CI_LOG(ERROR) << "passed nullptr to correlation id"; return nullptr; } auto& stack = get_latest_correlation_id_impl(); if(stack.empty()) { - ROCP_ERROR << "empty thread-local correlation id stack"; + ROCP_CI_LOG(ERROR) << "empty thread-local correlation id stack"; return nullptr; } - if(stack.back() != val) - { - ROCP_ERROR << "pop_latest_correlation_id is happening out of order for " << val->internal - << ". top of stack is " << stack.back()->internal; - } + ROCP_CI_LOG_IF(ERROR, get_latest_correlation_id_impl().back() != val) + << "pop_latest_correlation_id is happening out of order for " << val->internal + << ". top of stack is " << get_latest_correlation_id_impl().back()->internal; stack.pop_back(); @@ -200,5 +199,27 @@ dump_correlation_stack(const char* s) printf("%s", info.str().c_str()); } +void +correlation_id_finalize() +{ + if(!get_correlation_id_map()) return; + + get_correlation_id_map()->rlock([](const auto& data) { + uint64_t ndangling = 0; + for(const auto& itr : data) + { + if(itr && itr->get_ref_count() > 0) + { + ++ndangling; + ROCP_WARNING << "retiring dangling correlation ID " << itr->internal + << " from thread " << itr->thread_idx + << " :: remaining reference count: " << itr->get_ref_count(); + while(itr && itr->get_ref_count() > 0 && itr->sub_ref_count() > 1) + {} + } + } + ROCP_CI_LOG_IF(INFO, ndangling > 0) << "retired dangling correlation IDs: " << ndangling; + }); +} } // namespace context } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/correlation_id.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/correlation_id.hpp index dd6f20abf4..a4a7d09a50 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/correlation_id.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/correlation_id.hpp @@ -94,6 +94,9 @@ push_correlation_id(correlation_id*); void dump_correlation_stack(const char*); +void +correlation_id_finalize(); + /// permits tools opportunity to modify the correlation id based on the domain, op, and /// the rocprofiler generated correlation id struct correlation_tracing_service diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/memory_allocation.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/memory_allocation.cpp index bedbd3589e..5624c8e981 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/memory_allocation.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/memory_allocation.cpp @@ -490,14 +490,20 @@ memory_allocation_impl(Args... args) _data.func = rocprofiler_enum; _data.correlation_id = context::get_latest_correlation_id(); + bool _constructed_corr_id = false; if(!_data.correlation_id) { constexpr auto ref_count = 1; _data.correlation_id = context::correlation_tracing_service::construct(ref_count); + _constructed_corr_id = true; + } + else + { + // increase the reference count to prevent this correlation ID from being retired by another + // service + _data.correlation_id->add_ref_count(); } - // increase the reference count to denote that this correlation id is being used in a kernel - _data.correlation_id->add_ref_count(); auto thr_id = _data.correlation_id->thread_idx; tracing::populate_external_correlation_ids( tracing_data.external_correlation_ids, @@ -563,6 +569,9 @@ memory_allocation_impl(Args... args) // decrement the reference count after usage in the callback/buffers _data.correlation_id->sub_ref_count(); + + if(_constructed_corr_id) context::pop_latest_correlation_id(_data.correlation_id); + return _ret; } @@ -605,14 +614,20 @@ memory_free_impl(Args... args) _data.correlation_id = context::get_latest_correlation_id(); _data.address = handle_starting_addr(std::get(_tied_args)); + bool _constructed_corr_id = false; if(!_data.correlation_id) { constexpr auto ref_count = 1; _data.correlation_id = context::correlation_tracing_service::construct(ref_count); + _constructed_corr_id = true; + } + else + { + // increase the reference count to prevent this correlation ID from being retired by another + // service + _data.correlation_id->add_ref_count(); } - // increase the reference count to denote that this correlation id is being used in a kernel - _data.correlation_id->add_ref_count(); auto thr_id = _data.correlation_id->thread_idx; tracing::populate_external_correlation_ids( tracing_data.external_correlation_ids, @@ -672,6 +687,9 @@ memory_free_impl(Args... args) // decrement the reference count after usage in the callback/buffers _data.correlation_id->sub_ref_count(); + + if(_constructed_corr_id) context::pop_latest_correlation_id(_data.correlation_id); + return _ret; } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp index 48fc5d8645..54c04ef894 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp @@ -31,6 +31,7 @@ #include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/code_object/code_object.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/context/correlation_id.hpp" #include "lib/rocprofiler-sdk/hip/hip.hpp" #include "lib/rocprofiler-sdk/hip/stream.hpp" #include "lib/rocprofiler-sdk/hsa/async_copy.hpp" @@ -56,6 +57,7 @@ #include #include #include +#include #include #include @@ -537,13 +539,17 @@ invoke_client_initializers() if(!get_clients()) return false; + // if there is only one client, just fully finalize + rocprofiler_client_finalize_t client_fini_func = + (get_clients()->size() == 1) ? [](rocprofiler_client_id_t) -> void { finalize(); } + : &invoke_client_finalizer; + for(auto& itr : *get_clients()) { if(itr && itr->configure_result && itr->configure_result->initialize) { context::push_client(itr->internal_client_id.handle); - itr->configure_result->initialize(&invoke_client_finalizer, - itr->configure_result->tool_data); + itr->configure_result->initialize(client_fini_func, itr->configure_result->tool_data); context::pop_client(itr->internal_client_id.handle); // set to nullptr so initialize only gets called once itr->configure_result->initialize = nullptr; @@ -722,6 +728,7 @@ finalize() pc_sampling::code_object::finalize(); #endif code_object::finalize(); + context::correlation_id_finalize(); if(get_init_status() > 0) { invoke_client_finalizers(); diff --git a/projects/rocprofiler-sdk/tests/bin/hsa-memory-allocation/hsa-memory-allocation.cpp b/projects/rocprofiler-sdk/tests/bin/hsa-memory-allocation/hsa-memory-allocation.cpp index 9f7de38493..5044208d15 100644 --- a/projects/rocprofiler-sdk/tests/bin/hsa-memory-allocation/hsa-memory-allocation.cpp +++ b/projects/rocprofiler-sdk/tests/bin/hsa-memory-allocation/hsa-memory-allocation.cpp @@ -125,7 +125,7 @@ get_agent_list() std::vector agents(num_agents); // Get the agent list - hsa_agent_t* agent_iter = &agents[0]; + hsa_agent_t* agent_iter = agents.data(); status = hsa_iterate_agents(get_agents, &agent_iter); RET_IF_HSA_ERR(status) @@ -182,7 +182,7 @@ call_hsa_memory_allocate(const size_t i, const size_t base_size, hsa_agent_t age } // Allocate memory to hold region list of an agent std::vector region_list(num_regions); - hsa_region_t* ptr_reg = ®ion_list[0]; + hsa_region_t* ptr_reg = region_list.data(); status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_reg); RET_IF_HSA_ERR(status) auto address_vec = std::vector{}; @@ -190,7 +190,7 @@ call_hsa_memory_allocate(const size_t i, const size_t base_size, hsa_agent_t age for(size_t j = 0; j < i; ++j) { - void* addr = 0; + void* addr = nullptr; status = hsa_memory_allocate(region_list[0], base_size, &addr); RET_IF_HSA_ERR(status) @@ -218,7 +218,7 @@ call_hsa_memory_pool_allocate(const size_t i, const size_t base_size, hsa_agent_ } // Allocate memory to hold region list of an agent std::vector memory_pool_list(num_pools); - hsa_amd_memory_pool_t* ptr_memory_pool = &memory_pool_list[0]; + hsa_amd_memory_pool_t* ptr_memory_pool = memory_pool_list.data(); status = hsa_amd_agent_iterate_memory_pools(agent, callback_get_memory_pools, &ptr_memory_pool); RET_IF_HSA_ERR(status) auto address_vec = std::vector{}; @@ -226,7 +226,7 @@ call_hsa_memory_pool_allocate(const size_t i, const size_t base_size, hsa_agent_ for(size_t j = 0; j < i; ++j) { - void* addr = 0; + void* addr = nullptr; uint32_t flags = 0; status = hsa_amd_memory_pool_allocate(memory_pool_list[0], base_size, flags, &addr); @@ -255,7 +255,7 @@ call_hsa_vmem_allocate(const size_t i, hsa_agent_t agent) } // Allocate memory to hold region list of an agent std::vector memory_pool_list(num_pools); - hsa_amd_memory_pool_t* ptr_memory_pool = &memory_pool_list[0]; + hsa_amd_memory_pool_t* ptr_memory_pool = memory_pool_list.data(); status = hsa_amd_agent_iterate_memory_pools(agent, callback_get_memory_pools, &ptr_memory_pool); RET_IF_HSA_ERR(status)