Files
rocm-systems/source/lib/rocprofiler-sdk/context/correlation_id.cpp
T
Madsen, Jonathan 3580478426 Build system (libdw), correlation ID, and shebang fixes (#354)
* Fix compilation for output library

- link to targets for ATT (amd-comgr, dw, elf)

* Relax correlation ID retirement log failures

- only fail for correlation ID retirement underflow when building in CI mode

* Fix shebang for several files

- license was inserted before shebang in several places

* Update code coverage exclude folders for samples

* Tweak to agent tests

- test to make sure hsa agent is not the old value instead of testing that it is the new value

* Fix libdw include/link

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
2025-04-27 20:16:18 -05:00

243 baris
7.1 KiB
C++

// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
#include "lib/common/logging.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include <rocprofiler-sdk/fwd.h>
namespace rocprofiler
{
namespace context
{
namespace
{
auto*&
get_correlation_id_map()
{
using data_type = common::container::stable_vector<std::unique_ptr<correlation_id>>;
static auto*& _v = common::static_object<common::Synchronized<data_type>>::construct();
return _v;
}
auto&
get_latest_correlation_id_impl()
{
static thread_local auto _v = common::container::small_vector<correlation_id*, 16>{};
return _v;
}
uint64_t
get_unique_internal_id()
{
static auto _v = std::atomic<uint64_t>{};
return ++_v;
}
} // namespace
uint32_t
correlation_id::add_ref_count()
{
auto _ret = m_ref_count.fetch_add(1);
ROCP_CI_LOG_IF(WARNING, _ret == 0)
<< fmt::format("correlation id {} already retired", internal);
return _ret;
}
uint32_t
correlation_id::sub_ref_count()
{
if(m_ref_count == 0)
{
ROCP_CI_LOG(WARNING) << fmt::format(
"attempt to decrement correlation id {} reference count but reference count is zero",
internal);
return 0;
}
auto _ret = m_ref_count.fetch_sub(1);
if(registration::get_fini_status() > 0) return 0;
ROCP_CI_LOG_IF(WARNING, _ret == 0) << fmt::format("correlation id underflow on {}", internal);
if(_ret == 1)
{
auto ctxs = get_active_contexts([](const context* ctx) {
return (ctx->buffered_tracer &&
(ctx->buffered_tracer->domains(
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT)));
});
auto record = rocprofiler_buffer_tracing_correlation_id_retirement_record_t{
.size = sizeof(rocprofiler_buffer_tracing_correlation_id_retirement_record_t),
.kind = ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT,
.timestamp = common::timestamp_ns(),
.internal_correlation_id = internal};
if(!ctxs.empty())
{
for(const auto* itr : ctxs)
{
auto* _buffer = buffer::get_buffer(itr->buffered_tracer->buffer_data.at(
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT));
auto success = CHECK_NOTNULL(_buffer)->emplace(
ROCPROFILER_BUFFER_CATEGORY_TRACING,
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT,
record);
ROCP_CI_LOG_IF(WARNING, !success)
<< fmt::format("failed to emplace correlation id retirement for {}", internal);
}
}
}
return _ret;
}
uint32_t
correlation_id::add_kern_count()
{
return m_kern_count.fetch_add(1);
}
uint32_t
correlation_id::sub_kern_count()
{
return m_kern_count.fetch_sub(1);
}
correlation_id*
correlation_tracing_service::construct(uint32_t _init_ref_count)
{
ROCP_FATAL_IF(_init_ref_count == 0) << "must have reference count > 0";
auto _internal_id = get_unique_internal_id();
auto* corr_id_map = get_correlation_id_map();
if(!corr_id_map) return nullptr;
auto& ret = corr_id_map->wlock([](auto& data) -> auto& { return data.emplace_back(); });
ret = std::make_unique<correlation_id>(_init_ref_count, common::get_tid(), _internal_id);
if(auto* prev_api_corr_id = get_latest_correlation_id())
ret->ancestor = prev_api_corr_id->internal;
get_latest_correlation_id_impl().emplace_back(ret.get());
return ret.get();
}
correlation_id*
get_latest_correlation_id()
{
return (get_latest_correlation_id_impl().empty()) ? nullptr
: get_latest_correlation_id_impl().back();
}
const correlation_id*
pop_latest_correlation_id(correlation_id* val)
{
if(!val)
{
ROCP_CI_LOG(ERROR) << "passed nullptr to correlation id";
return nullptr;
}
auto& stack = get_latest_correlation_id_impl();
if(stack.empty())
{
ROCP_CI_LOG(ERROR) << "empty thread-local correlation id stack";
return nullptr;
}
ROCP_CI_LOG_IF(ERROR, get_latest_correlation_id_impl().back() != val)
<< "pop_latest_correlation_id is happening out of order for " << val->internal
<< ". top of stack is " << get_latest_correlation_id_impl().back()->internal;
stack.pop_back();
return (stack.empty()) ? nullptr : stack.back();
}
correlation_id*
push_correlation_id(correlation_id* val)
{
if(!val)
{
ROCP_ERROR << "passed nullptr to correlation id";
return nullptr;
}
val->thread_idx = common::get_tid();
get_latest_correlation_id_impl().emplace_back(val);
return val;
}
void
dump_correlation_stack(const char* s)
{
auto& stack = get_latest_correlation_id_impl();
auto info = std::stringstream{};
info << s << ": tid: " << common::get_tid() << " :";
for(const auto* itr : stack)
{
info << " " << itr->internal;
;
}
info << "\n";
printf("%s", info.str().c_str());
}
void
correlation_id_finalize()
{
if(!get_correlation_id_map()) return;
get_correlation_id_map()->rlock([](const auto& data) {
uint64_t ndangling = 0;
for(const auto& itr : data)
{
if(itr && itr->get_ref_count() > 0)
{
++ndangling;
ROCP_WARNING << "retiring dangling correlation ID " << itr->internal
<< " from thread " << itr->thread_idx
<< " :: remaining reference count: " << itr->get_ref_count();
while(itr && itr->get_ref_count() > 0 && itr->sub_ref_count() > 1)
{}
}
}
ROCP_CI_LOG_IF(INFO, ndangling > 0) << "retired dangling correlation IDs: " << ndangling;
});
}
} // namespace context
} // namespace rocprofiler