Files
marantic-amd 956a73c4c8 [rocprof-sys] Use fmt APIs to construct strings instead of JOIN (#2643)
## Motivation

With the introduction of the new logging system based on the `spdlog` library, there is an opportunity to replace the `timemory`-dependent JOIN implementation with the `fmt` library's `format` and `join` APIs, which are shipped as part of the `spdlog` library.

## Technical Details

Use `fmt` provided APIs to properly format and package strings.
2026-01-23 00:34:58 -05:00

706 строки
22 KiB
C++

// MIT License
//
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "common/defines.h"
#include "core/common.hpp"
#include "core/concepts.hpp"
#include "core/config.hpp"
#include "core/defines.hpp"
#include "core/demangler.hpp"
#include "core/perfetto.hpp"
#include "core/state.hpp"
#include "core/timemory.hpp"
#include "core/utility.hpp"
#include "library/causal/sampling.hpp"
#include "library/runtime.hpp"
#include "library/sampling.hpp"
#include "library/thread_data.hpp"
#include "library/tracing/annotation.hpp"
#include <timemory/components/io/components.hpp>
#include <timemory/components/network/types.hpp>
#include <timemory/components/papi/types.hpp>
#include <timemory/components/rusage/components.hpp>
#include <timemory/components/timing/backends.hpp>
#include <timemory/components/timing/components.hpp>
#include <timemory/enum.h>
#include <timemory/hash/types.hpp>
#include <timemory/mpl/concepts.hpp>
#include <timemory/mpl/type_traits.hpp>
#include <timemory/types.hpp>
#include "logger/debug.hpp"
#include <atomic>
#include <functional>
#include <memory>
#include <ratio>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
namespace rocprofsys
{
namespace tracing
{
/// thread_data instantiated with a std::vector<bool> payload.
/// NOTE(review): presumably tracks per-thread interval state -- confirm against users.
using interval_data_instances = thread_data<std::vector<bool>>;
/// Hash identifier type used throughout this header (alias of timemory's hash type).
using hash_value_t = tim::hash_value_t;
/// Type-list of timemory components, filtered through tim::mpl::available_t.
/// It is passed to invoke_with<tim::operation::perfetto_annotate> when the
/// end-of-slice perfetto events are annotated with timemory data.
using perfetto_annotate_component_types = tim::mpl::available_t<type_list<
comp::cpu_clock, comp::cpu_util, comp::kernel_mode_time, comp::num_major_page_faults,
comp::num_minor_page_faults, comp::page_rss, comp::peak_rss, comp::papi_array_t,
comp::papi_vector, comp::priority_context_switch, comp::voluntary_context_switch,
comp::process_cpu_clock, comp::process_cpu_util, comp::system_clock,
comp::thread_cpu_clock, comp::thread_cpu_util, comp::user_clock, comp::user_mode_time,
comp::virtual_memory>>;
//
// declarations
//
// Boolean flags defined in another translation unit.
// NOTE(review): presumably toggle debug output for push/pop/user/mark operations
// respectively -- confirm at the definition site.
extern ROCPROFSYS_HIDDEN_API bool debug_push;
extern ROCPROFSYS_HIDDEN_API bool debug_pop;
extern ROCPROFSYS_HIDDEN_API bool debug_user;
extern ROCPROFSYS_HIDDEN_API bool debug_mark;
/// Returns the map from track UUID (hash) to the track description string.
/// get_perfetto_track() consults and populates this map.
std::unordered_map<hash_value_t, std::string>&
get_perfetto_track_uuids();
/// Defined elsewhere. NOTE(review): name suggests it copies timemory's hash-id
/// tables -- confirm at the definition site.
void
copy_timemory_hash_ids();
/// Returns a mutable list of nullary callbacks.
/// NOTE(review): presumably executed during finalization -- confirm.
std::vector<std::function<void()>>&
get_finalization_functions();
/// Defined elsewhere. NOTE(review): presumably records the calling thread's start
/// timestamp -- confirm.
void
record_thread_start_time();
/// Defined elsewhere. NOTE(review): presumably performs per-thread initialization
/// of the tracing state -- confirm.
void
thread_init();
/// Forward declaration; the definition later in this header returns a reference
/// to a thread-local category_stack.
template <typename CategoryT>
auto&
get_category_stack();
/// Wraps \p name in the perfetto string wrapper selected by the constness of \p T.
///
/// A non-const \p T yields a ::perfetto::DynamicString, a const \p T yields a
/// ::perfetto::StaticString. The return type therefore depends on \p T.
template <typename T>
auto
get_perfetto_string(T& name)
{
    if constexpr(!std::is_const<T>::value)
    {
        // mutable object: hand it to perfetto as a dynamic string
        return ::perfetto::DynamicString{ name };
    }
    else
    {
        // const object: hand it to perfetto as a static string
        return ::perfetto::StaticString{ name };
    }
}
// Forward declarations of the perfetto event helpers defined further below.
// All take a category tag object and the event name as their first two arguments.
//
// push_perfetto / pop_perfetto: begin / end a slice; they dispatch to the *_ts and
// *_track variants depending on the types of the trailing arguments.
template <typename CategoryT, typename... Args>
inline void
push_perfetto(CategoryT, const char*, Args&&...);
template <typename CategoryT, typename... Args>
inline void
pop_perfetto(CategoryT, const char*, Args&&...);
// *_ts variants: the caller supplies the event timestamp as a uint64_t.
template <typename CategoryT, typename... Args>
inline void
push_perfetto_ts(CategoryT, const char*, uint64_t _ts, Args&&...);
template <typename CategoryT, typename... Args>
inline void
pop_perfetto_ts(CategoryT, const char*, uint64_t, Args&&...);
// *_track variants: the caller supplies both a perfetto track and a timestamp.
template <typename CategoryT, typename... Args>
inline void
push_perfetto_track(CategoryT, const char*, ::perfetto::Track, uint64_t, Args&&...);
template <typename CategoryT, typename... Args>
inline void
pop_perfetto_track(CategoryT, const char*, ::perfetto::Track, uint64_t, Args&&...);
// mark_* variants: emit an instant event instead of a begin/end pair.
template <typename CategoryT, typename... Args>
inline void
mark_perfetto(CategoryT, const char*, Args&&...);
template <typename CategoryT, typename... Args>
inline void
mark_perfetto_ts(CategoryT, const char*, uint64_t, Args&&...);
template <typename CategoryT, typename... Args>
inline void
mark_perfetto_track(CategoryT, const char*, ::perfetto::Track, uint64_t, Args&&...);
//
// definitions
//
/// Computes a hash identifier for a category, optionally combined with extra values.
///
/// The string "rocprofsys_<category name>" is hashed first; that hash is then passed
/// together with \p _args to tim::hash::get_hash_id and the result is returned.
template <typename CategoryT, typename... Args>
auto
get_perfetto_category_uuid(Args&&... _args)
{
    // category-specific label that seeds the identifier
    auto _label = fmt::format("rocprofsys_{}", trait::name<CategoryT>::value);
    return tim::hash::get_hash_id(tim::hash::get_hash_id(std::move(_label)),
                                  std::forward<Args>(_args)...);
}
template <typename CategoryT, typename TrackT = ::perfetto::Track, typename FuncT,
typename... Args>
auto
get_perfetto_track(CategoryT, FuncT&& _desc_generator, Args&&... _args)
{
auto _uuid = get_perfetto_category_uuid<CategoryT>(std::forward<Args>(_args)...);
auto& _track_uuids = get_perfetto_track_uuids();
if(_track_uuids.find(_uuid) == _track_uuids.end())
{
const auto _track = TrackT(_uuid, ::perfetto::ProcessTrack::Current());
auto _desc = _track.Serialize();
auto _name = std::forward<FuncT>(_desc_generator)(std::forward<Args>(_args)...);
_desc.set_name(_name);
::perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
LOG_TRACE("[{}] Created {}({}) with description: \"{}\"",
trait::name<CategoryT>::value, rocprofsys::utility::demangle<TrackT>(),
_uuid, _name);
_track_uuids.emplace(_uuid, _name);
}
// guard this with ppdefs in addition to runtime check to avoid
// overhead of generating string during releases
#if defined(ROCPROFSYS_CI) && ROCPROFSYS_CI > 0
auto _name = std::forward<FuncT>(_desc_generator)(std::forward<Args>(_args)...);
if(get_is_continuous_integration() && _track_uuids.at(_uuid) != _name)
{
throw std::runtime_error(
fmt::format("Error! Multiple invocations of UUID {} produced different "
"descriptions: \"{}\" and \"{}\"",
_uuid, _track_uuids.at(_uuid), _name));
}
#endif
return TrackT(_uuid, ::perfetto::ProcessTrack::Current());
}
/// Returns the current real-clock timestamp from timemory, expressed in nanoseconds
/// (std::nano period) using value type \p Tp (uint64_t by default).
template <typename Tp = uint64_t>
ROCPROFSYS_INLINE auto
now()
{
    using period_type = std::nano;
    return ::tim::get_clock_real_now<Tp, period_type>();
}
/// Returns a reference to the instrumentation_bundles instance obtained for thread
/// id \p _tid. The id defaults to threading::get_id(), evaluated at the call site.
/// Callers in this header compare the result against nullptr before using it.
inline auto&
get_instrumentation_bundles(int64_t _tid = threading::get_id())
{
// the construct_on_thread tag carries the thread id to instance()
return instrumentation_bundles::instance(construct_on_thread{ _tid });
}
/// Returns a reference to an atomic counter with static storage, initialized to zero.
/// Every call yields the same counter object.
inline auto&
push_count()
{
    static std::atomic<size_t> _counter{ 0 };
    return _counter;
}
/// Returns a reference to an atomic counter with static storage, initialized to zero.
/// Every call yields the same counter object.
inline auto&
pop_count()
{
    static std::atomic<size_t> _counter{ 0 };
    return _counter;
}
/// Pair of depth counters kept per category: one for profile entries and one for
/// tracing entries. Signed integers are used so the compiler does not have to
/// account for underflow/overflow.
struct category_stack
{
    int32_t profile{ 0 };  // number of open profile entries
    int32_t tracing{ 0 };  // number of open tracing entries
};
template <typename CategoryT>
auto&
get_category_stack()
{
static thread_local auto _v = category_stack{};
return _v;
}
/// Returns a reference to the tracing depth counter of the calling thread's
/// category_stack for \p CategoryT.
template <typename CategoryT>
auto&
get_tracing_stack()
{
    auto& _stack = get_category_stack<CategoryT>();
    return _stack.tracing;
}
/// Returns a reference to the profile depth counter of the calling thread's
/// category_stack for \p CategoryT.
template <typename CategoryT>
auto&
get_profile_stack()
{
    auto& _stack = get_category_stack<CategoryT>();
    return _stack.profile;
}
template <typename CategoryT>
auto
category_push_disabled()
{
return !trait::runtime_enabled<CategoryT>::get();
}
template <typename CategoryT>
auto
category_mark_disabled()
{
return !trait::runtime_enabled<CategoryT>::get();
}
/// Returns true when pop operations for \p CategoryT should be skipped: the category
/// is not runtime-enabled AND the calling thread has no open profile or tracing
/// entries (combined depth <= 0).
template <typename CategoryT>
auto
category_pop_disabled()
{
    // an enabled category is never skipped
    if(trait::runtime_enabled<CategoryT>::get()) return false;
    // disabled: still allow pops while entries pushed earlier remain open
    const auto _open_entries =
        get_profile_stack<CategoryT>() + get_tracing_stack<CategoryT>();
    return _open_entries <= 0;
}
/// Returns true when tracing pops for \p CategoryT should be skipped: the category
/// is not runtime-enabled AND the calling thread's tracing depth is <= 0.
template <typename CategoryT>
auto
tracing_pop_disabled()
{
    // an enabled category is never skipped
    if(trait::runtime_enabled<CategoryT>::get()) return false;
    // disabled: still allow pops while tracing entries remain open
    return get_tracing_stack<CategoryT>() <= 0;
}
/// Returns true when profile pops for \p CategoryT should be skipped: the category
/// is not runtime-enabled AND the calling thread's profile depth is <= 0.
template <typename CategoryT>
auto
profile_pop_disabled()
{
    // an enabled category is never skipped
    if(trait::runtime_enabled<CategoryT>::get()) return false;
    // disabled: still allow pops while profile entries remain open
    return get_profile_stack<CategoryT>() <= 0;
}
template <typename CategoryT, typename... Args>
inline void
push_timemory(CategoryT, std::string_view name, Args&&... args)
{
// skip if category is disabled
if(category_push_disabled<CategoryT>()) return;
auto& _data = tracing::get_instrumentation_bundles();
if(ROCPROFSYS_LIKELY(_data != nullptr))
{
// this generates a hash for the raw string array
auto _hash = tim::add_hash_id(name);
_data->construct(_hash)->start(std::forward<Args>(args)...);
// increment the profile stack
++get_profile_stack<CategoryT>();
}
}
/// Looks up the most recently created bundle whose hash matches \p name.
///
/// \return a pair of (bundle pointer, index within the bundle stack). When the
///         category is disabled with no open profile entries, when the bundle stack
///         is missing or empty, or when no bundle matches, the pair is
///         (nullptr, size_t(-1)).
template <typename CategoryT>
inline std::pair<instrumentation_bundle_t*, size_t>
get_timemory(CategoryT, std::string_view name)
{
    using return_type = std::pair<instrumentation_bundle_t*, size_t>;
    // sentinel result: no bundle, index wraps to the maximum size_t value
    const auto _not_found = return_type{ nullptr, static_cast<size_t>(-1) };

    // category disabled and nothing pushed on this thread
    if(profile_pop_disabled<CategoryT>()) return _not_found;

    auto  _wanted  = tim::hash::get_hash_id(name);
    auto& _bundles = tracing::get_instrumentation_bundles();
    if(ROCPROFSYS_UNLIKELY(_bundles == nullptr || _bundles->empty()))
    {
        LOG_DEBUG("[rocprofsys_pop_trace] skipped {} :: empty bundle stack", name);
        return _not_found;
    }

    // common case: the entry on top of the stack is the one being requested
    auto* _top = _bundles->back();
    if(ROCPROFSYS_LIKELY(_top->get_hash() == _wanted))
    {
        return std::make_pair(_top, _bundles->size() - 1);
    }

    // otherwise walk the remaining entries from newest to oldest
    size_t _idx = _bundles->size() - 1;
    while(_idx > 0)
    {
        --_idx;
        auto* _candidate = _bundles->at(_idx);
        if(_candidate->get_hash() == _wanted)
        {
            return std::make_pair(_candidate, _idx);
        }
    }
    return _not_found;
}
template <typename CategoryT, typename... Args>
inline auto
stop_timemory(CategoryT, std::string_view name, Args&&... args)
{
using return_type = std::pair<instrumentation_bundle_t*, size_t>;
// skip if category is disabled and not pushed on this thread
if(profile_pop_disabled<CategoryT>()) return return_type{ nullptr, -1 };
auto&& _data = get_timemory(CategoryT{}, name);
if(_data.first)
{
_data.first->stop(std::forward<Args>(args)...);
}
return _data;
}
/// Destroys the bundle described by \p _data (pointer plus stack index, as returned
/// by get_timemory()/stop_timemory()). A null bundle pointer or missing bundle
/// storage makes this a no-op.
inline void
destroy_timemory(std::pair<instrumentation_bundle_t*, size_t> _data)
{
    // nothing was found/stopped, so there is nothing to destroy
    if(_data.first == nullptr) return;

    auto& _storage = tracing::get_instrumentation_bundles();
    if(ROCPROFSYS_LIKELY(_storage != nullptr))
    {
        _storage->destroy(_data.first, _data.second);
    }
}
/// Stops and destroys the bundle matching \p name on the calling thread.
///
/// \param args forwarded to the bundle's stop()
///
/// NOTE(review): push_timemory() increments the profile depth counter, but no
/// matching decrement is visible in this header -- confirm whether that is intended.
template <typename CategoryT, typename... Args>
inline void
pop_timemory(CategoryT, std::string_view name, Args&&... args)
{
    // proceed only if the category is enabled or this thread has open profile entries
    if(!profile_pop_disabled<CategoryT>())
    {
        auto _stopped = stop_timemory(CategoryT{}, name, std::forward<Args>(args)...);
        // only a bundle that was actually found needs to be destroyed
        if(_stopped.first != nullptr) destroy_timemory(std::move(_stopped));
    }
}
/// Emits a perfetto slice-begin event named \p name in category \p CategoryT.
///
/// Dispatches at compile time on the trailing arguments:
///  - a single callable accepting ::perfetto::EventContext: the event is emitted with
///    a timestamp from now(); a "begin_ns" annotation is added when perfetto
///    annotations are enabled in the config.
///  - (::perfetto::Track, uint64_t, ...): delegates to push_perfetto_track.
///  - (uint64_t, ...): delegates to push_perfetto_ts.
///  - anything else: the event is emitted with a timestamp from now(), the arguments,
///    and a trailing lambda that adds "begin_ns" when annotations are enabled.
/// The tracing depth counter is incremented here only in the non-delegating branches;
/// the delegates increment it themselves.
template <typename CategoryT, typename... Args>
inline void
push_perfetto(CategoryT, const char* name, Args&&... args)
{
// skip if category is disabled
if(category_push_disabled<CategoryT>()) return;
// case 1: exactly one argument which is callable with an EventContext
if constexpr(sizeof...(Args) == 1 &&
std::is_invocable<Args..., ::perfetto::EventContext>::value)
{
++get_tracing_stack<CategoryT>();
uint64_t _ts = now();
if(config::get_perfetto_annotations())
{
// the timestamp is passed twice: once as the event time, once as the
// value of the "begin_ns" annotation
TRACE_EVENT_BEGIN(trait::name<CategoryT>::value, get_perfetto_string(name),
_ts, "begin_ns", _ts, std::forward<Args>(args)...);
}
else
{
TRACE_EVENT_BEGIN(trait::name<CategoryT>::value, get_perfetto_string(name),
_ts, std::forward<Args>(args)...);
}
}
else
{
// inspect the (unqualified) types of the first two arguments to pick a variant
using tuple_type = std::tuple<concepts::unqualified_type_t<Args>...>;
using arg0_type = concepts::tuple_element_t<0, tuple_type>;
using arg1_type = concepts::tuple_element_t<1, tuple_type>;
// case 2: caller supplied a track and a timestamp
if constexpr(std::is_same<arg0_type, ::perfetto::Track>::value &&
std::is_same<arg1_type, uint64_t>::value)
{
push_perfetto_track(CategoryT{}, name, std::forward<Args>(args)...);
}
// case 3: caller supplied only a timestamp
else if constexpr(std::is_same<arg0_type, uint64_t>::value)
{
push_perfetto_ts(CategoryT{}, name, std::forward<Args>(args)...);
}
// case 4: no track/timestamp supplied; use the current time
else
{
++get_tracing_stack<CategoryT>();
uint64_t _ts = now();
TRACE_EVENT_BEGIN(
trait::name<CategoryT>::value, get_perfetto_string(name), _ts,
std::forward<Args>(args)..., [&](::perfetto::EventContext ctx) {
if(config::get_perfetto_annotations())
{
tracing::add_perfetto_annotation(ctx, "begin_ns", _ts);
}
});
}
}
}
/// \brief This function is used to take an existing lambda accepting a
/// perfetto::EventContext and append the timemory annotations. Examples
/// are seen in the pop_perfetto* functions
///
/// When \p arg is callable with a ::perfetto::EventContext, a new lambda is returned.
/// If perfetto annotations are enabled, the new lambda first looks up the timemory
/// bundle matching \p name, stops it and annotates the event with the components in
/// perfetto_annotate_component_types. It then always invokes \p arg with the context.
/// The returned lambda holds \p arg by reference, so it must be invoked while the
/// caller's object is still alive.
///
/// Any other argument is passed through unchanged with its original value category:
/// lvalues stay lvalues, so a caller's named object is never turned into an rvalue.
template <typename CategoryT, typename Arg>
inline decltype(auto)
perfetto_annotate_timemory_data(CategoryT, const char* name, Arg&& arg)
{
    if constexpr(std::is_invocable<Arg, ::perfetto::EventContext>::value)
    {
        return [&arg, name](::perfetto::EventContext _ctx) {
            if(config::get_perfetto_annotations())
            {
                auto _timemory_data = get_timemory(CategoryT{}, name);
                if(_timemory_data.first)
                {
                    // stop the bundle before its values are written as annotations
                    _timemory_data.first->stop();
                    _timemory_data.first
                        ->template invoke_with<tim::operation::perfetto_annotate>(
                            perfetto_annotate_component_types{}, _ctx);
                }
            }
            // hand the context over to the user-provided callable
            std::forward<Arg>(arg)(std::move(_ctx));
        };
    }
    else
    {
        // forward (not move): decltype(auto) yields Arg&&, which preserves lvalues
        return std::forward<Arg>(arg);
    }
}
/// Emits a perfetto slice-end event for category \p CategoryT.
///
/// Dispatches at compile time on the trailing arguments:
///  - a single callable accepting ::perfetto::EventContext: the event is emitted with
///    a timestamp from now(); when perfetto annotations are enabled, an "end_ns"
///    annotation is added and the callable is wrapped by
///    perfetto_annotate_timemory_data.
///  - (::perfetto::Track, uint64_t, ...): delegates to pop_perfetto_track.
///  - (uint64_t, ...): delegates to pop_perfetto_ts.
///  - anything else: the event is emitted with a timestamp from now(), the arguments,
///    and a wrapped lambda that adds "end_ns" when annotations are enabled.
/// \p name is only used to look up timemory data for the annotations.
/// The tracing depth counter is decremented here only in the non-delegating branches;
/// the delegates decrement it themselves.
template <typename CategoryT, typename... Args>
inline void
pop_perfetto(CategoryT, const char* name, Args&&... args)
{
// skip if category is disabled and not pushed on this thread
if(tracing_pop_disabled<CategoryT>()) return;
// case 1: exactly one argument which is callable with an EventContext
if constexpr(sizeof...(Args) == 1 &&
std::is_invocable<Args..., ::perfetto::EventContext>::value)
{
// decrement tracing stack
--get_tracing_stack<CategoryT>();
uint64_t _ts = now();
if(config::get_perfetto_annotations())
{
// the timestamp is passed twice: once as the event time, once as the
// value of the "end_ns" annotation
TRACE_EVENT_END(trait::name<CategoryT>::value, _ts, "end_ns", _ts,
perfetto_annotate_timemory_data(CategoryT{}, name,
std::forward<Args>(args))...);
}
else
{
TRACE_EVENT_END(trait::name<CategoryT>::value, _ts,
std::forward<Args>(args)...);
}
}
else
{
// inspect the (unqualified) types of the first two arguments to pick a variant
using tuple_type = std::tuple<concepts::unqualified_type_t<Args>...>;
using arg0_type = concepts::tuple_element_t<0, tuple_type>;
using arg1_type = concepts::tuple_element_t<1, tuple_type>;
// case 2: caller supplied a track and a timestamp
if constexpr(std::is_same<arg0_type, ::perfetto::Track>::value &&
std::is_same<arg1_type, uint64_t>::value)
{
pop_perfetto_track(CategoryT{}, name, std::forward<Args>(args)...);
}
// case 3: caller supplied only a timestamp
else if constexpr(std::is_same<arg0_type, uint64_t>::value)
{
pop_perfetto_ts(CategoryT{}, name, std::forward<Args>(args)...);
}
// case 4: no track/timestamp supplied; use the current time
else
{
// decrement tracing stack
--get_tracing_stack<CategoryT>();
uint64_t _ts = now();
TRACE_EVENT_END(
trait::name<CategoryT>::value, _ts, std::forward<Args>(args)...,
perfetto_annotate_timemory_data(
CategoryT{}, name, [&](::perfetto::EventContext ctx) {
if(config::get_perfetto_annotations())
{
tracing::add_perfetto_annotation(ctx, "end_ns", _ts);
}
}));
}
}
// name is not referenced in every compile-time branch; silence unused warnings
(void) name;
}
/// Emits a perfetto slice-begin event named \p name at the caller-supplied
/// timestamp \p _ts and increments the thread's tracing depth for \p CategoryT.
/// Does nothing when the category is disabled.
/// \param args forwarded to the perfetto event macro after the timestamp
template <typename CategoryT, typename... Args>
inline void
push_perfetto_ts(CategoryT, const char* name, uint64_t _ts, Args&&... args)
{
    // only enabled categories produce events
    if(!category_push_disabled<CategoryT>())
    {
        // one more open tracing entry on this thread
        ++get_tracing_stack<CategoryT>();
        TRACE_EVENT_BEGIN(trait::name<CategoryT>::value, get_perfetto_string(name), _ts,
                          std::forward<Args>(args)...);
    }
}
/// Emits a perfetto slice-end event at the caller-supplied timestamp \p _ts and
/// decrements the thread's tracing depth for \p CategoryT.
/// Skipped when the category is disabled and the thread has no open tracing entries.
/// Every trailing argument is routed through perfetto_annotate_timemory_data, which
/// uses \p name to look up timemory data for callable arguments.
template <typename CategoryT, typename... Args>
inline void
pop_perfetto_ts(CategoryT, const char* name, uint64_t _ts, Args&&... args)
{
    // proceed if the category is enabled or an entry is still open on this thread
    if(!tracing_pop_disabled<CategoryT>())
    {
        // one less open tracing entry on this thread
        --get_tracing_stack<CategoryT>();
        TRACE_EVENT_END(trait::name<CategoryT>::value, _ts,
                        perfetto_annotate_timemory_data(CategoryT{}, name,
                                                        std::forward<Args>(args))...);
    }
}
/// Emits a perfetto slice-begin event named \p name on track \p _track at timestamp
/// \p _ts and increments the thread's tracing depth for \p CategoryT.
/// Does nothing when the category is disabled.
/// \param args forwarded to the perfetto event macro after the timestamp
template <typename CategoryT, typename... Args>
inline void
push_perfetto_track(CategoryT, const char* name, ::perfetto::Track _track, uint64_t _ts,
                    Args&&... args)
{
    // only enabled categories produce events
    if(!category_push_disabled<CategoryT>())
    {
        // one more open tracing entry on this thread
        ++get_tracing_stack<CategoryT>();
        TRACE_EVENT_BEGIN(trait::name<CategoryT>::value, get_perfetto_string(name),
                          _track, _ts, std::forward<Args>(args)...);
    }
}
/// Emits a perfetto slice-end event on track \p _track at timestamp \p _ts and
/// decrements the thread's tracing depth for \p CategoryT.
/// Skipped when the category is disabled and the thread has no open tracing entries.
/// Every trailing argument is routed through perfetto_annotate_timemory_data, which
/// uses \p name to look up timemory data for callable arguments.
template <typename CategoryT, typename... Args>
inline void
pop_perfetto_track(CategoryT, const char* name, ::perfetto::Track _track, uint64_t _ts,
                   Args&&... args)
{
    // proceed if the category is enabled or an entry is still open on this thread
    if(!tracing_pop_disabled<CategoryT>())
    {
        // one less open tracing entry on this thread
        --get_tracing_stack<CategoryT>();
        TRACE_EVENT_END(trait::name<CategoryT>::value, _track, _ts,
                        perfetto_annotate_timemory_data(CategoryT{}, name,
                                                        std::forward<Args>(args))...);
    }
}
/// Emits a perfetto instant event named \p name in category \p CategoryT.
///
/// Dispatches at compile time on the trailing arguments:
///  - a single callable accepting ::perfetto::EventContext: the event is emitted with
///    a timestamp from now(); an "ns" annotation is added when perfetto annotations
///    are enabled in the config.
///  - (::perfetto::Track, uint64_t, ...): delegates to mark_perfetto_track.
///  - (uint64_t, ...): delegates to mark_perfetto_ts.
///  - anything else: the event is emitted with a timestamp from now(), the arguments,
///    and a trailing lambda that adds "ns" when annotations are enabled.
/// Instant events do not touch the tracing depth counter.
template <typename CategoryT, typename... Args>
inline void
mark_perfetto(CategoryT, const char* name, Args&&... args)
{
// skip if category is disabled
if(category_mark_disabled<CategoryT>()) return;
// case 1: exactly one argument which is callable with an EventContext
if constexpr(sizeof...(Args) == 1 &&
std::is_invocable<Args..., ::perfetto::EventContext>::value)
{
uint64_t _ts = now();
if(config::get_perfetto_annotations())
{
// the timestamp is passed twice: once as the event time, once as the
// value of the "ns" annotation
TRACE_EVENT_INSTANT(trait::name<CategoryT>::value, get_perfetto_string(name),
_ts, "ns", _ts, std::forward<Args>(args)...);
}
else
{
TRACE_EVENT_INSTANT(trait::name<CategoryT>::value, get_perfetto_string(name),
_ts, std::forward<Args>(args)...);
}
}
else
{
// inspect the (unqualified) types of the first two arguments to pick a variant
using tuple_type = std::tuple<concepts::unqualified_type_t<Args>...>;
using arg0_type = concepts::tuple_element_t<0, tuple_type>;
using arg1_type = concepts::tuple_element_t<1, tuple_type>;
// case 2: caller supplied a track and a timestamp
if constexpr(std::is_same<arg0_type, ::perfetto::Track>::value &&
std::is_same<arg1_type, uint64_t>::value)
{
mark_perfetto_track(CategoryT{}, name, std::forward<Args>(args)...);
}
// case 3: caller supplied only a timestamp
else if constexpr(std::is_same<arg0_type, uint64_t>::value)
{
mark_perfetto_ts(CategoryT{}, name, std::forward<Args>(args)...);
}
// case 4: no track/timestamp supplied; use the current time
else
{
uint64_t _ts = now();
TRACE_EVENT_INSTANT(trait::name<CategoryT>::value, get_perfetto_string(name),
_ts, std::forward<Args>(args)...,
[&](::perfetto::EventContext ctx) {
if(config::get_perfetto_annotations())
{
tracing::add_perfetto_annotation(ctx, "ns", _ts);
}
});
}
}
}
/// Emits a perfetto instant event named \p name at the caller-supplied timestamp
/// \p _ts. Does nothing when the category is disabled.
/// \param args forwarded to the perfetto event macro after the timestamp
template <typename CategoryT, typename... Args>
inline void
mark_perfetto_ts(CategoryT, const char* name, uint64_t _ts, Args&&... args)
{
    // only enabled categories produce events
    if(!category_mark_disabled<CategoryT>())
    {
        TRACE_EVENT_INSTANT(trait::name<CategoryT>::value, get_perfetto_string(name),
                            _ts, std::forward<Args>(args)...);
    }
}
/// Emits a perfetto instant event named \p name on track \p _track at timestamp
/// \p _ts. Does nothing when the category is disabled.
/// \param args forwarded to the perfetto event macro after the timestamp
template <typename CategoryT, typename... Args>
inline void
mark_perfetto_track(CategoryT, const char* name, ::perfetto::Track _track, uint64_t _ts,
                    Args&&... args)
{
    // only enabled categories produce events
    if(!category_mark_disabled<CategoryT>())
    {
        TRACE_EVENT_INSTANT(trait::name<CategoryT>::value, get_perfetto_string(name),
                            _track, _ts, std::forward<Args>(args)...);
    }
}
/// Estimates the offset between the CPU clock used by now() and the clock read by
/// \p _timestamp_func.
///
/// Each sample takes a CPU timestamp, then a timestamp from \p _timestamp_func, then
/// another CPU timestamp. The sample value is the midpoint of the two CPU timestamps
/// minus the other timestamp.
///
/// \param _timestamp_func nullary callable returning a timestamp. It is invoked once
///        per sample, always as an lvalue.
/// \param _n number of samples to average; values below 1 are treated as 1.
/// \return the integer average of the per-sample differences.
template <typename FuncT>
int64_t
get_clock_skew(FuncT&& _timestamp_func, int64_t _n = 1)
{
    namespace cpu = tim::cpu;

    // a non-positive sample count would divide by zero below; take at least one
    if(_n < 1) _n = 1;

    // synchronize timestamps
    // We'll take a CPU timestamp before and after taking a GPU timestamp, then
    // take the average of those two, hoping that it's roughly at the same time
    // as the GPU timestamp.
    auto _cpu_now = []() {
        cpu::fence();
        return now();
    };
    // the callable runs once per sample, so it must not be forwarded as an rvalue
    // (it could be moved-from after the first call); invoke it as an lvalue instead
    auto _gpu_now = [&_timestamp_func]() {
        cpu::fence();
        return _timestamp_func();
    };
    auto _compute = [&_cpu_now, &_gpu_now]() {
        // volatile is kept from the original implementation, presumably to stop the
        // compiler from eliding or merging the reads; plain initialization is used
        // because compound assignment on volatile objects is deprecated in C++20
        volatile uint64_t _cpu_beg = _cpu_now();
        volatile uint64_t _gpu_ts  = _gpu_now();
        volatile uint64_t _cpu_end = _cpu_now();
        const uint64_t    _cpu_mid = (_cpu_beg + _cpu_end) / 2;
        return static_cast<int64_t>(_cpu_mid) - static_cast<int64_t>(_gpu_ts);
    };

    int64_t _diff = 0;
    for(int64_t i = 0; i < _n; ++i)
    {
        _diff += _compute();
    }
    return (_diff / _n);
}
} // namespace tracing
} // namespace rocprofsys