Perfetto annotation from timemory components (#289)
* Annotate perfetto with timemory component data
- support perfetto annotations via timemory component data, e.g. use PAPI component for exact HW counter annotations
* Tests for perfetto annotation via timemory data
* Update omnitrace-instrument
- remove the --default-components argument because it overrides any components set in the configuration file
- required by perfetto annotation via timemory data tests
* filter unavailable timemory components
- filter out unavailable timemory components before attempting to invoke the annotate operation on the bundle
* update annotate tests
- account for no PAPI support
* update lulesh-timemory test
- replace '-d wall_clock peak_rss' with '--env OMNITRACE_TIMEMORY_COMPONENTS="wall_clock peak_rss"'
* annotate tests update
- fix misnamed test
* annotate tests update
- restrict binary rewrite to the `run` function (-R run) to force instrumentation despite heuristics
* annotate tests update
- print {available,overlapping,excluded,instrumented} functions during binary rewrite
* annotate tests update
- add --allow-overlapping flag
* Support PAPI with CAP_SYS_ADMIN
- do not disable PAPI when perf_event_paranoid > 2 if the process has the CAP_SYS_ADMIN capability
[ROCm/rocprofiler-systems commit: 1aca8c177b]
This commit is contained in:
committed by
GitHub
parent
a0812bfa0b
commit
c6929f545d
+1
-1
Submodule projects/rocprofiler-systems/external/timemory updated: d1412416d0...5c5e19dff6
+4
-34
@@ -113,11 +113,10 @@ bool include_internal_linked_libs = false;
|
||||
int verbose_level = tim::get_env<int>("OMNITRACE_VERBOSE_INSTRUMENT", 0);
|
||||
int num_log_entries = tim::get_env<int>(
|
||||
"OMNITRACE_LOG_COUNT", tim::get_env<bool>("OMNITRACE_CI", false) ? 20 : 50);
|
||||
string_t main_fname = "main";
|
||||
string_t argv0 = {};
|
||||
string_t cmdv0 = {};
|
||||
string_t default_components = "wall_clock";
|
||||
string_t prefer_library = {};
|
||||
string_t main_fname = "main";
|
||||
string_t argv0 = {};
|
||||
string_t cmdv0 = {};
|
||||
string_t prefer_library = {};
|
||||
//
|
||||
// global variables
|
||||
//
|
||||
@@ -875,33 +874,6 @@ main(int argc, char** argv)
|
||||
.min_count(1)
|
||||
.description("Read in a configuration file and encode these values as the "
|
||||
"defaults in the executable");
|
||||
parser.add_argument()
|
||||
.names({ "-d", "--default-components" })
|
||||
.dtype("string")
|
||||
.description("Default components to instrument (only useful when timemory is "
|
||||
"enabled in omnitrace library)")
|
||||
.action([](parser_t& p) {
|
||||
auto _components = p.get<strvec_t>("default-components");
|
||||
default_components = {};
|
||||
for(size_t i = 0; i < _components.size(); ++i)
|
||||
{
|
||||
if(_components.at(i) == "none")
|
||||
{
|
||||
default_components = "none";
|
||||
break;
|
||||
}
|
||||
default_components += _components.at(i);
|
||||
if(i + 1 < _components.size()) default_components += ",";
|
||||
}
|
||||
if(default_components == "none")
|
||||
default_components = {};
|
||||
else
|
||||
{
|
||||
auto _strcomp = p.get<std::string>("default-components");
|
||||
if(!_strcomp.empty() && default_components.empty())
|
||||
default_components = _strcomp;
|
||||
}
|
||||
});
|
||||
parser.add_argument({ "--env" },
|
||||
"Environment variables to add to the runtime in form "
|
||||
"VARIABLE=VALUE. E.g. use '--env OMNITRACE_USE_TIMEMORY=ON' to "
|
||||
@@ -1427,8 +1399,6 @@ main(int argc, char** argv)
|
||||
TIMEMORY_JOIN('=', "OMNITRACE_INSTRUMENT_MODE", instr_mode_v_int));
|
||||
env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MPI_INIT", "OFF"));
|
||||
env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MPI_FINALIZE", "OFF"));
|
||||
env_vars.emplace_back(
|
||||
TIMEMORY_JOIN('=', "OMNITRACE_TIMEMORY_COMPONENTS", default_components));
|
||||
env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_USE_CODE_COVERAGE",
|
||||
(coverage_mode != CODECOV_NONE) ? "ON" : "OFF"));
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "perfetto.hpp"
|
||||
#include "utility.hpp"
|
||||
|
||||
#include <timemory/backends/capability.hpp>
|
||||
#include <timemory/backends/dmp.hpp>
|
||||
#include <timemory/backends/mpi.hpp>
|
||||
#include <timemory/backends/process.hpp>
|
||||
@@ -40,6 +41,7 @@
|
||||
#include <timemory/log/color.hpp>
|
||||
#include <timemory/log/logger.hpp>
|
||||
#include <timemory/manager.hpp>
|
||||
#include <timemory/process/process.hpp>
|
||||
#include <timemory/sampling/allocator.hpp>
|
||||
#include <timemory/settings.hpp>
|
||||
#include <timemory/settings/types.hpp>
|
||||
@@ -59,6 +61,7 @@
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <limits>
|
||||
#include <linux/capability.h>
|
||||
#include <numeric>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
@@ -944,7 +947,13 @@ configure_settings(bool _init)
|
||||
if(_fparanoid) _fparanoid >> _paranoid;
|
||||
}
|
||||
|
||||
if(_paranoid > 2)
|
||||
auto _cap_status = timemory::linux::capability::cap_read(process::get_id());
|
||||
auto* _cap_data = &_cap_status.effective;
|
||||
bool _has_cap_sys_admin = false;
|
||||
for(auto itr : timemory::linux::capability::cap_decode(*_cap_data))
|
||||
if(itr == CAP_SYS_ADMIN) _has_cap_sys_admin = true;
|
||||
|
||||
if(_paranoid > 2 && !_has_cap_sys_admin)
|
||||
{
|
||||
OMNITRACE_BASIC_VERBOSE(0,
|
||||
"/proc/sys/kernel/perf_event_paranoid has a value of %i. "
|
||||
|
||||
+16
-16
@@ -180,14 +180,6 @@ category_region<CategoryT>::start(std::string_view name, Args&&... args)
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr(_ct_use_perfetto)
|
||||
{
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
tracing::push_perfetto(CategoryT{}, name.data(), std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr(_ct_use_timemory)
|
||||
{
|
||||
if(get_use_timemory())
|
||||
@@ -196,6 +188,14 @@ category_region<CategoryT>::start(std::string_view name, Args&&... args)
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr(_ct_use_perfetto)
|
||||
{
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
tracing::push_perfetto(CategoryT{}, name.data(), std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr(is_one_of<CategoryT, critical_trace_categories_t>::value)
|
||||
{
|
||||
using Device = critical_trace::Device;
|
||||
@@ -250,14 +250,6 @@ category_region<CategoryT>::stop(std::string_view name, Args&&... args)
|
||||
++tracing::pop_count();
|
||||
}
|
||||
|
||||
if constexpr(_ct_use_timemory)
|
||||
{
|
||||
if(get_use_timemory())
|
||||
{
|
||||
tracing::pop_timemory(CategoryT{}, name, std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr(_ct_use_perfetto)
|
||||
{
|
||||
if(get_use_perfetto())
|
||||
@@ -267,6 +259,14 @@ category_region<CategoryT>::stop(std::string_view name, Args&&... args)
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr(_ct_use_timemory)
|
||||
{
|
||||
if(get_use_timemory())
|
||||
{
|
||||
tracing::pop_timemory(CategoryT{}, name, std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr(_ct_use_causal)
|
||||
{
|
||||
if constexpr(is_one_of<CategoryT, causal_throughput_categories_t>::value)
|
||||
|
||||
@@ -38,10 +38,17 @@
|
||||
#include "library/thread_data.hpp"
|
||||
#include "library/tracing/annotation.hpp"
|
||||
|
||||
#include <timemory/components/io/components.hpp>
|
||||
#include <timemory/components/network/types.hpp>
|
||||
#include <timemory/components/papi/types.hpp>
|
||||
#include <timemory/components/rusage/components.hpp>
|
||||
#include <timemory/components/timing/backends.hpp>
|
||||
#include <timemory/components/timing/components.hpp>
|
||||
#include <timemory/enum.h>
|
||||
#include <timemory/hash/types.hpp>
|
||||
#include <timemory/mpl/concepts.hpp>
|
||||
#include <timemory/mpl/type_traits.hpp>
|
||||
#include <timemory/types.hpp>
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
@@ -57,8 +64,15 @@ namespace omnitrace
|
||||
{
|
||||
namespace tracing
|
||||
{
|
||||
using interval_data_instances = thread_data<std::vector<bool>>;
|
||||
using hash_value_t = tim::hash_value_t;
|
||||
using interval_data_instances = thread_data<std::vector<bool>>;
|
||||
using hash_value_t = tim::hash_value_t;
|
||||
using perfetto_annotate_component_types = tim::mpl::available_t<type_list<
|
||||
comp::cpu_clock, comp::cpu_util, comp::kernel_mode_time, comp::num_major_page_faults,
|
||||
comp::num_minor_page_faults, comp::page_rss, comp::peak_rss, comp::papi_array_t,
|
||||
comp::papi_vector, comp::priority_context_switch, comp::voluntary_context_switch,
|
||||
comp::process_cpu_clock, comp::process_cpu_util, comp::system_clock,
|
||||
comp::thread_cpu_clock, comp::thread_cpu_util, comp::user_clock, comp::user_mode_time,
|
||||
comp::virtual_memory>>;
|
||||
|
||||
//
|
||||
// declarations
|
||||
@@ -148,7 +162,7 @@ get_perfetto_track(CategoryT, FuncT&& _desc_generator, Args&&... _args)
|
||||
auto& _track_uuids = get_perfetto_track_uuids();
|
||||
if(_track_uuids.find(_uuid) == _track_uuids.end())
|
||||
{
|
||||
const auto _track = TrackT(_uuid);
|
||||
const auto _track = TrackT(_uuid, ::perfetto::ProcessTrack::Current());
|
||||
auto _desc = _track.Serialize();
|
||||
|
||||
auto _name = std::forward<FuncT>(_desc_generator)(std::forward<Args>(_args)...);
|
||||
@@ -172,7 +186,7 @@ get_perfetto_track(CategoryT, FuncT&& _desc_generator, Args&&... _args)
|
||||
_uuid, _track_uuids.at(_uuid).c_str(), _name.c_str());
|
||||
#endif
|
||||
|
||||
return TrackT(_uuid);
|
||||
return TrackT(_uuid, ::perfetto::ProcessTrack::Current());
|
||||
}
|
||||
|
||||
template <typename Tp = uint64_t>
|
||||
@@ -283,12 +297,13 @@ push_timemory(CategoryT, std::string_view name, Args&&... args)
|
||||
++get_profile_stack<CategoryT>();
|
||||
}
|
||||
|
||||
template <typename CategoryT, typename... Args>
|
||||
inline void
|
||||
pop_timemory(CategoryT, std::string_view name, Args&&... args)
|
||||
template <typename CategoryT>
|
||||
inline std::pair<instrumentation_bundle_t*, size_t>
|
||||
get_timemory(CategoryT, std::string_view name)
|
||||
{
|
||||
using return_type = std::pair<instrumentation_bundle_t*, size_t>;
|
||||
// skip if category is disabled and not pushed on this thread
|
||||
if(profile_pop_disabled<CategoryT>()) return;
|
||||
if(profile_pop_disabled<CategoryT>()) return return_type{ nullptr, -1 };
|
||||
|
||||
auto _hash = tim::hash::get_hash_id(name);
|
||||
auto& _data = tracing::get_instrumentation_bundles();
|
||||
@@ -296,18 +311,13 @@ pop_timemory(CategoryT, std::string_view name, Args&&... args)
|
||||
{
|
||||
OMNITRACE_DEBUG("[%s] skipped %s :: empty bundle stack\n", "omnitrace_pop_trace",
|
||||
name.data());
|
||||
return;
|
||||
return return_type{ nullptr, -1 };
|
||||
}
|
||||
|
||||
auto*& _v_back = _data.bundles.back();
|
||||
if(OMNITRACE_LIKELY(_v_back->get_hash() == _hash))
|
||||
{
|
||||
// decrement the profile stack
|
||||
--get_profile_stack<CategoryT>();
|
||||
_v_back->stop(std::forward<Args>(args)...);
|
||||
_data.allocator.destroy(_v_back);
|
||||
_data.allocator.deallocate(_v_back, 1);
|
||||
_data.bundles.erase(--_data.bundles.end());
|
||||
return std::make_pair(_v_back, _data.bundles.size() - 1);
|
||||
}
|
||||
else if(_data.bundles.size() > 1)
|
||||
{
|
||||
@@ -316,16 +326,52 @@ pop_timemory(CategoryT, std::string_view name, Args&&... args)
|
||||
auto*& _v = _data.bundles.at(i - 1);
|
||||
if(_v->get_hash() == _hash)
|
||||
{
|
||||
// decrement the profile stack
|
||||
--get_profile_stack<CategoryT>();
|
||||
_v->stop(std::forward<Args>(args)...);
|
||||
_data.allocator.destroy(_v);
|
||||
_data.allocator.deallocate(_v, 1);
|
||||
_data.bundles.erase(_data.bundles.begin() + (i - 1));
|
||||
break;
|
||||
return std::make_pair(_v, i - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return return_type{ nullptr, -1 };
|
||||
}
|
||||
|
||||
template <typename CategoryT, typename... Args>
|
||||
inline auto
|
||||
stop_timemory(CategoryT, std::string_view name, Args&&... args)
|
||||
{
|
||||
using return_type = std::pair<instrumentation_bundle_t*, size_t>;
|
||||
|
||||
// skip if category is disabled and not pushed on this thread
|
||||
if(profile_pop_disabled<CategoryT>()) return return_type{ nullptr, -1 };
|
||||
|
||||
auto&& _data = get_timemory(CategoryT{}, name);
|
||||
if(_data.first)
|
||||
{
|
||||
_data.first->stop(std::forward<Args>(args)...);
|
||||
}
|
||||
return _data;
|
||||
}
|
||||
|
||||
inline void
|
||||
destroy_timemory(std::pair<instrumentation_bundle_t*, size_t> _data)
|
||||
{
|
||||
if(_data.first)
|
||||
{
|
||||
auto& _bundles = tracing::get_instrumentation_bundles();
|
||||
_bundles.allocator.destroy(_data.first);
|
||||
_bundles.allocator.deallocate(_data.first, 1);
|
||||
_bundles.bundles.erase(_bundles.bundles.begin() + _data.second);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename CategoryT, typename... Args>
|
||||
inline void
|
||||
pop_timemory(CategoryT, std::string_view name, Args&&... args)
|
||||
{
|
||||
// skip if category is disabled and not pushed on this thread
|
||||
if(profile_pop_disabled<CategoryT>()) return;
|
||||
|
||||
auto _data = stop_timemory(CategoryT{}, name, std::forward<Args>(args)...);
|
||||
if(_data.first) destroy_timemory(std::move(_data));
|
||||
}
|
||||
|
||||
template <typename CategoryT, typename... Args>
|
||||
@@ -384,6 +430,36 @@ push_perfetto(CategoryT, const char* name, Args&&... args)
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief This function is used to take an existing lambda accepting a
|
||||
/// perfetto::EventContext and append the timemory annotations. Examples
|
||||
/// are seen in the pop_perfetto* functions
|
||||
template <typename CategoryT, typename Arg>
|
||||
inline decltype(auto)
|
||||
perfetto_annotate_timemory_data(CategoryT, const char* name, Arg&& arg)
|
||||
{
|
||||
if constexpr(std::is_invocable<Arg, ::perfetto::EventContext>::value)
|
||||
{
|
||||
return [&arg, name](::perfetto::EventContext _ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
auto _timemory_data = get_timemory(CategoryT{}, name);
|
||||
if(_timemory_data.first)
|
||||
{
|
||||
_timemory_data.first->stop();
|
||||
_timemory_data.first
|
||||
->template invoke_with<tim::operation::perfetto_annotate>(
|
||||
perfetto_annotate_component_types{}, _ctx);
|
||||
}
|
||||
}
|
||||
std::forward<Arg>(arg)(std::move(_ctx));
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::move(arg);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename CategoryT, typename... Args>
|
||||
inline void
|
||||
pop_perfetto(CategoryT, const char* name, Args&&... args)
|
||||
@@ -400,7 +476,8 @@ pop_perfetto(CategoryT, const char* name, Args&&... args)
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
TRACE_EVENT_END(trait::name<CategoryT>::value, _ts, "end_ns", _ts,
|
||||
std::forward<Args>(args)...);
|
||||
perfetto_annotate_timemory_data(CategoryT{}, name,
|
||||
std::forward<Args>(args))...);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -428,14 +505,15 @@ pop_perfetto(CategoryT, const char* name, Args&&... args)
|
||||
// decrement tracing stack
|
||||
--get_tracing_stack<CategoryT>();
|
||||
uint64_t _ts = now();
|
||||
TRACE_EVENT_END(trait::name<CategoryT>::value, _ts,
|
||||
std::forward<Args>(args)...,
|
||||
[&](::perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns", _ts);
|
||||
}
|
||||
});
|
||||
TRACE_EVENT_END(
|
||||
trait::name<CategoryT>::value, _ts, std::forward<Args>(args)...,
|
||||
perfetto_annotate_timemory_data(
|
||||
CategoryT{}, name, [&](::perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns", _ts);
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -456,7 +534,7 @@ push_perfetto_ts(CategoryT, const char* name, uint64_t _ts, Args&&... args)
|
||||
|
||||
template <typename CategoryT, typename... Args>
|
||||
inline void
|
||||
pop_perfetto_ts(CategoryT, const char*, uint64_t _ts, Args&&... args)
|
||||
pop_perfetto_ts(CategoryT, const char* name, uint64_t _ts, Args&&... args)
|
||||
{
|
||||
// skip if category is disabled and not pushed on this thread
|
||||
if(tracing_pop_disabled<CategoryT>()) return;
|
||||
@@ -464,7 +542,9 @@ pop_perfetto_ts(CategoryT, const char*, uint64_t _ts, Args&&... args)
|
||||
// decrement tracing stack
|
||||
--get_tracing_stack<CategoryT>();
|
||||
|
||||
TRACE_EVENT_END(trait::name<CategoryT>::value, _ts, std::forward<Args>(args)...);
|
||||
TRACE_EVENT_END(
|
||||
trait::name<CategoryT>::value, _ts,
|
||||
perfetto_annotate_timemory_data(CategoryT{}, name, std::forward<Args>(args))...);
|
||||
}
|
||||
|
||||
template <typename CategoryT, typename... Args>
|
||||
@@ -482,7 +562,7 @@ push_perfetto_track(CategoryT, const char* name, ::perfetto::Track _track, uint6
|
||||
|
||||
template <typename CategoryT, typename... Args>
|
||||
inline void
|
||||
pop_perfetto_track(CategoryT, const char*, ::perfetto::Track _track, uint64_t _ts,
|
||||
pop_perfetto_track(CategoryT, const char* name, ::perfetto::Track _track, uint64_t _ts,
|
||||
Args&&... args)
|
||||
{
|
||||
// skip if category is disabled and not pushed on this thread
|
||||
@@ -491,8 +571,9 @@ pop_perfetto_track(CategoryT, const char*, ::perfetto::Track _track, uint64_t _t
|
||||
// decrement tracing stack
|
||||
--get_tracing_stack<CategoryT>();
|
||||
|
||||
TRACE_EVENT_END(trait::name<CategoryT>::value, _track, _ts,
|
||||
std::forward<Args>(args)...);
|
||||
TRACE_EVENT_END(
|
||||
trait::name<CategoryT>::value, _track, _ts,
|
||||
perfetto_annotate_timemory_data(CategoryT{}, name, std::forward<Args>(args))...);
|
||||
}
|
||||
|
||||
template <typename CategoryT, typename... Args>
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include "omnitrace/categories.h" // in omnitrace-user
|
||||
|
||||
#include <timemory/mpl/concepts.hpp>
|
||||
#include <timemory/operations/types/get.hpp>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
@@ -206,3 +207,90 @@ add_perfetto_annotation(perfetto_event_context_t& ctx,
|
||||
const omnitrace_annotation_t& _annotation);
|
||||
} // namespace tracing
|
||||
} // namespace omnitrace
|
||||
|
||||
#include <timemory/operations/types/annotate.hpp>
|
||||
|
||||
namespace tim
|
||||
{
|
||||
namespace operation
|
||||
{
|
||||
using perfetto_event_context_t = ::omnitrace::tracing::perfetto_event_context_t;
|
||||
|
||||
template <typename Tp>
|
||||
struct annotate<perfetto_event_context_t, Tp>
|
||||
{
|
||||
TIMEMORY_DEFAULT_OBJECT(annotate)
|
||||
|
||||
auto operator()(Tp& obj, perfetto_event_context_t& _ctx) const
|
||||
{
|
||||
return sfinae(obj, 0, _ctx);
|
||||
}
|
||||
|
||||
private:
|
||||
// If the component has an annotate(...) member function
|
||||
template <typename T>
|
||||
static auto sfinae(T& obj, int, perfetto_event_context_t& _ctx)
|
||||
-> decltype(obj.annotate(_ctx))
|
||||
{
|
||||
static_assert(std::is_same<T, Tp>::value, "Error T != Tp");
|
||||
return obj.annotate(_ctx);
|
||||
}
|
||||
|
||||
// If the component does not have an annotate(...) member function
|
||||
template <typename T>
|
||||
static void sfinae(T& obj, long, perfetto_event_context_t& _ctx)
|
||||
{
|
||||
static_assert(std::is_same<T, Tp>::value, "Error T != Tp");
|
||||
using value_type = typename T::value_type;
|
||||
if constexpr(!std::is_void<value_type>::value)
|
||||
{
|
||||
auto _obj_data = sfinae_data<Tp, decltype(obj.get())>(obj, 0);
|
||||
for(size_t i = 0; i < std::get<0>(_obj_data); ++i)
|
||||
{
|
||||
auto&& _label = std::get<1>(_obj_data).at(i);
|
||||
auto&& _value = std::get<2>(_obj_data).at(i);
|
||||
::omnitrace::tracing::add_perfetto_annotation(_ctx, _label, _value);
|
||||
}
|
||||
}
|
||||
(void) _ctx;
|
||||
}
|
||||
|
||||
template <typename T, typename DataT>
|
||||
static auto sfinae_data(T& obj, int)
|
||||
-> decltype(std::tuple<size_t, std::vector<std::string>, DataT>(obj.get().size(),
|
||||
obj.label_array(),
|
||||
obj.get()))
|
||||
{
|
||||
static_assert(std::is_same<T, Tp>::value, "Error T != Tp");
|
||||
auto _labels = obj.label_array();
|
||||
auto _data = obj.get();
|
||||
auto _size = std::min<size_t>(_labels.size(), _data.size());
|
||||
return std::make_tuple(_size, _labels, _data);
|
||||
}
|
||||
|
||||
template <typename T, typename DataT>
|
||||
static auto sfinae_data(T& obj, long)
|
||||
{
|
||||
using strvec_t = std::vector<std::string>;
|
||||
using datavec_t = std::vector<DataT>;
|
||||
size_t _size = 1;
|
||||
strvec_t _labels = { obj.get_label() };
|
||||
datavec_t _data = { obj.get() };
|
||||
return std::tuple<size_t, strvec_t, datavec_t>{ _size, _labels, _data };
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Tp>
|
||||
struct perfetto_annotate : annotate<perfetto_event_context_t, Tp>
|
||||
{
|
||||
using base_type = annotate<perfetto_event_context_t, Tp>;
|
||||
|
||||
TIMEMORY_DEFAULT_OBJECT(perfetto_annotate)
|
||||
|
||||
auto operator()(Tp& obj, perfetto_event_context_t& _ctx) const
|
||||
{
|
||||
return base_type::operator()(obj, _ctx);
|
||||
}
|
||||
};
|
||||
} // namespace operation
|
||||
} // namespace tim
|
||||
|
||||
@@ -21,5 +21,6 @@ include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-critical-trace-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-attach-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-rccl-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-overflow-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-annotate-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-causal-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-python-tests.cmake)
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
#
|
||||
# papi tests
|
||||
#
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
|
||||
if(OMNITRACE_USE_PAPI
|
||||
AND (omnitrace_perf_event_paranoid LESS_EQUAL 3
|
||||
OR omnitrace_cap_sys_admin EQUAL 0
|
||||
OR omnitrace_cap_perfmon EQUAL 0))
|
||||
set(_annotate_environment
|
||||
"${_base_environment}"
|
||||
"OMNITRACE_TIMEMORY_COMPONENTS=thread_cpu_clock papi_array"
|
||||
"OMNITRACE_PAPI_EVENTS=perf::PERF_COUNT_SW_CPU_CLOCK"
|
||||
"OMNITRACE_USE_SAMPLING=OFF")
|
||||
|
||||
omnitrace_add_test(
|
||||
SKIP_BASELINE SKIP_RUNTIME
|
||||
NAME annotate
|
||||
TARGET parallel-overhead
|
||||
RUN_ARGS 30 2 200
|
||||
REWRITE_ARGS
|
||||
-e
|
||||
-v
|
||||
2
|
||||
-R
|
||||
run
|
||||
--allow-overlapping
|
||||
--print-available
|
||||
functions
|
||||
--print-overlapping
|
||||
functions
|
||||
--print-excluded
|
||||
functions
|
||||
--print-instrumented
|
||||
functions
|
||||
--print-instructions
|
||||
ENVIRONMENT "${_annotate_environment}"
|
||||
LABELS "annotate;papi")
|
||||
|
||||
omnitrace_add_validation_test(
|
||||
NAME annotate-binary-rewrite
|
||||
PERFETTO_FILE "perfetto-trace.proto"
|
||||
LABELS "annotate;papi"
|
||||
ARGS --key-names perf::PERF_COUNT_SW_CPU_CLOCK thread_cpu_clock --key-counts 8 8)
|
||||
|
||||
omnitrace_add_validation_test(
|
||||
NAME annotate-sampling
|
||||
PERFETTO_FILE "perfetto-trace.proto"
|
||||
LABELS "papi"
|
||||
ARGS --key-names thread_cpu_clock --key-counts 6)
|
||||
else()
|
||||
set(_annotate_environment
|
||||
"${_base_environment}" "OMNITRACE_TIMEMORY_COMPONENTS=thread_cpu_clock"
|
||||
"OMNITRACE_USE_SAMPLING=OFF")
|
||||
|
||||
omnitrace_add_test(
|
||||
SKIP_BASELINE SKIP_RUNTIME
|
||||
NAME annotate
|
||||
TARGET parallel-overhead
|
||||
RUN_ARGS 30 2 200
|
||||
REWRITE_ARGS
|
||||
-e
|
||||
-v
|
||||
2
|
||||
-R
|
||||
run
|
||||
--allow-overlapping
|
||||
--print-available
|
||||
functions
|
||||
--print-overlapping
|
||||
functions
|
||||
--print-excluded
|
||||
functions
|
||||
--print-instrumented
|
||||
functions
|
||||
--print-instructions
|
||||
ENVIRONMENT "${_annotate_environment}"
|
||||
LABELS "annotate")
|
||||
|
||||
omnitrace_add_validation_test(
|
||||
NAME annotate-binary-rewrite
|
||||
PERFETTO_FILE "perfetto-trace.proto"
|
||||
LABELS "annotate"
|
||||
ARGS --key-names thread_cpu_clock --key-counts 8)
|
||||
|
||||
omnitrace_add_validation_test(
|
||||
NAME annotate-sampling
|
||||
PERFETTO_FILE "perfetto-trace.proto"
|
||||
LABELS "annotate"
|
||||
ARGS --key-names thread_cpu_clock --key-counts 6)
|
||||
endif()
|
||||
@@ -119,9 +119,8 @@ omnitrace_add_test(
|
||||
--dynamic-callsites
|
||||
-ME
|
||||
[==[libgomp]==]
|
||||
-d
|
||||
wall_clock
|
||||
peak_rss
|
||||
--env
|
||||
OMNITRACE_TIMEMORY_COMPONENTS="wall_clock peak_rss"
|
||||
RUN_ARGS -i 10 -s 20 -p
|
||||
ENVIRONMENT
|
||||
"${_timemory_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=OFF"
|
||||
|
||||
@@ -995,22 +995,24 @@ function(OMNITRACE_ADD_VALIDATION_TEST)
|
||||
)
|
||||
endif()
|
||||
|
||||
add_test(
|
||||
NAME validate-${TEST_NAME}-timemory
|
||||
COMMAND
|
||||
${OMNITRACE_VALIDATION_PYTHON}
|
||||
${CMAKE_CURRENT_LIST_DIR}/validate-timemory-json.py -m ${TEST_TIMEMORY_METRIC}
|
||||
${TEST_ARGS} -i
|
||||
${PROJECT_BINARY_DIR}/omnitrace-tests-output/${TEST_NAME}/${TEST_TIMEMORY_FILE}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
if(TEST_TIMEMORY_FILE)
|
||||
add_test(
|
||||
NAME validate-${TEST_NAME}-timemory
|
||||
COMMAND
|
||||
${OMNITRACE_VALIDATION_PYTHON}
|
||||
${CMAKE_CURRENT_LIST_DIR}/validate-timemory-json.py -m
|
||||
"${TEST_TIMEMORY_METRIC}" ${TEST_ARGS} -i
|
||||
${PROJECT_BINARY_DIR}/omnitrace-tests-output/${TEST_NAME}/${TEST_TIMEMORY_FILE}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
if(OMNITRACE_VALIDATION_PYTHON_PERFETTO EQUAL 0)
|
||||
if(OMNITRACE_VALIDATION_PYTHON_PERFETTO EQUAL 0 AND TEST_PERFETTO_FILE)
|
||||
add_test(
|
||||
NAME validate-${TEST_NAME}-perfetto
|
||||
COMMAND
|
||||
${OMNITRACE_VALIDATION_PYTHON}
|
||||
${CMAKE_CURRENT_LIST_DIR}/validate-perfetto-proto.py -m
|
||||
${TEST_PERFETTO_METRIC} ${TEST_ARGS} -i
|
||||
"${TEST_PERFETTO_METRIC}" ${TEST_ARGS} -i
|
||||
${PROJECT_BINARY_DIR}/omnitrace-tests-output/${TEST_NAME}/${TEST_PERFETTO_FILE}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
@@ -71,6 +71,20 @@ if __name__ == "__main__":
|
||||
"-p", "--print", action="store_true", help="Print the processed perfetto data"
|
||||
)
|
||||
parser.add_argument("-i", "--input", type=str, help="Input file", required=True)
|
||||
parser.add_argument(
|
||||
"--key-names",
|
||||
type=str,
|
||||
help="Require debug args contain a specific key",
|
||||
default=[],
|
||||
nargs="*",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--key-counts",
|
||||
type=int,
|
||||
help="Required number of debug args",
|
||||
default=[],
|
||||
nargs="*",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -128,6 +142,26 @@ if __name__ == "__main__":
|
||||
except RuntimeError as e:
|
||||
print(f"{e}")
|
||||
ret = 1
|
||||
|
||||
for key_name, key_count in zip(args.key_names, args.key_counts):
|
||||
slice_args = tp.query(
|
||||
f"select * from slice join args using (arg_set_id) where key='debug.{key_name}'"
|
||||
)
|
||||
count = 0
|
||||
if args.print:
|
||||
print(f"{key_name} (expected: {key_count}):")
|
||||
for row in slice_args:
|
||||
count += 1
|
||||
if args.print:
|
||||
for key, val in row.__dict__.items():
|
||||
print(f" - {key:20} :: {val}")
|
||||
print(f"Number of entries with {key_name} = {count} (expected: {key_count})")
|
||||
if key_count != count:
|
||||
ret = 1
|
||||
|
||||
if ret == 0:
|
||||
print(f"{args.input} validated")
|
||||
else:
|
||||
print(f"Failure validating {args.input}")
|
||||
|
||||
sys.exit(ret)
|
||||
|
||||
Reference in New Issue
Block a user