Support tracing thread locks with perfetto (#143)
- remove sampling and roctracer flat/timeline options
  - these options were unused/unnecessary clutter
- start pthread_gotcha before perfetto
- remove pthread_mutex_gotcha::validate()
- update timemory submodule with tid fix
[ROCm/rocprofiler-systems commit: 2718596e5a]
Tento commit je obsažen v:
vendorováno
+1
-1
Submodul projects/rocprofiler-systems/external/timemory aktualizován: 2f209b7dff...e3cc1e622a
@@ -196,7 +196,6 @@ OMNITRACE_CRITICAL_TRACE_PER_ROW = 0
|
||||
OMNITRACE_CRITICAL_TRACE_SERIALIZE_NAMES = false
|
||||
OMNITRACE_DEBUG = false
|
||||
OMNITRACE_DL_VERBOSE = 0
|
||||
OMNITRACE_FLAT_SAMPLING = false
|
||||
OMNITRACE_INSTRUMENTATION_INTERVAL = 1
|
||||
OMNITRACE_KOKKOS_KERNEL_LOGGER = false
|
||||
OMNITRACE_PAPI_EVENTS = PAPI_TOT_CYC
|
||||
@@ -206,17 +205,14 @@ OMNITRACE_PERFETTO_COMBINE_TRACES = true
|
||||
OMNITRACE_PERFETTO_FILE = perfetto-trace.proto
|
||||
OMNITRACE_PERFETTO_FILL_POLICY = discard
|
||||
OMNITRACE_PERFETTO_SHMEM_SIZE_HINT_KB = 4096
|
||||
OMNITRACE_ROCTRACER_FLAT_PROFILE = false
|
||||
OMNITRACE_ROCTRACER_HSA_ACTIVITY = false
|
||||
OMNITRACE_ROCTRACER_HSA_API = false
|
||||
OMNITRACE_ROCTRACER_HSA_API_TYPES =
|
||||
OMNITRACE_ROCTRACER_TIMELINE_PROFILE = false
|
||||
OMNITRACE_SAMPLING_CPUS =
|
||||
OMNITRACE_SAMPLING_DELAY = 0.5
|
||||
OMNITRACE_SAMPLING_FREQ = 10
|
||||
OMNITRACE_SAMPLING_GPUS = all
|
||||
OMNITRACE_TIME_OUTPUT = true
|
||||
OMNITRACE_TIMELINE_SAMPLING = false
|
||||
OMNITRACE_TIMEMORY_COMPONENTS = wall_clock
|
||||
OMNITRACE_TRACE_THREAD_LOCKS = false
|
||||
OMNITRACE_VERBOSE = 0
|
||||
@@ -297,7 +293,6 @@ $ omnitrace-avail -S -bd
|
||||
| OMNITRACE_ENABLE_SIGNAL_HANDLER | Enable signals in timemory_init |
|
||||
| OMNITRACE_FILE_OUTPUT | Write output to files |
|
||||
| OMNITRACE_FLAT_PROFILE | Set the label hierarchy mode to defa... |
|
||||
| OMNITRACE_FLAT_SAMPLING | Ignore hierarchy in all statistical ... |
|
||||
| OMNITRACE_INPUT_EXTENSIONS | File extensions used when searching ... |
|
||||
| OMNITRACE_INPUT_PATH | Explicitly specify the input folder ... |
|
||||
| OMNITRACE_INPUT_PREFIX | Explicitly specify the prefix for in... |
|
||||
@@ -328,11 +323,9 @@ $ omnitrace-avail -S -bd
|
||||
| OMNITRACE_PERFETTO_FILL_POLICY | Behavior when perfetto buffer is ful... |
|
||||
| OMNITRACE_PERFETTO_SHMEM_SIZE_HINT_KB | Hint for shared-memory buffer size i... |
|
||||
| OMNITRACE_PRECISION | Set the global output precision for ... |
|
||||
| OMNITRACE_ROCTRACER_FLAT_PROFILE | Ignore hierarchy in all kernels entr... |
|
||||
| OMNITRACE_ROCTRACER_HSA_ACTIVITY | Enable HSA activity tracing support |
|
||||
| OMNITRACE_ROCTRACER_HSA_API | Enable HSA API tracing support |
|
||||
| OMNITRACE_ROCTRACER_HSA_API_TYPES | HSA API type to collect |
|
||||
| OMNITRACE_ROCTRACER_TIMELINE_PROFILE | Create unique entries for every kern... |
|
||||
| OMNITRACE_SAMPLING_CPUS | CPUs to collect frequency informatio... |
|
||||
| OMNITRACE_SAMPLING_DELAY | Number of seconds to wait before the... |
|
||||
| OMNITRACE_SAMPLING_FREQ | Number of software interrupts per se... |
|
||||
@@ -343,7 +336,6 @@ $ omnitrace-avail -S -bd
|
||||
| OMNITRACE_SUPPRESS_PARSING | Disable parsing environment |
|
||||
| OMNITRACE_TEXT_OUTPUT | Write text output files |
|
||||
| OMNITRACE_TIMELINE_PROFILE | Set the label hierarchy mode to defa... |
|
||||
| OMNITRACE_TIMELINE_SAMPLING | Create unique entries for every samp... |
|
||||
| OMNITRACE_TIMEMORY_COMPONENTS | List of components to collect via ti... |
|
||||
| OMNITRACE_TIME_FORMAT | Customize the folder generation when... |
|
||||
| OMNITRACE_TIME_OUTPUT | Output data to subfolder w/ a timest... |
|
||||
|
||||
@@ -183,8 +183,8 @@ omnitrace_set_env_hidden(const char* env_name, const char* env_val)
|
||||
|
||||
namespace
|
||||
{
|
||||
bool _set_mpi_called = false;
|
||||
std::function<void()> _start_gotcha_callback = []() {};
|
||||
bool _set_mpi_called = false;
|
||||
std::function<void()> _preinit_callback = []() {};
|
||||
} // namespace
|
||||
|
||||
extern "C" void
|
||||
@@ -223,7 +223,7 @@ omnitrace_set_mpi_hidden(bool use, bool attached)
|
||||
std::to_string(use).c_str(), std::to_string(attached).c_str(),
|
||||
std::to_string(get_state()).c_str());
|
||||
|
||||
_start_gotcha_callback();
|
||||
_preinit_callback();
|
||||
}
|
||||
|
||||
//======================================================================================//
|
||||
@@ -356,6 +356,9 @@ omnitrace_init_tooling_hidden()
|
||||
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
|
||||
// start these gotchas once settings have been initialized
|
||||
get_init_bundle()->start();
|
||||
|
||||
if(get_use_sampling()) sampling::block_signals();
|
||||
|
||||
if(get_use_critical_trace())
|
||||
@@ -554,11 +557,11 @@ omnitrace_init_hidden(const char* _mode, bool _is_binary_rewrite, const char* _a
|
||||
|
||||
if(!_set_mpi_called)
|
||||
{
|
||||
_start_gotcha_callback = []() { get_gotcha_bundle()->start(); };
|
||||
_preinit_callback = []() { get_preinit_bundle()->start(); };
|
||||
}
|
||||
else
|
||||
{
|
||||
get_gotcha_bundle()->start();
|
||||
get_preinit_bundle()->start();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -615,7 +618,7 @@ omnitrace_finalize_hidden(void)
|
||||
if(_debug_init) config::set_setting_value("OMNITRACE_DEBUG", _debug_value);
|
||||
} };
|
||||
|
||||
auto& _thread_bundle = thread_data<omnitrace_thread_bundle_t>::instance();
|
||||
auto& _thread_bundle = thread_data<thread_bundle_t>::instance();
|
||||
if(_thread_bundle) _thread_bundle->stop();
|
||||
|
||||
if(dmp::rank() == 0 && get_verbose() >= 0) fprintf(stderr, "\n");
|
||||
@@ -644,7 +647,7 @@ omnitrace_finalize_hidden(void)
|
||||
}
|
||||
}
|
||||
|
||||
// stop the main bundle which shuts down the pthread gotchas
|
||||
// stop the main bundle which has stats for run
|
||||
if(get_main_bundle())
|
||||
{
|
||||
OMNITRACE_DEBUG_F("Stopping main bundle...\n");
|
||||
@@ -690,12 +693,18 @@ omnitrace_finalize_hidden(void)
|
||||
}
|
||||
}
|
||||
|
||||
// stop the main gotcha which shuts down the pthread gotchas
|
||||
if(get_init_bundle())
|
||||
{
|
||||
OMNITRACE_DEBUG_F("Stopping main gotcha...\n");
|
||||
get_init_bundle()->stop();
|
||||
}
|
||||
|
||||
// stop the gotcha bundle
|
||||
if(get_gotcha_bundle())
|
||||
if(get_preinit_bundle())
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(1, "Shutting down miscellaneous gotchas...\n");
|
||||
get_gotcha_bundle()->stop();
|
||||
get_gotcha_bundle().reset();
|
||||
get_preinit_bundle()->stop();
|
||||
component::mpi_gotcha::shutdown();
|
||||
}
|
||||
|
||||
@@ -746,7 +755,7 @@ omnitrace_finalize_hidden(void)
|
||||
// if they are still running (e.g. thread-pool still alive), the
|
||||
// thread-specific data will be wrong if try to stop them from
|
||||
// the main thread.
|
||||
for(auto& itr : thread_data<omnitrace_thread_bundle_t>::instances())
|
||||
for(auto& itr : thread_data<thread_bundle_t>::instances())
|
||||
{
|
||||
if(itr && itr->get<comp::wall_clock>() &&
|
||||
!itr->get<comp::wall_clock>()->get_is_running())
|
||||
|
||||
+2
-2
@@ -144,7 +144,7 @@ pthread_create_gotcha::wrapper::wrapper(routine_t _routine, void* _arg,
|
||||
void*
|
||||
pthread_create_gotcha::wrapper::operator()() const
|
||||
{
|
||||
using thread_bundle_data_t = thread_data<omnitrace_thread_bundle_t>;
|
||||
using thread_bundle_data_t = thread_data<thread_bundle_t>;
|
||||
|
||||
if(is_shutdown && *is_shutdown)
|
||||
{
|
||||
@@ -195,7 +195,7 @@ pthread_create_gotcha::wrapper::operator()() const
|
||||
threading::set_thread_name(TIMEMORY_JOIN(" ", "Thread", _tid).c_str());
|
||||
if(!thread_bundle_data_t::instances().at(_tid))
|
||||
{
|
||||
thread_data<omnitrace_thread_bundle_t>::construct(
|
||||
thread_data<thread_bundle_t>::construct(
|
||||
TIMEMORY_JOIN('/', "omnitrace/process", process::get_id(), "thread",
|
||||
_tid),
|
||||
quirk::config<quirk::auto_start>{});
|
||||
|
||||
+4
-30
@@ -27,6 +27,7 @@
|
||||
#include "library/debug.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/sampling.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
#include "library/utility.hpp"
|
||||
|
||||
#include <timemory/backends/threading.hpp>
|
||||
@@ -98,8 +99,6 @@ pthread_mutex_gotcha::configure()
|
||||
pthread_mutex_gotcha_t::get_initializer() = []() {
|
||||
if(config::get_trace_thread_locks())
|
||||
{
|
||||
validate();
|
||||
|
||||
pthread_mutex_gotcha_t::configure(
|
||||
comp::gotcha_config<0, int, pthread_mutex_t*>{ "pthread_mutex_lock" });
|
||||
|
||||
@@ -161,31 +160,6 @@ pthread_mutex_gotcha::shutdown()
|
||||
pthread_mutex_gotcha_t::disable();
|
||||
}
|
||||
|
||||
void
|
||||
pthread_mutex_gotcha::validate()
|
||||
{
|
||||
if(config::get_trace_thread_locks() && config::get_use_perfetto())
|
||||
{
|
||||
OMNITRACE_PRINT_F("\n");
|
||||
OMNITRACE_PRINT_F("\n");
|
||||
OMNITRACE_PRINT_F("\n");
|
||||
OMNITRACE_PRINT_F(
|
||||
"The overhead of all the mutex locking internally by perfetto is\n")
|
||||
OMNITRACE_PRINT_F(
|
||||
"so significant that all timing data is rendered meaningless.\n");
|
||||
OMNITRACE_PRINT_F(
|
||||
"However, mutex locking is effectively non-existant in timemory.\n");
|
||||
OMNITRACE_PRINT_F("If you want to trace the mutex locking:\n")
|
||||
OMNITRACE_PRINT_F(" OMNITRACE_USE_TIMEMORY=ON\n");
|
||||
OMNITRACE_PRINT_F(" OMNITRACE_USE_PERFETTO=OFF\n");
|
||||
OMNITRACE_PRINT_F("\n");
|
||||
OMNITRACE_PRINT_F("\n");
|
||||
OMNITRACE_PRINT_F("\n");
|
||||
OMNITRACE_FAIL_F("OMNITRACE_USE_PERFETTO and OMNITRACE_TRACE_THREAD_LOCKS cannot "
|
||||
"both be enabled.\n");
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_gotcha::pthread_mutex_gotcha(const gotcha_data_t& _data)
|
||||
: m_data{ &_data }
|
||||
{}
|
||||
@@ -290,9 +264,9 @@ pthread_mutex_gotcha::operator()(int (*_callee)(pthread_t, void**), pthread_t _t
|
||||
bool
|
||||
pthread_mutex_gotcha::is_disabled()
|
||||
{
|
||||
return (get_state() != ::omnitrace::State::Active ||
|
||||
get_thread_state() != ThreadState::Enabled ||
|
||||
(get_use_sampling() && !sampling_enabled_on_child_threads()));
|
||||
static thread_local const auto& _info = thread_info::get();
|
||||
return (!_info || _info->is_offset || get_state() != ::omnitrace::State::Active ||
|
||||
get_thread_state() != ThreadState::Enabled);
|
||||
}
|
||||
} // namespace component
|
||||
} // namespace omnitrace
|
||||
|
||||
@@ -54,7 +54,6 @@ struct pthread_mutex_gotcha : comp::base<pthread_mutex_gotcha, void>
|
||||
// generate the gotcha wrappers
|
||||
static void configure();
|
||||
static void shutdown();
|
||||
static void validate();
|
||||
|
||||
int operator()(int (*)(pthread_mutex_t*), pthread_mutex_t*) const;
|
||||
int operator()(int (*)(pthread_spinlock_t*), pthread_spinlock_t*) const;
|
||||
|
||||
@@ -491,29 +491,6 @@ configure_settings(bool _init)
|
||||
std::to_string(_sigrt_range),
|
||||
0, "sampling", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_FLAT_SAMPLING",
|
||||
"Ignore hierarchy in all statistical sampling entries",
|
||||
_config->get_flat_profile(), "timemory", "sampling",
|
||||
"data_layout", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
bool, "OMNITRACE_TIMELINE_SAMPLING",
|
||||
"Create unique entries for every sample when statistical sampling is enabled",
|
||||
_config->get_timeline_profile(), "timemory", "sampling", "data_layout",
|
||||
"advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
bool, "OMNITRACE_ROCTRACER_FLAT_PROFILE",
|
||||
"Ignore hierarchy in all kernels entries with timemory backend",
|
||||
_config->get_flat_profile(), "timemory", "roctracer", "data_layout", "rocm",
|
||||
"advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
bool, "OMNITRACE_ROCTRACER_TIMELINE_PROFILE",
|
||||
"Create unique entries for every kernel with timemory backend",
|
||||
_config->get_timeline_profile(), "timemory", "roctracer", "data_layout", "rocm",
|
||||
"advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_ROCTRACER_HSA_ACTIVITY",
|
||||
"Enable HSA activity tracing support", true, "roctracer",
|
||||
"rocm", "advanced");
|
||||
@@ -1615,34 +1592,6 @@ get_sampling_rtoffset()
|
||||
return static_cast<tim::tsettings<int>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_timeline_sampling()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_TIMELINE_SAMPLING");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_flat_sampling()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_FLAT_SAMPLING");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_roctracer_timeline_profile()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_ROCTRACER_TIMELINE_PROFILE");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_roctracer_flat_profile()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_ROCTRACER_FLAT_PROFILE");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_trace_hsa_api()
|
||||
{
|
||||
|
||||
@@ -219,18 +219,6 @@ get_sampling_rtoffset();
|
||||
bool
|
||||
get_use_rcclp();
|
||||
|
||||
bool
|
||||
get_timeline_sampling();
|
||||
|
||||
bool
|
||||
get_flat_sampling();
|
||||
|
||||
bool
|
||||
get_roctracer_timeline_profile();
|
||||
|
||||
bool
|
||||
get_roctracer_flat_profile();
|
||||
|
||||
bool
|
||||
get_trace_hsa_api();
|
||||
|
||||
|
||||
@@ -238,12 +238,6 @@ hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
|
||||
static thread_local int64_t begin_timestamp = 0;
|
||||
static auto _scope = []() {
|
||||
auto _v = scope::config{};
|
||||
if(get_roctracer_timeline_profile()) _v += scope::timeline{};
|
||||
if(get_roctracer_flat_profile()) _v += scope::flat{};
|
||||
return _v;
|
||||
}();
|
||||
|
||||
switch(cid)
|
||||
{
|
||||
@@ -320,7 +314,7 @@ hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
if(tasking::roctracer::get_task_group().pool())
|
||||
tasking::roctracer::get_task_group().exec(
|
||||
[_name, _beg_ns, _end_ns]() {
|
||||
roctracer_hsa_bundle_t _bundle{ _name, _scope };
|
||||
roctracer_hsa_bundle_t _bundle{ _name };
|
||||
_bundle.start()
|
||||
.store(std::plus<double>{},
|
||||
static_cast<double>(_end_ns - _beg_ns))
|
||||
@@ -374,14 +368,8 @@ hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg)
|
||||
|
||||
if(!_name) return;
|
||||
|
||||
auto _beg_ns = record->begin_ns + get_clock_skew();
|
||||
auto _end_ns = record->end_ns + get_clock_skew();
|
||||
static auto _scope = []() {
|
||||
auto _v = scope::config{};
|
||||
if(get_roctracer_timeline_profile()) _v += scope::timeline{};
|
||||
if(get_roctracer_flat_profile()) _v += scope::flat{};
|
||||
return _v;
|
||||
}();
|
||||
auto _beg_ns = record->begin_ns + get_clock_skew();
|
||||
auto _end_ns = record->end_ns + get_clock_skew();
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
@@ -394,7 +382,7 @@ hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg)
|
||||
auto _func = [_beg_ns, _end_ns, _name]() {
|
||||
if(get_use_timemory())
|
||||
{
|
||||
roctracer_hsa_bundle_t _bundle{ *_name, _scope };
|
||||
roctracer_hsa_bundle_t _bundle{ *_name };
|
||||
_bundle.start()
|
||||
.store(std::plus<double>{}, static_cast<double>(_end_ns - _beg_ns))
|
||||
.stop();
|
||||
@@ -836,16 +824,10 @@ hip_activity_callback(const char* begin, const char* end, void*)
|
||||
|
||||
const char* op_name =
|
||||
roctracer_op_string(record->domain, record->op, record->kind);
|
||||
auto _ns_skew = get_clock_skew();
|
||||
uint64_t _beg_ns = record->begin_ns + _ns_skew;
|
||||
uint64_t _end_ns = record->end_ns + _ns_skew;
|
||||
auto _corr_id = record->correlation_id;
|
||||
static auto _scope = []() {
|
||||
auto _v = scope::config{};
|
||||
if(get_roctracer_timeline_profile()) _v += scope::timeline{};
|
||||
if(get_roctracer_flat_profile()) _v += scope::flat{};
|
||||
return _v;
|
||||
}();
|
||||
auto _ns_skew = get_clock_skew();
|
||||
uint64_t _beg_ns = record->begin_ns + _ns_skew;
|
||||
uint64_t _end_ns = record->end_ns + _ns_skew;
|
||||
auto _corr_id = record->correlation_id;
|
||||
|
||||
auto& _keys = get_roctracer_key_data();
|
||||
auto& _tids = get_roctracer_tid_data();
|
||||
@@ -936,7 +918,7 @@ hip_activity_callback(const char* begin, const char* end, void*)
|
||||
if(_found && _name != nullptr && get_use_timemory())
|
||||
{
|
||||
auto _func = [_beg_ns, _end_ns, _name]() {
|
||||
roctracer_bundle_t _bundle{ _name, _scope };
|
||||
roctracer_bundle_t _bundle{ _name };
|
||||
_bundle.start()
|
||||
.store(std::plus<double>{}, static_cast<double>(_end_ns - _beg_ns))
|
||||
.stop()
|
||||
|
||||
@@ -216,11 +216,19 @@ get_main_bundle()
|
||||
return _v;
|
||||
}
|
||||
|
||||
std::unique_ptr<gotcha_bundle_t>&
|
||||
get_gotcha_bundle()
|
||||
std::unique_ptr<init_bundle_t>&
|
||||
get_init_bundle()
|
||||
{
|
||||
static auto _v = std::make_unique<init_bundle_t>(
|
||||
JOIN('/', "omnitrace/process", process::get_id()));
|
||||
return _v;
|
||||
}
|
||||
|
||||
std::unique_ptr<preinit_bundle_t>&
|
||||
get_preinit_bundle()
|
||||
{
|
||||
static auto _v =
|
||||
(setup_gotchas(), std::make_unique<gotcha_bundle_t>(
|
||||
(setup_gotchas(), std::make_unique<preinit_bundle_t>(
|
||||
JOIN('/', "omnitrace/process", process::get_id()),
|
||||
quirk::config<quirk::auto_start>{}));
|
||||
return _v;
|
||||
|
||||
@@ -45,30 +45,35 @@
|
||||
|
||||
namespace omnitrace
|
||||
{
|
||||
// started during preinit phase
|
||||
using preinit_bundle_t =
|
||||
tim::lightweight_tuple<exit_gotcha_t, fork_gotcha_t, mpi_gotcha_t>;
|
||||
|
||||
// started during init phase
|
||||
using init_bundle_t = tim::lightweight_tuple<pthread_gotcha>;
|
||||
|
||||
// bundle of components around omnitrace_init and omnitrace_finalize
|
||||
using main_bundle_t =
|
||||
tim::lightweight_tuple<comp::wall_clock, comp::peak_rss, comp::page_rss,
|
||||
comp::cpu_clock, comp::cpu_util, pthread_gotcha>;
|
||||
|
||||
using gotcha_bundle_t =
|
||||
tim::lightweight_tuple<exit_gotcha_t, fork_gotcha_t, mpi_gotcha_t>;
|
||||
comp::cpu_clock, comp::cpu_util>;
|
||||
|
||||
// bundle of components around each thread
|
||||
#if defined(TIMEMORY_RUSAGE_THREAD) && TIMEMORY_RUSAGE_THREAD > 0
|
||||
using omnitrace_thread_bundle_t =
|
||||
tim::lightweight_tuple<comp::wall_clock, comp::thread_cpu_clock,
|
||||
comp::thread_cpu_util, comp::peak_rss>;
|
||||
using thread_bundle_t = tim::lightweight_tuple<comp::wall_clock, comp::thread_cpu_clock,
|
||||
comp::thread_cpu_util, comp::peak_rss>;
|
||||
#else
|
||||
using omnitrace_thread_bundle_t =
|
||||
tim::lightweight_tuple<comp::wall_clock, comp::thread_cpu_clock,
|
||||
comp::thread_cpu_util>;
|
||||
using thread_bundle_t = tim::lightweight_tuple<comp::wall_clock, comp::thread_cpu_clock,
|
||||
comp::thread_cpu_util>;
|
||||
#endif
|
||||
|
||||
std::unique_ptr<main_bundle_t>&
|
||||
get_main_bundle();
|
||||
|
||||
std::unique_ptr<gotcha_bundle_t>&
|
||||
get_gotcha_bundle();
|
||||
std::unique_ptr<init_bundle_t>&
|
||||
get_init_bundle();
|
||||
|
||||
std::unique_ptr<preinit_bundle_t>&
|
||||
get_preinit_bundle();
|
||||
|
||||
int
|
||||
get_realtime_signal();
|
||||
|
||||
@@ -574,6 +574,7 @@ post_process_perfetto(int64_t _tid, const bundle_t* _init,
|
||||
|
||||
auto _process_perfetto = [_tid,
|
||||
_init](const std::vector<sampling::bundle_t*>& _data) {
|
||||
thread_info::init(true);
|
||||
OMNITRACE_VERBOSE(3 || get_debug_sampling(),
|
||||
"[%li] Post-processing backtraces for perfetto...\n", _tid);
|
||||
|
||||
@@ -643,10 +644,7 @@ void
|
||||
post_process_timemory(int64_t _tid, const bundle_t* _init,
|
||||
const std::vector<bundle_t*>& _data)
|
||||
{
|
||||
std::map<int64_t, std::map<int64_t, int64_t>> _depth_sum = {};
|
||||
auto _scope = tim::scope::config{};
|
||||
if(get_timeline_sampling()) _scope += scope::timeline{};
|
||||
if(get_flat_sampling()) _scope += scope::flat{};
|
||||
auto _depth_sum = std::map<int64_t, std::map<int64_t, int64_t>>{};
|
||||
|
||||
OMNITRACE_VERBOSE(3 || get_debug_sampling(),
|
||||
"[%li] Post-processing data for timemory...\n", _tid);
|
||||
@@ -674,7 +672,7 @@ post_process_timemory(int64_t _tid, const bundle_t* _init,
|
||||
// generate the instances of the tuple of components and start them
|
||||
for(const auto& itr : backtrace::filter_and_patch(_bt_data->get()))
|
||||
{
|
||||
_tc.emplace_back(tim::string_view_t{ itr }, _scope);
|
||||
_tc.emplace_back(tim::string_view_t{ itr });
|
||||
_tc.back().push(_bt_time->get_tid());
|
||||
_tc.back().start();
|
||||
}
|
||||
|
||||
@@ -97,7 +97,7 @@ thread_info::init(bool _offset)
|
||||
const std::optional<thread_info>&
|
||||
thread_info::get()
|
||||
{
|
||||
return get(utility::get_thread_index(), LookupTID);
|
||||
return thread_info_data_t::instances().at(utility::get_thread_index());
|
||||
}
|
||||
|
||||
const std::optional<thread_info>&
|
||||
|
||||
@@ -108,7 +108,7 @@ thread_init()
|
||||
if(get_state() != State::Finalized)
|
||||
{
|
||||
if(get_use_sampling()) sampling::shutdown();
|
||||
auto& _thr_bundle = thread_data<omnitrace_thread_bundle_t>::instance();
|
||||
auto& _thr_bundle = thread_data<thread_bundle_t>::instance();
|
||||
if(_thr_bundle && _thr_bundle->get<comp::wall_clock>() &&
|
||||
_thr_bundle->get<comp::wall_clock>()->get_is_running())
|
||||
_thr_bundle->stop();
|
||||
@@ -117,10 +117,10 @@ thread_init()
|
||||
static thread_local auto _thread_setup = []() {
|
||||
if(threading::get_id() > 0)
|
||||
threading::set_thread_name(JOIN(" ", "Thread", threading::get_id()).c_str());
|
||||
thread_data<omnitrace_thread_bundle_t>::construct(
|
||||
JOIN('/', "omnitrace/process", process::get_id(), "thread",
|
||||
threading::get_id()),
|
||||
quirk::config<quirk::auto_start>{});
|
||||
thread_data<thread_bundle_t>::construct(JOIN('/', "omnitrace/process",
|
||||
process::get_id(), "thread",
|
||||
threading::get_id()),
|
||||
quirk::config<quirk::auto_start>{});
|
||||
get_interval_data()->reserve(512);
|
||||
// save the hash maps
|
||||
get_timemory_hash_ids() = tim::get_hash_ids();
|
||||
|
||||
@@ -52,14 +52,16 @@ set(_flat_environment
|
||||
"${_test_library_path}")
|
||||
|
||||
set(_lock_environment
|
||||
"OMNITRACE_USE_SAMPLING=OFF"
|
||||
"OMNITRACE_USE_SAMPLING=ON"
|
||||
"OMNITRACE_USE_PROCESS_SAMPLING=OFF"
|
||||
"OMNITRACE_SAMPLING_FREQ=250"
|
||||
"OMNITRACE_CRITICAL_TRACE=ON"
|
||||
"OMNITRACE_COLLAPSE_THREADS=ON"
|
||||
"OMNITRACE_TRACE_THREAD_LOCKS=ON"
|
||||
"OMNITRACE_TRACE_THREAD_SPIN_LOCKS=ON"
|
||||
"OMNITRACE_TRACE_THREAD_RW_LOCKS=ON"
|
||||
"OMNITRACE_COUT_OUTPUT=ON"
|
||||
"OMNITRACE_TIME_OUTPUT=OFF"
|
||||
"OMNITRACE_FLAT_PROFILE=ON"
|
||||
"OMNITRACE_TIMELINE_PROFILE=OFF"
|
||||
"${_test_library_path}")
|
||||
|
||||
@@ -241,7 +243,7 @@ function(OMNITRACE_ADD_TEST)
|
||||
|
||||
cmake_parse_arguments(
|
||||
TEST
|
||||
"SKIP_BASELINE;SKIP_REWRITE;SKIP_RUNTIME;SKIP_SAMPLING" # options
|
||||
"SKIP_BASELINE;SKIP_REWRITE;SKIP_RUNTIME;SKIP_SAMPLING;FORCE_SAMPLING" # options
|
||||
"NAME;TARGET;MPI;GPU;NUM_PROCS;REWRITE_TIMEOUT;RUNTIME_TIMEOUT" # single value
|
||||
# args
|
||||
"${_KWARGS}" # multiple value args
|
||||
@@ -315,32 +317,29 @@ function(OMNITRACE_ADD_TEST)
|
||||
${TEST_REWRITE_ARGS} -- $<TARGET_FILE:${TEST_TARGET}>
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
|
||||
if(NOT TEST_SKIP_SAMPLING)
|
||||
add_test(
|
||||
NAME ${TEST_NAME}-binary-rewrite-sampling
|
||||
COMMAND
|
||||
$<TARGET_FILE:omnitrace-exe> -o
|
||||
$<TARGET_FILE_DIR:${TEST_TARGET}>/${TEST_NAME}.samp -M sampling
|
||||
${TEST_REWRITE_ARGS} -- $<TARGET_FILE:${TEST_TARGET}>
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
add_test(
|
||||
NAME ${TEST_NAME}-binary-rewrite-run
|
||||
COMMAND
|
||||
${COMMAND_PREFIX} $<TARGET_FILE_DIR:${TEST_TARGET}>/${TEST_NAME}.inst
|
||||
${TEST_RUN_ARGS}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
if(NOT TEST_SKIP_SAMPLING)
|
||||
add_test(
|
||||
NAME ${TEST_NAME}-binary-rewrite-run-sampling
|
||||
COMMAND
|
||||
${COMMAND_PREFIX}
|
||||
$<TARGET_FILE_DIR:${TEST_TARGET}>/${TEST_NAME}.samp
|
||||
${TEST_RUN_ARGS}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif()
|
||||
if(TEST_FORCE_SAMPLING OR (NOT TEST_SKIP_REWRITE AND NOT TEST_SKIP_SAMPLING))
|
||||
add_test(
|
||||
NAME ${TEST_NAME}-binary-rewrite-sampling
|
||||
COMMAND
|
||||
$<TARGET_FILE:omnitrace-exe> -o
|
||||
$<TARGET_FILE_DIR:${TEST_TARGET}>/${TEST_NAME}.samp -M sampling
|
||||
${TEST_REWRITE_ARGS} -- $<TARGET_FILE:${TEST_TARGET}>
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
|
||||
add_test(
|
||||
NAME ${TEST_NAME}-binary-rewrite-sampling-run
|
||||
COMMAND
|
||||
${COMMAND_PREFIX} $<TARGET_FILE_DIR:${TEST_TARGET}>/${TEST_NAME}.samp
|
||||
${TEST_RUN_ARGS}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
if(NOT TEST_SKIP_RUNTIME)
|
||||
@@ -349,14 +348,14 @@ function(OMNITRACE_ADD_TEST)
|
||||
COMMAND $<TARGET_FILE:omnitrace-exe> ${TEST_RUNTIME_ARGS} --
|
||||
$<TARGET_FILE:${TEST_TARGET}> ${TEST_RUN_ARGS}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
if(NOT TEST_SKIP_SAMPLING)
|
||||
add_test(
|
||||
NAME ${TEST_NAME}-runtime-instrument-sampling
|
||||
COMMAND $<TARGET_FILE:omnitrace-exe> -M sampling ${TEST_RUNTIME_ARGS}
|
||||
-- $<TARGET_FILE:${TEST_TARGET}> ${TEST_RUN_ARGS}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif()
|
||||
if(TEST_FORCE_SAMPLING OR (NOT TEST_SKIP_RUNTIME AND NOT TEST_SKIP_SAMPLING))
|
||||
add_test(
|
||||
NAME ${TEST_NAME}-runtime-instrument-sampling
|
||||
COMMAND $<TARGET_FILE:omnitrace-exe> -M sampling ${TEST_RUNTIME_ARGS} --
|
||||
$<TARGET_FILE:${TEST_TARGET}> ${TEST_RUN_ARGS}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
if(TEST ${TEST_NAME}-binary-rewrite-run)
|
||||
@@ -364,15 +363,15 @@ function(OMNITRACE_ADD_TEST)
|
||||
PROPERTIES DEPENDS ${TEST_NAME}-binary-rewrite)
|
||||
endif()
|
||||
|
||||
if(TEST ${TEST_NAME}-binary-rewrite-run-sampling)
|
||||
set_tests_properties(${TEST_NAME}-binary-rewrite-run-sampling
|
||||
if(TEST ${TEST_NAME}-binary-rewrite-sampling-run)
|
||||
set_tests_properties(${TEST_NAME}-binary-rewrite-sampling-run
|
||||
PROPERTIES DEPENDS ${TEST_NAME}-binary-rewrite-sampling)
|
||||
endif()
|
||||
|
||||
foreach(
|
||||
_TEST
|
||||
baseline binary-rewrite binary-rewrite-run binary-rewrite-sampling
|
||||
binary-rewrite-run-sampling runtime-instrument runtime-instrument-sampling)
|
||||
binary-rewrite-sampling-run runtime-instrument runtime-instrument-sampling)
|
||||
string(REGEX REPLACE "-run(-|/)" "\\1" _prefix "${TEST_NAME}-${_TEST}/")
|
||||
set(_environ "${TEST_ENVIRONMENT}")
|
||||
set(_labels "${_TEST}")
|
||||
@@ -685,6 +684,21 @@ omnitrace_add_test(
|
||||
RUN_ARGS 10 ${NUM_THREADS} 1000
|
||||
ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF")
|
||||
|
||||
omnitrace_add_test(
|
||||
NAME parallel-overhead-locks
|
||||
TARGET parallel-overhead-locks
|
||||
LABELS "locks"
|
||||
REWRITE_ARGS -e -i 256
|
||||
RUNTIME_ARGS -e -i 256
|
||||
RUN_ARGS 30 4 1000
|
||||
ENVIRONMENT
|
||||
"${_lock_environment};OMNITRACE_USE_TIMEMORY=ON;OMNITRACE_USE_PERFETTO=ON;OMNITRACE_COLLAPSE_THREADS=OFF;OMNITRACE_SAMPLING_REALTIME=ON;OMNITRACE_SAMPLING_REALTIME_FREQ=10;OMNITRACE_SAMPLING_REALTIME_TIDS=0"
|
||||
REWRITE_RUN_PASS_REGEX
|
||||
"wall_clock .*\\|_pthread_create .* 4 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000"
|
||||
RUNTIME_PASS_REGEX
|
||||
"wall_clock .*\\|_pthread_create .* 4 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000"
|
||||
)
|
||||
|
||||
omnitrace_add_test(
|
||||
SKIP_RUNTIME SKIP_SAMPLING
|
||||
NAME parallel-overhead-locks-timemory
|
||||
@@ -693,7 +707,7 @@ omnitrace_add_test(
|
||||
REWRITE_ARGS -e -v 2 --min-instructions=4
|
||||
RUN_ARGS 10 4 1000
|
||||
ENVIRONMENT
|
||||
"${_lock_environment};OMNITRACE_USE_TIMEMORY=ON;OMNITRACE_USE_PERFETTO=OFF"
|
||||
"${_lock_environment};OMNITRACE_FLAT_PROFILE=ON;OMNITRACE_USE_TIMEMORY=ON;OMNITRACE_USE_PERFETTO=OFF"
|
||||
REWRITE_RUN_PASS_REGEX
|
||||
"start_thread (.*) 4 (.*) pthread_mutex_lock (.*) 4000 (.*) pthread_mutex_unlock (.*) 4000"
|
||||
)
|
||||
@@ -706,8 +720,8 @@ omnitrace_add_test(
|
||||
REWRITE_ARGS -e -v 2 --min-instructions=8
|
||||
RUN_ARGS 10 4 1000
|
||||
ENVIRONMENT
|
||||
"${_lock_environment};OMNITRACE_USE_TIMEMORY=OFF;OMNITRACE_USE_PERFETTO=ON"
|
||||
PROPERTIES WILL_FAIL ON)
|
||||
"${_lock_environment};OMNITRACE_FLAT_PROFILE=ON;OMNITRACE_USE_TIMEMORY=OFF;OMNITRACE_USE_PERFETTO=ON"
|
||||
)
|
||||
|
||||
omnitrace_add_test(
|
||||
NAME user-api
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele