Support sampling duration, sampling TIDs (#142)
- Sampling duration config values
- OMNITRACE_SAMPLING_DURATION
- OMNITRACE_PROCESS_SAMPLING_DURATION
- Disables sampling after this time (in seconds) has elapsed
- Sampling thread-id config values
- OMNITRACE_SAMPLING_TIDS
- OMNITRACE_SAMPLING_CPUTIME_TIDS
- OMNITRACE_SAMPLING_REALTIME_TIDS
- Allows user to select certain threads for sampling
- Miscellaneous
- Tweaked the finalization verbosity messages
- moved sampling-on-child-threads into runtime.hpp and runtime.cpp
- fixed submodule dyninst header install
[ROCm/rocprofiler-systems commit: e67afd33eb]
This commit is contained in:
committed by
GitHub
parent
cbdc7cad4b
commit
2ef9dfd002
@@ -314,8 +314,7 @@ if(OMNITRACE_BUILD_DYNINST)
|
||||
TARGETS ${_LIB}
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/omnitrace
|
||||
COMPONENT dyninst
|
||||
PUBLIC_HEADER DESTINATION ${PROJECT_BINARY_DIR}/.discard/omnitrace/include
|
||||
)
|
||||
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/omnitrace/dyninst)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@
|
||||
#define OMNITRACE_STRINGIZE(X) OMNITRACE_STRINGIZE2(X)
|
||||
#define OMNITRACE_STRINGIZE2(X) #X
|
||||
#define OMNITRACE_VAR_NAME_COMBINE(X, Y) X##Y
|
||||
#define OMNITRACE_VARIABLE(Y) OMNITRACE_VAR_NAME_COMBINE(_omni_var_, Y)
|
||||
#define OMNITRACE_VARIABLE(X, Y) OMNITRACE_VAR_NAME_COMBINE(X, Y)
|
||||
#define OMNITRACE_LINESTR OMNITRACE_STRINGIZE(__LINE__)
|
||||
#define OMNITRACE_ESC(...) __VA_ARGS__
|
||||
|
||||
|
||||
@@ -337,27 +337,26 @@ omnitrace_init_tooling_hidden()
|
||||
if(get_state() > State::Active) return;
|
||||
if(get_use_process_sampling())
|
||||
{
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
process_sampler::setup();
|
||||
pthread_gotcha::pop_enable_sampling_on_child_threads();
|
||||
}
|
||||
if(get_use_sampling())
|
||||
{
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
sampling::setup();
|
||||
pthread_gotcha::pop_enable_sampling_on_child_threads();
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(get_use_sampling());
|
||||
}
|
||||
if(get_use_sampling())
|
||||
{
|
||||
push_enable_sampling_on_child_threads(get_use_sampling());
|
||||
sampling::unblock_signals();
|
||||
}
|
||||
get_main_bundle()->start();
|
||||
set_state(State::Active); // set to active as very last operation
|
||||
} };
|
||||
|
||||
if(get_use_sampling())
|
||||
{
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
sampling::block_signals();
|
||||
}
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
|
||||
if(get_use_sampling()) sampling::block_signals();
|
||||
|
||||
if(get_use_critical_trace())
|
||||
{
|
||||
@@ -426,8 +425,8 @@ omnitrace_init_tooling_hidden()
|
||||
|
||||
for(const auto& itr : _disabled_categories)
|
||||
{
|
||||
OMNITRACE_VERBOSE(1, "Disabling perfetto track event category: %s\n",
|
||||
itr.c_str());
|
||||
OMNITRACE_VERBOSE_F(1, "Disabling perfetto track event category: %s\n",
|
||||
itr.c_str());
|
||||
track_event_cfg.add_disabled_categories(itr);
|
||||
}
|
||||
|
||||
@@ -581,6 +580,8 @@ omnitrace_finalize_hidden(void)
|
||||
return;
|
||||
}
|
||||
|
||||
if(get_verbose() >= 0 || get_debug()) fprintf(stderr, "\n");
|
||||
|
||||
OMNITRACE_VERBOSE_F(0, "finalizing...\n");
|
||||
thread_info::set_stop(comp::wall_clock::record());
|
||||
|
||||
@@ -604,8 +605,8 @@ omnitrace_finalize_hidden(void)
|
||||
|
||||
set_state(State::Finalized);
|
||||
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
pthread_gotcha::set_sampling_on_all_future_threads(false);
|
||||
push_enable_sampling_on_child_threads(false);
|
||||
set_sampling_on_all_future_threads(false);
|
||||
|
||||
auto _debug_init = get_debug_finalize();
|
||||
auto _debug_value = get_debug();
|
||||
@@ -614,8 +615,6 @@ omnitrace_finalize_hidden(void)
|
||||
if(_debug_init) config::set_setting_value("OMNITRACE_DEBUG", _debug_value);
|
||||
} };
|
||||
|
||||
OMNITRACE_DEBUG_F("\n");
|
||||
|
||||
auto& _thread_bundle = thread_data<omnitrace_thread_bundle_t>::instance();
|
||||
if(_thread_bundle) _thread_bundle->stop();
|
||||
|
||||
@@ -713,7 +712,7 @@ omnitrace_finalize_hidden(void)
|
||||
comp::roctracer::shutdown();
|
||||
|
||||
// join extra thread(s) used by roctracer
|
||||
OMNITRACE_VERBOSE_F(1, "Waiting on roctracer tasks...\n");
|
||||
OMNITRACE_VERBOSE_F(2, "Waiting on roctracer tasks...\n");
|
||||
tasking::join();
|
||||
}
|
||||
|
||||
@@ -734,10 +733,11 @@ omnitrace_finalize_hidden(void)
|
||||
// report the high-level metrics for the process
|
||||
if(get_main_bundle())
|
||||
{
|
||||
if(get_verbose() >= 0 || get_debug()) fprintf(stderr, "\n");
|
||||
std::string _msg = JOIN("", *get_main_bundle());
|
||||
auto _pos = _msg.find(">>> ");
|
||||
if(_pos != std::string::npos) _msg = _msg.substr(_pos + 5);
|
||||
OMNITRACE_PRINT("%s\n", _msg.c_str());
|
||||
OMNITRACE_VERBOSE_F(0, "%s\n", _msg.c_str());
|
||||
OMNITRACE_DEBUG_F("Resetting main bundle...\n");
|
||||
get_main_bundle()->reset();
|
||||
}
|
||||
@@ -754,10 +754,12 @@ omnitrace_finalize_hidden(void)
|
||||
std::string _msg = JOIN("", *itr);
|
||||
auto _pos = _msg.find(">>> ");
|
||||
if(_pos != std::string::npos) _msg = _msg.substr(_pos + 5);
|
||||
OMNITRACE_VERBOSE(0, "%s\n", _msg.c_str());
|
||||
OMNITRACE_VERBOSE_F(0, "%s\n", _msg.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if(get_verbose() >= 0 || get_debug()) fprintf(stderr, "\n");
|
||||
|
||||
// ensure that all the MT instances are flushed
|
||||
if(get_use_sampling())
|
||||
{
|
||||
@@ -813,6 +815,16 @@ omnitrace_finalize_hidden(void)
|
||||
tasking::join();
|
||||
}
|
||||
|
||||
// shutdown tasking before timemory is finalized, especially the roctracer thread-pool
|
||||
OMNITRACE_VERBOSE_F(1, "Shutting down thread-pools...\n");
|
||||
tasking::shutdown();
|
||||
|
||||
if(get_use_code_coverage())
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(1, "Post-processing the code coverage...\n");
|
||||
coverage::post_process();
|
||||
}
|
||||
|
||||
bool _perfetto_output_error = false;
|
||||
if(get_use_perfetto() && !is_system_backend())
|
||||
{
|
||||
@@ -821,11 +833,7 @@ omnitrace_finalize_hidden(void)
|
||||
OMNITRACE_CI_THROW(tracing_session == nullptr,
|
||||
"Null pointer to the tracing session");
|
||||
|
||||
if(get_verbose() >= 0) fprintf(stderr, "\n");
|
||||
if(get_verbose() >= 0 || get_debug())
|
||||
fprintf(stderr, "%s[%s][%s]|%i> Flushing perfetto...%s\n",
|
||||
tim::log::color::info(), TIMEMORY_PROJECT_NAME, OMNITRACE_FUNCTION,
|
||||
dmp::rank(), tim::log::color::end());
|
||||
OMNITRACE_VERBOSE_F(0, "Finalizing perfetto...\n");
|
||||
|
||||
// Make sure the last event is closed for this example.
|
||||
perfetto::TrackEvent::Flush();
|
||||
@@ -905,16 +913,6 @@ omnitrace_finalize_hidden(void)
|
||||
}
|
||||
}
|
||||
|
||||
// shutdown tasking before timemory is finalized, especially the roctracer thread-pool
|
||||
OMNITRACE_VERBOSE_F(1, "Shutting down thread-pools...\n");
|
||||
tasking::shutdown();
|
||||
|
||||
OMNITRACE_VERBOSE_F(1, "Shutting down thread-pools...\n");
|
||||
if(get_use_code_coverage())
|
||||
{
|
||||
coverage::post_process();
|
||||
}
|
||||
|
||||
tim::manager::instance()->add_metadata([](auto& ar) {
|
||||
auto _maps = tim::procfs::read_maps(process::get_id());
|
||||
auto _libs = std::set<std::string>{};
|
||||
|
||||
+2
-4
@@ -22,7 +22,6 @@
|
||||
|
||||
#include "library/components/pthread_create_gotcha.hpp"
|
||||
#include "library/components/category_region.hpp"
|
||||
#include "library/components/pthread_gotcha.hpp"
|
||||
#include "library/components/roctracer.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/debug.hpp"
|
||||
@@ -213,9 +212,8 @@ pthread_create_gotcha::wrapper::operator()() const
|
||||
if(m_enable_sampling)
|
||||
{
|
||||
_is_sampling = true;
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
_signals = sampling::setup();
|
||||
pthread_gotcha::pop_enable_sampling_on_child_threads();
|
||||
sampling::unblock_signals();
|
||||
}
|
||||
}
|
||||
@@ -336,7 +334,7 @@ pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr,
|
||||
auto _active = (get_state() == ::omnitrace::State::Active && !_disabled);
|
||||
auto _coverage = (get_mode() == Mode::Coverage);
|
||||
auto _use_sampling = get_use_sampling();
|
||||
auto _sample_child = pthread_gotcha::sampling_enabled_on_child_threads();
|
||||
auto _sample_child = sampling_enabled_on_child_threads();
|
||||
auto _tid = utility::get_thread_index();
|
||||
auto _use_bundle = (_active && !_coverage);
|
||||
const auto& _info = thread_info::init(!_active || !_sample_child || _disabled);
|
||||
|
||||
-53
@@ -69,14 +69,6 @@ namespace
|
||||
using bundle_t = tim::lightweight_tuple<component::pthread_create_gotcha_t,
|
||||
component::pthread_mutex_gotcha_t>;
|
||||
|
||||
auto&
|
||||
get_sampling_on_child_threads_history(int64_t _idx = utility::get_thread_index())
|
||||
{
|
||||
static auto _v = utility::get_filled_array<OMNITRACE_MAX_THREADS>(
|
||||
[]() { return utility::get_reserved_vector<bool>(32); });
|
||||
return _v.at(_idx);
|
||||
}
|
||||
|
||||
auto&
|
||||
get_bundle()
|
||||
{
|
||||
@@ -112,51 +104,6 @@ pthread_gotcha::shutdown()
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
pthread_gotcha::sampling_enabled_on_child_threads()
|
||||
{
|
||||
return sampling_on_child_threads();
|
||||
}
|
||||
|
||||
bool
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(bool _v)
|
||||
{
|
||||
bool _last = sampling_on_child_threads();
|
||||
sampling_on_child_threads() = _v;
|
||||
auto& _hist = get_sampling_on_child_threads_history();
|
||||
_hist.emplace_back(_last);
|
||||
return _last;
|
||||
}
|
||||
|
||||
bool
|
||||
pthread_gotcha::pop_enable_sampling_on_child_threads()
|
||||
{
|
||||
auto& _hist = get_sampling_on_child_threads_history();
|
||||
if(!_hist.empty())
|
||||
{
|
||||
bool _restored = _hist.back();
|
||||
_hist.pop_back();
|
||||
sampling_on_child_threads() = _restored;
|
||||
}
|
||||
return sampling_on_child_threads();
|
||||
}
|
||||
|
||||
void
|
||||
pthread_gotcha::set_sampling_on_all_future_threads(bool _v)
|
||||
{
|
||||
for(size_t i = 0; i < max_supported_threads; ++i)
|
||||
get_sampling_on_child_threads_history(i).emplace_back(_v);
|
||||
}
|
||||
|
||||
bool&
|
||||
pthread_gotcha::sampling_on_child_threads()
|
||||
{
|
||||
static thread_local bool _v = get_sampling_on_child_threads_history().empty()
|
||||
? false
|
||||
: get_sampling_on_child_threads_history().back();
|
||||
return _v;
|
||||
}
|
||||
|
||||
void
|
||||
pthread_gotcha::start()
|
||||
{
|
||||
|
||||
-15
@@ -42,22 +42,7 @@ struct pthread_gotcha : tim::component::base<pthread_gotcha, void>
|
||||
static void configure();
|
||||
static void shutdown();
|
||||
|
||||
// query current value
|
||||
static bool sampling_enabled_on_child_threads();
|
||||
|
||||
// use this to disable sampling in a region (e.g. right before thread creation)
|
||||
static bool push_enable_sampling_on_child_threads(bool _v);
|
||||
|
||||
// use this to restore previous setting
|
||||
static bool pop_enable_sampling_on_child_threads();
|
||||
|
||||
// make sure every newly created thread starts with this value
|
||||
static void set_sampling_on_all_future_threads(bool _v);
|
||||
|
||||
static void start();
|
||||
static void stop();
|
||||
|
||||
private:
|
||||
static bool& sampling_on_child_threads();
|
||||
};
|
||||
} // namespace omnitrace
|
||||
|
||||
+1
-2
@@ -22,7 +22,6 @@
|
||||
|
||||
#include "library/components/pthread_mutex_gotcha.hpp"
|
||||
#include "library/components/category_region.hpp"
|
||||
#include "library/components/pthread_gotcha.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/critical_trace.hpp"
|
||||
#include "library/debug.hpp"
|
||||
@@ -293,7 +292,7 @@ pthread_mutex_gotcha::is_disabled()
|
||||
{
|
||||
return (get_state() != ::omnitrace::State::Active ||
|
||||
get_thread_state() != ThreadState::Enabled ||
|
||||
(get_use_sampling() && !pthread_gotcha::sampling_enabled_on_child_threads()));
|
||||
(get_use_sampling() && !sampling_enabled_on_child_threads()));
|
||||
}
|
||||
} // namespace component
|
||||
} // namespace omnitrace
|
||||
|
||||
@@ -22,13 +22,13 @@
|
||||
|
||||
#include "library/components/roctracer.hpp"
|
||||
#include "library/common.hpp"
|
||||
#include "library/components/pthread_gotcha.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/debug.hpp"
|
||||
#include "library/defines.hpp"
|
||||
#include "library/dynamic_library.hpp"
|
||||
#include "library/redirect.hpp"
|
||||
#include "library/roctracer.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/sampling.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
|
||||
@@ -121,7 +121,7 @@ roctracer::setup()
|
||||
roctracer_is_setup() = true;
|
||||
|
||||
OMNITRACE_VERBOSE_F(1, "setting up roctracer...\n");
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
|
||||
dynamic_library _amdhip64{ "OMNITRACE_ROCTRACER_LIBAMDHIP64",
|
||||
find_library_path("libamdhip64.so",
|
||||
@@ -169,8 +169,6 @@ roctracer::setup()
|
||||
for(auto& itr : roctracer_setup_routines())
|
||||
itr.second();
|
||||
|
||||
pthread_gotcha::pop_enable_sampling_on_child_threads();
|
||||
|
||||
OMNITRACE_VERBOSE_F(1, "roctracer is setup\n");
|
||||
}
|
||||
|
||||
|
||||
@@ -92,6 +92,42 @@ get_available_perfetto_categories()
|
||||
return _v;
|
||||
}
|
||||
|
||||
template <typename Tp = int64_t>
|
||||
std::set<Tp>
|
||||
parse_numeric_range(std::string _input_string, const std::string& _label)
|
||||
{
|
||||
for(auto& itr : _input_string)
|
||||
itr = tolower(itr);
|
||||
auto _result = std::set<Tp>{};
|
||||
for(const auto& _v : tim::delimit(_input_string, ",; \t"))
|
||||
{
|
||||
if(_v.find_first_not_of("0123456789-") != std::string::npos)
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(
|
||||
0,
|
||||
"Invalid %s specification. Only numerical values (e.g., 0) or "
|
||||
"ranges (e.g., 0-7) are permitted. Ignoring %s...",
|
||||
_label.c_str(), _v.c_str());
|
||||
continue;
|
||||
}
|
||||
if(_v.find('-') != std::string::npos)
|
||||
{
|
||||
auto _vv = tim::delimit(_v, "-");
|
||||
OMNITRACE_CONDITIONAL_THROW(
|
||||
_vv.size() != 2,
|
||||
"Invalid %s range specification: %s. Required format N-M, e.g. 0-4",
|
||||
_label.c_str(), _v.c_str());
|
||||
for(int64_t i = std::stol(_vv.at(0)); i <= std::stol(_vv.at(1)); ++i)
|
||||
_result.emplace(i);
|
||||
}
|
||||
else
|
||||
{
|
||||
_result.emplace(std::stol(_v));
|
||||
}
|
||||
}
|
||||
return _result;
|
||||
}
|
||||
|
||||
#define OMNITRACE_CONFIG_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, ...) \
|
||||
[&]() { \
|
||||
auto _ret = _config->insert<TYPE, TYPE>( \
|
||||
@@ -334,12 +370,21 @@ configure_settings(bool _init)
|
||||
"delivered. Defaults to OMNITRACE_SAMPLING_DELAY when <= 0.0",
|
||||
-1.0, "sampling", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(double, "OMNITRACE_SAMPLING_DURATION",
|
||||
"If > 0.0, time (in seconds) to sample before stopping", 0.0,
|
||||
"sampling", "process_sampling");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
double, "OMNITRACE_PROCESS_SAMPLING_FREQ",
|
||||
"Number of measurements per second when OMNITTRACE_USE_PROCESS_SAMPLING=ON. If "
|
||||
"set to zero, uses OMNITRACE_SAMPLING_FREQ value",
|
||||
0.0, "process_sampling");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(double, "OMNITRACE_PROCESS_SAMPLING_DURATION",
|
||||
"If > 0.0, time (in seconds) to sample before stopping. If "
|
||||
"less than zero, uses OMNITRACE_SAMPLING_DURATION",
|
||||
-1.0, "sampling", "process_sampling");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
std::string, "OMNITRACE_SAMPLING_CPUS",
|
||||
"CPUs to collect frequency information for. Values should be separated by commas "
|
||||
@@ -359,6 +404,29 @@ configure_settings(bool _init)
|
||||
"'all' and 'none' suppresses all GPU sampling",
|
||||
std::string{ "all" }, "rocm_smi", "rocm", "process_sampling");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
std::string, "OMNITRACE_SAMPLING_TIDS",
|
||||
"Limit call-stack sampling to specific thread IDs, starting at zero for the main "
|
||||
"thread. Be aware that some libraries, such as ROCm may create additional "
|
||||
"threads which increment the TID count. However, no threads started by omnitrace "
|
||||
"will increment the TID count. Values should be separated by commas and can be "
|
||||
"explicit or ranges, e.g. 0,1,5-8. An empty value implies all TIDs.",
|
||||
std::string{}, "sampling", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
std::string, "OMNITRACE_SAMPLING_CPUTIME_TIDS",
|
||||
"Same as OMNITRACE_SAMPLING_TIDS but applies specifically to samplers whose "
|
||||
"timers are based on the CPU-time. This is useful when both "
|
||||
"OMNITRACE_SAMPLING_CPUTIME=ON and OMNITRACE_SAMPLING_REALTIME=ON",
|
||||
std::string{}, "sampling", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
std::string, "OMNITRACE_SAMPLING_REALTIME_TIDS",
|
||||
"Same as OMNITRACE_SAMPLING_TIDS but applies specifically to samplers whose "
|
||||
"timers are based on the real (wall) time. This is useful when both "
|
||||
"OMNITRACE_SAMPLING_CPUTIME=ON and OMNITRACE_SAMPLING_REALTIME=ON",
|
||||
std::string{}, "sampling", "advanced");
|
||||
|
||||
auto _backend = tim::get_env_choice<std::string>(
|
||||
"OMNITRACE_PERFETTO_BACKEND",
|
||||
(_system_backend) ? "system" // if OMNITRACE_PERFETTO_BACKEND_SYSTEM is true,
|
||||
@@ -480,7 +548,7 @@ configure_settings(bool _init)
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(size_t, "OMNITRACE_PERFETTO_BUFFER_SIZE_KB",
|
||||
"Size of perfetto buffer (in KB)", size_t{ 1024000 },
|
||||
"perfetto", "data", "advanced");
|
||||
"perfetto", "data");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_PERFETTO_COMBINE_TRACES",
|
||||
"Combine Perfetto traces. If not explicitly set, it will "
|
||||
@@ -695,9 +763,13 @@ configure_settings(bool _init)
|
||||
tim::delimit(_config->get<std::string>("OMNITRACE_CONFIG_FILE"), ";:"))
|
||||
{
|
||||
if(_config->get_suppress_config()) continue;
|
||||
|
||||
OMNITRACE_BASIC_VERBOSE(1, "Reading config file %s\n", itr.c_str());
|
||||
_config->read(itr);
|
||||
if(_config->get<bool>("OMNITRACE_CI") && _main_proc)
|
||||
|
||||
if(_main_proc &&
|
||||
((_config->get<bool>("OMNITRACE_CI") && settings::verbose() >= 0) ||
|
||||
settings::verbose() >= 1 || settings::debug()))
|
||||
{
|
||||
std::ifstream _in{ itr };
|
||||
std::stringstream _iss{};
|
||||
@@ -709,7 +781,7 @@ configure_settings(bool _init)
|
||||
}
|
||||
if(!_iss.str().empty())
|
||||
{
|
||||
OMNITRACE_BASIC_PRINT("config file '%s':\n%s\n", itr.c_str(),
|
||||
OMNITRACE_BASIC_PRINT("config file '%s':\n%s", itr.c_str(),
|
||||
_iss.str().c_str());
|
||||
}
|
||||
}
|
||||
@@ -1753,6 +1825,13 @@ get_sampling_real_delay()
|
||||
return _val;
|
||||
}
|
||||
|
||||
double
|
||||
get_sampling_duration()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_SAMPLING_DURATION");
|
||||
return static_cast<tim::tsettings<double>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
std::string
|
||||
get_sampling_cpus()
|
||||
{
|
||||
@@ -1760,6 +1839,30 @@ get_sampling_cpus()
|
||||
return static_cast<tim::tsettings<std::string>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
std::set<int64_t>
|
||||
get_sampling_tids()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_SAMPLING_TIDS");
|
||||
return parse_numeric_range<>(
|
||||
static_cast<tim::tsettings<std::string>&>(*_v->second).get(), "thread IDs");
|
||||
}
|
||||
|
||||
std::set<int64_t>
|
||||
get_sampling_cpu_tids()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_SAMPLING_CPUTIME_TIDS");
|
||||
return parse_numeric_range<>(
|
||||
static_cast<tim::tsettings<std::string>&>(*_v->second).get(), "thread IDs");
|
||||
}
|
||||
|
||||
std::set<int64_t>
|
||||
get_sampling_real_tids()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_SAMPLING_REALTIME_TIDS");
|
||||
return parse_numeric_range<>(
|
||||
static_cast<tim::tsettings<std::string>&>(*_v->second).get(), "thread IDs");
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_critical_trace_count()
|
||||
{
|
||||
@@ -1777,6 +1880,13 @@ get_process_sampling_freq()
|
||||
return _val;
|
||||
}
|
||||
|
||||
double
|
||||
get_process_sampling_duration()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_PROCESS_SAMPLING_DURATION");
|
||||
return static_cast<tim::tsettings<double>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
std::string
|
||||
get_sampling_gpus()
|
||||
{
|
||||
|
||||
@@ -298,12 +298,24 @@ get_sampling_cpu_delay();
|
||||
double
|
||||
get_sampling_real_delay();
|
||||
|
||||
double
|
||||
get_sampling_duration();
|
||||
|
||||
std::string
|
||||
get_sampling_cpus();
|
||||
|
||||
std::set<int64_t>
|
||||
get_sampling_cpu_tids();
|
||||
|
||||
std::set<int64_t>
|
||||
get_sampling_real_tids();
|
||||
|
||||
double
|
||||
get_process_sampling_freq();
|
||||
|
||||
double
|
||||
get_process_sampling_duration();
|
||||
|
||||
std::string
|
||||
get_sampling_gpus();
|
||||
|
||||
|
||||
@@ -160,8 +160,9 @@ write_perfetto_counter_track(index&& _idx, Args... _args)
|
||||
void
|
||||
post_process()
|
||||
{
|
||||
OMNITRACE_PRINT("Post-processing %zu cpu frequency and memory usage entries...\n",
|
||||
cpu_data.size());
|
||||
OMNITRACE_VERBOSE(1,
|
||||
"Post-processing %zu cpu frequency and memory usage entries...\n",
|
||||
cpu_data.size());
|
||||
auto _process_frequencies = [](size_t _idx, size_t _offset) {
|
||||
using freq_track = perfetto_counter_track<cpu_freq_component>;
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "library/process_sampler.hpp"
|
||||
#include "library/components/pthread_gotcha.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/cpu_freq.hpp"
|
||||
#include "library/debug.hpp"
|
||||
@@ -86,10 +85,16 @@ sampler::poll(std::atomic<State>* _state, nsec_t _interval, promise_t* _ready)
|
||||
itr->config();
|
||||
|
||||
OMNITRACE_VERBOSE(
|
||||
1, "Thread sampler polling at an interval of %f seconds...\n",
|
||||
1, "Background process sampling polling at an interval of %f seconds...\n",
|
||||
std::chrono::duration_cast<std::chrono::duration<double>>(_interval).count());
|
||||
|
||||
auto _duration = config::get_process_sampling_duration();
|
||||
if(_duration < 0.0) _duration = config::get_sampling_duration();
|
||||
bool _has_duration = (_duration > 0.0);
|
||||
|
||||
auto _now = std::chrono::steady_clock::now();
|
||||
auto _end =
|
||||
_now + std::chrono::nanoseconds{ static_cast<uint64_t>(_duration * units::sec) };
|
||||
while(_state && _state->load() != State::Finalized && get_state() != State::Finalized)
|
||||
{
|
||||
std::this_thread::sleep_until(_now);
|
||||
@@ -100,12 +105,23 @@ sampler::poll(std::atomic<State>* _state, nsec_t _interval, promise_t* _ready)
|
||||
for(auto& itr : instances)
|
||||
itr->sample();
|
||||
get_sampler_is_sampling().store(false);
|
||||
if(_has_duration && _now >= _end) break;
|
||||
while(_now < std::chrono::steady_clock::now())
|
||||
_now += _interval;
|
||||
}
|
||||
|
||||
// ensure this is always false
|
||||
get_sampler_is_sampling().store(false);
|
||||
|
||||
if(_has_duration && _now >= _end && get_state() != State::Finalized)
|
||||
{
|
||||
OMNITRACE_VERBOSE(
|
||||
1,
|
||||
"Background process sampling duration of %f seconds has elapsed. "
|
||||
"Shutting down process sampling...\n",
|
||||
_duration);
|
||||
}
|
||||
|
||||
OMNITRACE_CONDITIONAL_BASIC_PRINT(get_debug(),
|
||||
"Thread sampler polling completed...\n");
|
||||
|
||||
@@ -155,12 +171,12 @@ sampler::setup()
|
||||
auto _fut = _prom.get_future();
|
||||
polling_finished = std::make_unique<promise_t>();
|
||||
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
|
||||
set_state(State::PreInit);
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
get_thread() = std::make_unique<std::thread>(&poll<msec_t>, &get_sampler_state(),
|
||||
msec_t{ _msec_freq }, &_prom);
|
||||
_fut.wait();
|
||||
pthread_gotcha::pop_enable_sampling_on_child_threads();
|
||||
|
||||
set_state(State::Active);
|
||||
}
|
||||
|
||||
@@ -167,7 +167,7 @@ extern "C"
|
||||
if(!tim::settings::enabled()) return true;
|
||||
|
||||
roctracer_is_init() = true;
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
OMNITRACE_BASIC_VERBOSE_F(1 || rocm::on_load_trace, "Loading ROCm tooling...\n");
|
||||
|
||||
tim::consume_parameters(table, runtime_version, failed_tool_count,
|
||||
@@ -308,7 +308,6 @@ extern "C"
|
||||
"failed! OMNITRACE_ROCPROFILER_LIBRARY=%s\n",
|
||||
_rocprof.filename.c_str());
|
||||
}
|
||||
pthread_gotcha::pop_enable_sampling_on_child_threads();
|
||||
|
||||
OMNITRACE_BASIC_VERBOSE_F(2 || rocm::on_load_trace, "Loading... %s\n",
|
||||
(_success) ? "Done" : "Failed");
|
||||
|
||||
@@ -33,12 +33,12 @@
|
||||
#include "library/rocm_smi.hpp"
|
||||
#include "library/common.hpp"
|
||||
#include "library/components/fwd.hpp"
|
||||
#include "library/components/pthread_gotcha.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/critical_trace.hpp"
|
||||
#include "library/debug.hpp"
|
||||
#include "library/gpu.hpp"
|
||||
#include "library/perfetto.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/state.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
|
||||
@@ -328,7 +328,7 @@ setup()
|
||||
|
||||
if(is_initialized() || !get_use_rocm_smi()) return;
|
||||
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
|
||||
// assign the device count as determined by rocm-smi
|
||||
data::device_count = device_count();
|
||||
@@ -402,8 +402,6 @@ setup()
|
||||
_e.what());
|
||||
data::device_list = {};
|
||||
}
|
||||
|
||||
pthread_gotcha::pop_enable_sampling_on_child_threads();
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -52,6 +52,26 @@
|
||||
|
||||
namespace omnitrace
|
||||
{
|
||||
namespace
|
||||
{
|
||||
auto&
|
||||
get_sampling_on_child_threads_history(int64_t _idx = utility::get_thread_index())
|
||||
{
|
||||
static auto _v = utility::get_filled_array<OMNITRACE_MAX_THREADS>(
|
||||
[]() { return utility::get_reserved_vector<bool>(32); });
|
||||
return _v.at(_idx);
|
||||
}
|
||||
|
||||
bool&
|
||||
sampling_on_child_threads()
|
||||
{
|
||||
static thread_local bool _v = get_sampling_on_child_threads_history().empty()
|
||||
? false
|
||||
: get_sampling_on_child_threads_history().back();
|
||||
return _v;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
int
|
||||
get_realtime_signal()
|
||||
{
|
||||
@@ -254,4 +274,40 @@ pop_thread_state()
|
||||
}
|
||||
return get_thread_state();
|
||||
}
|
||||
|
||||
bool
|
||||
sampling_enabled_on_child_threads()
|
||||
{
|
||||
return sampling_on_child_threads();
|
||||
}
|
||||
|
||||
bool
|
||||
push_enable_sampling_on_child_threads(bool _v)
|
||||
{
|
||||
bool _last = sampling_on_child_threads();
|
||||
sampling_on_child_threads() = _v;
|
||||
auto& _hist = get_sampling_on_child_threads_history();
|
||||
_hist.emplace_back(_last);
|
||||
return _last;
|
||||
}
|
||||
|
||||
bool
|
||||
pop_enable_sampling_on_child_threads()
|
||||
{
|
||||
auto& _hist = get_sampling_on_child_threads_history();
|
||||
if(!_hist.empty())
|
||||
{
|
||||
bool _restored = _hist.back();
|
||||
_hist.pop_back();
|
||||
sampling_on_child_threads() = _restored;
|
||||
}
|
||||
return sampling_on_child_threads();
|
||||
}
|
||||
|
||||
void
|
||||
set_sampling_on_all_future_threads(bool _v)
|
||||
{
|
||||
for(size_t i = 0; i < max_supported_threads; ++i)
|
||||
get_sampling_on_child_threads_history(i).emplace_back(_v);
|
||||
}
|
||||
} // namespace omnitrace
|
||||
|
||||
@@ -117,11 +117,39 @@ struct scoped_thread_state
|
||||
scoped_thread_state(ThreadState _v) { push_thread_state(_v); }
|
||||
~scoped_thread_state() { pop_thread_state(); }
|
||||
};
|
||||
|
||||
// query current value
|
||||
bool
|
||||
sampling_enabled_on_child_threads();
|
||||
|
||||
// use this to disable sampling in a region (e.g. right before thread creation)
|
||||
bool
|
||||
push_enable_sampling_on_child_threads(bool _v);
|
||||
|
||||
// use this to restore previous setting
|
||||
bool
|
||||
pop_enable_sampling_on_child_threads();
|
||||
|
||||
// make sure every newly created thread starts with this value
|
||||
void
|
||||
set_sampling_on_all_future_threads(bool _v);
|
||||
|
||||
struct scoped_child_sampling
|
||||
{
|
||||
scoped_child_sampling(bool _v) { push_enable_sampling_on_child_threads(_v); }
|
||||
~scoped_child_sampling() { pop_enable_sampling_on_child_threads(); }
|
||||
};
|
||||
} // namespace omnitrace
|
||||
|
||||
#define OMNITRACE_SCOPED_THREAD_STATE(STATE) \
|
||||
::omnitrace::scoped_thread_state OMNITRACE_VARIABLE( \
|
||||
OMNITRACE_VAR_NAME_COMBINE(scoped_thread_state_, __LINE__)) \
|
||||
::omnitrace::scoped_thread_state OMNITRACE_VARIABLE(_scoped_thread_state_, __LINE__) \
|
||||
{ \
|
||||
::omnitrace::STATE \
|
||||
}
|
||||
|
||||
// Declares a ::omnitrace::scoped_child_sampling local initialized with VALUE. The
// variable name is built from the current line number via OMNITRACE_VARIABLE.
#define OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(VALUE)                                \
    ::omnitrace::scoped_child_sampling OMNITRACE_VARIABLE(_scoped_child_sampling_,       \
                                                          __LINE__){ VALUE }
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
#include "library/components/backtrace_metrics.hpp"
|
||||
#include "library/components/backtrace_timestamp.hpp"
|
||||
#include "library/components/fwd.hpp"
|
||||
#include "library/components/pthread_gotcha.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/debug.hpp"
|
||||
#include "library/ptl.hpp"
|
||||
@@ -60,6 +59,8 @@
|
||||
#include <timemory/variadic.hpp>
|
||||
|
||||
#include <array>
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <initializer_list>
|
||||
@@ -161,6 +162,79 @@ get_sampler_running(int64_t _tid)
|
||||
return _v.at(_tid);
|
||||
}
|
||||
|
||||
// Returns the function-local static condition variable that the sampling
// duration thread waits on.
auto&
get_duration_cv()
{
    static std::condition_variable _cv{};
    return _cv;
}
|
||||
|
||||
// Returns the function-local static owner of the sampling duration thread
// (empty until a thread is assigned).
auto&
get_duration_thread()
{
    static std::unique_ptr<std::thread> _thread{};
    return _thread;
}
|
||||
|
||||
void
|
||||
start_duration_thread()
|
||||
{
|
||||
static std::mutex _start_mutex{};
|
||||
std::unique_lock<std::mutex> _start_lk{ _start_mutex, std::defer_lock };
|
||||
if(!_start_lk.owns_lock()) _start_lk.lock();
|
||||
|
||||
if(!get_duration_thread() && config::get_sampling_duration() > 0.0)
|
||||
{
|
||||
// we may need to protect against recursion bc of pthread wrapper
|
||||
static bool _protect = false;
|
||||
if(_protect) return;
|
||||
_protect = true;
|
||||
auto _now = std::chrono::steady_clock::now();
|
||||
auto _end = _now + std::chrono::nanoseconds{ static_cast<uint64_t>(
|
||||
config::get_sampling_duration() * units::sec) };
|
||||
auto _func = [_end]() {
|
||||
thread_info::init(true);
|
||||
std::mutex _mutex{};
|
||||
bool _wait = true;
|
||||
while(_wait)
|
||||
{
|
||||
_wait = false;
|
||||
std::unique_lock<std::mutex> _lk{ _mutex };
|
||||
get_duration_cv().wait_until(_lk, _end);
|
||||
auto _premature = (std::chrono::steady_clock::now() < _end);
|
||||
auto _finalized = (get_state() == State::Finalized);
|
||||
if(_premature && !_finalized)
|
||||
{
|
||||
// protect against spurious wakeups
|
||||
OMNITRACE_VERBOSE(
|
||||
2, "%sSpurious wakeup of sampling duration thread...\n",
|
||||
tim::log::color::warning());
|
||||
_wait = true;
|
||||
}
|
||||
else if(_finalized)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
OMNITRACE_VERBOSE(1,
|
||||
"Sampling duration of %f seconds has elapsed. "
|
||||
"Shutting down sampling...\n",
|
||||
config::get_sampling_duration());
|
||||
shutdown();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
OMNITRACE_VERBOSE(1, "Sampling will be disabled after %f seconds...\n",
|
||||
config::get_sampling_duration());
|
||||
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
get_duration_thread() = std::make_unique<std::thread>(_func);
|
||||
_protect = false;
|
||||
}
|
||||
}
|
||||
|
||||
std::set<int>
|
||||
configure(bool _setup, int64_t _tid = threading::get_id())
|
||||
{
|
||||
@@ -170,10 +244,24 @@ configure(bool _setup, int64_t _tid = threading::get_id())
|
||||
bool _is_running = (!_running) ? false : *_running;
|
||||
auto& _signal_types = sampling::get_signal_types(_tid);
|
||||
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
auto _dtor = scope::destructor{ []() {
|
||||
pthread_gotcha::pop_enable_sampling_on_child_threads();
|
||||
} };
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
|
||||
auto&& _cpu_tids = get_sampling_cpu_tids();
|
||||
auto&& _real_tids = get_sampling_real_tids();
|
||||
|
||||
auto _erase_tid_signal = [_tid, &_signal_types](auto& _tids, int _signum) {
|
||||
if(!_tids.empty())
|
||||
{
|
||||
if(_tids.count(_tid) == 0)
|
||||
{
|
||||
OMNITRACE_VERBOSE(3, "Disabling SIG%i from thread %li\n", _signum, _tid);
|
||||
_signal_types->erase(_signum);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
_erase_tid_signal(_cpu_tids, get_cputime_signal());
|
||||
_erase_tid_signal(_real_tids, get_realtime_signal());
|
||||
|
||||
if(_setup && !_sampler && !_is_running && !_signal_types->empty())
|
||||
{
|
||||
@@ -253,6 +341,7 @@ configure(bool _setup, int64_t _tid = threading::get_id())
|
||||
|
||||
*_running = true;
|
||||
sampling::get_sampler_init(_tid)->sample();
|
||||
start_duration_thread();
|
||||
_sampler->start();
|
||||
}
|
||||
else if(!_setup && _sampler && _is_running)
|
||||
@@ -265,6 +354,7 @@ configure(bool _setup, int64_t _tid = threading::get_id())
|
||||
sampling::block_signals(*_signal_types);
|
||||
}
|
||||
|
||||
get_duration_cv().notify_one();
|
||||
if(_tid == 0)
|
||||
{
|
||||
// this propagates to all threads
|
||||
@@ -278,6 +368,12 @@ configure(bool _setup, int64_t _tid = threading::get_id())
|
||||
*get_sampler_running(i) = false;
|
||||
}
|
||||
}
|
||||
|
||||
if(get_duration_thread())
|
||||
{
|
||||
get_duration_thread()->join();
|
||||
get_duration_thread().reset();
|
||||
}
|
||||
}
|
||||
|
||||
_sampler->stop();
|
||||
@@ -363,8 +459,8 @@ post_process()
|
||||
for(size_t i = 0; i < max_supported_threads; ++i)
|
||||
backtrace_metrics::configure(false, i);
|
||||
|
||||
OMNITRACE_VERBOSE(1 || get_debug_sampling(), "Post-processing sampling data...\n");
|
||||
|
||||
size_t _total_data = 0;
|
||||
size_t _total_threads = 0;
|
||||
for(size_t i = 0; i < max_supported_threads; ++i)
|
||||
{
|
||||
auto& _sampler = get_sampler(i);
|
||||
@@ -398,7 +494,7 @@ post_process()
|
||||
_sampler->stop();
|
||||
auto& _raw_data = _sampler->get_data();
|
||||
|
||||
OMNITRACE_VERBOSE(0 || get_debug_sampling(),
|
||||
OMNITRACE_VERBOSE(2 || get_debug_sampling(),
|
||||
"Sampler data for thread %lu has %zu initial entries...\n", i,
|
||||
_raw_data.size());
|
||||
|
||||
@@ -430,23 +526,27 @@ post_process()
|
||||
continue;
|
||||
}
|
||||
|
||||
OMNITRACE_VERBOSE(0 || get_debug_sampling(),
|
||||
OMNITRACE_VERBOSE(2 || get_debug_sampling(),
|
||||
"Sampler data for thread %lu has %zu valid entries...\n", i,
|
||||
_raw_data.size());
|
||||
|
||||
_total_data += _raw_data.size();
|
||||
_total_threads += 1;
|
||||
|
||||
if(get_use_perfetto()) post_process_perfetto(i, _init, _data);
|
||||
if(get_use_timemory()) post_process_timemory(i, _init, _data);
|
||||
}
|
||||
|
||||
OMNITRACE_VERBOSE(0 || get_debug_sampling(),
|
||||
"Post-processing sampling entries completed\n");
|
||||
OMNITRACE_VERBOSE(3 || get_debug_sampling(), "Destroying samplers...\n");
|
||||
|
||||
for(size_t i = 0; i < max_supported_threads; ++i)
|
||||
{
|
||||
get_sampler(i).reset();
|
||||
}
|
||||
|
||||
OMNITRACE_VERBOSE(0 || get_debug_sampling(), "Post-processing samplers destroyed\n");
|
||||
OMNITRACE_VERBOSE(1 || get_debug_sampling(),
|
||||
"Collected %zu samples from %zu threads...\n", _total_data,
|
||||
_total_threads);
|
||||
}
|
||||
|
||||
namespace
|
||||
@@ -535,17 +635,8 @@ post_process_perfetto(int64_t _tid, const bundle_t* _init,
|
||||
}
|
||||
};
|
||||
|
||||
if(_tid == 0 && config::get_mode() == Mode::Sampling &&
|
||||
config::get_perfetto_fill_policy() == "discard")
|
||||
{
|
||||
_process_perfetto(_data);
|
||||
}
|
||||
else
|
||||
{
|
||||
pthread_gotcha::push_enable_sampling_on_child_threads(false);
|
||||
std::thread{ _process_perfetto_wrapper }.join();
|
||||
pthread_gotcha::pop_enable_sampling_on_child_threads();
|
||||
}
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
std::thread{ _process_perfetto_wrapper }.join();
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
Reference in New Issue
Block a user