Deprecate omnitrace use thread sampling (#68)

* Deprecate OMNITRACE_USE_THREAD_SAMPLING

* Reworked config based on OMNITRACE_MODE

- config::set_default_setting_value(...)
- config::get_mode() is now dynamically deduced
- moved tweaking defaults from library.cpp to config::configure_mode_settings(...)
- timemory submodule update fixing vsetting issue

* runtime.md update

* revert accidental lambda name change

* Reintroduce (deprecated) OMNITRACE_ROCM_SMI_DEVICES

- add handle_deprecated_setting(...) for this deprecated setting

[ROCm/rocprofiler-systems commit: 27e4e82376]
Cette révision appartient à :
Jonathan R. Madsen
2022-06-24 15:03:15 -05:00
révisé par GitHub
Parent efa5da659c
révision b1bd1a4d94
12 fichiers modifiés avec 288 ajouts et 149 suppressions
Sous-module projects/rocprofiler-systems/external/timemory mis-à-jour : de69314112...9b1e0c1560
+1 -1
Voir le fichier
@@ -165,7 +165,7 @@ std::string settings_rexclude_exact =
"FREQ|"
"STACK_CLEARING|TARGET_PID|THROTTLE_(COUNT|VALUE)|(AUTO|FLAMEGRAPH)_OUTPUT|"
"(ENABLE|DISABLE)_ALL_SIGNALS|ALLOW_SIGNAL_HANDLER|CTEST_NOTES|INSTRUCTION_"
"ROOFLINE)$";
"ROOFLINE|ADD_SECONDARY)$";
// leading matches, e.g. OMNITRACE_MPI_[A-Z_]+
std::string settings_rexclude_begin =
+2 -2
Voir le fichier
@@ -154,7 +154,7 @@ $ omnitrace-avail -S -bd
| OMNITRACE_USE_ROCM_SMI | Enable sampling GPU power, temp, uti... |
| OMNITRACE_USE_ROCTRACER | Enable ROCM tracing |
| OMNITRACE_USE_SAMPLING | Enable statistical sampling of call-... |
| OMNITRACE_USE_THREAD_SAMPLING | Enable a background thread which sam... |
| OMNITRACE_USE_PROCESS_SAMPLING | Enable a background thread which sam... |
| OMNITRACE_USE_TIMEMORY | Enable timemory backend |
| OMNITRACE_VERBOSE | Verbosity level |
| OMNITRACE_WIDTH | Set the global output width for comp... |
@@ -669,7 +669,7 @@ $SAMPLE = OFF
OMNITRACE_USE_PERFETTO = $ENABLE
OMNITRACE_USE_TIMEMORY = $ENABLE
OMNITRACE_USE_SAMPLING = $SAMPLE
OMNITRACE_USE_THREAD_SAMPLING = $SAMPLE
OMNITRACE_USE_PROCESS_SAMPLING = $SAMPLE
OMNITRACE_CRITICAL_TRACE = OFF
# debug
+2 -2
Voir le fichier
@@ -65,12 +65,12 @@ set(library_sources
${CMAKE_CURRENT_LIST_DIR}/library/mproc.cpp
${CMAKE_CURRENT_LIST_DIR}/library/ompt.cpp
${CMAKE_CURRENT_LIST_DIR}/library/perfetto.cpp
${CMAKE_CURRENT_LIST_DIR}/library/process_sampler.cpp
${CMAKE_CURRENT_LIST_DIR}/library/ptl.cpp
${CMAKE_CURRENT_LIST_DIR}/library/runtime.cpp
${CMAKE_CURRENT_LIST_DIR}/library/sampling.cpp
${CMAKE_CURRENT_LIST_DIR}/library/state.cpp
${CMAKE_CURRENT_LIST_DIR}/library/thread_data.cpp
${CMAKE_CURRENT_LIST_DIR}/library/thread_sampler.cpp
${CMAKE_CURRENT_LIST_DIR}/library/timemory.cpp
${CMAKE_CURRENT_LIST_DIR}/library/components/backtrace.cpp
${CMAKE_CURRENT_LIST_DIR}/library/components/fork_gotcha.cpp
@@ -95,12 +95,12 @@ set(library_headers
${CMAKE_CURRENT_LIST_DIR}/library/mproc.hpp
${CMAKE_CURRENT_LIST_DIR}/library/ompt.hpp
${CMAKE_CURRENT_LIST_DIR}/library/perfetto.hpp
${CMAKE_CURRENT_LIST_DIR}/library/process_sampler.hpp
${CMAKE_CURRENT_LIST_DIR}/library/ptl.hpp
${CMAKE_CURRENT_LIST_DIR}/library/runtime.hpp
${CMAKE_CURRENT_LIST_DIR}/library/sampling.hpp
${CMAKE_CURRENT_LIST_DIR}/library/state.hpp
${CMAKE_CURRENT_LIST_DIR}/library/thread_data.hpp
${CMAKE_CURRENT_LIST_DIR}/library/thread_sampler.hpp
${CMAKE_CURRENT_LIST_DIR}/library/timemory.hpp
${CMAKE_CURRENT_LIST_DIR}/library/utility.hpp
${CMAKE_CURRENT_LIST_DIR}/library/components/fwd.hpp
+13 -81
Voir le fichier
@@ -36,10 +36,10 @@
#include "library/defines.hpp"
#include "library/gpu.hpp"
#include "library/ompt.hpp"
#include "library/process_sampler.hpp"
#include "library/ptl.hpp"
#include "library/sampling.hpp"
#include "library/thread_data.hpp"
#include "library/thread_sampler.hpp"
#include "library/timemory.hpp"
#include <timemory/utility/procfs/maps.hpp>
@@ -64,8 +64,8 @@ struct user_regions
using omni_functors = omnitrace::component::functors<omni_regions>;
using user_functors = omnitrace::component::functors<user_regions>;
TIMEMORY_DEFINE_NAME_TRAIT("host", omni_functors);
TIMEMORY_DEFINE_NAME_TRAIT("user", user_functors);
TIMEMORY_DEFINE_NAME_TRAIT("host", omni_functors)
TIMEMORY_DEFINE_NAME_TRAIT("user", user_functors)
TIMEMORY_INVOKE_PREINIT(omni_functors)
TIMEMORY_INVOKE_PREINIT(user_functors)
@@ -117,6 +117,9 @@ ensure_finalization(bool _static_init = false)
// see:
// https://github.com/ROCm-Developer-Tools/roctracer/issues/22#issuecomment-572814465
tim::set_env("HSA_ENABLE_INTERRUPT", "0", 0);
#if defined(OMNITRACE_USE_ROCTRACER) && OMNITRACE_USE_ROCTRACER > 0
tim::set_env("HSA_TOOLS_LIB", "libomnitrace.so", 0);
#endif
}
return scope::destructor{ []() { omnitrace_finalize_hidden(); } };
}
@@ -428,7 +431,6 @@ omnitrace_init_library_hidden()
(void) _tid;
static bool _once = false;
auto _mode = get_mode();
auto _debug_init = get_debug_init();
OMNITRACE_CONDITIONAL_BASIC_PRINT_F(_debug_init, "State is %s...\n",
@@ -472,68 +474,16 @@ omnitrace_init_library_hidden()
if(_debug_init) config::set_setting_value("OMNITRACE_DEBUG", _debug_value);
} };
OMNITRACE_DEBUG_F("\n");
// below will effectively do:
// get_cpu_cid_stack(0)->emplace_back(-1);
// plus query some env variables
add_critical_trace<Device::CPU, Phase::NONE>(0, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0);
if(gpu::device_count() == 0 && get_state() != State::Active)
{
OMNITRACE_DEBUG_F(
"No HIP devices were found: disabling roctracer and rocm_smi...\n");
get_use_roctracer() = false;
get_use_rocm_smi() = false;
}
if(_mode == Mode::Sampling)
{
OMNITRACE_CONDITIONAL_PRINT_F(get_verbose() >= 0,
"Disabling critical trace in %s mode...\n",
std::to_string(_mode).c_str());
get_use_critical_trace() = false;
get_use_sampling() = tim::get_env("OMNITRACE_USE_SAMPLING", true);
get_use_thread_sampling() =
tim::get_env("OMNITRACE_USE_THREAD_SAMPLING", get_use_sampling());
}
else if(_mode == Mode::Coverage)
{
for(auto&& itr :
{ "USE_SAMPLING", "USE_THREAD_SAMPLING", "CRITICAL_TRACE", "USE_ROCTRACER",
"USE_ROCM_SMI", "USE_PERFETTO", "USE_TIMEMORY", "USE_KOKKOSP", "USE_OMPT" })
{
auto _name = JOIN('_', "OMNITRACE", itr);
if(!config::set_setting_value(_name, false))
{
OMNITRACE_VERBOSE_F(4, "No configuration setting named '%s'",
_name.c_str());
}
}
}
tim::trait::runtime_enabled<comp::roctracer>::set(get_use_roctracer());
tim::trait::runtime_enabled<comp::roctracer_data>::set(get_use_roctracer() &&
get_use_timemory());
get_instrumentation_interval() = std::max<size_t>(get_instrumentation_interval(), 1);
if(get_use_kokkosp())
{
auto _force = 0;
auto _current_kokkosp_lib = tim::get_env<std::string>("KOKKOS_PROFILE_LIBRARY");
if(std::regex_search(_current_kokkosp_lib, std::regex{ "libtimemory\\." }))
_force = 1;
tim::set_env("KOKKOS_PROFILE_LIBRARY", "libomnitrace.so", _force);
}
// recycle all subsequent thread ids
threading::recycle_ids() =
tim::get_env<bool>("OMNITRACE_RECYCLE_TIDS", !get_use_sampling());
#if defined(OMNITRACE_USE_ROCTRACER) && OMNITRACE_USE_ROCTRACER > 0
tim::set_env("HSA_TOOLS_LIB", "libomnitrace.so", 0);
#endif
OMNITRACE_CONDITIONAL_BASIC_PRINT_F(_debug_init, "\n");
}
//======================================================================================//
@@ -582,10 +532,10 @@ omnitrace_init_tooling_hidden()
// if set to finalized, don't continue
if(get_state() > State::Active) return;
if(config::get_trace_thread_locks()) pthread_mutex_gotcha::validate();
if(get_use_thread_sampling())
if(get_use_process_sampling())
{
pthread_gotcha::push_enable_sampling_on_child_threads(false);
thread_sampler::setup();
process_sampler::setup();
pthread_gotcha::pop_enable_sampling_on_child_threads();
}
if(get_use_sampling())
@@ -929,24 +879,6 @@ omnitrace_init_hidden(const char* _mode, bool _is_binary_rewrite, const char* _a
tim::set_env("OMNITRACE_MODE", _mode, 0);
config::is_binary_rewrite() = _is_binary_rewrite;
if(get_mode() == Mode::Coverage)
{
tim::set_env("OMNITRACE_USE_PERFETTO", "OFF", 0);
tim::set_env("OMNITRACE_USE_TIMEMORY", "OFF", 0);
tim::set_env("OMNITRACE_USE_KOKKOSP", "OFF", 0);
tim::set_env("OMNITRACE_USE_SAMPLING", "OFF", 0);
tim::set_env("OMNITRACE_USE_ROCTRACER", "OFF", 0);
tim::set_env("OMNITRACE_USE_ROCM_SMI", "OFF", 0);
}
// set OMNITRACE_USE_SAMPLING to ON by default if mode is sampling
tim::set_env("OMNITRACE_USE_SAMPLING", (get_mode() == Mode::Sampling) ? "ON" : "OFF",
0);
// default to KokkosP enabled when sampling, otherwise default to off
tim::set_env("OMNITRACE_USE_KOKKOSP", (get_mode() == Mode::Sampling) ? "ON" : "OFF",
0);
if(!_set_mpi_called)
{
_start_gotcha_callback = []() { get_gotcha_bundle()->start(); };
@@ -1083,10 +1015,10 @@ omnitrace_finalize_hidden(void)
OMNITRACE_VERBOSE_F(1, "Shutting down pthread gotcha...\n");
pthread_gotcha::shutdown();
if(get_use_thread_sampling())
if(get_use_process_sampling())
{
OMNITRACE_VERBOSE_F(1, "Shutting down background sampler...\n");
thread_sampler::shutdown();
process_sampler::shutdown();
}
if(get_use_roctracer())
@@ -1185,10 +1117,10 @@ omnitrace_finalize_hidden(void)
tasking::join();
}
if(get_use_thread_sampling())
if(get_use_process_sampling())
{
OMNITRACE_VERBOSE_F(1, "Post-processing the system-level samples...\n");
thread_sampler::post_process();
process_sampler::post_process();
}
if(get_use_critical_trace())
+6 -4
Voir le fichier
@@ -31,10 +31,12 @@
#include <timemory/components/user_bundle/types.hpp>
#include <timemory/enum.h>
#include <timemory/mpl/concepts.hpp>
#include <timemory/mpl/type_traits.hpp>
#include <type_traits>
TIMEMORY_DEFINE_NS_API(project, omnitrace)
TIMEMORY_DEFINE_NS_API(category, process_sampling)
TIMEMORY_DECLARE_COMPONENT(roctracer)
@@ -146,17 +148,17 @@ TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_percent, project::omni
category::interrupt_sampling)
TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_busy, project::omnitrace,
tpls::rocm, device::gpu, os::supports_linux,
category::sampling, category::thread_sampling)
category::sampling, category::process_sampling)
TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_memory, project::omnitrace,
tpls::rocm, device::gpu, os::supports_linux, category::memory,
category::sampling, category::thread_sampling)
category::sampling, category::process_sampling)
TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_power, project::omnitrace,
tpls::rocm, device::gpu, os::supports_linux, category::power,
category::sampling, category::thread_sampling)
category::sampling, category::process_sampling)
TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_temp, project::omnitrace,
tpls::rocm, device::gpu, os::supports_linux,
category::temperature, category::sampling,
category::thread_sampling)
category::process_sampling)
TIMEMORY_PROPERTY_SPECIALIZATION(omnitrace::component::omnitrace, OMNITRACE_COMPONENT,
"omnitrace", "omnitrace_component")
@@ -28,7 +28,6 @@
#include "library/debug.hpp"
#include "library/runtime.hpp"
#include "library/sampling.hpp"
#include "library/thread_sampler.hpp"
#include "library/utility.hpp"
#include "timemory/backends/threading.hpp"
+220 -40
Voir le fichier
@@ -23,6 +23,7 @@
#include "library/config.hpp"
#include "library/debug.hpp"
#include "library/defines.hpp"
#include "library/gpu.hpp"
#include <timemory/backends/dmp.hpp>
#include <timemory/backends/mpi.hpp>
@@ -72,7 +73,7 @@ get_setting_name(std::string _v)
}
#define OMNITRACE_CONFIG_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, ...) \
{ \
[&]() { \
auto _ret = _config->insert<TYPE, TYPE>( \
ENV_NAME, get_setting_name(ENV_NAME), DESCRIPTION, INITIAL_VALUE, \
std::set<std::string>{ "custom", "omnitrace", "omnitrace_library", \
@@ -80,23 +81,25 @@ get_setting_name(std::string _v)
if(!_ret.second) \
OMNITRACE_PRINT("Warning! Duplicate setting: %s / %s\n", \
get_setting_name(ENV_NAME).c_str(), ENV_NAME); \
}
return _config->find(ENV_NAME)->second; \
}()
// below does not include "omnitrace_library"
#define OMNITRACE_CONFIG_EXT_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, ...) \
{ \
[&]() { \
auto _ret = _config->insert<TYPE, TYPE>( \
ENV_NAME, get_setting_name(ENV_NAME), DESCRIPTION, INITIAL_VALUE, \
std::set<std::string>{ "custom", "omnitrace", __VA_ARGS__ }); \
if(!_ret.second) \
OMNITRACE_PRINT("Warning! Duplicate setting: %s / %s\n", \
get_setting_name(ENV_NAME).c_str(), ENV_NAME); \
}
return _config->find(ENV_NAME)->second; \
}()
// setting + command line option
#define OMNITRACE_CONFIG_CL_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, \
CMD_LINE, ...) \
{ \
[&]() { \
auto _ret = _config->insert<TYPE, TYPE>( \
ENV_NAME, get_setting_name(ENV_NAME), DESCRIPTION, INITIAL_VALUE, \
std::set<std::string>{ "custom", "omnitrace", "omnitrace_library", \
@@ -105,7 +108,8 @@ get_setting_name(std::string _v)
if(!_ret.second) \
OMNITRACE_PRINT("Warning! Duplicate setting: %s / %s\n", \
get_setting_name(ENV_NAME).c_str(), ENV_NAME); \
}
return _config->find(ENV_NAME)->second; \
}()
} // namespace
inline namespace config
@@ -159,6 +163,12 @@ configure_settings(bool _init)
auto _omnitrace_debug = _config->get<bool>("OMNITRACE_DEBUG");
if(_omnitrace_debug) tim::set_env("TIMEMORY_DEBUG_SETTINGS", "1", 0);
OMNITRACE_CONFIG_SETTING(
std::string, "OMNITRACE_MODE",
"Data collection mode. Used to set default values for OMNITRACE_USE_* options. "
"Typically set by omnitrace binary instrumenter.",
"trace", "backend");
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_CI",
"Enable some runtime validation checks (typically enabled "
"for continuous integration)",
@@ -188,9 +198,15 @@ configure_settings(bool _init)
"backend", "sampling");
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_THREAD_SAMPLING",
"Enable a background thread which samples system metrics "
"such as the CPU/GPU freq, power, etc.",
true, "backend", "sampling", "thread_sampling");
"[DEPRECATED] Renamed to OMNITRACE_USE_PROCESS_SAMPLING",
true, "backend", "sampling", "process_sampling",
"deprecated");
OMNITRACE_CONFIG_SETTING(
bool, "OMNITRACE_USE_PROCESS_SAMPLING",
"Enable a background thread which samples process-level and system metrics "
"such as the CPU/GPU freq, power, memory usage, etc.",
true, "backend", "sampling", "process_sampling");
OMNITRACE_CONFIG_SETTING(
bool, "OMNITRACE_USE_PID",
@@ -221,27 +237,31 @@ configure_settings(bool _init)
OMNITRACE_CONFIG_SETTING(
double, "OMNITRACE_SAMPLING_FREQ",
"Number of software interrupts per second when OMNITTRACE_USE_SAMPLING=ON", 10.0,
"sampling", "thread_sampling");
"sampling", "process_sampling");
OMNITRACE_CONFIG_SETTING(
double, "OMNITRACE_SAMPLING_DELAY",
"Number of seconds to wait before the first sampling signal is delivered, "
"increasing this value can fix deadlocks during init",
0.5, "sampling", "thread_sampling");
0.5, "sampling", "process_sampling");
OMNITRACE_CONFIG_SETTING(
std::string, "OMNITRACE_SAMPLING_CPUS",
"CPUs to collect frequency information for. Values should be separated by commas "
"and can be explicit or ranges, e.g. 0,1,5-8. An empty value implies 'all' and "
"'none' suppresses all CPU frequency sampling",
"", "thread_sampling");
"", "process_sampling");
OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_ROCM_SMI_DEVICES",
"[DEPRECATED] Renamed to OMNITRACE_SAMPLING_GPUS", "all",
"rocm_smi", "rocm", "process_sampling");
OMNITRACE_CONFIG_SETTING(
std::string, "OMNITRACE_SAMPLING_GPUS",
"Devices to query when OMNITRACE_USE_ROCM_SMI=ON. Values should be separated by "
"commas and can be explicit or ranges, e.g. 0,1,5-8. An empty value implies "
"'all' and 'none' suppresses all GPU sampling",
"all", "rocm_smi", "rocm", "thread_sampling");
"all", "rocm_smi", "rocm", "process_sampling");
auto _backend = tim::get_env_choice<std::string>(
"OMNITRACE_PERFETTO_BACKEND",
@@ -253,10 +273,8 @@ configure_settings(bool _init)
OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_PERFETTO_BACKEND",
"Specify the perfetto backend to activate. Options are: "
"'inprocess', 'system', or 'all'",
_backend, "perfetto");
_config->find("OMNITRACE_PERFETTO_BACKEND")
->second->set_choices({ "inprocess", "system", "all" });
_backend, "perfetto")
->set_choices({ "inprocess", "system", "all" });
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_CRITICAL_TRACE",
"Enable generation of the critical trace", false, "backend",
@@ -325,10 +343,8 @@ configure_settings(bool _init)
std::string, "OMNITRACE_PERFETTO_FILL_POLICY",
"Behavior when perfetto buffer is full. 'discard' will ignore new entries, "
"'ring_buffer' will overwrite old entries",
"discard", "perfetto", "data");
_config->find("OMNITRACE_PERFETTO_FILL_POLICY")
->second->set_choices({ "fill", "discard" });
"discard", "perfetto", "data")
->set_choices({ "fill", "discard" });
OMNITRACE_CONFIG_EXT_SETTING(int64_t, "OMNITRACE_CRITICAL_TRACE_COUNT",
"Number of critical trace to export (0 == all)", 0,
@@ -359,7 +375,9 @@ configure_settings(bool _init)
OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_OUTPUT_FILE", "Perfetto filename",
"", "perfetto", "io", "filename");
// set the defaults
_config->get_flamegraph_output() = false;
_config->get_ctest_notes() = false;
_config->get_cout_output() = false;
_config->get_file_output() = true;
_config->get_json_output() = true;
@@ -471,6 +489,10 @@ configure_settings(bool _init)
_combine_perfetto_traces->second->set(_config->get<bool>("collapse_processes"));
}
handle_deprecated_setting("OMNITRACE_ROCM_SMI_DEVICES", "OMNITRACE_SAMPLING_GPUS");
handle_deprecated_setting("OMNITRACE_USE_THREAD_SAMPLING",
"OMNITRACE_USE_PROCESS_SAMPLING");
scope::get_fields()[scope::flat::value] = _config->get_flat_profile();
scope::get_fields()[scope::timeline::value] = _config->get_timeline_profile();
@@ -479,8 +501,88 @@ configure_settings(bool _init)
#if !defined(TIMEMORY_USE_MPI) && defined(TIMEMORY_USE_MPI_HEADERS)
if(tim::dmp::is_initialized()) settings::default_process_suffix() = tim::dmp::rank();
#endif
OMNITRACE_CONDITIONAL_BASIC_PRINT(get_verbose_env() > 0, "configuration complete\n");
auto _dl_verbose = _config->find("OMNITRACE_DL_VERBOSE");
tim::set_env(std::string{ _dl_verbose->first }, _dl_verbose->second->as_string(), 0);
#if !defined(TIMEMORY_USE_MPI) || TIMEMORY_USE_MPI == 0
_config->disable("OMNITRACE_PERFETTO_COMBINE_TRACES");
#endif
configure_mode_settings();
configure_signal_handler();
configure_disabled_settings();
OMNITRACE_CONDITIONAL_BASIC_PRINT(get_verbose_env() > 0, "configuration complete\n");
}
void
configure_mode_settings()
{
auto _set = [](const std::string& _name, bool _v) {
if(!set_setting_value(_name, _v))
{
OMNITRACE_VERBOSE(
4, "[configure_mode_settings] No configuration setting named '%s'...\n",
_name.data());
}
else
{
OMNITRACE_VERBOSE(
1, "[configure_mode_settings] Overriding %s to %s in %s mode...\n",
_name.c_str(), JOIN("", std::boolalpha, _v).c_str(),
std::to_string(get_mode()).c_str());
}
};
if(get_mode() == Mode::Coverage)
{
set_default_setting_value("OMNITRACE_USE_CODE_COVERAGE", true);
_set("OMNITRACE_USE_PERFETTO", false);
_set("OMNITRACE_USE_TIMEMORY", false);
_set("OMNITRACE_USE_ROCM_SMI", false);
_set("OMNITRACE_USE_ROCTRACER", false);
_set("OMNITRACE_USE_KOKKOSP", false);
_set("OMNITRACE_USE_OMPT", false);
_set("OMNITRACE_USE_SAMPLING", false);
_set("OMNITRACE_USE_PROCESS_SAMPLING", false);
_set("OMNITRACE_CRITICAL_TRACE", false);
}
else if(get_mode() == Mode::Sampling)
{
set_default_setting_value("OMNITRACE_USE_SAMPLING", true);
set_default_setting_value("OMNITRACE_USE_PROCESS_SAMPLING", true);
_set("OMNITRACE_CRITICAL_TRACE", false);
}
if(gpu::device_count() == 0)
{
OMNITRACE_VERBOSE_F(
1, "No HIP devices were found: disabling roctracer and rocm_smi...\n");
get_use_roctracer() = false;
get_use_rocm_smi() = false;
}
get_instrumentation_interval() = std::max<size_t>(get_instrumentation_interval(), 1);
if(get_use_kokkosp())
{
auto _force = 0;
auto _current_kokkosp_lib = tim::get_env<std::string>("KOKKOS_PROFILE_LIBRARY");
if(std::regex_search(_current_kokkosp_lib, std::regex{ "libtimemory\\." }))
_force = 1;
tim::set_env("KOKKOS_PROFILE_LIBRARY", "libomnitrace.so", _force);
}
// recycle all subsequent thread ids
threading::recycle_ids() =
tim::get_env<bool>("OMNITRACE_RECYCLE_TIDS", !get_use_sampling());
}
void
configure_signal_handler()
{
auto _config = settings::shared_instance();
auto _ignore_dyninst_trampoline =
tim::get_env("OMNITRACE_IGNORE_DYNINST_TRAMPOLINE", false);
// this is how dyninst looks up the env variable
@@ -533,8 +635,14 @@ configure_settings(bool _init)
_old_handler = signal(_dyninst_trampoline_signal,
static_cast<signal_handler_t>(_trampoline_handler));
}
}
auto _handle_use_option = [](const std::string& _opt, const std::string& _category) {
void
configure_disabled_settings()
{
auto _config = settings::shared_instance();
auto _handle_use_option = [_config](const std::string& _opt,
const std::string& _category) {
if(!_config->get<bool>(_opt))
{
auto _disabled = _config->disable_category(_category);
@@ -552,7 +660,7 @@ configure_settings(bool _init)
};
_handle_use_option("OMNITRACE_USE_SAMPLING", "sampling");
_handle_use_option("OMNITRACE_USE_THREAD_SAMPLING", "thread_sampling");
_handle_use_option("OMNITRACE_USE_PROCESS_SAMPLING", "process_sampling");
_handle_use_option("OMNITRACE_USE_KOKKOSP", "kokkos");
_handle_use_option("OMNITRACE_USE_PERFETTO", "perfetto");
_handle_use_option("OMNITRACE_USE_TIMEMORY", "timemory");
@@ -573,31 +681,79 @@ configure_settings(bool _init)
_config->disable_category("ompt");
#endif
// user bundle components
_config->disable_category("throttle");
// user bundle components
_config->disable("components");
_config->disable("global_components");
_config->disable("ompt_components");
_config->disable("kokkos_components");
_config->disable("trace_components");
_config->disable("profiler_components");
// miscellaneous
_config->disable("destructor_report");
_config->disable("stack_clearing");
_config->disable("add_secondary");
// output fields
_config->disable("auto_output");
_config->disable("file_output");
_config->disable("plot_output");
_config->disable("dart_output");
_config->disable("flamegraph_output");
_config->disable("separator_freq");
_config->disable("width");
_config->disable("max_width");
}
auto _dl_verbose = _config->find("OMNITRACE_DL_VERBOSE");
tim::set_env(std::string{ _dl_verbose->first }, _dl_verbose->second->as_string(), 0);
void
handle_deprecated_setting(const std::string& _old, const std::string& _new, int _verbose)
{
auto _config = settings::shared_instance();
auto _old_setting = _config->find(_old);
auto _new_setting = _config->find(_new);
#if !defined(TIMEMORY_USE_MPI) || TIMEMORY_USE_MPI == 0
_config->disable("OMNITRACE_PERFETTO_COMBINE_TRACES");
#endif
if(_old_setting == _config->end()) return;
OMNITRACE_CI_THROW(_new_setting == _config->end(),
"New configuration setting not found: '%s'", _new.c_str());
if(_old_setting->second->get_environ_updated() ||
_old_setting->second->get_config_updated())
{
auto _separator = [_verbose]() {
std::array<char, 79> _v = {};
_v.fill('=');
_v.back() = '\0';
OMNITRACE_VERBOSE(_verbose, "#%s#\n", _v.data());
};
_separator();
OMNITRACE_VERBOSE(_verbose, "#\n");
OMNITRACE_VERBOSE(_verbose, "# DEPRECATION NOTICE:\n");
OMNITRACE_VERBOSE(_verbose, "# %s is deprecated!\n", _old.c_str());
OMNITRACE_VERBOSE(_verbose, "# Use %s instead!\n", _new.c_str());
if(!_new_setting->second->get_environ_updated() &&
!_new_setting->second->get_config_updated())
{
auto _before = _new_setting->second->as_string();
_new_setting->second->parse(_old_setting->second->as_string());
auto _after = _new_setting->second->as_string();
if(_before != _after)
{
std::string _cause =
(_old_setting->second->get_environ_updated()) ? "environ" : "config";
OMNITRACE_VERBOSE(_verbose, "#\n");
OMNITRACE_VERBOSE(_verbose, "# %s :: '%s' -> '%s'\n", _new.c_str(),
_before.c_str(), _after.c_str());
OMNITRACE_VERBOSE(_verbose, "# via %s (%s)\n", _old.c_str(),
_cause.c_str());
}
}
OMNITRACE_VERBOSE(_verbose, "#\n");
_separator();
}
}
void
@@ -653,6 +809,9 @@ print_settings(
std::sort(_data.begin(), _data.end(), [](const auto& lhs, const auto& rhs) {
auto _npos = std::string::npos;
// OMNITRACE_MODE always first
if(lhs.at(0) == "OMNITRACE_MODE") return true;
if(rhs.at(0) == "OMNITRACE_MODE") return false;
// OMNITRACE_CONFIG_FILE always second
if(lhs.at(0).find("OMNITRACE_CONFIG") != _npos) return true;
if(rhs.at(0).find("OMNITRACE_CONFIG") != _npos) return false;
// OMNITRACE_USE_* prioritized
@@ -756,7 +915,8 @@ get_config_file()
Mode
get_mode()
{
static auto _v = []() {
if(!settings_are_configured())
{
auto _mode = tim::get_env_choice<std::string>(
"OMNITRACE_MODE", "trace", { "trace", "sampling", "coverage" });
if(_mode == "sampling")
@@ -764,8 +924,26 @@ get_mode()
else if(_mode == "coverage")
return Mode::Coverage;
return Mode::Trace;
}();
return _v;
}
static auto _m =
std::unordered_map<std::string_view, Mode>{ { "trace", Mode::Trace },
{ "sampling", Mode::Sampling },
{ "coverage", Mode::Coverage } };
static auto _v = get_config()->find("OMNITRACE_MODE");
try
{
return _m.at(static_cast<tim::tsettings<std::string>&>(*_v->second).get());
} catch(std::runtime_error& _e)
{
auto _mode = static_cast<tim::tsettings<std::string>&>(*_v->second).get();
std::stringstream _ss{};
for(const auto& itr : _v->second->get_choices())
_ss << ", " << itr;
auto _msg = (_ss.str().length() > 2) ? _ss.str().substr(2) : std::string{};
OMNITRACE_THROW("[%s] invalid mode %s. Choices: %s\n", __FUNCTION__,
_mode.c_str(), _msg.c_str());
}
return Mode::Trace;
}
bool&
@@ -818,8 +996,10 @@ get_debug()
bool
get_debug_sampling()
{
static bool _v = tim::get_env<bool>("OMNITRACE_DEBUG_SAMPLING", get_debug_env());
return (_v || get_debug());
static bool _v =
tim::get_env<bool>("OMNITRACE_DEBUG_SAMPLING",
(settings_are_configured() ? get_debug() : get_debug_env()));
return _v;
}
int
@@ -888,9 +1068,9 @@ get_use_sampling()
}
bool&
get_use_thread_sampling()
get_use_process_sampling()
{
static auto _v = get_config()->find("OMNITRACE_USE_THREAD_SAMPLING");
static auto _v = get_config()->find("OMNITRACE_USE_PROCESS_SAMPLING");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
@@ -1142,7 +1322,7 @@ get_critical_trace_count()
}
double&
get_thread_sampling_freq()
get_process_sampling_freq()
{
static auto _v = std::min<double>(get_sampling_freq(), 1000.0);
return _v;
+29 -3
Voir le fichier
@@ -48,6 +48,19 @@ settings_are_configured() OMNITRACE_HOT;
void
configure_settings(bool _init = true);
void
configure_mode_settings();
void
configure_signal_handler();
void
configure_disabled_settings();
void
handle_deprecated_setting(const std::string& _old, const std::string& _new,
int _verbose = 0);
void
print_banner(std::ostream& _os = std::cerr);
@@ -73,6 +86,19 @@ set_setting_value(const std::string& _name, Tp&& _v)
return _setting->second->set(std::forward<Tp>(_v));
}
// Assigns a value to the named setting only when that setting has not already been
// updated through a config file or the environment.
//
// Returns false when the setting is not found, when its stored pointer is empty, or
// when get_config_updated() / get_environ_updated() reports a prior update; otherwise
// returns the result of the setting's set(...) call.
template <typename Tp>
bool
set_default_setting_value(const std::string& _name, Tp&& _v)
{
    auto _settings = tim::settings::shared_instance();
    auto _entry    = _settings->find(_name);

    // nothing to do when the setting is unknown or has no backing object
    const bool _usable = (_entry != _settings->end()) && _entry->second;
    if(!_usable) return false;

    // an explicit value (config first, then environment) takes precedence
    const bool _explicitly_set =
        _entry->second->get_config_updated() || _entry->second->get_environ_updated();
    if(_explicitly_set) return false;

    return _entry->second->set(std::forward<Tp>(_v));
}
template <typename Tp>
std::pair<bool, Tp>
get_setting_value(const std::string& _name)
@@ -92,7 +118,7 @@ std::string
get_config_file();
Mode
get_mode() OMNITRACE_HOT;
get_mode();
bool&
is_attached();
@@ -146,7 +172,7 @@ bool&
get_use_sampling() OMNITRACE_HOT;
bool&
get_use_thread_sampling() OMNITRACE_HOT;
get_use_process_sampling() OMNITRACE_HOT;
bool&
get_use_pid();
@@ -237,7 +263,7 @@ std::string
get_sampling_cpus();
double&
get_thread_sampling_freq();
get_process_sampling_freq();
std::string
get_sampling_gpus();
@@ -20,7 +20,7 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "library/thread_sampler.hpp"
#include "library/process_sampler.hpp"
#include "library/components/pthread_gotcha.hpp"
#include "library/components/rocm_smi.hpp"
#include "library/config.hpp"
@@ -32,7 +32,7 @@
namespace omnitrace
{
namespace thread_sampler
namespace process_sampler
{
namespace
{
@@ -113,7 +113,7 @@ sampler::poll(std::atomic<State>* _state, nsec_t _interval, promise_t* _ready)
void
sampler::setup()
{
if(!get_use_thread_sampling())
if(!get_use_process_sampling())
{
OMNITRACE_DEBUG("Background sampler is disabled...\n");
return;
@@ -148,7 +148,7 @@ sampler::setup()
polling_finished = std::make_unique<promise_t>();
auto _freq = get_thread_sampling_freq();
auto _freq = get_process_sampling_freq();
uint64_t _msec_freq = (1.0 / _freq) * 1.0e3;
promise_t _prom{};
@@ -180,7 +180,7 @@ sampler::shutdown()
{
size_t _nitr = 0;
constexpr size_t _nitr_max = 100;
uint64_t _freq = (1.0 / get_thread_sampling_freq()) * 1.0e3;
uint64_t _freq = (1.0 / get_process_sampling_freq()) * 1.0e3;
// wait until the sampler is no longer sampling
std::this_thread::sleep_for(msec_t{ _freq });
@@ -226,5 +226,5 @@ sampler::set_state(state_t _state)
{
get_sampler_state().store(_state);
}
} // namespace thread_sampler
} // namespace process_sampler
} // namespace omnitrace
@@ -39,7 +39,7 @@
namespace omnitrace
{
namespace thread_sampler
namespace process_sampler
{
struct instance
{
@@ -99,5 +99,5 @@ post_process()
sampler::post_process();
}
//
} // namespace thread_sampler
} // namespace process_sampler
} // namespace omnitrace
+6 -6
Voir le fichier
@@ -27,7 +27,7 @@ set(_base_environment
"OMNITRACE_USE_PERFETTO=ON"
"OMNITRACE_USE_TIMEMORY=ON"
"OMNITRACE_USE_SAMPLING=ON"
"OMNITRACE_USE_THREAD_SAMPLING=ON"
"OMNITRACE_USE_PROCESS_SAMPLING=ON"
"OMNITRACE_TIME_OUTPUT=OFF"
"OMP_PROC_BIND=spread"
"OMP_PLACES=threads"
@@ -37,7 +37,7 @@ set(_base_environment
set(_lock_environment
"OMNITRACE_USE_SAMPLING=OFF"
"OMNITRACE_USE_THREAD_SAMPLING=OFF"
"OMNITRACE_USE_PROCESS_SAMPLING=OFF"
"OMNITRACE_CRITICAL_TRACE=ON"
"OMNITRACE_COLLAPSE_THREADS=ON"
"OMNITRACE_TRACE_THREAD_LOCKS=ON"
@@ -64,7 +64,7 @@ set(_perfetto_environment
"OMNITRACE_USE_PERFETTO=ON"
"OMNITRACE_USE_TIMEMORY=OFF"
"OMNITRACE_USE_SAMPLING=ON"
"OMNITRACE_USE_THREAD_SAMPLING=ON"
"OMNITRACE_USE_PROCESS_SAMPLING=ON"
"OMNITRACE_TIME_OUTPUT=OFF"
"OMP_PROC_BIND=spread"
"OMP_PLACES=threads"
@@ -76,7 +76,7 @@ set(_timemory_environment
"OMNITRACE_USE_PERFETTO=OFF"
"OMNITRACE_USE_TIMEMORY=ON"
"OMNITRACE_USE_SAMPLING=ON"
"OMNITRACE_USE_THREAD_SAMPLING=ON"
"OMNITRACE_USE_PROCESS_SAMPLING=ON"
"OMNITRACE_TIME_OUTPUT=OFF"
"OMP_PROC_BIND=spread"
"OMP_PLACES=threads"
@@ -90,7 +90,7 @@ set(_python_environment
"OMNITRACE_USE_PERFETTO=ON"
"OMNITRACE_USE_TIMEMORY=ON"
"OMNITRACE_USE_SAMPLING=OFF"
"OMNITRACE_USE_THREAD_SAMPLING=ON"
"OMNITRACE_USE_PROCESS_SAMPLING=ON"
"OMNITRACE_TIME_OUTPUT=OFF"
"OMNITRACE_TREE_OUTPUT=OFF"
"OMNITRACE_USE_PID=OFF"
@@ -102,7 +102,7 @@ set(_attach_environment
"OMNITRACE_USE_PERFETTO=ON"
"OMNITRACE_USE_TIMEMORY=ON"
"OMNITRACE_USE_SAMPLING=OFF"
"OMNITRACE_USE_THREAD_SAMPLING=ON"
"OMNITRACE_USE_PROCESS_SAMPLING=ON"
"OMNITRACE_USE_CRITICAL_TRACE=ON"
"OMNITRACE_USE_OMPT=ON"
"OMNITRACE_USE_KOKKOSP=ON"