diff --git a/external/timemory b/external/timemory index de69314112..9b1e0c1560 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit de69314112095f45974e0575caa12c9909e0aa93 +Subproject commit 9b1e0c15605a8b3bcc483f8f6c52932aae5e5fbd diff --git a/source/bin/omnitrace-avail/avail.cpp b/source/bin/omnitrace-avail/avail.cpp index 0af062b233..41b2542176 100644 --- a/source/bin/omnitrace-avail/avail.cpp +++ b/source/bin/omnitrace-avail/avail.cpp @@ -165,7 +165,7 @@ std::string settings_rexclude_exact = "FREQ|" "STACK_CLEARING|TARGET_PID|THROTTLE_(COUNT|VALUE)|(AUTO|FLAMEGRAPH)_OUTPUT|" "(ENABLE|DISABLE)_ALL_SIGNALS|ALLOW_SIGNAL_HANDLER|CTEST_NOTES|INSTRUCTION_" - "ROOFLINE)$"; + "ROOFLINE|ADD_SECONDARY)$"; // leading matches, e.g. OMNITRACE_MPI_[A-Z_]+ std::string settings_rexclude_begin = diff --git a/source/docs/runtime.md b/source/docs/runtime.md index 1b560dcc4f..5440902681 100644 --- a/source/docs/runtime.md +++ b/source/docs/runtime.md @@ -154,7 +154,7 @@ $ omnitrace-avail -S -bd | OMNITRACE_USE_ROCM_SMI | Enable sampling GPU power, temp, uti... | | OMNITRACE_USE_ROCTRACER | Enable ROCM tracing | | OMNITRACE_USE_SAMPLING | Enable statistical sampling of call-... | -| OMNITRACE_USE_THREAD_SAMPLING | Enable a background thread which sam... | +| OMNITRACE_USE_PROCESS_SAMPLING | Enable a background thread which sam... | | OMNITRACE_USE_TIMEMORY | Enable timemory backend | | OMNITRACE_VERBOSE | Verbosity level | | OMNITRACE_WIDTH | Set the global output width for comp... | @@ -669,7 +669,7 @@ $SAMPLE = OFF OMNITRACE_USE_PERFETTO = $ENABLE OMNITRACE_USE_TIMEMORY = $ENABLE OMNITRACE_USE_SAMPLING = $SAMPLE -OMNITRACE_USE_THREAD_SAMPLING = $SAMPLE +OMNITRACE_USE_PROCESS_SAMPLING = $SAMPLE OMNITRACE_CRITICAL_TRACE = OFF # debug diff --git a/source/lib/omnitrace/CMakeLists.txt b/source/lib/omnitrace/CMakeLists.txt index bd34ab4479..70aa52c78f 100644 --- a/source/lib/omnitrace/CMakeLists.txt +++ b/source/lib/omnitrace/CMakeLists.txt @@ -65,12 +65,12 @@ set(library_sources ${CMAKE_CURRENT_LIST_DIR}/library/mproc.cpp ${CMAKE_CURRENT_LIST_DIR}/library/ompt.cpp ${CMAKE_CURRENT_LIST_DIR}/library/perfetto.cpp + ${CMAKE_CURRENT_LIST_DIR}/library/process_sampler.cpp ${CMAKE_CURRENT_LIST_DIR}/library/ptl.cpp ${CMAKE_CURRENT_LIST_DIR}/library/runtime.cpp ${CMAKE_CURRENT_LIST_DIR}/library/sampling.cpp ${CMAKE_CURRENT_LIST_DIR}/library/state.cpp ${CMAKE_CURRENT_LIST_DIR}/library/thread_data.cpp - ${CMAKE_CURRENT_LIST_DIR}/library/thread_sampler.cpp ${CMAKE_CURRENT_LIST_DIR}/library/timemory.cpp ${CMAKE_CURRENT_LIST_DIR}/library/components/backtrace.cpp ${CMAKE_CURRENT_LIST_DIR}/library/components/fork_gotcha.cpp @@ -95,12 +95,12 @@ set(library_headers ${CMAKE_CURRENT_LIST_DIR}/library/mproc.hpp ${CMAKE_CURRENT_LIST_DIR}/library/ompt.hpp ${CMAKE_CURRENT_LIST_DIR}/library/perfetto.hpp + ${CMAKE_CURRENT_LIST_DIR}/library/process_sampler.hpp ${CMAKE_CURRENT_LIST_DIR}/library/ptl.hpp ${CMAKE_CURRENT_LIST_DIR}/library/runtime.hpp ${CMAKE_CURRENT_LIST_DIR}/library/sampling.hpp ${CMAKE_CURRENT_LIST_DIR}/library/state.hpp ${CMAKE_CURRENT_LIST_DIR}/library/thread_data.hpp - ${CMAKE_CURRENT_LIST_DIR}/library/thread_sampler.hpp ${CMAKE_CURRENT_LIST_DIR}/library/timemory.hpp ${CMAKE_CURRENT_LIST_DIR}/library/utility.hpp ${CMAKE_CURRENT_LIST_DIR}/library/components/fwd.hpp diff --git a/source/lib/omnitrace/library.cpp b/source/lib/omnitrace/library.cpp index bbe92dab73..9e29b229a5 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -36,10 +36,10 @@ #include "library/defines.hpp" #include "library/gpu.hpp" #include "library/ompt.hpp" +#include "library/process_sampler.hpp" #include "library/ptl.hpp" #include "library/sampling.hpp" #include "library/thread_data.hpp" -#include "library/thread_sampler.hpp" #include "library/timemory.hpp" #include @@ -64,8 +64,8 @@ struct user_regions using omni_functors = omnitrace::component::functors; using user_functors = omnitrace::component::functors; -TIMEMORY_DEFINE_NAME_TRAIT("host", omni_functors); -TIMEMORY_DEFINE_NAME_TRAIT("user", user_functors); +TIMEMORY_DEFINE_NAME_TRAIT("host", omni_functors) +TIMEMORY_DEFINE_NAME_TRAIT("user", user_functors) TIMEMORY_INVOKE_PREINIT(omni_functors) TIMEMORY_INVOKE_PREINIT(user_functors) @@ -117,6 +117,9 @@ ensure_finalization(bool _static_init = false) // see: // https://github.com/ROCm-Developer-Tools/roctracer/issues/22#issuecomment-572814465 tim::set_env("HSA_ENABLE_INTERRUPT", "0", 0); +#if defined(OMNITRACE_USE_ROCTRACER) && OMNITRACE_USE_ROCTRACER > 0 + tim::set_env("HSA_TOOLS_LIB", "libomnitrace.so", 0); +#endif } return scope::destructor{ []() { omnitrace_finalize_hidden(); } }; } @@ -428,7 +431,6 @@ omnitrace_init_library_hidden() (void) _tid; static bool _once = false; - auto _mode = get_mode(); auto _debug_init = get_debug_init(); OMNITRACE_CONDITIONAL_BASIC_PRINT_F(_debug_init, "State is %s...\n", @@ -472,68 +474,16 @@ omnitrace_init_library_hidden() if(_debug_init) config::set_setting_value("OMNITRACE_DEBUG", _debug_value); } }; - OMNITRACE_DEBUG_F("\n"); - // below will effectively do: // get_cpu_cid_stack(0)->emplace_back(-1); // plus query some env variables add_critical_trace(0, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0); - if(gpu::device_count() == 0 && get_state() != State::Active) - { - OMNITRACE_DEBUG_F( - "No HIP devices were found: disabling roctracer and rocm_smi...\n"); - get_use_roctracer() = false; - get_use_rocm_smi() = false; - } - - if(_mode == Mode::Sampling) - { - OMNITRACE_CONDITIONAL_PRINT_F(get_verbose() >= 0, - "Disabling critical trace in %s mode...\n", - std::to_string(_mode).c_str()); - get_use_critical_trace() = false; - get_use_sampling() = tim::get_env("OMNITRACE_USE_SAMPLING", true); - get_use_thread_sampling() = - tim::get_env("OMNITRACE_USE_THREAD_SAMPLING", get_use_sampling()); - } - else if(_mode == Mode::Coverage) - { - for(auto&& itr : - { "USE_SAMPLING", "USE_THREAD_SAMPLING", "CRITICAL_TRACE", "USE_ROCTRACER", - "USE_ROCM_SMI", "USE_PERFETTO", "USE_TIMEMORY", "USE_KOKKOSP", "USE_OMPT" }) - { - auto _name = JOIN('_', "OMNITRACE", itr); - if(!config::set_setting_value(_name, false)) - { - OMNITRACE_VERBOSE_F(4, "No configuration setting named '%s'", - _name.c_str()); - } - } - } - tim::trait::runtime_enabled::set(get_use_roctracer()); tim::trait::runtime_enabled::set(get_use_roctracer() && get_use_timemory()); - get_instrumentation_interval() = std::max(get_instrumentation_interval(), 1); - - if(get_use_kokkosp()) - { - auto _force = 0; - auto _current_kokkosp_lib = tim::get_env("KOKKOS_PROFILE_LIBRARY"); - if(std::regex_search(_current_kokkosp_lib, std::regex{ "libtimemory\\." })) - _force = 1; - tim::set_env("KOKKOS_PROFILE_LIBRARY", "libomnitrace.so", _force); - } - - // recycle all subsequent thread ids - threading::recycle_ids() = - tim::get_env("OMNITRACE_RECYCLE_TIDS", !get_use_sampling()); - -#if defined(OMNITRACE_USE_ROCTRACER) && OMNITRACE_USE_ROCTRACER > 0 - tim::set_env("HSA_TOOLS_LIB", "libomnitrace.so", 0); -#endif + OMNITRACE_CONDITIONAL_BASIC_PRINT_F(_debug_init, "\n"); } //======================================================================================// @@ -582,10 +532,10 @@ omnitrace_init_tooling_hidden() // if set to finalized, don't continue if(get_state() > State::Active) return; if(config::get_trace_thread_locks()) pthread_mutex_gotcha::validate(); - if(get_use_thread_sampling()) + if(get_use_process_sampling()) { pthread_gotcha::push_enable_sampling_on_child_threads(false); - thread_sampler::setup(); + process_sampler::setup(); pthread_gotcha::pop_enable_sampling_on_child_threads(); } if(get_use_sampling()) @@ -929,24 +879,6 @@ omnitrace_init_hidden(const char* _mode, bool _is_binary_rewrite, const char* _a tim::set_env("OMNITRACE_MODE", _mode, 0); config::is_binary_rewrite() = _is_binary_rewrite; - if(get_mode() == Mode::Coverage) - { - tim::set_env("OMNITRACE_USE_PERFETTO", "OFF", 0); - tim::set_env("OMNITRACE_USE_TIMEMORY", "OFF", 0); - tim::set_env("OMNITRACE_USE_KOKKOSP", "OFF", 0); - tim::set_env("OMNITRACE_USE_SAMPLING", "OFF", 0); - tim::set_env("OMNITRACE_USE_ROCTRACER", "OFF", 0); - tim::set_env("OMNITRACE_USE_ROCM_SMI", "OFF", 0); - } - - // set OMNITRACE_USE_SAMPLING to ON by default if mode is sampling - tim::set_env("OMNITRACE_USE_SAMPLING", (get_mode() == Mode::Sampling) ? "ON" : "OFF", - 0); - - // default to KokkosP enabled when sampling, otherwise default to off - tim::set_env("OMNITRACE_USE_KOKKOSP", (get_mode() == Mode::Sampling) ? "ON" : "OFF", - 0); - if(!_set_mpi_called) { _start_gotcha_callback = []() { get_gotcha_bundle()->start(); }; @@ -1083,10 +1015,10 @@ omnitrace_finalize_hidden(void) OMNITRACE_VERBOSE_F(1, "Shutting down pthread gotcha...\n"); pthread_gotcha::shutdown(); - if(get_use_thread_sampling()) + if(get_use_process_sampling()) { OMNITRACE_VERBOSE_F(1, "Shutting down background sampler...\n"); - thread_sampler::shutdown(); + process_sampler::shutdown(); } if(get_use_roctracer()) @@ -1185,10 +1117,10 @@ omnitrace_finalize_hidden(void) tasking::join(); } - if(get_use_thread_sampling()) + if(get_use_process_sampling()) { OMNITRACE_VERBOSE_F(1, "Post-processing the system-level samples...\n"); - thread_sampler::post_process(); + process_sampler::post_process(); } if(get_use_critical_trace()) diff --git a/source/lib/omnitrace/library/components/fwd.hpp b/source/lib/omnitrace/library/components/fwd.hpp index f50749f45a..90ffcd4df4 100644 --- a/source/lib/omnitrace/library/components/fwd.hpp +++ b/source/lib/omnitrace/library/components/fwd.hpp @@ -31,10 +31,12 @@ #include #include #include +#include #include TIMEMORY_DEFINE_NS_API(project, omnitrace) +TIMEMORY_DEFINE_NS_API(category, process_sampling) TIMEMORY_DECLARE_COMPONENT(roctracer) @@ -146,17 +148,17 @@ TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_percent, project::omni category::interrupt_sampling) TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_busy, project::omnitrace, tpls::rocm, device::gpu, os::supports_linux, - category::sampling, category::thread_sampling) + category::sampling, category::process_sampling) TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_memory, project::omnitrace, tpls::rocm, device::gpu, os::supports_linux, category::memory, - category::sampling, category::thread_sampling) + category::sampling, category::process_sampling) TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_power, project::omnitrace, tpls::rocm, device::gpu, os::supports_linux, category::power, - category::sampling, category::thread_sampling) + category::sampling, category::process_sampling) TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_temp, project::omnitrace, tpls::rocm, device::gpu, os::supports_linux, category::temperature, category::sampling, - category::thread_sampling) + category::process_sampling) TIMEMORY_PROPERTY_SPECIALIZATION(omnitrace::component::omnitrace, OMNITRACE_COMPONENT, "omnitrace", "omnitrace_component") diff --git a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp index 420437cdf0..b65e06fa5f 100644 --- a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp @@ -28,7 +28,6 @@ #include "library/debug.hpp" #include "library/runtime.hpp" #include "library/sampling.hpp" -#include "library/thread_sampler.hpp" #include "library/utility.hpp" #include "timemory/backends/threading.hpp" diff --git a/source/lib/omnitrace/library/config.cpp b/source/lib/omnitrace/library/config.cpp index d91d2b981c..99c8f24530 100644 --- a/source/lib/omnitrace/library/config.cpp +++ b/source/lib/omnitrace/library/config.cpp @@ -23,6 +23,7 @@ #include "library/config.hpp" #include "library/debug.hpp" #include "library/defines.hpp" +#include "library/gpu.hpp" #include #include @@ -72,7 +73,7 @@ get_setting_name(std::string _v) } #define OMNITRACE_CONFIG_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, ...) \ - { \ + [&]() { \ auto _ret = _config->insert( \ ENV_NAME, get_setting_name(ENV_NAME), DESCRIPTION, INITIAL_VALUE, \ std::set{ "custom", "omnitrace", "omnitrace_library", \ @@ -80,23 +81,25 @@ get_setting_name(std::string _v) if(!_ret.second) \ OMNITRACE_PRINT("Warning! Duplicate setting: %s / %s\n", \ get_setting_name(ENV_NAME).c_str(), ENV_NAME); \ - } + return _config->find(ENV_NAME)->second; \ + }() // below does not include "omnitrace_library" #define OMNITRACE_CONFIG_EXT_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, ...) \ - { \ + [&]() { \ auto _ret = _config->insert( \ ENV_NAME, get_setting_name(ENV_NAME), DESCRIPTION, INITIAL_VALUE, \ std::set{ "custom", "omnitrace", __VA_ARGS__ }); \ if(!_ret.second) \ OMNITRACE_PRINT("Warning! Duplicate setting: %s / %s\n", \ get_setting_name(ENV_NAME).c_str(), ENV_NAME); \ - } + return _config->find(ENV_NAME)->second; \ + }() // setting + command line option #define OMNITRACE_CONFIG_CL_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, \ CMD_LINE, ...) \ - { \ + [&]() { \ auto _ret = _config->insert( \ ENV_NAME, get_setting_name(ENV_NAME), DESCRIPTION, INITIAL_VALUE, \ std::set{ "custom", "omnitrace", "omnitrace_library", \ @@ -105,7 +108,8 @@ get_setting_name(std::string _v) if(!_ret.second) \ OMNITRACE_PRINT("Warning! Duplicate setting: %s / %s\n", \ get_setting_name(ENV_NAME).c_str(), ENV_NAME); \ - } + return _config->find(ENV_NAME)->second; \ + }() } // namespace inline namespace config @@ -159,6 +163,12 @@ configure_settings(bool _init) auto _omnitrace_debug = _config->get("OMNITRACE_DEBUG"); if(_omnitrace_debug) tim::set_env("TIMEMORY_DEBUG_SETTINGS", "1", 0); + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_MODE", + "Data collection mode. Used to set default values for OMNITRACE_USE_* options. " + "Typically set by omnitrace binary instrumenter.", + "trace", "backend"); + OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_CI", "Enable some runtime validation checks (typically enabled " "for continuous integration)", @@ -188,9 +198,15 @@ configure_settings(bool _init) "backend", "sampling"); OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_THREAD_SAMPLING", - "Enable a background thread which samples system metrics " - "such as the CPU/GPU freq, power, etc.", - true, "backend", "sampling", "thread_sampling"); + "[DEPRECATED] Renamed to OMNITRACE_USE_PROCESS_SAMPLING", + true, "backend", "sampling", "process_sampling", + "deprecated"); + + OMNITRACE_CONFIG_SETTING( + bool, "OMNITRACE_USE_PROCESS_SAMPLING", + "Enable a background thread which samples process-level and system metrics " + "such as the CPU/GPU freq, power, memory usage, etc.", + true, "backend", "sampling", "process_sampling"); OMNITRACE_CONFIG_SETTING( bool, "OMNITRACE_USE_PID", @@ -221,27 +237,31 @@ configure_settings(bool _init) OMNITRACE_CONFIG_SETTING( double, "OMNITRACE_SAMPLING_FREQ", "Number of software interrupts per second when OMNITTRACE_USE_SAMPLING=ON", 10.0, - "sampling", "thread_sampling"); + "sampling", "process_sampling"); OMNITRACE_CONFIG_SETTING( double, "OMNITRACE_SAMPLING_DELAY", "Number of seconds to wait before the first sampling signal is delivered, " "increasing this value can fix deadlocks during init", - 0.5, "sampling", "thread_sampling"); + 0.5, "sampling", "process_sampling"); OMNITRACE_CONFIG_SETTING( std::string, "OMNITRACE_SAMPLING_CPUS", "CPUs to collect frequency information for. Values should be separated by commas " "and can be explicit or ranges, e.g. 0,1,5-8. An empty value implies 'all' and " "'none' suppresses all CPU frequency sampling", - "", "thread_sampling"); + "", "process_sampling"); + + OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_ROCM_SMI_DEVICES", + "[DEPRECATED] Renamed to OMNITRACE_SAMPLING_GPUS", "all", + "rocm_smi", "rocm", "process_sampling"); OMNITRACE_CONFIG_SETTING( std::string, "OMNITRACE_SAMPLING_GPUS", "Devices to query when OMNITRACE_USE_ROCM_SMI=ON. Values should be separated by " "commas and can be explicit or ranges, e.g. 0,1,5-8. An empty value implies " "'all' and 'none' suppresses all GPU sampling", - "all", "rocm_smi", "rocm", "thread_sampling"); + "all", "rocm_smi", "rocm", "process_sampling"); auto _backend = tim::get_env_choice( "OMNITRACE_PERFETTO_BACKEND", @@ -253,10 +273,8 @@ configure_settings(bool _init) OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_PERFETTO_BACKEND", "Specify the perfetto backend to activate. Options are: " "'inprocess', 'system', or 'all'", - _backend, "perfetto"); - - _config->find("OMNITRACE_PERFETTO_BACKEND") - ->second->set_choices({ "inprocess", "system", "all" }); + _backend, "perfetto") + ->set_choices({ "inprocess", "system", "all" }); OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_CRITICAL_TRACE", "Enable generation of the critical trace", false, "backend", @@ -325,10 +343,8 @@ configure_settings(bool _init) std::string, "OMNITRACE_PERFETTO_FILL_POLICY", "Behavior when perfetto buffer is full. 'discard' will ignore new entries, " "'ring_buffer' will overwrite old entries", - "discard", "perfetto", "data"); - - _config->find("OMNITRACE_PERFETTO_FILL_POLICY") - ->second->set_choices({ "fill", "discard" }); + "discard", "perfetto", "data") + ->set_choices({ "fill", "discard" }); OMNITRACE_CONFIG_EXT_SETTING(int64_t, "OMNITRACE_CRITICAL_TRACE_COUNT", "Number of critical trace to export (0 == all)", 0, @@ -359,7 +375,9 @@ configure_settings(bool _init) OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_OUTPUT_FILE", "Perfetto filename", "", "perfetto", "io", "filename"); + // set the defaults _config->get_flamegraph_output() = false; + _config->get_ctest_notes() = false; _config->get_cout_output() = false; _config->get_file_output() = true; _config->get_json_output() = true; @@ -471,6 +489,10 @@ configure_settings(bool _init) _combine_perfetto_traces->second->set(_config->get("collapse_processes")); } + handle_deprecated_setting("OMNITRACE_ROCM_SMI_DEVICES", "OMNITRACE_SAMPLING_GPUS"); + handle_deprecated_setting("OMNITRACE_USE_THREAD_SAMPLING", + "OMNITRACE_USE_PROCESS_SAMPLING"); + scope::get_fields()[scope::flat::value] = _config->get_flat_profile(); scope::get_fields()[scope::timeline::value] = _config->get_timeline_profile(); @@ -479,8 +501,88 @@ configure_settings(bool _init) #if !defined(TIMEMORY_USE_MPI) && defined(TIMEMORY_USE_MPI_HEADERS) if(tim::dmp::is_initialized()) settings::default_process_suffix() = tim::dmp::rank(); #endif - OMNITRACE_CONDITIONAL_BASIC_PRINT(get_verbose_env() > 0, "configuration complete\n"); + auto _dl_verbose = _config->find("OMNITRACE_DL_VERBOSE"); + tim::set_env(std::string{ _dl_verbose->first }, _dl_verbose->second->as_string(), 0); + +#if !defined(TIMEMORY_USE_MPI) || TIMEMORY_USE_MPI == 0 + _config->disable("OMNITRACE_PERFETTO_COMBINE_TRACES"); +#endif + + configure_mode_settings(); + configure_signal_handler(); + configure_disabled_settings(); + + OMNITRACE_CONDITIONAL_BASIC_PRINT(get_verbose_env() > 0, "configuration complete\n"); +} + +void +configure_mode_settings() +{ + auto _set = [](const std::string& _name, bool _v) { + if(!set_setting_value(_name, _v)) + { + OMNITRACE_VERBOSE( + 4, "[configure_mode_settings] No configuration setting named '%s'...\n", + _name.data()); + } + else + { + OMNITRACE_VERBOSE( + 1, "[configure_mode_settings] Overriding %s to %s in %s mode...\n", + _name.c_str(), JOIN("", std::boolalpha, _v).c_str(), + std::to_string(get_mode()).c_str()); + } + }; + + if(get_mode() == Mode::Coverage) + { + set_default_setting_value("OMNITRACE_USE_CODE_COVERAGE", true); + _set("OMNITRACE_USE_PERFETTO", false); + _set("OMNITRACE_USE_TIMEMORY", false); + _set("OMNITRACE_USE_ROCM_SMI", false); + _set("OMNITRACE_USE_ROCTRACER", false); + _set("OMNITRACE_USE_KOKKOSP", false); + _set("OMNITRACE_USE_OMPT", false); + _set("OMNITRACE_USE_SAMPLING", false); + _set("OMNITRACE_USE_PROCESS_SAMPLING", false); + _set("OMNITRACE_CRITICAL_TRACE", false); + } + else if(get_mode() == Mode::Sampling) + { + set_default_setting_value("OMNITRACE_USE_SAMPLING", true); + set_default_setting_value("OMNITRACE_USE_PROCESS_SAMPLING", true); + _set("OMNITRACE_CRITICAL_TRACE", false); + } + + if(gpu::device_count() == 0) + { + OMNITRACE_VERBOSE_F( + 1, "No HIP devices were found: disabling roctracer and rocm_smi...\n"); + get_use_roctracer() = false; + get_use_rocm_smi() = false; + } + + get_instrumentation_interval() = std::max(get_instrumentation_interval(), 1); + + if(get_use_kokkosp()) + { + auto _force = 0; + auto _current_kokkosp_lib = tim::get_env("KOKKOS_PROFILE_LIBRARY"); + if(std::regex_search(_current_kokkosp_lib, std::regex{ "libtimemory\\." })) + _force = 1; + tim::set_env("KOKKOS_PROFILE_LIBRARY", "libomnitrace.so", _force); + } + + // recycle all subsequent thread ids + threading::recycle_ids() = + tim::get_env("OMNITRACE_RECYCLE_TIDS", !get_use_sampling()); +} + +void +configure_signal_handler() +{ + auto _config = settings::shared_instance(); auto _ignore_dyninst_trampoline = tim::get_env("OMNITRACE_IGNORE_DYNINST_TRAMPOLINE", false); // this is how dyninst looks up the env variable @@ -533,8 +635,14 @@ configure_settings(bool _init) _old_handler = signal(_dyninst_trampoline_signal, static_cast(_trampoline_handler)); } +} - auto _handle_use_option = [](const std::string& _opt, const std::string& _category) { +void +configure_disabled_settings() +{ + auto _config = settings::shared_instance(); + auto _handle_use_option = [_config](const std::string& _opt, + const std::string& _category) { if(!_config->get(_opt)) { auto _disabled = _config->disable_category(_category); @@ -552,7 +660,7 @@ configure_settings(bool _init) }; _handle_use_option("OMNITRACE_USE_SAMPLING", "sampling"); - _handle_use_option("OMNITRACE_USE_THREAD_SAMPLING", "thread_sampling"); + _handle_use_option("OMNITRACE_USE_PROCESS_SAMPLING", "process_sampling"); _handle_use_option("OMNITRACE_USE_KOKKOSP", "kokkos"); _handle_use_option("OMNITRACE_USE_PERFETTO", "perfetto"); _handle_use_option("OMNITRACE_USE_TIMEMORY", "timemory"); @@ -573,31 +681,79 @@ configure_settings(bool _init) _config->disable_category("ompt"); #endif - // user bundle components _config->disable_category("throttle"); + + // user bundle components _config->disable("components"); _config->disable("global_components"); _config->disable("ompt_components"); _config->disable("kokkos_components"); _config->disable("trace_components"); _config->disable("profiler_components"); + + // miscellaneous _config->disable("destructor_report"); _config->disable("stack_clearing"); + _config->disable("add_secondary"); + + // output fields _config->disable("auto_output"); _config->disable("file_output"); _config->disable("plot_output"); _config->disable("dart_output"); _config->disable("flamegraph_output"); _config->disable("separator_freq"); - _config->disable("width"); - _config->disable("max_width"); +} - auto _dl_verbose = _config->find("OMNITRACE_DL_VERBOSE"); - tim::set_env(std::string{ _dl_verbose->first }, _dl_verbose->second->as_string(), 0); +void +handle_deprecated_setting(const std::string& _old, const std::string& _new, int _verbose) +{ + auto _config = settings::shared_instance(); + auto _old_setting = _config->find(_old); + auto _new_setting = _config->find(_new); -#if !defined(TIMEMORY_USE_MPI) || TIMEMORY_USE_MPI == 0 - _config->disable("OMNITRACE_PERFETTO_COMBINE_TRACES"); -#endif + if(_old_setting == _config->end()) return; + + OMNITRACE_CI_THROW(_new_setting == _config->end(), + "New configuration setting not found: '%s'", _new.c_str()); + + if(_old_setting->second->get_environ_updated() || + _old_setting->second->get_config_updated()) + { + auto _separator = [_verbose]() { + std::array _v = {}; + _v.fill('='); + _v.back() = '\0'; + OMNITRACE_VERBOSE(_verbose, "#%s#\n", _v.data()); + }; + _separator(); + OMNITRACE_VERBOSE(_verbose, "#\n"); + OMNITRACE_VERBOSE(_verbose, "# DEPRECATION NOTICE:\n"); + OMNITRACE_VERBOSE(_verbose, "# %s is deprecated!\n", _old.c_str()); + OMNITRACE_VERBOSE(_verbose, "# Use %s instead!\n", _new.c_str()); + + if(!_new_setting->second->get_environ_updated() && + !_new_setting->second->get_config_updated()) + { + auto _before = _new_setting->second->as_string(); + _new_setting->second->parse(_old_setting->second->as_string()); + auto _after = _new_setting->second->as_string(); + + if(_before != _after) + { + std::string _cause = + (_old_setting->second->get_environ_updated()) ? "environ" : "config"; + OMNITRACE_VERBOSE(_verbose, "#\n"); + OMNITRACE_VERBOSE(_verbose, "# %s :: '%s' -> '%s'\n", _new.c_str(), + _before.c_str(), _after.c_str()); + OMNITRACE_VERBOSE(_verbose, "# via %s (%s)\n", _old.c_str(), + _cause.c_str()); + } + } + + OMNITRACE_VERBOSE(_verbose, "#\n"); + _separator(); + } } void @@ -653,6 +809,9 @@ print_settings( std::sort(_data.begin(), _data.end(), [](const auto& lhs, const auto& rhs) { auto _npos = std::string::npos; // OMNITRACE_CONFIG_FILE always first + if(lhs.at(0) == "OMNITRACE_MODE") return true; + if(rhs.at(0) == "OMNITRACE_MODE") return false; + // OMNITRACE_CONFIG_FILE always second if(lhs.at(0).find("OMNITRACE_CONFIG") != _npos) return true; if(rhs.at(0).find("OMNITRACE_CONFIG") != _npos) return false; // OMNITRACE_USE_* prioritized @@ -756,7 +915,8 @@ get_config_file() Mode get_mode() { - static auto _v = []() { + if(!settings_are_configured()) + { auto _mode = tim::get_env_choice( "OMNITRACE_MODE", "trace", { "trace", "sampling", "coverage" }); if(_mode == "sampling") @@ -764,8 +924,26 @@ get_mode() else if(_mode == "coverage") return Mode::Coverage; return Mode::Trace; - }(); - return _v; + } + static auto _m = + std::unordered_map{ { "trace", Mode::Trace }, + { "sampling", Mode::Sampling }, + { "coverage", Mode::Coverage } }; + static auto _v = get_config()->find("OMNITRACE_MODE"); + try + { + return _m.at(static_cast&>(*_v->second).get()); + } catch(std::runtime_error& _e) + { + auto _mode = static_cast&>(*_v->second).get(); + std::stringstream _ss{}; + for(const auto& itr : _v->second->get_choices()) + _ss << ", " << itr; + auto _msg = (_ss.str().length() > 2) ? _ss.str().substr(2) : std::string{}; + OMNITRACE_THROW("[%s] invalid mode %s. Choices: %s\n", __FUNCTION__, + _mode.c_str(), _msg.c_str()); + } + return Mode::Trace; } bool& @@ -818,8 +996,10 @@ get_debug() bool get_debug_sampling() { - static bool _v = tim::get_env("OMNITRACE_DEBUG_SAMPLING", get_debug_env()); - return (_v || get_debug()); + static bool _v = + tim::get_env("OMNITRACE_DEBUG_SAMPLING", + (settings_are_configured() ? get_debug() : get_debug_env())); + return _v; } int @@ -888,9 +1068,9 @@ get_use_sampling() } bool& -get_use_thread_sampling() +get_use_process_sampling() { - static auto _v = get_config()->find("OMNITRACE_USE_THREAD_SAMPLING"); + static auto _v = get_config()->find("OMNITRACE_USE_PROCESS_SAMPLING"); return static_cast&>(*_v->second).get(); } @@ -1142,7 +1322,7 @@ get_critical_trace_count() } double& -get_thread_sampling_freq() +get_process_sampling_freq() { static auto _v = std::min(get_sampling_freq(), 1000.0); return _v; diff --git a/source/lib/omnitrace/library/config.hpp b/source/lib/omnitrace/library/config.hpp index 005bceb8b3..60ec36a93b 100644 --- a/source/lib/omnitrace/library/config.hpp +++ b/source/lib/omnitrace/library/config.hpp @@ -48,6 +48,19 @@ settings_are_configured() OMNITRACE_HOT; void configure_settings(bool _init = true); +void +configure_mode_settings(); + +void +configure_signal_handler(); + +void +configure_disabled_settings(); + +void +handle_deprecated_setting(const std::string& _old, const std::string& _new, + int _verbose = 0); + void print_banner(std::ostream& _os = std::cerr); @@ -73,6 +86,19 @@ set_setting_value(const std::string& _name, Tp&& _v) return _setting->second->set(std::forward(_v)); } +template +bool +set_default_setting_value(const std::string& _name, Tp&& _v) +{ + auto _instance = tim::settings::shared_instance(); + auto _setting = _instance->find(_name); + if(_setting == _instance->end()) return false; + if(!_setting->second) return false; + if(_setting->second->get_config_updated() || _setting->second->get_environ_updated()) + return false; + return _setting->second->set(std::forward(_v)); +} + template std::pair get_setting_value(const std::string& _name) @@ -92,7 +118,7 @@ std::string get_config_file(); Mode -get_mode() OMNITRACE_HOT; +get_mode(); bool& is_attached(); @@ -146,7 +172,7 @@ bool& get_use_sampling() OMNITRACE_HOT; bool& -get_use_thread_sampling() OMNITRACE_HOT; +get_use_process_sampling() OMNITRACE_HOT; bool& get_use_pid(); @@ -237,7 +263,7 @@ std::string get_sampling_cpus(); double& -get_thread_sampling_freq(); +get_process_sampling_freq(); std::string get_sampling_gpus(); diff --git a/source/lib/omnitrace/library/thread_sampler.cpp b/source/lib/omnitrace/library/process_sampler.cpp similarity index 96% rename from source/lib/omnitrace/library/thread_sampler.cpp rename to source/lib/omnitrace/library/process_sampler.cpp index bc25dbf154..e9c141e0fd 100644 --- a/source/lib/omnitrace/library/thread_sampler.cpp +++ b/source/lib/omnitrace/library/process_sampler.cpp @@ -20,7 +20,7 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -#include "library/thread_sampler.hpp" +#include "library/process_sampler.hpp" #include "library/components/pthread_gotcha.hpp" #include "library/components/rocm_smi.hpp" #include "library/config.hpp" @@ -32,7 +32,7 @@ namespace omnitrace { -namespace thread_sampler +namespace process_sampler { namespace { @@ -113,7 +113,7 @@ sampler::poll(std::atomic* _state, nsec_t _interval, promise_t* _ready) void sampler::setup() { - if(!get_use_thread_sampling()) + if(!get_use_process_sampling()) { OMNITRACE_DEBUG("Background sampler is disabled...\n"); return; @@ -148,7 +148,7 @@ sampler::setup() polling_finished = std::make_unique(); - auto _freq = get_thread_sampling_freq(); + auto _freq = get_process_sampling_freq(); uint64_t _msec_freq = (1.0 / _freq) * 1.0e3; promise_t _prom{}; @@ -180,7 +180,7 @@ sampler::shutdown() { size_t _nitr = 0; constexpr size_t _nitr_max = 100; - uint64_t _freq = (1.0 / get_thread_sampling_freq()) * 1.0e3; + uint64_t _freq = (1.0 / get_process_sampling_freq()) * 1.0e3; // wait until the sampler is no longer sampling std::this_thread::sleep_for(msec_t{ _freq }); @@ -226,5 +226,5 @@ sampler::set_state(state_t _state) { get_sampler_state().store(_state); } -} // namespace thread_sampler +} // namespace process_sampler } // namespace omnitrace diff --git a/source/lib/omnitrace/library/thread_sampler.hpp b/source/lib/omnitrace/library/process_sampler.hpp similarity index 98% rename from source/lib/omnitrace/library/thread_sampler.hpp rename to source/lib/omnitrace/library/process_sampler.hpp index a14f391e99..cb4cbfdb6c 100644 --- a/source/lib/omnitrace/library/thread_sampler.hpp +++ b/source/lib/omnitrace/library/process_sampler.hpp @@ -39,7 +39,7 @@ namespace omnitrace { -namespace thread_sampler +namespace process_sampler { struct instance { @@ -99,5 +99,5 @@ post_process() sampler::post_process(); } // -} // namespace thread_sampler +} // namespace process_sampler } // namespace omnitrace diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d89a49d6b8..b8f04b3d09 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -27,7 +27,7 @@ set(_base_environment "OMNITRACE_USE_PERFETTO=ON" "OMNITRACE_USE_TIMEMORY=ON" "OMNITRACE_USE_SAMPLING=ON" - "OMNITRACE_USE_THREAD_SAMPLING=ON" + "OMNITRACE_USE_PROCESS_SAMPLING=ON" "OMNITRACE_TIME_OUTPUT=OFF" "OMP_PROC_BIND=spread" "OMP_PLACES=threads" @@ -37,7 +37,7 @@ set(_base_environment set(_lock_environment "OMNITRACE_USE_SAMPLING=OFF" - "OMNITRACE_USE_THREAD_SAMPLING=OFF" + "OMNITRACE_USE_PROCESS_SAMPLING=OFF" "OMNITRACE_CRITICAL_TRACE=ON" "OMNITRACE_COLLAPSE_THREADS=ON" "OMNITRACE_TRACE_THREAD_LOCKS=ON" @@ -64,7 +64,7 @@ set(_perfetto_environment "OMNITRACE_USE_PERFETTO=ON" "OMNITRACE_USE_TIMEMORY=OFF" "OMNITRACE_USE_SAMPLING=ON" - "OMNITRACE_USE_THREAD_SAMPLING=ON" + "OMNITRACE_USE_PROCESS_SAMPLING=ON" "OMNITRACE_TIME_OUTPUT=OFF" "OMP_PROC_BIND=spread" "OMP_PLACES=threads" @@ -76,7 +76,7 @@ set(_timemory_environment "OMNITRACE_USE_PERFETTO=OFF" "OMNITRACE_USE_TIMEMORY=ON" "OMNITRACE_USE_SAMPLING=ON" - "OMNITRACE_USE_THREAD_SAMPLING=ON" + "OMNITRACE_USE_PROCESS_SAMPLING=ON" "OMNITRACE_TIME_OUTPUT=OFF" "OMP_PROC_BIND=spread" "OMP_PLACES=threads" @@ -90,7 +90,7 @@ set(_python_environment "OMNITRACE_USE_PERFETTO=ON" "OMNITRACE_USE_TIMEMORY=ON" "OMNITRACE_USE_SAMPLING=OFF" - "OMNITRACE_USE_THREAD_SAMPLING=ON" + "OMNITRACE_USE_PROCESS_SAMPLING=ON" "OMNITRACE_TIME_OUTPUT=OFF" "OMNITRACE_TREE_OUTPUT=OFF" "OMNITRACE_USE_PID=OFF" @@ -102,7 +102,7 @@ set(_attach_environment "OMNITRACE_USE_PERFETTO=ON" "OMNITRACE_USE_TIMEMORY=ON" "OMNITRACE_USE_SAMPLING=OFF" - "OMNITRACE_USE_THREAD_SAMPLING=ON" + "OMNITRACE_USE_PROCESS_SAMPLING=ON" "OMNITRACE_USE_CRITICAL_TRACE=ON" "OMNITRACE_USE_OMPT=ON" "OMNITRACE_USE_KOKKOSP=ON"