diff --git a/external/timemory b/external/timemory index 95df33c9c4..d5e3987802 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit 95df33c9c44d21df5b883d9e6fbe65f40cf4b8a4 +Subproject commit d5e3987802d18489bcd5cdb47735b12a147877c2 diff --git a/source/bin/omnitrace-avail/avail.cpp b/source/bin/omnitrace-avail/avail.cpp index 5cea3e1279..14a877818a 100644 --- a/source/bin/omnitrace-avail/avail.cpp +++ b/source/bin/omnitrace-avail/avail.cpp @@ -272,25 +272,65 @@ main(int argc, char** argv) { auto _keys = tim::settings::output_keys( tim::settings::shared_instance()->get_tag()); - std::pair _w = { 0, 0 }; + std::tuple _w = { 0, 0, 0 }; for(const auto& itr : _keys) { - if(!is_selected(itr.first)) continue; - if(_show && !is_selected(itr.second)) continue; - _w.first = std::max(_w.first, itr.first.length()); - _w.second = std::max(_w.second, itr.second.length()); + if(!is_selected(itr.key)) continue; + if(_show && !is_selected(itr.value)) continue; + std::get<0>(_w) = std::max(std::get<0>(_w), itr.key.length()); + std::get<1>(_w) = std::max(std::get<1>(_w), itr.value.length()); + std::get<2>(_w) = std::max(std::get<2>(_w), itr.description.length()); } std::stringstream _msg{}; - _msg << "Output Keys:\n" << std::left; - for(const auto& itr : _keys) + _msg << std::left; + + if(markdown) { - if(!is_selected(itr.first)) continue; - if(_show && !is_selected(itr.second)) continue; - if(_show) - _msg << " " << std::setw(_w.first) << itr.first - << " :: " << std::setw(_w.second) << itr.second << "\n"; - else - _msg << " " << std::setw(_w.first) << itr.first << "\n"; + _msg << "| " << std::setw(std::get<0>(_w) + 2) << "String"; + if(_show) _msg << " | " << std::setw(std::get<1>(_w)) << "Value"; + _msg << " | " << std::setw(std::get<2>(_w)) << "Encoding" + << " |\n"; + + auto _dashes = [](int64_t _n) { + std::stringstream _dss{}; + _dss.fill('-'); + _dss << std::setw(_n + 2) << ""; + return _dss.str(); + }; + + _msg << "|" << 
_dashes(std::get<0>(_w) + 2); + if(_show) _msg << "|" << _dashes(std::get<1>(_w)); + _msg << "|" << _dashes(std::get<2>(_w)) << "|\n"; + + for(const auto& itr : _keys) + { + if(!is_selected(itr.key)) continue; + if(_show && !is_selected(itr.value)) continue; + _msg << "| " << std::setw(std::get<0>(_w) + 2) + << TIMEMORY_JOIN("", "`", itr.key, "`"); + if(_show) + _msg << " | " << std::setw(std::get<1>(_w)) << itr.value; + _msg << " | " << std::setw(std::get<2>(_w)) << itr.description + << " |\n"; + } + } + else + { + _msg << "Output Keys:\n" << std::left; + for(const auto& itr : _keys) + { + if(!is_selected(itr.key)) continue; + if(_show && !is_selected(itr.value)) continue; + if(_show) + _msg << " " << std::setw(std::get<0>(_w)) << itr.key + << " :: " << std::setw(std::get<1>(_w)) << itr.value + << " :: " << std::setw(std::get<2>(_w)) + << itr.description << "\n"; + else + _msg << " " << std::setw(std::get<0>(_w)) << itr.key + << " :: " << std::setw(std::get<2>(_w)) + << itr.description << "\n"; + } } std::cout << _msg.str(); } diff --git a/source/docs/output.md b/source/docs/output.md index d8e219d0eb..030c2c485b 100644 --- a/source/docs/output.md +++ b/source/docs/output.md @@ -261,21 +261,25 @@ set `OMNITRACE_OUTPUT_PREFIX="%argt%-"` and let omnitrace cleanly organize the o | String | Encoding | |-----------------|--------------------------------------------------------------------------------------------------------------------| -| `%arg%` | Command line argument at position `` (zero indexed), e.g. `%arg0%` for first argument. 
| -| `%arg_hash%` | MD5 sum of `%arg%` | | `%argv%` | Entire command-line condensed into a single string | -| `%argv_hash%` | MD5 sum of `%argv%` | | `%argt%` | Similar to `%argv%` except basename of first command line argument | -| `%argt_hash%` | MD5 sum if `%argt%` | | `%args%` | All command line arguments condensed into a single string | -| `%args_hash%` | MD5 sum of `%args%` | | `%tag%` | Basename of first command line argument | +| `%arg<N>%` | Command line argument at position `<N>` (zero indexed), e.g. `%arg0%` for first argument. | +| `%argv_hash%` | MD5 sum of `%argv%` | +| `%argt_hash%` | MD5 sum of `%argt%` | +| `%args_hash%` | MD5 sum of `%args%` | | `%tag_hash%` | MD5 sum of `%tag%` | +| `%arg<N>_hash%` | MD5 sum of `%arg<N>%` | | `%pid%` | Process identifier (i.e. `getpid()`) | | `%ppid%` | Parent process identifier (i.e. `getppid()`) | +| `%pgid%` | Process group identifier (i.e. `getpgid(getpid())`) | +| `%psid%` | Process session identifier (i.e. `getsid(getpid())`) | +| `%psize%` | Number of sibling processes (from reading `/proc/<PPID>/task/<PPID>/children`) | | `%job%` | Value of `SLURM_JOB_ID` environment variable if exists, else `0` | | `%rank%` | Value of `SLURM_PROCID` environment variable if exists, else `MPI_Comm_rank` (or `0` non-mpi) | | `%size%` | `MPI_Comm_size` or `1` if non-mpi | +| `%nid%` | `%rank%` if possible, otherwise `%pid%` | | `%launch_time%` | Launch date and time (uses `OMNITRACE_TIME_FORMAT`) | | `%env{NAME}%` | Value of environment variable `NAME` (i.e. `getenv(NAME)`) | | `%cfg{NAME}%` | Value of configuration variable `NAME` (e.g. 
`%cfg{OMNITRACE_SAMPLING_FREQ}%` would resolve to sampling frequency) | diff --git a/source/lib/omnitrace/library.cpp b/source/lib/omnitrace/library.cpp index 246e5f85a0..885d072d4c 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -991,9 +991,10 @@ omnitrace_finalize_hidden(void) OMNITRACE_VERBOSE_F(1, "Finalizing timemory...\n"); tim::timemory_finalize(_timemory_manager.get()); + auto _cfg = settings::compose_filename_config{}; + _cfg.use_suffix = true; _timemory_manager->write_metadata(settings::get_global_output_prefix(), - "omnitrace", - settings::default_process_suffix()); + "omnitrace", _cfg); } if(_perfetto_output_error) diff --git a/source/lib/omnitrace/library/config.cpp b/source/lib/omnitrace/library/config.cpp index bd31d4d31d..9477e1a70f 100644 --- a/source/lib/omnitrace/library/config.cpp +++ b/source/lib/omnitrace/library/config.cpp @@ -56,6 +56,7 @@ #include #include #include +#include namespace omnitrace { @@ -597,6 +598,16 @@ configure_settings(bool _init) std::string{ "perfetto-trace.proto" }, "perfetto", "io", "filename", "advanced"); + OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_TEMPORARY_FILES", + "Write data to temporary files to minimize the memory usage " + "of omnitrace, e.g. 
call-stack samples will be periodically " + "written to a file and re-loaded during finalization", + true, "io", "data", "advanced"); + + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_TMPDIR", "Base directory for temporary files", + get_env("TMPDIR", "/tmp"), "io", "data", "advanced"); + // set the defaults _config->get_flamegraph_output() = false; _config->get_ctest_notes() = false; @@ -986,6 +997,7 @@ configure_signal_handler() if(_config->get_enable_signal_handler()) { tim::signals::disable_signal_detection(); + signal_settings::enable(sys_signal::Interrupt); signal_settings::set_exit_action(omnitrace_exit_action); signal_settings::check_environment(); auto default_signals = signal_settings::get_default(); @@ -1964,6 +1976,80 @@ get_debug_pid() _vlist.count(dmp::rank()) > 0; return _v; } + +bool +get_use_tmp_files() +{ + static auto _v = get_config()->find("OMNITRACE_USE_TEMPORARY_FILES"); + return static_cast&>(*_v->second).get(); +} + +std::string +get_tmpdir() +{ + static auto _v = get_config()->find("OMNITRACE_TMPDIR"); + return static_cast&>(*_v->second).get(); +} + +tmp_file::tmp_file(std::string _v) +: filename{ std::move(_v) } +{} + +tmp_file::~tmp_file() { close(); } + +void +tmp_file::open(std::ios::openmode _mode) +{ + OMNITRACE_VERBOSE_F(2, "Opening temporary file '%s'...\n", filename.c_str()); + + if(!filepath::exists(filename)) + { + // if the filepath does not exist, open in out mode to create it + std::ofstream _ofs{}; + filepath::open(_ofs, filename); + } + + stream.open(filename, _mode); +} + +void +tmp_file::close() +{ + if(stream.is_open()) stream.close(); +} +// Returns the cached temporary file for the composed path (opened on first request); null when temporary files are disabled +std::shared_ptr +get_tmp_file(std::string _basename, std::string _ext) +{ + if(!get_use_tmp_files()) return std::shared_ptr{}; + + static auto _existing_files = + std::unordered_map>{}; + static std::mutex _mutex{}; + std::unique_lock _lk{ _mutex }; + + auto _cfg = settings::compose_filename_config{}; + _cfg.use_suffix = true; + _cfg.suffix = "%pid%"; + _cfg.explicit_path =
get_tmpdir(); + _cfg.subdirectory = JOIN('/', settings::output_path(), "%ppid%", ""); + auto _fname = + settings::compose_output_filename(_basename, _ext, _cfg); // passed by copy: both are reused in the error message below + + if(_fname.empty() || _fname.front() != '/') + { + OMNITRACE_THROW("Error! temporary file '%s' (based on '%s.%s') is either empty " + "or is not an absolute path", + _fname.c_str(), _basename.c_str(), _ext.c_str()); + } + auto itr = _existing_files.find(_fname); + if(itr != _existing_files.end()) return itr->second; + + auto _v = std::make_shared(_fname); + _v->open(); + _existing_files.emplace(_fname, std::move(_v)); + return _existing_files.at(_fname); +} } // namespace config State& diff --git a/source/lib/omnitrace/library/config.hpp b/source/lib/omnitrace/library/config.hpp index d5859627ca..060bfd32e8 100644 --- a/source/lib/omnitrace/library/config.hpp +++ b/source/lib/omnitrace/library/config.hpp @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -330,6 +331,29 @@ get_trace_thread_join(); std::string get_rocm_events(); + +bool +get_use_tmp_files(); + +std::string +get_tmpdir(); + +struct tmp_file +{ + tmp_file(std::string); + ~tmp_file(); + + void open(std::ios::openmode = std::ios::binary | std::ios::in | std::ios::out); + void close(); + + operator bool() const { return stream.is_open() && stream.good(); } + + std::string filename = {}; + std::fstream stream = {}; +}; + +std::shared_ptr +get_tmp_file(std::string _basename, std::string _ext = "dat"); } // namespace config // diff --git a/source/lib/omnitrace/library/perfetto.hpp b/source/lib/omnitrace/library/perfetto.hpp index a0ce84d65e..1c7638cef9 100644 --- a/source/lib/omnitrace/library/perfetto.hpp +++ b/source/lib/omnitrace/library/perfetto.hpp @@ -151,8 +151,9 @@ struct perfetto_counter_track _css << " " << std::hex << std::setw(12) << std::left << eitr; OMNITRACE_THROW("perfetto_counter_track emplace method for '%s' (%p) " "invalidated C-string '%s' (%p).\n%8s: %s\n%8s: %s\n", - 
_v.c_str(), _name->c_str(), std::get<0>(itr).c_str(), - std::get<0>(itr).c_str(), "previous", + _v.c_str(), (void*) _name->c_str(), + std::get<0>(itr).c_str(), + (void*) std::get<0>(itr).c_str(), "previous", _pss.str().c_str(), "current", _css.str().c_str()); } } diff --git a/source/lib/omnitrace/library/sampling.cpp b/source/lib/omnitrace/library/sampling.cpp index 4e747c1698..6d36427e20 100644 --- a/source/lib/omnitrace/library/sampling.cpp +++ b/source/lib/omnitrace/library/sampling.cpp @@ -286,6 +286,69 @@ start_duration_thread() } } +auto& +get_offload_file() +{ + static auto _v = config::get_tmp_file("sampling"); + return _v; +} + +std::mutex& +get_offload_mutex() +{ + static auto _v = std::mutex{}; + return _v; +} + +using sampler_bundle_t = typename sampler_t::bundle_type; +using sampler_buffer_t = tim::data_storage::ring_buffer; + +void +offload_buffer(int64_t _seq, sampler_buffer_t&& _buf) +{ + auto _lk = std::unique_lock{ get_offload_mutex() }; + auto& _file = get_offload_file(); + if(!_file) return; + + OMNITRACE_VERBOSE_F(3, "Saving sampling buffer for thread %li...\n", _seq); + auto& _fs = _file->stream; + _fs.write(reinterpret_cast(&_seq), sizeof(_seq)); + auto _data = std::move(_buf); + _data.save(_fs); + _data.destroy(); + _buf.destroy(); +} + +auto +load_offload_buffer() +{ + auto _data = std::map>{}; + if(!get_use_tmp_files()) return _data; + + auto _lk = std::unique_lock{ get_offload_mutex() }; + auto& _file = get_offload_file(); + if(!_file) return _data; + + auto& _fs = _file->stream; + + _fs.close(); + _file->open(std::ios::binary | std::ios::in); + while(!_fs.eof()) + { + int64_t _seq = 0; + _fs.read(reinterpret_cast(&_seq), sizeof(_seq)); + if(_fs.eof()) break; + sampler_buffer_t _buffer{}; + _buffer.load(_fs); + OMNITRACE_VERBOSE_F(2, "Loading %zu samples for thread %li...\n", _buffer.count(), + _seq); + _data[_seq].emplace_back(std::move(_buffer)); + } + _file.reset(); + + return _data; +} + std::set configure(bool _setup, int64_t _tid) { 
@@ -362,6 +425,12 @@ configure(bool _setup, int64_t _tid) threading::get_sys_tid() }); } + if(get_use_tmp_files()) + { + auto _file = get_offload_file(); + if(_file && *_file) _sampler->set_offload(&offload_buffer); + } + static_assert(tim::trait::buffer_size::value > 0, "Error! Zero buffer size"); @@ -542,6 +611,14 @@ post_process() size_t _total_data = 0; size_t _total_threads = 0; + + for(size_t i = 0; i < max_supported_threads; ++i) + { + auto& _sampler = get_sampler(i); + if(_sampler) _sampler->set_offload(nullptr); + } + + auto _loaded_data = load_offload_buffer(); for(size_t i = 0; i < max_supported_threads; ++i) { auto& _sampler = get_sampler(i); @@ -574,6 +651,16 @@ post_process() _sampler->stop(); auto& _raw_data = _sampler->get_data(); + for(auto litr : _loaded_data[i]) + { + while(!litr.is_empty()) + { + auto _v = sampler_bundle_t{}; + litr.read(&_v); + _raw_data.emplace_back(std::move(_v)); + } + litr.destroy(); + } OMNITRACE_VERBOSE(2 || get_debug_sampling(), "Sampler data for thread %lu has %zu initial entries...\n", i, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1cec467f71..f30b40d940 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -811,7 +811,9 @@ if(OMNITRACE_USE_MPI OR OMNITRACE_USE_MPI_HEADERS) ENVIRONMENT "${_base_environment}" REWRITE_RUN_PASS_REGEX "(/[A-Za-z-]+/perfetto-trace-0.proto).*(/[A-Za-z-]+/wall_clock-0.txt')" - REWRITE_RUN_FAIL_REGEX "-[0-9][0-9]+.(json|txt|proto)") + REWRITE_RUN_FAIL_REGEX + "(perfetto-trace|trip_count|sampling_percent|sampling_cpu_clock|sampling_wall_clock|wall_clock)-[0-9][0-9]+.(json|txt|proto)" + ) omnitrace_add_test( SKIP_RUNTIME SKIP_SAMPLING