diff --git a/.github/workflows/opensuse.yml b/.github/workflows/opensuse.yml index 0c4b329753..9b6ab3bed5 100644 --- a/.github/workflows/opensuse.yml +++ b/.github/workflows/opensuse.yml @@ -103,7 +103,7 @@ jobs: omnitrace-instrument -e -v 1 -o ls.inst --simulate -- ls for i in $(find omnitrace-ls.inst-output -type f); do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace-instrument -e -v 1 -o ls.inst -- ls - ./ls.inst + omnitrace-run -- ./ls.inst omnitrace-instrument -e -v 1 --simulate -- ls for i in $(find omnitrace-ls-output -type f); do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace-instrument -e -v 1 -- ls diff --git a/.github/workflows/ubuntu-bionic.yml b/.github/workflows/ubuntu-bionic.yml index bf225e450e..d2c8d30975 100644 --- a/.github/workflows/ubuntu-bionic.yml +++ b/.github/workflows/ubuntu-bionic.yml @@ -134,7 +134,7 @@ jobs: omnitrace-instrument -e -v 1 -o ls.inst --simulate -- ls for i in $(find omnitrace-ls.inst-output -type f); do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace-instrument -e -v 1 -o ls.inst -- ls - ./ls.inst + omnitrace-run -- ./ls.inst omnitrace-instrument -e -v 1 --simulate -- ls for i in $(find omnitrace-ls-output -type f); do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace-instrument -e -v 1 -- ls diff --git a/README.md b/README.md index 010eb77cf1..decc444894 100755 --- a/README.md +++ b/README.md @@ -197,7 +197,7 @@ Once you have rewritten your executable and/or libraries with instrumentation, y or exectuable which loads the instrumented libraries normally, e.g.: ```shell -./app.inst +omnitrace-run -- ./app.inst ``` If you want to re-define certain settings to new default in a binary rewrite, use the `--env` option. 
This `omnitrace` option @@ -207,7 +207,7 @@ is 1024000 KB (1 GiB): ```shell # buffer size defaults to 1024000 omnitrace-instrument -o app.inst -- /path/to/app -./app.inst +omnitrace-run -- ./app.inst ``` Passing `--env OMNITRACE_PERFETTO_BUFFER_SIZE_KB=5120000` will change the default value in `app.inst` to 5120000 KiB (5 GiB): @@ -215,13 +215,15 @@ Passing `--env OMNITRACE_PERFETTO_BUFFER_SIZE_KB=5120000` will change the defaul ```shell # defaults to 5 GiB buffer size omnitrace-instrument -o app.inst --env OMNITRACE_PERFETTO_BUFFER_SIZE_KB=5120000 -- /path/to/app -./app.inst +omnitrace-run -- ./app.inst ``` ```shell -# override default 5 GiB buffer size to 200 MB +# override default 5 GiB buffer size to 200 MB via command-line +omnitrace-run --trace-buffer-size=200000 -- ./app.inst +# override default 5 GiB buffer size to 200 MB via environment export OMNITRACE_PERFETTO_BUFFER_SIZE_KB=200000 -./app.inst +omnitrace-run -- ./app.inst ``` #### Runtime Instrumentation @@ -319,21 +321,18 @@ perfetto --out ./omnitrace-perfetto.proto --txt -c ${OMNITRACE_ROOT}/share/omnit > ***NOTE: if the perfetto tools were installed by omnitrace, replace `traced` with `omnitrace-perfetto-traced` and*** > ***`perfetto` with `omnitrace-perfetto`.*** -Configure omnitrace to use the perfetto system backend: - -```shell -export OMNITRACE_PERFETTO_BACKEND=system -``` - -And finally, execute your instrumented application. 
Either the binary rewritten application: +Configure omnitrace to use the perfetto system backend via the `--perfetto-backend` option of `omnitrace-run`: ```shell +# enable sampling on the uninstrumented binary +omnitrace-run --sample --trace --perfetto-backend=system -- ./myapp +# trace the instrument the binary omnitrace-instrument -o ./myapp.inst -- ./myapp -./myapp.inst +omnitrace-run --trace --perfetto-backend=system -- ./myapp.inst ``` -Or with runtime instrumentation: +or via the `--env` option of `omnitrace-instrument` + runtime instrumentation: ```shell -omnitrace-instrument -- ./myapp +omnitrace-instrument --env OMNITRACE_PERFETTO_BACKEND=system -- ./myapp ``` diff --git a/VERSION b/VERSION index a8fdfda1c7..f8e233b273 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.8.1 +1.9.0 diff --git a/cmake/MacroUtilities.cmake b/cmake/MacroUtilities.cmake index e0eb4c90f5..af48a5517d 100644 --- a/cmake/MacroUtilities.cmake +++ b/cmake/MacroUtilities.cmake @@ -151,6 +151,7 @@ function(OMNITRACE_STRIP_TARGET) --keep-symbol="omnitrace_pop_region" --keep-symbol="omnitrace_set_env" --keep-symbol="omnitrace_set_mpi" --keep-symbol="omnitrace_reset_preload" + --keep-symbol="omnitrace_set_instrumented" --keep-symbol="omnitrace_user_*" --keep-symbol="ompt_start_tool" --keep-symbol="kokkosp_*" --keep-symbol="OnLoad" --keep-symbol="OnUnload" --keep-symbol="OnLoadToolProp" diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake index a097796859..34d0a7a2ca 100644 --- a/cmake/Packages.cmake +++ b/cmake/Packages.cmake @@ -55,6 +55,7 @@ target_include_directories( $ $ $ + $ $) # include threading because of rooflines diff --git a/external/timemory b/external/timemory index 8ca28b04d9..50c13fef89 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit 8ca28b04d919f3fe89d4348fdd161b7876c4f852 +Subproject commit 50c13fef89eb5f1333088ed66542a99c2556c3f4 diff --git a/scripts/test-install.sh b/scripts/test-install.sh index 436f380e32..94302e1635 100755 --- 
a/scripts/test-install.sh +++ b/scripts/test-install.sh @@ -187,7 +187,7 @@ test-omnitrace-rewrite() verbose-run omnitrace-instrument -e -v 1 -o ${CONFIG_DIR}/ls.inst --simulate -- ${LS_NAME} for i in $(find ${CONFIG_DIR}/omnitrace-tests-output/ls.inst -type f); do verbose-run ls ${i}; done verbose-run omnitrace-instrument -e -v 1 -o ${CONFIG_DIR}/ls.inst -- ${LS_NAME} - verbose-run ${CONFIG_DIR}/ls.inst ${LS_ARGS} + verbose-run omnitrace-run -- ${CONFIG_DIR}/ls.inst ${LS_ARGS} } test-omnitrace-runtime() diff --git a/source/bin/CMakeLists.txt b/source/bin/CMakeLists.txt index b697f2e643..afdc31be9c 100644 --- a/source/bin/CMakeLists.txt +++ b/source/bin/CMakeLists.txt @@ -19,6 +19,7 @@ add_subdirectory(omnitrace-critical-trace) add_subdirectory(omnitrace-causal) add_subdirectory(omnitrace-sample) add_subdirectory(omnitrace-instrument) +add_subdirectory(omnitrace-run) # omnitrace-exe is deprecated add_subdirectory(omnitrace-exe) diff --git a/source/bin/omnitrace-causal/impl.cpp b/source/bin/omnitrace-causal/impl.cpp index 450891e763..5422809c2a 100644 --- a/source/bin/omnitrace-causal/impl.cpp +++ b/source/bin/omnitrace-causal/impl.cpp @@ -282,7 +282,7 @@ print_command(const std::vector& _argv, std::string_view _prefix) std::vector get_initial_environment() { - std::vector _env; + auto _env = std::vector{}; if(environ != nullptr) { int idx = 0; diff --git a/source/bin/omnitrace-instrument/module_function.cpp b/source/bin/omnitrace-instrument/module_function.cpp index 0a6213f90e..5b857c172c 100644 --- a/source/bin/omnitrace-instrument/module_function.cpp +++ b/source/bin/omnitrace-instrument/module_function.cpp @@ -499,7 +499,8 @@ module_function::is_module_constrained() const // always instrument these modules if(module_name == "DEFAULT_MODULE" || module_name == "LIBRARY_MODULE") - return _report("Skipping", "default module", 2); + // return _report("Skipping", "default module", 2); + return false; static std::regex ext_regex{ "\\.(s|S)$", regex_opts }; static 
std::regex sys_regex{ "^(s|k|e|w)_[A-Za-z_0-9\\-]+\\.(c|C)$", regex_opts }; diff --git a/source/bin/omnitrace-instrument/omnitrace-instrument.cpp b/source/bin/omnitrace-instrument/omnitrace-instrument.cpp index 3ddb27c1af..ba4f80ccc2 100644 --- a/source/bin/omnitrace-instrument/omnitrace-instrument.cpp +++ b/source/bin/omnitrace-instrument/omnitrace-instrument.cpp @@ -22,6 +22,7 @@ #include "omnitrace-instrument.hpp" #include "common/defines.h" +#include "dl/dl.hpp" #include "fwd.hpp" #include "internal_libs.hpp" #include "log.hpp" @@ -56,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -86,6 +88,8 @@ get_default_min_address_range() } } // namespace +using InstrumentMode = ::omnitrace::dl::InstrumentMode; + bool use_return_info = false; bool use_args_info = false; bool use_file_info = false; @@ -108,7 +112,7 @@ bool include_uninstr = false; bool include_internal_linked_libs = false; int verbose_level = tim::get_env("OMNITRACE_VERBOSE_INSTRUMENT", 0); int num_log_entries = tim::get_env( - "OMNITRACE_LOG_COUNT", tim::get_env("OMNITRACE_CI", false) ? 20 : -1); + "OMNITRACE_LOG_COUNT", tim::get_env("OMNITRACE_CI", false) ? 20 : 50); string_t main_fname = "main"; string_t argv0 = {}; string_t cmdv0 = {}; @@ -1410,8 +1414,27 @@ main(int argc, char** argv) // //----------------------------------------------------------------------------------// - addr_space = - omnitrace_get_address_space(bpatch, _cmdc, _cmdv, binary_rewrite, _pid, mutname); + // prioritize the user environment arguments + auto instr_mode_v = (binary_rewrite) ? InstrumentMode::BinaryRewrite + : (_pid < 0) ? 
InstrumentMode::ProcessCreate + : InstrumentMode::ProcessAttach; + auto instr_mode_v_int = static_cast(instr_mode_v); + auto env_vars = parser.get("env"); + env_vars.reserve(env_vars.size() + env_config_variables.size()); + for(auto&& itr : env_config_variables) + env_vars.emplace_back(itr); + env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MODE", instr_mode)); + env_vars.emplace_back( + TIMEMORY_JOIN('=', "OMNITRACE_INSTRUMENT_MODE", instr_mode_v_int)); + env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MPI_INIT", "OFF")); + env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MPI_FINALIZE", "OFF")); + env_vars.emplace_back( + TIMEMORY_JOIN('=', "OMNITRACE_TIMEMORY_COMPONENTS", default_components)); + env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_USE_CODE_COVERAGE", + (coverage_mode != CODECOV_NONE) ? "ON" : "OFF")); + + addr_space = omnitrace_get_address_space(bpatch, _cmdc, _cmdv, env_vars, + binary_rewrite, _pid, mutname); // addr_space->allowTraps(instr_traps); @@ -1704,14 +1727,15 @@ main(int argc, char** argv) verbprintf(0, "Finding instrumentation functions...\n"); - auto* init_func = find_function(app_image, "omnitrace_init"); - auto* fini_func = find_function(app_image, "omnitrace_finalize"); - auto* env_func = find_function(app_image, "omnitrace_set_env"); - auto* mpi_func = find_function(app_image, "omnitrace_set_mpi"); - auto* entr_trace = find_function(app_image, "omnitrace_push_trace"); - auto* exit_trace = find_function(app_image, "omnitrace_pop_trace"); - auto* reg_src_func = find_function(app_image, "omnitrace_register_source"); - auto* reg_cov_func = find_function(app_image, "omnitrace_register_coverage"); + auto* init_func = find_function(app_image, "omnitrace_init"); + auto* fini_func = find_function(app_image, "omnitrace_finalize"); + auto* env_func = find_function(app_image, "omnitrace_set_env"); + auto* mpi_func = find_function(app_image, "omnitrace_set_mpi"); + auto* entr_trace = find_function(app_image, "omnitrace_push_trace"); 
+ auto* exit_trace = find_function(app_image, "omnitrace_pop_trace"); + auto* reg_src_func = find_function(app_image, "omnitrace_register_source"); + auto* reg_cov_func = find_function(app_image, "omnitrace_register_coverage"); + auto* set_instr_func = find_function(app_image, "omnitrace_set_instrumented"); if(!main_func && main_fname == "main") main_func = find_function(app_image, "_main"); @@ -1828,12 +1852,14 @@ main(int argc, char** argv) using pair_t = std::pair; - for(const auto& itr : - { pair_t(entr_trace, "omnitrace_push_trace"), - pair_t(exit_trace, "omnitrace_pop_trace"), pair_t(init_func, "omnitrace_init"), - pair_t(fini_func, "omnitrace_finalize"), pair_t(env_func, "omnitrace_set_env"), - pair_t(reg_src_func, "omnitrace_register_source"), - pair_t(reg_cov_func, "omnitrace_register_coverage") }) + for(const auto& itr : { pair_t{ entr_trace, "omnitrace_push_trace" }, + pair_t{ exit_trace, "omnitrace_pop_trace" }, + pair_t{ init_func, "omnitrace_init" }, + pair_t{ fini_func, "omnitrace_finalize" }, + pair_t{ env_func, "omnitrace_set_env" }, + pair_t{ set_instr_func, "omnitrace_set_instrumented" }, + pair_t{ reg_src_func, "omnitrace_register_source" }, + pair_t{ reg_cov_func, "omnitrace_register_coverage" } }) { if(!itr.first) { @@ -1888,19 +1914,20 @@ main(int argc, char** argv) if(main_func) main_sign.get(); auto main_call_args = omnitrace_call_expr(main_sign.get()); - auto init_call_args = omnitrace_call_expr(instr_mode, binary_rewrite, _init_arg0); + auto init_call_args = omnitrace_call_expr(instr_mode, binary_rewrite, ""); auto fini_call_args = omnitrace_call_expr(); auto umpi_call_args = omnitrace_call_expr(use_mpi, is_attached); auto none_call_args = omnitrace_call_expr(); + auto set_instr_args = omnitrace_call_expr(instr_mode_v_int); verbprintf(2, "Done\n"); verbprintf(2, "Getting call snippets... 
"); - auto init_call = init_call_args.get(init_func); - auto fini_call = fini_call_args.get(fini_func); - auto umpi_call = umpi_call_args.get(mpi_func); - - auto main_beg_call = main_call_args.get(entr_trace); + auto init_call = init_call_args.get(init_func); + auto fini_call = fini_call_args.get(fini_func); + auto umpi_call = umpi_call_args.get(mpi_func); + auto set_instr_call = set_instr_args.get(set_instr_func); + auto main_beg_call = main_call_args.get(entr_trace); verbprintf(2, "Done\n"); @@ -1924,35 +1951,27 @@ main(int argc, char** argv) } if(_libname.empty()) _libname = "libomnitrace-dl.so"; - // prioritize the user environment arguments - auto env_vars = parser.get("env"); - env_vars.reserve(env_vars.size() + env_config_variables.size()); - for(auto&& itr : env_config_variables) - env_vars.emplace_back(itr); - env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MODE", instr_mode)); - env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MPI_INIT", "OFF")); - env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MPI_FINALIZE", "OFF")); + if(!binary_rewrite && !is_attached) env_vars.clear(); + env_vars.emplace_back( TIMEMORY_JOIN('=', "OMNITRACE_INIT_ENABLED", (user_start_func && user_stop_func) ? "OFF" : "ON")); - env_vars.emplace_back( - TIMEMORY_JOIN('=', "OMNITRACE_TIMEMORY_COMPONENTS", default_components)); env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_USE_MPIP", (binary_rewrite && use_mpi) ? "ON" : "OFF")); - env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_USE_CODE_COVERAGE", - (coverage_mode != CODECOV_NONE) ? 
"ON" : "OFF")); if(use_mpi) env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_USE_PID", "ON")); for(auto& itr : env_vars) { - auto p = tim::delimit(itr, "="); - if(p.size() != 2) + auto _pos = itr.find('='); + if(_pos == std::string::npos) { errprintf(0, "environment variable %s not in form VARIABLE=VALUE\n", itr.c_str()); } - tim::set_env(p.at(0), p.at(1)); - auto _expr = omnitrace_call_expr(p.at(0), p.at(1)); + auto _var = itr.substr(0, _pos); + auto _val = itr.substr(_pos + 1); + tim::set_env(_var, _val); + auto _expr = omnitrace_call_expr(_var, _val); env_variables.emplace_back(_expr.get(env_func)); } @@ -1962,6 +1981,12 @@ main(int argc, char** argv) // //----------------------------------------------------------------------------------// + // call into omnitrace-dl to notify that instrumentation is occurring + if(binary_rewrite) + { + init_names.emplace(init_names.begin(), set_instr_call.get()); + } + for(const auto& itr : env_variables) { if(itr) init_names.emplace_back(itr.get()); @@ -1981,8 +2006,9 @@ main(int argc, char** argv) } if(umpi_call) init_names.emplace_back(umpi_call.get()); - if(init_call) init_names.emplace_back(init_call.get()); - if(main_func && main_beg_call) init_names.emplace_back(main_beg_call.get()); + if(!binary_rewrite && init_call) init_names.emplace_back(init_call.get()); + if(is_attached && main_func && main_beg_call) + init_names.emplace_back(main_beg_call.get()); for(const auto& itr : end_expr) if(itr.second) fini_names.emplace_back(itr.second.get()); @@ -2047,34 +2073,56 @@ main(int argc, char** argv) auto _init_sequence = sequence_t{ init_names }; auto _fini_sequence = sequence_t{ fini_names }; - if(app_thread && is_attached) + if(!is_attached) { - assert(app_thread != nullptr); - verbprintf(1, "Executing initial snippets...\n"); - for(auto* itr : init_names) - app_thread->oneTimeCode(*itr); - } - else - { - if(main_entr_points) - { - verbprintf(1, "Adding main entry snippets...\n"); - 
addr_space->insertSnippet(_init_sequence, *main_entr_points); - // insert_instr(addr_space, *main_entr_points, _init_sequence, BPatch_entry); - } - else - { + auto _insert_init_callbacks = std::function{}; + auto _insert_init_snippets = std::function{}; + + _insert_init_snippets = [&]() { + if(main_entr_points) + { + verbprintf(1, "Adding main entry snippets...\n"); + addr_space->insertSnippet(_init_sequence, *main_entr_points); + } + else + { + errprintf(0, "Dyninst error inserting main entry snippets: no main entry " + "points\n"); + return false; + } + return true; + }; + + _insert_init_callbacks = [&]() { + verbprintf(1, "Adding main init callbacks...\n"); + size_t _ninits = 0; for(auto* itr : _objs) { + if(itr->name().find("libomnitrace") != std::string::npos) continue; try { - itr->insertInitCallback(_init_sequence); + verbprintf(2, "Adding main init callbacks (via %s)...\n", + itr->name().c_str()); + if(itr->insertInitCallback(_init_sequence)) + { + ++_ninits; + } } catch(std::runtime_error& _e) { errprintf(0, "Dyninst error inserting init callback: %s\n", _e.what()); } } + return (_ninits > 0); + }; + + if(binary_rewrite) + { + if(!_insert_init_callbacks()) _insert_init_snippets(); + } + else + { + if(!_insert_init_snippets()) _insert_init_callbacks(); } } @@ -2469,6 +2517,12 @@ main(int argc, char** argv) } else if(!app_thread->isTerminated() && is_attached) { + bpatch->setDebugParsing(false); + bpatch->setDelayedParsing(true); + verbprintf(1, "Executing initial snippets...\n"); + for(auto* itr : init_names) + app_thread->oneTimeCode(*itr); + app_thread->continueExecution(); while(!app_thread->isTerminated()) { @@ -2492,7 +2546,7 @@ main(int argc, char** argv) delete[] _cmdv; verbprintf(0, "End of omnitrace\n"); - verbprintf(1, "Exit code: %i\n", code); + verbprintf((code != 0) ? 
0 : 1, "Exit code: %i\n", code); if(log_ofs) { diff --git a/source/bin/omnitrace-instrument/omnitrace-instrument.hpp b/source/bin/omnitrace-instrument/omnitrace-instrument.hpp index 09cf707f83..fd6b0d9315 100644 --- a/source/bin/omnitrace-instrument/omnitrace-instrument.hpp +++ b/source/bin/omnitrace-instrument/omnitrace-instrument.hpp @@ -29,6 +29,7 @@ #include "module_function.hpp" #include +#include #include #include @@ -172,7 +173,8 @@ omnitrace_get_is_executable(std::string_view _cmd, bool _default_v) // static inline address_space_t* omnitrace_get_address_space(patch_pointer_t& _bpatch, int _cmdc, char** _cmdv, - bool _rewrite, int _pid = -1, const std::string& _name = {}) + const std::vector& _cmdenv, bool _rewrite, + int _pid = -1, const std::string& _name = {}) { address_space_t* mutatee = nullptr; @@ -196,51 +198,83 @@ omnitrace_get_address_space(patch_pointer_t& _bpatch, int _cmdc, char** _cmdv, } verbprintf_bare(1, "Done\n"); } - else if(_pid >= 0) - { - verbprintf(1, "Attaching to process %i... ", _pid); - fflush(stderr); - char* _cmdv0 = (_cmdc > 0) ? _cmdv[0] : nullptr; - mutatee = _bpatch->processAttach(_cmdv0, _pid); - if(!mutatee) - { - verbprintf(-1, "Failed to connect to process %i\n", (int) _pid); - throw std::runtime_error("Failed to attach to process"); - } - verbprintf_bare(1, "Done\n"); - } else { - if(_cmdc < 1) errprintf(-127, "No command provided"); + bool _attach = (_pid >= 0); - if(is_text_file(_cmdv[0])) + // override the current environment create/attach to process, revert environment + using strpair_t = std::pair; + auto _imported = std::vector{}; + auto _exported = std::vector{}; + auto _get_env_pair = [](const std::string& _full) { + auto _pos = _full.find('='); + if(_pos < _full.length()) + return std::make_pair(_full.substr(0, _pos), _full.substr(_pos + 1)); + return strpair_t{}; + }; + + if(environ) { - errprintf(-1, - "'%s' is a text file. 
OmniTrace only supports instrumenting " - "binary files", - _cmdv[0]); + size_t _idx = 0; + while(environ[_idx] != nullptr) + _imported.emplace_back(_get_env_pair(environ[_idx++])); } - std::stringstream ss; - for(int i = 0; i < _cmdc; ++i) + for(const auto& itr : _cmdenv) { - if(!_cmdv || !_cmdv[i]) continue; - ss << " " << _cmdv[i]; + _exported.emplace_back(_get_env_pair(itr)); } - auto _cmd_msg = ss.str(); - if(_cmd_msg.length() > 1) _cmd_msg = _cmd_msg.substr(1); - char** _environ = environ; - verbprintf(1, "Creating process '%s'... ", _cmd_msg.c_str()); - fflush(stderr); - mutatee = _bpatch->processCreate(_cmdv[0], (const char**) _cmdv, - (const char**) _environ); - if(!mutatee) + for(const auto& itr : _exported) { - verbprintf(-1, "Failed to create process: '%s'\n", _cmd_msg.c_str()); - throw std::runtime_error("Failed to create process"); + setenv(itr.first.c_str(), itr.second.c_str(), 1); + verbprintf(4, "[env] %s=%s\n", itr.first.c_str(), itr.second.c_str()); + } + + if(_attach) + { + verbprintf(1, "Attaching to process %i... ", _pid); + fflush(stderr); + char* _cmdv0 = (_cmdc > 0) ? _cmdv[0] : nullptr; + mutatee = _bpatch->processAttach(_cmdv0, _pid); + if(!mutatee) + { + verbprintf(-1, "Failed to connect to process %i\n", (int) _pid); + throw std::runtime_error("Failed to attach to process"); + } + verbprintf_bare(1, "Done\n"); + } + else + { + if(_cmdc < 1) errprintf(-127, "No command provided"); + + if(is_text_file(_cmdv[0])) + { + errprintf(-1, + "'%s' is a text file. OmniTrace only supports instrumenting " + "binary files", + _cmdv[0]); + } + + std::stringstream ss; + for(int i = 0; i < _cmdc; ++i) + { + if(!_cmdv || !_cmdv[i]) continue; + ss << " " << _cmdv[i]; + } + auto _cmd_msg = ss.str(); + if(_cmd_msg.length() > 1) _cmd_msg = _cmd_msg.substr(1); + + verbprintf(1, "Creating process '%s'... 
", _cmd_msg.c_str()); + fflush(stderr); + mutatee = _bpatch->processCreate(_cmdv[0], (const char**) _cmdv, nullptr); + if(!mutatee) + { + verbprintf(-1, "Failed to create process: '%s'\n", _cmd_msg.c_str()); + throw std::runtime_error("Failed to create process"); + } + verbprintf_bare(1, "Done\n"); } - verbprintf_bare(1, "Done\n"); } return mutatee; diff --git a/source/bin/omnitrace-run/CMakeLists.txt b/source/bin/omnitrace-run/CMakeLists.txt new file mode 100644 index 0000000000..63054bcc87 --- /dev/null +++ b/source/bin/omnitrace-run/CMakeLists.txt @@ -0,0 +1,28 @@ +# ------------------------------------------------------------------------------# +# +# omnitrace-run target +# +# ------------------------------------------------------------------------------# + +add_executable( + omnitrace-run + ${CMAKE_CURRENT_LIST_DIR}/omnitrace-run.cpp + ${CMAKE_CURRENT_LIST_DIR}/omnitrace-run.hpp ${CMAKE_CURRENT_LIST_DIR}/impl.cpp) + +target_compile_definitions(omnitrace-run PRIVATE TIMEMORY_CMAKE=1) +target_include_directories(omnitrace-run PRIVATE ${CMAKE_CURRENT_LIST_DIR}) +target_link_libraries( + omnitrace-run + PRIVATE omnitrace::omnitrace-compile-definitions omnitrace::omnitrace-headers + omnitrace::omnitrace-common-library omnitrace::omnitrace-core + omnitrace::omnitrace-sanitizer) +set_target_properties( + omnitrace-run PROPERTIES BUILD_RPATH "\$ORIGIN:\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}" + INSTALL_RPATH "${OMNITRACE_EXE_INSTALL_RPATH}") + +omnitrace_strip_target(omnitrace-run) + +install( + TARGETS omnitrace-run + DESTINATION ${CMAKE_INSTALL_BINDIR} + OPTIONAL) diff --git a/source/bin/omnitrace-run/impl.cpp b/source/bin/omnitrace-run/impl.cpp new file mode 100644 index 0000000000..71a0a163fb --- /dev/null +++ b/source/bin/omnitrace-run/impl.cpp @@ -0,0 +1,359 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "omnitrace-run.hpp" + +#include "common/defines.h" +#include "common/delimit.hpp" +#include "common/environment.hpp" +#include "common/join.hpp" +#include "common/setup.hpp" +#include "core/argparse.hpp" +#include "core/config.hpp" +#include "core/state.hpp" +#include "core/timemory.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace color = ::tim::log::color; +namespace filepath = ::tim::filepath; // NOLINT +namespace console = ::tim::utility::console; +namespace argparse = ::tim::argparse; +namespace signals = ::tim::signals; +using settings = ::omnitrace::settings; +using namespace ::timemory::join; +using ::tim::get_env; +using ::tim::log::stream; + +namespace std +{ +std::string +to_string(bool _v) +{ + return (_v) ? "true" : "false"; +} +} // namespace std + +namespace +{ +std::string +get_internal_libpath(const std::string& _lib) +{ + auto _exe = std::string_view{ realpath("/proc/self/exe", nullptr) }; + auto _pos = _exe.find_last_of('/'); + auto _dir = std::string{ "./" }; + if(_pos != std::string_view::npos) _dir = _exe.substr(0, _pos); + return omnitrace::common::join("/", _dir, "..", "lib", _lib); +} + +parser_data_t& +get_initial_environment(parser_data_t& _data) +{ + if(environ != nullptr) + { + int idx = 0; + while(environ[idx] != nullptr) + { + auto* _v = environ[idx++]; + _data.initial.emplace(_v); + _data.current.emplace_back(strdup(_v)); + } + } + + return _data; +} + +int +get_verbose(parser_data_t& _data) +{ + auto& verbose = _data.verbose; + verbose = get_env("OMNITRACE_CAUSAL_VERBOSE", + get_env("OMNITRACE_VERBOSE", verbose, false)); + auto _debug = + get_env("OMNITRACE_CAUSAL_DEBUG", get_env("OMNITRACE_DEBUG", false, false)); + if(_debug) verbose += 8; + return verbose; +} + +std::string 
+get_realpath(const std::string& _v) +{ + auto* _tmp = realpath(_v.c_str(), nullptr); + auto _ret = std::string{ _tmp }; + free(_tmp); + return _ret; +} + +auto +toggle_suppression(std::tuple _inp) +{ + auto _out = + std::make_tuple(settings::suppress_config(), settings::suppress_parsing()); + std::tie(settings::suppress_config(), settings::suppress_parsing()) = _inp; + return _out; +} + +// disable suppression when exe loads but store original values for restoration later +auto initial_suppression = toggle_suppression({ true, true }); +} // namespace + +void +print_command(const parser_data_t& _data, std::string_view _prefix) +{ + auto verbose = _data.verbose; + const auto& _argv = _data.command; + if(verbose >= 1) + stream(std::cout, color::info()) + << _prefix << "Executing '" << join(array_config{ " " }, _argv) << "'...\n"; + + std::cerr << color::end() << std::flush; +} + +void +prepare_command_for_run(char* _exe, parser_data_t& _data) +{ + if(!_data.launcher.empty()) + { + bool _injected = false; + auto _new_argv = std::vector{}; + for(auto* itr : _data.command) + { + if(!_injected && std::regex_search(itr, std::regex{ _data.launcher })) + { + _new_argv.emplace_back(_exe); + _new_argv.emplace_back(strdup("--")); + _injected = true; + } + _new_argv.emplace_back(itr); + } + + if(!_injected) + { + throw std::runtime_error( + join("", "omnitrace-run was unable to match \"", _data.launcher, + "\" to any arguments on the command line: \"", + join(array_config{ " ", "", "" }, _data.command), "\"")); + } + + std::swap(_data.command, _new_argv); + } +} + +void +prepare_environment_for_run(parser_data_t& _data) +{ + if(_data.launcher.empty()) + { + omnitrace::argparse::add_ld_preload(_data); + } +} + +void +print_updated_environment(parser_data_t& _data, std::string_view _prefix) +{ + auto _verbose = get_verbose(_data); + + if(_verbose < 0) return; + + auto _env = _data.current; + const auto& _updated_envs = _data.updated; + + std::sort(_env.begin(), _env.end(), 
[](auto* _lhs, auto* _rhs) { + if(!_lhs) return false; + if(!_rhs) return true; + return std::string_view{ _lhs } < std::string_view{ _rhs }; + }); + + std::vector _updates = {}; + std::vector _general = {}; + + for(auto* itr : _env) + { + if(itr == nullptr) continue; + + auto _is_omni = (std::string_view{ itr }.find("OMNITRACE") == 0); + auto _updated = false; + for(const auto& vitr : _updated_envs) + { + if(std::string_view{ itr }.find(vitr) == 0) + { + _updated = true; + break; + } + } + + if(_updated) + _updates.emplace_back(itr); + else if(_verbose >= 1 && _is_omni) + _general.emplace_back(itr); + } + + if(_general.size() + _updates.size() == 0 || _verbose < 0) return; + + std::cerr << std::endl; + + for(auto& itr : _general) + stream(std::cerr, color::source()) << _prefix << itr << "\n"; + for(auto& itr : _updates) + stream(std::cerr, color::source()) << _prefix << itr << "\n"; + + std::cerr << color::end() << std::flush; +} + +parser_data_t& +parse_args(int argc, char** argv, parser_data_t& _parser_data) +{ + get_initial_environment(_parser_data); + + bool _do_parse_args = false; + for(int i = 1; i < argc; ++i) + { + auto _arg = std::string_view{ argv[i] }; + if(_arg == "--" || _arg == "-?" || _arg == "-h" || _arg == "--help" || + _arg == "--version") + _do_parse_args = true; + } + + if(!_do_parse_args && argc > 1 && std::string_view{ argv[1] }.find('-') == 0) + _do_parse_args = true; + + if(!_do_parse_args) return parse_command(argc, argv, _parser_data); + + using parser_t = argparse::argument_parser; + using parser_err_t = typename parser_t::result_type; + + toggle_suppression(initial_suppression); + omnitrace::argparse::init_parser(_parser_data); + + // no need for backtraces + signals::disable_signal_detection(signals::signal_settings::get_enabled()); + + auto help_check = [](parser_t& p, int _argc, char** _argv) { + std::set help_args = { "-h", "--help", "-?" 
}; + return (p.exists("help") || _argc == 1 || + (_argc > 1 && help_args.find(_argv[1]) != help_args.end())); + }; + + const auto* _desc = R"desc( + Command line interface to omnitrace configuration. + )desc"; + + auto parser = parser_t{ basename(argv[0]), _desc }; + + parser.on_error([](parser_t&, const parser_err_t& _err) { + stream(std::cerr, color::fatal()) << _err << "\n"; + exit(EXIT_FAILURE); + }); + + parser.enable_help("", "Usage: omnitrace-run -- "); + parser.enable_version("omnitrace-run", "v" OMNITRACE_VERSION_STRING, + OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION); + + auto _cols = std::get<0>(console::get_columns()); + if(_cols > parser.get_help_width() + 8) + parser.set_description_width( + std::min(_cols - parser.get_help_width() - 8, 120)); + + // disable options related to causal profiling + _parser_data.processed_groups.emplace("causal"); + + omnitrace::argparse::add_core_arguments(parser, _parser_data); + omnitrace::argparse::add_extended_arguments(parser, _parser_data); + + auto _inpv = std::vector{}; + auto& _outv = _parser_data.command; + bool _hash = false; + for(int i = 0; i < argc; ++i) + { + if(argv[i] == nullptr) + { + continue; + } + else if(_hash) + { + _outv.emplace_back(strdup(argv[i])); + } + else if(std::string_view{ argv[i] } == "--") + { + _hash = true; + } + else + { + _inpv.emplace_back(strdup(argv[i])); + } + } + + auto _cerr = parser.parse_args(_inpv.size(), _inpv.data()); + if(_cerr) + { + std::cerr << _cerr.what() << std::endl; + exit(EXIT_FAILURE); + } + + return _parser_data; +} + +parser_data_t& +parse_command(int argc, char** argv, parser_data_t& _parser_data) +{ + toggle_suppression(initial_suppression); + omnitrace::argparse::init_parser(_parser_data); + + // no need for backtraces + signals::disable_signal_detection(signals::signal_settings::get_enabled()); + + auto& _outv = _parser_data.command; + bool _hash = false; + for(int i = 1; i < argc; ++i) + { + _outv.emplace_back(strdup(argv[i])); + } + + return 
_parser_data; +} diff --git a/source/bin/omnitrace-run/omnitrace-run.cpp b/source/bin/omnitrace-run/omnitrace-run.cpp new file mode 100644 index 0000000000..a870960d6e --- /dev/null +++ b/source/bin/omnitrace-run/omnitrace-run.cpp @@ -0,0 +1,81 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "omnitrace-run.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +auto* _getenv_at_load = getenv("TIMEMORY_LIBRARY_CTOR"); +auto _setenv_at_load = setenv("TIMEMORY_LIBRARY_CTOR", "0", 0); +} // namespace + +int +main(int argc, char** argv) +{ + if(!_getenv_at_load) + unsetenv("TIMEMORY_LIBRARY_CTOR"); + else + setenv("TIMEMORY_LIBRARY_CTOR", _getenv_at_load, 1); + + auto _print_usage = [argv]() { + std::cerr << tim::log::color::fatal() << "Usage: " << argv[0] + << " -- " << std::endl; + }; + + if(argc == 1) + { + _print_usage(); + return EXIT_FAILURE; + } + + auto _parse_data = parser_data_t{}; + parse_args(argc, argv, _parse_data); + prepare_command_for_run(argv[0], _parse_data); + prepare_environment_for_run(_parse_data); + + auto& _argv = _parse_data.command; + auto& _envp = _parse_data.current; + if(!_argv.empty()) + { + print_updated_environment(_parse_data, "OMNITRACE: "); + print_command(_parse_data, "OMNITRACE: "); + _argv.emplace_back(nullptr); + _envp.emplace_back(nullptr); + return execvpe(_argv.front(), _argv.data(), _envp.data()); + } + + _print_usage(); + return EXIT_FAILURE; +} diff --git a/source/bin/omnitrace-run/omnitrace-run.hpp b/source/bin/omnitrace-run/omnitrace-run.hpp new file mode 100644 index 0000000000..c62090e067 --- /dev/null +++ b/source/bin/omnitrace-run/omnitrace-run.hpp @@ -0,0 +1,53 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "core/argparse.hpp" + +#include +#include +#include +#include +#include +#include +#include + +using parser_data_t = omnitrace::argparse::parser_data; + +void +print_command(const parser_data_t&, std::string_view); + +void +print_updated_environment(parser_data_t&, std::string_view); + +void +prepare_command_for_run(char*, parser_data_t&); + +void +prepare_environment_for_run(parser_data_t&); + +parser_data_t& +parse_args(int argc, char** argv, parser_data_t&); + +parser_data_t& +parse_command(int argc, char** argv, parser_data_t&); diff --git a/source/bin/omnitrace-sample/impl.cpp b/source/bin/omnitrace-sample/impl.cpp index 281632a0e6..797a5f99d0 100644 --- a/source/bin/omnitrace-sample/impl.cpp +++ b/source/bin/omnitrace-sample/impl.cpp @@ -117,7 +117,7 @@ print_command(const std::vector& _argv) std::vector get_initial_environment() { - std::vector _env; + auto _env = std::vector{}; if(environ != nullptr) { int idx = 0; @@ -497,7 +497,7 @@ parse_args(int argc, char** argv, std::vector& _env) .count(1) .dtype("filepath") .action([&](parser_t& p) { - update_env(_env, "OMNITRACE_PERFETTO_FILE", p.get("trace-file")); + update_env(_env, "OMNITRACE_PERFETTO_FILE", p.get("trace-file")); }); parser .add_argument({ "--trace-buffer-size" }, @@ -634,7 +634,7 @@ parse_args(int argc, char** argv, std::vector& _env) .requires({ "host" }) .action([&](parser_t& p) { update_env( - _env, "OMNITRACE_PROCESS_SAMPLING_CPUS", + _env, "OMNITRACE_SAMPLING_CPUS", join(array_config{ "," }, p.get>("cpus"))); }); parser @@ -644,7 +644,7 @@ parse_args(int argc, char** argv, std::vector& _env) .requires({ "device" }) .action([&](parser_t& p) { update_env( - _env, "OMNITRACE_PROCESS_SAMPLING_GPUS", + _env, "OMNITRACE_SAMPLING_GPUS", join(array_config{ "," }, p.get>("gpus"))); }); diff --git a/source/bin/tests/CMakeLists.txt b/source/bin/tests/CMakeLists.txt index 8a7b1182d2..47c8d7705c 100644 --- a/source/bin/tests/CMakeLists.txt +++ 
b/source/bin/tests/CMakeLists.txt
@@ -410,3 +410,125 @@ omnitrace_add_bin_test(
     PASS_REGEX
         "OMNITRACE_CONFIG_FILE(.*)OMNITRACE_ENABLED(.*)OMNITRACE_SUPPRESS_CONFIG(.*)OMNITRACE_SUPPRESS_PARSING(.*)OMNITRACE_VERBOSE"
     )
+
+omnitrace_add_bin_test(
+    NAME omnitrace-run-help
+    TARGET omnitrace-run
+    ARGS --help
+    TIMEOUT 45
+    LABELS "omnitrace-run")
+
+# empty configuration file referenced by the -c argument of omnitrace-run-args
+file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/omnitrace-tests-config")
+file(
+    WRITE "${PROJECT_BINARY_DIR}/omnitrace-tests-config/empty.cfg"
+    "
+#
+# empty config file
+#
+")
+
+# helper executable which is launched by the omnitrace-run-args test
+add_executable(sleeper ${CMAKE_CURRENT_SOURCE_DIR}/sleeper.cpp)
+# NOTE(review): BUILD_TYPE is not a built-in CMake target property -- confirm it
+# is read elsewhere, otherwise it has no effect
+set_target_properties(
+    sleeper PROPERTIES BUILD_TYPE RelWithDebInfo RUNTIME_OUTPUT_DIRECTORY
+                       ${PROJECT_BINARY_DIR}/bin/testing)
+
+# exercise the omnitrace-run command-line options: everything after "--" is the
+# command which omnitrace-run launches
+omnitrace_add_bin_test(
+    NAME omnitrace-run-args
+    TARGET omnitrace-run
+    ARGS --monochrome
+         --debug=false
+         -v
+         1
+         -c
+         %env{TWD}%/omnitrace-tests-config/empty.cfg
+         -o
+         omnitrace-tests-output
+         omnitrace-run-args-output/
+         -TPHD
+         -S
+         cputime
+         realtime
+         --trace-wait=1.0e-12
+         --trace-duration=5.0
+         --wait=1.0
+         --duration=3.0
+         --trace-file=perfetto-run-args-trace.proto
+         --trace-buffer-size=100
+         --trace-fill-policy=ring_buffer
+         --profile-format
+         console
+         json
+         text
+         --process-freq
+         1000
+         --process-wait
+         0.0
+         --process-duration
+         10
+         --cpus
+         0-4
+         --gpus
+         0
+         -f
+         1000
+         --sampling-wait
+         1.0
+         --sampling-duration
+         10
+         -t
+         0-3
+         --sample-cputime
+         1000
+         1.0
+         0-3
+         --sample-realtime
+         10
+         0.5
+         0-3
+         -I
+         all
+         -E
+         mutex-locks
+         rw-locks
+         spin-locks
+         -C
+         perf::INSTRUCTIONS
+         --inlines
+         --hsa-interrupt
+         0
+         --use-causal=false
+         --use-kokkosp
+         --num-threads-hint=4
+         --sampling-allocator-size=32
+         --ci
+         --dl-verbose=3
+         --perfetto-annotations=off
+         --kokkosp-kernel-logger
+         --kokkosp-name-length-max=1024
+         --kokkosp-prefix="[kokkos]"
+         --tmpdir
+         ${PROJECT_BINARY_DIR}/omnitrace-tests-config/tmpdir
+         --perfetto-backend
+         inprocess
+         --use-pid
+         false
+         --time-output
+         off
+         --thread-pool-size
+         0
+         --timemory-components
+
wall_clock + cpu_clock + peak_rss + page_rss + -- + $ + 5 + TIMEOUT 45 + LABELS "omnitrace-run") diff --git a/source/bin/tests/sleeper.cpp b/source/bin/tests/sleeper.cpp new file mode 100644 index 0000000000..74fe0b094f --- /dev/null +++ b/source/bin/tests/sleeper.cpp @@ -0,0 +1,28 @@ + +#include +#include +#include +#include + +using clock_type = std::chrono::steady_clock; + +int +main(int argc, char** argv) +{ + double _val = 0.0; + if(argc > 0) + { + auto _ss = std::stringstream{}; + _ss << argv[1]; + _ss >> _val; + } + + intmax_t _nsec = _val * std::nano::den; + auto _end = clock_type::now() + std::chrono::nanoseconds{ _nsec }; + while(clock_type::now() < _end) + { + std::this_thread::sleep_for(std::chrono::nanoseconds{ _nsec / 10 }); + } + + return 0; +} diff --git a/source/docs/getting_started.md b/source/docs/getting_started.md index 64b7b59259..e6019938a5 100644 --- a/source/docs/getting_started.md +++ b/source/docs/getting_started.md @@ -97,7 +97,7 @@ e.g., omnitrace's meaning of the term "module" when instrumenting Python. - Binary rewrites only instrument the functions defined in the target binary, whereas runtime instrumentation can/will instrument functions defined in the shared libraries which are linked into the target binary - When using binary instrumentation with MPI, avoid runtime instrumentation - Runtime instrumentation requires a fork + ptrace: which is generally incompatible with how MPI applications spawn their processes - - Binary rewrite the executable using MPI (and, optionally, libraries used by the executable) and execute the generated instrumented executable instead of the original, e.g. `mpirun -n 2 ./myexe` should be `mpirun -n 2 ./myexe.inst` where `myexe.inst` is the generated instrumented `myexe` executable. + - Binary rewrite the executable using MPI (and, optionally, libraries used by the executable) and execute the generated instrumented executable via `omnitrace-run` instead of the original, e.g. 
`mpirun -n 2 ./myexe` should be `mpirun -n 2 omnitrace-run -- ./myexe.inst` where `myexe.inst` is the generated instrumented `myexe` executable. ## Data Collection Mode(s) diff --git a/source/docs/instrumenting.md b/source/docs/instrumenting.md index aa06d02009..58ee253fde 100644 --- a/source/docs/instrumenting.md +++ b/source/docs/instrumenting.md @@ -335,7 +335,7 @@ is consistent with the LLVM style of using a standalone double-hyphen (`--`). Al are interpreted as belonging to omnitrace and all arguments following the double-hyphen are interpreted as the application and it's arguments. In binary rewrite mode, all application arguments after the first argument are ignored, i.e. `./omnitrace-instrument -o ls.inst -- ls -l` interprets `ls` as the target to instrument (ignores the `-l` argument) -and generates a `ls.inst` executable that you can subsequently run `ls.inst -l` with. +and generates a `ls.inst` executable that you can subsequently run `omnitrace-run -- ls.inst -l` with. ## Runtime Instrumentation @@ -781,7 +781,7 @@ For example, if the following sequence of commands are run: omnitrace-instrument -o ./foo.inst -- ./foo export OMNITRACE_USE_SAMPLING=ON export OMNITRACE_SAMPLING_FREQ=5 -./foo.inst +omnitrace-run -- ./foo.inst ``` These configuration settings will not be preserved in another session, whereas: @@ -794,7 +794,7 @@ will preserve those environment variables: ```shell # will sample 5x per second -./foo.samp +omnitrace-run -- ./foo.samp ``` while still allowing the subsequent session to override those defaults: @@ -802,7 +802,7 @@ while still allowing the subsequent session to override those defaults: ```shell # will sample 100x per second export OMNITRACE_SAMPLING_FREQ=100 -./foo.samp +omnitrace-run -- ./foo.samp ``` ### Troubleshooting diff --git a/source/docs/sampling.md b/source/docs/sampling.md index 5cc63253ff..ff4c5a1aa1 100644 --- a/source/docs/sampling.md +++ b/source/docs/sampling.md @@ -15,7 +15,7 @@ Call-stack sampling can be activated 
with either a binary instrumented via the ` ```console omnitrace-instrument -M sampling -o foo.inst -- foo -./foo.inst +omnitrace-run -- ./foo.inst ``` - Runtime instrumentation with only instrumentation necessary to start/stop sampling diff --git a/source/docs/user_api.md b/source/docs/user_api.md index e4989a060a..8fa04f81ef 100644 --- a/source/docs/user_api.md +++ b/source/docs/user_api.md @@ -169,11 +169,7 @@ custom_push_region(const char* name) ```console $ omnitrace-instrument -l --min-instructions=8 -E custom_push_region -o -- ./user-api ... -$ export OMNITRACE_USE_TIMEMORY=ON -$ export OMNITRACE_USE_PID=OFF -$ export OMNITRACE_TIME_OUTPUT=OFF -$ export OMNITRACE_OUTPUT_PATH=omnitrace-example-output -$ ./user-api.inst 20 4 100 +$ omnitrace-run --profile --use-pid off --time-output off -- ./user-api.inst 20 4 100 Pushing custom region :: ./user-api.inst [omnitrace][omnitrace_init_tooling] Instrumentation mode: Trace diff --git a/source/lib/common/environment.hpp b/source/lib/common/environment.hpp index 4aecc58396..7928715dfb 100644 --- a/source/lib/common/environment.hpp +++ b/source/lib/common/environment.hpp @@ -26,8 +26,10 @@ #include #include #include +#include #include #include +#include namespace omnitrace { @@ -36,7 +38,7 @@ inline namespace common namespace { inline std::string -get_env(std::string_view env_id, std::string_view _default) +get_env_impl(std::string_view env_id, std::string_view _default) { if(env_id.empty()) return std::string{ _default }; char* env_var = ::std::getenv(env_id.data()); @@ -45,13 +47,13 @@ get_env(std::string_view env_id, std::string_view _default) } inline std::string -get_env(std::string_view env_id, const char* _default) +get_env_impl(std::string_view env_id, const char* _default) { - return get_env(env_id, std::string_view{ _default }); + return get_env_impl(env_id, std::string_view{ _default }); } inline int -get_env(std::string_view env_id, int _default) +get_env_impl(std::string_view env_id, int _default) { 
if(env_id.empty()) return _default; char* env_var = ::std::getenv(env_id.data()); @@ -73,15 +75,21 @@ get_env(std::string_view env_id, int _default) } inline bool -get_env(std::string_view env_id, bool _default) +get_env_impl(std::string_view env_id, bool _default) { if(env_id.empty()) return _default; char* env_var = ::std::getenv(env_id.data()); if(env_var) { + if(std::string_view{ env_var }.empty()) + throw std::runtime_error(std::string{ "No boolean value provided for " } + + std::string{ env_id }); + if(std::string_view{ env_var }.find_first_not_of("0123456789") == std::string_view::npos) + { return static_cast(std::stoi(env_var)); + } else { for(size_t i = 0; i < strlen(env_var); ++i) @@ -93,6 +101,22 @@ get_env(std::string_view env_id, bool _default) } return _default; } + +template +inline auto +get_env(std::string_view env_id, Tp&& _default) +{ + if constexpr(std::is_enum::value) + { + using Up = std::underlying_type_t; + // cast to underlying type -> get_env -> cast to enum type + return static_cast(get_env_impl(env_id, static_cast(_default))); + } + else + { + return get_env_impl(env_id, std::forward(_default)); + } +} } // namespace } // namespace common } // namespace omnitrace diff --git a/source/lib/core/CMakeLists.txt b/source/lib/core/CMakeLists.txt index 920af695e6..05998737f0 100644 --- a/source/lib/core/CMakeLists.txt +++ b/source/lib/core/CMakeLists.txt @@ -3,6 +3,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/defines.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/defines.hpp @ONLY) set(core_sources + ${CMAKE_CURRENT_LIST_DIR}/argparse.cpp ${CMAKE_CURRENT_LIST_DIR}/categories.cpp ${CMAKE_CURRENT_LIST_DIR}/config.cpp ${CMAKE_CURRENT_LIST_DIR}/constraint.cpp @@ -16,6 +17,7 @@ set(core_sources ${CMAKE_CURRENT_LIST_DIR}/timemory.cpp) set(core_headers + ${CMAKE_CURRENT_LIST_DIR}/argparse.hpp ${CMAKE_CURRENT_LIST_DIR}/categories.hpp ${CMAKE_CURRENT_LIST_DIR}/common.hpp ${CMAKE_CURRENT_LIST_DIR}/concepts.hpp diff --git a/source/lib/core/argparse.cpp 
b/source/lib/core/argparse.cpp new file mode 100644 index 0000000000..d56b10af4a --- /dev/null +++ b/source/lib/core/argparse.cpp @@ -0,0 +1,1418 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "argparse.hpp" +#include "common/join.hpp" +#include "config.hpp" +#include "defines.hpp" +#include "exception.hpp" +#include "gpu.hpp" +#include "state.hpp" + +#include +#include +#include + +namespace omnitrace +{ +namespace argparse +{ +namespace +{ +namespace filepath = ::tim::filepath; +using array_config_t = ::timemory::join::array_config; +using ::tim::get_env; +using ::timemory::join::join; + +auto +get_clock_id_choices() +{ + auto clock_name = [](std::string _v) { + constexpr auto _clock_prefix = std::string_view{ "clock_" }; + for(auto& itr : _v) + itr = tolower(itr); + auto _pos = _v.find(_clock_prefix); + if(_pos == 0) _v = _v.substr(_pos + _clock_prefix.length()); + if(_v == "process_cputime_id") _v = "cputime"; + return _v; + }; + +#define OMNITRACE_CLOCK_IDENTIFIER(VAL) \ + std::make_tuple(clock_name(#VAL), VAL, std::string_view{ #VAL }) + + auto _choices = strvec_t{}; + auto _aliases = std::map{}; + for(auto itr : { OMNITRACE_CLOCK_IDENTIFIER(CLOCK_REALTIME), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_MONOTONIC), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_PROCESS_CPUTIME_ID), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_MONOTONIC_RAW), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_REALTIME_COARSE), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_MONOTONIC_COARSE), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_BOOTTIME) }) + { + auto _choice = std::to_string(std::get<1>(itr)); + _choices.emplace_back(_choice); + _aliases[_choice] = { std::get<0>(itr), std::string{ std::get<2>(itr) } }; + } + +#undef OMNITRACE_CLOCK_IDENTIFIER + + return std::make_pair(_choices, _aliases); +} + +auto +get_realpath(const std::string& _path) +{ + return filepath::realpath(_path, nullptr, false); +} + +enum update_mode : int +{ + UPD_REPLACE = 0x1, + UPD_PREPEND = 0x2, + UPD_APPEND = 0x3, + UPD_WEAK = 0x4, +}; + +template +void +update_env(parser_data& _data, std::string_view _env_var, Tp&& _env_val, + update_mode&& _mode = UPD_REPLACE, std::string_view _join_delim = ":") +{ + _data.updated.emplace(_env_var); + + auto 
_prepend = (_mode & UPD_PREPEND) == UPD_PREPEND; + auto _append = (_mode & UPD_APPEND) == UPD_APPEND; + auto _weak_upd = (_mode & UPD_WEAK) == UPD_WEAK; + + auto _key = join("", _env_var, "="); + for(auto& itr : _data.current) + { + if(!itr) continue; + if(std::string_view{ itr }.find(_key) == 0) + { + if(_weak_upd) + { + // if the value has changed, do not update but allow overridding the value + // inherited from the initial env + if(_data.initial.find(std::string{ itr }) == _data.initial.end()) return; + } + + if(_prepend || _append) + { + if(std::string_view{ itr }.find(join("", _env_val)) == + std::string_view::npos) + { + auto _val = std::string{ itr }.substr(_key.length()); + free(itr); + if(_prepend) + itr = + strdup(join('=', _env_var, join(_join_delim, _val, _env_val)) + .c_str()); + else + itr = + strdup(join('=', _env_var, join(_join_delim, _env_val, _val)) + .c_str()); + } + } + else + { + free(itr); + itr = strdup(omnitrace::common::join('=', _env_var, _env_val).c_str()); + } + return; + } + } + _data.current.emplace_back( + strdup(omnitrace::common::join('=', _env_var, _env_val).c_str())); +} + +void +remove_env(parser_data& _data, std::string_view _env_var) +{ + auto _key = join("", _env_var, "="); + auto _match = [&_key](auto itr) { return std::string_view{ itr }.find(_key) == 0; }; + + auto& _environ = _data.current; + _environ.erase(std::remove_if(_environ.begin(), _environ.end(), _match), + _environ.end()); + + auto& _initial = _data.initial; + for(const auto& itr : _initial) + { + if(std::string_view{ itr }.find(_key) == 0) + _environ.emplace_back(strdup(itr.c_str())); + } +} + +std::string +get_internal_libpath(const std::string& _lib) +{ + auto _exe = filepath::realpath("/proc/self/exe", nullptr, false); + auto _pos = _exe.find_last_of('/'); + auto _dir = filepath::get_cwd(); + if(_pos != std::string_view::npos) _dir = _exe.substr(0, _pos); + return filepath::realpath(omnitrace::common::join("/", _dir, "..", "lib", _lib), + nullptr, false); 
+} +} // namespace + +bool +default_setting_filter(vsetting_t* _v, const parser_data& _data) +{ + return (_data.processed_settings.count(_v) == 0 && + _data.processed_environs.count(_v->get_name()) == 0 && + _data.processed_environs.count(_v->get_env_name()) == 0); +} + +bool +default_environ_filter(std::string_view _v, const parser_data& _data) +{ + return (_data.processed_environs.count(_v.data()) == 0); +} + +bool +default_grouping_filter(std::string_view _v, const parser_data& _data) +{ + return (_data.processed_groups.count(_v.data()) == 0); +} + +parser_data& +init_parser(parser_data& _data) +{ + tim::settings::suppress_config() = true; + tim::settings::suppress_parsing() = true; + + set_state(State::Init); + config::configure_settings(false); + + auto& _current = _data.current; + auto& _initial = _data.initial; + + if(environ != nullptr) + { + int idx = 0; + while(environ[idx] != nullptr) + { + auto* _v = environ[idx++]; + _initial.emplace(_v); + _current.emplace_back(strdup(_v)); + } + } + + _data.dl_libpath = get_realpath(get_internal_libpath("libomnitrace-dl.so").c_str()); + _data.omni_libpath = get_realpath(get_internal_libpath("libomnitrace.so").c_str()); + +#if defined(OMNITRACE_USE_ROCTRACER) || defined(OMNITRACE_USE_ROCPROFILER) + update_env(_data, "HSA_TOOLS_LIB", _data.dl_libpath); + if(!getenv("HSA_TOOLS_REPORT_LOAD_FAILURE")) + update_env(_data, "HSA_TOOLS_REPORT_LOAD_FAILURE", "1"); +#endif + +#if defined(OMNITRACE_USE_ROCPROFILER) + update_env(_data, "ROCP_TOOL_LIB", _data.omni_libpath); + if(!getenv("ROCP_HSA_INTERCEPT")) update_env(_data, "ROCP_HSA_INTERCEPT", "1"); +#endif + +#if defined(OMNITRACE_USE_OMPT) + if(!getenv("OMP_TOOL_LIBRARIES")) + update_env(_data, "OMP_TOOL_LIBRARIES", _data.dl_libpath, UPD_PREPEND); +#endif + + return _data; +} + +parser_data& +add_ld_preload(parser_data& _data) +{ + update_env(_data, "LD_PRELOAD", _data.dl_libpath, UPD_APPEND); + return _data; +} + +parser_data& +add_core_arguments(parser_t& _parser, 
parser_data& _data) +{ + const auto* _cputime_desc = + R"(Sample based on a CPU-clock timer (default). Accepts zero or more arguments: + %{INDENT}%0. Enables sampling based on CPU-clock timer. + %{INDENT}%1. Interrupts per second. E.g., 100 == sample every 10 milliseconds of CPU-time. + %{INDENT}%2. Delay (in seconds of CPU-clock time). I.e., how long each thread should wait before taking first sample. + %{INDENT}%3+ Thread IDs to target for sampling, starting at 0 (the main thread). + %{INDENT}% May be specified as index or range, e.g., '0 2-4' will be interpreted as: + %{INDENT}% sample the main thread (0), do not sample the first child thread but sample the 2nd, 3rd, and 4th child threads)"; + + const auto* _realtime_desc = + R"(Sample based on a real-clock timer. Accepts zero or more arguments: + %{INDENT}%0. Enables sampling based on real-clock timer. + %{INDENT}%1. Interrupts per second. E.g., 100 == sample every 10 milliseconds of realtime. + %{INDENT}%2. Delay (in seconds of real-clock time). I.e., how long each thread should wait before taking first sample. + %{INDENT}%3+ Thread IDs to target for sampling, starting at 0 (the main thread). + %{INDENT}% May be specified as index or range, e.g., '0 2-4' will be interpreted as: + %{INDENT}% sample the main thread (0), do not sample the first child thread but sample the 2nd, 3rd, and 4th child threads + %{INDENT}% When sampling with a real-clock timer, please note that enabling this will cause threads which are typically "idle" + %{INDENT}% to consume more resources since, while idle, the real-clock time increases (and therefore triggers taking samples) + %{INDENT}% whereas the CPU-clock time does not.)"; + + const auto* _hsa_interrupt_desc = + R"(Set the value of the HSA_ENABLE_INTERRUPT environment variable. 
+%{INDENT}% ROCm version 5.2 and older have a bug which will cause a deadlock if a sample is taken while waiting for the signal +%{INDENT}% that a kernel completed -- which happens when sampling with a real-clock timer. We require this option to be set to +%{INDENT}% when --realtime is specified to make users aware that, while this may fix the bug, it can have a negative impact on +%{INDENT}% performance. +%{INDENT}% Values: +%{INDENT}% 0 avoid triggering the bug, potentially at the cost of reduced performance +%{INDENT}% 1 do not modify how ROCm is notified about kernel completion)"; + + auto _realtime_reqs = + (tim::get_env("HSA_ENABLE_INTERRUPT", std::string{}, false).empty()) + ? strvec_t{ "hsa-interrupt" } + : strvec_t{}; + +#if OMNITRACE_USE_ROCTRACER == 0 && OMNITRACE_USE_ROCPROFILER == 0 + _realtime_reqs.clear(); +#endif + + const auto* _trace_policy_desc = + R"(Policy for new data when the buffer size limit is reached: + %{INDENT}%- discard : new data is ignored + %{INDENT}%- ring_buffer : new data overwrites oldest data)"; + + _parser.start_group("DEBUG OPTIONS", ""); + + if(_data.environ_filter("monochrome", _data)) + { + _parser.add_argument({ "--monochrome" }, "Disable colorized output") + .max_count(1) + .dtype("bool") + .action([&](parser_t& p) { + auto _monochrome = p.get("monochrome"); + _data.monochrome = _monochrome; + p.set_use_color(!_monochrome); + update_env(_data, "OMNITRACE_MONOCHROME", (_monochrome) ? "1" : "0"); + update_env(_data, "MONOCHROME", (_monochrome) ? 
"1" : "0"); + }); + + _data.processed_environs.emplace("monochrome"); + } + + if(_data.environ_filter("debug", _data)) + { + _parser.add_argument({ "--debug" }, "Debug output") + .max_count(1) + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_DEBUG", p.get("debug")); + }); + + _data.processed_environs.emplace("debug"); + } + + if(_data.environ_filter("verbose", _data)) + { + _parser.add_argument({ "-v", "--verbose" }, "Verbose output") + .count(1) + .dtype("integral") + .action([&](parser_t& p) { + auto _v = p.get("verbose"); + _data.verbose = _v; + update_env(_data, "OMNITRACE_VERBOSE", _v); + }); + + _data.processed_environs.emplace("verbose"); + } + + add_group_arguments(_parser, "debugging", _data); + add_group_arguments(_parser, "mode", _data, true); + + _parser.start_group("GENERAL OPTIONS", + "These are options which are ubiquitously applied"); + + if(_data.environ_filter("config", _data)) + { + _parser.add_argument({ "-c", "--config" }, "Configuration file") + .min_count(1) + .dtype("filepath") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_CONFIG_FILE", + join(array_config_t{ ":" }, p.get("config"))); + }); + + _data.processed_environs.emplace("config"); + _data.processed_environs.emplace("config_file"); + } + + if(_data.environ_filter("output", _data)) + { + _parser + .add_argument( + { "-o", "--output" }, + "Output path. 
Accepts 1-2 parameters corresponding to the output " + "path and the output prefix") + .min_count(1) + .max_count(2) + .dtype("path [prefix]") + .action([&](parser_t& p) { + auto _v = p.get("output"); + update_env(_data, "OMNITRACE_OUTPUT_PATH", _v.at(0)); + if(_v.size() > 1) update_env(_data, "OMNITRACE_OUTPUT_PREFIX", _v.at(1)); + }); + + _data.processed_environs.emplace("output"); + _data.processed_environs.emplace("output_path"); + _data.processed_environs.emplace("output_prefix"); + } + + if(_data.environ_filter("trace", _data)) + { + _parser + .add_argument({ "-T", "--trace" }, + "Generate a detailed trace (perfetto output)") + .max_count(1) + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_USE_PERFETTO", p.get("trace")); + }); + + _data.processed_environs.emplace("trace"); + } + + if(_data.environ_filter("profile", _data)) + { + _parser + .add_argument( + { "-P", "--profile" }, + "Generate a call-stack-based profile (conflicts with --flat-profile)") + .max_count(1) + .conflicts({ "flat-profile" }) + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_USE_TIMEMORY", p.get("profile")); + }); + + _data.processed_environs.emplace("profile"); + } + + if(_data.environ_filter("flat_profile", _data)) + { + _parser + .add_argument({ "-F", "--flat-profile" }, + "Generate a flat profile (conflicts with --profile)") + .max_count(1) + .conflicts({ "profile" }) + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_USE_TIMEMORY", p.get("flat-profile")); + update_env(_data, "OMNITRACE_FLAT_PROFILE", p.get("flat-profile")); + }); + + _data.processed_environs.emplace("flat_profile"); + } + + if(_data.environ_filter("sampling", _data)) + { + _parser + .add_argument({ "-S", "--sample" }, + "Enable statistical sampling of call-stack") + .min_count(0) + .max_count(2) + .dtype("timer-type") + .choices({ "cputime", "realtime" }) + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_USE_SAMPLING", true); + auto _modes = p.get("sample"); + 
if(!_modes.empty()) + { + update_env(_data, "OMNITRACE_SAMPLING_CPUTIME", + _modes.count("cputime") > 0, UPD_WEAK); + update_env(_data, "OMNITRACE_SAMPLING_REALTIME", + _modes.count("realtime") > 0, UPD_WEAK); + } + }); + + _data.processed_environs.emplace("cpu_freq"); + } + + if(_data.environ_filter("host", _data)) + { + _parser + .add_argument({ "-H", "--host" }, + "Enable sampling host-based metrics for the process. E.g. CPU " + "frequency, memory usage, etc.") + .max_count(1) + .action([&](parser_t& p) { + auto _h = p.get("host"); + auto _d = p.get("device"); + update_env(_data, "OMNITRACE_USE_PROCESS_SAMPLING", _h || _d); + update_env(_data, "OMNITRACE_CPU_FREQ_ENABLED", _h); + }); + + _data.processed_environs.emplace("host"); + _data.processed_environs.emplace("cpu_freq"); + } + + if(_data.environ_filter("device", _data)) + { + _parser + .add_argument( + { "-D", "--device" }, + "Enable sampling device-based metrics for the process. E.g. GPU " + "temperature, memory usage, etc.") + .max_count(1) + .action([&](parser_t& p) { + auto _h = p.get("host"); + auto _d = p.get("device"); + update_env(_data, "OMNITRACE_USE_PROCESS_SAMPLING", _h || _d); + update_env(_data, "OMNITRACE_USE_ROCM_SMI", _d); + }); + + _data.processed_environs.emplace("device"); + _data.processed_environs.emplace("rocm_smi"); + } + + if(_data.environ_filter("wait", _data)) + { + _parser + .add_argument( + { "-w", "--wait" }, + "This option is a combination of '--trace-wait' and " + "'--sampling-wait'. 
See the descriptions for those two options.") + .count(1) + .dtype("seconds") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_TRACE_DELAY", p.get("wait"), + UPD_WEAK); + update_env(_data, "OMNITRACE_SAMPLING_DELAY", p.get("wait"), + UPD_WEAK); + update_env(_data, "OMNITRACE_CAUSAL_DELAY", p.get("wait"), + UPD_WEAK); + }); + + _data.processed_environs.emplace("wait"); + } + + if(_data.environ_filter("duration", _data)) + { + _parser + .add_argument( + { "-d", "--duration" }, + "This option is a combination of '--trace-duration' and " + "'--sampling-duration'. See the descriptions for those two options.") + .count(1) + .dtype("seconds") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_TRACE_DURATION", p.get("duration"), + UPD_WEAK); + update_env(_data, "OMNITRACE_SAMPLING_DURATION", + p.get("duration"), UPD_WEAK); + update_env(_data, "OMNITRACE_CAUSAL_DURATION", p.get("duration"), + UPD_WEAK); + }); + + _data.processed_environs.emplace("duration"); + } + + if(_data.environ_filter("periods", _data)) + { + _parser + .add_argument({ "--periods" }, + "Similar to specifying delay and/or duration except in " + "the form :, ::, " + "and/or :::") + .min_count(1) + .dtype("period-spec(s)") + .action([&](parser_t& p) { + update_env( + _data, "OMNITRACE_TRACE_PERIODS", + join(array_config_t{ " ", "", "" }, p.get("periods")), + UPD_WEAK); + }); + + _data.processed_environs.emplace("periods"); + } + + strset_t _backend_choices = { "all", "kokkosp", "mpip", "ompt", + "rcclp", "rocm-smi", "roctracer", "rocprofiler", + "roctx", "mutex-locks", "spin-locks", "rw-locks" }; + +#if !defined(OMNITRACE_USE_MPI) && !defined(OMNITRACE_USE_MPI_HEADERS) + _backend_choices.erase("mpip"); +#endif + +#if !defined(OMNITRACE_USE_OMPT) + _backend_choices.erase("ompt"); +#endif + +#if !defined(OMNITRACE_USE_RCCL) + _backend_choices.erase("rcclp"); +#endif + +#if !defined(OMNITRACE_USE_ROCM_SMI) + _backend_choices.erase("rocm-smi"); +#endif + +#if 
!defined(OMNITRACE_USE_ROCTRACER) + _backend_choices.erase("roctracer"); + _backend_choices.erase("roctx"); +#endif + +#if !defined(OMNITRACE_USE_ROCPROFILER) + _backend_choices.erase("rocprofiler"); +#endif + + if(gpu::device_count() == 0) + { + _backend_choices.erase("rcclp"); + _backend_choices.erase("rocm-smi"); + _backend_choices.erase("roctracer"); + _backend_choices.erase("rocprofiler"); + +#if defined(OMNITRACE_USE_RCCL) + update_env(_data, "OMNITRACE_USE_RCCLP", false); +#endif + +#if defined(OMNITRACE_USE_ROCM_SMI) + update_env(_data, "OMNITRACE_USE_ROCM_SMI", false); +#endif + +#if defined(OMNITRACE_USE_ROCTRACER) + update_env(_data, "OMNITRACE_USE_ROCTRACER", false); + update_env(_data, "OMNITRACE_USE_ROCTX", false); + update_env(_data, "OMNITRACE_ROCTRACER_HSA_ACTIVITY", false); + update_env(_data, "OMNITRACE_ROCTRACER_HIP_ACTIVITY", false); + _backend_choices.erase("roctracer"); + _backend_choices.erase("roctx"); +#endif + +#if defined(OMNITRACE_USE_ROCPROFILER) + update_env(_data, "OMNITRACE_USE_ROCPROFILER", false); +#endif + } + + _parser.start_group("BACKEND OPTIONS", + "These options control region information captured " + "w/o sampling or instrumentation"); + + if(_data.environ_filter("include", _data)) + { + _parser.add_argument({ "-I", "--include" }, "Include data from these backends") + .min_count(1) + .max_count(_backend_choices.size()) + .dtype("[backend...]") + .choices(_backend_choices) + .action([&](parser_t& p) { + auto _v = p.get("include"); + auto _update = [&](const auto& _opt, bool _cond) { + if(_cond || _v.count("all") > 0) update_env(_data, _opt, true); + }; + _update("OMNITRACE_USE_KOKKOSP", _v.count("kokkosp") > 0); + _update("OMNITRACE_USE_MPIP", _v.count("mpip") > 0); + _update("OMNITRACE_USE_OMPT", _v.count("ompt") > 0); + _update("OMNITRACE_USE_RCCLP", _v.count("rcclp") > 0); + _update("OMNITRACE_USE_ROCTX", _v.count("roctx") > 0); + _update("OMNITRACE_USE_ROCM_SMI", _v.count("rocm-smi") > 0); + 
_update("OMNITRACE_USE_ROCTRACER", _v.count("roctracer") > 0); + _update("OMNITRACE_USE_ROCPROFILER", _v.count("rocprofiler") > 0); + _update("OMNITRACE_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); + _update("OMNITRACE_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); + _update("OMNITRACE_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); + + if(_v.count("all") > 0 || _v.count("ompt") > 0) + update_env(_data, "OMP_TOOL_LIBRARIES", _data.dl_libpath, + UPD_PREPEND); + + if(_v.count("all") > 0 || _v.count("kokkosp") > 0) + update_env(_data, "KOKKOS_PROFILE_LIBRARY", _data.omni_libpath, + UPD_PREPEND); + }); + + _data.processed_environs.emplace("include"); + } + + if(_data.environ_filter("exclude", _data)) + { + _parser.add_argument({ "-E", "--exclude" }, "Exclude data from these backends") + .min_count(1) + .max_count(_backend_choices.size()) + .dtype("[backend...]") + .choices(_backend_choices) + .action([&](parser_t& p) { + auto _v = p.get("exclude"); + auto _update = [&](const auto& _opt, bool _cond) { + if(_cond || _v.count("all") > 0) update_env(_data, _opt, false); + }; + _update("OMNITRACE_USE_KOKKOSP", _v.count("kokkosp") > 0); + _update("OMNITRACE_USE_MPIP", _v.count("mpip") > 0); + _update("OMNITRACE_USE_OMPT", _v.count("ompt") > 0); + _update("OMNITRACE_USE_RCCLP", _v.count("rcclp") > 0); + _update("OMNITRACE_USE_ROCTX", _v.count("roctx") > 0); + _update("OMNITRACE_USE_ROCM_SMI", _v.count("rocm-smi") > 0); + _update("OMNITRACE_USE_ROCTRACER", _v.count("roctracer") > 0); + _update("OMNITRACE_USE_ROCPROFILER", _v.count("rocprofiler") > 0); + _update("OMNITRACE_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); + _update("OMNITRACE_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); + _update("OMNITRACE_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); + + if(_v.count("all") > 0 || + (_v.count("roctracer") > 0 && _v.count("rocprofiler") > 0)) + { + remove_env(_data, "HSA_TOOLS_LIB"); + remove_env(_data, "HSA_TOOLS_REPORT_LOAD_FAILURE"); + } + + 
if(_v.count("all") > 0 || _v.count("rocprofiler") > 0) + { + remove_env(_data, "ROCP_TOOL_LIB"); + remove_env(_data, "ROCP_HSA_INTERCEPT"); + } + + if(_v.count("all") > 0 || _v.count("ompt") > 0) + remove_env(_data, "OMP_TOOL_LIBRARIES"); + + if(_v.count("all") > 0 || _v.count("kokkosp") > 0) + remove_env(_data, "KOKKOS_PROFILE_LIBRARY"); + }); + + _data.processed_environs.emplace("exclude"); + } + + add_group_arguments(_parser, "backend", _data); + add_group_arguments(_parser, "parallelism", _data, true); + + if(_data.environ_filter("launcher", _data)) + { + _parser + .add_argument( + { "-l", "--launcher" }, + "When running MPI jobs, typically the associated '--' for this " + "executable should be right before the target executable, e.g. `mpirun " + "-n 2 -- `. This options " + "enables prefixing the entire command (i.e. before `mpirun`, `srun`, " + "etc.). Pass the name of the target executable (or a regex for matching " + "to the name of the target) as the argument to this option and this " + "executable will insert itself a second time in the appropriate " + "location, e.g. ` --launcher sleep -- mpirun -n 2 sleep 10` is " + "equivalent to `mpirun -n 2 -- sleep 10`") + .count(1) + .dtype("target-exe") + .action( + [&](parser_t& p) { _data.launcher = p.get("launcher"); }); + + _data.processed_environs.emplace("launcher"); + } + + _parser.start_group("TRACING OPTIONS", "Specific options controlling tracing (i.e. " + "deterministic measurements of every event)"); + + if(_data.environ_filter("trace_file", _data)) + { + _parser + .add_argument( + { "--trace-file" }, + "Specify the trace output filename. 
Relative filepath will be with " + "respect to output path and output prefix.") + .count(1) + .dtype("filepath") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_PERFETTO_FILE", + p.get("trace-file")); + }); + + _data.processed_environs.emplace("trace_file"); + _data.processed_environs.emplace("perfetto_file"); + } + + if(_data.environ_filter("trace_buffer_size", _data)) + { + _parser + .add_argument({ "--trace-buffer-size" }, + "Size limit for the trace output (in KB)") + .count(1) + .dtype("KB") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_PERFETTO_BUFFER_SIZE_KB", + p.get("trace-buffer-size")); + }); + + _data.processed_environs.emplace("trace_buffer_size"); + _data.processed_environs.emplace("perfetto_buffer_size_kb"); + } + + if(_data.environ_filter("trace_fill_policy", _data)) + { + _parser.add_argument({ "--trace-fill-policy" }, _trace_policy_desc) + .count(1) + .dtype("policy") + .choices({ "discard", "ring_buffer" }) + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_PERFETTO_FILL_POLICY", + p.get("trace-fill-policy")); + }); + + _data.processed_environs.emplace("trace_fill_policy"); + _data.processed_environs.emplace("perfetto_fill_policy"); + } + + if(_data.environ_filter("trace_wait", _data)) + { + _parser + .add_argument( + { "--trace-wait" }, + "Set the wait time (in seconds) " + "before collecting trace and/or profiling data" + "(in seconds). By default, the duration is in seconds of realtime " + "but that can changed via --trace-clock-id.") + .count(1) + .dtype("seconds") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_TRACE_DELAY", p.get("trace-wait")); + }); + + _data.processed_environs.emplace("trace_delay"); + } + + if(_data.environ_filter("trace_duration", _data)) + { + _parser + .add_argument( + { "--trace-duration" }, + "Set the duration of the trace and/or profile data collection (in " + "seconds). 
By default, the duration is in seconds of realtime but " + "that can changed via --trace-clock-id.") + .count(1) + .dtype("seconds") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_TRACE_DURATION", + p.get("trace-duration")); + }); + + _data.processed_environs.emplace("trace_duration"); + } + + if(_data.environ_filter("trace_periods", _data)) + { + _parser + .add_argument( + { "--trace-periods" }, + "More powerful version of specifying trace delay and/or duration. Format " + "is one or more groups of: :, " + "::, " + "and/or :::.") + .min_count(1) + .dtype("period-spec(s)") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_TRACE_PERIODS", + join(array_config_t{ ",", "", "" }, + p.get("trace-periods"))); + }); + + _data.processed_environs.emplace("trace_periods"); + } + + if(_data.environ_filter("trace_clock_id", _data)) + { + auto _clock_id_choices = get_clock_id_choices(); + _parser + .add_argument( + { "--trace-clock-id" }, + "Set the default clock ID for for trace delay/duration. Note: " + "\"cputime\" is " + "the *process* CPU time and might need to be scaled based on the number " + "of " + "threads, i.e. 4 seconds of CPU-time for an application with 4 fully " + "active " + "threads would equate to ~1 second of realtime. If this proves to be " + "difficult to handle in practice, please file a feature request for " + "omnitrace to auto-scale based on the number of threads.") + .count(1) + .dtype("clock-id") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_TRACE_PERIOD_CLOCK_ID", + p.get("trace-clock-id")); + }) + .choices(_clock_id_choices.first) + .choice_aliases(_clock_id_choices.second); + + _data.processed_environs.emplace("trace_clock_id"); + _data.processed_environs.emplace("trace_period_clock_id"); + } + + _parser.start_group("PROFILE OPTIONS", + "Specific options controlling profiling (i.e. 
deterministic " + "measurements which are aggregated into a summary)"); + + if(_data.environ_filter("profile_format", _data)) + { + _parser.add_argument({ "--profile-format" }, "Data formats for profiling results") + .min_count(1) + .max_count(3) + .dtype("string") + .requires({ "profile|flat-profile" }) + .choices({ "text", "json", "console" }) + .action([&](parser_t& p) { + auto _v = p.get("profile-format"); + update_env(_data, "OMNITRACE_USE_TIMEMORY", true); + if(!_v.empty()) + { + update_env(_data, "OMNITRACE_TEXT_OUTPUT", _v.count("text") != 0); + update_env(_data, "OMNITRACE_JSON_OUTPUT", _v.count("json") != 0); + update_env(_data, "OMNITRACE_COUT_OUTPUT", _v.count("console") != 0); + } + }); + + _data.processed_environs.emplace("profile_format"); + _data.processed_environs.emplace("text_output"); + _data.processed_environs.emplace("json_output"); + _data.processed_environs.emplace("cout_output"); + } + + if(_data.environ_filter("profile_diff", _data)) + { + _parser + .add_argument( + { "--profile-diff" }, + "Generate a diff output b/t the profile collected and an existing " + "profile from another run Accepts 1-2 parameters corresponding to " + "the input path and the input prefix") + .min_count(1) + .max_count(2) + .dtype("path [prefix]") + .action([&](parser_t& p) { + auto _v = p.get("profile-diff"); + update_env(_data, "OMNITRACE_DIFF_OUTPUT", true); + update_env(_data, "OMNITRACE_INPUT_PATH", _v.at(0)); + if(_v.size() > 1) update_env(_data, "OMNITRACE_INPUT_PREFIX", _v.at(1)); + }); + + _data.processed_environs.emplace("profile_diff"); + _data.processed_environs.emplace("diff_output"); + _data.processed_environs.emplace("input_path"); + _data.processed_environs.emplace("input_prefix"); + } + + _parser.start_group( + "HOST/DEVICE (PROCESS SAMPLING) OPTIONS", + "Process sampling is background measurements for resources available to the " + "entire process. 
These samples are not tied to specific lines/regions of code"); + + if(_data.environ_filter("process_freq", _data)) + { + _parser + .add_argument({ "--process-freq" }, + "Set the default host/device sampling frequency " + "(number of interrupts per second)") + .count(1) + .dtype("floating-point") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_PROCESS_SAMPLING_FREQ", + p.get("process-freq")); + }); + + _data.processed_environs.emplace("process_freq"); + _data.processed_environs.emplace("process_sampling_freq"); + } + + if(_data.environ_filter("process_wait", _data)) + { + _parser + .add_argument({ "--process-wait" }, "Set the default wait time (i.e. delay) " + "before taking first host/device sample " + "(in seconds of realtime)") + .count(1) + .dtype("seconds") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_PROCESS_SAMPLING_DELAY", + p.get("process-wait")); + }); + + _data.processed_environs.emplace("process_wait"); + _data.processed_environs.emplace("process_sampling_delay"); + } + + if(_data.environ_filter("process_duration", _data)) + { + _parser + .add_argument( + { "--process-duration" }, + "Set the duration of the host/device sampling (in seconds of realtime)") + .count(1) + .dtype("seconds") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_SAMPLING_PROCESS_DURATION", + p.get("process-duration")); + }); + + _data.processed_environs.emplace("process_duration"); + _data.processed_environs.emplace("process_sampling_duration"); + } + + if(_data.environ_filter("cpus", _data)) + { + _parser + .add_argument( + { "--cpus" }, + "CPU IDs for frequency sampling. 
Supports integers and/or ranges") + .dtype("int and/or range") + .requires({ "host" }) + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_SAMPLING_CPUS", + join(array_config_t{ "," }, p.get("cpus"))); + }); + + _data.processed_environs.emplace("cpus"); + _data.processed_environs.emplace("sampling_cpus"); + } + + if(_data.environ_filter("gpus", _data)) + { + _parser + .add_argument({ "--gpus" }, + "GPU IDs for SMI queries. Supports integers and/or ranges") + .dtype("int and/or range") + .requires({ "device" }) + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_SAMPLING_GPUS", + join(array_config_t{ "," }, p.get("gpus"))); + }); + + _data.processed_environs.emplace("gpus"); + _data.processed_environs.emplace("sampling_gpus"); + } + + _parser.start_group("GENERAL SAMPLING OPTIONS", + "General options for timer-based sampling per-thread"); + + if(_data.environ_filter("sampling_freq", _data)) + { + _parser + .add_argument({ "-f", "--sampling-freq" }, + "Set the default sampling frequency " + "(number of interrupts per second)") + .count(1) + .dtype("floating-point") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_SAMPLING_FREQ", + p.get("sampling-freq")); + }); + + _data.processed_environs.emplace("sampling_freq"); + } + + if(_data.environ_filter("tids", _data)) + { + _parser + .add_argument( + { "-t", "--tids" }, + "Specify the default thread IDs for sampling, where 0 (zero) is " + "the main thread and each thread created by the target application " + "is assigned an atomically incrementing value.") + .min_count(1) + .dtype("int and/or range") + .action([&](parser_t& p) { + update_env( + _data, "OMNITRACE_SAMPLING_TIDS", + join(array_config_t{ ", " }, p.get>("tids"))); + }); + + _data.processed_environs.emplace("tids"); + _data.processed_environs.emplace("sampling_tids"); + } + + if(_data.environ_filter("sampling_wait", _data)) + { + _parser + .add_argument( + { "--sampling-wait" }, + "Set the default wait time (i.e. 
delay) before taking first sample " + "(in seconds). This delay time is based on the clock of the sampler, " + "i.e., a " + "delay of 1 second for CPU-clock sampler may not equal 1 second of " + "realtime") + .count(1) + .dtype("seconds") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_SAMPLING_DELAY", + p.get("sampling-wait")); + }); + + _data.processed_environs.emplace("sampling_wait"); + _data.processed_environs.emplace("sampling_delay"); + } + + if(_data.environ_filter("sampling_duration", _data)) + { + _parser + .add_argument( + { "--sampling-duration" }, + "Set the duration of the sampling (in seconds of realtime). I.e., it is " + "possible (currently) to set a CPU-clock time delay that exceeds the " + "real-time duration... resulting in zero samples being taken") + .count(1) + .dtype("seconds") + .action([&](parser_t& p) { + update_env(_data, "OMNITRACE_SAMPLING_DURATION", + p.get("sampling-duration")); + }); + + _data.processed_environs.emplace("sampling_duration"); + } + + _parser.start_group( + "SAMPLING TIMER OPTIONS", + "These options determine the heuristic for deciding when to take a sample"); + + if(_data.environ_filter("sample_cputime", _data)) + { + _parser.add_argument({ "--sample-cputime" }, _cputime_desc) + .min_count(0) + .dtype("[freq] [delay] [tids...]") + .action([&](parser_t& p) { + auto _v = p.get>("sample-cputime"); + update_env(_data, "OMNITRACE_SAMPLING_CPUTIME", true); + if(!_v.empty()) + { + update_env(_data, "OMNITRACE_SAMPLING_CPUTIME_FREQ", _v.front()); + _v.pop_front(); + } + if(!_v.empty()) + { + update_env(_data, "OMNITRACE_SAMPLING_CPUTIME_DELAY", _v.front()); + _v.pop_front(); + } + if(!_v.empty()) + { + update_env(_data, "OMNITRACE_SAMPLING_CPUTIME_TIDS", + join(array_config_t{ "," }, _v)); + } + }); + + _data.processed_environs.emplace("sampling_cputime"); + } + + if(_data.environ_filter("sample_realtime", _data)) + { + _parser.add_argument({ "--sample-realtime" }, _realtime_desc) + .min_count(0) + .dtype("[freq] 
[delay] [tids...]") + .requires(std::move(_realtime_reqs)) + .action([&](parser_t& p) { + auto _v = p.get>("sample-realtime"); + update_env(_data, "OMNITRACE_SAMPLING_REALTIME", true); + if(!_v.empty()) + { + update_env(_data, "OMNITRACE_SAMPLING_REALTIME_FREQ", _v.front()); + _v.pop_front(); + } + if(!_v.empty()) + { + update_env(_data, "OMNITRACE_SAMPLING_REALTIME_DELAY", _v.front()); + _v.pop_front(); + } + if(!_v.empty()) + { + update_env(_data, "OMNITRACE_SAMPLING_REALTIME_TIDS", + join(array_config_t{ "," }, _v)); + } + }); + + _data.processed_environs.emplace("sampling_realtime"); + } + + _parser.start_group( + "ADVANCED SAMPLING OPTIONS", + "These options determine the heuristic for deciding when to take a sample"); + + add_group_arguments(_parser, "sampling", _data); + + _parser.start_group("HARDWARE COUNTER OPTIONS", "See also: omnitrace-avail -H"); + + if(_data.environ_filter("cpu_events", _data)) + { + _parser + .add_argument({ "-C", "--cpu-events" }, + "Set the CPU hardware counter events to record (ref: " + "`omnitrace-avail -H -c CPU`)") + .min_count(1) + .dtype("[EVENT ...]") + .action([&](parser_t& p) { + auto _events = join(array_config_t{ "," }, p.get("cpu-events")); + update_env(_data, "OMNITRACE_PAPI_EVENTS", _events); + }); + + _data.processed_environs.emplace("cpu_events"); + _data.processed_environs.emplace("papi_events"); + } + +#if defined(OMNITRACE_USE_ROCPROFILER) + if(_data.environ_filter("gpu_events", _data)) + { + _parser + .add_argument({ "-G", "--gpu-events" }, + "Set the GPU hardware counter events to record (ref: " + "`omnitrace-avail -H -c GPU`)") + .min_count(1) + .dtype("[EVENT ...]") + .action([&](parser_t& p) { + auto _events = join(array_config_t{ "," }, p.get("gpu-events")); + update_env(_data, "OMNITRACE_ROCM_EVENTS", _events); + }); + + _data.processed_environs.emplace("gpu_events"); + _data.processed_environs.emplace("rocm_events"); + } +#endif + + add_group_arguments(_parser, "category", _data, true); + 
add_group_arguments(_parser, "io", _data, true);
+    add_group_arguments(_parser, "perfetto", _data, true);
+    add_group_arguments(_parser, "timemory", _data, true);
+    add_group_arguments(_parser, "rocm", _data, true);
+    add_group_arguments(_parser, "critical_trace", _data, true);
+
+    _parser.start_group("MISCELLANEOUS OPTIONS", "");
+
+    if(_data.environ_filter("inlines", _data))
+    {
+        _parser
+            .add_argument({ "-i", "--inlines" },
+                          "Include inline info in output when available")
+            .max_count(1)
+            .action([&](parser_t& p) {
+                update_env(_data, "OMNITRACE_SAMPLING_INCLUDE_INLINES",
+                           p.get("inlines"));
+            });
+
+        _data.processed_environs.emplace("inlines");
+        _data.processed_environs.emplace("sampling_include_inlines");
+    }
+
+    if(_data.environ_filter("hsa_interrupt", _data))
+    {
+        _parser.add_argument({ "--hsa-interrupt" }, _hsa_interrupt_desc)
+            .count(1)
+            .dtype("int")
+            .choices({ 0, 1 })
+            .action([&](parser_t& p) {
+                update_env(_data, "HSA_ENABLE_INTERRUPT", p.get("hsa-interrupt"));
+            });
+
+        _data.processed_environs.emplace("hsa_interrupt");
+    }
+
+    _parser.end_group();
+
+    return _data;
+}
+
+/// Register one `--<setting-name>` command-line option for every omnitrace setting
+/// that belongs to the category \p _group_name.
+///
+/// A setting is considered when it is in the "omnitrace" category, is not in the
+/// "deprecated" category, is not hidden, and passes both `_data.setting_filter` and
+/// `_data.environ_filter`. Each accepted setting is marked enabled. The option name is
+/// the setting name with every '_' replaced by '-'. When the option is parsed, its
+/// value is exported via update_env() under the setting's environment variable name.
+///
+/// \param _parser      parser that receives the options
+/// \param _group_name  settings category to expose
+/// \param _data        shared parser state (filters, processed settings, environment)
+/// \param _add_group   when true, the options are wrapped in a help group whose label
+///                     is the upper-cased \p _group_name
+/// \return \p _data (returned unchanged and without adding anything when
+///         `_data.grouping_filter` rejects \p _group_name)
+///
+/// NOTE(review): several template argument lists in this function look like they were
+/// lost (e.g. `std::shared_ptr&`, `std::vector>{}`, `p.get(_name)`, `static_cast(...)`)
+/// -- confirm against the original source.
+parser_data&
+add_group_arguments(parser_t& _parser, const std::string& _group_name, parser_data& _data,
+                    bool _add_group)
+{
+    if(!_data.grouping_filter(_group_name, _data)) return _data;
+
+    // option names use dashes where the setting name uses underscores
+    auto _get_name = [](const std::shared_ptr& itr) {
+        auto _name = itr->get_name();
+        auto _pos  = std::string::npos;
+        while((_pos = _name.find('_')) != std::string::npos)
+            _name = _name.replace(_pos, 1, "-");
+        return _name;
+    };
+
+    // registers a single option; returns false when the setting is filtered out
+    auto _add_option = [&_parser, &_data](const std::string&     _name,
+                                          const std::shared_ptr& itr) {
+        if(!_data.setting_filter(itr.get(), _data)) return false;
+
+        if(_name.empty())
+            throw exception("Error! empty name for " +
+                            itr->get_name());
+
+        _data.processed_settings.emplace(itr.get());
+
+        auto _opt_name = std::string{ "--" } + _name;
+        itr->set_command_line({ _opt_name });
+        auto* _arg = static_cast(itr->add_argument(_parser));
+        if(_arg)
+        {
+            // the action runs later, during parsing: capture itr and _name by value
+            _arg->action([&_data, itr, _name](parser_t& p) {
+                using namespace timemory::join;
+                // successive fallbacks until one of the reads yields a non-empty value
+                auto _value = join(array_config{ " ", "", "" }, p.get(_name));
+                if(_value.empty()) _value = p.get(_name);
+                if(_value.empty()) _value = join("", std::boolalpha, p.get(_name));
+                if(_value.empty())
+                    throw exception("Error! no value for " + _name);
+                update_env(_data, itr->get_env_name(), _value);
+            });
+        }
+        else
+        {
+            TIMEMORY_PRINTF_WARNING(stderr, "Warning! Option %s (%s) is not enabled\n",
+                                    _name.c_str(), itr->get_env_name().c_str());
+            // _name and itr are parameters of this lambda (bound to a temporary and to
+            // an element of the local _settings vector), so they are gone by the time
+            // the parser invokes the action. Capture them by value, exactly like the
+            // branch above; a blanket [&] capture left dangling references here.
+            _parser.add_argument({ _opt_name }, itr->get_description())
+                .action([&_data, itr, _name](parser_t& p) {
+                    using namespace timemory::join;
+                    auto _value =
+                        join(array_config{ " ", "", "" }, p.get(_name));
+                    if(_value.empty())
+                        throw exception("Error! no value for " +
+                                        _name);
+                    update_env(_data, itr->get_env_name(), _value);
+                });
+        }
+        return true;
+    };
+
+    auto _settings = std::vector>{};
+    for(auto& itr : *omnitrace::settings::instance())
+    {
+        if(itr.second->get_categories().count("omnitrace") == 0) continue;
+        if(itr.second->get_categories().count("deprecated") > 0) continue;
+        if(itr.second->get_hidden()) continue;
+        if(!_data.setting_filter(itr.second.get(), _data)) continue;
+        if(!_data.environ_filter(itr.second->get_name(), _data)) continue;
+        if(itr.second->get_categories().count(_group_name) == 0) continue;
+
+        itr.second->set_enabled(true);
+        _settings.emplace_back(itr.second);
+
+        if(itr.second->get_name() == "papi_events")
+        {
+            // drop the choices matching either pattern and point at omnitrace-avail
+            auto _choices = itr.second->get_choices();
+            _choices.erase(
+                std::remove_if(_choices.begin(), _choices.end(),
+                               [](const auto& citr) {
+                                   return std::regex_search(
+                                              citr,
+                                              std::regex{ "[A-Za-z0-9]:([A-Za-z_]+)" }) ||
+                                          std::regex_search(citr, std::regex{ "io:::" });
+                               }),
+                _choices.end());
+            // this function can run several times (once per group); only add the hint
+            // if an earlier pass has not already stored it in the setting's choices
+            const auto _hint =
+                std::string{ "... run `omnitrace-avail -H -c CPU` for full list ..." };
+            if(std::find(_choices.begin(), _choices.end(), _hint) == _choices.end())
+                _choices.emplace_back(_hint);
+            itr.second->set_choices(_choices);
+        }
+    }
+
+    // names sharing their first four characters are ordered alphabetically; all other
+    // pairs are ordered by name length.
+    // NOTE(review): mixing the two criteria does not look like a strict weak ordering
+    // (two names with a common prefix vs. a third name of intermediate length can form
+    // a cycle) -- confirm std::sort's requirements are met for the real setting names.
+    std::sort(_settings.begin(), _settings.end(), [](const auto& _lhs, const auto& _rhs) {
+        auto _lhs_v = _lhs->get_name();
+        auto _rhs_v = _rhs->get_name();
+        if(_lhs_v.length() > 4 && _rhs_v.length() > 4 &&
+           _lhs_v.substr(0, 4) == _rhs_v.substr(0, 4))
+            return _lhs_v < _rhs_v;
+        return _lhs_v.length() < _rhs_v.length();
+    });
+
+    if(_add_group)
+    {
+        auto _group_label = _group_name;
+        for(auto& c : _group_label)
+            c = toupper(c);
+        _parser.start_group(_group_label);
+    }
+
+    for(const auto& itr : _settings)
+    {
+        _add_option(_get_name(itr), itr);
+    }
+
+    if(_add_group) _parser.end_group();
+
+    return _data;
+}
+
+/// Add options for all remaining omnitrace settings, one help group per category.
+///
+/// Collects the settings that pass the same filters as add_group_arguments() (minus
+/// the category test), counts how many settings each category contains (categories
+/// matching "omnitrace|timemory|^(native|custom|advanced|analysis)$" are ignored),
+/// orders the categories by descending count and then by name, assigns each setting to
+/// the first category in that order which contains it, and finally calls
+/// add_group_arguments() for every category that received at least one setting.
+///
+/// \param _parser  parser that receives the options
+/// \param _data    shared parser state
+/// \return \p _data
+parser_data&
+add_extended_arguments(parser_t& _parser, parser_data& _data)
+{
+    auto _category_count_map = std::unordered_map{};
+    auto _settings           = std::vector>{};
+    for(auto& itr : *omnitrace::settings::instance())
+    {
+        if(itr.second->get_categories().count("omnitrace") == 0) continue;
+        if(itr.second->get_categories().count("deprecated") > 0) continue;
+        if(itr.second->get_hidden()) continue;
+        if(!_data.setting_filter(itr.second.get(), _data)) continue;
+        if(!_data.environ_filter(itr.second->get_name(), _data)) continue;
+
+        itr.second->set_enabled(true);
+        _settings.emplace_back(itr.second);
+
+        if(itr.second->get_name() == "papi_events")
+        {
+            auto _choices = itr.second->get_choices();
+            _choices.erase(
+                std::remove_if(_choices.begin(), _choices.end(),
+                               [](const auto& citr) {
+                                   return std::regex_search(
+                                              citr,
+                                              std::regex{ "[A-Za-z0-9]:([A-Za-z_]+)" }) ||
+                                          std::regex_search(citr, std::regex{ "io:::" });
+                               }),
+                _choices.end());
+            // add_group_arguments() performs the same rewrite later; avoid storing the
+            // hint more than once
+            const auto _hint =
+                std::string{ "... run `omnitrace-avail -H -c CPU` for full list ..." };
+            if(std::find(_choices.begin(), _choices.end(), _hint) == _choices.end())
+                _choices.emplace_back(_hint);
+            itr.second->set_choices(_choices);
+        }
+
+        for(const auto& citr : itr.second->get_categories())
+        {
+            if(std::regex_search(
+                   citr, std::regex{
+                             "omnitrace|timemory|^(native|custom|advanced|analysis)$" }))
+                continue;
+            _category_count_map[citr] += 1;
+        }
+    }
+
+    auto _category_count_vec = strvec_t{};
+    for(const auto& itr : _category_count_map)
+        _category_count_vec.emplace_back(itr.first);
+
+    // most populated categories first; ties broken alphabetically
+    std::sort(_category_count_vec.begin(), _category_count_vec.end(),
+              [&_category_count_map](const auto& _lhs, const auto& _rhs) {
+                  auto _lhs_v = _category_count_map.at(_lhs);
+                  auto _rhs_v = _category_count_map.at(_rhs);
+                  if(_lhs_v == _rhs_v) return _lhs < _rhs;
+                  return _lhs_v > _rhs_v;
+              });
+
+    auto _groups =
+        std::unordered_map>>{};
+    for(const auto& citr : _category_count_vec)
+    {
+        _groups[citr] = {};
+        for(const auto& itr : _settings)
+        {
+            if(itr->get_categories().count(citr) > 0) _groups[citr].emplace_back(itr);
+        }
+        // a setting claimed by this category is not offered to the following ones
+        _settings.erase(std::remove_if(_settings.begin(), _settings.end(),
+                                       [&citr](const auto& itr) {
+                                           return itr->get_categories().count(citr) > 0;
+                                       }),
+                        _settings.end());
+    }
+
+    for(const auto& citr : _category_count_vec)
+    {
+        auto _group = _groups.at(citr);
+        if(_group.empty()) continue;
+
+        add_group_arguments(_parser, citr, _data, true);
+    }
+
+    return _data;
+}
+} // namespace argparse
+} // namespace omnitrace
diff --git a/source/lib/core/argparse.hpp b/source/lib/core/argparse.hpp
new file mode 100644
index 0000000000..7a7d55ce3b
--- /dev/null
+++ b/source/lib/core/argparse.hpp
@@ -0,0 +1,93 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#pragma once
+
+#include "defines.hpp"
+
+#include
+#include
+
+#include
+#include
+#include
+
+namespace omnitrace
+{
+namespace argparse
+{
+struct parser_data;
+
+using parser_t          = ::tim::argparse::argument_parser;
+using vsetting_t        = ::tim::vsettings;
+using vsettings_set_t   = std::set;
+using strset_t          = std::set;
+using strvec_t          = std::vector;
+using setting_filter_t  = std::function;
+using environ_filter_t  = std::function;
+using grouping_filter_t = std::function;
+
+bool
+default_setting_filter(vsetting_t*, const parser_data&);  // initial value of parser_data::setting_filter
+
+bool
+default_environ_filter(std::string_view, const parser_data&);  // initial value of parser_data::environ_filter
+
+bool
+default_grouping_filter(std::string_view, const parser_data&);  // initial value of parser_data::grouping_filter
+
+struct parser_data  // state passed by reference through (and returned from) every function declared below
+{
+    bool              monochrome         = false;
+    bool              debug              = false;
+    int               verbose            = 0;
+    std::string       dl_libpath         = {};  // prepended to OMP_TOOL_LIBRARIES when --include selects "ompt" or "all"
+    std::string       omni_libpath       = {};  // prepended to KOKKOS_PROFILE_LIBRARY when --include selects "kokkosp" or "all"
+    std::string       launcher           = {};  // assigned from the --launcher option
+    vsettings_set_t   processed_settings = {};  // settings for which add_group_arguments() registered an option
+    std::set          processed_environs = {};  // names recorded as each core option is registered
+    std::set          processed_groups   = {};
+    std::vector       current            = {};
+    std::vector       command            = {};
+    std::set          updated            = {};
+    std::set          initial            = {};
+    grouping_filter_t grouping_filter    = default_grouping_filter;  // add_group_arguments() adds nothing when this returns false
+    setting_filter_t  setting_filter     = default_setting_filter;   // a setting is skipped when this returns false
+    environ_filter_t  environ_filter     = default_environ_filter;   // an option/setting name is skipped when this returns false
+};
+
+parser_data&
+init_parser(parser_data&);
+
+parser_data&
+add_ld_preload(parser_data&);
+
+parser_data&
+add_core_arguments(parser_t&, parser_data&);
+
+parser_data&
+add_group_arguments(parser_t&, const std::string&, parser_data&, bool _add_group = false);  // bool: wrap the options in their own help group
+
+parser_data&
+add_extended_arguments(parser_t&, parser_data&);
+} // namespace argparse
+} // namespace omnitrace
diff --git a/source/lib/core/common.hpp b/source/lib/core/common.hpp
index cd47909f7f..740991a651 100644
--- a/source/lib/core/common.hpp
+++ b/source/lib/core/common.hpp
@@ -22,10 +22,10 @@
 
 #pragma once
 
-#include "categories.hpp"
 #include "common/join.hpp"
-#include "concepts.hpp"
-#include "defines.hpp"
+#include "core/categories.hpp"
+#include "core/concepts.hpp" +#include "core/defines.hpp" #include #include diff --git a/source/lib/core/config.cpp b/source/lib/core/config.cpp index 37501091b7..5e8114349c 100644 --- a/source/lib/core/config.cpp +++ b/source/lib/core/config.cpp @@ -281,7 +281,7 @@ configure_settings(bool _init) std::string, "OMNITRACE_MODE", "Data collection mode. Used to set default values for OMNITRACE_USE_* options. " "Typically set by omnitrace binary instrumenter.", - std::string{ "trace" }, "backend", "advanced") + std::string{ "trace" }, "backend", "advanced", "mode") ->set_choices({ "trace", "sampling", "causal", "coverage" }); OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_CI", @@ -308,7 +308,7 @@ configure_settings(bool _init) "threads that get sampled, omnitrace can start all the background threads during " "initialization", get_env("OMNITRACE_NUM_THREADS", 1), "threading", "performance", - "sampling", "debugging", "advanced"); + "sampling", "parallelism", "advanced"); OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_PERFETTO", "Enable perfetto backend", _default_perfetto_v, "backend", "perfetto"); @@ -680,13 +680,13 @@ configure_settings(bool _init) "Enable collecting profiling and trace data for these " "categories and disable all other categories", "", "trace", "profile", "perfetto", "timemory", "data", - "advanced") + "category", "advanced") ->set_choices(get_available_categories>()); OMNITRACE_CONFIG_SETTING( std::string, "OMNITRACE_DISABLE_CATEGORIES", "Disable collecting profiling and trace data for these categories", "", "trace", - "profile", "perfetto", "timemory", "data", "advanced") + "profile", "perfetto", "timemory", "data", "category", "advanced") ->set_choices(get_available_categories>()); OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_PERFETTO_ANNOTATIONS", @@ -705,19 +705,18 @@ configure_settings(bool _init) OMNITRACE_CONFIG_EXT_SETTING(int64_t, "OMNITRACE_CRITICAL_TRACE_COUNT", "Number of critical trace to export (0 == all)", - int64_t{ 0 }, "data", 
"critical_trace", - "omnitrace-critical-trace", "perfetto", "advanced"); + int64_t{ 0 }, "critical_trace", + "omnitrace-critical-trace", "advanced"); OMNITRACE_CONFIG_SETTING(uint64_t, "OMNITRACE_CRITICAL_TRACE_BUFFER_COUNT", "Number of critical trace records to store in thread-local " "memory before submitting to shared buffer", - uint64_t{ 2000 }, "data", "critical_trace", "advanced"); + uint64_t{ 2000 }, "critical_trace", "advanced"); OMNITRACE_CONFIG_EXT_SETTING( int64_t, "OMNITRACE_CRITICAL_TRACE_PER_ROW", "How many critical traces per row in perfetto (0 == all in one row)", - int64_t{ 0 }, "io", "critical_trace", "omnitrace-critical-trace", "perfetto", - "advanced"); + int64_t{ 0 }, "critical_trace", "omnitrace-critical-trace", "advanced"); OMNITRACE_CONFIG_SETTING( std::string, "OMNITRACE_TIMEMORY_COMPONENTS", @@ -1506,9 +1505,15 @@ print_banner(std::ostream& _os) )banner"; auto _tag = std::string_view{ OMNITRACE_GIT_DESCRIBE }; auto _rev = std::string_view{ OMNITRACE_GIT_REVISION }; - std::stringstream _version_info{}; +#if OMNITRACE_HIP_VERSION_MAJOR > 0 + auto _hip = JOIN('.', OMNITRACE_HIP_VERSION_MAJOR, OMNITRACE_HIP_VERSION_MINOR, "x"); +#else + auto _hip = std::string_view{}; +#endif + + std::stringstream _version_info{}; _version_info << "omnitrace v" << OMNITRACE_VERSION_STRING; - if(!_tag.empty() || !_rev.empty()) + if(!_tag.empty() || !_rev.empty() || !_hip.empty()) { _version_info << " ("; if(!_tag.empty()) @@ -1516,10 +1521,21 @@ print_banner(std::ostream& _os) _version_info << "tag: " << OMNITRACE_GIT_DESCRIBE; if(!_rev.empty()) _version_info << ", "; } - if(!_rev.empty()) _version_info << "rev: " << OMNITRACE_GIT_REVISION; - _version_info << ")"; + + if(!_rev.empty()) + { + _version_info << "rev: " << OMNITRACE_GIT_REVISION; + if(!_hip.empty()) _version_info << ", "; + } + + if(!_hip.empty()) + { + _version_info << "rocm: " << _hip; + } } + if(!_version_info.str().empty()) _version_info << ")"; + tim::log::stream(_os, tim::log::color::info()) 
<< _banner << _version_info.str(); _os << std::endl; } diff --git a/source/lib/omnitrace-dl/CMakeLists.txt b/source/lib/omnitrace-dl/CMakeLists.txt index 6ef2365195..f2f35f76ff 100644 --- a/source/lib/omnitrace-dl/CMakeLists.txt +++ b/source/lib/omnitrace-dl/CMakeLists.txt @@ -17,8 +17,8 @@ add_library(omnitrace::omnitrace-dl-library ALIAS omnitrace-dl-library) target_sources( omnitrace-dl-library - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/dl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dl.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/main.c) + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/dl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/main.c + ${CMAKE_CURRENT_SOURCE_DIR}/dl/dl.hpp) target_include_directories( omnitrace-dl-library PUBLIC $ @@ -44,7 +44,7 @@ set_target_properties( omnitrace_strip_target(omnitrace-dl-library) -install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/dl.hpp - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/omnitrace) +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/dl/dl.hpp + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/omnitrace/dl) install(TARGETS omnitrace-dl-library DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/source/lib/omnitrace-dl/dl.cpp b/source/lib/omnitrace-dl/dl.cpp index 82b97d2b24..5f01a78b3a 100644 --- a/source/lib/omnitrace-dl/dl.cpp +++ b/source/lib/omnitrace-dl/dl.cpp @@ -38,12 +38,21 @@ #include "common/invoke.hpp" #include "common/join.hpp" #include "common/setup.hpp" -#include "dl.hpp" +#include "dl/dl.hpp" #include "omnitrace/categories.h" #include "omnitrace/types.h" +#include + #include +#include #include +#include +#include +#include +#include +#include +#include //--------------------------------------------------------------------------------------// @@ -69,6 +78,8 @@ //--------------------------------------------------------------------------------------// +using main_func_t = int (*)(int, char**, char**); + std::ostream& operator<<(std::ostream& _os, const SpaceHandle& _handle) { @@ -78,7 +89,7 @@ operator<<(std::ostream& _os, const SpaceHandle& _handle) namespace omnitrace { -inline 
namespace dl +namespace dl { namespace { @@ -97,6 +108,16 @@ get_omnitrace_dl_env() : get_env("OMNITRACE_DL_VERBOSE", get_omnitrace_env()); } +inline bool& +get_omnitrace_is_preloaded() +{ + static bool _v = []() { + auto&& _preload_libs = get_env("LD_PRELOAD", std::string{}); + return (_preload_libs.find("libomnitrace-dl.so") != std::string::npos); + }(); + return _v; +} + inline bool get_omnitrace_preload() { @@ -136,6 +157,12 @@ get_omnitrace_root_pid() return get_env("OMNITRACE_ROOT_PROCESS", _pid); } +void +omnitrace_preinit() OMNITRACE_INTERNAL_API; + +void +omnitrace_postinit(std::string exe = {}) OMNITRACE_INTERNAL_API; + pid_t _omnitrace_root_pid = get_omnitrace_root_pid(); // environment priority: @@ -189,7 +216,7 @@ const char* _omnitrace_dl_dlopen_descr = "RTLD_LAZY | RTLD_LOCAL"; #endif /// This class contains function pointers for omnitrace's instrumentation functions -struct OMNITRACE_HIDDEN_API indirect +struct OMNITRACE_INTERNAL_API indirect { OMNITRACE_INLINE indirect(const std::string& _omnilib, const std::string& _userlib, const std::string& _dllib) @@ -456,7 +483,7 @@ private: }; inline indirect& -get_indirect() OMNITRACE_HIDDEN_API; +get_indirect() OMNITRACE_INTERNAL_API; indirect& get_indirect() @@ -519,6 +546,13 @@ get_thread_status() return _v; } +InstrumentMode& +get_instrumented() +{ + static auto _v = get_env("OMNITRACE_INSTRUMENT_MODE", InstrumentMode::None); + return _v; +} + // ensure finalization is called bool _omnitrace_dl_fini = (std::atexit([]() { if(get_active()) omnitrace_finalize(); @@ -555,7 +589,7 @@ bool _omnitrace_dl_fini = (std::atexit([]() { fflush(stderr); \ } -using omnitrace::get_indirect; +using omnitrace::dl::get_indirect; namespace dl = omnitrace::dl; extern "C" @@ -598,6 +632,9 @@ extern "C" return; } + if(dl::get_instrumented() < dl::InstrumentMode::PythonProfile) + dl::omnitrace_preinit(); + bool _invoked = false; OMNITRACE_DL_INVOKE_STATUS(_invoked, get_indirect().omnitrace_init_f, a, b, c); if(_invoked) @@ 
-605,6 +642,8 @@ extern "C" dl::get_active() = true; dl::get_inited() = true; dl::_omnitrace_dl_verbose = dl::get_omnitrace_dl_env(); + if(dl::get_instrumented() < dl::InstrumentMode::PythonProfile) + dl::omnitrace_postinit((c) ? std::string{ c } : std::string{}); } } @@ -727,8 +766,9 @@ extern "C" OMNITRACE_DL_IGNORE(2, "already initialized and active", a, b); return; } + OMNITRACE_DL_LOG(2, "%s(%s, %s)\n", __FUNCTION__, a, b); setenv(a, b, 0); - OMNITRACE_DL_INVOKE(get_indirect().omnitrace_set_env_f, a, b); + // OMNITRACE_DL_INVOKE(get_indirect().omnitrace_set_env_f, a, b); } void omnitrace_set_mpi(bool a, bool b) @@ -840,6 +880,22 @@ extern "C" _annotations, _annotation_count); } + void omnitrace_set_instrumented(int _mode) + { + OMNITRACE_DL_LOG(2, "%s(%i)\n", __FUNCTION__, _mode); + auto _mode_v = static_cast(_mode); + if(_mode_v < dl::InstrumentMode::None || _mode_v >= dl::InstrumentMode::Last) + { + OMNITRACE_DL_LOG(-127, + "%s(mode=%i) invoked with invalid instrumentation mode. " + "mode should be %i >= mode < %i\n", + __FUNCTION__, _mode, + static_cast(dl::InstrumentMode::None), + static_cast(dl::InstrumentMode::Last)); + } + dl::get_instrumented() = _mode_v; + } + //----------------------------------------------------------------------------------// // // KokkosP @@ -1060,18 +1116,153 @@ extern "C" namespace omnitrace { -inline namespace dl +namespace dl { namespace { bool -omnitrace_preload() OMNITRACE_HIDDEN_API; +omnitrace_preload() OMNITRACE_INTERNAL_API; + +std::vector +get_link_map(const char*, + std::vector&& = { (RTLD_LAZY | RTLD_NOLOAD) }) OMNITRACE_INTERNAL_API; + +const char* +get_default_mode() OMNITRACE_INTERNAL_API; + +void +verify_instrumented_preloaded() OMNITRACE_INTERNAL_API; + +std::vector +get_link_map(const char* _name, std::vector&& _open_modes) +{ + void* _handle = nullptr; + bool _noload = false; + for(auto _mode : _open_modes) + { + _handle = dlopen(_name, _mode); + _noload = (_mode & RTLD_NOLOAD) == RTLD_NOLOAD; + if(_handle) 
break; + } + + auto _chain = std::vector{}; + if(_handle) + { + struct link_map* _link_map = nullptr; + dlinfo(_handle, RTLD_DI_LINKMAP, &_link_map); + struct link_map* _next = _link_map->l_next; + while(_next) + { + if(_next->l_name != nullptr && !std::string_view{ _next->l_name }.empty()) + { + _chain.emplace_back(_next->l_name); + } + _next = _next->l_next; + } + + if(_noload == false) dlclose(_handle); + } + return _chain; +} + +const char* +get_default_mode() +{ + if(get_env("OMNITRACE_USE_CAUSAL", false)) return "causal"; + + auto _link_map = get_link_map(nullptr); + for(const auto& itr : _link_map) + { + if(itr.find("libomnitrace-rt.so") != std::string::npos || + itr.find("libdyninstAPI_RT.so") != std::string::npos) + return "trace"; + } + + return "sampling"; +} + +void +omnitrace_preinit() +{ + switch(get_instrumented()) + { + case InstrumentMode::None: + case InstrumentMode::BinaryRewrite: + case InstrumentMode::ProcessCreate: + case InstrumentMode::ProcessAttach: + { + auto _use_mpip = get_env("OMNITRACE_USE_MPIP", false); + auto _use_mpi = get_env("OMNITRACE_USE_MPI", _use_mpip); + auto _causal = get_env("OMNITRACE_USE_CAUSAL", false); + auto _mode = get_env("OMNITRACE_MODE", get_default_mode()); + + if(_use_mpi && !(_causal && _mode == "causal")) + { + // only make this call if true bc otherwise, if + // false, it will disable the MPIP component and + // we may intercept the MPI init call later. + // If _use_mpi defaults to true above, calling this + // will override can current env or config value for + // OMNITRACE_USE_PID. 
+ omnitrace_set_mpi(_use_mpi, dl::get_instrumented() == + dl::InstrumentMode::ProcessAttach); + } + break; + } + case InstrumentMode::PythonProfile: + case InstrumentMode::Last: break; + } +} + +void +omnitrace_postinit(std::string _exe) +{ + switch(get_instrumented()) + { + case InstrumentMode::None: + case InstrumentMode::BinaryRewrite: + case InstrumentMode::ProcessCreate: + case InstrumentMode::ProcessAttach: + { + if(_exe.empty()) + _exe = tim::filepath::readlink(join('/', "/proc", getpid(), "exe")); + + omnitrace_init_tooling(); + if(_exe.empty()) + omnitrace_push_trace("main"); + else + omnitrace_push_trace(basename(_exe.c_str())); + break; + } + case InstrumentMode::PythonProfile: + { + omnitrace_init_tooling(); + break; + } + case InstrumentMode::Last: break; + } +} bool omnitrace_preload() { - auto _preload = get_omnitrace_preload() && get_env("OMNITRACE_ENABLED", true); - auto _use_mpi = get_env("OMNITRACE_USE_MPI", get_env("OMNITRACE_USE_MPIP", false)); + auto _preload = get_omnitrace_is_preloaded() && get_omnitrace_preload() && + get_env("OMNITRACE_ENABLED", true); + + auto _link_map = get_link_map(nullptr); + auto _instr_mode = + get_env("OMNITRACE_INSTRUMENT_MODE", dl::InstrumentMode::BinaryRewrite); + for(const auto& itr : _link_map) + { + if(itr.find("libomnitrace-rt.so") != std::string::npos || + itr.find("libdyninstAPI_RT.so") != std::string::npos) + { + omnitrace_set_instrumented(static_cast(_instr_mode)); + break; + } + } + + verify_instrumented_preloaded(); static bool _once = false; if(_once) return _preload; @@ -1081,30 +1272,174 @@ omnitrace_preload() { reset_omnitrace_preload(); omnitrace_preinit_library(); - auto _causal = get_env("OMNITRACE_USE_CAUSAL", false); - auto _mode = get_env("OMNITRACE_MODE", (_causal) ? 
"causal" : "sampling"); - OMNITRACE_DL_LOG(1, "[%s] invoking %s(%s)\n", __FUNCTION__, "omnitrace_init", - ::omnitrace::join(::omnitrace::QuoteStrings{}, ", ", _mode, - false, "omnitrace") - .c_str()); - if(_use_mpi && !(_causal && _mode == "causal")) - { - // only make this call if true bc otherwise, if - // false, it will disable the MPIP component and - // we may intercept the MPI init call later. - // If _use_mpi defaults to true above, calling this - // will override can current env or config value for - // OMNITRACE_USE_PID. - omnitrace_set_mpi(_use_mpi, false); - } - omnitrace_init(_mode.c_str(), false, nullptr); - omnitrace_init_tooling(); } return _preload; } -bool _handle_preload = omnitrace::dl::omnitrace_preload(); +void +verify_instrumented_preloaded() +{ + // if preloaded then we are fine + if(get_omnitrace_is_preloaded()) return; + + // value returned by get_instrumented is set by either: + // - the search of the linked libraries + // - via the instrumenter + // if binary rewrite or runtime instrumentation, there is an opportunity for + // LD_PRELOAD + switch(dl::get_instrumented()) + { + case dl::InstrumentMode::None: + case dl::InstrumentMode::ProcessAttach: + case dl::InstrumentMode::ProcessCreate: + { + return; + } + case dl::InstrumentMode::BinaryRewrite: + { + break; + } + case dl::InstrumentMode::Last: + { + throw std::runtime_error( + "Invalid instrumentation type: InstrumentMode::Last"); + } + } + + static const char* _notice = R"notice( + + NNNNNNNN NNNNNNNN OOOOOOOOO TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIII CCCCCCCCCCCCCEEEEEEEEEEEEEEEEEEEEEE + N:::::::N N::::::N OO:::::::::OO T:::::::::::::::::::::TI::::::::I CCC::::::::::::CE::::::::::::::::::::E + N::::::::N N::::::N OO:::::::::::::OO T:::::::::::::::::::::TI::::::::I CC:::::::::::::::CE::::::::::::::::::::E + N:::::::::N N::::::NO:::::::OOO:::::::OT:::::TT:::::::TT:::::TII::::::IIC:::::CCCCCCCC::::CEE::::::EEEEEEEEE::::E + N::::::::::N N::::::NO::::::O O::::::OTTTTTT T:::::T TTTTTT I::::I 
C:::::C CCCCCC E:::::E EEEEEE + N:::::::::::N N::::::NO:::::O O:::::O T:::::T I::::IC:::::C E:::::E + N:::::::N::::N N::::::NO:::::O O:::::O T:::::T I::::IC:::::C E::::::EEEEEEEEEE + N::::::N N::::N N::::::NO:::::O O:::::O T:::::T I::::IC:::::C E:::::::::::::::E + N::::::N N::::N:::::::NO:::::O O:::::O T:::::T I::::IC:::::C E:::::::::::::::E + N::::::N N:::::::::::NO:::::O O:::::O T:::::T I::::IC:::::C E::::::EEEEEEEEEE + N::::::N N::::::::::NO:::::O O:::::O T:::::T I::::IC:::::C E:::::E + N::::::N N:::::::::NO::::::O O::::::O T:::::T I::::I C:::::C CCCCCC E:::::E EEEEEE + N::::::N N::::::::NO:::::::OOO:::::::O TT:::::::TT II::::::IIC:::::CCCCCCCC::::CEE::::::EEEEEEEE:::::E + N::::::N N:::::::N OO:::::::::::::OO T:::::::::T I::::::::I CC:::::::::::::::CE::::::::::::::::::::E + N::::::N N::::::N OO:::::::::OO T:::::::::T I::::::::I CCC::::::::::::CE::::::::::::::::::::E + NNNNNNNN NNNNNNN OOOOOOOOO TTTTTTTTTTT IIIIIIIIII CCCCCCCCCCCCCEEEEEEEEEEEEEEEEEEEEEE + + _ _ _____ ______ + | | | |/ ____| ____| + | | | | (___ | |__ + | | | |\___ \| __| + | |__| |____) | |____ + \____/|_____/|______| + + ____ __ __ _ _ _____ _______ _____ _____ ______ _____ _ _ _ _ + / __ \| \/ | \ | |_ _|__ __| __ \ /\ / ____| ____| | __ \| | | | \ | | + | | | | \ / | \| | | | | | | |__) | / \ | | | |__ ______| |__) | | | | \| | + | | | | |\/| | . ` | | | | | | _ / / /\ \| | | __|______| _ /| | | | . ` | + | |__| | | | | |\ |_| |_ | | | | \ \ / ____ \ |____| |____ | | \ \| |__| | |\ | + \____/|_| |_|_| \_|_____| |_| |_| \_\/_/ \_\_____|______| |_| \_\\____/|_| \_| + + + Due to a variety of edge cases we've encountered, OmniTrace now requires that binary rewritten executables and libraries be launched + with the 'omnitrace-run' executable. + + In order to launch the executable with 'omnitrace-run', prefix the current command with 'omnitrace-run' and a standalone double hyphen ('--'). + For MPI applications, place 'omnitrace-run --' after the MPI command. 
+ E.g.: + + + mpirun -n 2 + + should be: + + omnitrace-run -- + mpirun -n 2 omnitrace-run -- + + Note: the command-line arguments passed to 'omnitrace-run' (which are specified before the double hyphen) will override configuration variables + and/or any configuration values specified to 'omnitrace-instrument' via the '--config' or '--env' options. + E.g.: + + $ omnitrace-instrument -o ./sleep.inst --env OMNITRACE_SAMPLING_DELAY=5.0 -- sleep + $ echo "OMNITRACE_SAMPLING_FREQ = 500" > omnitrace.cfg + $ export OMNITRACE_CONFIG_FILE=omnitrace.cfg + $ omnitrace-run --sampling-freq=100 --sampling-delay=1.0 -- ./sleep.inst 10 + + In the first command, a default sampling delay of 5 seconds in embedded into the instrumented 'sleep.inst'. + In the second command, the sampling frequency will be set to 500 interrupts per second when OmniTrace reads the config file + In the fourth command, the sampling frequency and sampling delay are overridden to 100 interrupts per second and 1 second, respectively, when sleep.inst runs + + Thanks for using OmniTrace and happy optimizing! 
+ )notice"; + + // emit notice + std::cerr << _notice << std::endl; + + std::quick_exit(EXIT_FAILURE); +} + +bool _handle_preload = omnitrace_preload(); +main_func_t main_real = nullptr; } // namespace } // namespace dl } // namespace omnitrace + +extern "C" +{ + int omnitrace_main(int argc, char** argv, char** envp) OMNITRACE_INTERNAL_API; + void omnitrace_set_main(main_func_t) OMNITRACE_INTERNAL_API; + + void omnitrace_set_main(main_func_t _main_real) + { + ::omnitrace::dl::main_real = _main_real; + } + + int omnitrace_main(int argc, char** argv, char** envp) + { + OMNITRACE_DL_LOG(0, "%s\n", __FUNCTION__); + using ::omnitrace::common::get_env; + using ::omnitrace::dl::get_default_mode; + + // prevent re-entry + static int _reentry = 0; + if(_reentry > 0) return -1; + _reentry = 1; + + if(!::omnitrace::dl::main_real) + throw std::runtime_error("[omnitrace][dl] Unsuccessful wrapping of main: " + "nullptr to real main function"); + + if(envp) + { + size_t _idx = 0; + while(envp[_idx] != nullptr) + { + auto _env_v = std::string_view{ envp[_idx++] }; + if(_env_v.find("OMNITRACE") != 0 && + _env_v.find("libomnitrace") == std::string_view::npos) + continue; + auto _pos = _env_v.find('='); + if(_pos < _env_v.length()) + { + auto _var = std::string{ _env_v }.substr(0, _pos); + auto _val = std::string{ _env_v }.substr(_pos + 1); + OMNITRACE_DL_LOG(1, "%s(%s, %s)\n", "omnitrace_set_env", _var.c_str(), + _val.c_str()); + setenv(_var.c_str(), _val.c_str(), 0); + } + } + } + + auto _mode = get_env("OMNITRACE_MODE", get_default_mode()); + omnitrace_init(_mode.c_str(), + dl::get_instrumented() == dl::InstrumentMode::BinaryRewrite, + argv[0]); + + int ret = (*::omnitrace::dl::main_real)(argc, argv, envp); + + omnitrace_pop_trace(basename(argv[0])); + omnitrace_finalize(); + + return ret; + } +} diff --git a/source/lib/omnitrace-dl/dl.hpp b/source/lib/omnitrace-dl/dl/dl.hpp similarity index 95% rename from source/lib/omnitrace-dl/dl.hpp rename to source/lib/omnitrace-dl/dl/dl.hpp 
index d4c3c2b4b1..f04f0a5011 100644 --- a/source/lib/omnitrace-dl/dl.hpp +++ b/source/lib/omnitrace-dl/dl/dl.hpp @@ -76,6 +76,7 @@ extern "C" void omnitrace_set_env(const char* env_name, const char* env_val) OMNITRACE_PUBLIC_API; void omnitrace_set_mpi(bool use, bool attached) OMNITRACE_PUBLIC_API; + void omnitrace_set_instrumented(int) OMNITRACE_PUBLIC_API; void omnitrace_push_trace(const char* name) OMNITRACE_PUBLIC_API; void omnitrace_pop_trace(const char* name) OMNITRACE_PUBLIC_API; int omnitrace_push_region(const char*) OMNITRACE_PUBLIC_API; @@ -191,4 +192,20 @@ extern "C" #endif } +namespace omnitrace +{ +namespace dl +{ +enum class InstrumentMode : int +{ + None = -1, + BinaryRewrite = 0, + ProcessCreate = 1, // runtime instrumentation at start of process + ProcessAttach = 2, // runtime instrumentation of running process + PythonProfile = 3, // python setprofile + Last, +}; +} +} // namespace omnitrace + #endif // OMNITRACE_DL_HPP_ 1 diff --git a/source/lib/omnitrace-dl/main.c b/source/lib/omnitrace-dl/main.c index 4eadbb6c46..c83325c698 100644 --- a/source/lib/omnitrace-dl/main.c +++ b/source/lib/omnitrace-dl/main.c @@ -22,8 +22,9 @@ #define _GNU_SOURCE -#define OMNITRACE_PUBLIC_API __attribute__((visibility("default"))); -#define OMNITRACE_HIDDEN_API __attribute__((visibility("hidden"))); +#define OMNITRACE_PUBLIC_API __attribute__((visibility("default"))); +#define OMNITRACE_HIDDEN_API __attribute__((visibility("hidden"))); +#define OMNITRACE_INTERNAL_API __attribute__((visibility("internal"))); #include #include @@ -35,25 +36,18 @@ // // local type definitions // +typedef int (*main_func_t)(int, char**, char**); typedef int (*start_main_t)(int (*)(int, char**, char**), int, char**, int (*)(int, char**, char**), void (*)(void), void (*)(void), void*); -// -// local variables -// -static int (*main_real)(int, char**, char**); // Trampoline for the real main() - // // local function declarations // -int -omnitrace_main(int, char**, char**) 
OMNITRACE_HIDDEN_API; - int omnitrace_libc_start_main(int (*)(int, char**, char**), int, char**, int (*)(int, char**, char**), void (*)(void), void (*)(void), - void*) OMNITRACE_HIDDEN_API; + void*) OMNITRACE_INTERNAL_API; int __libc_start_main(int (*)(int, char**, char**), int, char**, int (*)(int, char**, char**), @@ -80,32 +74,13 @@ omnitrace_init_tooling(void); extern void omnitrace_init(const char*, bool, const char*); -// -// local function definitions -// -int -omnitrace_main(int argc, char** argv, char** envp) -{ - // prevent re-entry - static int _reentry = 0; - if(_reentry > 0) return -1; - _reentry = 1; +extern char* +basename(const char*); - // set the relevant environment variables - // omnitrace_update_env(&envp); +extern void omnitrace_set_main(main_func_t) OMNITRACE_INTERNAL_API; - const char* mode = getenv("OMNITRACE_MODE"); - omnitrace_init(mode ? mode : "sampling", false, argv[0]); - omnitrace_init_tooling(); - omnitrace_push_trace(basename(argv[0])); - - int ret = main_real(argc, argv, envp); - - omnitrace_pop_trace(basename(argv[0])); - omnitrace_finalize(); - - return ret; -} +extern int +omnitrace_main(int argc, char** argv, char** envp) OMNITRACE_INTERNAL_API; int omnitrace_libc_start_main(int (*_main)(int, char**, char**), int _argc, char** _argv, @@ -123,7 +98,7 @@ omnitrace_libc_start_main(int (*_main)(int, char**, char**), int _argc, char** _ void* _this_func = __builtin_return_address(0); // Save the real main function address - main_real = _main; + omnitrace_set_main(_main); // Find the real __libc_start_main() start_main_t user_main = dlsym(RTLD_NEXT, "__libc_start_main"); @@ -136,8 +111,7 @@ omnitrace_libc_start_main(int (*_main)(int, char**, char**), int _argc, char** _ if(_preload == 0) { // call original main - return user_main(main_real, _argc, _argv, _init, _fini, _rtld_fini, - _stack_end); + return user_main(_main, _argc, _argv, _init, _fini, _rtld_fini, _stack_end); } else { diff --git a/source/lib/omnitrace/library.cpp 
b/source/lib/omnitrace/library.cpp index e0e5f2fc26..3eeb941590 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -150,7 +150,7 @@ ensure_finalization(bool _static_init = false) _tid->system_value); } - if(get_env("OMNITRACE_MONOCHROME", false)) tim::log::monochrome() = true; + if(common::get_env("OMNITRACE_MONOCHROME", false)) tim::log::monochrome() = true; (void) tim::manager::instance(); (void) tim::settings::shared_instance(); @@ -637,7 +637,7 @@ extern "C" void omnitrace_reset_preload_hidden(void) { tim::set_env("OMNITRACE_PRELOAD", "0", 1); - auto&& _preload_libs = get_env("LD_PRELOAD", std::string{}); + auto&& _preload_libs = common::get_env("LD_PRELOAD", std::string{}); if(_preload_libs.find("libomnitrace") != std::string::npos) { auto _modified_preload = std::string{}; @@ -732,7 +732,8 @@ omnitrace_finalize_hidden(void) if(dmp::rank() == 0) { OMNITRACE_PRINT_F("\n"); - config::print_settings(get_env("OMNITRACE_PRINT_ENV", get_debug())); + config::print_settings( + tim::get_env("OMNITRACE_PRINT_ENV", get_debug())); } } diff --git a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp index aba6097b83..2cafee7ba7 100644 --- a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -76,6 +77,7 @@ inline void start_bundle(bundle_t& _bundle, Args&&... 
_args) { if(!get_use_timemory() && !get_use_perfetto()) return; + trait::runtime_enabled::set(get_use_roctracer()); OMNITRACE_BASIC_VERBOSE_F(3, "starting bundle '%s'...\n", _bundle.key().c_str()); if constexpr(sizeof...(Args) > 0) { diff --git a/source/python/libpyomnitrace.cpp b/source/python/libpyomnitrace.cpp index b3ccc7e0ab..04fe3a9a50 100644 --- a/source/python/libpyomnitrace.cpp +++ b/source/python/libpyomnitrace.cpp @@ -21,7 +21,7 @@ // SOFTWARE. #include "libpyomnitrace.hpp" -#include "dl.hpp" +#include "dl/dl.hpp" #include "library/coverage.hpp" #include "library/coverage/impl.hpp" #include "omnitrace/categories.h" @@ -123,6 +123,8 @@ PYBIND11_MODULE(libpyomnitrace, omni) if(_is_initialized) throw std::runtime_error("Error! omnitrace is already initialized"); _is_initialized = true; + omnitrace_set_instrumented( + static_cast(omnitrace::dl::InstrumentMode::PythonProfile)); omnitrace_set_mpi(_get_use_mpi(), false); std::string _cmd = {}; std::string _cmd_line = {}; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 729f69c642..7aa15cc1d1 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -281,7 +281,7 @@ if(OMNITRACE_USE_MPI OR OMNITRACE_USE_MPI_HEADERS) ENVIRONMENT "${_flat_environment};OMNITRACE_USE_SAMPLING=OFF;OMNITRACE_STRICT_CONFIG=OFF;OMNITRACE_USE_MPIP=ON" REWRITE_RUN_PASS_REGEX - ">>> main(.*\n.*)>>> MPI_Init_thread(.*\n.*)>>> pthread_create(.*\n.*)>>> MPI_Comm_size(.*\n.*)>>> MPI_Comm_rank(.*\n.*)>>> MPI_Barrier(.*\n.*)>>> MPI_Alltoall" + ">>> mpi-flat-mpip.inst(.*\n.*)>>> MPI_Init_thread(.*\n.*)>>> pthread_create(.*\n.*)>>> MPI_Comm_size(.*\n.*)>>> MPI_Comm_rank(.*\n.*)>>> MPI_Barrier(.*\n.*)>>> MPI_Alltoall" ) omnitrace_add_test( @@ -303,7 +303,7 @@ if(OMNITRACE_USE_MPI OR OMNITRACE_USE_MPI_HEADERS) 0 ENVIRONMENT "${_flat_environment};OMNITRACE_USE_SAMPLING=OFF" REWRITE_RUN_PASS_REGEX - ">>> main(.*\n.*)>>> MPI_Init_thread(.*\n.*)>>> pthread_create(.*\n.*)>>> MPI_Comm_size(.*\n.*)>>> MPI_Comm_rank(.*\n.*)>>> 
MPI_Barrier(.*\n.*)>>> MPI_Alltoall" + ">>> mpi-flat.inst(.*\n.*)>>> MPI_Init_thread(.*\n.*)>>> pthread_create(.*\n.*)>>> MPI_Comm_size(.*\n.*)>>> MPI_Comm_rank(.*\n.*)>>> MPI_Barrier(.*\n.*)>>> MPI_Alltoall" ) set(_mpip_environment @@ -724,7 +724,7 @@ omnitrace_add_validation_test( LABELS "time-window" FAIL_REGEX "outer_d" ARGS -l - main + trace-time-window.inst outer_a outer_b outer_c @@ -749,7 +749,7 @@ omnitrace_add_validation_test( LABELS "time-window" FAIL_REGEX "outer_d" ARGS -l - main + trace-time-window outer_a outer_b outer_c diff --git a/tests/omnitrace-testing.cmake b/tests/omnitrace-testing.cmake index 2292494525..449f5f8c48 100644 --- a/tests/omnitrace-testing.cmake +++ b/tests/omnitrace-testing.cmake @@ -257,7 +257,7 @@ endif() function(OMNITRACE_WRITE_TEST_CONFIG _FILE _ENV) set(_ENV_ONLY - "OMNITRACE_(MODE|USE_MPIP|DEBUG_SETTINGS|FORCE_ROCPROFILER_INIT|DEFAULT_MIN_INSTRUCTIONS|MONOCHROME|VERBOSE)=" + "OMNITRACE_(CI|MODE|USE_MPIP|DEBUG_SETTINGS|FORCE_ROCPROFILER_INIT|DEFAULT_MIN_INSTRUCTIONS|MONOCHROME|VERBOSE)=" ) set(_FILE_CONTENTS) set(_ENV_CONTENTS) @@ -434,8 +434,8 @@ function(OMNITRACE_ADD_TEST) add_test( NAME ${TEST_NAME}-binary-rewrite-run COMMAND - ${COMMAND_PREFIX} $/${TEST_NAME}.inst - ${TEST_RUN_ARGS} + ${COMMAND_PREFIX} $ -- + $/${TEST_NAME}.inst ${TEST_RUN_ARGS} WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) endif() @@ -451,8 +451,8 @@ function(OMNITRACE_ADD_TEST) add_test( NAME ${TEST_NAME}-binary-rewrite-sampling-run COMMAND - ${COMMAND_PREFIX} $/${TEST_NAME}.samp - ${TEST_RUN_ARGS} + ${COMMAND_PREFIX} $ -- + $/${TEST_NAME}.samp ${TEST_RUN_ARGS} WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) endif()