diff --git a/projects/rocprofiler-systems/.github/workflows/linux-ci.yml b/projects/rocprofiler-systems/.github/workflows/linux-ci.yml index 34bc0a598f..7156141b0b 100644 --- a/projects/rocprofiler-systems/.github/workflows/linux-ci.yml +++ b/projects/rocprofiler-systems/.github/workflows/linux-ci.yml @@ -74,11 +74,24 @@ jobs: - name: Test Install timeout-minutes: 10 - run: - omnitrace --help && - omnitrace -e -v 1 -o ls.inst -- ls && - ./ls.inst && - rm ./ls.inst && + run: | + set -v + export OMNITRACE_DEBUG=ON + which omnitrace-avail + ldd $(which omnitrace-avail) + omnitrace-avail --help + omnitrace-avail -a + which omnitrace-critical-trace + ldd $(which omnitrace-critical-trace) + which omnitrace + ldd $(which omnitrace) + omnitrace --help + omnitrace -e -v 1 -o ls.inst --simulate -- ls + for i in omnitrace-ls.inst-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done + omnitrace -e -v 1 -o ls.inst -- ls + ./ls.inst + omnitrace -e -v 1 --simulate -- ls + for i in omnitrace-ls-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace -e -v 1 -- ls - name: CTest Artifacts @@ -156,11 +169,24 @@ jobs: - name: Test Install timeout-minutes: 10 - run: - omnitrace --help && - omnitrace -e -v 1 -o ls.inst -- ls && - ./ls.inst && - rm ./ls.inst && + run: | + set -v + export OMNITRACE_DEBUG=ON + which omnitrace-avail + ldd $(which omnitrace-avail) + omnitrace-avail --help + omnitrace-avail -a + which omnitrace-critical-trace + ldd $(which omnitrace-critical-trace) + which omnitrace + ldd $(which omnitrace) + omnitrace --help + omnitrace -e -v 1 -o ls.inst --simulate -- ls + for i in omnitrace-ls.inst-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done + omnitrace -e -v 1 -o ls.inst -- ls + ./ls.inst + omnitrace -e -v 1 --simulate -- ls + for i in omnitrace-ls-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace -e -v 1 -- ls - name: CTest Artifacts @@ -262,12 +288,24 @@ jobs: - name: Test Install timeout-minutes: 10 - run: - ldd $(which 
omnitrace) && - omnitrace --help && - omnitrace -e -v 1 -o ls.inst -- ls && - ./ls.inst && - rm ./ls.inst && + run: | + set -v + export OMNITRACE_DEBUG=ON + which omnitrace-avail + ldd $(which omnitrace-avail) + omnitrace-avail --help + omnitrace-avail -a + which omnitrace-critical-trace + ldd $(which omnitrace-critical-trace) + which omnitrace + ldd $(which omnitrace) + omnitrace --help + omnitrace -e -v 1 -o ls.inst --simulate -- ls + for i in omnitrace-ls.inst-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done + omnitrace -e -v 1 -o ls.inst -- ls + ./ls.inst + omnitrace -e -v 1 --simulate -- ls + for i in omnitrace-ls-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace -e -v 1 -- ls - name: CTest Artifacts @@ -361,12 +399,24 @@ jobs: - name: Test Install timeout-minutes: 10 - run: - ldd $(which omnitrace) && - omnitrace --help && - omnitrace -e -v 1 -o ls.inst -- ls && - ./ls.inst && - rm ./ls.inst && + run: | + set -v + export OMNITRACE_DEBUG=ON + which omnitrace-avail + ldd $(which omnitrace-avail) + omnitrace-avail --help + omnitrace-avail -a + which omnitrace-critical-trace + ldd $(which omnitrace-critical-trace) + which omnitrace + ldd $(which omnitrace) + omnitrace --help + omnitrace -e -v 1 -o ls.inst --simulate -- ls + for i in omnitrace-ls.inst-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done + omnitrace -e -v 1 -o ls.inst -- ls + ./ls.inst + omnitrace -e -v 1 --simulate -- ls + for i in omnitrace-ls-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace -e -v 1 -- ls - name: CTest Artifacts @@ -481,10 +531,22 @@ jobs: - name: Test Install timeout-minutes: 10 - run: - ldd $(which omnitrace) && - omnitrace --help && - omnitrace -e -v 1 -o ls.inst -- ls && - ./ls.inst && - rm ./ls.inst && + run: | + set -v + export OMNITRACE_DEBUG=ON + which omnitrace-avail + ldd $(which omnitrace-avail) + omnitrace-avail --help + omnitrace-avail -a + which omnitrace-critical-trace + ldd $(which omnitrace-critical-trace) + 
which omnitrace + ldd $(which omnitrace) + omnitrace --help + omnitrace -e -v 1 -o ls.inst --simulate -- ls + for i in omnitrace-ls.inst-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done + omnitrace -e -v 1 -o ls.inst -- ls + ./ls.inst + omnitrace -e -v 1 --simulate -- ls + for i in omnitrace-ls-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace -e -v 1 -- ls diff --git a/projects/rocprofiler-systems/cmake/Packages.cmake b/projects/rocprofiler-systems/cmake/Packages.cmake index 7c2f7af55a..1a78985f3d 100644 --- a/projects/rocprofiler-systems/cmake/Packages.cmake +++ b/projects/rocprofiler-systems/cmake/Packages.cmake @@ -373,11 +373,11 @@ set(TIMEMORY_TLS_MODEL set(TIMEMORY_SETTINGS_PREFIX "OMNITRACE_" CACHE STRING "Prefix used for settings and environment variables") -set(TIMEMORY_SETTINGS_CONFIG_NAME +set(TIMEMORY_PROJECT_NAME "omnitrace" CACHE STRING "Name for configuration") mark_as_advanced(TIMEMORY_SETTINGS_PREFIX) -mark_as_advanced(TIMEMORY_SETTINGS_CONFIG_NAME) +mark_as_advanced(TIMEMORY_PROJECT_NAME) omnitrace_checkout_git_submodule( RELATIVE_PATH external/timemory @@ -391,8 +391,8 @@ omnitrace_save_variables( # ensure timemory builds PIC static libs so that we don't have to install timemory shared # lib -set(BUILD_SHARED_LIBS ON) -set(BUILD_STATIC_LIBS OFF) +set(BUILD_SHARED_LIBS OFF) +set(BUILD_STATIC_LIBS ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(TIMEMORY_CTP_OPTIONS GLOBAL) diff --git a/projects/rocprofiler-systems/external/timemory b/projects/rocprofiler-systems/external/timemory index c5c7e73a01..ba478a0188 160000 --- a/projects/rocprofiler-systems/external/timemory +++ b/projects/rocprofiler-systems/external/timemory @@ -1 +1 @@ -Subproject commit c5c7e73a0164d01bfbdc5b005b936ab20224df73 +Subproject commit ba478a0188bdc26f85368045fe91bf75e7004d4f diff --git a/projects/rocprofiler-systems/source/bin/omnitrace-avail/avail.cpp b/projects/rocprofiler-systems/source/bin/omnitrace-avail/avail.cpp index 8ce5f2b7fc..09062e7a66 
100644 --- a/projects/rocprofiler-systems/source/bin/omnitrace-avail/avail.cpp +++ b/projects/rocprofiler-systems/source/bin/omnitrace-avail/avail.cpp @@ -44,11 +44,15 @@ #include #include +#include #include #include #include +#include #include +#include #include +#include #include #if defined(TIMEMORY_UNIX) @@ -65,7 +69,9 @@ using array_t = std::array; using string_t = std::string; using stringstream_t = std::stringstream; using str_vec_t = std::vector; +using str_set_t = std::set; using info_type_base = std::tuple; +using parser_t = tim::argparse::argument_parser; struct info_type : info_type_base { @@ -85,8 +91,9 @@ struct info_type : info_type_base const auto& id_strings() const { return info().at(3); } const auto& label() const { return info().at(4); } const auto& description() const { return info().at(5); } + const auto& categories() const { return info().at(6); } - bool valid() const { return !name().empty() && info().size() >= 5; } + bool valid() const { return !name().empty() && info().size() >= 6; } bool operator<(const info_type& rhs) const { return name() < rhs.name(); } bool operator!=(const info_type& rhs) const { return !(*this == rhs); } @@ -101,6 +108,22 @@ struct info_type : info_type_base } }; +//--------------------------------------------------------------------------------------// + +enum +{ + VAL = 0, + ENUM = 1, + LANG = 2, + CID = 3, + FNAME = 4, + DESC = 5, + CATEGORY = 6, + TOTAL = 7 +}; + +//--------------------------------------------------------------------------------------// + namespace { char global_delim = '|'; @@ -111,16 +134,43 @@ bool all_info = false; bool force_brief = false; bool debug_msg = false; bool case_insensitive = false; +bool regex_hl = false; int32_t max_width = 0; int32_t num_cols = 0; int32_t min_width = 40; int32_t padding = 4; str_vec_t regex_keys = {}; -bool regex_hl = false; -constexpr size_t num_component_options = 6; -constexpr size_t num_settings_options = 3; +str_vec_t category_regex_keys = {}; +str_set_t 
category_view = {}; +constexpr size_t num_component_options = 7; +constexpr size_t num_settings_options = 4; constexpr size_t num_hw_counter_options = 4; std::stringstream lerr{}; + +// explicit setting names to exclude +std::set settings_exclude = { + "OMNITRACE_ENVIRONMENT", "OMNITRACE_COMMAND_LINE", "cereal_class_version", "settings", +#if !defined(TIMEMORY_USE_CRAYPAT) + "OMNITRACE_CRAYPAT" +#endif +}; + +// exclude some timemory settings which are not relevant to omnitrace +// exact matches, e.g. OMNITRACE_BANNER +std::string settings_rexclude_exact = + "^OMNITRACE_(BANNER|DESTRUCTOR_REPORT|COMPONENTS|(GLOBAL|MPIP|NCCLP|OMPT|" + "PROFILER|TRACE|KOKKOS)_COMPONENTS|PYTHON_EXE|PAPI_ATTACH|PLOT_OUTPUT|SEPARATOR_" + "FREQ|" + "STACK_CLEARING|TARGET_PID|THROTTLE_(COUNT|VALUE)|(AUTO|FLAMEGRAPH)_OUTPUT|" + "(ENABLE|DISABLE)_ALL_SIGNALS|ALLOW_SIGNAL_HANDLER|CTEST_NOTES|INSTRUCTION_" + "ROOFLINE)$"; + +// leading matches, e.g. OMNITRACE_MPI_[A-Z_]+ +std::string settings_rexclude_begin = + "^OMNITRACE_(ERT|DART|MPI|UPCXX|ROOFLINE|CUDA|NVTX|CUPTI)_[A-Z_]+$"; + +bool +exclude_setting(const std::string&); } // namespace //--------------------------------------------------------------------------------------// @@ -153,6 +203,9 @@ banner(IntArrayT _breaks, std::array _use, char filler = '-', char deli bool is_selected(const std::string& line); +bool +is_category_selected(const std::string& _line); + std::string hl_selected(const std::string& line); @@ -174,6 +227,12 @@ write_hw_counter_info(std::ostream&, const array_t& = {}, template struct get_availability; +template +struct component_categories; + +void +process_categories(parser_t&, const str_set_t&); + //--------------------------------------------------------------------------------------// template @@ -233,15 +292,53 @@ struct get_availability //--------------------------------------------------------------------------------------// -enum +template +struct component_categories { - VAL = 0, - ENUM = 1, - LANG = 2, - CID 
= 3, - FNAME = 4, - DESC = 5, - TOTAL = 6 + template + void operator()(std::set& _v, type_list) const + { + // + auto _cleanup = [](std::string _type, const std::string& _pattern) { + auto _pos = std::string::npos; + while((_pos = _type.find(_pattern)) != std::string::npos) + _type.erase(_pos, _pattern.length()); + return _type; + }; + (void) _cleanup; // unused but set if sizeof...(Tp) == 0 + + TIMEMORY_FOLD_EXPRESSION(_v.emplace( + TIMEMORY_JOIN("::", "component", _cleanup(demangle(), "tim::")))); + } + + void operator()(std::set& _v) const + { + if constexpr(!concepts::is_placeholder::value) + (*this)(_v, trait::component_apis_t{}); + } +}; + +template <> +struct component_categories +{ + template + void operator()(std::set& _v, std::index_sequence) const + { + TIMEMORY_FOLD_EXPRESSION( + component_categories>{}(_v)); + } + + void operator()(std::set& _v) const + { + (*this)(_v, std::make_index_sequence{}); + } + + auto operator()() const + { + std::set _categories{}; + (*this)(_categories); + return _categories; + } }; //--------------------------------------------------------------------------------------// @@ -249,28 +346,48 @@ enum int main(int argc, char** argv) { - array_t options = { false, false, false, false, false, false }; - array_t fields = {}; - array_t use_mark = {}; + omnitrace_init_library(); + + std::set _category_options = component_categories{}(); + { + auto _settings = tim::settings::shared_instance(); + for(const auto& itr : *_settings) + { + if(exclude_setting(itr.second->get_env_name())) continue; + for(const auto& eitr : itr.second->get_categories()) + { + if(eitr == "native") + _category_options.emplace("settings::timemory"); + else + _category_options.emplace(TIMEMORY_JOIN("::", "settings", eitr)); + } + } + } + + array_t options = { false, false, false, false, false, false, false }; + array_t fields = {}; + array_t use_mark = {}; std::string cols_via{}; std::tie(num_cols, cols_via) = tim::utility::console::get_columns(); std::string 
col_msg = "(default: " + std::to_string(num_cols) + " [via " + cols_via + "])"; - fields[VAL] = "VALUE_TYPE"; - fields[ENUM] = "ENUMERATION"; - fields[LANG] = "C++ ALIAS / PYTHON ENUMERATION"; - fields[FNAME] = "FILENAME"; - fields[CID] = "STRING_IDS"; - fields[DESC] = "DESCRIPTION"; + fields[VAL] = "VALUE_TYPE"; + fields[ENUM] = "ENUMERATION"; + fields[LANG] = "C++ ALIAS / PYTHON ENUMERATION"; + fields[FNAME] = "FILENAME"; + fields[CID] = "STRING_IDS"; + fields[DESC] = "DESCRIPTION"; + fields[CATEGORY] = "CATEGORY"; - use_mark[VAL] = true; - use_mark[ENUM] = true; - use_mark[LANG] = true; - use_mark[FNAME] = false; - use_mark[CID] = false; - use_mark[DESC] = false; + use_mark[VAL] = true; + use_mark[ENUM] = true; + use_mark[LANG] = true; + use_mark[FNAME] = false; + use_mark[CID] = false; + use_mark[DESC] = false; + use_mark[CATEGORY] = false; bool include_settings = false; bool include_components = false; @@ -278,7 +395,6 @@ main(int argc, char** argv) std::string file = {}; - using parser_t = tim::argparse::argument_parser; parser_t parser("omnitrace-avail"); parser.enable_help(); @@ -329,6 +445,16 @@ main(int argc, char** argv) .min_count(1) .dtype("list of strings") .action([](parser_t& p) { regex_keys = p.get("filter"); }); + parser + .add_argument({ "-R", "--category-filter" }, + "Filter the output according to provided regex w.r.t. the " + "categories (egrep + " + "case-sensitive) [e.g. 
-r \"true\"]") + .min_count(1) + .dtype("list of strings") + .action([](parser_t& p) { + category_regex_keys = p.get("category-filter"); + }); parser.add_argument({ "-i", "--ignore-case" }, "Ignore case when filtering") .max_count(1) .dtype("bool") @@ -341,6 +467,15 @@ main(int argc, char** argv) parser.add_argument({ "--alphabetical" }, "Sort the output alphabetically") .max_count(1) .action([](parser_t& p) { alphabetical = p.get("alphabetical"); }); + parser + .add_argument({ "--list-categories" }, + "List the available categories for --categories option") + .count(0) + .action([_category_options](parser_t&) { + std::cout << "Categories:\n"; + for(const auto& itr : _category_options) + std::cout << " " << itr << "\n"; + }); parser.add_argument({ "" }, ""); parser.add_argument({ "[COLUMN OPTIONS]" }, ""); @@ -349,6 +484,14 @@ main(int argc, char** argv) .action([](parser_t& p) { force_brief = p.get("brief"); }); parser.add_argument({ "-d", "--description" }, "Display the component description") .max_count(1); + parser + .add_argument({ "--categories" }, + "Display the category information (use --list-categories to see " + "the available categories)") + .dtype("string") + .action([&_category_options](parser_t& p) { + process_categories(p, _category_options); + }); parser.add_argument({ "-s", "--string" }, "Display all acceptable string identifiers") .max_count(1); parser @@ -405,6 +548,8 @@ main(int argc, char** argv) return EXIT_FAILURE; } + if(parser.exists("list-categories")) return EXIT_SUCCESS; + std::string _pos_regex{}; if(parser.get_positional_count() > 0) { @@ -417,7 +562,11 @@ main(int argc, char** argv) } } - if(!_pos_regex.empty()) regex_keys.emplace_back(_pos_regex); + if(!_pos_regex.empty()) + { + regex_keys.emplace_back(_pos_regex); + category_regex_keys.emplace_back(_pos_regex); + } auto _parser_set_if_exists = [&parser](auto& _var, const std::string& _opt) { using Tp = decay_t; @@ -428,11 +577,16 @@ main(int argc, char** argv) 
_parser_set_if_exists(options[DESC], "description"); _parser_set_if_exists(options[VAL], "value"); _parser_set_if_exists(options[CID], "string"); + _parser_set_if_exists(options[CATEGORY], "categories"); _parser_set_if_exists(file, "output"); _parser_set_if_exists(include_components, "components"); _parser_set_if_exists(include_settings, "settings"); _parser_set_if_exists(include_hw_counters, "hw-counters"); + if(options[CATEGORY] && force_brief) options[CATEGORY] = false; + + if(category_view.empty()) category_view = _category_options; + if(!include_components && !include_settings && !include_hw_counters) include_settings = true; @@ -462,12 +616,14 @@ main(int argc, char** argv) dump_log(); if(include_settings) - write_settings_info(*os, { options[VAL], options[LANG], options[DESC] }); + write_settings_info( + *os, { options[VAL], options[LANG], options[DESC], options[CATEGORY] }); dump_log(); if(include_hw_counters) - write_hw_counter_info(*os, { true, !force_brief, !options[DESC], options[DESC] }); + write_hw_counter_info(*os, { true, !force_brief && !available_only, + !options[DESC], options[DESC] }); dump_log(); @@ -522,34 +678,42 @@ write_component_info(std::ostream& os, const array_t& options, _info.erase(std::remove_if(_info.begin(), _info.end(), [](const auto& itr) { + // NOLINTNEXTLINE for(const auto& nitr : - { "cuda", "cupti", "ompt", "roofline", "_bundle", + { "cuda", "cupti", "nvtx", "roofline", "_bundle", "data_integer", "data_unsigned", "data_floating", "printer" }) { if(itr.name().find(nitr) != std::string::npos) return true; } - return false; + auto _categories = tim::delimit( + itr.categories(), ", ", [](const string_t& _v) { + return "component::" + _v; + }); + for(const auto& citr : _categories) + if(category_view.count(citr) > 0) return false; + return true; }), _info.end()); using width_type = std::vector; using width_bool = std::array; - width_type _widths = width_type{ 30, 12, 20, 20, 20, 40, 20, 40 }; - width_bool _wusing = width_bool{ 
true, !force_brief }; + auto _available_column = !force_brief && !available_only; + width_type _widths = width_type{ 30, 12, 20, 20, 20, 40, 20, 40, 10 }; + width_bool _wusing = width_bool{ true, _available_column }; + int64_t pad = padding; for(size_t i = 0; i < options.size(); ++i) _wusing[i + 2] = options[i]; - int64_t pad = padding; - { constexpr size_t idx = 0; stringstream_t ss; write_entry(ss, "COMPONENT", _widths.at(0), false, true); _widths.at(idx) = std::max(ss.str().length() + pad, _widths.at(idx)); } + { constexpr size_t idx = 1; stringstream_t ss; @@ -582,7 +746,7 @@ write_component_info(std::ostream& os, const array_t& options, std::stringstream ss; _selected += (is_selected(std::get<0>(itr))) ? 1 : 0; write_entry(ss, std::get<0>(itr), _widths.at(0), false, true); - if(!force_brief) + if(_available_column) { std::stringstream _avss{}; _avss << std::boolalpha << std::get<1>(itr); @@ -597,6 +761,9 @@ write_component_info(std::ostream& os, const array_t& options, write_entry(ss, std::get<2>(itr).at(i), _widths.at(i + 2), center, _mark.at(i)); } + + _selected += (is_category_selected(std::get<2>(itr).at(CATEGORY))) ? 1 : 0; + if(_selected == 0) continue; } @@ -632,7 +799,7 @@ write_component_info(std::ostream& os, const array_t& options, os << global_delim; write_entry(os, "COMPONENT", _widths.at(0), true, false); - if(!force_brief) write_entry(os, "AVAILABLE", _widths.at(1), true, false); + if(_available_column) write_entry(os, "AVAILABLE", _widths.at(1), true, false); for(size_t i = 0; i < fields.size(); ++i) { if(!options[i]) continue; @@ -647,7 +814,7 @@ write_component_info(std::ostream& os, const array_t& options, std::stringstream ss; _selected += (is_selected(std::get<0>(itr))) ? 
1 : 0; write_entry(ss, std::get<0>(itr), _widths.at(0), false, true); - if(!force_brief) + if(_available_column) { std::stringstream _avss{}; _avss << std::boolalpha << std::get<1>(itr); @@ -663,6 +830,8 @@ write_component_info(std::ostream& os, const array_t& options, _mark.at(i)); } + _selected += (is_category_selected(std::get<2>(itr).at(CATEGORY))) ? 1 : 0; + if(_selected > 0) { os << global_delim; @@ -692,56 +861,83 @@ write_settings_info(std::ostream& os, const array_t& opts, { static_assert(N >= num_settings_options, "Error! Too few settings options + fields"); - static constexpr size_t size = 7; + static constexpr size_t size = 8; using archive_type = cereal::SettingsTextArchive; using array_type = typename archive_type::array_type; - using unique_set = typename archive_type::unique_set; using width_type = array_t; using width_bool = array_t; - width_type _widths = { 0, 0, 0, 0, 0, 0, 0 }; + width_type _widths = { 0, 0, 0, 0, 0, 0, 0, 0 }; width_bool _wusing = { - true, !force_brief, opts[0], opts[1], opts[1], opts[1], opts[2] + true, !force_brief, opts[0], opts[1], opts[1], opts[1], opts[2], opts[3], }; - width_bool _mark = { false, false, false, true, true, true, false }; + width_bool _mark = { false, false, false, true, true, true, false, false }; // this settings has delayed initialization. make sure it is generated (void) omnitrace::config::get_perfetto_output_filename(); array_type _setting_output; - unique_set _settings_exclude = { "OMNITRACE_ENVIRONMENT", "OMNITRACE_COMMAND_LINE", - "cereal_class_version", "settings" }; -#if !defined(TIMEMORY_USE_CRAYPAT) - _settings_exclude.emplace("OMNITRACE_CRAYPAT"); -#endif - - cereal::SettingsTextArchive settings_archive{ _setting_output, _settings_exclude }; + cereal::SettingsTextArchive settings_archive{ _setting_output, settings_exclude }; settings::serialize_settings(settings_archive); - // exclude some timemory settings which are not relevant to omnitrace - // exact matches, e.g. 
OMNITRACE_BANNER - std::string _settings_rexclude_exact = - "^OMNITRACE_(BANNER|DESTRUCTOR_REPORT|COMPONENTS|(GLOBAL|MPIP|NCCLP|OMPT|" - "PROFILER|TRACE)_COMPONENTS|PYTHON_EXE|PAPI_ATTACH|PLOT_OUTPUT|SEPARATOR_FREQ|" - "STACK_CLEARING|TARGET_PID|THROTTLE_(COUNT|VALUE)|(AUTO|FLAMEGRAPH)_OUTPUT|" - "(ENABLE|DISABLE)_ALL_SIGNALS|ALLOW_SIGNAL_HANDLER|CTEST_NOTES)$"; - // leading matches, e.g. OMNITRACE_MPI_[A-Z_]+ - std::string _settings_rexclude_begin = - "^OMNITRACE_(ERT|DART|MPI|UPCXX|ROOFLINE|CUDA|NVTX|CUPTI)_[A-Z_]+$"; + _setting_output.erase( + std::remove_if(_setting_output.begin(), _setting_output.end(), + [](const auto& itr) { return itr.find("environ") == itr.end(); }), + _setting_output.end()); - // lambda for deciding which settings we want to restrict displaying - auto&& _remove_conditions = [&_settings_exclude, &_settings_rexclude_exact, - &_settings_rexclude_begin](const auto& itr) { - auto&& _v = itr.find("environ")->second; - bool _a = _settings_exclude.find(_v) != _settings_exclude.end(); - bool _b = std::regex_match(_v, std::regex(_settings_rexclude_exact)); - bool _c = std::regex_match(_v, std::regex(_settings_rexclude_begin)); - return (_a || _b || _c); - }; + // patch up the categories + str_set_t _not_in_category_view{}; + auto _settings = tim::settings::shared_instance(); + for(auto& itr : _setting_output) + { + auto _name = itr.find("environ")->second; + auto sitr = _settings->find(_name); + if(sitr != _settings->end()) + { + str_set_t _categories{}; + for(const auto& citr : sitr->second->get_categories()) + { + if(citr == "native") + _categories.emplace("settings::timemory"); + else + _categories.emplace(TIMEMORY_JOIN("::", "settings", citr)); + } + bool _found = false; + for(const auto& citr : _categories) + { + if(category_view.count(citr) > 0) _found = true; + } + if(!_found) + { + _not_in_category_view.emplace(_name); + continue; + } + std::stringstream _ss{}; + for(const auto& citr : sitr->second->get_categories()) + _ss << ", " << citr; 
+ if(!_ss.str().empty()) + { + itr["categories"] = _ss.str().substr(2); + } + } + } + + // erase excluded settings and erase settings not in category view + _setting_output.erase( + std::remove_if(_setting_output.begin(), _setting_output.end(), + [&_not_in_category_view](const auto& itr) { + return (exclude_setting(itr.find("environ")->second) || + _not_in_category_view.count( + itr.find("environ")->second) > 0); + }), + _setting_output.end()); _setting_output.erase(std::remove_if(_setting_output.begin(), _setting_output.end(), - _remove_conditions), + [](const auto& itr) { + return !is_category_selected( + itr.find("categories")->second); + }), _setting_output.end()); if(alphabetical) @@ -753,14 +949,16 @@ write_settings_info(std::ostream& os, const array_t& opts, } array_t _labels = { - "ENVIRONMENT VARIABLE", "VALUE", "DATA TYPE", "C++ STATIC ACCESSOR", - "C++ MEMBER ACCESSOR", "Python ACCESSOR", "DESCRIPTION" + "ENVIRONMENT VARIABLE", "VALUE", "DATA TYPE", "C++ STATIC ACCESSOR", + "C++ MEMBER ACCESSOR", "Python ACCESSOR", "DESCRIPTION", "CATEGORIES", }; array_t _keys = { "environ", "value", "data_type", "static_accessor", "member_accessor", "python_accessor", - "description" }; - array_t _center = { false, true, true, false, false, false, false }; + "description", "categories" }; + array_t _center = { + false, true, true, false, false, false, false, false + }; for(size_t i = 0; i < _widths.size(); ++i) { @@ -1039,11 +1237,42 @@ using component_value_type_t = //--------------------------------------------------------------------------------------// +template +auto get_categories(type_list) +{ + auto _cleanup = [](std::string _type, const std::string& _pattern) { + auto _pos = std::string::npos; + while((_pos = _type.find(_pattern)) != std::string::npos) + _type.erase(_pos, _pattern.length()); + return _type; + }; + (void) _cleanup; // unused but set if sizeof...(Tp) == 0 + + auto _vec = str_vec_t{ _cleanup(demangle(), "tim::")... 
}; + std::sort(_vec.begin(), _vec.end(), [](const auto& lhs, const auto& rhs) { + // prioritize project category + auto lpos = lhs.find("project::"); + auto rpos = rhs.find("project::"); + return (lpos == rpos) ? (lhs < rhs) : (lpos < rpos); + }); + std::stringstream _ss{}; + for(auto&& itr : _vec) + { + _ss << ", " << itr; + } + std::string _v = _ss.str(); + if(!_v.empty()) return _v.substr(2); + return _v; +} + +//--------------------------------------------------------------------------------------// + template info_type get_availability::get_info() { - using value_type = component_value_type_t; + using value_type = component_value_type_t; + using category_types = typename trait::component_apis::type; auto _cleanup = [](std::string _type, const std::string& _pattern) { auto _pos = std::string::npos; @@ -1080,22 +1309,26 @@ get_availability::get_info() id_type = ""; ids_set.clear(); } - auto itr = ids_set.begin(); - string_t db = (markdown) ? "`\"" : "\""; - string_t de = (markdown) ? "\"`" : "\""; - if(has_metadata) description += ". " + metadata_t::extra_description(); - description += "."; - while(itr->empty()) - ++itr; string_t ids_str = {}; - if(itr != ids_set.end()) - ids_str = TIMEMORY_JOIN("", TIMEMORY_JOIN("", db, *itr++, de)); - for(; itr != ids_set.end(); ++itr) { - if(!itr->empty()) - ids_str = TIMEMORY_JOIN(" ", ids_str, TIMEMORY_JOIN("", db, *itr, de)); + auto itr = ids_set.begin(); + string_t db = (markdown) ? "`\"" : "\""; + string_t de = (markdown) ? "\"`" : "\""; + if(has_metadata) description += ". 
" + metadata_t::extra_description(); + description += "."; + while(itr->empty()) + ++itr; + if(itr != ids_set.end()) + ids_str = TIMEMORY_JOIN("", TIMEMORY_JOIN("", db, *itr++, de)); + for(; itr != ids_set.end(); ++itr) + { + if(!itr->empty()) + ids_str = TIMEMORY_JOIN(" ", ids_str, TIMEMORY_JOIN("", db, *itr, de)); + } } + string_t categories = get_categories(category_types{}); + #if 0 auto _remove_typelist = [](std::string _tmp) { if(_tmp.empty()) return _tmp; @@ -1120,7 +1353,7 @@ get_availability::get_info() data_type = _replace(_cleanup(data_type, "::__1"), "> >", ">>"); return info_type{ name, is_available, str_vec_t{ data_type, enum_type, id_type, ids_str, label, - description } }; + description, categories } }; } //--------------------------------------------------------------------------------------// @@ -1393,6 +1626,65 @@ regex_replace(const std::string& _line) #endif return _line; } + +const std::string& +get_category_regex_pattern() +{ + static std::string _pattern = []() { + std::string _v{}; + for(const auto& itr : category_regex_keys) + { + lerr << "Adding regex key: '" << itr << "'...\n"; + _v += "|" + itr; + } + return (_v.empty()) ? 
_v : _v.substr(1); + }(); + return _pattern; +} + +auto +get_category_regex() +{ + static auto _rc = std::regex(get_category_regex_pattern(), get_regex_constants()); + return _rc; +} + +bool +category_regex_match(const std::string& _line) +{ + if(get_regex_pattern().empty()) return true; + + static size_t lerr_width = 0; + lerr_width = std::max(lerr_width, _line.length()); + std::stringstream _line_ss; + _line_ss << "'" << _line << "'"; + + if(std::regex_match(_line, get_category_regex())) + { + lerr << std::left << std::setw(lerr_width) << _line_ss.str() + << " matched pattern '" << get_category_regex_pattern() << "'...\n"; + return true; + } + if(std::regex_search(_line, get_category_regex())) + { + lerr << std::left << std::setw(lerr_width) << _line_ss.str() << " found pattern '" + << get_category_regex_pattern() << "'...\n"; + return true; + } + + lerr << std::left << std::setw(lerr_width) << _line_ss.str() << " missing pattern '" + << get_category_regex_pattern() << "'...\n"; + return false; +} + +bool +exclude_setting(const std::string& _v) +{ + bool _a = settings_exclude.find(_v) != settings_exclude.end(); + bool _b = std::regex_match(_v, std::regex{ settings_rexclude_exact }); + bool _c = std::regex_match(_v, std::regex{ settings_rexclude_begin }); + return (_a || _b || _c); +} } // namespace //--------------------------------------------------------------------------------------// @@ -1405,6 +1697,14 @@ is_selected(const std::string& _line) //--------------------------------------------------------------------------------------// +bool +is_category_selected(const std::string& _line) +{ + return category_regex_match(_line); +} + +//--------------------------------------------------------------------------------------// + std::string hl_selected(const std::string& _line) { @@ -1412,3 +1712,38 @@ hl_selected(const std::string& _line) } //--------------------------------------------------------------------------------------// + +void 
+process_categories(parser_t& p, const str_set_t& _category_options) +{ + category_view = p.get("categories"); + std::vector> _shorthand_patches{}; + for(const auto& itr : category_view) + { + auto _is_shorthand = [&_shorthand_patches, &_category_options, + itr](const std::string& _prefix) { + auto _opt = TIMEMORY_JOIN("::", _prefix, itr); + if(_category_options.count(_opt) > 0) + { + _shorthand_patches.emplace_back([itr, _opt]() { + category_view.erase(itr); + category_view.emplace(_opt); + }); + return true; + } + return false; + }; + + if(_category_options.count(itr) == 0) + { + if(!_is_shorthand("component") && !_is_shorthand("settings")) + throw std::runtime_error( + itr + " is not a valid category. Use --list-categories to view " + "valid categories"); + } + } + for(auto&& itr : _shorthand_patches) + itr(); +} + +//--------------------------------------------------------------------------------------// diff --git a/projects/rocprofiler-systems/source/bin/omnitrace/details.cpp b/projects/rocprofiler-systems/source/bin/omnitrace/details.cpp index 1548371f4f..e6db65df73 100644 --- a/projects/rocprofiler-systems/source/bin/omnitrace/details.cpp +++ b/projects/rocprofiler-systems/source/bin/omnitrace/details.cpp @@ -22,14 +22,65 @@ #include "omnitrace.hpp" -static int expect_error = NO_ERROR; -static int error_print = 0; -static auto regex_opts = std::regex_constants::egrep | std::regex_constants::optimize; +static int expect_error = NO_ERROR; +static int error_print = 0; // set of whole function names to exclude strset_t get_whole_function_names() { +#if 1 + return strset_t{ "sem_init", + "sem_destroy", + "sem_open", + "sem_close", + "sem_post", + "sem_wait", + "sem_getvalue", + "sem_clockwait", + "sem_timedwait", + "sem_trywait", + "do_futex_wait", + "sem_unlink", + "fork", + "dl_iterate_phdr", + "dlinfo", + "dlopen", + "dlmopen", + "dlvsym", + "dlsym", + "getenv", + "setenv", + "unsetenv", + "fflush", + "malloc", + "malloc_stats", + "malloc_trim", + "mallopt", 
+ "calloc", + "free", + "pvalloc", + "valloc", + "mmap", + "munmap", + "fmemopen", + "fmemclose", + "backtrace", + "backtrace_symbols", + "backtrace_symbols_fd", + "sigaddset", + "sigandset", + "sigdelset", + "sigemptyset", + "sigfillset", + "sighold", + "sigisemptyset", + "sigismember", + "sigorset", + "sigrelse", + "sigvec" }; +#else + // should hopefully be removed soon return strset_t{ "a64l", "advance", "aio_return", @@ -487,6 +538,7 @@ get_whole_function_names() "xencrypt", "xprt_register", "xprt_unregister" }; +#endif } //======================================================================================// @@ -780,137 +832,3 @@ error_func_fake(error_level_t level, int num, const char* const* params) consume_parameters(level, num, params); // It does nothing. } - -//======================================================================================// -// -bool -c_stdlib_module_constraint(const std::string& _file) -{ - static std::regex _pattern( - "^(a64l|accept4|alphasort|argp-help|argp-parse|asprintf|atof|atoi|atol|atoll|" - "auth_des|auth_none|auth_unix|backtrace|backtracesyms|backtracesymsfd|c16rtomb|" - "cacheinfo|canonicalize|carg|cargf|cargf128|cargl|" - "catgets|cfmakeraw|cfsetspeed|check_pf|chflags|" - "clearerr|clearerr_u|clnt_perr|clnt_raw|clnt_tcp|clnt_udp|clnt_unix" - "settime|copy_file_range|" - "creat64|ctermid|ctime|ctime_r|ctype|ctype-c99|ctype-c99_l|ctype-extn|ctype_l|" - "cuserid|daemon|dcigettext|difftime|dirname|div|dl-error|dl-libc|dl-sym|dlerror|" - "duplocale|dysize|endutxent|envz|epoll_wait|" - "ether_aton|ether_aton_r|ether_hton|ether_line|ether_ntoa|ether_ntoh|eventfd_" - "read|eventfd_write|execlp|execv|execvp|explicit_bzero|faccessat|fallocate64|" - "fattach|fchflags|fchmodat|fdatasync|fdetach|fdopendir|fedisblxcpt|feenablxcpt|" - "fegetexcept|fegetmode|feholdexcpt|feof_u|ferror_u|fesetenv|fesetexcept|" - "fesetmode|fesetround|fetestexceptflag|fexecve|ffsll|fgetexcptflg|fgetgrent|" - 
"fgetpwent|fgetsgent|fgetspent|fileno|fmemopen|fmtmsg|fnmatch|fprintf|fputc|" - "fputc_u|fputwc|fputwc_u|freopen|freopen64|fscanf|fseeko|fsetexcptflg|fstab|" - "fsync|ftello|ftime|ftok|fts|ftw|futimens|futimesat|fwide|fxprintf|gconv_conf|" - "gconv_db|gconv_dl|genops|getaddrinfo|getaliasent|getaliasent_r|getaliasname|" - "getauxval|getc|getchar|getchar_u|getdate|getdirentries|getdirname|getentropy|" - "getenv|getgrent|getgrent_r|getgrgid|getgrnam|gethostid|gethstbyad|gethstbynm|" - "gethstbynm2|gethstent|gethstent_r|getipv4sourcefilter|getloadavg|getlogin|" - "getlogin_r|getmsg|getnameinfo|getnetbyad|getnetbynm|getnetent|getnetent_r|" - "getnetgrent|getnetgrent_r|getopt|getopt1|getpass|getproto|getprtent|getprtent_r|" - "getprtname|getpwent|getpwent_r|getpwnam|getpwnam_r|getpwuid|getrandom|" - "getrpcbyname|getrpcbynumber|getrpcent|getrpcent_r|getrpcport|getservent|" - "getservent_r|getsgent|getsgent_r|getsgnam|getsourcefilter|getspent|getspent_r|" - "getspnam|getsrvbynm|getsrvbynm_r|getsrvbypt|getsubopt|getsysstats|getttyent|" - "getusershell|getutent_r|getutline|getutmp|getutxent|getutxid|getutxline|getw|" - "getwchar|getwchar_u|getwd|glob|gmon|gmtime|grantpt|group_member|gtty|herror|" - "hsearch|hsearch_r|htons|iconv|iconv_close|iconv_open|idn-stub|if_index|ifaddrs|" - "inet6_|inet_|inet_|initgroups|insremque|iofgets|iofgetws|iofgetws_u|iofputws|" - "iofwide|iopopen|ioungetwc|isastream|isctype|isfdtype|key_call|key_prot|killpg|" - "l64a|labs|lchmod|lckpwdf|lcong48|ldiv|llabs|lldiv|lockf|longjmp|lsearch|lutimes|" - "makedev|malloc|mblen|mbrtoc16|mbsinit|mbstowcs|mbtowc|mcheck|memccpy|" - "memchr|memcmp|memfrob|memmem|memset|memstream|mkdtemp|mkfifo|mkfifoat|mkostemp|" - "mkostemps|mkstemp|mkstemps|mktemp|mlock2|mntent|mntent_r|mpa|" - "msgctl|msgget|msgsnd|msort|msync|mtrace|netname|nice|nl_langinfo|nsap_addr|nscd_" - "getgr_r|nscd_gethst_r|nscd_getpw_r|nscd_getserv_r|nscd_helper|" - "nsswitch|ntp_gettime|ntp_gettimex|obprintf|obstack|oldfmemopen|open_by_handle_" 
- "at|opendir|pathconf|pclose|perror|pkey_mprotect|pm_getmaps|pmap_prot|pmap_rmt|" - "posix_fallocate|posix_fallocate64|preadv64|preadv64v2|printf-prs|printf_fp|" - "printf_size|profil|psiginfo|psignal|ptrace|ptsname|putc_u|putchar|putchar_u|" - "putenv|putgrent|putmsg|putpwent|putsgent|putspent|pututxline|putw|putwc_u|" - "putwchar|putwchar_u|pwritev64|pwritev64v2|raise|rcmd|readv|" - "reboot|recvfrom|recvmmsg|regex|regexp|remove|rename|renameat|res-close|res_" - "hconf|res_init|resolv_conf|rexec|rpc_thread|rpmatch|ruserpass|scandir|sched_" - "cpucount|sched_getaffinity|sched_getcpu|seed48|seekdir|semget|semop|semtimedop|" - "sendmsg|setbuf|setegid|seteuid|sethostid|setipv4sourcefilter|setlinebuf|" - "setlogin|setpgrp|setresuid|setrlimit64|setsourcefilter|setutxent|sgetsgent|" - "sgetspent|shmat|shmdt|shmget|sigandset|sigdelset|siggetmask|sighold|sigignore|" - "sigintr|sigisempty|signalfd|sigorset|sigpause|sigpending|sigrelse|sigset|" - "sigstack|sockatmark|speed|splice|sprofil|sscanf|sstk|stime|strcasecmp|" - "strcasestr|strcat|strchr|strcmp|strcpy|strcspn|strerror|strerror_l|strfmon|" - "strfromd|strfromf|strfromf128|strfroml|strfry|strlen|strncase|strncat|strncmp|" - "strncpy|strpbrk|strrchr|strsignal|strspn|strstr|strtod_l|strtof|strtof128_l|" - "strtof_l|strtoimax|strtok|strtol_l|strtold_l|strtoul|strtoumax|strxfrm|stty|svc|" - "svc_raw|svc_simple|svc_tcp|svc_udp|svc_unix|swab|sync_file_range|syslog|system|" - "tcflow|tcflush|tcgetattr|tcgetsid|tcsendbrk|tcsetpgrp|tee|telldir|tempnam|" - "tmpnam|tmpnam_r|tsearch|ttyname|ttyname_r|ttyslot|tzset|ualarm|ulimit|umount|" - "unlockpt|updwtmpx|ustat|utimensat|utmp_file|utmpxname|version|" - "versionsort|vfprintf|vfscanf|vfwscanf|vlimit|vmsplice|vprintf|vtimes|wait[0-9]|" - "wcfuncs|wcfuncs_l|wcscpy|wcscspn|wcsdup|wcsncat|wcsncmp|wcsnrtombs|wcspbrk|" - "wcsrchr|wcsstr|wcstod_l|wcstof|wcstoimax|wcstok|wcstold_l|wcstombs|wcstoumax|" - "wcswidth|wcsxfrm|wctob|wctype_l|wcwidth|wfileops|wgenops|wmemcmp|wmemstream|" - 
"wordexp|wstrops|x2y2m1l|xcrypt|xdr|xdr_float|xdr_intXX_t|xdr_mem|xdr_rec|xdr_" - "ref|xdr_sizeof|xdr_stdio|mq_notify|aio_|timer_routines|nptl-|shm-|sem_close|" - "setuid|pt-raise|x2y2)", - regex_opts); - - return std::regex_search(_file, _pattern); -} - -//======================================================================================// -// -bool -c_stdlib_function_constraint(const std::string& _func) -{ - static std::regex _pattern( - "^(malloc|calloc|free|buffer|fscan|fstab|internal|gnu|fprint|isalnum|isalpha|" - "isascii|isastream|isblank|isblank_l|iscntrl|isctype|isdigit|isdigit_l|isfdtype|" - "isgraph|islower|islower_l|isprint|isprint_l|ispunct|isspace|isupper|isupper_l|" - "iswprint|isxdigit|asprintf|atof|atoi|atol|atoll|memalign|memccpy|memcpy|memchr|" - "memcmp|memfrob|memset|mkdtemp|mkfifo|mkfifoat|mkostemp64|mkostemps64|mkstemp|" - "mkstemps64|mktemp|mlock2|monstartup|mprobe|mremap_chunk|get_current_dir_name|" - "get_free_list|getaliasbyname|getaliasent|getauxval|getchar|getchar_unlocked|" - "getdate|getdirentries|getentropy|getenv|getfs|getgrent|getgrgid|" - "getgrnam|getgrouplist|gethostbyaddr|gethostbyname|gethostbyname2|gethostent|" - "gethostid|getifaddrs|getifaddrs_internal|getipv4sourcefilter|getkeyserv_handle|" - "getloadavg|getlogin|getlogin_fd0|getlogin_r_fd0|getmntent|getmsg|getnetbyaddr|" - "getnetbyname|getnetent|getnetgrent|getopt|getopt_long|getopt_long_only|getpass|" - "getprotobyname|getprotobynumber|getprotoent|getpwent|getpwnam|getpwnam_r|" - "getpwuid|getrandom|getrpcbyname|getrpcbynumber|getrpcent|getrpcport|" - "getservbyname|getservbyname_r|getservbyport|getservent|getsgent|getsgnam|" - "getsourcefilter|getspent|getspnam|getsubopt|getttyname|getttyname_r|" - "getusershell|getutent_r_file|getutent_r_unknown|getutid_r_file|getutid_r_" - "unknown|getutline|getutline_r_file|getutline_r_unknown|getutmp|getutxent|" - "getutxid|getutxline|getw|psiginfo|psignal|ptmalloc_init|ptrace|ptsname|putc_" - 
"unlocked|putchar|putchar_unlocked|putenv|putgrent|putmsg|putpwent|putsgent|" - "putspent|pututline_file|pututxline|putw|pw_map_free|pwritev|pwritev2|" - "qsort|raise|rcmd|re_acquire_state|re_acquire_state_context|re_" - "comp|re_compile_internal|re_dfa_add_node|re_exec|re_node_set_init_union|re_node_" - "set_insert|re_node_set_merge|re_search_internal|re_search_stub|re_string_" - "context_at|re_string_reconstruct|readtcp|readunix|readv|realloc|realpath|str_to_" - "mpn|strcasecmp|strcat|strcmp|strcpy|strcspn|strerror|strerror_l|strerror_thread_" - "freeres|strfmon|strfromd|strfromf|strfromf128|strfroml|strfry|strlen|" - "strncasecmp|strncat|strncmp|strncpy|strpbrk|strrchr|strsignal|strspn|strtof32|" - "strtoimax|strtok|strtol_l|strtold_l|strtoull|strtoumax|strxfrm|xdrstdio|xdrmem|" - "inet_|inet6_|clock_|backtrace_|dummy_|fts_|fts64_|fexecv|execv|stime|ftime|" - "gmtime|wcs|envz_|fmem|fputc|fgetc|fputwc|fgetwc|vprintf|feget|fetest|feenable|" - "feset|fedisable|nscd_|fork|execl|tzset|ntp_|mtrace|tr_[a-z]+hook|mcheck_[a-z_]+" - "ftell|fputs|fgets|siglongjmp|sigdelset|killpg|tolower|toupper|daemon|" - "iconv_[a-z_]+|catopen|catgets|catclose|check_add_mapping$|sem_open|sem_close|" - "sem_unlink|do_futex_wait|sem_timedwait|unwind_stop|unwind_cleanup|longjmp_" - "compat|vfork_|elision_init|cr_|cri_|aio_|mq_|sem_init|waitpid$|sigcancel_" - "handler|sighandler_setxid|start_thread$|clock$|semctl$|shm_open$|shm_unlink$|" - "printf|dprintf|walker$|clear_once_control$|libcr_|sem_wait$|sem_trywait$|vfork|" - "pause$|wait$|waitid$|msgrcv$|sigwait$|sigsuspend$|recvmsg$|sendmsg$|" - "ftrylockfile$|funlockfile$|tee$|setbuf$|setbuffer$|enlarge_userbuf$|convert_and_" - "print$|feraise|lio_|atomic_|err$|errx$|print_errno_message$|error_tail$|" - "clntunix_|sem_destroy|setxid_mark_thread|feupdate|send$|connect$|longjmp|pwrite|" - "accept$|stpncpy$|writeunix$|xflowf$|mbrlen$)", - regex_opts); - - return std::regex_search(_func, _pattern); -} 
-//======================================================================================// -// diff --git a/projects/rocprofiler-systems/source/bin/omnitrace/omnitrace.cpp b/projects/rocprofiler-systems/source/bin/omnitrace/omnitrace.cpp index 7591a2cc8d..7c0e59c694 100644 --- a/projects/rocprofiler-systems/source/bin/omnitrace/omnitrace.cpp +++ b/projects/rocprofiler-systems/source/bin/omnitrace/omnitrace.cpp @@ -22,10 +22,12 @@ #include "omnitrace.hpp" +#include #include #include #include #include +#include #include #include #include @@ -42,7 +44,6 @@ bool binary_rewrite = false; bool is_attached = false; bool loop_level_instr = false; bool werror = false; -bool stl_func_instr = false; bool use_mpi = false; bool is_static_exe = false; bool is_driver = false; @@ -50,6 +51,7 @@ bool allow_overlapping = false; bool instr_dynamic_callsites = false; bool instr_traps = false; bool instr_loop_traps = false; +bool explicit_dump_and_exit = false; size_t batch_size = 50; strset_t extra_libs = {}; size_t min_address_range = (1 << 8); // 256 @@ -67,18 +69,30 @@ std::map end_expr = {}; const auto npos_v = string_t::npos; string_t instr_mode = "trace"; string_t print_instrumented = {}; +string_t print_excluded = {}; string_t print_available = {}; string_t print_overlapping = {}; +strset_t print_formats = { "txt", "json" }; std::string modfunc_dump_dir = {}; auto regex_opts = std::regex_constants::egrep | std::regex_constants::optimize; + +std::string +get_absolute_exe_filepath(std::string exe_name, const std::string& env_path = "PATH"); + +std::string +get_absolute_lib_filepath(std::string lib_name, + const std::string& env_path = "LD_LIBRARY_PATH"); + +bool +file_exists(const std::string& name); + +std::string +get_realpath(const std::string&); + +std::string +get_cwd(); } // namespace -std::string -get_absolute_exe_filepath(std::string exe_name); - -std::string -get_absolute_lib_filepath(std::string lib_name); - 
//======================================================================================// // // entry point @@ -97,22 +111,22 @@ main(int argc, char** argv) _dyn_api_rt_paths.insert(_dyn_api_rt_paths.begin(), _dyn_api_rt_abs); for(auto&& itr : _dyn_api_rt_paths) { - auto file_exists = [](const std::string& _fname) { + auto _file_exists = [](const std::string& _fname) { struct stat _buffer; if(stat(_fname.c_str(), &_buffer) == 0) return (S_ISREG(_buffer.st_mode) != 0 || S_ISLNK(_buffer.st_mode) != 0); return false; }; - if(file_exists(itr)) + if(_file_exists(itr)) tim::set_env("DYNINSTAPI_RT_LIB", itr, 0); - else if(file_exists(TIMEMORY_JOIN('/', itr, "libdyninstAPI_RT.so"))) + else if(_file_exists(TIMEMORY_JOIN('/', itr, "libdyninstAPI_RT.so"))) tim::set_env("DYNINSTAPI_RT_LIB", TIMEMORY_JOIN('/', itr, "libdyninstAPI_RT.so"), 0); - else if(file_exists(TIMEMORY_JOIN('/', itr, "libdyninstAPI_RT.a"))) + else if(_file_exists(TIMEMORY_JOIN('/', itr, "libdyninstAPI_RT.a"))) tim::set_env("DYNINSTAPI_RT_LIB", TIMEMORY_JOIN('/', itr, "libdyninstAPI_RT.a"), 0); } - verbprintf(0, "[omnitrace][exe] DYNINST_API_RT: %s\n", + verbprintf(0, "DYNINST_API_RT: %s\n", tim::get_env("DYNINSTAPI_RT_LIB", "").c_str()); argv0 = argv[0]; @@ -121,12 +135,19 @@ main(int argc, char** argv) address_space_t* addr_space = nullptr; string_t mutname = {}; string_t outfile = {}; - std::vector inputlib = { "libomnitrace" }; + std::vector inputlib = { "libomnitrace-dl" }; std::vector libname = {}; std::vector sharedlibname = {}; std::vector staticlibname = {}; tim::process::id_t _pid = -1; + fixed_module_functions = { + { &available_module_functions, false }, + { &instrumented_module_functions, false }, + { &excluded_module_functions, false }, + { &overlapping_module_functions, false }, + }; + bpatch->setTypeChecking(true); bpatch->setSaveFPR(true); bpatch->setDelayedParsing(true); @@ -176,11 +197,12 @@ main(int argc, char** argv) } } - auto cmd_string = [](int _ac, char** _av) { + auto cmd_string = 
[](int _ac, char** _av) -> std::string { + if(_ac == 0) return std::string{}; stringstream_t ss; for(int i = 0; i < _ac; ++i) - ss << _av[i] << " "; - return ss.str(); + ss << " " << _av[i]; + return ss.str().substr(1); }; if(_cmdc > 0 && !mutname.empty()) @@ -208,12 +230,6 @@ main(int argc, char** argv) if(_cmdc > 0) cmdv0 = _cmdv[0]; - std::stringstream jump_description; - jump_description - << "Instrument with function pointers in OMNITRACE_JUMP_LIBRARY (default: " - << tim::get_env("OMNITRACE_JUMP_LIBRARY", "jump/libomnitrace.so") - << ")"; - // now can loop through the options. If the first character is '-', then we know // we have an option. Check to see if it is one of our options and process it. If // it is unrecognized, then set the errflag to report an error. When we come to a @@ -245,13 +261,40 @@ main(int argc, char** argv) .max_count(1) .action([](parser_t& p) { werror = p.get("error"); }); parser - .add_argument( - { "--print-dir" }, - "Output directory for diagnostic available/instrumented/overlapping module " - "function lists, e.g. {print-dir}/available.txt") + .add_argument({ "--simulate" }, + "Exit after outputting diagnostic " + "{available,instrumented,excluded,overlapping} module " + "function lists, e.g. available-instr.txt") + .max_count(1) + .dtype("bool") + .action([](parser_t& p) { explicit_dump_and_exit = p.get("simulate"); }); + parser + .add_argument({ "--print-format" }, + "Output format for diagnostic " + "{available,instrumented,excluded,overlapping} module " + "function lists, e.g. {print-dir}/available-instr.txt") + .min_count(1) + .max_count(3) + .dtype("string") + .choices({ "xml", "json", "txt" }) + .action([](parser_t& p) { print_formats = p.get("print-format"); }); + parser + .add_argument({ "--print-dir" }, + "Output directory for diagnostic " + "{available,instrumented,excluded,overlapping} module " + "function lists, e.g. 
{print-dir}/available-instr.txt") .count(1) .dtype("string") .action([](parser_t& p) { modfunc_dump_dir = p.get("print-dir"); }); + parser + .add_argument( + { "--print-available" }, + "Print the available entities for instrumentation (functions, modules, or " + "module-function pair) to stdout applying regular expressions and exit") + .count(1) + .choices({ "functions", "modules", "functions+", "pair", "pair+" }) + .action( + [](parser_t& p) { print_available = p.get("print-available"); }); parser .add_argument( { "--print-instrumented" }, @@ -263,14 +306,15 @@ main(int argc, char** argv) print_instrumented = p.get("print-instrumented"); }); parser - .add_argument( - { "--print-available" }, - "Print the available entities for instrumentation (functions, modules, or " - "module-function pair) to stdout applying regular expressions and exit") + .add_argument({ "--print-excluded" }, + "Print the entities for instrumentation (functions, modules, or " + "module-function " + "pair) which are excluded from the instrumentation to stdout after " + "applying regular expressions and exit") .count(1) .choices({ "functions", "modules", "functions+", "pair", "pair+" }) .action( - [](parser_t& p) { print_available = p.get("print-available"); }); + [](parser_t& p) { print_excluded = p.get("print-excluded"); }); parser .add_argument( { "--print-overlapping" }, @@ -288,8 +332,14 @@ main(int argc, char** argv) parser.add_argument({ "" }, ""); parser .add_argument({ "-o", "--output" }, - "Enable generation of a new executable (binary-rewrite)") - .count(1) + "Enable generation of a new executable (binary-rewrite). If a " + "filename is not provided, omnitrace will use the basename and " + "output to the cwd, unless the target binary is in the cwd. 
In the " + "latter case, omnitrace will either use ${PWD}/.inst " + "(non-libraries) or ${PWD}/instrumented/ (libraries)") + .min_count(0) + .max_count(1) + .dtype("string") .action([&outfile](parser_t& p) { binary_rewrite = true; outfile = p.get("output"); @@ -345,16 +395,7 @@ main(int argc, char** argv) "The primary function to instrument around, e.g. 'main'") .count(1) .action([](parser_t& p) { main_fname = p.get("main-function"); }); - parser.add_argument({ "-j", "--jump" }, jump_description.str()) - .dtype("boolean") - .max_count(1) - .action([&inputlib](parser_t& p) { - if(p.get("jump")) - { - for(auto& itr : inputlib) - itr += "-jump"; - } - }); + /* parser .add_argument({ "-s", "--stubs" }, "Instrument with library stubs for LD_PRELOAD") .dtype("boolean") @@ -366,6 +407,7 @@ main(int argc, char** argv) itr += "-stubs"; } }); + */ parser.add_argument({ "--driver" }, "Force main or _init/_fini instrumentation") .dtype("boolean") .max_count(1) @@ -380,6 +422,31 @@ main(int argc, char** argv) for(const auto& itr : _load) extra_libs.insert(itr); }); + parser + .add_argument({ "--load-instr" }, + "Load {available,instrumented,excluded,overlapping}-instr JSON or " + "XML file(s) and override what is read from the binary") + .dtype("filepath") + .max_count(-1) + .action([](parser_t& p) { + auto _load = p.get("load-instr"); + std::map module_function_map = { + { "available_module_functions", &available_module_functions }, + { "instrumented_module_functions", &instrumented_module_functions }, + { "excluded_module_functions", &excluded_module_functions }, + { "overlapping_module_functions", &overlapping_module_functions }, + }; + for(const auto& itr : _load) + load_info(itr, module_function_map, 0); + for(const auto& itr : module_function_map) + { + auto _empty = itr.second->empty(); + if(!_empty) + verbprintf(0, "Loaded %zu module functions for %s\n", + itr.second->size(), itr.first.c_str()); + fixed_module_functions.at(itr.second) = !_empty; + } + }); parser 
.add_argument({ "--init-functions" }, "Initialization function(s) for supplemental instrumentation " @@ -396,18 +463,22 @@ main(int argc, char** argv) parser.add_argument({ "" }, ""); parser.add_argument({ "[SYMBOL SELECTION OPTIONS]" }, ""); parser.add_argument({ "" }, ""); - parser.add_argument({ "-I", "-R", "--function-include" }, - "Regex for selecting functions"); - parser.add_argument({ "-E", "--function-exclude" }, "Regex for excluding functions"); - parser.add_argument({ "-MI", "-MR", "--module-include" }, - "Regex for selecting modules/files/libraries"); + parser.add_argument({ "-I", "--function-include" }, + "Regex(es) for including functions (despite heuristics)"); + parser.add_argument({ "-E", "--function-exclude" }, + "Regex(es) for excluding functions (always applied)"); + parser.add_argument({ "-R", "--function-restrict" }, + "Regex(es) for restricting functions only to those " + "that match the provided regular-expressions"); + parser.add_argument({ "-MI", "--module-include" }, + "Regex(es) for selecting modules/files/libraries " + "(despite heuristics)"); parser.add_argument({ "-ME", "--module-exclude" }, - "Regex for excluding modules/files/libraries"); - parser - .add_argument({ "-S", "--stdlib" }, - "Enable instrumentation of C++ standard library functions.") - .max_count(1) - .action([](parser_t& p) { stl_func_instr = p.get("stdlib"); }); + "Regex(es) for excluding modules/files/libraries " + "(always applied)"); + parser.add_argument({ "-MR", "--module-restrict" }, + "Regex(es) for restricting modules/files/libraries only to those " + "that match the provided regular-expressions"); parser.add_argument({ "" }, ""); parser.add_argument({ "[RUNTIME OPTIONS]" }, ""); @@ -479,15 +550,6 @@ main(int argc, char** argv) parser.add_argument({ "" }, ""); parser.add_argument({ "[GRANULARITY OPTIONS]" }, ""); parser.add_argument({ "" }, ""); - parser - .add_argument({ "--dynamic-callsites" }, - "Force instrumentation if a function has dynamic callsites 
(e.g. " - "function pointers)") - .max_count(1) - .dtype("boolean") - .action([](parser_t& p) { - instr_dynamic_callsites = p.get("dynamic-callsites"); - }); parser.add_argument({ "-l", "--instrument-loops" }, "Instrument at the loop level") .dtype("boolean") .max_count(1) @@ -511,6 +573,15 @@ main(int argc, char** argv) .action([](parser_t& p) { min_loop_address_range = p.get("min-address-range-loop"); }); + parser + .add_argument({ "--dynamic-callsites" }, + "Force instrumentation if a function has dynamic callsites (e.g. " + "function pointers)") + .max_count(1) + .dtype("boolean") + .action([](parser_t& p) { + instr_dynamic_callsites = p.get("dynamic-callsites"); + }); parser .add_argument( { "--traps" }, @@ -548,8 +619,10 @@ main(int argc, char** argv) parser .add_argument( { "-b", "--batch-size" }, - "Dyninst supports batch insertion of multiple points. If one large batch " - "insertion fails, this value will be used to create smaller batches") + "Dyninst supports batch insertion of multiple points during runtime " + "instrumentation. If one large batch " + "insertion fails, this value will be used to create smaller batches. Larger " + "batches generally decrease the instrumentation time") .count(1) .dtype("int") .action([](parser_t& p) { batch_size = p.get("batch-size"); }); @@ -577,6 +650,34 @@ main(int argc, char** argv) return -1; } + if(binary_rewrite && outfile.empty()) + { + auto _is_local = (get_realpath(cmdv0) == + TIMEMORY_JOIN('/', get_cwd(), ::basename(cmdv0.c_str()))); + auto _cmd = std::string{ ::basename(cmdv0.c_str()) }; + if(_cmd.find('.') == std::string::npos) + { + // there is no extension, assume it is an exe + outfile = (_is_local) ? TIMEMORY_JOIN('.', _cmd, "inst") : _cmd; + } + else if(_cmd.find("lib") == 0 || _cmd.find(".so") != std::string::npos || + _cmd.find(".a") == _cmd.length() - 2) + { + // if it starts with lib, ends with .a, or contains .so (e.g. 
libfoo.so, + // libfoo.so.2), assume it is a library and retain the name but put it in a + // different directory + outfile = (_is_local) ? TIMEMORY_JOIN('/', "instrumented", _cmd) : _cmd; + } + else + { + outfile = (_is_local) ? TIMEMORY_JOIN('.', _cmd, "inst") : _cmd; + } + verbprintf(0, + "Binary rewrite was activated via '-o' but no filename was provided. " + "Using: '%s'\n", + outfile.c_str()); + } + if(modfunc_dump_dir.empty()) { modfunc_dump_dir = tim::get_env("OMNITRACE_OUTPUT_PATH", ""); @@ -598,42 +699,41 @@ main(int argc, char** argv) // //----------------------------------------------------------------------------------// // - // Helper function for adding regex expressions - // - auto add_regex = [](auto& regex_array, const string_t& regex_expr) { - if(!regex_expr.empty()) - regex_array.emplace_back(std::regex(regex_expr, regex_opts)); - }; - - add_regex(func_include, tim::get_env("OMNITRACE_REGEX_INCLUDE", "")); - add_regex(func_exclude, tim::get_env("OMNITRACE_REGEX_EXCLUDE", "")); - - if(parser.exists("R")) { - auto keys = parser.get("R"); - for(const auto& itr : keys) - add_regex(func_include, itr); - } + // Helper function for adding regex expressions + auto add_regex = [](auto& regex_array, const string_t& regex_expr) { + if(!regex_expr.empty()) + regex_array.emplace_back(std::regex(regex_expr, regex_opts)); + }; - if(parser.exists("E")) - { - auto keys = parser.get("E"); - for(const auto& itr : keys) - add_regex(func_exclude, itr); - } + add_regex(func_include, tim::get_env("OMNITRACE_REGEX_INCLUDE", "")); + add_regex(func_exclude, tim::get_env("OMNITRACE_REGEX_EXCLUDE", "")); + add_regex(func_restrict, tim::get_env("OMNITRACE_REGEX_RESTRICT", "")); - if(parser.exists("MI")) - { - auto keys = parser.get("MI"); - for(const auto& itr : keys) - add_regex(file_include, itr); - } + add_regex(file_include, + tim::get_env("OMNITRACE_REGEX_MODULE_INCLUDE", "")); + add_regex(file_exclude, + tim::get_env("OMNITRACE_REGEX_MODULE_EXCLUDE", "")); + 
add_regex(file_restrict, + tim::get_env("OMNITRACE_REGEX_MODULE_RESTRICT", "")); - if(parser.exists("ME")) - { - auto keys = parser.get("ME"); - for(const auto& itr : keys) - add_regex(file_exclude, itr); + // Helper function for parsing the regex options + auto _parse_regex_option = [&parser, &add_regex](const string_t& _option, + regexvec_t& _regex_vec) { + if(parser.exists(_option)) + { + auto keys = parser.get(_option); + for(const auto& itr : keys) + add_regex(_regex_vec, itr); + } + }; + + _parse_regex_option("function-include", func_include); + _parse_regex_option("function-exclude", func_exclude); + _parse_regex_option("function-restrict", func_restrict); + _parse_regex_option("module-include", file_include); + _parse_regex_option("module-exclude", file_exclude); + _parse_regex_option("module-restrict", file_restrict); } //----------------------------------------------------------------------------------// @@ -756,17 +856,22 @@ main(int argc, char** argv) //----------------------------------------------------------------------------------// std::set module_names; + static auto _insert_module_function = [](fmodset_t& _module_funcs, auto _v) { + if(!fixed_module_functions.at(&_module_funcs)) _module_funcs.emplace(_v); + }; + auto _add_overlapping = [](module_t* mitr, procedure_t* pitr) { if(!pitr->isInstrumentable()) return; std::vector _overlapping{}; if(pitr->findOverlapping(_overlapping)) { - overlapping_module_functions.insert(module_function{ mitr, pitr }); + _insert_module_function(overlapping_module_functions, + module_function{ mitr, pitr }); for(auto* oitr : _overlapping) { if(!oitr->isInstrumentable()) continue; - overlapping_module_functions.insert( - module_function{ oitr->getModule(), oitr }); + _insert_module_function(overlapping_module_functions, + module_function{ oitr->getModule(), oitr }); } } }; @@ -784,7 +889,7 @@ main(int argc, char** argv) if(!pitr->isInstrumentable()) continue; auto _modfn = module_function{ itr, pitr }; 
module_names.insert(_modfn.module); - available_module_functions.insert(std::move(_modfn)); + _insert_module_function(available_module_functions, _modfn); _add_overlapping(itr, pitr); } } @@ -805,7 +910,7 @@ main(int argc, char** argv) { auto _modfn = module_function{ mod, itr }; module_names.insert(_modfn.module); - available_module_functions.insert(std::move(_modfn)); + _insert_module_function(available_module_functions, _modfn); _add_overlapping(mod, itr); } } @@ -848,11 +953,12 @@ main(int argc, char** argv) auto _output_prefix = tim::get_env("OMNITRACE_OUTPUT_PREFIX", ""); dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, - TIMEMORY_JOIN("", _output_prefix, "available-instr.txt")), - available_module_functions, 1, werror); + TIMEMORY_JOIN("", _output_prefix, "available-instr")), + available_module_functions, 1, werror, "available-instr", print_formats); dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, - TIMEMORY_JOIN("", _output_prefix, "overlapping-instr.txt")), - overlapping_module_functions, 1, werror); + TIMEMORY_JOIN("", _output_prefix, "overlapping-instr")), + overlapping_module_functions, 1, werror, "overlapping_module_functions", + print_formats); //----------------------------------------------------------------------------------// // @@ -1231,9 +1337,18 @@ main(int argc, char** argv) std::string _libname = {}; for(auto&& itr : sharedlibname) - _libname = get_absolute_lib_filepath(itr); + { + if(_libname.empty()) _libname = get_absolute_lib_filepath(itr, "LD_LIBRARY_PATH"); + if(_libname.empty()) _libname = get_absolute_lib_filepath(itr, "LIBRARY_PATH"); + } + for(auto&& itr : staticlibname) + { + if(_libname.empty()) _libname = get_absolute_lib_filepath(itr, "LIBRARY_PATH"); + if(_libname.empty()) _libname = get_absolute_lib_filepath(itr, "LD_LIBRARY_PATH"); + } if(_libname.empty()) _libname = "libomnitrace.so"; + // prioritize the user environment arguments auto env_vars = parser.get("env"); env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MODE", 
instr_mode)); env_vars.emplace_back(TIMEMORY_JOIN('=', "HSA_ENABLE_INTERRUPT", "0")); @@ -1325,11 +1440,33 @@ main(int argc, char** argv) //----------------------------------------------------------------------------------// std::vector> instr_procedure_functions; auto instr_procedures = [&](const procedure_vec_t& procedures) { - verbprintf(3, "Instrumenting %lu procedures...\n", + // + auto _report = [](int _lvl, const string_t& _action, const string_t& _type, + const string_t& _reason, const string_t& _name) { + static std::map already_reported{}; + if(already_reported[_type].count(_name) == 0) + { + verbprintf(_lvl, "[%s][%s] %s :: '%s'...\n", _type.c_str(), + _action.c_str(), _reason.c_str(), _name.c_str()); + already_reported[_type].insert(_name); + } + }; + + verbprintf(2, "Instrumenting %lu procedures...\n", (unsigned long) procedures.size()); + + auto check_regex_restrictions = [](const std::string& _name, + const regexvec_t& _regexes) { + // NOLINTNEXTLINE + for(auto& itr : _regexes) + if(std::regex_search(_name, itr)) return true; + return false; + }; + for(auto* itr : procedures) { if(!itr) continue; + char modname[FUNCNAMELEN]; char fname[FUNCNAMELEN]; @@ -1340,122 +1477,209 @@ main(int argc, char** argv) else itr->getModuleName(modname, FUNCNAMELEN); - if(itr == main_func && main_func->isInstrumentable()) + if(!itr->isInstrumentable()) + { + _report(2, "Skipping", "function", "uninstrumentable", fname); + continue; + } + + if(itr == main_func) { hash_ids.emplace_back(std::hash()(main_sign.get()), main_sign.get()); - auto main_mf = module_function{ modname, fname, main_sign, itr }; - available_module_functions.insert(main_mf); - instrumented_module_functions.insert(main_mf); + _insert_module_function( + available_module_functions, + module_function{ modname, fname, main_sign, itr }); + _insert_module_function( + instrumented_module_functions, + module_function{ modname, fname, main_sign, itr }); continue; } - if(!itr->isInstrumentable()) - { - 
verbprintf(2, "Skipping uninstrumentable function: %s\n", fname); - continue; - } - - if(std::string{ modname }.find("libdyninst") != std::string::npos) continue; - - if(module_constraint(modname)) continue; - if(!instrument_module(modname)) continue; - auto name = get_func_file_line_info(mod, itr); + if(strlen(modname) == 0) + { + _report(3, "Skipping", "module", "empty name", modname); + continue; + } + if(name.get().empty()) { - verbprintf(2, "Skipping function [empty name]: %s\n", fname); + _report(3, "Skipping", "function", "empty name", fname); continue; } - if(routine_constraint(name.m_name.c_str())) continue; - if(!instrument_entity(name.m_name)) continue; + // apply module and function restrictions + auto _force_inc = false; - if(is_static_exe && has_debug_info && string_t{ fname } == "_fini" && - string_t{ modname } == "DEFAULT_MODULE") + //--------------------------------------------------------------------------// + // + // RESTRICT REGEXES + // + //--------------------------------------------------------------------------// + if(!file_restrict.empty()) { - verbprintf(2, "Skipping function [DEFAULT_MODULE]: %s\n", fname); - continue; + if(check_regex_restrictions(modname, file_restrict)) + { + _report(1, "Forcing", "module", "module-restrict-regex", modname); + _force_inc = true; + } + else + { + _report(2, "Skipping", "module", "module-restrict-regex", modname); + continue; + } } - _add_overlapping(mod, itr); - - if(!allow_overlapping && - overlapping_module_functions.find(module_function{ mod, itr }) != - overlapping_module_functions.end()) + if(!func_restrict.empty()) { - verbprintf(2, "Skipping function [overlapping]: %s / %s\n", - name.m_name.c_str(), name.get().c_str()); - continue; + if(check_regex_restrictions(name.m_name, func_restrict)) + { + _report(1, "Forcing", "function", "function-restrict-regex", + name.m_name); + _force_inc = true; + } + else if(check_regex_restrictions(name.get(), func_restrict)) + { + _report(1, "Forcing", "function", 
"function-restrict-regex", + name.get()); + _force_inc = true; + } + else + { + _report(2, "Skipping", "function", "function-restrict-regex", + name.get()); + continue; + } } - // directly try to get loop entry points - const std::vector* _loop_entries = - itr->findPoint(BPatch_locLoopEntry); + //--------------------------------------------------------------------------// + // + // INCLUDE REGEXES + // + //--------------------------------------------------------------------------// + if(!file_include.empty()) + { + if(check_regex_restrictions(modname, file_include)) + { + _report(1, "Forcing", "module", "module-include-regex", modname); + _force_inc = true; + } + } + + if(!func_include.empty()) + { + if(check_regex_restrictions(name.m_name, func_include)) + { + _report(1, "Forcing", "function", "function-include-regex", + name.m_name); + _force_inc = true; + } + else if(check_regex_restrictions(name.get(), func_include)) + { + _report(1, "Forcing", "function", "function-include-regex", + name.get()); + _force_inc = true; + } + } + + //--------------------------------------------------------------------------// + // + // EXCLUDE REGEXES + // + //--------------------------------------------------------------------------// + if(!file_exclude.empty()) + { + if(check_regex_restrictions(modname, file_exclude)) + { + _report(1, "Skipping", "module", "module-exclude-regex", modname); + continue; + } + } + + if(!func_exclude.empty()) + { + if(check_regex_restrictions(name.m_name, func_exclude)) + { + _report(1, "Skipping", "function", "function-exclude-regex", + name.m_name); + continue; + } + else if(check_regex_restrictions(name.get(), func_exclude)) + { + _report(1, "Skipping", "function", "function-exclude-regex", + name.get()); + continue; + } + } // try to get loops via the control flow graph flow_graph_t* cfg = itr->getCFG(); basic_loop_vec_t basic_loop{}; if(cfg) cfg->getOuterLoops(basic_loop); - // if the function has dynamic callsites and user specified 
instrumenting - // dynamic callsites, force the instrumentation - bool _force_instr = false; - if(cfg && instr_dynamic_callsites) - _force_instr = cfg->containsDynamicCallsites(); + if(!_force_inc) + { + if(module_constraint(modname)) continue; + if(!instrument_module(modname)) continue; - auto _address_range = module_function{ mod, itr }.address_range; - auto _num_loop_entries = - (_loop_entries) - ? std::max(_loop_entries->size(), basic_loop.size()) - : basic_loop.size(); - auto _has_loop_entries = (_num_loop_entries > 0); - auto _skip_range = - (_has_loop_entries) ? false : (_address_range < min_address_range); - auto _skip_loop_range = - (_has_loop_entries) ? (_address_range < min_loop_address_range) : false; + if(routine_constraint(name.m_name.c_str())) continue; + if(!instrument_entity(name.m_name)) continue; - if(_force_instr && _skip_range) - { - verbprintf( - 1, - "Instrumenting function [dynamic-callsite]: %s / %s despite not " - "satisfy minimum address range (address range = %lu, minimum " - "= %lu) because contains dynamic callsites\n", - name.m_name.c_str(), name.get().c_str(), - (unsigned long) _address_range, (unsigned long) min_address_range); - } - else if(_force_instr && _skip_loop_range) - { - verbprintf( - 1, - "Instrumenting function [dynamic-callsite]: %s / %s despite not " - "satisfy minimum loop address range (address range = %lu, minimum " - "= %lu) because contains dynamic callsites\n", - name.m_name.c_str(), name.get().c_str(), - (unsigned long) _address_range, - (unsigned long) min_loop_address_range); - } - else if(_skip_range) - { - verbprintf(1, - "Skipping function [min-address-range]: %s / %s (address " - "range = %lu, minimum = %lu)\n", - name.m_name.c_str(), name.get().c_str(), - (unsigned long) _address_range, - (unsigned long) min_address_range); - continue; - } - else if(_skip_loop_range) - { - verbprintf(1, - "Skipping function [min-loop-address-range]: %s / %s (address " - "range = %lu, minimum = %lu)\n", - 
name.m_name.c_str(), name.get().c_str(), - (unsigned long) _address_range, - (unsigned long) min_loop_address_range); - continue; + if(is_static_exe && has_debug_info && string_t{ fname } == "_fini" && + string_t{ modname } == "DEFAULT_MODULE") + { + _report(2, "Skipping", "function", "DEFAULT_MODULE", fname); + continue; + } + + _add_overlapping(mod, itr); + + if(!allow_overlapping && + overlapping_module_functions.find(module_function{ mod, itr }) != + overlapping_module_functions.end()) + { + _report(2, "Skipping", "function", "overlapping", fname); + continue; + } + + // directly try to get loop entry points + const std::vector* _loop_entries = + itr->findPoint(BPatch_locLoopEntry); + + // if the function has dynamic callsites and user specified instrumenting + // dynamic callsites, force the instrumentation + bool _force_instr = false; + if(cfg && instr_dynamic_callsites) + _force_instr = cfg->containsDynamicCallsites(); + + auto _address_range = module_function{ mod, itr }.address_range; + auto _num_loop_entries = + (_loop_entries) + ? std::max(_loop_entries->size(), basic_loop.size()) + : basic_loop.size(); + auto _has_loop_entries = (_num_loop_entries > 0); + auto _skip_range = + (_has_loop_entries) ? false : (_address_range < min_address_range); + auto _skip_loop_range = (_has_loop_entries) + ? (_address_range < min_loop_address_range) + : false; + + if(_force_instr && (_skip_range || _skip_loop_range)) + { + _report(1, "Forcing", "function", "dynamic-callsite", fname); + } + else if(_skip_range) + { + _report(1, "Skipping", "function", "min-address-range", fname); + continue; + } + else if(_skip_loop_range) + { + _report(1, "Skipping", "function", "min-address-range-loop", fname); + continue; + } } bool _entr_success = @@ -1464,30 +1688,39 @@ main(int argc, char** argv) query_instr(itr, BPatch_exit, nullptr, nullptr, instr_traps); if(!_entr_success && !_exit_success) { - verbprintf(2, - "Skipping function [insert-instr]: %s / %s. 
Either no entry " - "instrumentation points were found or instrumentation " - "required traps and instrumenting via traps were disabled.\n", - name.m_name.c_str(), name.get().c_str()); + _report(2, "Skipping", "function", + "Either no entry " + "instrumentation points were found or instrumentation " + "required traps and instrumenting via traps were disabled.", + fname); continue; } else if(_entr_success && !_exit_success) { - verbprintf(2, - "Skipping function [insert-instr]: %s / %s. Function can be " - "only partially instrumented: entry = %s, exit = %s\n", - name.m_name.c_str(), name.get().c_str(), - _entr_success ? "y" : "n", _exit_success ? "y" : "n"); + std::stringstream _ss{}; + _ss << "Function can be only partially instrument (entry = " + << std::boolalpha << _entr_success << ", exit = " << _exit_success + << ")"; + _report(2, "Skipping", "function", _ss.str(), fname); continue; } hash_ids.emplace_back(std::hash()(name.get()), name.get()); - available_module_functions.insert(module_function{ mod, itr }); - instrumented_module_functions.insert(module_function{ mod, itr }); + _insert_module_function(available_module_functions, + module_function{ mod, itr }); + _insert_module_function(instrumented_module_functions, + module_function{ mod, itr }); auto _f = [=]() { - verbprintf(1, "Instrumenting |> [ %s ] -> [ %s ]\n", modname, - name.m_name.c_str()); + static std::set _reported{}; + auto _hashv = + std::hash{}(TIMEMORY_JOIN('|', modname, name.m_name)); + if(!_reported.emplace(_hashv).second) + { + verbprintf(1, "Instrumenting |> [ %s ] -> [ %s ]\n", modname, + name.m_name.c_str()); + } + auto _name = name.get(); auto _hash = std::hash()(_name); auto _trace_entr = (entr_hash) ? 
omnitrace_call_expr(_hash) @@ -1507,7 +1740,7 @@ main(int argc, char** argv) if(loop_level_instr) { - verbprintf(1, "Instrumenting at the loop level: %s\n", + verbprintf(3, "Instrumenting at the loop level: %s\n", name.m_name.c_str()); for(auto* litr : basic_loop) @@ -1518,22 +1751,21 @@ main(int argc, char** argv) query_instr(itr, BPatch_exit, cfg, litr, instr_loop_traps); if(!_lentr_success && !_lexit_success) { - verbprintf( - 2, - "Skipping function [insert-instr-loop]: %s / %s. Either no " - "entry instrumentation points were found or instrumentation " - "required traps and instrumenting via traps were disabled.\n", - name.m_name.c_str(), name.get().c_str()); + _report( + 2, "Skipping", "function-loop", + "Either no entry instrumentation points were found or " + "instrumentation " + "required traps and instrumenting via traps were disabled.", + fname); continue; } else if(_lentr_success && !_lexit_success) { - verbprintf( - 2, - "Skipping function [insert-instr-loop]: %s / %s. Function " - "can be only partially instrumented: entry = %s, exit = %s\n", - name.m_name.c_str(), name.get().c_str(), - _lentr_success ? "y" : "n", _lexit_success ? "y" : "n"); + std::stringstream _ss{}; + _ss << "Function can be only partially instrument (entry = " + << std::boolalpha << _lentr_success + << ", exit = " << _lexit_success << ")"; + _report(2, "Skipping", "function-loop", _ss.str(), fname); continue; } @@ -1542,6 +1774,14 @@ main(int argc, char** argv) auto _lhash = std::hash()(_lname); hash_ids.emplace_back(_lhash, _lname); auto _lf = [=]() { + static std::set _reported{}; + auto _hashv = std::hash{}( + TIMEMORY_JOIN('|', modname, name.m_name)); + if(!_reported.emplace(_hashv).second) + { + verbprintf(1, "Loop Instrumenting |> [ %s ] -> [ %s ]\n", + modname, name.m_name.c_str()); + } auto _ltrace_entr = (entr_hash) ? 
omnitrace_call_expr(_lhash) : omnitrace_call_expr(_lname.c_str()); @@ -1613,7 +1853,7 @@ main(int argc, char** argv) // //----------------------------------------------------------------------------------// - if(app_thread) + if(app_thread && is_attached) { assert(app_thread != nullptr); verbprintf(1, "Executing initial snippets...\n"); @@ -1622,10 +1862,12 @@ main(int argc, char** argv) } else { - verbprintf(1, "Adding main entry snippets...\n"); if(main_entr_points) + { + verbprintf(1, "Adding main entry snippets...\n"); addr_space->insertSnippet(BPatch_sequence(init_names), *main_entr_points, BPatch_callBefore, BPatch_firstSnippet); + } } if(main_exit_points) @@ -1706,20 +1948,34 @@ main(int argc, char** argv) // //----------------------------------------------------------------------------------// + for(const auto& itr : available_module_functions) + { + _insert_module_function(excluded_module_functions, itr); + } + bool _dump_and_exit = ((print_available.length() + print_instrumented.length() + - print_overlapping.length()) > 0); + print_overlapping.length() + print_excluded.length()) > 0) || + explicit_dump_and_exit; dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, - TIMEMORY_JOIN("", _output_prefix, "available-instr.txt")), - available_module_functions, 0, werror); + TIMEMORY_JOIN("", _output_prefix, "available-instr")), + available_module_functions, 0, werror, "available_module_functions", + print_formats); dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, - TIMEMORY_JOIN("", _output_prefix, "instrumented-instr.txt")), - instrumented_module_functions, 0, werror); + TIMEMORY_JOIN("", _output_prefix, "instrumented-instr")), + instrumented_module_functions, 0, werror, "instrumented_module_functions", + print_formats); dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, - TIMEMORY_JOIN("", _output_prefix, "overlapping-instr.txt")), - overlapping_module_functions, 0, werror); + TIMEMORY_JOIN("", _output_prefix, "excluded-instr")), + excluded_module_functions, 0, werror, 
"excluded_module_functions", + print_formats); + dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, + TIMEMORY_JOIN("", _output_prefix, "overlapping-instr")), + overlapping_module_functions, 0, werror, "overlapping_module_functions", + print_formats); - auto _dump_info = [](const string_t& _mode, const fmodset_t& _modset) { + auto _dump_info = [](const std::string& _label, const string_t& _mode, + const fmodset_t& _modset) { std::map> _data{}; std::unordered_map> _dups{}; auto _insert = [&](const std::string& _m, const std::string& _v) { @@ -1732,17 +1988,19 @@ main(int argc, char** argv) if(_mode == "modules") { for(const auto& itr : _modset) - _insert(itr.module, itr.module); + _insert(itr.module, TIMEMORY_JOIN("", "[", itr.module, "]")); } else if(_mode == "functions") { for(const auto& itr : _modset) - _insert(itr.module, itr.function); + _insert(itr.module, TIMEMORY_JOIN("", "[", itr.function, "][", + itr.address_range, "]")); } else if(_mode == "functions+") { for(const auto& itr : _modset) - _insert(itr.module, itr.signature.get()); + _insert(itr.module, TIMEMORY_JOIN("", "[", itr.signature.get(), "][", + itr.address_range, "]")); } else if(_mode == "pair") { @@ -1750,8 +2008,8 @@ main(int argc, char** argv) { std::stringstream _ss{}; _ss << std::boolalpha; - _ss << "[" << itr.module << "] --> [ " << itr.address_range << " ][" - << itr.function << "]"; + _ss << "" << itr.module << "] --> [" << itr.function << "][" + << itr.address_range << "]"; _insert(itr.module, _ss.str()); } } @@ -1761,8 +2019,8 @@ main(int argc, char** argv) { std::stringstream _ss{}; _ss << std::boolalpha; - _ss << "[" << itr.module << "] --> [ " << itr.address_range << " ][" - << itr.signature.get() << "]"; + _ss << "[" << itr.module << "] --> [" << itr.signature.get() << "][" + << itr.address_range << "]"; _insert(itr.module, _ss.str()); } } @@ -1772,19 +2030,24 @@ main(int argc, char** argv) } for(auto& mitr : _data) { - if(_mode != "modules") std::cout << "\n" << mitr.first << ":\n"; + 
if(_mode != "modules" && _mode != "pair" && _mode != "pair+") + std::cout << "\n[" << _label << "] " << mitr.first << ":\n"; + std::sort(mitr.second.begin(), mitr.second.end()); for(auto& itr : mitr.second) { - std::cout << " " << itr << "\n"; + std::cout << "[" << _label << "] " << itr << "\n"; } } }; - if(!print_available.empty()) _dump_info(print_available, available_module_functions); + if(!print_available.empty()) + _dump_info("available", print_available, available_module_functions); if(!print_instrumented.empty()) - _dump_info(print_instrumented, instrumented_module_functions); + _dump_info("instrumented", print_instrumented, instrumented_module_functions); + if(!print_excluded.empty()) + _dump_info("excluded", print_excluded, excluded_module_functions); if(!print_overlapping.empty()) - _dump_info(print_overlapping, overlapping_module_functions); + _dump_info("overlapping", print_overlapping, overlapping_module_functions); if(_dump_and_exit) exit(EXIT_SUCCESS); @@ -1809,25 +2072,23 @@ main(int argc, char** argv) code = (success) ? 
EXIT_SUCCESS : EXIT_FAILURE; if(success) { + verbprintf(0, "\n"); if(outfile.find('/') != 0) { - char cwd[FUNCNAMELEN]; - auto* ret = getcwd(cwd, FUNCNAMELEN); - consume_parameters(ret); - printf("\nThe instrumented executable image is stored in '%s/%s'\n", cwd, - outfile.c_str()); + verbprintf(0, "The instrumented executable image is stored in '%s/%s'\n", + get_cwd().c_str(), outfile.c_str()); } else - printf("\nThe instrumented executable image is stored in '%s'\n", - outfile.c_str()); + { + verbprintf(0, "The instrumented executable image is stored in '%s'\n", + outfile.c_str()); + } } if(main_func) { - printf("[omnitrace][exe] Getting linked libraries for %s...\n", - cmdv0.c_str()); - printf("[omnitrace][exe] Consider instrumenting the relevant " - "libraries...\n\n"); + verbprintf(0, "Getting linked libraries for %s...\n", cmdv0.c_str()); + verbprintf(0, "Consider instrumenting the relevant libraries...\n\n"); using TIMEMORY_PIPE = tim::popen::TIMEMORY_PIPE; @@ -1852,13 +2113,12 @@ main(int argc, char** argv) if(!app_thread->isTerminated()) { - app_thread->detach(true); - pid_t cpid = app_thread->getPid(); - pid_t w; + pid_t cpid = app_thread->getPid(); int status = 0; + app_thread->detach(true); do { - w = waitpid(cpid, &status, WUNTRACED); + pid_t w = waitpid(cpid, &status, WUNTRACED); if(w == -1) { perror("waitpid"); @@ -1927,98 +2187,64 @@ instrument_module(const string_t& file_name) } }; - auto is_include = [&](bool _if_empty) { - if(file_include.empty()) return _if_empty; - // NOLINTNEXTLINE(readability-use-anyofallof) - for(auto& itr : file_include) - { - if(std::regex_search(file_name, itr)) return true; - } - return false; + static std::regex ext_regex{ "\\.(s|S)$", regex_opts }; + static std::regex sys_regex{ "^(s|k|e|w)_[A-Za-z_0-9\\-]+\\.(c|C)$", regex_opts }; + static std::regex sys_build_regex{ "^(\\.\\./sysdeps/|/build/)", regex_opts }; + static std::regex dyninst_regex{ "(dyninst|DYNINST|(^|/)RT[[:graph:]]+\\.c$)", + regex_opts }; + static 
std::regex dependlib_regex{ "^(lib|)(omnitrace|pthread|caliper|gotcha|papi|" + "cupti|TAU|likwid|pfm|nvperf|unwind)", + regex_opts }; + static std::regex core_cmod_regex{ + "^(malloc|(f|)lock|sig|sem)[a-z_]+(|64|_r|_l)\\.c$" }; - - auto is_exclude = [&]() { - // NOLINTNEXTLINE(readability-use-anyofallof) - for(auto& itr : file_exclude) - { - if(std::regex_search(file_name, itr)) return true; - } - return false; + static std::regex core_lib_regex{ + "^(lib|)(c|z|rt|dl|dw|util|zstd|elf|pthread|open[\\-]rte|open[\\-]pal|" + "gcc_s|tcmalloc|profiler|tbbmalloc|tbbmalloc_proxy|event_pthreads|ltdl|" + "stdc\\+\\+|malloc|selinux|pcre[0-9]+)(-|\\.)", + regex_opts }; + static std::regex prefix_regex{ "^(_|\\.[a-zA-Z0-9])", regex_opts }; - auto _user_include = is_include(false); - auto _user_exclude = is_exclude(); - - if(_user_include && !_user_exclude) - return (_report("Including", "user-regex", 2), true); - - string_t ext_str = "\\.(s|S)$"; - static std::regex ext_regex(ext_str, regex_opts); - static std::regex sys_regex("^(s|k|e|w)_[A-Za-z_0-9\\-]+\\.(c|C)$", regex_opts); - static std::regex userlib_regex( - "^(lib|)(omnitrace|caliper|gotcha|papi|cupti|TAU|likwid|dyninst|pfm|nvtx|upcxx|" - "nvperf|hsa|amdhip64|pthread|sem_|malloc|RT[A-Za-z_0-9\\-]+\\.c$|\\.\\./sysdeps/" - "|/build/)", - regex_opts); - static std::regex corelib_regex( - "^lib(c|z|rt|dl|dw|util|zstd|elf|pthread|open[\\-]rte|open[\\-]pal|" - "hwloc|numa|event|udev|dyninstAPI_RT|gcc_s|tcmalloc|profiler|tbbmalloc|" - "tbbmalloc_proxy|event_pthreads|ltdl)(-|\\.)", - regex_opts); - // these are all due to TAU - static std::regex prefix_regex( - "^(_|\\.[a-zA-Z0-9]|RT|Tau|Profiler|Rts|Papi|Py|Comp_xl\\.cpp|Comp_gnu\\.cpp|" - "UserEvent\\.cpp|FunctionInfo\\.cpp|PthreadLayer\\.cpp|" - "Comp_intel[0-9]\\.cpp|Tracer\\.cpp)", - regex_opts); - static std::regex suffix_regex( - "(printf|gettext|^sig[a-z]+|^exit|^setenv|on_exit|quick_exit|_crypt|^str[a-z_]+|" - 
"mmap[0-9]+|^err|getu[a-z]+|^call_once|^sendto|^timer_[a-z]+|^read|^close|^recv|^" - "lseek[0-9]+|^open[a-z0-9]+|^nlist|^fclrexcpt|^conj[a-z0-9]*|^cimag[a-" - "z0-9]*|^creal[a-z0-9]*|^cabs[a-z0-9]*|^wmem[a-z_]+|^mem[a-z_]+|^asctime|time|" - "timeofday|timespec_get|locale|^abort|scanf|tmpfile|getline|fseek|putc|rewind|" - "vscanf|memmove|uid|tsz|gid|cvt|cvt_r|^error|_r|[a-z]64|^f[a-z]+|^makecontext|^" - "basename|^wcp[a-z]+|[a-z]+dir|^mb[a-z]+|^dir[a-z]+|euid[a-z]+|^c[36][24][a-z]+|^" - "set[a-z_]+|^get[a-z_]+|^shm[a-z]+|^wc[a-z_]+|brk|^write[a-z]+|RTcommon|" - "RTfreebsd|RTheap|RTheap-freebsd|RTheap-linux|RTheap-win|RTlinux|RTmemEmulator|" - "RTposix|RTsignal|RTstatic_ctors_dtors-aarch64|RTstatic_ctors_dtors_begin|" - "RTstatic_ctors_dtors_end|RTstatic_ctors_dtors-ppc32|RTstatic_ctors_dtors-ppc64|" - "RTstatic_ctors_dtors-x86|RTthread-aarch64|RTthread|RTthread-powerpc|RTthread-" - "x86-64|RTthread-x86|RTwinnt|unwind)\\.c$", - regex_opts); - + // file extensions that should not be instrumented if(std::regex_search(file_name, ext_regex)) { return (_report("Excluding", "file extension", 3), false); } - if(std::regex_search(file_name, sys_regex)) + // system modules that should not be instrumented (wastes time) + if(std::regex_search(file_name, sys_regex) || + std::regex_search(file_name, sys_build_regex)) { - return (_report("Excluding", "system library", 3), false); + return (_report("Excluding", "system module", 3), false); } - if(std::regex_search(file_name, corelib_regex)) + // dyninst modules that must not be instrumented + if(std::regex_search(file_name, dyninst_regex)) { - return (_report("Excluding", "core library", 3), false); + return (_report("Excluding", "dyninst module", 3), false); } - if(std::regex_search(file_name, userlib_regex)) + // modules used by omnitrace and dependent libraries + if(std::regex_search(file_name, core_lib_regex) || + std::regex_search(file_name, core_cmod_regex)) { - return (_report("Excluding", "instrumentation", 3), false); + 
return (_report("Excluding", "core module", 3), false); } + // modules used by omnitrace and dependent libraries + if(std::regex_search(file_name, dependlib_regex)) + { + return (_report("Excluding", "dependency module", 3), false); + } + + // known set of modules whose starting sequence of characters suggest it should not be + // instrumented (wastes time) if(std::regex_search(file_name, prefix_regex)) { return (_report("Excluding", "prefix match", 3), false); } - if(std::regex_search(file_name, suffix_regex)) - { - return (_report("Excluding", "suffix match", 3), false); - } - - if(_user_exclude) return (_report("Excluding", "user-regex", 2), false); - _report("Including", "no constraint", 2); return true; @@ -2030,48 +2256,25 @@ extern const strset_t exclude_function_names; bool instrument_entity(const string_t& function_name) { - auto is_include = [&](bool _if_empty) { - if(func_include.empty()) return _if_empty; - // NOLINTNEXTLINE(readability-use-anyofallof) - for(auto& itr : func_include) + auto _report = [&function_name](const string_t& _action, const string_t& _reason, + int _lvl) { + static strset_t already_reported{}; + if(already_reported.count(function_name) == 0) { - if(std::regex_search(function_name, itr)) return true; + verbprintf(_lvl, "%s function [%s] : '%s'...\n", _action.c_str(), + _reason.c_str(), function_name.c_str()); + already_reported.insert(function_name); } - return false; }; - auto is_exclude = [&]() { - // NOLINTNEXTLINE(readability-use-anyofallof) - for(auto& itr : func_exclude) - { - if(std::regex_search(function_name, itr)) - { - verbprintf(2, "Excluding function [user-regex] : '%s'...\n", - function_name.c_str()); - return true; - } - } - return false; - }; - - auto _user_include = is_include(false) && !is_exclude(); - - if(_user_include) - { - verbprintf(2, "Including function [user-regex] : '%s'...\n", - function_name.c_str()); - return true; - } - static std::regex exclude( - 
"(omnitrace|tim::|cereal|N3tim|MPI_Init|MPI_Finalize|::__[A-Za-z]|" + "(omnitrace|tim::|N3tim|MPI_Init|MPI_Finalize|::__[A-Za-z]|" "dyninst|tm_clones|malloc$|calloc$|free$|realloc$|std::addressof)", regex_opts); static std::regex exclude_cxx("(std::_Sp_counted_base|std::use_facet)", regex_opts); static std::regex leading( "^(_|\\.|frame_dummy|\\(|targ|new|delete|operator new|operator delete|" - "std::allocat|nvtx|gcov|main\\.cold|TAU|tau|Tau|dyn|RT|" - "sys|pthread|posix|clone|" + "std::allocat|nvtx|gcov|TAU|tau|Tau|dyn|RT|sys|pthread|posix|clone|" "virtual thunk|non-virtual thunk|transaction clone|" "RtsLayer|DYNINST|PthreadLayer|threaded_func|PMPI|" "Kokkos::Impl::|Kokkos::Experimental::Impl::|Kokkos::impl_|" @@ -2079,60 +2282,39 @@ instrument_entity(const string_t& function_name) regex_opts); static std::regex trailing("(\\.part\\.[0-9]+|\\.constprop\\.[0-9]+|\\.|\\.[0-9]+)$", regex_opts); - static std::regex stlfunc("^std::", regex_opts); static strset_t whole = get_whole_function_names(); - if(!stl_func_instr && std::regex_search(function_name, stlfunc)) + // don't instrument the functions when key is found anywhere in function name + if(std::regex_search(function_name, exclude) || + std::regex_search(function_name, exclude_cxx)) { - verbprintf(3, "Excluding function [stl] : '%s'...\n", function_name.c_str()); + _report("critical", function_name, 3); return false; } - // don't instrument the functions when key is found anywhere in function name - if(std::regex_search(function_name, exclude)) + if(whole.count(function_name) > 0) { - verbprintf(3, "Excluding function [critical, any match] : '%s'...\n", - function_name.c_str()); - return false; - } - - // don't instrument the functions when key is found anywhere in function name - if(std::regex_search(function_name, exclude_cxx)) - { - verbprintf(3, "Excluding function [critical_cxx, any match] : '%s'...\n", - function_name.c_str()); + _report("critical", function_name, 3); return false; } // don't instrument the 
functions when key is found at the start of the function name if(std::regex_search(function_name, leading)) { - verbprintf(3, "Excluding function [critical, leading match] : '%s'...\n", - function_name.c_str()); + _report("recommended", function_name, 3); return false; } // don't instrument the functions when key is found at the end of the function name if(std::regex_search(function_name, trailing)) { - verbprintf(3, "Excluding function [critical, trailing match] : '%s'...\n", - function_name.c_str()); + _report("recommended", function_name, 3); return false; } - if(whole.count(function_name) > 0) - { - verbprintf(3, "Excluding function [critical, whole match] : '%s'...\n", - function_name.c_str()); - return false; - } + _report("Including function [no constraint] : '%s'...\n", function_name, 3); - bool use = is_include(true) && !is_exclude(); - if(use) - verbprintf(2, "Including function [no constraint] : '%s'...\n", - function_name.c_str()); - - return use; + return true; } //======================================================================================// @@ -2302,25 +2484,22 @@ routine_constraint(const char* fname) } } +namespace +{ //======================================================================================// // std::string -get_absolute_exe_filepath(std::string exe_name) +get_absolute_exe_filepath(std::string exe_name, const std::string& env_path) { - auto file_exists = [](const std::string& name) { - struct stat buffer; - return (stat(name.c_str(), &buffer) == 0); - }; - if(!exe_name.empty() && !file_exists(exe_name)) { auto _exe_orig = exe_name; - auto _paths = tim::delimit(tim::get_env("PATH", ""), ":"); + auto _paths = tim::delimit(tim::get_env(env_path, ""), ":"); for(auto& pitr : _paths) { if(file_exists(TIMEMORY_JOIN('/', pitr, exe_name))) { - exe_name = TIMEMORY_JOIN('/', pitr, exe_name); + exe_name = get_realpath(TIMEMORY_JOIN('/', pitr, exe_name)); verbprintf(0, "[omnitrace][exe] Resolved '%s' to '%s'...\n", _exe_orig.c_str(), 
exe_name.c_str()); break; @@ -2333,30 +2512,30 @@ get_absolute_exe_filepath(std::string exe_name) exe_name.c_str()); } } + else if(!exe_name.empty()) + { + return get_realpath(exe_name); + } + return exe_name; } //======================================================================================// // std::string -get_absolute_lib_filepath(std::string lib_name) +get_absolute_lib_filepath(std::string lib_name, const std::string& env_path) { - auto file_exists = [](const std::string& name) { - struct stat buffer; - return (stat(name.c_str(), &buffer) == 0); - }; - if(!lib_name.empty() && (!file_exists(lib_name) || std::regex_match(lib_name, std::regex("^[A-Za-z0-9].*")))) { auto _lib_orig = lib_name; auto _paths = tim::delimit( - std::string{ ".:" } + tim::get_env("LD_LIBRARY_PATH", ""), ":"); + std::string{ ".:" } + tim::get_env(env_path, ""), ":"); for(auto& pitr : _paths) { if(file_exists(TIMEMORY_JOIN('/', pitr, lib_name))) { - lib_name = TIMEMORY_JOIN('/', pitr, lib_name); + lib_name = get_realpath(TIMEMORY_JOIN('/', pitr, lib_name)); verbprintf(0, "[omnitrace][exe] Resolved '%s' to '%s'...\n", _lib_orig.c_str(), lib_name.c_str()); break; @@ -2369,5 +2548,39 @@ get_absolute_lib_filepath(std::string lib_name) lib_name.c_str()); } } + else if(!lib_name.empty()) + { + return get_realpath(lib_name); + } + return lib_name; } + +//======================================================================================// +// +bool +file_exists(const std::string& name) +{ + struct stat buffer; + return (stat(name.c_str(), &buffer) == 0); +} + +std::string +get_realpath(const std::string& _f) +{ + char _buffer[PATH_MAX]; + if(!::realpath(_f.c_str(), _buffer)) + { + verbprintf(2, "Warning! 
realpath could not be found for %s\n", _f.c_str()); + return _f; + } + return std::string{ _buffer }; +} + +std::string +get_cwd() +{ + char cwd[PATH_MAX]; + return std::string{ getcwd(cwd, PATH_MAX) }; +} +} // namespace diff --git a/projects/rocprofiler-systems/source/bin/omnitrace/omnitrace.hpp b/projects/rocprofiler-systems/source/bin/omnitrace/omnitrace.hpp index a4ca11ee10..528d3551a8 100644 --- a/projects/rocprofiler-systems/source/bin/omnitrace/omnitrace.hpp +++ b/projects/rocprofiler-systems/source/bin/omnitrace/omnitrace.hpp @@ -25,6 +25,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -45,12 +49,16 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include +#include #include #include #include @@ -76,6 +84,7 @@ using strvec_t = std::vector; using strset_t = std::set; using regexvec_t = std::vector; using fmodset_t = std::set; +using fixed_modset_t = std::map; using exec_callback_t = BPatchExecCallback; using exit_callback_t = BPatchExitCallback; using fork_callback_t = BPatchForkCallback; @@ -142,10 +151,14 @@ static snippet_vec_t fini_names = {}; static fmodset_t available_module_functions = {}; static fmodset_t instrumented_module_functions = {}; static fmodset_t overlapping_module_functions = {}; +static fmodset_t excluded_module_functions = {}; +static fixed_modset_t fixed_module_functions = {}; static regexvec_t func_include = {}; static regexvec_t func_exclude = {}; static regexvec_t file_include = {}; static regexvec_t file_exclude = {}; +static regexvec_t file_restrict = {}; +static regexvec_t func_restrict = {}; // //======================================================================================// @@ -217,12 +230,6 @@ error_func_real(error_level_t level, int num, const char* const* params); void error_func_fake(error_level_t level, int num, const char* const* params); -bool -c_stdlib_module_constraint(const string_t& file); - -bool 
-c_stdlib_function_constraint(const string_t& func); - //======================================================================================// inline string_t @@ -277,6 +284,19 @@ struct function_signature TIMEMORY_DEFAULT_OBJECT(function_signature) + template + void serialize(ArchiveT& _ar, const unsigned) + { + namespace cereal = tim::cereal; + (void) get(); + _ar(cereal::make_nvp("loop", m_loop), cereal::make_nvp("info_beg", m_info_beg), + cereal::make_nvp("info_end", m_info_end), cereal::make_nvp("row", m_row), + cereal::make_nvp("col", m_col), cereal::make_nvp("return", m_return), + cereal::make_nvp("name", m_name), cereal::make_nvp("params", m_params), + cereal::make_nvp("file", m_file), cereal::make_nvp("signature", m_signature)); + (void) get(); + } + function_signature(string_t _ret, const string_t& _name, string_t _file, location_t _row = { 0, 0 }, location_t _col = { 0, 0 }, bool _loop = false, bool _info_beg = false, bool _info_end = false) @@ -294,8 +314,8 @@ struct function_signature } function_signature(const string_t& _ret, const string_t& _name, const string_t& _file, - const std::vector& _params, location_t&& _row = { 0, 0 }, - location_t&& _col = { 0, 0 }, bool _loop = false, + const std::vector& _params, location_t _row = { 0, 0 }, + location_t _col = { 0, 0 }, bool _loop = false, bool _info_beg = false, bool _info_end = false) : function_signature(_ret, _name, _file, _row, _col, _loop, _info_beg, _info_end) { @@ -306,6 +326,11 @@ struct function_signature m_params += ")"; } + friend bool operator==(const function_signature& lhs, const function_signature& rhs) + { + return lhs.get() == rhs.get(); + } + static auto get(function_signature& sig) { return sig.get(); } string_t get() const @@ -354,6 +379,8 @@ struct module_function return _instance; } + TIMEMORY_DEFAULT_OBJECT(module_function) + static void reset_width() { get_width().fill(0); } static void update_width(const module_function& rhs) @@ -409,6 +436,12 @@ struct module_function : 
(lhs.module < rhs.module); } + friend bool operator==(const module_function& lhs, const module_function& rhs) + { + return std::tie(lhs.module, lhs.function, lhs.signature, lhs.address_range) == + std::tie(rhs.module, rhs.function, rhs.signature, rhs.address_range); + } + static void write_header(std::ostream& os) { auto w0 = std::min(get_width()[0], absolute_max_width); @@ -452,7 +485,16 @@ struct module_function size_t address_range = 0; string_t module = {}; string_t function = {}; - function_signature signature; + function_signature signature = {}; + + template + void serialize(ArchiveT& _ar, const unsigned) + { + namespace cereal = tim::cereal; + _ar(cereal::make_nvp("address_range", address_range), + cereal::make_nvp("module", module), cereal::make_nvp("function", function), + cereal::make_nvp("signature", signature)); + } }; // //======================================================================================// @@ -471,26 +513,215 @@ dump_info(std::ostream& _os, const fmodset_t& _data) module_function::reset_width(); } // +template ::value, int> = 0> static inline void -dump_info(const string_t& _oname, const fmodset_t& _data, int _level, bool _fail) +dump_info(ArchiveT& _ar, const fmodset_t& _data) { + _ar(tim::cereal::make_nvp("module_functions", _data)); +} +// +static inline void +dump_info(const string_t& _label, string_t _oname, const string_t& _ext, + const fmodset_t& _data, int _level, bool _fail) +{ + namespace cereal = tim::cereal; + namespace policy = tim::policy; + + _oname += "." + _ext; + auto _handle_error = [&]() { + std::stringstream _msg{}; + _msg << "[dump_info] Error opening '" << _oname << "' for output"; + verbprintf(_level, "%s\n", _msg.str().c_str()); + if(_fail) + throw std::runtime_error(std::string{ "[omnitrace][exe]" } + _msg.str()); + }; + if(!debug_print && verbose_level < _level) return; - std::ofstream ofs{ _oname }; - if(ofs) + if(_ext == "txt") { - verbprintf(_level, "Dumping '%s'...
", _oname.c_str()); - dump_info(ofs, _data); - verbprintf_bare(_level, "Done\n"); + std::ofstream ofs{}; + if(!tim::filepath::open(ofs, _oname)) + _handle_error(); + else + { + verbprintf(_level, "Outputting '%s'... ", _oname.c_str()); + dump_info(ofs, _data); + verbprintf_bare(_level, "Done\n"); + } + ofs.close(); + } + else if(_ext == "xml") + { + std::stringstream oss{}; + { + using output_policy = policy::output_archive; + output_policy::indent() = true; + auto ar = output_policy::get(oss); + + ar->setNextName("omnitrace"); + ar->startNode(); + ar->setNextName(_label.c_str()); + ar->startNode(); + (*ar)(cereal::make_nvp("module_functions", _data)); + ar->finishNode(); + ar->finishNode(); + } + + std::ofstream ofs{}; + if(!tim::filepath::open(ofs, _oname)) + _handle_error(); + else + { + verbprintf(_level, "Outputting '%s'... ", _oname.c_str()); + ofs << oss.str() << std::endl; + verbprintf_bare(_level, "Done\n"); + } + ofs.close(); + } + else if(_ext == "json") + { + std::stringstream oss{}; + { + using output_policy = policy::output_archive; + auto ar = output_policy::get(oss); + + ar->setNextName("omnitrace"); + ar->startNode(); + ar->setNextName(_label.c_str()); + ar->startNode(); + (*ar)(cereal::make_nvp("module_functions", _data)); + ar->finishNode(); + ar->finishNode(); + } + + std::ofstream ofs{}; + if(!tim::filepath::open(ofs, _oname)) + _handle_error(); + else + { + verbprintf(_level, "Outputting '%s'...
", _oname.c_str()); + ofs << oss.str() << std::endl; + verbprintf_bare(_level, "Done\n"); + } + ofs.close(); } else { - std::stringstream _msg{}; - _msg << "[" << __FUNCTION__ << "] Error opening '" << _oname << " for output"; - verbprintf(_level, "%s\n", _msg.str().c_str()); - if(_fail) throw std::runtime_error(_msg.str()); + throw std::runtime_error(TIMEMORY_JOIN( + "", "[omnitrace][exe] Error in ", __FUNCTION__, " :: filename '", _oname, + "' does not have one of recognized file extensions: txt, json, xml")); + } +} +// +static inline void +dump_info(const string_t& _oname, const fmodset_t& _data, int _level, bool _fail, + const string_t& _type, const strset_t& _ext) +{ + for(const auto& itr : _ext) + dump_info(_type, _oname, itr, _data, _level, _fail); +} +// +static inline void +load_info(const string_t& _label, const string_t& _iname, fmodset_t& _data, int _level) +{ + namespace cereal = tim::cereal; + namespace policy = tim::policy; + + auto _pos = _iname.find_last_of('.'); + std::string _ext = {}; + if(_pos != std::string::npos) _ext = _iname.substr(_pos + 1, _iname.length()); + + auto _handle_error = [&]() { + std::stringstream _msg{}; + _msg << "[load_info] Error opening '" << _iname << "' for input"; + verbprintf(_level, "%s\n", _msg.str().c_str()); + throw std::runtime_error(std::string{ "[omnitrace][exe]" } + _msg.str()); + }; + + if(_ext == "xml") + { + verbprintf(_level, "Reading '%s'... ", _iname.c_str()); + std::ifstream ifs{ _iname }; + if(!ifs) + _handle_error(); + else + { + using input_policy = policy::input_archive; + auto ar = input_policy::get(ifs); + + ar->setNextName("omnitrace"); + ar->startNode(); + ar->setNextName(_label.c_str()); + ar->startNode(); + (*ar)(cereal::make_nvp("module_functions", _data)); + ar->finishNode(); + ar->finishNode(); + } + verbprintf_bare(_level, "Done\n"); + ifs.close(); + } + else if(_ext == "json") + { + verbprintf(_level, "Reading '%s'...
", _iname.c_str()); + std::ifstream ifs{ _iname }; + if(!ifs) + _handle_error(); + else + { + using input_policy = policy::input_archive; + auto ar = input_policy::get(ifs); + + ar->setNextName("omnitrace"); + ar->startNode(); + ar->setNextName(_label.c_str()); + ar->startNode(); + (*ar)(cereal::make_nvp("module_functions", _data)); + ar->finishNode(); + ar->finishNode(); + } + verbprintf_bare(_level, "Done\n"); + ifs.close(); + } + else + { + throw std::runtime_error(TIMEMORY_JOIN( + "", "[omnitrace][exe] Error in ", __FUNCTION__, " :: filename '", _iname, + "' does not have one of recognized extensions: json, xml :: ", _ext)); + } +} +// +static inline void +load_info(const string_t& _inp, std::map& _data, int _level) +{ + std::vector _exceptions{}; + _exceptions.reserve(_data.size()); + for(auto& itr : _data) + { + try + { + fmodset_t _tmp{}; + load_info(itr.first, _inp, _tmp, _level); + // add to the existing + itr.second->insert(_tmp.begin(), _tmp.end()); + // if it did not throw it was successfully loaded + _exceptions.clear(); + break; + } catch(std::exception& _e) + { + _exceptions.emplace_back(_e.what()); + } + } + if(!_exceptions.empty()) + { + std::stringstream _msg{}; + for(auto& itr : _exceptions) + { + _msg << "[omnitrace][exe] " << itr << "\n"; + } + throw std::runtime_error(_msg.str()); } - ofs.close(); } // //======================================================================================// diff --git a/projects/rocprofiler-systems/source/lib/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/CMakeLists.txt index a241cff965..bc1bec9911 100644 --- a/projects/rocprofiler-systems/source/lib/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/CMakeLists.txt @@ -30,7 +30,7 @@ target_link_libraries( $ $ $ - $ + $ $,omnitrace::omnitrace-sanitizer,>) # ------------------------------------------------------------------------------# @@ -137,3 +137,31 @@ install( TARGETS omnitrace-library DESTINATION ${CMAKE_INSTALL_LIBDIR} OPTIONAL)
+ +# ------------------------------------------------------------------------------# +# +# omnitrace dl library +# +# ------------------------------------------------------------------------------# + +add_library(omnitrace-dl-library SHARED) +add_library(omnitrace::omnitrace-dl-library ALIAS omnitrace-dl-library) + +target_compile_definitions(omnitrace-dl-library PRIVATE TIMEMORY_USE_VISIBILITY=0) +target_sources(omnitrace-dl-library PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/dl.cpp) +target_link_libraries(omnitrace-dl-library + PRIVATE $) + +set_target_properties( + omnitrace-dl-library + PROPERTIES OUTPUT_NAME omnitrace-dl + CXX_VISIBILITY_PRESET "hidden" + VERSION ${PROJECT_VERSION} + SOVERSION ${PROJECT_VERSION_MAJOR} + POSITION_INDEPENDENT_CODE ON + INSTALL_RPATH "\$ORIGIN") + +install( + TARGETS omnitrace-dl-library + DESTINATION ${CMAKE_INSTALL_LIBDIR} + OPTIONAL) diff --git a/projects/rocprofiler-systems/source/lib/include/library/api.hpp b/projects/rocprofiler-systems/source/lib/include/library/api.hpp index d81505e87a..1419e88dc0 100644 --- a/projects/rocprofiler-systems/source/lib/include/library/api.hpp +++ b/projects/rocprofiler-systems/source/lib/include/library/api.hpp @@ -30,27 +30,27 @@ extern "C" { /// handles configuration logic - void omnitrace_init_library(void) TIMEMORY_VISIBILITY("default"); + void omnitrace_init_library(void) OMNITRACE_VISIBILITY("default"); /// starts gotcha wrappers - void omnitrace_init(const char*, bool, const char*) TIMEMORY_VISIBILITY("default"); + void omnitrace_init(const char*, bool, const char*) OMNITRACE_VISIBILITY("default"); /// shuts down all tooling and generates output - void omnitrace_finalize(void) TIMEMORY_VISIBILITY("default"); + void omnitrace_finalize(void) OMNITRACE_VISIBILITY("default"); /// sets an environment variable void omnitrace_set_env(const char* env_name, const char* env_val) - TIMEMORY_VISIBILITY("default"); + OMNITRACE_VISIBILITY("default"); /// sets whether MPI should be used - void 
omnitrace_set_mpi(bool use, bool attached) TIMEMORY_VISIBILITY("default"); + void omnitrace_set_mpi(bool use, bool attached) OMNITRACE_VISIBILITY("default"); /// starts an instrumentation region - void omnitrace_push_trace(const char* name) TIMEMORY_VISIBILITY("default"); + void omnitrace_push_trace(const char* name) OMNITRACE_VISIBILITY("default"); /// stops an instrumentation region - void omnitrace_pop_trace(const char* name) TIMEMORY_VISIBILITY("default"); + void omnitrace_pop_trace(const char* name) OMNITRACE_VISIBILITY("default"); /// used by omnitrace-critical-trace - bool omnitrace_init_tooling() TIMEMORY_VISIBILITY("hidden"); + bool omnitrace_init_tooling() OMNITRACE_VISIBILITY("hidden"); } diff --git a/projects/rocprofiler-systems/source/lib/include/library/components/fwd.hpp b/projects/rocprofiler-systems/source/lib/include/library/components/fwd.hpp index dcc769b452..6cd03b5e4a 100644 --- a/projects/rocprofiler-systems/source/lib/include/library/components/fwd.hpp +++ b/projects/rocprofiler-systems/source/lib/include/library/components/fwd.hpp @@ -24,12 +24,16 @@ #include "library/defines.hpp" +#include +#include #include #include #include #include #include +TIMEMORY_DEFINE_NS_API(project, omnitrace) + TIMEMORY_DECLARE_COMPONENT(roctracer) namespace omnitrace @@ -92,6 +96,34 @@ TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available, omnitrace::component::sampling_gpu_ false_type) #endif +TIMEMORY_SET_COMPONENT_API(omnitrace::component::omnitrace, project::omnitrace, + category::dynamic_instrumentation, os::supports_linux) +TIMEMORY_SET_COMPONENT_API(omnitrace::component::roctracer, project::omnitrace, + tpls::rocm, device::gpu, os::supports_linux, + category::external) +TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_wall_clock, project::omnitrace, + category::timing, os::supports_unix, category::sampling, + category::interrupt_sampling) +TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_cpu_clock, project::omnitrace, + category::timing, 
os::supports_unix, category::sampling, + category::interrupt_sampling) +TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_percent, project::omnitrace, + category::timing, os::supports_unix, category::sampling, + category::interrupt_sampling) +TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_busy, project::omnitrace, + tpls::rocm, device::gpu, os::supports_linux, + category::sampling, category::thread_sampling) +TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_memory, project::omnitrace, + tpls::rocm, device::gpu, os::supports_linux, category::memory, + category::sampling, category::thread_sampling) +TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_power, project::omnitrace, + tpls::rocm, device::gpu, os::supports_linux, category::power, + category::sampling, category::thread_sampling) +TIMEMORY_SET_COMPONENT_API(omnitrace::component::sampling_gpu_temp, project::omnitrace, + tpls::rocm, device::gpu, os::supports_linux, + category::temperature, category::sampling, + category::thread_sampling) + TIMEMORY_PROPERTY_SPECIALIZATION(omnitrace::component::omnitrace, OMNITRACE_COMPONENT, "omnitrace", "omnitrace_component") TIMEMORY_PROPERTY_SPECIALIZATION(omnitrace::component::roctracer, OMNITRACE_ROCTRACER, diff --git a/projects/rocprofiler-systems/source/lib/include/library/components/pthread_gotcha.hpp b/projects/rocprofiler-systems/source/lib/include/library/components/pthread_gotcha.hpp index 0824ed5627..51817756d9 100644 --- a/projects/rocprofiler-systems/source/lib/include/library/components/pthread_gotcha.hpp +++ b/projects/rocprofiler-systems/source/lib/include/library/components/pthread_gotcha.hpp @@ -37,13 +37,14 @@ struct pthread_gotcha : tim::component::base using routine_t = void* (*) (void*); using promise_t = std::promise; - wrapper(routine_t _routine, void* _arg, bool, promise_t*); + wrapper(routine_t _routine, void* _arg, bool, int64_t, promise_t*); void* operator()() const; static void* wrap(void* _arg); 
private: bool m_enable_sampling = false; + int64_t m_parent_tid = 0; routine_t m_routine = nullptr; void* m_arg = nullptr; promise_t* m_promise = nullptr; diff --git a/projects/rocprofiler-systems/source/lib/include/library/config.hpp b/projects/rocprofiler-systems/source/lib/include/library/config.hpp index b4f31ae2b6..da94c9f87a 100644 --- a/projects/rocprofiler-systems/source/lib/include/library/config.hpp +++ b/projects/rocprofiler-systems/source/lib/include/library/config.hpp @@ -31,9 +31,9 @@ #include "library/defines.hpp" #include "library/state.hpp" #include "library/timemory.hpp" -#include "timemory/macros/language.hpp" #include +#include #include #include @@ -129,6 +129,9 @@ get_debug_finalize(); bool get_debug(); +bool +get_debug_sampling(); + bool get_debug_tid(); @@ -252,5 +255,5 @@ std::atomic& get_cpu_cid(); std::unique_ptr>& -get_cpu_cid_stack(int64_t _tid = threading::get_id()); +get_cpu_cid_stack(int64_t _tid = threading::get_id(), int64_t _parent = 0); } // namespace omnitrace diff --git a/projects/rocprofiler-systems/source/lib/include/library/defines.hpp.in b/projects/rocprofiler-systems/source/lib/include/library/defines.hpp.in index 488e158f55..14e0aa7b67 100644 --- a/projects/rocprofiler-systems/source/lib/include/library/defines.hpp.in +++ b/projects/rocprofiler-systems/source/lib/include/library/defines.hpp.in @@ -50,3 +50,8 @@ #define OMNITRACE_SAMPLING_GPU_TEMP OMNITRACE_SAMPLING_GPU_TEMP_idx #define OMNITRACE_SAMPLING_GPU_BUSY OMNITRACE_SAMPLING_GPU_BUSY_idx #define OMNITRACE_SAMPLING_GPU_MEMORY_USAGE OMNITRACE_SAMPLING_GPU_MEMORY_USAGE_idx + +#define OMNITRACE_ATTRIBUTE(...) 
__attribute__((__VA_ARGS__)) +#define OMNITRACE_VISIBILITY(MODE) OMNITRACE_ATTRIBUTE(visibility(MODE)) +#define OMNITRACE_PUBLIC_API OMNITRACE_VISIBILITY("default") +#define OMNITRACE_HIDDEN_API OMNITRACE_VISIBILITY("hidden") diff --git a/projects/rocprofiler-systems/source/lib/src/dl.cpp b/projects/rocprofiler-systems/source/lib/src/dl.cpp new file mode 100644 index 0000000000..5b2cbdee50 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/src/dl.cpp @@ -0,0 +1,325 @@ +// MIT License +// +// Copyright (c) 2020, The Regents of the University of California, +// through Lawrence Berkeley National Laboratory (subject to receipt of any +// required approvals from the U.S. Dept. of Energy). All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OMNITRACE_VISIBLE __attribute__((visibility("default"))) +#define OMNITRACE_HIDDEN __attribute__((visibility("internal"))) +#define OMNITRACE_INLINE __attribute__((__always_inline__)) +#define OMNITRACE_DLSYM(VARNAME, HANDLE, FUNCNAME) \ + if(HANDLE) \ + { \ + *(void**) (&VARNAME) = dlsym(HANDLE, FUNCNAME); \ + if(VARNAME == nullptr && _omnitrace_dl_verbose >= 0) \ + { \ + fprintf(stderr, "[omnitrace][dl][pid=%i]> %s :: %s\n", process::get_id(), \ + FUNCNAME, dlerror()); \ + } \ + } + +namespace process = tim::process; + +//--------------------------------------------------------------------------------------// +// +// omnitrace symbols +// +//--------------------------------------------------------------------------------------// + +extern "C" +{ + void omnitrace_init_library(void) OMNITRACE_VISIBLE; + void omnitrace_init(const char*, bool, const char*) OMNITRACE_VISIBLE; + void omnitrace_finalize(void) OMNITRACE_VISIBLE; + void omnitrace_set_env(const char* env_name, const char* env_val) OMNITRACE_VISIBLE; + void omnitrace_set_mpi(bool use, bool attached) OMNITRACE_VISIBLE; + void omnitrace_push_trace(const char* name) OMNITRACE_VISIBLE; + void omnitrace_pop_trace(const char* name) OMNITRACE_VISIBLE; +} + +//--------------------------------------------------------------------------------------// + +namespace +{ +inline int +get_omnitrace_env(); + +// environment priority: +// - OMNITRACE_DL_DEBUG +// - OMNITRACE_DL_VERBOSE +// - OMNITRACE_DEBUG +// - OMNITRACE_VERBOSE +int _omnitrace_dl_verbose = + tim::get_env("OMNITRACE_DL_DEBUG", false, false) + ? 
100 + : tim::get_env("OMNITRACE_DL_VERBOSE", get_omnitrace_env(), false); + +// The docs for dlopen suggest that the combination of RTLD_LOCAL + RTLD_DEEPBIND +// (when available) helps ensure that the symbols in the instrumentation library +// libomnitrace.so will use its own symbols... not symbols that are potentially +// instrumented. However, this only applies to the symbols in libomnitrace.so, +// which is NOT self-contained, i.e. symbols in timemory and the libs it links to +// (such as libpapi.so) are not protected by the deep-bind option. Additionally, +// it should be noted that DynInst does *NOT* add instrumentation by manipulating the +// dynamic linker (otherwise it would only be limited to shared libs) -- it manipulates +// the instructions in the binary so that a call to a function such as "main" actually +// calls "main_dyninst", which executes the instrumentation snippets around the actual +// "main" (this is the reason you need the dyninstAPI_RT library). +// +// UPDATE: +// Use of RTLD_DEEPBIND has been removed because it causes the dyninst +// ProcControlAPI to segfault within pthread_cond_wait on certain executables. +// +// Here are the docs on the dlopen options used: +// +// RTLD_LAZY +// Perform lazy binding. Only resolve symbols as the code that references them is +// executed. If the symbol is never referenced, then it is never resolved. (Lazy +// binding is only performed for function references; references to variables are +// always immediately bound when the library is loaded.) +// +// RTLD_LOCAL +// This is the converse of RTLD_GLOBAL, and the default if neither flag is specified. +// Symbols defined in this library are not made available to resolve references in +// subsequently loaded libraries. +// +// RTLD_DEEPBIND (since glibc 2.3.4) +// Place the lookup scope of the symbols in this library ahead of the global scope.
+// This means that a self-contained library will use its own symbols in preference to +// global symbols with the same name contained in libraries that have already been +// loaded. This flag is not specified in POSIX.1-2001. +// +#if __GLIBC__ >= 2 && __GLIBC_MINOR__ >= 4 +auto _omnitrace_dl_dlopen_flags = RTLD_LAZY | RTLD_LOCAL; +const char* _omnitrace_dl_dlopen_descr = "RTLD_LAZY | RTLD_LOCAL"; +#else +auto _omnitrace_dl_dlopen_flags = RTLD_LAZY | RTLD_LOCAL; +const char* _omnitrace_dl_dlopen_descr = "RTLD_LAZY | RTLD_LOCAL"; +#endif + +/// This class contains function pointers for omnitrace's instrumentation functions +struct OMNITRACE_HIDDEN indirect +{ + explicit OMNITRACE_INLINE indirect(std::string libpath) + : m_libpath{ find_path(std::move(libpath)) } + { + if(_omnitrace_dl_verbose > 0) + { + fprintf(stderr, "[omnitrace][dl][pid=%i] libomnitrace.so resolved to '%s'\n", + process::get_id(), m_libpath.c_str()); + } + tim::set_env("HSA_TOOLS_LIB", m_libpath, 0); + open(); + init(); + } + + OMNITRACE_INLINE ~indirect() { if(m_libhandle) dlclose(m_libhandle); } // m_libhandle stays null when dlopen failed; only close a handle dlopen returned + + OMNITRACE_INLINE void open() + { + if(m_libhandle) return; + + auto* libhandle = dlopen(m_libpath.c_str(), _omnitrace_dl_dlopen_flags); + + if(libhandle) + { + m_libhandle = libhandle; + if(_omnitrace_dl_verbose > 0) + { + fprintf(stderr, "[omnitrace][dl][pid=%i] dlopen(%s, %s) :: success\n", + process::get_id(), m_libpath.c_str(), _omnitrace_dl_dlopen_descr); + } + } + else + { + if(_omnitrace_dl_verbose >= 0) + { + perror("dlopen"); + fprintf(stderr, "[omnitrace][dl][pid=%i] dlopen(%s, %s) :: %s\n", + process::get_id(), m_libpath.c_str(), _omnitrace_dl_dlopen_descr, + dlerror()); + } + } + + dlerror(); // Clear any existing error + } + + OMNITRACE_INLINE void init() + { + if(!m_libhandle) open(); + + // Initialize all pointers + OMNITRACE_DLSYM(omnitrace_init_library_f, m_libhandle, "omnitrace_init_library"); + OMNITRACE_DLSYM(omnitrace_init_f, m_libhandle, "omnitrace_init"); +
OMNITRACE_DLSYM(omnitrace_finalize_f, m_libhandle, "omnitrace_finalize"); + OMNITRACE_DLSYM(omnitrace_set_env_f, m_libhandle, "omnitrace_set_env"); + OMNITRACE_DLSYM(omnitrace_set_mpi_f, m_libhandle, "omnitrace_set_mpi"); + OMNITRACE_DLSYM(omnitrace_push_trace_f, m_libhandle, "omnitrace_push_trace"); + OMNITRACE_DLSYM(omnitrace_pop_trace_f, m_libhandle, "omnitrace_pop_trace"); + } + + static OMNITRACE_INLINE std::string find_path(std::string&& _path) + { + auto _paths = tim::delimit( + TIMEMORY_JOIN(":", tim::get_env("OMNITRACE_PATH", ""), + tim::get_env("LD_LIBRARY_PATH", ""), + tim::get_env("LIBRARY_PATH", "")), + ":"); + + auto file_exists = [](const std::string& name) { + struct stat buffer; + return (stat(name.c_str(), &buffer) == 0); + }; + + for(auto&& itr : _paths) + { + auto _f = TIMEMORY_JOIN('/', itr, _path); + if(file_exists(_f)) return _f; + } + return _path; + } + +public: + void (*omnitrace_init_library_f)(void) = nullptr; + void (*omnitrace_init_f)(const char*, bool, const char*) = nullptr; + void (*omnitrace_finalize_f)(void) = nullptr; + void (*omnitrace_set_env_f)(const char*, const char*) = nullptr; + void (*omnitrace_set_mpi_f)(bool, bool) = nullptr; + void (*omnitrace_push_trace_f)(const char*) = nullptr; + void (*omnitrace_pop_trace_f)(const char*) = nullptr; + +private: + void* m_libhandle = nullptr; + std::string m_libpath = {}; +}; + +inline std::unique_ptr& +get_indirect() OMNITRACE_HIDDEN; + +template +inline void +invoke(const char* _name, FuncT&& _func, Args...
_args) OMNITRACE_HIDDEN; +} // namespace + +//--------------------------------------------------------------------------------------// + +extern "C" +{ + void omnitrace_init_library(void) + { + invoke(__FUNCTION__, get_indirect()->omnitrace_init_library_f); + } + + void omnitrace_init(const char* a, bool b, const char* c) + { + invoke(__FUNCTION__, get_indirect()->omnitrace_init_f, a, b, c); + } + + void omnitrace_finalize(void) + { + invoke(__FUNCTION__, get_indirect()->omnitrace_finalize_f); + } + + void omnitrace_push_trace(const char* name) + { + invoke(__FUNCTION__, get_indirect()->omnitrace_push_trace_f, name); + } + + void omnitrace_pop_trace(const char* name) + { + invoke(__FUNCTION__, get_indirect()->omnitrace_pop_trace_f, name); + } + + void omnitrace_set_env(const char* a, const char* b) + { + tim::set_env(a, b, 0); + invoke(__FUNCTION__, get_indirect()->omnitrace_set_env_f, a, b); + } + + void omnitrace_set_mpi(bool a, bool b) + { + invoke(__FUNCTION__, get_indirect()->omnitrace_set_mpi_f, a, b); + } +} + +//--------------------------------------------------------------------------------------// + +namespace +{ +int +get_omnitrace_env() +{ + auto&& _debug = tim::get_env("OMNITRACE_DEBUG", false, false); + return tim::get_env("OMNITRACE_VERBOSE", (_debug) ? 100 : 0, false); +} + +std::unique_ptr& +get_indirect() +{ + static std::unique_ptr _v = std::make_unique( + tim::get_env("OMNITRACE_LIBRARY", "libomnitrace.so")); + return _v; +} + +template +void +invoke(const char* _name, FuncT&& _func, Args... _args) +{ + if(!get_indirect()) + { + // allow -2 verbosity to avoid throwing exception + if(_omnitrace_dl_verbose >= -1) + throw std::runtime_error( + "[omnitrace][dl] nullptr to struct holding dlsym function"); + return; + } + + if(_func) + { + std::invoke(std::forward(_func), _args...); + } + else if(_omnitrace_dl_verbose >= 0) + { + fprintf(stderr, "[omnitrace][dl] %s\n", + TIMEMORY_JOIN("", "null function pointer to ", _name, ". 
Ignoring ", + _name, "(", _args..., ")") + .c_str()); + } +} +} // namespace diff --git a/projects/rocprofiler-systems/source/lib/src/library.cpp b/projects/rocprofiler-systems/source/lib/src/library.cpp index 0f6aa67395..88ee4589d2 100644 --- a/projects/rocprofiler-systems/source/lib/src/library.cpp +++ b/projects/rocprofiler-systems/source/lib/src/library.cpp @@ -28,11 +28,11 @@ #include "library/debug.hpp" #include "library/defines.hpp" #include "library/gpu.hpp" +#include "library/ptl.hpp" #include "library/sampling.hpp" #include "library/thread_data.hpp" #include "library/thread_sampler.hpp" #include "library/timemory.hpp" -#include "timemory/mpl/type_traits.hpp" #include #include @@ -44,7 +44,11 @@ namespace std::vector& get_interval_data() { - static thread_local auto _v = std::vector{}; + static thread_local auto _v = []() { + auto _tmp = std::vector{}; + _tmp.reserve(max_supported_threads); + return _tmp; + }(); return _v; } @@ -86,10 +90,9 @@ is_system_backend() } auto& -get_instrumentation_bundles() +get_instrumentation_bundles(int64_t _tid = threading::get_id()) { - static thread_local auto& _v = - instrumentation_bundles::instances().at(threading::get_id()); + static thread_local auto& _v = instrumentation_bundles::instances().at(_tid); return _v; } @@ -120,10 +123,41 @@ using Phase = critical_trace::Phase; /// \brief the "start" function for an instrumentation region /// //======================================================================================// +namespace +{ +auto& +push_count() +{ + static thread_local uint16_t _v = 0; + return _v; +} +auto& +pop_count() +{ + static thread_local uint16_t _v = 0; + return _v; +} + +struct reentry_guard +{ + reentry_guard(uint16_t& _v) + : m_v{ _v } + { + ++m_v; + } + ~reentry_guard() { --m_v; } + uint16_t& m_v; + + operator bool() const { return (m_v <= 1); } +}; +} // namespace extern "C" void omnitrace_push_trace(const char* name) { + reentry_guard _lk{ push_count() }; + if(!_lk) return; + // return 
if not active if(get_state() == State::Finalized) return; @@ -135,12 +169,14 @@ omnitrace_push_trace(const char* name) return; } - OMNITRACE_DEBUG("[%s] %s\n", __FUNCTION__, name); + OMNITRACE_DEBUG_F("%s\n", name); static auto _sample_rate = std::max(get_instrumentation_interval(), 1); static thread_local size_t _sample_idx = 0; + auto& _interval = get_interval_data(); auto _enabled = (_sample_idx++ % _sample_rate == 0); - get_interval_data().emplace_back(_enabled); + + _interval.emplace_back(_enabled); if(_enabled) get_functors().first(name); if(get_use_critical_trace()) { @@ -168,9 +204,17 @@ omnitrace_push_trace(const char* name) extern "C" void omnitrace_pop_trace(const char* name) { + // the same thread should not be pushing AND popping + if(push_count() > 0) return; + + reentry_guard _lk{ pop_count() }; + // return if reentry + if(!_lk) return; + + // only execute when active if(get_state() == State::Active) { - OMNITRACE_DEBUG("[%s] %s\n", __FUNCTION__, name); + OMNITRACE_DEBUG_F("%s\n", name); auto& _interval_data = get_interval_data(); if(!_interval_data.empty()) { @@ -382,9 +426,8 @@ omnitrace_init_tooling() !get_use_rocm_smi()) { get_state() = State::Finalized; - OMNITRACE_DEBUG("[%s] Both perfetto and timemory are disabled. Setting the state " - "to finalized\n", - __FUNCTION__); + OMNITRACE_DEBUG_F("Both perfetto and timemory are disabled. 
Setting the state " + "to finalized\n"); return false; } @@ -487,6 +530,7 @@ omnitrace_init_tooling() }; static auto _pop_timemory = [](const char* name) { + auto _hash = tim::hash::get_hash_id(tim::string_view_t{ name }); auto& _data = get_instrumentation_bundles(); if(_data.bundles.empty()) { @@ -494,10 +538,17 @@ omnitrace_init_tooling() "omnitrace_pop_trace", name); return; } - _data.bundles.back()->stop(); - _data.allocator.destroy(_data.bundles.back()); - _data.allocator.deallocate(_data.bundles.back(), 1); - _data.bundles.pop_back(); + for(size_t i = _data.bundles.size(); i > 0; --i) + { + auto*& _v = _data.bundles.at(i - 1); + if(_v->get_hash() == _hash) + { + _v->stop(); + _data.allocator.destroy(_v); + _data.allocator.deallocate(_v, 1); + _data.bundles.erase(_data.bundles.begin() + (i - 1)); + } + } }; static auto _pop_perfetto = [](const char*) { @@ -597,11 +648,32 @@ omnitrace_finalize(void) // return if not active if(get_state() != State::Active) { - OMNITRACE_DEBUG_F("State = %s. Finalization skipped\n", - std::to_string(get_state()).c_str()); + OMNITRACE_BASIC_DEBUG_F("State = %s. 
Finalization skipped\n", + std::to_string(get_state()).c_str()); return; } + get_state() = State::Finalized; + + get_functors().first = [](const char*) {}; + get_functors().second = [](const char*) {}; + + if(get_use_timemory()) + { + auto& _data = get_instrumentation_bundles(); + if(!_data.bundles.empty()) + { + for(size_t i = _data.bundles.size(); i > 0; --i) + { + auto*& _v = _data.bundles.at(i - 1); + _v->stop(); + _data.allocator.destroy(_v); + _data.allocator.deallocate(_v, 1); + } + _data.bundles.clear(); + } + } + pthread_gotcha::enable_sampling_on_child_threads() = false; auto _debug_init = get_debug_finalize(); @@ -611,24 +683,22 @@ omnitrace_finalize(void) if(_debug_init) config::set_setting_value("OMNITRACE_DEBUG", _debug_value); } }; - OMNITRACE_DEBUG("[%s]\n", __FUNCTION__); + OMNITRACE_DEBUG_F("\n"); auto& _thread_bundle = thread_data::instance(); if(_thread_bundle) _thread_bundle->stop(); if(dmp::rank() == 0 && get_verbose() >= 0) fprintf(stderr, "\n"); - if(get_verbose_env() > 0) config::print_settings(); - - get_state() = State::Finalized; + if(get_verbose() > 0 || get_debug()) config::print_settings(); if(get_use_sampling()) { - OMNITRACE_DEBUG("[%s] Shutting down sampling...\n", __FUNCTION__); + OMNITRACE_DEBUG_F("Shutting down sampling...\n"); sampling::shutdown(); sampling::block_signals(); } - OMNITRACE_DEBUG("[%s] Stopping gotcha bundle...\n", __FUNCTION__); + OMNITRACE_DEBUG_F("Stopping gotcha bundle...\n"); // stop the gotcha bundle if(get_gotcha_bundle()) { @@ -639,13 +709,13 @@ omnitrace_finalize(void) pthread_gotcha::shutdown(); thread_sampler::shutdown(); - OMNITRACE_DEBUG("[%s] Shutting down roctracer...\n", __FUNCTION__); + OMNITRACE_DEBUG_F("Shutting down roctracer...\n"); // ensure that threads running roctracer callbacks shutdown comp::roctracer::shutdown(); if(dmp::rank() == 0) fprintf(stderr, "\n"); - OMNITRACE_DEBUG("[%s] Stopping main bundle...\n", __FUNCTION__); + OMNITRACE_DEBUG_F("Stopping main bundle...\n"); // stop the 
main bundle and report the high-level metrics if(get_main_bundle()) { @@ -659,18 +729,18 @@ omnitrace_finalize(void) int _threadpool_verbose = (get_debug()) ? 4 : -1; tasking::get_roctracer_thread_pool().set_verbose(_threadpool_verbose); - tasking::get_critical_trace_thread_pool().set_verbose(_threadpool_verbose); + if(get_use_critical_trace()) + tasking::get_critical_trace_thread_pool().set_verbose(_threadpool_verbose); // join extra thread(s) used by roctracer - OMNITRACE_DEBUG("[%s] waiting for all roctracer tasks to complete...\n", - __FUNCTION__); + OMNITRACE_DEBUG_F("waiting for all roctracer tasks to complete...\n"); tasking::get_roctracer_task_group().join(); // print out thread-data if they are not still running // if they are still running (e.g. thread-pool still alive), the // thread-specific data will be wrong if try to stop them from // the main thread. - OMNITRACE_DEBUG("[%s] Destroying thread bundle data...\n", __FUNCTION__); + OMNITRACE_DEBUG_F("Destroying thread bundle data...\n"); for(auto& itr : thread_data::instances()) { if(itr && itr->get() && @@ -684,8 +754,7 @@ omnitrace_finalize(void) } // ensure that all the MT instances are flushed - OMNITRACE_DEBUG("[%s] Stopping and destroying instrumentation bundles...\n", - __FUNCTION__); + OMNITRACE_DEBUG_F("Stopping and destroying instrumentation bundles...\n"); for(auto& itr : instrumentation_bundles::instances()) { while(!itr.bundles.empty()) @@ -701,8 +770,7 @@ omnitrace_finalize(void) // ensure that all the MT instances are flushed if(get_use_sampling()) { - OMNITRACE_DEBUG("[%s] Post-processing the sampling backtraces...\n", - __FUNCTION__); + OMNITRACE_DEBUG_F("Post-processing the sampling backtraces...\n"); for(size_t i = 0; i < max_supported_threads; ++i) { sampling::backtrace::post_process(i); @@ -712,7 +780,7 @@ omnitrace_finalize(void) if(get_use_critical_trace() || (get_use_rocm_smi() && get_use_roctracer())) { - OMNITRACE_DEBUG("[%s] Generating the critical trace...\n", __FUNCTION__); + 
OMNITRACE_DEBUG_F("Generating the critical trace...\n"); // increase the thread-pool size tasking::get_critical_trace_thread_pool().initialize_threadpool( get_critical_trace_num_threads()); @@ -739,18 +807,12 @@ omnitrace_finalize(void) if(get_use_critical_trace()) { - OMNITRACE_DEBUG("[%s] Generating the critical trace...\n", __FUNCTION__); - // increase the thread-pool size - tasking::get_critical_trace_thread_pool().initialize_threadpool( - get_critical_trace_num_threads()); - // make sure outstanding hash tasks completed before compute - OMNITRACE_PRINT("[%s] waiting for all critical trace tasks to complete...\n", - __FUNCTION__); + OMNITRACE_PRINT_F("waiting for all critical trace tasks to complete...\n"); tasking::get_critical_trace_task_group().join(); // launch compute task - OMNITRACE_PRINT("[%s] launching critical trace compute task...\n", __FUNCTION__); + OMNITRACE_PRINT_F("launching critical trace compute task...\n"); critical_trace::compute(); } @@ -767,11 +829,10 @@ omnitrace_finalize(void) perfetto::TrackEvent::Flush(); auto& tracing_session = get_trace_session(); - OMNITRACE_DEBUG("[%s] Stopping the blocking perfetto trace sessions...\n", - __FUNCTION__); + OMNITRACE_DEBUG_F("Stopping the blocking perfetto trace sessions...\n"); tracing_session->StopBlocking(); - OMNITRACE_DEBUG("[%s] Getting the trace data...\n", __FUNCTION__); + OMNITRACE_DEBUG_F("Getting the trace data...\n"); std::vector trace_data{ tracing_session->ReadTraceBlocking() }; if(trace_data.empty()) @@ -808,18 +869,31 @@ omnitrace_finalize(void) // these should be destroyed before timemory is finalized, especially the // roctracer thread-pool - OMNITRACE_DEBUG("[%s] Destroying the thread pools...\n", __FUNCTION__); - tasking::get_roctracer_thread_pool().destroy_threadpool(); - tasking::get_critical_trace_thread_pool().destroy_threadpool(); + OMNITRACE_DEBUG_F("Destroying the roctracer thread pool...\n"); + { + std::unique_lock _lk{ tasking::get_roctracer_mutex() }; + 
tasking::get_roctracer_task_group().join(); + tasking::get_roctracer_task_group().clear(); + tasking::get_roctracer_task_group().set_pool(nullptr); + tasking::get_roctracer_thread_pool().destroy_threadpool(); + } - if(get_use_sampling()) - static_cast*>( - tim::settings::instance()->find("OMNITRACE_DEBUG")->second.get()) - ->set(false); + OMNITRACE_DEBUG_F("Destroying the critical trace thread pool...\n"); + { + std::unique_lock _lk{ tasking::get_critical_trace_mutex() }; + tasking::get_critical_trace_task_group().join(); + tasking::get_critical_trace_task_group().clear(); + tasking::get_critical_trace_task_group().set_pool(nullptr); + tasking::get_critical_trace_thread_pool().destroy_threadpool(); + } - OMNITRACE_DEBUG("[%s] Finalizing timemory...\n", __FUNCTION__); + OMNITRACE_DEBUG_F("Finalizing timemory...\n"); tim::timemory_finalize(); - OMNITRACE_DEBUG("[%s] Finalizing timemory... Done\n", __FUNCTION__); + + OMNITRACE_DEBUG_F("Disabling signal handling...\n"); + tim::disable_signal_detection(); + + OMNITRACE_DEBUG_F("Finalized\n"); if(_perfetto_output_error) { diff --git a/projects/rocprofiler-systems/source/lib/src/library/components/backtrace.cpp b/projects/rocprofiler-systems/source/lib/src/library/components/backtrace.cpp index 6e01b43915..ff9e33e053 100644 --- a/projects/rocprofiler-systems/source/lib/src/library/components/backtrace.cpp +++ b/projects/rocprofiler-systems/source/lib/src/library/components/backtrace.cpp @@ -231,8 +231,16 @@ backtrace::get_thread_cpu_timestamp() const void backtrace::sample(int signum) { - static bool _debug = tim::get_env("OMNITRACE_DEBUG_SAMPLING", get_debug()); - if(_debug) + if(get_state() != State::Active) + { + OMNITRACE_CONDITIONAL_PRINT( + get_debug_sampling(), + "request to sample (signal %i) ignored because omnitrace is not active\n", + signum); + return; + } + + if(get_debug_sampling()) { static auto _timestamp_str = [](const auto& _tp) { char _repr[64]; @@ -269,7 +277,7 @@ backtrace::sample(int signum) 
if(strlen(*itr) == 0) break; } std::reverse(m_data.begin(), itr); - if(!get_debug()) + if(!get_debug_sampling()) { bool _ignore = false; for(auto& itr : m_data) @@ -317,6 +325,7 @@ backtrace::configure(bool _setup, int64_t _tid) if(_setup && !_sampler && !_is_running) { + (void) get_debug_sampling(); // make sure query in sampler does not allocate assert(_tid == threading::get_id()); sampling::block_signals(*_signal_types); if constexpr(tim::trait::is_available::value) @@ -424,7 +433,7 @@ backtrace::post_process(int64_t _tid) auto _use_label = [](const std::string& _lbl, bool _check_internal) -> short { // debugging feature static bool _keep_internal = - tim::get_env("OMNITRACE_SAMPLING_KEEP_INTERNAL", get_debug()); + tim::get_env("OMNITRACE_SAMPLING_KEEP_INTERNAL", get_debug_sampling()); const auto _npos = std::string::npos; if(_keep_internal) return 1; if(_lbl.find("omnitrace_init_tooling") != _npos) return -1; @@ -450,8 +459,8 @@ backtrace::post_process(int64_t _tid) // "_dyninst", i.e. "main" will show up as "main_dyninst" in the backtrace. 
auto _patch_label = [](std::string _lbl) -> std::string { // debugging feature - static bool _keep_suffix = - tim::get_env("OMNITRACE_SAMPLING_KEEP_DYNINST_SUFFIX", get_debug()); + static bool _keep_suffix = tim::get_env( + "OMNITRACE_SAMPLING_KEEP_DYNINST_SUFFIX", get_debug_sampling()); if(_keep_suffix) return _lbl; const std::string _dyninst{ "_dyninst" }; auto _pos = _lbl.find(_dyninst); @@ -570,7 +579,7 @@ backtrace::post_process(int64_t _tid) if(_data.empty()) return; OMNITRACE_CONDITIONAL_PRINT( - get_verbose() >= 0 || get_debug(), + get_verbose() >= 0 || get_debug_sampling(), "Post-processing %zu sampling entries for thread %lu...\n", _data.size(), _tid); std::sort(_data.begin(), _data.end(), diff --git a/projects/rocprofiler-systems/source/lib/src/library/components/mpi_gotcha.cpp b/projects/rocprofiler-systems/source/lib/src/library/components/mpi_gotcha.cpp index be19210145..e2dbe68264 100644 --- a/projects/rocprofiler-systems/source/lib/src/library/components/mpi_gotcha.cpp +++ b/projects/rocprofiler-systems/source/lib/src/library/components/mpi_gotcha.cpp @@ -25,10 +25,10 @@ #include "library/components/omnitrace.hpp" #include "library/config.hpp" #include "library/debug.hpp" -#include "timemory/backends/process.hpp" #include #include +#include namespace omnitrace { diff --git a/projects/rocprofiler-systems/source/lib/src/library/components/pthread_gotcha.cpp b/projects/rocprofiler-systems/source/lib/src/library/components/pthread_gotcha.cpp index 94a3fd59c8..71de8b81dd 100644 --- a/projects/rocprofiler-systems/source/lib/src/library/components/pthread_gotcha.cpp +++ b/projects/rocprofiler-systems/source/lib/src/library/components/pthread_gotcha.cpp @@ -27,10 +27,11 @@ #include "library/debug.hpp" #include "library/sampling.hpp" -#include +#include #include #include +#include #include namespace omnitrace @@ -85,8 +86,9 @@ stop_bundle(bundle_t& _bundle, int64_t _tid) } // namespace pthread_gotcha::wrapper::wrapper(routine_t _routine, void* _arg, bool 
_enable_sampling, - promise_t* _p) + int64_t _parent, promise_t* _p) : m_enable_sampling{ _enable_sampling } +, m_parent_tid{ _parent } , m_routine{ _routine } , m_arg{ _arg } , m_promise{ _p } @@ -117,6 +119,8 @@ pthread_gotcha::wrapper::operator()() const } } }; + if(_active) get_cpu_cid_stack(threading::get_id(), m_parent_tid); + if(m_enable_sampling && _enable_sampling && _active) { _tid = threading::get_id(); @@ -197,11 +201,16 @@ pthread_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, { bundle_t _bundle{ "pthread_create" }; auto _enable_sampling = enable_sampling_on_child_threads(); + auto _active = (get_state() == omnitrace::State::Active); + int64_t _tid = (_active) ? threading::get_id() : 0; + + // ensure that cpu cid stack exists on the parent thread if active + if(_active) get_cpu_cid_stack(); if(!get_use_sampling() || !_enable_sampling) { // if(!get_use_sampling()) start_bundle(_bundle); - auto* _obj = new wrapper(start_routine, arg, _enable_sampling, nullptr); + auto* _obj = new wrapper(start_routine, arg, _enable_sampling, _tid, nullptr); // create the thread auto _ret = pthread_create(thread, attr, &wrapper::wrap, static_cast(_obj)); @@ -219,7 +228,7 @@ pthread_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, // promise set by thread when signal handler is configured auto _promise = std::promise{}; auto _fut = _promise.get_future(); - auto* _wrap = new wrapper(start_routine, arg, _enable_sampling, &_promise); + auto* _wrap = new wrapper(start_routine, arg, _enable_sampling, _tid, &_promise); // create the thread auto _ret = pthread_create(thread, attr, &wrapper::wrap, static_cast(_wrap)); diff --git a/projects/rocprofiler-systems/source/lib/src/library/components/roctracer.cpp b/projects/rocprofiler-systems/source/lib/src/library/components/roctracer.cpp index ab5e2ae1c3..4340e8b5a6 100644 --- a/projects/rocprofiler-systems/source/lib/src/library/components/roctracer.cpp +++ 
b/projects/rocprofiler-systems/source/lib/src/library/components/roctracer.cpp @@ -215,8 +215,8 @@ TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer_data, true, double) extern "C" { bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) TIMEMORY_VISIBILITY("default"); - void OnUnload() TIMEMORY_VISIBILITY("default"); + const char* const* failed_tool_names) OMNITRACE_VISIBILITY("default"); + void OnUnload() OMNITRACE_VISIBILITY("default"); bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) diff --git a/projects/rocprofiler-systems/source/lib/src/library/components/roctracer_callbacks.cpp b/projects/rocprofiler-systems/source/lib/src/library/components/roctracer_callbacks.cpp index d0d0049156..e04ae22925 100644 --- a/projects/rocprofiler-systems/source/lib/src/library/components/roctracer_callbacks.cpp +++ b/projects/rocprofiler-systems/source/lib/src/library/components/roctracer_callbacks.cpp @@ -185,14 +185,15 @@ hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* std::unique_lock _lk{ tasking::get_roctracer_mutex() }; auto _begin_ns = begin_timestamp; auto _end_ns = end_timestamp; - tasking::get_roctracer_task_group().exec( - [_name, _begin_ns, _end_ns]() { - roctracer_hsa_bundle_t _bundle{ _name, _scope }; - _bundle.start() - .store(std::plus{}, - static_cast(_end_ns - _begin_ns)) - .stop(); - }); + if(tasking::get_roctracer_task_group().pool()) + tasking::get_roctracer_task_group().exec( + [_name, _begin_ns, _end_ns]() { + roctracer_hsa_bundle_t _bundle{ _name, _scope }; + _bundle.start() + .store(std::plus{}, + static_cast(_end_ns - _begin_ns)) + .stop(); + }); } // timemory is disabled in this callback because collecting data in this // thread causes strange segmentation faults @@ -251,7 +252,8 @@ hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg) }; std::unique_lock 
_lk{ tasking::get_roctracer_mutex() }; - tasking::get_roctracer_task_group().exec(_func); + if(tasking::get_roctracer_task_group().pool()) + tasking::get_roctracer_task_group().exec(_func); // timemory is disabled in this callback because collecting data in this thread // causes strange segmentation faults diff --git a/projects/rocprofiler-systems/source/lib/src/library/config.cpp b/projects/rocprofiler-systems/source/lib/src/library/config.cpp index 0557401cd4..3dc22475be 100644 --- a/projects/rocprofiler-systems/source/lib/src/library/config.cpp +++ b/projects/rocprofiler-systems/source/lib/src/library/config.cpp @@ -21,6 +21,7 @@ // SOFTWARE. #include "library/config.hpp" +#include "library/api.hpp" #include "library/debug.hpp" #include "library/defines.hpp" #include "library/thread_data.hpp" @@ -29,16 +30,21 @@ #include #include #include +#include #include #include #include +#include +#include #include +#include #include #include #include #include #include +#include namespace omnitrace { @@ -69,7 +75,8 @@ get_setting_name(std::string _v) { \ auto _ret = _config->insert( \ ENV_NAME, get_setting_name(ENV_NAME), DESCRIPTION, INITIAL_VALUE, \ - std::set{ "custom", "omnitrace", __VA_ARGS__ }); \ + std::set{ "custom", "omnitrace", "omnitrace-library", \ + __VA_ARGS__ }); \ if(!_ret.second) \ OMNITRACE_PRINT("Warning! 
Duplicate setting: %s / %s\n", \ get_setting_name(ENV_NAME).c_str(), ENV_NAME); \ @@ -261,6 +268,24 @@ configure_settings() _config->get_memory_units() = "MB"; _config->get_papi_events() = "PAPI_TOT_CYC, PAPI_TOT_INS"; + // settings native to timemory but critically and/or extensively used by omnitrace + auto _add_omnitrace_category = [](auto itr) { + if(itr != _config->end()) + { + auto _categories = itr->second->get_categories(); + _categories.emplace("omnitrace"); + _categories.emplace("omnitrace-library"); + itr->second->set_categories(_categories); + } + }; + + _add_omnitrace_category(_config->find("OMNITRACE_CONFIG_FILE")); + _add_omnitrace_category(_config->find("OMNITRACE_DEBUG")); + _add_omnitrace_category(_config->find("OMNITRACE_VERBOSE")); + _add_omnitrace_category(_config->find("OMNITRACE_TIME_OUTPUT")); + _add_omnitrace_category(_config->find("OMNITRACE_OUTPUT_PREFIX")); + _add_omnitrace_category(_config->find("OMNITRACE_OUTPUT_PATH")); + #if defined(TIMEMORY_USE_PAPI) int _paranoid = 2; { @@ -287,6 +312,10 @@ configure_settings() tim::trait::runtime_enabled::set(false); _config->get_papi_events() = ""; } + else + { + _add_omnitrace_category(_config->find("OMNITRACE_PAPI_EVENTS")); + } #else _config->get_papi_quiet() = true; #endif @@ -330,6 +359,30 @@ configure_settings() if(tim::mpi::is_initialized()) settings::default_process_suffix() = tim::mpi::rank(); #endif OMNITRACE_CONDITIONAL_BASIC_PRINT(get_verbose_env() > 0, "configuration complete\n"); + + if(_config->get_enable_signal_handler()) + { + using signal_settings = tim::signal_settings; + using sys_signal = tim::sys_signal; + tim::disable_signal_detection(); + auto _exit_action = [](int nsig) { + tim::sampling::block_signals({ SIGPROF, SIGALRM }, + tim::sampling::sigmask_scope::process); + OMNITRACE_BASIC_PRINT( + "Finalizing afer signal %i :: %s\n", nsig, + signal_settings::str(static_cast(nsig)).c_str()); + if(get_state() == State::Active) omnitrace_finalize(); + kill(process::get_id(), 
nsig); + }; + signal_settings::set_exit_action(_exit_action); + tim::set_env("SIGNAL_ENABLE_INTERRUPT", "1", 0); + signal_settings::check_environment(); + auto default_signals = signal_settings::get_default(); + for(const auto& itr : default_signals) + signal_settings::enable(itr); + auto enabled_signals = signal_settings::get_enabled(); + tim::enable_signal_detection(enabled_signals); + } } void @@ -575,6 +628,13 @@ get_debug() return static_cast&>(*_v->second).get(); } +bool +get_debug_sampling() +{ + static bool _v = tim::get_env("OMNITRACE_DEBUG_SAMPLING", get_debug_env()); + return (_v || get_debug()); +} + int get_verbose_env() { @@ -896,19 +956,24 @@ get_cpu_cid() } std::unique_ptr>& -get_cpu_cid_stack(int64_t _tid) +get_cpu_cid_stack(int64_t _tid, int64_t _parent) { struct omnitrace_cpu_cid_stack {}; using thread_data_t = thread_data, omnitrace_cpu_cid_stack>; - static auto& _v = thread_data_t::instances(); - static thread_local auto _v_check = [_tid]() { - thread_data_t::construct((_tid > 0) ? *thread_data_t::instances().at(0) - : std::vector{}); + static auto& _v = thread_data_t::instances(); + static thread_local auto _v_copy = [_tid, _parent]() { + auto _parent_tid = _parent; + // if tid != parent and there is not a valid pointer for the provided parent + // thread id set it to zero since that will always be valid + if(_tid != _parent_tid && !_v.at(_parent_tid)) _parent_tid = 0; + // copy over the thread ids from the parent if tid != parent + thread_data_t::construct((_tid != _parent_tid) ? 
*(_v.at(_parent_tid)) + : std::vector{}); return true; }(); return _v.at(_tid); - (void) _v_check; + (void) _v_copy; } namespace diff --git a/projects/rocprofiler-systems/source/lib/src/library/critical_trace.cpp b/projects/rocprofiler-systems/source/lib/src/library/critical_trace.cpp index 2f30c566bd..4e04855702 100644 --- a/projects/rocprofiler-systems/source/lib/src/library/critical_trace.cpp +++ b/projects/rocprofiler-systems/source/lib/src/library/critical_trace.cpp @@ -508,10 +508,9 @@ get(int64_t _tid) void add_hash_id(const hash_ids& _labels) { - std::unique_lock _lk{ tasking::get_critical_trace_mutex(), - std::defer_lock }; - if(!_lk.owns_lock()) _lk.lock(); - tasking::get_critical_trace_task_group().run([_labels]() { + std::unique_lock _lk{ tasking::get_critical_trace_mutex() }; + if(!tasking::get_critical_trace_task_group().pool()) return; + tasking::get_critical_trace_task_group().exec([_labels]() { static std::mutex _mtx{}; _mtx.lock(); for(auto itr : _labels) @@ -539,9 +538,8 @@ void update(int64_t _tid) { if(!get_use_critical_trace() && !get_use_rocm_smi()) return; - std::unique_lock _lk{ tasking::get_critical_trace_mutex(), - std::defer_lock }; - if(!_lk.owns_lock()) _lk.lock(); + std::unique_lock _lk{ tasking::get_critical_trace_mutex() }; + if(!tasking::get_critical_trace_task_group().pool()) return; call_chain _data{}; std::swap(_data, *critical_trace::get(_tid)); tasking::get_critical_trace_task_group().exec(update_critical_path, _data, _tid); @@ -551,6 +549,8 @@ void compute(int64_t _tid) { update(_tid); + std::unique_lock _lk{ tasking::get_critical_trace_mutex() }; + if(!tasking::get_critical_trace_task_group().pool()) return; tasking::get_critical_trace_task_group().exec(compute_critical_trace); } @@ -824,8 +824,10 @@ get_entries(int64_t _ts, const std::function& _eval) } *_targ = _v; }; + std::unique_lock _lk{ tasking::get_critical_trace_mutex() }; size_t _n = 0; std::vector> _v{}; + if(!tasking::get_critical_trace_task_group().pool()) 
return _v; tasking::get_critical_trace_task_group().exec(_func, &_v, &_n); tasking::get_critical_trace_task_group().join(); OMNITRACE_DEBUG("critical_trace::%s :: found %zu out of %zu entries at %li...\n", diff --git a/projects/rocprofiler-systems/source/lib/src/library/gpu.cpp b/projects/rocprofiler-systems/source/lib/src/library/gpu.cpp index 8b03773461..598cad5b09 100644 --- a/projects/rocprofiler-systems/source/lib/src/library/gpu.cpp +++ b/projects/rocprofiler-systems/source/lib/src/library/gpu.cpp @@ -28,7 +28,7 @@ # if !defined(TIMEMORY_USE_HIP) # define TIMEMORY_USE_HIP 1 # endif -# include "timemory/components/hip/backends.hpp" +# include #endif namespace omnitrace