diff --git a/.cmake-format.yaml b/.cmake-format.yaml index 99488f39ba..6c7d80a1d2 100644 --- a/.cmake-format.yaml +++ b/.cmake-format.yaml @@ -215,6 +215,22 @@ parse: DEFINITIONS: '*' LINK_LIBRARIES: '*' INCLUDE_DIRECTORIES: '*' + omnitrace_add_validation_test: + kwargs: + NAME: '*' + ARGS: '*' + LABELS: '*' + TIMEOUT: '*' + DEPENDS: '*' + PROPERTIES: '*' + PASS_REGEX: '*' + FAIL_REGEX: '*' + SKIP_REGEX: '*' + ENVIRONMENT: '*' + PERFETTO_FILE: '*' + PERFETTO_METRIC: '*' + TIMEMORY_FILE: '*' + TIMEMORY_METRIC: '*' override_spec: {} vartags: [] proptags: [] diff --git a/CMakeLists.txt b/CMakeLists.txt index 2352753abd..595c6f4d4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -116,8 +116,6 @@ if(CI_BUILD) ADVANCED) omnitrace_add_option(OMNITRACE_BUILD_DEBUG "Enable building with extensive debug symbols" OFF ADVANCED) - omnitrace_add_option(OMNITRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF - ADVANCED) omnitrace_add_option( OMNITRACE_BUILD_HIDDEN_VISIBILITY "Build with hidden visibility (disable for Debug builds)" OFF ADVANCED) @@ -131,8 +129,6 @@ else() ADVANCED) omnitrace_add_option(OMNITRACE_BUILD_DEBUG "Enable building with extensive debug symbols" OFF ADVANCED) - omnitrace_add_option(OMNITRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF - ADVANCED) omnitrace_add_option( OMNITRACE_BUILD_HIDDEN_VISIBILITY "Build with hidden visibility (disable for Debug builds)" ON ADVANCED) diff --git a/cmake/MacroUtilities.cmake b/cmake/MacroUtilities.cmake index 3636042ea7..e24e68478f 100644 --- a/cmake/MacroUtilities.cmake +++ b/cmake/MacroUtilities.cmake @@ -108,27 +108,57 @@ function(OMNITRACE_CAPITALIZE str var) endfunction() # ------------------------------------------------------------------------------# -# function omnitrace_strip_target() +# function omnitrace_strip_target( [FORCE] [EXPLICIT]) # -# Creates a target which runs ctest but depends on all the tests being built. +# Creates a post-build command which strips a binary. FORCE flag will override # -function(OMNITRACE_STRIP_TARGET _TARGET) - if(CMAKE_STRIP AND OMNITRACE_STRIP_LIBRARIES) - add_custom_command( - TARGET ${_TARGET} - POST_BUILD - COMMAND - ${CMAKE_STRIP} -w --keep-symbol="omnitrace_init" - --keep-symbol="omnitrace_finalize" --keep-symbol="omnitrace_push_trace" - --keep-symbol="omnitrace_pop_trace" --keep-symbol="omnitrace_push_region" - --keep-symbol="omnitrace_pop_region" --keep-symbol="omnitrace_set_env" - --keep-symbol="omnitrace_set_mpi" --keep-symbol="omnitrace_reset_preload" - --keep-symbol="omnitrace_user_*" --keep-symbol="ompt_start_tool" - --keep-symbol="kokkosp_*" --keep-symbol="OnLoad" --keep-symbol="OnUnload" - --keep-symbol="OnLoadToolProp" --keep-symbol="OnUnloadTool" - --keep-symbol="__libc_start_main" ${ARGN} $ - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - COMMENT "Stripping ${_TARGET}...") +function(OMNITRACE_STRIP_TARGET) + cmake_parse_arguments(STRIP "FORCE;EXPLICIT" "" "ARGS" ${ARGN}) + + list(LENGTH STRIP_UNPARSED_ARGUMENTS NUM_UNPARSED) + + if(NUM_UNPARSED EQUAL 1) + set(_TARGET "${STRIP_UNPARSED_ARGUMENTS}") + else() + omnitrace_message(FATAL_ERROR + "omnitrace_strip_target cannot deduce target from \"${ARGN}\"") + endif() + + if(NOT TARGET "${_TARGET}") + omnitrace_message( + FATAL_ERROR + "omnitrace_strip_target not provided valid target: \"${_TARGET}\"") + endif() + + if(CMAKE_STRIP AND (STRIP_FORCE OR OMNITRACE_STRIP_LIBRARIES)) + if(STRIP_EXPLICIT) + add_custom_command( + TARGET ${_TARGET} + POST_BUILD + COMMAND ${CMAKE_STRIP} ${STRIP_ARGS} $ + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Stripping ${_TARGET}...") + else() + add_custom_command( + TARGET ${_TARGET} + POST_BUILD + COMMAND + ${CMAKE_STRIP} -w --keep-symbol="omnitrace_init" + --keep-symbol="omnitrace_finalize" + --keep-symbol="omnitrace_push_trace" + --keep-symbol="omnitrace_pop_trace" + --keep-symbol="omnitrace_push_region" + --keep-symbol="omnitrace_pop_region" --keep-symbol="omnitrace_set_env" + --keep-symbol="omnitrace_set_mpi" + --keep-symbol="omnitrace_reset_preload" + --keep-symbol="omnitrace_user_*" --keep-symbol="ompt_start_tool" + --keep-symbol="kokkosp_*" --keep-symbol="OnLoad" + --keep-symbol="OnUnload" --keep-symbol="OnLoadToolProp" + --keep-symbol="OnUnloadTool" --keep-symbol="__libc_start_main" + ${STRIP_ARGS} $ + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Stripping ${_TARGET}...") + endif() endif() endfunction() diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 8c3b8ac360..07a26b3749 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -53,3 +53,4 @@ add_subdirectory(lulesh) add_subdirectory(rccl) add_subdirectory(rewrite-caller) add_subdirectory(causal) +add_subdirectory(trace-time-window) diff --git a/examples/trace-time-window/CMakeLists.txt b/examples/trace-time-window/CMakeLists.txt new file mode 100644 index 0000000000..fe68dc16fe --- /dev/null +++ b/examples/trace-time-window/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.15 FATAL_ERROR) + +project(omnitrace-trace-time-window-example LANGUAGES CXX) + +if(OMNITRACE_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if(${PROJECT_NAME} IN_LIST OMNITRACE_DISABLE_EXAMPLES OR ${_DIR} IN_LIST + OMNITRACE_DISABLE_EXAMPLES) + return() + endif() +endif() + +set(CMAKE_BUILD_TYPE "Debug") + +add_executable(trace-time-window trace-time-window.cpp) +target_compile_options(trace-time-window PRIVATE ${_FLAGS}) + +if(OMNITRACE_INSTALL_EXAMPLES) + install( + TARGETS trace-time-window + DESTINATION bin + COMPONENT omnitrace-examples) +endif() diff --git a/examples/trace-time-window/trace-time-window.cpp b/examples/trace-time-window/trace-time-window.cpp new file mode 100644 index 0000000000..12a7d3f978 --- /dev/null +++ b/examples/trace-time-window/trace-time-window.cpp @@ -0,0 +1,80 @@ +#include +#include +#include +#include +#include +#include + +#define NOINLINE __attribute__((noinline)) + +NOINLINE size_t +inner(); + +NOINLINE size_t +outer_a(); + +NOINLINE size_t +outer_b(); + +NOINLINE size_t +outer_c(); + +NOINLINE size_t +outer_d(); + +NOINLINE size_t +outer_e(); + +int +main(int argc, char** argv) +{ + int nrepeat = 1; + if(argc > 1) nrepeat = atol(argv[1]); + + std::string _name = argv[0]; + auto _pos = _name.find_last_of('/'); + if(_pos != std::string::npos) _name = _name.substr(_pos + 1); + + size_t nitr = 0; + for(int i = 0; i < nrepeat; ++i) + { + nitr += outer_a(); + nitr += outer_b(); + nitr += outer_c(); + nitr += outer_d(); + nitr += outer_e(); + printf("[%s][%i] number of calls made = %zu\n", _name.c_str(), i, nitr); + } +} + +size_t +inner(size_t _duration) +{ + static int64_t _n = 0; + + if(_n++ % 5 == 2) + { + using clock_type = std::chrono::high_resolution_clock; + auto _end = clock_type::now() + std::chrono::milliseconds{ _duration }; + size_t nitr = 0; + while(clock_type::now() < _end) + { + ++nitr; + } + return nitr; + } + else + { + std::this_thread::sleep_for(std::chrono::milliseconds{ _duration }); + return 1; + } +} + +#define OUTER_FUNCTION(TAG) \ + size_t outer_##TAG() { return inner(500); } + +OUTER_FUNCTION(a) +OUTER_FUNCTION(b) +OUTER_FUNCTION(c) +OUTER_FUNCTION(d) +OUTER_FUNCTION(e) diff --git a/external/dyninst b/external/dyninst index e4d2eb36ae..dcc8dad3fb 160000 --- a/external/dyninst +++ b/external/dyninst @@ -1 +1 @@ -Subproject commit e4d2eb36ae2de522f27e5c9f77de8b30e92630c7 +Subproject commit dcc8dad3fb15ec40041f671df0de7d157f65c0e2 diff --git a/external/timemory b/external/timemory index 64bf1067a4..92fc712074 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit 64bf1067a49e16733c67e0fde47f343ae85335cd +Subproject commit 92fc71207483fb9ccbb1f37a45e76484df3dc0f1 diff --git a/scripts/run-ci.py b/scripts/run-ci.py index f17dff9524..f77557ef2a 100755 --- a/scripts/run-ci.py +++ b/scripts/run-ci.py @@ -19,7 +19,6 @@ def which(cmd, require): def generate_custom(args, cmake_args, ctest_args): - if not os.path.exists(args.binary_dir): os.makedirs(args.binary_dir) @@ -74,7 +73,6 @@ def generate_custom(args, cmake_args, ctest_args): def generate_dashboard_script(args): - CODECOV = 1 if args.coverage else 0 DASHBOARD_MODE = args.mode SOURCE_DIR = os.path.realpath(args.source_dir) @@ -244,7 +242,6 @@ def run(*args, **kwargs): if __name__ == "__main__": - args, cmake_args, ctest_args = parse_args() if not os.path.exists(args.binary_dir): diff --git a/source/bin/omnitrace-causal/impl.cpp b/source/bin/omnitrace-causal/impl.cpp index d757590c75..bf558cc61a 100644 --- a/source/bin/omnitrace-causal/impl.cpp +++ b/source/bin/omnitrace-causal/impl.cpp @@ -58,7 +58,7 @@ namespace console = ::tim::utility::console; namespace argparse = ::tim::argparse; using namespace timemory::join; using tim::get_env; -using tim::log::colorized; +using tim::log::monochrome; using tim::log::stream; namespace std @@ -535,15 +535,6 @@ parse_args(int argc, char** argv, std::vector& _env, exit(EXIT_FAILURE); }); - auto _add_separator = [&](std::string _v, const std::string& _desc) { - parser.add_argument({ "" }, ""); - parser - .add_argument({ join("", "[", _v, "]") }, - (_desc.empty()) ? _desc : join({ "", "(", ")" }, _desc)) - .color(color::info()); - parser.add_argument({ "" }, ""); - }; - parser.enable_help(); parser.enable_version("omnitrace-causal", "v" OMNITRACE_VERSION_STRING, OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION); @@ -553,16 +544,16 @@ parse_args(int argc, char** argv, std::vector& _env, parser.set_description_width( std::min(_cols - parser.get_help_width() - 8, 120)); - _add_separator("DEBUG OPTIONS", ""); + parser.start_group("DEBUG OPTIONS", ""); parser.add_argument({ "--monochrome" }, "Disable colorized output") .max_count(1) .dtype("bool") .action([&](parser_t& p) { - auto _colorized = !p.get("monochrome"); - colorized() = _colorized; - p.set_use_color(_colorized); - update_env(_env, "OMNITRACE_COLORIZED_LOG", (_colorized) ? "1" : "0"); - update_env(_env, "COLORIZED_LOG", (_colorized) ? "1" : "0"); + auto _monochrome = p.get("monochrome"); + monochrome() = _monochrome; + p.set_use_color(!_monochrome); + update_env(_env, "OMNITRACE_MONOCHROME", (_monochrome) ? "1" : "0"); + update_env(_env, "MONOCHROME", (_monochrome) ? "1" : "0"); }); parser.add_argument({ "--debug" }, "Debug output") .max_count(1) @@ -582,7 +573,7 @@ parse_args(int argc, char** argv, std::vector& _env, bool _generate_configs = false; bool _add_defaults = true; - _add_separator("GENERAL OPTIONS", ""); + parser.start_group("GENERAL OPTIONS", ""); parser.add_argument({ "-c", "--config" }, "Base configuration file") .min_count(0) .dtype("filepath") @@ -629,8 +620,8 @@ parse_args(int argc, char** argv, std::vector& _env, .dtype("bool") .action([&](parser_t& p) { _add_defaults = !p.get("no-defaults"); }); - _add_separator("CAUSAL PROFILING OPTIONS (General)", - "These settings will be applied to all causal profiling runs"); + parser.start_group("CAUSAL PROFILING OPTIONS (General)", + "These settings will be applied to all causal profiling runs"); parser.add_argument({ "-m", "--mode" }, "Causal profiling mode") .count(1) .dtype("string") @@ -706,7 +697,7 @@ parse_args(int argc, char** argv, std::vector& _env, .dtype("int") .action([&](parser_t& p) { _niterations = p.get("iterations"); }); - _add_separator( + parser.start_group( "CAUSAL PROFILING OPTIONS (Combinatorial)", "Each individual argument to these options will multiply the number runs by the " "number of arguments and the number of iterations. E.g. -n 2 -B \"MAIN\" -F " @@ -804,6 +795,8 @@ parse_args(int argc, char** argv, std::vector& _env, _function_excludes = p.get>("function-exclude"); }); + parser.end_group(); + #if OMNITRACE_HIP_VERSION > 0 && OMNITRACE_HIP_VERSION < 50300 update_env(_env, "HSA_ENABLE_INTERRUPT", 0); #endif diff --git a/source/bin/omnitrace-sample/impl.cpp b/source/bin/omnitrace-sample/impl.cpp index de6112e432..382df764f2 100644 --- a/source/bin/omnitrace-sample/impl.cpp +++ b/source/bin/omnitrace-sample/impl.cpp @@ -54,14 +54,47 @@ namespace color = tim::log::color; using namespace timemory::join; using tim::get_env; -using tim::log::colorized; +using tim::log::monochrome; using tim::log::stream; namespace { -int verbose = 0; -auto updated_envs = std::set{}; -auto original_envs = std::set{}; +int verbose = 0; +auto updated_envs = std::set{}; +auto original_envs = std::set{}; +auto clock_id_choices = []() { + auto clock_name = [](std::string _v) { + constexpr auto _clock_prefix = std::string_view{ "clock_" }; + for(auto& itr : _v) + itr = tolower(itr); + auto _pos = _v.find(_clock_prefix); + if(_pos == 0) _v = _v.substr(_pos + _clock_prefix.length()); + if(_v == "process_cputime_id") _v = "cputime"; + return _v; + }; + +#define OMNITRACE_CLOCK_IDENTIFIER(VAL) \ + std::make_tuple(clock_name(#VAL), VAL, std::string_view{ #VAL }) + + auto _choices = std::vector{}; + auto _aliases = std::map>{}; + for(auto itr : { OMNITRACE_CLOCK_IDENTIFIER(CLOCK_REALTIME), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_MONOTONIC), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_PROCESS_CPUTIME_ID), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_MONOTONIC_RAW), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_REALTIME_COARSE), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_MONOTONIC_COARSE), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_BOOTTIME) }) + { + auto _choice = std::to_string(std::get<1>(itr)); + _choices.emplace_back(_choice); + _aliases[_choice] = { std::get<0>(itr), std::string{ std::get<2>(itr) } }; + } + +#undef OMNITRACE_CLOCK_IDENTIFIER + + return std::make_pair(_choices, _aliases); +}(); } // namespace std::string @@ -329,15 +362,7 @@ parse_args(int argc, char** argv, std::vector& _env) %{INDENT}%- discard : new data is ignored %{INDENT}%- ring_buffer : new data overwrites oldest data)"; - auto _add_separator = [&](std::string _v, const std::string& _desc) { - parser.add_argument({ "" }, ""); - parser - .add_argument({ join("", "[", _v, "]") }, - (_desc.empty()) ? _desc : join({ "", "(", ")" }, _desc)) - .color(tim::log::color::info()); - parser.add_argument({ "" }, ""); - }; - + parser.set_use_color(true); parser.enable_help(); parser.enable_version("omnitrace-sample", "v" OMNITRACE_VERSION_STRING, OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION); @@ -347,16 +372,16 @@ parse_args(int argc, char** argv, std::vector& _env) parser.set_description_width( std::min(_cols - parser.get_help_width() - 8, 120)); - _add_separator("DEBUG OPTIONS", ""); + parser.start_group("DEBUG OPTIONS", ""); parser.add_argument({ "--monochrome" }, "Disable colorized output") .max_count(1) .dtype("bool") .action([&](parser_t& p) { - auto _colorized = !p.get("monochrome"); - colorized() = _colorized; - p.set_use_color(_colorized); - update_env(_env, "OMNITRACE_COLORIZED_LOG", (_colorized) ? "1" : "0"); - update_env(_env, "COLORIZED_LOG", (_colorized) ? "1" : "0"); + auto _monochrome = p.get("monochrome"); + monochrome() = _monochrome; + p.set_use_color(!_monochrome); + update_env(_env, "OMNITRACE_MONOCHROME", (_monochrome) ? "1" : "0"); + update_env(_env, "MONOCHROME", (_monochrome) ? "1" : "0"); }); parser.add_argument({ "--debug" }, "Debug output") .max_count(1) @@ -371,7 +396,8 @@ parse_args(int argc, char** argv, std::vector& _env) update_env(_env, "OMNITRACE_VERBOSE", _v); }); - _add_separator("GENERAL OPTIONS", ""); + parser.start_group("GENERAL OPTIONS", + "These are options which are ubiquitously applied"); parser.add_argument({ "-c", "--config" }, "Configuration file") .min_count(0) .dtype("filepath") @@ -437,8 +463,28 @@ parse_args(int argc, char** argv, std::vector& _env) update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", _h || _d); update_env(_env, "OMNITRACE_USE_ROCM_SMI", _d); }); + parser + .add_argument({ "-w", "--wait" }, + "This option is a combination of '--trace-wait' and " + "'--sampling-wait'. See the descriptions for those two options.") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_TRACE_DELAY", p.get("wait")); + update_env(_env, "OMNITRACE_SAMPLING_DELAY", p.get("wait")); + }); + parser + .add_argument( + { "-d", "--duration" }, + "This option is a combination of '--trace-duration' and " + "'--sampling-duration'. See the descriptions for those two options.") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_TRACE_DURATION", p.get("duration")); + update_env(_env, "OMNITRACE_SAMPLING_DURATION", p.get("duration")); + }); - _add_separator("TRACING OPTIONS", ""); + parser.start_group("TRACING OPTIONS", "Specific options controlling tracing (i.e. " + "deterministic measurements of every event)"); parser .add_argument({ "--trace-file" }, "Specify the trace output filename. Relative filepath will be with " @@ -464,8 +510,57 @@ parse_args(int argc, char** argv, std::vector& _env) update_env(_env, "OMNITRACE_PERFETTO_FILL_POLICY", p.get("trace-fill-policy")); }); + parser + .add_argument({ "--trace-wait" }, + "Set the wait time (in seconds) " + "before collecting trace and/or profiling data" + "(in seconds). By default, the duration is in seconds of realtime " + "but that can changed via --trace-clock-id.") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_TRACE_DELAY", p.get("trace-wait")); + }); + parser + .add_argument({ "--trace-duration" }, + "Set the duration of the trace and/or profile data collection (in " + "seconds). By default, the duration is in seconds of realtime but " + "that can changed via --trace-clock-id.") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_TRACE_DURATION", p.get("trace-duration")); + }); + parser + .add_argument( + { "--trace-periods" }, + "More powerful version of specifying trace delay and/or duration. Format is " + "one or more groups of: :, ::, " + "and/or :::.") + .min_count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_TRACE_PERIODS", + join(array_config{ ",", "", "" }, + p.get>("trace-periods"))); + }); + parser + .add_argument( + { "--trace-clock-id" }, + "Set the default clock ID for for trace delay/duration. Note: \"cputime\" is " + "the *process* CPU time and might need to be scaled based on the number of " + "threads, i.e. 4 seconds of CPU-time for an application with 4 fully active " + "threads would equate to ~1 second of realtime. If this proves to be " + "difficult to handle in practice, please file a feature request for " + "omnitrace to auto-scale based on the number of threads.") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_TRACE_PERIOD_CLOCK_ID", + p.get("trace-clock-id")); + }) + .choices(clock_id_choices.first) + .choice_aliases(clock_id_choices.second); - _add_separator("PROFILE OPTIONS", ""); + parser.start_group("PROFILE OPTIONS", + "Specific options controlling profiling (i.e. deterministic " + "measurements which are aggregated into a summary)"); parser.add_argument({ "--profile-format" }, "Data formats for profiling results") .min_count(1) .max_count(3) @@ -496,7 +591,10 @@ parse_args(int argc, char** argv, std::vector& _env) if(_v.size() > 1) update_env(_env, "OMNITRACE_INPUT_PREFIX", _v.at(1)); }); - _add_separator("HOST/DEVICE (PROCESS SAMPLING) OPTIONS", ""); + parser.start_group( + "HOST/DEVICE (PROCESS SAMPLING) OPTIONS", + "Process sampling is background measurements for resources available to the " + "entire process. These samples are not tied to specific lines/regions of code"); parser .add_argument({ "--process-freq" }, "Set the default host/device sampling frequency " @@ -545,7 +643,8 @@ parse_args(int argc, char** argv, std::vector& _env) join(array_config{ "," }, p.get>("gpus"))); }); - _add_separator("GENERAL SAMPLING OPTIONS", ""); + parser.start_group("GENERAL SAMPLING OPTIONS", + "General options for timer-based sampling per-thread"); parser .add_argument({ "-f", "--freq" }, "Set the default sampling frequency " "(number of interrupts per second)") @@ -555,23 +654,24 @@ parse_args(int argc, char** argv, std::vector& _env) }); parser .add_argument( - { "-w", "--wait" }, + { "--sampling-wait" }, "Set the default wait time (i.e. delay) before taking first sample " "(in seconds). This delay time is based on the clock of the sampler, i.e., a " "delay of 1 second for CPU-clock sampler may not equal 1 second of realtime") .count(1) .action([&](parser_t& p) { - update_env(_env, "OMNITRACE_SAMPLING_DELAY", p.get("wait")); + update_env(_env, "OMNITRACE_SAMPLING_DELAY", p.get("sampling-wait")); }); parser .add_argument( - { "-d", "--duration" }, + { "--sampling-duration" }, "Set the duration of the sampling (in seconds of realtime). I.e., it is " "possible (currently) to set a CPU-clock time delay that exceeds the " "real-time duration... resulting in zero samples being taken") .count(1) .action([&](parser_t& p) { - update_env(_env, "OMNITRACE_SAMPLING_DURATION", p.get("duration")); + update_env(_env, "OMNITRACE_SAMPLING_DURATION", + p.get("sampling-duration")); }); parser .add_argument({ "-t", "--tids" }, @@ -584,7 +684,9 @@ parse_args(int argc, char** argv, std::vector& _env) join(array_config{ ", " }, p.get>("tids"))); }); - _add_separator("SAMPLING TIMER OPTIONS", ""); + parser.start_group( + "SAMPLING TIMER OPTIONS", + "These options determine the heuristic for deciding when to take a sample"); parser.add_argument({ "--cputime" }, _cputime_desc) .min_count(0) .action([&](parser_t& p) { @@ -660,8 +762,9 @@ parse_args(int argc, char** argv, std::vector& _env) _backend_choices.erase("rocprofiler"); #endif - _add_separator("BACKEND OPTIONS", "These options control region information captured " - "w/o sampling or instrumentation"); + parser.start_group("BACKEND OPTIONS", + "These options control region information captured " + "w/o sampling or instrumentation"); parser.add_argument({ "-I", "--include" }, "Include data from these backends") .choices(_backend_choices) .action([&](parser_t& p) { @@ -727,7 +830,7 @@ parse_args(int argc, char** argv, std::vector& _env) remove_env(_env, "KOKKOS_PROFILE_LIBRARY"); }); - _add_separator("HARDWARE COUNTER OPTIONS", ""); + parser.start_group("HARDWARE COUNTER OPTIONS", "See also: omnitrace-avail -H"); parser .add_argument({ "-C", "--cpu-events" }, "Set the CPU hardware counter events to record (ref: " @@ -750,7 +853,7 @@ parse_args(int argc, char** argv, std::vector& _env) }); #endif - _add_separator("MISCELLANEOUS OPTIONS", ""); + parser.start_group("MISCELLANEOUS OPTIONS", ""); parser .add_argument({ "-i", "--inlines" }, "Include inline info in output when available") @@ -768,6 +871,8 @@ parse_args(int argc, char** argv, std::vector& _env) update_env(_env, "HSA_ENABLE_INTERRUPT", p.get("hsa-interrupt")); }); + parser.end_group(); + auto _inpv = std::vector{}; auto _outv = std::vector{}; bool _hash = false; diff --git a/source/docs/conf.py b/source/docs/conf.py index 43dcbd25a7..9c41836f5f 100644 --- a/source/docs/conf.py +++ b/source/docs/conf.py @@ -154,6 +154,7 @@ for pref in preferences: from recommonmark.transform import AutoStructify + # app setup hook def setup(app): app.add_config_value( diff --git a/source/lib/omnitrace-dl/dl.cpp b/source/lib/omnitrace-dl/dl.cpp index 8b918427a8..251eefe337 100644 --- a/source/lib/omnitrace-dl/dl.cpp +++ b/source/lib/omnitrace-dl/dl.cpp @@ -550,8 +550,8 @@ extern "C" { void omnitrace_preinit_library(void) { - if(!omnitrace::common::get_env("OMNITRACE_COLORIZED_LOG", tim::log::colorized())) - tim::log::colorized() = false; + if(omnitrace::common::get_env("OMNITRACE_MONOCHROME", tim::log::monochrome())) + tim::log::monochrome() = true; } int omnitrace_preload_library(void) diff --git a/source/lib/omnitrace-user/omnitrace/categories.h b/source/lib/omnitrace-user/omnitrace/categories.h index 3ba3be597b..5d1c1275e8 100644 --- a/source/lib/omnitrace-user/omnitrace/categories.h +++ b/source/lib/omnitrace-user/omnitrace/categories.h @@ -75,6 +75,8 @@ extern "C" OMNITRACE_CATEGORY_PROCESS_PAGE_FAULT, OMNITRACE_CATEGORY_PROCESS_USER_MODE_TIME, OMNITRACE_CATEGORY_PROCESS_KERNEL_MODE_TIME, + OMNITRACE_CATEGORY_THREAD_WALL_TIME, + OMNITRACE_CATEGORY_THREAD_CPU_TIME, OMNITRACE_CATEGORY_THREAD_PAGE_FAULT, OMNITRACE_CATEGORY_THREAD_PEAK_MEMORY, OMNITRACE_CATEGORY_THREAD_CONTEXT_SWITCH, diff --git a/source/lib/omnitrace/CMakeLists.txt b/source/lib/omnitrace/CMakeLists.txt index 476a844b9a..210e660491 100644 --- a/source/lib/omnitrace/CMakeLists.txt +++ b/source/lib/omnitrace/CMakeLists.txt @@ -15,12 +15,8 @@ target_include_directories( omnitrace-interface-library INTERFACE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) -target_compile_definitions( - omnitrace-interface-library - INTERFACE - OMNITRACE_MAX_THREADS=${OMNITRACE_MAX_THREADS} - $,CUSTOM_DATA_SOURCE,>> - ) +target_compile_definitions(omnitrace-interface-library + INTERFACE OMNITRACE_MAX_THREADS=${OMNITRACE_MAX_THREADS}) target_link_libraries( omnitrace-interface-library diff --git a/source/lib/omnitrace/library.cpp b/source/lib/omnitrace/library.cpp index 00bd6cf4d0..c07cb56e5a 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -20,12 +20,13 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -// clang-format off #include -// clang-format on - +// +// above should always be included first +// #include "api.hpp" #include "common/setup.hpp" +#include "library/categories.hpp" #include "library/causal/data.hpp" #include "library/causal/experiment.hpp" #include "library/causal/sampling.hpp" @@ -37,6 +38,7 @@ #include "library/components/rocprofiler.hpp" #include "library/concepts.hpp" #include "library/config.hpp" +#include "library/constraint.hpp" #include "library/coverage.hpp" #include "library/critical_trace.hpp" #include "library/debug.hpp" @@ -56,24 +58,25 @@ #include "library/utility.hpp" #include "omnitrace/categories.h" // in omnitrace-user -#include -#include -#include #include #include +#include #include -#include +#include #include +#include +#include +#include #include #include #include #include +#include #include +#include #include #include -#include -#include using namespace omnitrace; @@ -122,9 +125,18 @@ ensure_initialization(bool _offset, int64_t _glob_n, int64_t _offset_n) return _offset; } +void +finalization_handler() +{ + if(get_state() == State::Active) omnitrace_finalize(); +} + auto ensure_finalization(bool _static_init = false) { + if(config::set_signal_handler(nullptr) == nullptr) + config::set_signal_handler(&finalization_handler); + if(_static_init) { auto _idx = threading::add_callback(&ensure_initialization); @@ -132,6 +144,12 @@ ensure_finalization(bool _static_init = false) throw exception("failure adding threading callback"); } + OMNITRACE_CI_BASIC_THROW( + config::set_signal_handler(nullptr) != &finalization_handler, + "Assignment of signal handler failed. signal handler is %s, expected %s\n", + as_hex(reinterpret_cast(config::set_signal_handler(nullptr))).c_str(), + as_hex(reinterpret_cast(&finalization_handler)).c_str()); + const auto& _info = thread_info::init(); const auto& _tid = _info->index_data; if(_tid) @@ -144,7 +162,7 @@ ensure_finalization(bool _static_init = false) _tid->system_value); } - if(!get_env("OMNITRACE_COLORIZED_LOG", true)) tim::log::colorized() = false; + if(get_env("OMNITRACE_MONOCHROME", false)) tim::log::monochrome() = true; (void) tim::manager::instance(); (void) tim::settings::shared_instance(); @@ -192,7 +210,7 @@ struct fini_bundle { using data_type = std::tuple; - TIMEMORY_DEFAULT_OBJECT(fini_bundle) + OMNITRACE_DEFAULT_OBJECT(fini_bundle) fini_bundle(std::string_view _label) : m_label{ _label } @@ -400,7 +418,7 @@ omnitrace_init_library_hidden() extern "C" bool omnitrace_init_tooling_hidden() { - if(!get_env("OMNITRACE_COLORIZED_LOG", true, false)) tim::log::colorized() = false; + if(get_env("OMNITRACE_MONOCHROME", false, false)) tim::log::monochrome() = true; if(!tim::get_env("OMNITRACE_INIT_TOOLING", true)) { @@ -538,6 +556,8 @@ omnitrace_init_tooling_hidden() omnitrace::perfetto::start(); } + categories::setup(); + // if static objects are destroyed in the inverse order of when they are // created this should ensure that finalization is called before perfetto // ends the tracing session @@ -701,6 +721,10 @@ omnitrace_finalize_hidden(void) push_enable_sampling_on_child_threads(false); set_sampling_on_all_future_threads(false); + // if the categories are not enabled, it can/will suppress generating output for data + // in category + categories::enable_categories(); + auto _debug_init = get_debug_finalize(); auto _debug_value = get_debug(); if(_debug_init) config::set_setting_value("OMNITRACE_DEBUG", true); @@ -951,7 +975,7 @@ omnitrace_finalize_hidden(void) bool _perfetto_output_error = false; if(get_use_perfetto() && !is_system_backend()) { - auto& tracing_session = tracing::get_perfetto_session(); + auto& tracing_session = get_perfetto_session(); OMNITRACE_CI_THROW(tracing_session == nullptr, "Null pointer to the tracing session"); @@ -1061,6 +1085,8 @@ omnitrace_finalize_hidden(void) "omnitrace", _cfg); } + categories::shutdown(); + _finalization.stop(); if(_perfetto_output_error) diff --git a/source/lib/omnitrace/library/CMakeLists.txt b/source/lib/omnitrace/library/CMakeLists.txt index 50d7f84420..6c84ca864e 100644 --- a/source/lib/omnitrace/library/CMakeLists.txt +++ b/source/lib/omnitrace/library/CMakeLists.txt @@ -3,7 +3,9 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/defines.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/defines.hpp @ONLY) set(library_sources + ${CMAKE_CURRENT_LIST_DIR}/categories.cpp ${CMAKE_CURRENT_LIST_DIR}/config.cpp + ${CMAKE_CURRENT_LIST_DIR}/constraint.cpp ${CMAKE_CURRENT_LIST_DIR}/coverage.cpp ${CMAKE_CURRENT_LIST_DIR}/cpu_freq.cpp ${CMAKE_CURRENT_LIST_DIR}/critical_trace.cpp @@ -31,6 +33,7 @@ set(library_headers ${CMAKE_CURRENT_LIST_DIR}/common.hpp ${CMAKE_CURRENT_LIST_DIR}/concepts.hpp ${CMAKE_CURRENT_LIST_DIR}/config.hpp + ${CMAKE_CURRENT_LIST_DIR}/constraint.hpp ${CMAKE_CURRENT_LIST_DIR}/coverage.hpp ${CMAKE_CURRENT_LIST_DIR}/cpu_freq.hpp ${CMAKE_CURRENT_LIST_DIR}/critical_trace.hpp diff --git a/source/lib/omnitrace/library/binary/address_multirange.hpp b/source/lib/omnitrace/library/binary/address_multirange.hpp index 24fabcb8b4..5193e6969f 100644 --- a/source/lib/omnitrace/library/binary/address_multirange.hpp +++ b/source/lib/omnitrace/library/binary/address_multirange.hpp @@ -39,7 +39,7 @@ struct address_multirange struct coarse {}; - TIMEMORY_DEFAULT_OBJECT(address_multirange) + OMNITRACE_DEFAULT_OBJECT(address_multirange) address_multirange& operator+=(std::pair&&); address_multirange& operator+=(std::pair&& _v); diff --git a/source/lib/omnitrace/library/binary/address_range.hpp b/source/lib/omnitrace/library/binary/address_range.hpp index ffd0de2717..5cf3d077ec 100644 --- a/source/lib/omnitrace/library/binary/address_range.hpp +++ b/source/lib/omnitrace/library/binary/address_range.hpp @@ -43,7 +43,7 @@ struct address_range uintptr_t low = std::numeric_limits::max(); uintptr_t high = std::numeric_limits::min(); - TIMEMORY_DEFAULT_OBJECT(address_range) + OMNITRACE_DEFAULT_OBJECT(address_range) explicit address_range(uintptr_t _v); address_range(uintptr_t _low, uintptr_t _high); diff --git a/source/lib/omnitrace/library/binary/analysis.cpp b/source/lib/omnitrace/library/binary/analysis.cpp index 6a8083eb9f..2922f541a0 100644 --- a/source/lib/omnitrace/library/binary/analysis.cpp +++ b/source/lib/omnitrace/library/binary/analysis.cpp @@ -64,7 +64,7 @@ namespace binary namespace { binary_info -parse_line_info(const std::string& _name) +parse_line_info(const std::string& _name, bool _process_dwarf) { auto _info = binary_info{}; @@ -105,10 +105,17 @@ parse_line_info(const std::string& _name) << "section set size (" << _section_set.size() << ") != section map size (" << _section_map.size() << ")\n"; - _info.debug_info = dwarf_entry::process_dwarf(_bfd->fd, _info.ranges); + if(_process_dwarf) + { + std::tie(_info.debug_info, _info.ranges, _info.breakpoints) = + dwarf_entry::process_dwarf(_bfd->fd); + } for(auto& itr : _info.symbols) - itr.read_dwarf(_info.debug_info); + { + itr.read_dwarf_entries(_info.debug_info); + itr.read_dwarf_breakpoints(_info.breakpoints); + } _info.sort(); } @@ -122,7 +129,7 @@ parse_line_info(const std::string& _name) std::vector get_binary_info(const std::vector& _files, - const std::vector& _filters) + const std::vector& _filters, bool _process_dwarf) { auto _satisfies_filter = [&_filters](auto _scope, const std::string& _value) { for(const auto& itr : _filters) // NOLINT @@ -157,7 +164,7 @@ get_binary_info(const std::vector& _files, if(filepath::exists(_filename) && _satisfies_binary_filter(_filename) && _exists.find(_filename) == _exists.end()) { - _data.emplace_back(parse_line_info(_filename)); + _data.emplace_back(parse_line_info(_filename, _process_dwarf)); _exists.emplace(_filename); } } diff --git a/source/lib/omnitrace/library/binary/analysis.hpp b/source/lib/omnitrace/library/binary/analysis.hpp index 6121898686..87038a37d1 100644 --- a/source/lib/omnitrace/library/binary/analysis.hpp +++ b/source/lib/omnitrace/library/binary/analysis.hpp @@ -54,6 +54,7 @@ using bfd_file = ::tim::unwind::bfd_file; using hash_value_t = ::tim::hash_value_t; std::vector -get_binary_info(const std::vector&, const std::vector&); +get_binary_info(const std::vector&, const std::vector&, + bool _process_dwarf = true); } // namespace binary } // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/binary_info.hpp b/source/lib/omnitrace/library/binary/binary_info.hpp index ebd45c26da..30e31bcf33 100644 --- a/source/lib/omnitrace/library/binary/binary_info.hpp +++ b/source/lib/omnitrace/library/binary/binary_info.hpp @@ -30,8 +30,10 @@ #include +#include #include #include +#include #include namespace omnitrace @@ -40,17 +42,19 @@ namespace binary { struct binary_info { - std::shared_ptr bfd = {}; - std::vector mappings = {}; - std::deque symbols = {}; - std::deque debug_info = {}; - std::vector ranges = {}; - std::unordered_map sections = {}; + std::shared_ptr bfd = {}; + std::vector mappings = {}; + std::deque symbols = {}; + std::deque debug_info = {}; + std::vector ranges = {}; + std::vector breakpoints = {}; + std::unordered_map sections = {}; - void sort(); + void sort(); + std::string filename() const; template - RetT* find_section(uintptr_t); + RetT* find_section(uintptr_t) const; }; inline void @@ -60,11 +64,12 @@ binary_info::sort() utility::filter_sort_unique(symbols); utility::filter_sort_unique(ranges); utility::filter_sort_unique(debug_info); + utility::filter_sort_unique(breakpoints); } template inline RetT* -binary_info::find_section(uintptr_t _addr) +binary_info::find_section(uintptr_t _addr) const { for(const auto& sitr : sections) { @@ -72,5 +77,11 @@ binary_info::find_section(uintptr_t _addr) } return nullptr; } + +inline std::string +binary_info::filename() const +{ + return (bfd) ? std::string{ bfd->name } : std::string{}; +} } // namespace binary } // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/dwarf_entry.cpp b/source/lib/omnitrace/library/binary/dwarf_entry.cpp index be29ca228c..00c18029ad 100644 --- a/source/lib/omnitrace/library/binary/dwarf_entry.cpp +++ b/source/lib/omnitrace/library/binary/dwarf_entry.cpp @@ -41,28 +41,51 @@ get_dwarf_address_ranges(Dwarf_Die* _die) { auto _ranges = std::vector{}; - if(dwarf_tag(_die) != DW_TAG_compile_unit) return _ranges; + if(dwarf_tag(_die) != DW_TAG_compile_unit && dwarf_tag(_die) != DW_TAG_subprogram) + return _ranges; Dwarf_Addr _low_pc; Dwarf_Addr _high_pc; dwarf_lowpc(_die, &_low_pc); dwarf_highpc(_die, &_high_pc); - _ranges.emplace_back(address_range{ _low_pc, _high_pc }); + if(_low_pc > _high_pc) + { + Dwarf_Addr _entry_pc; + dwarf_entrypc(_die, &_entry_pc); + if(_entry_pc < _low_pc) _low_pc = _entry_pc; + } + + if(_low_pc < _high_pc) _ranges.emplace_back(_low_pc, _high_pc); Dwarf_Addr _base_addr; ptrdiff_t _offset = 0; do { - _ranges.emplace_back(address_range{ 0, 0 }); - } while((_offset = dwarf_ranges(_die, _offset, &_base_addr, &_ranges.back().low, - &_ranges.back().high)) > 0); - // will always have one extra - _ranges.pop_back(); + uintptr_t _low = 0; + uintptr_t _high = 0; + _offset = dwarf_ranges(_die, _offset, &_base_addr, &_low, &_high); + if(_low < _high) _ranges.emplace_back(_low, _high); + } while(_offset > 0); return _ranges; } +auto +get_dwarf_breakpoints(Dwarf_Die* _die) +{ + auto _bkpts = std::vector{}; + + if(dwarf_tag(_die) != DW_TAG_subprogram) return _bkpts; + + Dwarf_Addr* _pts = nullptr; + auto _npts = dwarf_entry_breakpoints(_die, &_pts); + + if(_npts > 0 && _pts) _bkpts.assign(_pts, _pts + _npts); + + return _bkpts; +} + auto get_dwarf_entry(Dwarf_Die* _die) { @@ -133,37 +156,50 @@ dwarf_entry::is_valid() const return (*this != dwarf_entry{} && !file.empty()); } -std::deque -dwarf_entry::process_dwarf(int _fd, std::vector& _ranges) +dwarf_entry::dwarf_tuple_t +dwarf_entry::process_dwarf(int _fd) { - auto* _dwarf_v = dwarf_begin(_fd, DWARF_C_READ); - auto _line_info = std::deque{}; + auto* _dwarf_v = dwarf_begin(_fd, DWARF_C_READ); + auto _data_v = dwarf_tuple_t{}; - size_t cu_header_size = 0; - Dwarf_Off cu_off = 0; - Dwarf_Off next_cu_off = 0; - for(; dwarf_nextcu(_dwarf_v, cu_off, &next_cu_off, &cu_header_size, nullptr, nullptr, - nullptr) == 0; - cu_off = next_cu_off) + if(_dwarf_v) { - Dwarf_Off cu_die_off = cu_off + cu_header_size; - Dwarf_Die cu_die; - if(dwarf_offdie(_dwarf_v, cu_die_off, &cu_die) != nullptr) + auto& _entries = std::get<0>(_data_v); + auto& _ranges = std::get<1>(_data_v); + auto& _bkpts = std::get<2>(_data_v); + + size_t cu_header_size = 0; + Dwarf_Off cu_off = 0; + Dwarf_Off next_cu_off = 0; + for(; dwarf_nextcu(_dwarf_v, cu_off, &next_cu_off, &cu_header_size, nullptr, + nullptr, nullptr) == 0; + cu_off = next_cu_off) { - Dwarf_Die* _die = &cu_die; - if(dwarf_tag(_die) == DW_TAG_compile_unit) + auto cu_die_off = cu_off + cu_header_size; + auto cu_die = Dwarf_Die{}; + if(dwarf_offdie(_dwarf_v, cu_die_off, &cu_die) != nullptr) { - combine(_line_info, get_dwarf_entry(_die)); - combine(_ranges, get_dwarf_address_ranges(_die)); + Dwarf_Die* _die = &cu_die; + if(dwarf_tag(_die) == DW_TAG_compile_unit) + { + combine(_entries, get_dwarf_entry(_die)); + combine(_ranges, get_dwarf_address_ranges(_die)); + } + else if(dwarf_tag(_die) == DW_TAG_subprogram) + { + combine(_bkpts, get_dwarf_breakpoints(_die)); + combine(_ranges, get_dwarf_address_ranges(_die)); + } } } + + dwarf_end(_dwarf_v); + utility::filter_sort_unique(_entries); + utility::filter_sort_unique(_ranges); + utility::filter_sort_unique(_bkpts); } - dwarf_end(_dwarf_v); - utility::filter_sort_unique(_line_info); - utility::filter_sort_unique(_ranges); - - return _line_info; + return _data_v; } template diff --git a/source/lib/omnitrace/library/binary/dwarf_entry.hpp b/source/lib/omnitrace/library/binary/dwarf_entry.hpp index 0abb5366ad..3832bfcca2 100644 --- a/source/lib/omnitrace/library/binary/dwarf_entry.hpp +++ b/source/lib/omnitrace/library/binary/dwarf_entry.hpp @@ -31,7 +31,11 @@ namespace binary { struct dwarf_entry { - TIMEMORY_DEFAULT_OBJECT(dwarf_entry) + // tuple of dwarf line info, address ranges, and breakpoints + using dwarf_tuple_t = std::tuple, std::vector, + std::vector>; + + OMNITRACE_DEFAULT_OBJECT(dwarf_entry) bool begin_statement = false; bool end_sequence = false; @@ -53,7 +57,7 @@ struct dwarf_entry bool operator!=(const dwarf_entry&) const; explicit operator bool() const { return is_valid(); } - static std::deque process_dwarf(int _fd, std::vector&); + static dwarf_tuple_t process_dwarf(int _fd); template void serialize(ArchiveT&, const unsigned int); diff --git a/source/lib/omnitrace/library/binary/link_map.cpp b/source/lib/omnitrace/library/binary/link_map.cpp index 962c8fa064..bd1536530d 100644 --- a/source/lib/omnitrace/library/binary/link_map.cpp +++ b/source/lib/omnitrace/library/binary/link_map.cpp @@ -39,13 +39,59 @@ namespace omnitrace { namespace binary { +namespace +{ +const open_modes_vec_t default_link_open_modes = { (RTLD_LAZY | RTLD_NOLOAD), + (RTLD_LAZY | RTLD_LOCAL) }; +} + +std::string +get_linked_path(const char* _name, open_modes_vec_t&& _open_modes) +{ + if(_name == nullptr) return config::get_exe_realpath(); + + if(_open_modes.empty()) _open_modes = default_link_open_modes; + + auto _lib = std::string{ _name }; + void* _handle = nullptr; + bool _noload = false; + for(auto _mode : _open_modes) + { + _handle = dlopen(_name, _mode); + _noload = (_mode & RTLD_NOLOAD) == RTLD_NOLOAD; + if(_handle) break; + } + + if(_handle) + { + struct link_map* _link_map = nullptr; + dlinfo(_handle, RTLD_DI_LINKMAP, &_link_map); + if(_link_map != nullptr && !std::string_view{ _link_map->l_name }.empty()) + { + _lib = filepath::realpath(_link_map->l_name, nullptr, false); + } + if(_noload == false) dlclose(_handle); + } + return _lib; +} + std::set get_link_map(const char* _lib, const std::string& _exclude_linked_by, - const std::string& _exclude_re) + const std::string& _exclude_re, open_modes_vec_t&& _open_modes) { - auto _get_chain = [](const char* _name) { - void* _handle = dlopen(_name, RTLD_LAZY | RTLD_NOLOAD); - auto _chain = std::set{}; + if(_open_modes.empty()) _open_modes = default_link_open_modes; + + auto _get_chain = [&_open_modes](const char* _name) { + void* _handle = nullptr; + bool _noload = false; + for(auto _mode : _open_modes) + { + _handle = dlopen(_name, _mode); + _noload = (_mode & RTLD_NOLOAD) == RTLD_NOLOAD; + if(_handle) break; + } + + auto _chain = std::set{}; if(_handle) { struct link_map* _link_map = nullptr; @@ -66,6 +112,8 @@ get_link_map(const char* _lib, const std::string& _exclude_linked_by, } _next = _next->l_next; } + + if(_noload == false) dlclose(_handle); } return _chain; }; @@ -78,6 +126,7 @@ get_link_map(const char* _lib, const std::string& _exclude_linked_by, for(const auto& itr : _full_chain) { + std::cout << itr << std::endl; if(_excl_chain.find(itr) == _excl_chain.end()) { if(_exclude_re.empty() || !std::regex_search(itr, std::regex{ _exclude_re })) diff --git a/source/lib/omnitrace/library/binary/link_map.hpp b/source/lib/omnitrace/library/binary/link_map.hpp index fecfebdbf0..946545775d 100644 --- a/source/lib/omnitrace/library/binary/link_map.hpp +++ b/source/lib/omnitrace/library/binary/link_map.hpp @@ -23,14 +23,18 @@ #pragma once #include +#include #include #include #include +#include namespace omnitrace { namespace binary { +using open_modes_vec_t = std::vector; + struct link_file { link_file(std::string_view&& _v) @@ -44,11 +48,16 @@ struct link_file std::string name = {}; }; +// helper function for translating generic lib name to resolved path +std::string +get_linked_path(const char*, open_modes_vec_t&& = {}); + // default parameters: get the linked binaries for the exe but exclude the linked binaries // from libomnitrace std::set get_link_map(const char* _lib = nullptr, const std::string& _exclude_linked_by = "libomnitrace.so", - const std::string& _exclude_re = "libomnitrace-([a-zA-Z]+)\\.so"); + const std::string& _exclude_re = "libomnitrace-([a-zA-Z]+)\\.so", + open_modes_vec_t&& _open_modes = {}); } // namespace binary } // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/symbol.cpp b/source/lib/omnitrace/library/binary/symbol.cpp index b2fdff880a..a551d8f608 100644 --- a/source/lib/omnitrace/library/binary/symbol.cpp +++ b/source/lib/omnitrace/library/binary/symbol.cpp @@ -136,7 +136,7 @@ symbol::operator bool() const } size_t -symbol::read_dwarf(const std::deque& _info) +symbol::read_dwarf_entries(const std::deque& _info) { for(const auto& itr : _info) { @@ -173,6 +173,20 @@ symbol::read_dwarf(const std::deque& _info) return dwarf_info.size(); } +size_t +symbol::read_dwarf_breakpoints(const std::vector& _bkpts) +{ + for(const auto& itr : _bkpts) + { + if(address.contains(itr)) breakpoints.emplace_back(itr); + } + + // make sure the breakpoints are sorted low to high + std::sort(breakpoints.begin(), breakpoints.end()); + + return breakpoints.size(); +} + bool symbol::read_bfd(bfd_file& _bfd) { diff --git a/source/lib/omnitrace/library/binary/symbol.hpp b/source/lib/omnitrace/library/binary/symbol.hpp index ebbfc62780..6e86ddf160 100644 --- a/source/lib/omnitrace/library/binary/symbol.hpp +++ b/source/lib/omnitrace/library/binary/symbol.hpp @@ -67,7 +67,8 @@ struct symbol : private tim::unwind::bfd_file::symbol explicit operator bool() const; bool read_bfd(bfd_file&); - size_t read_dwarf(const std::deque&); + size_t read_dwarf_entries(const std::deque&); + size_t read_dwarf_breakpoints(const std::vector&); address_range ipaddr() const { return address + load_address; } symbol clone() const; @@ -89,6 +90,7 @@ struct symbol : private tim::unwind::bfd_file::symbol address_range address = {}; std::string func = {}; std::string file = {}; + std::vector breakpoints = {}; std::vector inlines = {}; std::vector dwarf_info = {}; }; diff --git a/source/lib/omnitrace/library/categories.cpp b/source/lib/omnitrace/library/categories.cpp new file mode 100644 index 0000000000..6b0eb895d1 --- /dev/null +++ b/source/lib/omnitrace/library/categories.cpp @@ -0,0 +1,141 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "library/categories.hpp" +#include "library/common.hpp" +#include "library/config.hpp" +#include "library/constraint.hpp" +#include "library/debug.hpp" +#include "library/timemory.hpp" +#include "library/utility.hpp" + +#include +#include + +namespace omnitrace +{ +namespace categories +{ +namespace +{ +template +void +configure_categories(bool _enable, const std::set& _categories) +{ + auto _name = trait::name::value; + if(_categories.count(_name) > 0) + { + OMNITRACE_VERBOSE_F(3, "%s category: %s\n", (_enable) ? "Enabling" : "Disabling", + _name); + trait::runtime_enabled::set(_enable); + } +} + +template +void +configure_categories(bool _enable, const std::set& _categories, + std::index_sequence) +{ + (configure_categories>(_enable, _categories), ...); +} + +void +configure_categories(bool _enable, const std::set& _categories) +{ + OMNITRACE_VERBOSE_F(1, "%s categories...\n", (_enable) ? "Enabling" : "Disabling"); + + configure_categories( + _enable, _categories, + utility::make_index_sequence_range<1, OMNITRACE_CATEGORY_LAST>{}); +} +} // namespace + +void +enable_categories(const std::set& _categories) +{ + configure_categories( + true, _categories, + utility::make_index_sequence_range<1, OMNITRACE_CATEGORY_LAST>{}); +} + +void +disable_categories(const std::set& _categories) +{ + configure_categories( + false, _categories, + utility::make_index_sequence_range<1, OMNITRACE_CATEGORY_LAST>{}); +} + +void +setup() +{ + // disable specified categories + disable_categories(); + + auto _trace_specs = constraint::get_trace_specs(); + + if(!_trace_specs.empty()) + { + auto _trace_stages = constraint::get_trace_stages(); + + _trace_stages.init = [](const constraint::spec& _spec) { + if(_spec.delay > 1.0e-3) disable_categories(config::get_enabled_categories()); + return get_state() < State::Finalized; + }; + + _trace_stages.start = [](const constraint::spec&) { + enable_categories(config::get_enabled_categories()); + return get_state() < State::Finalized; + }; + + _trace_stages.stop = [](const constraint::spec&) { + // only disable categories if not finalized since this might run in background + // during finalization and disable output of data in those categories + if(get_state() < State::Finalized) + disable_categories(config::get_enabled_categories()); + return get_state() < State::Finalized; + }; + + auto _promise = std::promise(); + std::thread{ [_trace_specs, _trace_stages](std::promise* _prom) { + // ensure all categories are disabled before proceeding + // if a delay is requested + if(_trace_specs.front().delay > 1.0e-3) + disable_categories(config::get_enabled_categories()); + _prom->set_value(); + for(const auto& itr : _trace_specs) + itr(_trace_stages); + }, + &_promise } + .detach(); + + _promise.get_future().wait_for(std::chrono::seconds{ 1 }); + } +} + +void +shutdown() +{ + disable_categories(config::get_enabled_categories()); +} +} // namespace categories +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/categories.hpp b/source/lib/omnitrace/library/categories.hpp index 65bf5d5946..e9a0b36cab 100644 --- a/source/lib/omnitrace/library/categories.hpp +++ b/source/lib/omnitrace/library/categories.hpp @@ -122,6 +122,8 @@ OMNITRACE_DEFINE_CATEGORY(category, process_context_switch, OMNITRACE_CATEGORY_P OMNITRACE_DEFINE_CATEGORY(category, process_page_fault, OMNITRACE_CATEGORY_PROCESS_PAGE_FAULT, "process_page_fault", "Memory page faults in process (collected in background thread)") OMNITRACE_DEFINE_CATEGORY(category, process_user_mode_time, OMNITRACE_CATEGORY_PROCESS_USER_MODE_TIME, "process_user_cpu_time", "CPU time of functions executing in user-space in process in seconds (collected in background thread)") OMNITRACE_DEFINE_CATEGORY(category, process_kernel_mode_time, OMNITRACE_CATEGORY_PROCESS_KERNEL_MODE_TIME, "process_kernel_cpu_time", "CPU time of functions executing in kernel-space in process in seconds (collected in background thread)") +OMNITRACE_DEFINE_CATEGORY(category, thread_wall_time, OMNITRACE_CATEGORY_THREAD_WALL_TIME, "thread_wall_time", "Wall-clock time on thread (derived from sampling)") +OMNITRACE_DEFINE_CATEGORY(category, thread_cpu_time, OMNITRACE_CATEGORY_THREAD_CPU_TIME, "thread_cpu_time", "CPU time on thread (derived from sampling)") OMNITRACE_DEFINE_CATEGORY(category, thread_page_fault, OMNITRACE_CATEGORY_THREAD_PAGE_FAULT, "thread_page_fault", "Memory page faults on thread (derived from sampling)") OMNITRACE_DEFINE_CATEGORY(category, thread_peak_memory, OMNITRACE_CATEGORY_THREAD_PEAK_MEMORY, "thread_peak_memory", "Peak memory usage on thread in MB (derived from sampling)") OMNITRACE_DEFINE_CATEGORY(category, thread_context_switch, OMNITRACE_CATEGORY_THREAD_CONTEXT_SWITCH, "thread_context_switch", "Context switches on thread (derived from sampling)") @@ -182,6 +184,8 @@ using name = perfetto_category; OMNITRACE_PERFETTO_CATEGORY(category::process_page_fault), \ OMNITRACE_PERFETTO_CATEGORY(category::process_user_mode_time), \ OMNITRACE_PERFETTO_CATEGORY(category::process_kernel_mode_time), \ + OMNITRACE_PERFETTO_CATEGORY(category::thread_wall_time), \ + OMNITRACE_PERFETTO_CATEGORY(category::thread_cpu_time), \ OMNITRACE_PERFETTO_CATEGORY(category::thread_page_fault), \ OMNITRACE_PERFETTO_CATEGORY(category::thread_peak_memory), \ OMNITRACE_PERFETTO_CATEGORY(category::thread_context_switch), \ @@ -193,3 +197,33 @@ using name = perfetto_category; #if defined(TIMEMORY_USE_PERFETTO) # define TIMEMORY_PERFETTO_CATEGORIES OMNITRACE_PERFETTO_CATEGORIES #endif + +#include +#include + +namespace omnitrace +{ +inline namespace config +{ +std::set +get_enabled_categories(); + +std::set +get_disabled_categories(); +} // namespace config + +namespace categories +{ +void +enable_categories(const std::set& = config::get_enabled_categories()); + +void +disable_categories(const std::set& = config::get_disabled_categories()); + +void +setup(); + +void +shutdown(); +} // namespace categories +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/components/blocking_gotcha.hpp b/source/lib/omnitrace/library/causal/components/blocking_gotcha.hpp index d828459eca..320698b07e 100644 --- a/source/lib/omnitrace/library/causal/components/blocking_gotcha.hpp +++ b/source/lib/omnitrace/library/causal/components/blocking_gotcha.hpp @@ -46,7 +46,7 @@ struct blocking_gotcha : comp::base { static constexpr size_t gotcha_capacity = 13; - TIMEMORY_DEFAULT_OBJECT(blocking_gotcha) + OMNITRACE_DEFAULT_OBJECT(blocking_gotcha) // string id for component static std::string label(); diff --git a/source/lib/omnitrace/library/causal/components/causal_gotcha.hpp b/source/lib/omnitrace/library/causal/components/causal_gotcha.hpp index f089d45edd..b931c4659f 100644 --- a/source/lib/omnitrace/library/causal/components/causal_gotcha.hpp +++ b/source/lib/omnitrace/library/causal/components/causal_gotcha.hpp @@ -37,7 +37,7 @@ namespace component { struct causal_gotcha : tim::component::base { - TIMEMORY_DEFAULT_OBJECT(causal_gotcha) + OMNITRACE_DEFAULT_OBJECT(causal_gotcha) // string id for component static std::string label() { return "causal_gotcha"; } diff --git a/source/lib/omnitrace/library/causal/components/progress_point.hpp b/source/lib/omnitrace/library/causal/components/progress_point.hpp index 56d5b07e29..50b92d5452 100644 --- a/source/lib/omnitrace/library/causal/components/progress_point.hpp +++ b/source/lib/omnitrace/library/causal/components/progress_point.hpp @@ -52,7 +52,7 @@ struct progress_point : comp::base static std::string label(); static std::string description(); - TIMEMORY_DEFAULT_OBJECT(progress_point) + OMNITRACE_DEFAULT_OBJECT(progress_point) void start(); void stop(); @@ -130,7 +130,7 @@ struct push_node { using type = omnitrace::causal::component::progress_point; - TIMEMORY_DEFAULT_OBJECT(push_node) + OMNITRACE_DEFAULT_OBJECT(push_node) push_node(type& _obj, scope::config _scope, hash_value_t _hash, int64_t _tid = threading::get_id()) @@ -147,7 +147,7 @@ struct pop_node { using type = omnitrace::causal::component::progress_point; - TIMEMORY_DEFAULT_OBJECT(pop_node) + OMNITRACE_DEFAULT_OBJECT(pop_node) pop_node(type& _obj, int64_t _tid = threading::get_id()) { (*this)(_obj, _tid); } diff --git a/source/lib/omnitrace/library/causal/components/unblocking_gotcha.hpp b/source/lib/omnitrace/library/causal/components/unblocking_gotcha.hpp index 62a53db8e0..f8197a62cf 100644 --- a/source/lib/omnitrace/library/causal/components/unblocking_gotcha.hpp +++ b/source/lib/omnitrace/library/causal/components/unblocking_gotcha.hpp @@ -45,7 +45,7 @@ struct unblocking_gotcha : comp::base { static constexpr size_t gotcha_capacity = 8; - TIMEMORY_DEFAULT_OBJECT(unblocking_gotcha) + OMNITRACE_DEFAULT_OBJECT(unblocking_gotcha) // string id for component static std::string label(); diff --git a/source/lib/omnitrace/library/causal/delay.hpp b/source/lib/omnitrace/library/causal/delay.hpp index 24a509cde6..9b82c10339 100644 --- a/source/lib/omnitrace/library/causal/delay.hpp +++ b/source/lib/omnitrace/library/causal/delay.hpp @@ -44,7 +44,7 @@ struct delay { using value_type = void; - TIMEMORY_DEFAULT_OBJECT(delay) + OMNITRACE_DEFAULT_OBJECT(delay) static void process(); static void credit(); diff --git a/source/lib/omnitrace/library/causal/experiment.hpp b/source/lib/omnitrace/library/causal/experiment.hpp index bf4ea77ab6..dc1cd72fb6 100644 --- a/source/lib/omnitrace/library/causal/experiment.hpp +++ b/source/lib/omnitrace/library/causal/experiment.hpp @@ -90,7 +90,7 @@ struct experiment static std::string description(); static const std::atomic& get_current_experiment(); - TIMEMORY_DEFAULT_OBJECT(experiment) + OMNITRACE_DEFAULT_OBJECT(experiment) bool start(); bool wait() const; // returns false if interrupted diff --git a/source/lib/omnitrace/library/causal/selected_entry.hpp b/source/lib/omnitrace/library/causal/selected_entry.hpp index 848af86c18..158a41ad05 100644 --- a/source/lib/omnitrace/library/causal/selected_entry.hpp +++ b/source/lib/omnitrace/library/causal/selected_entry.hpp @@ -47,7 +47,7 @@ namespace causal { struct selected_entry { - TIMEMORY_DEFAULT_OBJECT(selected_entry) + OMNITRACE_DEFAULT_OBJECT(selected_entry) uintptr_t address = 0x0; uintptr_t symbol_address = 0x0; diff --git a/source/lib/omnitrace/library/components/backtrace_metrics.cpp b/source/lib/omnitrace/library/components/backtrace_metrics.cpp index c18bba6f6a..ea87e01459 100644 --- a/source/lib/omnitrace/library/components/backtrace_metrics.cpp +++ b/source/lib/omnitrace/library/components/backtrace_metrics.cpp @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -150,10 +151,32 @@ void backtrace_metrics::stop() {} +namespace +{ +template +auto get_enabled(tim::type_list) +{ + constexpr size_t N = sizeof...(Tp); + auto _v = std::bitset{}; + size_t _n = 0; + (_v.set(_n++, trait::runtime_enabled::get()), ...); + return _v; +} +} // namespace void backtrace_metrics::sample(int) { - auto _tid = threading::get_id(); + if(!get_enabled(type_list{}).all()) + { + m_valid.reset(); + return; + } + + m_valid = get_enabled(categories_t{}); + + // return if everything is disabled + if(!m_valid.any()) return; + auto _cache = tim::rusage_cache{ RUSAGE_THREAD }; m_cpu = tim::get_clock_thread_now(); m_mem_peak = _cache.get_peak_rss(); @@ -163,16 +186,15 @@ backtrace_metrics::sample(int) if constexpr(tim::trait::is_available::value) { - if(tim::trait::runtime_enabled::get()) + constexpr auto hw_counters_idx = tim::index_of::value; + constexpr auto hw_category_idx = + tim::index_of::value; + + auto _tid = threading::get_id(); + if(m_valid.test(hw_category_idx) && m_valid.test(hw_counters_idx)) { assert(get_papi_vector(_tid).get() != nullptr); m_hw_counter = get_papi_vector(_tid)->record(); - // const auto& _cfg = get_papi_vector(_tid)->get_config(); - // std::cerr << "Config: "; - // for(size_t i = 0; i < _cfg->size; ++i) - // std::cerr << "[" << _cfg->labels.at(i) << "|" << _cfg->event_names.at(i) - // << "|" << _cfg->event_codes.at(i) << "]"; - // std::cerr << "\n"; } } } @@ -220,23 +242,27 @@ backtrace_metrics::configure(bool _setup, int64_t _tid) } void -backtrace_metrics::init_perfetto(int64_t _tid) +backtrace_metrics::init_perfetto(int64_t _tid, valid_array_t _valid) { auto _hw_cnt_labels = *get_papi_labels(_tid); auto _tid_name = JOIN("", '[', _tid, ']'); if(!perfetto_counter_track::exists(_tid)) { - perfetto_counter_track::emplace( - _tid, JOIN(' ', "Thread Peak Memory Usage", _tid_name, "(S)"), "MB"); - perfetto_counter_track::emplace( - _tid, JOIN(' ', "Thread Context Switches", _tid_name, "(S)")); - perfetto_counter_track::emplace( - _tid, JOIN(' ', "Thread Page Faults", _tid_name, "(S)")); + if(get_valid(category::thread_peak_memory{}, _valid)) + perfetto_counter_track::emplace( + _tid, JOIN(' ', "Thread Peak Memory Usage", _tid_name, "(S)"), "MB"); + if(get_valid(category::thread_context_switch{}, _valid)) + perfetto_counter_track::emplace( + _tid, JOIN(' ', "Thread Context Switches", _tid_name, "(S)")); + if(get_valid(category::thread_page_fault{}, _valid)) + perfetto_counter_track::emplace( + _tid, JOIN(' ', "Thread Page Faults", _tid_name, "(S)")); } if(!perfetto_counter_track::exists(_tid) && - tim::trait::runtime_enabled::get()) + get_valid(type_list{}, _valid) && + get_valid(category::thread_hardware_counter{}, _valid)) { for(auto& itr : _hw_cnt_labels) { @@ -250,7 +276,7 @@ backtrace_metrics::init_perfetto(int64_t _tid) } void -backtrace_metrics::fini_perfetto(int64_t _tid) +backtrace_metrics::fini_perfetto(int64_t _tid, valid_array_t _valid) { auto _hw_cnt_labels = *get_papi_labels(_tid); const auto& _thread_info = thread_info::get(_tid, SequentTID); @@ -260,22 +286,32 @@ backtrace_metrics::fini_perfetto(int64_t _tid) uint64_t _ts = _thread_info->get_stop(); - TRACE_COUNTER("thread_peak_memory", - perfetto_counter_track::at(_tid, 0), _ts, 0); + if(get_valid(category::thread_peak_memory{}, _valid)) + { + TRACE_COUNTER(trait::name::value, + perfetto_counter_track::at(_tid, 0), _ts, 0); + } - TRACE_COUNTER("thread_context_switch", - perfetto_counter_track::at(_tid, 1), _ts, 0); + if(get_valid(category::thread_context_switch{}, _valid)) + { + TRACE_COUNTER(trait::name::value, + perfetto_counter_track::at(_tid, 1), _ts, 0); + } - TRACE_COUNTER("thread_page_fault", - perfetto_counter_track::at(_tid, 2), _ts, 0); + if(get_valid(category::thread_page_fault{}, _valid)) + { + TRACE_COUNTER(trait::name::value, + perfetto_counter_track::at(_tid, 2), _ts, 0); + } - if(tim::trait::runtime_enabled::get()) + if(get_valid(type_list{}, _valid) && + get_valid(category::thread_hardware_counter{}, _valid)) { for(size_t i = 0; i < perfetto_counter_track::size(_tid); ++i) { if(i < _hw_cnt_labels.size()) { - TRACE_COUNTER("thread_hardware_counter", + TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, i), _ts, 0.0); } } @@ -285,23 +321,33 @@ backtrace_metrics::fini_perfetto(int64_t _tid) void backtrace_metrics::post_process_perfetto(int64_t _tid, uint64_t _ts) const { - TRACE_COUNTER("thread_peak_memory", - perfetto_counter_track::at(_tid, 0), _ts, - m_mem_peak / units::megabyte); + if((*this)(category::thread_peak_memory{})) + { + TRACE_COUNTER(trait::name::value, + perfetto_counter_track::at(_tid, 0), _ts, + m_mem_peak / units::megabyte); + } - TRACE_COUNTER("thread_context_switch", - perfetto_counter_track::at(_tid, 1), _ts, m_ctx_swch); + if((*this)(category::thread_context_switch{})) + { + TRACE_COUNTER(trait::name::value, + perfetto_counter_track::at(_tid, 1), _ts, + m_ctx_swch); + } - TRACE_COUNTER("thread_page_fault", - perfetto_counter_track::at(_tid, 2), _ts, m_page_flt); - - if(tim::trait::runtime_enabled::get()) + if((*this)(category::thread_page_fault{})) + { + TRACE_COUNTER(trait::name::value, + perfetto_counter_track::at(_tid, 2), _ts, + m_page_flt); + } + if((*this)(type_list{}) && (*this)(category::thread_hardware_counter{})) { for(size_t i = 0; i < perfetto_counter_track::size(_tid); ++i) { if(i < m_hw_counter.size()) { - TRACE_COUNTER("thread_hardware_counter", + TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, i), _ts, m_hw_counter.at(i)); } diff --git a/source/lib/omnitrace/library/components/backtrace_metrics.hpp b/source/lib/omnitrace/library/components/backtrace_metrics.hpp index 7eb6f9cfa7..d62658c1a4 100644 --- a/source/lib/omnitrace/library/components/backtrace_metrics.hpp +++ b/source/lib/omnitrace/library/components/backtrace_metrics.hpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -45,11 +46,14 @@ namespace omnitrace { +template +using type_list = ::tim::type_list; + namespace component { struct backtrace_metrics : tim::component::empty_base -, tim::concepts::component +, concepts::component { static constexpr size_t num_hw_counters = TIMEMORY_PAPI_ARRAY_SIZE; @@ -60,6 +64,13 @@ struct backtrace_metrics using system_clock = std::chrono::system_clock; using system_time_point = typename system_clock::time_point; + using categories_t = + type_list; + static constexpr size_t num_categories = std::tuple_size::value; + using valid_array_t = std::bitset; + static std::string label(); static std::string description(); @@ -72,16 +83,31 @@ struct backtrace_metrics backtrace_metrics& operator=(backtrace_metrics&&) noexcept = default; static void configure(bool, int64_t _tid = threading::get_id()); - static void init_perfetto(int64_t _tid); - static void fini_perfetto(int64_t _tid); + static void init_perfetto(int64_t _tid, valid_array_t); + static void fini_perfetto(int64_t _tid, valid_array_t); static std::vector get_hw_counter_labels(int64_t); + template + static bool get_valid(Tp, valid_array_t); + + template + static bool get_valid(type_list, valid_array_t); + static void start(); static void stop(); void sample(int = -1); void post_process(int64_t _tid, const backtrace* _bt, const backtrace_metrics* _last) const; + explicit operator bool() const { return m_valid.any(); } + + template + bool operator()(Tp) const; + + template + bool operator()(type_list) const; + + auto get_valid() const { return m_valid; } auto get_cpu_timestamp() const { return m_cpu; } auto get_peak_memory() const { return m_mem_peak; } auto get_context_switches() const { return m_ctx_swch; } @@ -91,12 +117,44 @@ struct backtrace_metrics void post_process_perfetto(int64_t _tid, uint64_t _ts) const; private: + valid_array_t m_valid = {}; int64_t m_cpu = 0; int64_t m_mem_peak = 0; int64_t m_ctx_swch = 0; int64_t m_page_flt = 0; hw_counter_data_t m_hw_counter = {}; }; + +template +bool +backtrace_metrics::get_valid(type_list, valid_array_t _valid) +{ + constexpr auto idx = tim::index_of::value; + return _valid.test(idx); +} + +template +bool backtrace_metrics::operator()(type_list) const +{ + static_assert(!concepts::is_type_listing::value, + "Error! invalid call with tuple"); + + constexpr auto idx = tim::index_of::value; + return m_valid.test(idx); +} + +template +bool +backtrace_metrics::get_valid(Tp, valid_array_t _valid) +{ + return get_valid(type_list{}, _valid); +} + +template +bool backtrace_metrics::operator()(Tp) const +{ + return (*this)(type_list{}); +} } // namespace component } // namespace omnitrace diff --git a/source/lib/omnitrace/library/components/category_region.hpp b/source/lib/omnitrace/library/components/category_region.hpp index ebc1e18296..3eeb031f45 100644 --- a/source/lib/omnitrace/library/components/category_region.hpp +++ b/source/lib/omnitrace/library/components/category_region.hpp @@ -68,7 +68,10 @@ using tracing_count_categories_t = category::rocm_hsa, category::rocm_rccl>; // these categories are added to the critical trace -using critical_trace_categories_t = type_list; +using critical_trace_categories_t = + type_list; // convert these categories to throughput points using causal_throughput_categories_t = @@ -128,7 +131,7 @@ void category_region::start(std::string_view name, Args&&... args) { // skip if category is disabled - if(!trait::runtime_enabled::get()) return; + if(tracing::category_push_disabled()) return; // unconditionally return if thread is disabled or finalized if(get_thread_state() == ThreadState::Disabled) return; @@ -212,7 +215,7 @@ void category_region::stop(std::string_view name, Args&&... args) { // skip if category is disabled - if(!trait::runtime_enabled::get()) return; + if(tracing::category_pop_disabled()) return; if(get_thread_state() == ThreadState::Disabled) return; @@ -315,7 +318,7 @@ category_region::mark(std::string_view name, Args&&...) if constexpr(!_ct_use_causal) return; // skip if category is disabled - if(!trait::runtime_enabled::get()) return; + if(tracing::category_mark_disabled()) return; // the expectation here is that if the state is not active then the call // to omnitrace_init_tooling_hidden will activate all the appropriate @@ -345,9 +348,6 @@ void category_region::audit(const gotcha_data_t& _data, audit::incoming, Args&&... _args) { - // skip if category is disabled - if(!trait::runtime_enabled::get()) return; - start(_data.tool_id.c_str(), [&](perfetto::EventContext ctx) { if(config::get_perfetto_annotations()) { @@ -364,9 +364,6 @@ void category_region::audit(const gotcha_data_t& _data, audit::outgoing, Args&&... _args) { - // skip if category is disabled - if(!trait::runtime_enabled::get()) return; - stop(_data.tool_id.c_str(), [&](perfetto::EventContext ctx) { if(config::get_perfetto_annotations()) tracing::add_perfetto_annotation(ctx, "return", JOIN(", ", _args...)); @@ -379,9 +376,6 @@ void category_region::audit(std::string_view _name, audit::incoming, Args&&... _args) { - // skip if category is disabled - if(!trait::runtime_enabled::get()) return; - start(_name.data(), [&](perfetto::EventContext ctx) { if(config::get_perfetto_annotations()) { @@ -398,9 +392,6 @@ void category_region::audit(std::string_view _name, audit::outgoing, Args&&... _args) { - // skip if category is disabled - if(!trait::runtime_enabled::get()) return; - stop(_name.data(), [&](perfetto::EventContext ctx) { if(config::get_perfetto_annotations()) tracing::add_perfetto_annotation(ctx, "return", JOIN(", ", _args...)); @@ -466,6 +457,5 @@ struct local_category_region : comp::base, void private: std::string_view m_prefix = {}; }; - } // namespace component } // namespace omnitrace diff --git a/source/lib/omnitrace/library/components/comm_data.hpp b/source/lib/omnitrace/library/components/comm_data.hpp index 9cfc572080..e58756541a 100644 --- a/source/lib/omnitrace/library/components/comm_data.hpp +++ b/source/lib/omnitrace/library/components/comm_data.hpp @@ -97,7 +97,7 @@ struct comm_data : base static constexpr auto label = "RCCL Comm Send"; }; - TIMEMORY_DEFAULT_OBJECT(comm_data) + OMNITRACE_DEFAULT_OBJECT(comm_data) static void preinit(); static void configure(); diff --git a/source/lib/omnitrace/library/components/cpu_freq.hpp b/source/lib/omnitrace/library/components/cpu_freq.hpp index 7e80e53694..b86671f67b 100644 --- a/source/lib/omnitrace/library/components/cpu_freq.hpp +++ b/source/lib/omnitrace/library/components/cpu_freq.hpp @@ -45,7 +45,7 @@ struct cpu_freq using storage_type = tim::storage; using cpu_id_set_t = std::set; - TIMEMORY_DEFAULT_OBJECT(cpu_freq) + OMNITRACE_DEFAULT_OBJECT(cpu_freq) // string id for component static std::string label(); diff --git a/source/lib/omnitrace/library/components/ensure_storage.hpp b/source/lib/omnitrace/library/components/ensure_storage.hpp index 65dbc2231f..6950379290 100644 --- a/source/lib/omnitrace/library/components/ensure_storage.hpp +++ b/source/lib/omnitrace/library/components/ensure_storage.hpp @@ -39,7 +39,7 @@ namespace template struct ensure_storage { - TIMEMORY_DEFAULT_OBJECT(ensure_storage) + OMNITRACE_DEFAULT_OBJECT(ensure_storage) void operator()() const { OMNITRACE_FOLD_EXPRESSION((*this)(tim::type_list{})); } diff --git a/source/lib/omnitrace/library/components/exit_gotcha.hpp b/source/lib/omnitrace/library/components/exit_gotcha.hpp index b9d7df1ff1..cca80b7402 100644 --- a/source/lib/omnitrace/library/components/exit_gotcha.hpp +++ b/source/lib/omnitrace/library/components/exit_gotcha.hpp @@ -44,7 +44,7 @@ struct exit_gotcha : tim::component::base using exit_func_t = void (*)(int); using abort_func_t = void (*)(); - TIMEMORY_DEFAULT_OBJECT(exit_gotcha) + OMNITRACE_DEFAULT_OBJECT(exit_gotcha) // string id for component static std::string label() { return "exit_gotcha"; } diff --git a/source/lib/omnitrace/library/components/fork_gotcha.hpp b/source/lib/omnitrace/library/components/fork_gotcha.hpp index 2051b868af..d579f6ada9 100644 --- a/source/lib/omnitrace/library/components/fork_gotcha.hpp +++ b/source/lib/omnitrace/library/components/fork_gotcha.hpp @@ -37,7 +37,7 @@ struct fork_gotcha : comp::base using gotcha_data_t = comp::gotcha_data; - TIMEMORY_DEFAULT_OBJECT(fork_gotcha) + OMNITRACE_DEFAULT_OBJECT(fork_gotcha) // string id for component static std::string label() { return "fork_gotcha"; } diff --git a/source/lib/omnitrace/library/components/mpi_gotcha.hpp b/source/lib/omnitrace/library/components/mpi_gotcha.hpp index 7c58d4a7b8..ba2140e937 100644 --- a/source/lib/omnitrace/library/components/mpi_gotcha.hpp +++ b/source/lib/omnitrace/library/components/mpi_gotcha.hpp @@ -38,7 +38,7 @@ struct mpi_gotcha : comp::base using comm_t = tim::mpi::comm_t; using gotcha_data_t = comp::gotcha_data; - TIMEMORY_DEFAULT_OBJECT(mpi_gotcha) + OMNITRACE_DEFAULT_OBJECT(mpi_gotcha) // string id for component static std::string label() { return "mpi_gotcha"; } diff --git a/source/lib/omnitrace/library/components/numa_gotcha.hpp b/source/lib/omnitrace/library/components/numa_gotcha.hpp index 46056294e5..a71a9cd2e6 100644 --- a/source/lib/omnitrace/library/components/numa_gotcha.hpp +++ b/source/lib/omnitrace/library/components/numa_gotcha.hpp @@ -44,7 +44,7 @@ struct numa_gotcha : tim::component::base using exit_func_t = void (*)(int); using abort_func_t = void (*)(); - TIMEMORY_DEFAULT_OBJECT(numa_gotcha) + OMNITRACE_DEFAULT_OBJECT(numa_gotcha) // string id for component static std::string label() { return "numa_gotcha"; } diff --git a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp index fde9ddb898..b84d0f40ab 100644 --- a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp @@ -161,6 +161,7 @@ pthread_create_gotcha::wrapper::operator()() const auto _signals = std::set{}; auto _coverage = (get_mode() == Mode::Coverage); const auto& _parent_info = thread_info::get(m_config.parent_tid, InternalTID); + const auto& _info = thread_info::init(m_config.offset); auto _dtor = [&]() { set_thread_state(ThreadState::Internal); if(_is_sampling) @@ -189,16 +190,22 @@ pthread_create_gotcha::wrapper::operator()() const _thr_bundle->stop(); if(_bundle) stop_bundle(*_bundle, _tid); pthread_create_gotcha::shutdown(_tid); + OMNITRACE_BASIC_VERBOSE( + 1, "[PID=%i][rank=%i] Thread %s (parent: %s) exited\n", process::get_id(), + dmp::rank(), _info->index_data->as_string().c_str(), + _parent_info->index_data->as_string().c_str()); } }; auto _active = (get_state() == ::omnitrace::State::Active && bundles != nullptr && bundles_mutex != nullptr); - - const auto& _info = thread_info::init(m_config.offset); if(_active && !_coverage && !m_config.offset) { _tid = _info->index_data->sequent_value; + OMNITRACE_BASIC_VERBOSE(1, "[PID=%i][rank=%i] Thread %s (parent: %s) created\n", + process::get_id(), dmp::rank(), + _info->index_data->as_string().c_str(), + _parent_info->index_data->as_string().c_str()); threading::set_thread_name(TIMEMORY_JOIN(" ", "Thread", _tid).c_str()); if(!thread_bundle_data_t::instances().at(_tid)) { @@ -235,6 +242,14 @@ pthread_create_gotcha::wrapper::operator()() const sampling::unblock_signals(); } } + else if(m_config.offset) + { + OMNITRACE_BASIC_VERBOSE( + 2, + "[PID=%i][rank=%i] Thread %s (parent: %s) created [started by omnitrace]\n", + process::get_id(), dmp::rank(), _info->index_data->as_string().c_str(), + _parent_info->index_data->as_string().c_str()); + } // notify the wrapper that all internal work is completed if(m_config.promise) m_config.promise->set_value(); @@ -399,8 +414,9 @@ pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, if(_active && !_disabled && !_info->is_offset) { - OMNITRACE_VERBOSE(1, "Creating new thread on PID %i (rank: %i), TID %li\n", - process::get_id(), dmp::rank(), _tid); + OMNITRACE_BASIC_VERBOSE(2, "[PID=%i][rank=%i] Starting new thread on %s...\n", + process::get_id(), dmp::rank(), + _info->index_data->as_string().c_str()); } // ensure that cpu cid stack exists on the parent thread if active diff --git a/source/lib/omnitrace/library/components/pthread_create_gotcha.hpp b/source/lib/omnitrace/library/components/pthread_create_gotcha.hpp index e327d213ce..fc540a3abe 100644 --- a/source/lib/omnitrace/library/components/pthread_create_gotcha.hpp +++ b/source/lib/omnitrace/library/components/pthread_create_gotcha.hpp @@ -64,7 +64,7 @@ struct pthread_create_gotcha : tim::component::base wrapper_config m_config = {}; }; - TIMEMORY_DEFAULT_OBJECT(pthread_create_gotcha) + OMNITRACE_DEFAULT_OBJECT(pthread_create_gotcha) // string id for component static std::string label() { return "pthread_create_gotcha"; } diff --git a/source/lib/omnitrace/library/components/pthread_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_gotcha.cpp index 60f64f8178..6648bcab5c 100644 --- a/source/lib/omnitrace/library/components/pthread_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_gotcha.cpp @@ -48,7 +48,7 @@ struct stop { using type = omnitrace::component::pthread_create_gotcha_t; - TIMEMORY_DEFAULT_OBJECT(stop) + OMNITRACE_DEFAULT_OBJECT(stop) template explicit stop(type&, Args&&...) diff --git a/source/lib/omnitrace/library/components/pthread_gotcha.hpp b/source/lib/omnitrace/library/components/pthread_gotcha.hpp index 7edc6f791a..8f42af89be 100644 --- a/source/lib/omnitrace/library/components/pthread_gotcha.hpp +++ b/source/lib/omnitrace/library/components/pthread_gotcha.hpp @@ -33,7 +33,7 @@ namespace omnitrace { struct pthread_gotcha : tim::component::base { - TIMEMORY_DEFAULT_OBJECT(pthread_gotcha) + OMNITRACE_DEFAULT_OBJECT(pthread_gotcha) // string id for component static std::string label() { return "pthread_gotcha"; } diff --git a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.hpp b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.hpp index 80a8459de9..d7074cbfcd 100644 --- a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.hpp +++ b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.hpp @@ -44,7 +44,7 @@ struct pthread_mutex_gotcha : comp::base using hash_array_t = std::array; using gotcha_data_t = comp::gotcha_data; - TIMEMORY_DEFAULT_OBJECT(pthread_mutex_gotcha) + OMNITRACE_DEFAULT_OBJECT(pthread_mutex_gotcha) explicit pthread_mutex_gotcha(const gotcha_data_t&); diff --git a/source/lib/omnitrace/library/components/rocprofiler.hpp b/source/lib/omnitrace/library/components/rocprofiler.hpp index bb3ed6f1f3..2de4baddf5 100644 --- a/source/lib/omnitrace/library/components/rocprofiler.hpp +++ b/source/lib/omnitrace/library/components/rocprofiler.hpp @@ -109,7 +109,7 @@ struct rocprofiler using base_type = base; using tracker_type = policy::instance_tracker; - TIMEMORY_DEFAULT_OBJECT(rocprofiler) + OMNITRACE_DEFAULT_OBJECT(rocprofiler) static void preinit(); static void global_init() { setup(); } @@ -173,7 +173,7 @@ struct set_storage using storage_array_t = std::array*, max_threads>; friend struct get_storage; - TIMEMORY_DEFAULT_OBJECT(set_storage) + OMNITRACE_DEFAULT_OBJECT(set_storage) auto operator()(storage*, size_t) const {} auto operator()(type&, size_t) const {} @@ -192,7 +192,7 @@ struct get_storage { using type = component::rocm_data_tracker; - TIMEMORY_DEFAULT_OBJECT(get_storage) + OMNITRACE_DEFAULT_OBJECT(get_storage) auto operator()(const type&) const { diff --git a/source/lib/omnitrace/library/components/roctracer.hpp b/source/lib/omnitrace/library/components/roctracer.hpp index d042b290a1..b3ee6f14c9 100644 --- a/source/lib/omnitrace/library/components/roctracer.hpp +++ b/source/lib/omnitrace/library/components/roctracer.hpp @@ -51,7 +51,7 @@ struct roctracer using base_type = base; using tracker_type = policy::instance_tracker; - TIMEMORY_DEFAULT_OBJECT(roctracer) + OMNITRACE_DEFAULT_OBJECT(roctracer) static void preinit(); static void global_init() { setup(); } diff --git a/source/lib/omnitrace/library/concepts.hpp b/source/lib/omnitrace/library/concepts.hpp index c9926367a0..bd55d54da6 100644 --- a/source/lib/omnitrace/library/concepts.hpp +++ b/source/lib/omnitrace/library/concepts.hpp @@ -94,5 +94,33 @@ public: static constexpr bool value = sfinae(0); constexpr auto operator()() const { return sfinae(0); } }; + +template +struct tuple_element_impl; + +template +struct tuple_element_impl, true> +{ + using type = typename std::tuple_element>::type; +}; + +template +struct tuple_element_impl, false> +{ + using type = void; +}; + +template +struct tuple_element; + +template +struct tuple_element> +{ + using type = + typename tuple_element_impl, (N < sizeof...(Tp))>::type; +}; + +template +using tuple_element_t = typename tuple_element::type; } // namespace concepts } // namespace tim diff --git a/source/lib/omnitrace/library/config.cpp b/source/lib/omnitrace/library/config.cpp index e9ea9695f7..ec232f0576 100644 --- a/source/lib/omnitrace/library/config.cpp +++ b/source/lib/omnitrace/library/config.cpp @@ -22,12 +22,12 @@ #include "library/config.hpp" #include "common/defines.h" +#include "library/constraint.hpp" #include "library/debug.hpp" #include "library/defines.hpp" #include "library/gpu.hpp" #include "library/mproc.hpp" #include "library/perfetto.hpp" -#include "library/runtime.hpp" #include #include @@ -43,12 +43,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -98,7 +100,7 @@ get_setting_name(std::string _v) template Tp -get_available_perfetto_categories() +get_available_categories() { auto _v = Tp{}; for(auto itr : { OMNITRACE_PERFETTO_CATEGORIES }) @@ -287,8 +289,8 @@ configure_settings(bool _init) "for continuous integration)", false, "debugging", "advanced"); - OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_COLORIZED_LOG", "Enable colorized logging", - true, "debugging", "advanced"); + OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_MONOCHROME", "Disable colorized logging", + false, "debugging", "advanced"); OMNITRACE_CONFIG_EXT_SETTING(int, "OMNITRACE_DL_VERBOSE", "Verbosity within the omnitrace-dl library", 0, @@ -392,10 +394,45 @@ configure_settings(bool _init) "Enable support for code coverage", false, "coverage", "backend", "advanced"); - OMNITRACE_CONFIG_SETTING(size_t, "OMNITRACE_INSTRUMENTATION_INTERVAL", - "Instrumentation only takes measurements once every N " - "function calls (not statistical)", - size_t{ 1 }, "instrumentation", "data_sampling", "advanced"); + OMNITRACE_CONFIG_SETTING( + double, "OMNITRACE_TRACE_DELAY", + "Time in seconds to wait before enabling trace/profile data collection. If " + "multiple delays + durations are needed, see OMNITRACE_TRACE_PERIODS.", + 0.0, "trace", "profile", "perfetto", "timemory"); + + OMNITRACE_CONFIG_SETTING( + double, "OMNITRACE_TRACE_DURATION", + "If > 0.0, time (in seconds) to collect trace/profile data. If multiple delays + " + "durations are needed, see OMNITRACE_TRACE_PERIODS.", + 0.0, "trace", "profile", "perfetto", "timemory"); + + auto _clock_s = + config::get_setting_value("OMNITRACE_TRACE_PERIOD_CLOCK_ID").second; + + auto _clock_choices = std::vector{}; + + for(const auto& itr : constraint::get_valid_clock_ids()) + { + _clock_choices.emplace_back( + join("", "(", join('|', itr.name, itr.value, itr.raw_name), ")")); + } + + OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_TRACE_PERIODS", + "Similar to specify trace delay and/or duration except in " + "the form :, ::, " + "and/or :::", + std::string{}, "trace", "profile", "perfetto", "timemory"); + + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_TRACE_PERIOD_CLOCK_ID", + "Set the default clock ID for OMNITRACE_TRACE_DELAY, OMNITRACE_TRACE_DURATION, " + "and/or OMNITRACE_TRACE_PERIODS. E.g. \"realtime\" == the delay/duration is " + "governed by the elapsed realtime, \"cputime\" == the delay/duration is governed " + "by the elapsed CPU-time within the process, etc. Note: when using CPU-based " + "timing, it is recommened to scale the value by the number of threads and be " + "aware that omnitrace may contribute to advancing the process CPU-time", + "CLOCK_REALTIME", "trace", "profile", "perfetto", "timemory") + ->set_choices(_clock_choices); OMNITRACE_CONFIG_SETTING( double, "OMNITRACE_SAMPLING_FREQ", @@ -639,10 +676,18 @@ configure_settings(bool _init) "discard", "perfetto", "data") ->set_choices({ "fill", "discard" }); - OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_PERFETTO_CATEGORIES", - "Categories to collect within perfetto", "", "perfetto", - "data", "advanced") - ->set_choices(get_available_perfetto_categories>()); + OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_ENABLE_CATEGORIES", + "Enable collecting profiling and trace data for these " + "categories and disable all other categories", + "", "trace", "profile", "perfetto", "timemory", "data", + "advanced") + ->set_choices(get_available_categories>()); + + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_DISABLE_CATEGORIES", + "Disable collecting profiling and trace data for these categories", "", "trace", + "profile", "perfetto", "timemory", "data", "advanced") + ->set_choices(get_available_categories>()); OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_PERFETTO_ANNOTATIONS", "Include debug annotations in perfetto trace. When enabled, " @@ -977,8 +1022,8 @@ configure_settings(bool _init) settings::suppress_config() = true; - if(!get_env("OMNITRACE_COLORIZED_LOG", _config->get("OMNITRACE_COLORIZED_LOG"))) - tim::log::colorized() = false; + if(get_env("OMNITRACE_MONOCHROME", _config->get("OMNITRACE_MONOCHROME"))) + tim::log::monochrome() = true; if(_init) { @@ -1105,8 +1150,6 @@ configure_mode_settings() _set("OMNITRACE_USE_ROCM_SMI", false); } - get_instrumentation_interval() = std::max(get_instrumentation_interval(), 1); - if(get_use_kokkosp()) { auto _current_kokkosp_lib = tim::get_env("KOKKOS_PROFILE_LIBRARY"); @@ -1156,6 +1199,13 @@ namespace using signal_settings = tim::signals::signal_settings; using sys_signal = tim::signals::sys_signal; +std::atomic& +get_signal_handler() +{ + static auto _v = std::atomic{ nullptr }; + return _v; +} + void omnitrace_exit_action(int nsig) { @@ -1163,7 +1213,8 @@ omnitrace_exit_action(int nsig) tim::signals::sigmask_scope::process); OMNITRACE_BASIC_PRINT("Finalizing afer signal %i :: %s\n", nsig, signal_settings::str(static_cast(nsig)).c_str()); - if(get_state() == State::Active) omnitrace_finalize(); + auto _handler = get_signal_handler().load(); + if(_handler) (*_handler)(); kill(process::get_id(), nsig); } @@ -1183,6 +1234,28 @@ omnitrace_trampoline_handler(int _v) } } // namespace +signal_handler_t +set_signal_handler(signal_handler_t _func) +{ + if(_func) + { + auto _handler = get_signal_handler().load(std::memory_order_relaxed); + if(get_signal_handler().compare_exchange_strong(_handler, _func, + std::memory_order_relaxed)) + { + return _handler; + } + else + { + _handler = get_signal_handler().load(std::memory_order_seq_cst); + get_signal_handler().store(_func); + return _handler; + } + } + + return get_signal_handler().load(); +} + void configure_signal_handler() { @@ -1218,6 +1291,35 @@ configure_signal_handler() } } +int +get_realtime_signal() +{ + return SIGRTMIN + get_sampling_rtoffset(); +} + +int +get_cputime_signal() +{ + return SIGPROF; +} + +std::set get_sampling_signals(int64_t) +{ + auto _v = std::set{}; + if(get_use_causal()) + { + _v.emplace(get_cputime_signal()); + _v.emplace(get_realtime_signal()); + } + else + { + if(get_use_sampling_cputime()) _v.emplace(get_cputime_signal()); + if(get_use_sampling_realtime()) _v.emplace(get_realtime_signal()); + } + + return _v; +} + void configure_disabled_settings() { @@ -1964,18 +2066,74 @@ get_perfetto_fill_policy() return static_cast&>(*_v->second).get(); } -std::set -get_perfetto_categories() +namespace { - static auto _v = get_config()->find("OMNITRACE_PERFETTO_CATEGORIES"); - static auto _avail = get_available_perfetto_categories>(); - auto _ret = std::set{}; - for(auto itr : tim::delimit( - static_cast&>(*_v->second).get(), " ,;:")) - { - if(_avail.count(itr) > 0) _ret.emplace(itr); - } - return _ret; +auto +get_category_config() +{ + using strset_t = std::set; + + static auto _v = []() { + auto _avail = get_available_categories(); + auto _parse = [&_avail](const auto& _setting) { + auto _ret = strset_t{}; + for(auto itr : tim::delimit( + static_cast&>(*_setting->second).get(), + " ,;:\n\t")) + { + if(_avail.count(itr) > 0) _ret.emplace(itr); + } + return _ret; + }; + + auto _enabled = _parse(get_config()->find("OMNITRACE_ENABLE_CATEGORIES")); + auto _disabled = _parse(get_config()->find("OMNITRACE_DISABLE_CATEGORIES")); + + if(_enabled.empty() && _disabled.empty()) + { + _enabled = _avail; + } + else if(_enabled.empty() && !_disabled.empty()) + { + for(auto itr : _avail) + { + if(_disabled.count(itr) == 0) _enabled.emplace(itr); + } + } + else if(!_enabled.empty() && _disabled.empty()) + { + for(auto itr : _avail) + { + if(_enabled.count(itr) == 0) _disabled.emplace(itr); + } + } + else + { + OMNITRACE_ABORT("Error! Conflicting options OMNITRACE_ENABLE_CATEGORIES and " + "OMNITRACE_DISABLE_CATEGORIES were both provided."); + } + + OMNITRACE_CI_THROW(_enabled.size() + _disabled.size() != _avail.size(), + "Error! Internal error for categories: %zu (enabled) + %zu " + "(disabled) != %zu (total)\n", + _enabled.size(), _disabled.size(), _avail.size()); + + return std::make_pair(_enabled, _disabled); + }(); + + return _v; +} +} // namespace +std::set +get_enabled_categories() +{ + return get_category_config().first; +} + +std::set +get_disabled_categories() +{ + return get_category_config().second; } bool @@ -2043,13 +2201,6 @@ get_perfetto_output_filename() return _val; } -size_t& -get_instrumentation_interval() -{ - static auto _v = get_config()->find("OMNITRACE_INSTRUMENTATION_INTERVAL"); - return static_cast&>(*_v->second).get(); -} - double get_sampling_freq() { diff --git a/source/lib/omnitrace/library/config.hpp b/source/lib/omnitrace/library/config.hpp index e58b49cbe4..ff5150f5fb 100644 --- a/source/lib/omnitrace/library/config.hpp +++ b/source/lib/omnitrace/library/config.hpp @@ -22,7 +22,6 @@ #pragma once -#include "api.hpp" #include "library/common.hpp" #include "library/defines.hpp" #include "library/state.hpp" @@ -43,6 +42,12 @@ namespace omnitrace // inline namespace config { +using signal_handler_t = void (*)(void); + +// if arg is nullptr, returns current signal handler +// if arg is non-null, returns replaced signal handler +signal_handler_t set_signal_handler(signal_handler_t); + bool settings_are_configured() OMNITRACE_HOT; @@ -55,6 +60,15 @@ configure_mode_settings(); void configure_signal_handler(); +int +get_realtime_signal(); + +int +get_cputime_signal(); + +std::set +get_sampling_signals(int64_t _tid = 0); + void configure_disabled_settings(); @@ -257,7 +271,10 @@ std::string get_perfetto_fill_policy(); std::set -get_perfetto_categories(); +get_enabled_categories(); + +std::set +get_disabled_categories(); bool get_perfetto_annotations() OMNITRACE_HOT; @@ -284,8 +301,11 @@ get_perfetto_roctracer_per_stream() OMNITRACE_HOT; int64_t get_critical_trace_count(); -size_t& -get_instrumentation_interval(); +double +get_trace_delay(); + +double +get_trace_duration(); double get_sampling_freq(); diff --git a/source/lib/omnitrace/library/constraint.cpp b/source/lib/omnitrace/library/constraint.cpp new file mode 100644 index 0000000000..27d122bfa8 --- /dev/null +++ b/source/lib/omnitrace/library/constraint.cpp @@ -0,0 +1,349 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "library/constraint.hpp" +#include "library/config.hpp" +#include "library/debug.hpp" +#include "library/state.hpp" +#include "library/utility.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace constraint +{ +namespace +{ +namespace units = ::tim::units; + +using clock_type = std::chrono::high_resolution_clock; +using duration_type = std::chrono::duration; + +#define OMNITRACE_CLOCK_IDENTIFIER(VAL) \ + clock_identifier { #VAL, VAL } + +auto +clock_name(std::string _v) +{ + constexpr auto _clock_prefix = std::string_view{ "clock_" }; + for(auto& itr : _v) + itr = tolower(itr); + auto _pos = _v.find(_clock_prefix); + if(_pos == 0) _v = _v.substr(_pos + _clock_prefix.length()); + if(_v == "process_cputime_id") _v = "cputime"; + return _v; +} + +auto accepted_clock_ids = + std::set{ OMNITRACE_CLOCK_IDENTIFIER(CLOCK_REALTIME), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_MONOTONIC), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_PROCESS_CPUTIME_ID), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_MONOTONIC_RAW), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_REALTIME_COARSE), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_MONOTONIC_COARSE), + OMNITRACE_CLOCK_IDENTIFIER(CLOCK_BOOTTIME) }; + +template +clock_identifier +find_clock_identifier(const Tp& _v) +{ + const char* _descript = ""; + if constexpr(std::is_integral::value) + { + _descript = "value"; + for(const auto& itr : accepted_clock_ids) + { + if(itr.value == _v) + { + return itr; + } + } + } + else + { + _descript = "name"; + auto _clock_name = clock_name(_v); + for(const auto& itr : accepted_clock_ids) + { + if(itr.name == _clock_name || itr.raw_name == _v || + std::to_string(itr.value) == _v) + { + return itr; + } + } + } + + OMNITRACE_THROW("Unknown clock id %s: %s. Valid choices: %s\n", _descript, + timemory::join::join("", _v).c_str(), + timemory::join::join("", accepted_clock_ids).c_str()); +} + +void +sleep(uint64_t _n) +{ + std::this_thread::sleep_for(std::chrono::nanoseconds{ _n }); +} + +timespec +get_timespec(clockid_t clock_id) noexcept +{ + struct timespec _ts; + clock_gettime(clock_id, &_ts); + return _ts; +} + +template +Tp +get_clock_now(clockid_t clock_id) noexcept +{ + constexpr Tp factor = (Precision::den == std::nano::den) + ? 1 + : (Precision::den / static_cast(std::nano::den)); + auto _ts = get_timespec(clock_id); + return (_ts.tv_sec * std::nano::den + _ts.tv_nsec) * factor; +} +} // namespace + +//--------------------------------------------------------------------------------------// +// +// stages implementation +// +//--------------------------------------------------------------------------------------// + +stages::stages() +: init{ [](const spec&) { return get_state() < State::Finalized; } } +, wait{ [](const spec& _spec) { + sleep(std::min(100 * units::msec, _spec.delay * units::sec)); + return get_state() < State::Finalized; +} } +, start{ [](const spec&) { return get_state() < State::Finalized; } } +, collect{ [](const spec& _spec) { + sleep(std::min(100 * units::msec, _spec.duration * units::sec)); + return get_state() < State::Finalized; +} } +, stop{ [](const spec&) { return get_state() < State::Finalized; } } +{} + +//--------------------------------------------------------------------------------------// +// +// clock identifier implementation +// +//--------------------------------------------------------------------------------------// + +clock_identifier::clock_identifier(std::string_view _name, int _val) +: value{ _val } +, raw_name{ _name } +, name{ clock_name(std::string{ _name }) } +{} + +bool +clock_identifier::operator<(const clock_identifier& _rhs) const +{ + return value < _rhs.value; +} + +bool +clock_identifier::operator==(const clock_identifier& _rhs) const +{ + return std::tie(raw_name, value) == std::tie(_rhs.raw_name, _rhs.value); +} + +bool +clock_identifier::operator==(int _rhs) const +{ + return (value == _rhs); +} + +bool +clock_identifier::operator==(std::string _rhs) const +{ + return (raw_name == std::string_view{ _rhs }) || + (name == clock_name(std::move(_rhs))); +} + +std::string +clock_identifier::as_string() const +{ + auto _name = name; + for(auto& itr : _name) + itr = tolower(itr); + auto _ss = std::stringstream{}; + _ss << _name << "(id=" << raw_name << ", value=" << value << ")"; + return _ss.str(); +} + +//--------------------------------------------------------------------------------------// +// +// spec implementation +// +//--------------------------------------------------------------------------------------// + +spec::spec(clock_identifier _id, double _delay, double _dur, uint64_t _n, uint64_t _rep) +: delay{ _delay } +, duration{ _dur } +, count{ _n } +, repeat{ _rep } +, clock_id{ std::move(_id) } +{} + +spec::spec(int _clock_id, double _delay, double _dur, uint64_t _n, uint64_t _rep) +: delay{ _delay } +, duration{ _dur } +, count{ _n } +, repeat{ _rep } +, clock_id{ find_clock_identifier(_clock_id) } +{} + +spec::spec(const std::string& _clock_id, double _delay, double _dur, uint64_t _n, + uint64_t _rep) +: delay{ _delay } +, duration{ _dur } +, count{ _n } +, repeat{ _rep } +, clock_id{ find_clock_identifier(_clock_id) } +{} + +spec::spec(const std::string& _line) +: spec{ config::get_setting_value("OMNITRACE_TRACE_PERIOD_CLOCK_ID").second, + config::get_setting_value("OMNITRACE_TRACE_DELAY").second, + config::get_setting_value("OMNITRACE_TRACE_DURATION").second } +{ + auto _delim = tim::delimit(_line, ":"); + if(!_delim.empty()) delay = utility::convert(_delim.at(0)); + if(_delim.size() > 1) duration = utility::convert(_delim.at(1)); + if(_delim.size() > 2) repeat = utility::convert(_delim.at(2)); + if(_delim.size() > 3) clock_id = find_clock_identifier(_delim.at(3)); +} + +void +spec::operator()(const stages& _stages) const +{ + auto _n = repeat; + if(_n < 1) _n = std::numeric_limits::max(); + + while(get_state() < State::Active) + sleep(1 * units::usec); + + for(uint64_t i = 0; i < _n; ++i) + { + auto _spec = spec{ clock_id, delay, duration, i, repeat }; + auto _wait = [_spec](const auto& _func, auto _dur) { + auto _ret = true; + auto _now = get_clock_now(_spec.clock_id.value); + auto _del = (_dur * units::sec); + auto _end = _now + _del; + while(get_clock_now(_spec.clock_id.value) < _end && (_ret = _func(_spec))) + {} + return _ret; + }; + + OMNITRACE_VERBOSE(2, + "Executing constraint spec %lu of %lu :: delay: %6.3f, " + "duration: %6.3f, clock: %s\n", + i, _spec.repeat, _spec.delay, _spec.duration, + _spec.clock_id.as_string().c_str()); + + if(_stages.init(_spec) && _wait(_stages.wait, _spec.delay) && + _stages.start(_spec) && _wait(_stages.collect, _spec.duration) && + _stages.stop(_spec)) + {} + else + { + break; + } + } +} + +//--------------------------------------------------------------------------------------// +// +// global usage functions +// +//--------------------------------------------------------------------------------------// + +const std::set& +get_valid_clock_ids() +{ + return accepted_clock_ids; +} + +std::vector +get_trace_specs() +{ + auto _v = std::vector{}; + + { + auto _delay_v = config::get_setting_value("OMNITRACE_TRACE_DELAY").second; + auto _duration_v = + config::get_setting_value("OMNITRACE_TRACE_DURATION").second; + auto _clock_v = find_clock_identifier( + config::get_setting_value("OMNITRACE_TRACE_PERIOD_CLOCK_ID") + .second); + + if(_delay_v > 0.0 || _duration_v > 0.0) + { + _v.emplace_back(_clock_v, _delay_v, _duration_v); + } + } + + { + auto _periods_v = + config::get_setting_value("OMNITRACE_TRACE_PERIODS").second; + if(!_periods_v.empty()) + { + for(auto itr : tim::delimit(_periods_v, " ;\t\n")) + _v.emplace_back(itr); + } + } + + return _v; +} + +stages +get_trace_stages() +{ + auto _v = stages{}; + + _v.init = [](const spec&) { return get_state() < State::Finalized; }; + _v.wait = [](const spec& _spec) { + sleep(std::min(100 * units::msec, _spec.delay * units::sec)); + return get_state() < State::Finalized; + }; + _v.start = [](const spec&) { return get_state() < State::Finalized; }; + _v.collect = [](const spec& _spec) { + sleep(std::min(100 * units::msec, _spec.duration * units::sec)); + return get_state() < State::Finalized; + }; + _v.stop = [](const spec&) { return get_state() < State::Finalized; }; + + return _v; +} +} // namespace constraint +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/constraint.hpp b/source/lib/omnitrace/library/constraint.hpp new file mode 100644 index 0000000000..888df8c003 --- /dev/null +++ b/source/lib/omnitrace/library/constraint.hpp @@ -0,0 +1,114 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +/// @file +/// This provides generic functionality for constraining data collection within +/// a windows of time. E.g., delay, delay + duration, (delay + duration) * nrepeat +/// +/// @todo Migrate delay/duration for sampling, process sampling, and causal profiling +/// to use this +/// + +#include "library/defines.hpp" + +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace constraint +{ +struct spec; + +struct stages +{ + using functor_t = std::function; + + stages(); + + OMNITRACE_DEFAULT_COPY_MOVE(stages) + + functor_t init = [](const spec&) { return true; }; + functor_t wait = [](const spec&) { return true; }; + functor_t start = [](const spec&) { return true; }; + functor_t collect = [](const spec&) { return true; }; + functor_t stop = [](const spec&) { return true; }; +}; + +struct clock_identifier +{ + int value = -1; + std::string_view raw_name = {}; + std::string name = {}; + + clock_identifier(); + clock_identifier(std::string_view, int); + + OMNITRACE_DEFAULT_COPY_MOVE(clock_identifier) + + std::string as_string() const; + + bool operator<(const clock_identifier& _rhs) const; + bool operator==(const clock_identifier& _rhs) const; + bool operator==(int _rhs) const; + bool operator==(std::string _rhs) const; + + friend std::ostream& operator<<(std::ostream& _os, const clock_identifier& _v) + { + return (_os << _v.as_string()); + } +}; + +struct spec +{ + spec(int, double, double, uint64_t = 0, uint64_t = 1); + spec(clock_identifier, double, double, uint64_t = 0, uint64_t = 1); + spec(const std::string&, double, double, uint64_t = 0, uint64_t = 1); + spec(const std::string&); + + OMNITRACE_DEFAULT_COPY_MOVE(spec) + + void operator()(const stages&) const; + + double delay = 0.0; + double duration = 0.0; + uint64_t count = 0; + uint64_t repeat = 1; + clock_identifier clock_id = {}; +}; + +const std::set& +get_valid_clock_ids(); + +std::vector +get_trace_specs(); + +stages +get_trace_stages(); +} // namespace constraint +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/debug.cpp b/source/lib/omnitrace/library/debug.cpp index d90bcfcd2d..036babfcc0 100644 --- a/source/lib/omnitrace/library/debug.cpp +++ b/source/lib/omnitrace/library/debug.cpp @@ -22,7 +22,6 @@ #include "library/debug.hpp" #include "library/binary/address_range.hpp" -#include "library/runtime.hpp" #include "library/state.hpp" #include @@ -91,7 +90,7 @@ get_file() { static FILE* _v = []() { auto&& _fname = tim::get_env("OMNITRACE_LOG_FILE", ""); - if(!_fname.empty()) tim::log::colorized() = false; + if(!_fname.empty()) tim::log::monochrome() = true; return (_fname.empty()) ? stderr : tim::filepath::fopen(_fname, "w"); }(); return _v; diff --git a/source/lib/omnitrace/library/defines.hpp.in b/source/lib/omnitrace/library/defines.hpp.in index ca93a56d92..c91d8799bb 100644 --- a/source/lib/omnitrace/library/defines.hpp.in +++ b/source/lib/omnitrace/library/defines.hpp.in @@ -42,3 +42,20 @@ #define OMNITRACE_SAMPLING_GPU_MEMORY_USAGE OMNITRACE_SAMPLING_GPU_MEMORY_USAGE_idx #define OMNITRACE_METADATA(...) ::tim::manager::add_metadata(__VA_ARGS__) + +#if !defined(OMNITRACE_DEFAULT_OBJECT) +# define OMNITRACE_DEFAULT_OBJECT(NAME) \ + NAME() = default; \ + NAME(const NAME&) = default; \ + NAME(NAME&&) noexcept = default; \ + NAME& operator=(const NAME&) = default; \ + NAME& operator=(NAME&&) noexcept = default; +#endif + +#if !defined(OMNITRACE_DEFAULT_COPY_MOVE) +# define OMNITRACE_DEFAULT_COPY_MOVE(NAME) \ + NAME(const NAME&) = default; \ + NAME(NAME&&) noexcept = default; \ + NAME& operator=(const NAME&) = default; \ + NAME& operator=(NAME&&) noexcept = default; +#endif diff --git a/source/lib/omnitrace/library/gpu.cpp b/source/lib/omnitrace/library/gpu.cpp index 96f7b355a2..f14c8cb6e1 100644 --- a/source/lib/omnitrace/library/gpu.cpp +++ b/source/lib/omnitrace/library/gpu.cpp @@ -20,30 +20,34 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. +#if !defined(OMNITRACE_USE_ROCM_SMI) +# define OMNITRACE_USE_ROCM_SMI 0 +#endif + +#if !defined(OMNITRACE_USE_HIP) +# define OMNITRACE_USE_HIP 0 +#endif + +#if OMNITRACE_USE_HIP > 0 +# if !defined(TIMEMORY_USE_HIP) +# define TIMEMORY_USE_HIP 1 +# endif +#endif + #include "library/gpu.hpp" #include "library/debug.hpp" #include "library/defines.hpp" #include -#if defined(OMNITRACE_USE_ROCM_SMI) && OMNITRACE_USE_ROCM_SMI > 0 -# include "library/rocm_smi.hpp" -#elif !defined(OMNITRACE_USE_ROCM_SMI) -# define OMNITRACE_USE_ROCM_SMI 0 -#endif - -#if defined(OMNITRACE_USE_HIP) && OMNITRACE_USE_HIP > 0 -# if !defined(TIMEMORY_USE_HIP) -# define TIMEMORY_USE_HIP 1 -# endif -# include -#elif !defined(OMNITRACE_USE_HIP) -# define OMNITRACE_USE_HIP 0 +#if OMNITRACE_USE_ROCM_SMI > 0 +# include #endif #if OMNITRACE_USE_HIP > 0 # include # include +# include # if !defined(OMNITRACE_HIP_RUNTIME_CALL) # define OMNITRACE_HIP_RUNTIME_CALL(err) \ @@ -62,6 +66,49 @@ namespace omnitrace { namespace gpu { +namespace +{ +namespace scope = ::tim::scope; + +#if OMNITRACE_USE_ROCM_SMI > 0 +# define OMNITRACE_ROCM_SMI_CALL(ERROR_CODE) \ + ::omnitrace::gpu::check_rsmi_error(ERROR_CODE, __FILE__, __LINE__) + +void +check_rsmi_error(rsmi_status_t _code, const char* _file, int _line) +{ + if(_code == RSMI_STATUS_SUCCESS) return; + const char* _msg = nullptr; + auto _err = rsmi_status_string(_code, &_msg); + if(_err != RSMI_STATUS_SUCCESS) + OMNITRACE_THROW("rsmi_status_string failed. No error message available. " + "Error code %i originated at %s:%i\n", + static_cast(_code), _file, _line); + OMNITRACE_THROW("[%s:%i] Error code %i :: %s", _file, _line, static_cast(_code), + _msg); +} + +bool +rsmi_init() +{ + auto _rsmi_init = []() { + try + { + OMNITRACE_ROCM_SMI_CALL(::rsmi_init(0)); + } catch(std::exception& _e) + { + OMNITRACE_BASIC_VERBOSE(1, "Exception thrown initializing rocm-smi: %s\n", + _e.what()); + return false; + } + return true; + }(); + + return _rsmi_init; +} +#endif +} // namespace + int hip_device_count() { @@ -72,13 +119,37 @@ hip_device_count() #endif } +int +rsmi_device_count() +{ +#if OMNITRACE_USE_ROCM_SMI > 0 + if(!rsmi_init()) return 0; + + static auto _num_devices = []() { + uint32_t _v = 0; + try + { + OMNITRACE_ROCM_SMI_CALL(rsmi_num_monitor_devices(&_v)); + } catch(std::exception& _e) + { + OMNITRACE_BASIC_VERBOSE( + 1, "Exception thrown getting the rocm-smi devices: %s\n", _e.what()); + } + return _v; + }(); + + return _num_devices; +#else + return 0; +#endif +} + int device_count() { #if OMNITRACE_USE_ROCM_SMI > 0 // store as static since calls after rsmi_shutdown will return zero - static auto _v = rocm_smi::device_count(); - return _v; + return rsmi_device_count(); #elif OMNITRACE_USE_HIP > 0 return ::tim::hip::device_count(); #else diff --git a/source/lib/omnitrace/library/gpu.hpp b/source/lib/omnitrace/library/gpu.hpp index 66acd7670f..24e761c71a 100644 --- a/source/lib/omnitrace/library/gpu.hpp +++ b/source/lib/omnitrace/library/gpu.hpp @@ -32,6 +32,9 @@ device_count(); int hip_device_count(); +int +rsmi_device_count(); + void add_hip_device_metadata(); } // namespace gpu diff --git a/source/lib/omnitrace/library/perfetto.cpp b/source/lib/omnitrace/library/perfetto.cpp index f941727c1f..d68f307926 100644 --- a/source/lib/omnitrace/library/perfetto.cpp +++ b/source/lib/omnitrace/library/perfetto.cpp @@ -28,37 +28,40 @@ namespace omnitrace { namespace perfetto { +auto& +get_config() +{ + static auto _v = ::perfetto::TraceConfig{}; + return _v; +} + +auto& +get_session() +{ + static auto _v = std::unique_ptr<::perfetto::TracingSession>{}; + return _v; +} + void setup() { auto args = ::perfetto::TracingInitArgs{}; auto track_event_cfg = ::perfetto::protos::gen::TrackEventConfig{}; - auto& cfg = tracing::get_perfetto_config(); + auto& cfg = get_config(); // environment settings - auto shmem_size_hint = get_perfetto_shmem_size_hint(); - auto buffer_size = get_perfetto_buffer_size(); + auto shmem_size_hint = config::get_perfetto_shmem_size_hint(); + auto buffer_size = config::get_perfetto_buffer_size(); auto _policy = - get_perfetto_fill_policy() == "discard" + config::get_perfetto_fill_policy() == "discard" ? ::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD : ::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_RING_BUFFER; auto* buffer_config = cfg.add_buffers(); buffer_config->set_size_kb(buffer_size); buffer_config->set_fill_policy(_policy); - std::set _available_categories = {}; - std::set _disabled_categories = {}; - for(auto itr : { OMNITRACE_PERFETTO_CATEGORIES }) - _available_categories.emplace(itr.name); - auto _enabled_categories = config::get_perfetto_categories(); - for(const auto& itr : _available_categories) - { - if(!_enabled_categories.empty() && _enabled_categories.count(itr) == 0) - _disabled_categories.emplace(itr); - } - - for(const auto& itr : _disabled_categories) + for(const auto& itr : config::get_disabled_categories()) { OMNITRACE_VERBOSE_F(1, "Disabling perfetto track event category: %s\n", itr.c_str()); @@ -81,31 +84,19 @@ setup() void start() { -#if defined(CUSTOM_DATA_SOURCE) - // Add the following: - ::perfetto::DataSourceDescriptor dsd{}; - dsd.set_name("com.example.custom_data_source"); - CustomDataSource::Register(dsd); - auto* ds_cfg = cfg.add_data_sources()->mutable_config(); - ds_cfg->set_name("com.example.custom_data_source"); - CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) { - auto packet = ctx.NewTracePacket(); - packet->set_timestamp(::perfetto::TrackEvent::GetTraceTimeNs()); - packet->set_for_testing()->set_str("Hello world!"); - PRINT_HERE("%s", "Trace"); - }); -#endif - auto& cfg = tracing::get_perfetto_config(); - auto& tracing_session = tracing::get_perfetto_session(); + auto& cfg = get_config(); + auto& tracing_session = get_session(); tracing_session = ::perfetto::Tracing::NewTrace(); tracing_session->Setup(cfg); tracing_session->StartBlocking(); } } // namespace perfetto + +std::unique_ptr<::perfetto::TracingSession>& +get_perfetto_session() +{ + return ::omnitrace::perfetto::get_session(); +} } // namespace omnitrace PERFETTO_TRACK_EVENT_STATIC_STORAGE(); - -#if defined(CUSTOM_DATA_SOURCE) -PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource); -#endif diff --git a/source/lib/omnitrace/library/perfetto.hpp b/source/lib/omnitrace/library/perfetto.hpp index 1c7638cef9..2c6f2999ef 100644 --- a/source/lib/omnitrace/library/perfetto.hpp +++ b/source/lib/omnitrace/library/perfetto.hpp @@ -43,123 +43,22 @@ PERFETTO_DEFINE_CATEGORIES(OMNITRACE_PERFETTO_CATEGORIES); namespace omnitrace { -#if defined(CUSTOM_DATA_SOURCE) -class CustomDataSource : public perfetto::DataSource -{ -public: - void OnSetup(const SetupArgs&) override - { - // Use this callback to apply any custom configuration to your data source - // based on the TraceConfig in SetupArgs. - OMNITRACE_PRINT_F("[CustomDataSource] setup\n"); - } - - void OnStart(const StartArgs&) override - { - // This notification can be used to initialize the GPU driver, enable - // counters, etc. StartArgs will contains the DataSourceDescriptor, - // which can be extended. - OMNITRACE_PRINT_F("[CustomDataSource] start\n"); - } - - void OnStop(const StopArgs&) override - { - // Undo any initialization done in OnStart. - OMNITRACE_PRINT_F("[CustomDataSource] stop\n"); - } - - // Data sources can also have per-instance state. - int my_custom_state = 0; -}; - -PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource); -#endif +std::unique_ptr<::perfetto::TracingSession>& +get_perfetto_session(); template struct perfetto_counter_track { - using track_map_t = std::map>; + using track_map_t = std::map>; using name_map_t = std::map>>; using data_t = std::pair; - static auto init() { (void) get_data(); } - - static auto exists(size_t _idx, int64_t _n = -1) - { - bool _v = get_data().second.count(_idx) != 0; - if(_n < 0 || !_v) return _v; - return static_cast(_n) < get_data().second.at(_idx).size(); - } - - static size_t size(size_t _idx) - { - bool _v = get_data().second.count(_idx) != 0; - if(!_v) return 0; - return get_data().second.at(_idx).size(); - } - + static auto init() { (void) get_data(); } + static auto exists(size_t _idx, int64_t _n = -1); + static size_t size(size_t _idx); static auto emplace(size_t _idx, const std::string& _v, const char* _units = nullptr, const char* _category = nullptr, int64_t _mult = 1, - bool _incr = false) - { - auto& _name_data = get_data().first[_idx]; - auto& _track_data = get_data().second[_idx]; - std::vector> _missing = {}; - if(config::get_is_continuous_integration()) - { - for(const auto& itr : _name_data) - { - _missing.emplace_back(std::make_tuple(*itr, itr->c_str(), false)); - } - } - auto _index = _track_data.size(); - auto& _name = _name_data.emplace_back(std::make_unique(_v)); - const char* _unit_name = (_units && strlen(_units) > 0) ? _units : nullptr; - _track_data.emplace_back(perfetto::CounterTrack{ _name->c_str() } - .set_unit_name(_unit_name) - .set_category(_category) - .set_unit_multiplier(_mult) - .set_is_incremental(_incr)); - if(config::get_is_continuous_integration()) - { - for(auto& itr : _missing) - { - const char* citr = std::get<1>(itr); - for(const auto& ditr : _name_data) - { - if(citr == ditr->c_str() && strcmp(citr, ditr->c_str()) == 0) - { - std::get<2>(itr) = true; - break; - } - } - if(!std::get<2>(itr)) - { - std::set _prev = {}; - std::set _curr = {}; - for(const auto& eitr : _missing) - _prev.emplace( - static_cast(const_cast(std::get<1>(eitr)))); - for(const auto& eitr : _name_data) - _curr.emplace( - static_cast(const_cast(eitr->c_str()))); - std::stringstream _pss{}; - for(auto&& eitr : _prev) - _pss << " " << std::hex << std::setw(12) << std::left << eitr; - std::stringstream _css{}; - for(auto&& eitr : _curr) - _css << " " << std::hex << std::setw(12) << std::left << eitr; - OMNITRACE_THROW("perfetto_counter_track emplace method for '%s' (%p) " - "invalidated C-string '%s' (%p).\n%8s: %s\n%8s: %s\n", - _v.c_str(), (void*) _name->c_str(), - std::get<0>(itr).c_str(), - (void*) std::get<0>(itr).c_str(), "previous", - _pss.str().c_str(), "current", _css.str().c_str()); - } - } - } - return _index; - } + bool _incr = false); static auto& at(size_t _idx, size_t _n) { return get_data().second.at(_idx).at(_n); } @@ -170,4 +69,87 @@ private: return _v; } }; + +template +auto +perfetto_counter_track::exists(size_t _idx, int64_t _n) +{ + bool _v = get_data().second.count(_idx) != 0; + if(_n < 0 || !_v) return _v; + return static_cast(_n) < get_data().second.at(_idx).size(); +} + +template +size_t +perfetto_counter_track::size(size_t _idx) +{ + bool _v = get_data().second.count(_idx) != 0; + if(!_v) return 0; + return get_data().second.at(_idx).size(); +} + +template +auto +perfetto_counter_track::emplace(size_t _idx, const std::string& _v, + const char* _units, const char* _category, + int64_t _mult, bool _incr) +{ + auto& _name_data = get_data().first[_idx]; + auto& _track_data = get_data().second[_idx]; + std::vector> _missing = {}; + if(config::get_is_continuous_integration()) + { + for(const auto& itr : _name_data) + { + _missing.emplace_back(std::make_tuple(*itr, itr->c_str(), false)); + } + } + auto _index = _track_data.size(); + auto& _name = _name_data.emplace_back(std::make_unique(_v)); + const char* _unit_name = (_units && strlen(_units) > 0) ? _units : nullptr; + _track_data.emplace_back(::perfetto::CounterTrack{ _name->c_str() } + .set_unit_name(_unit_name) + .set_category(_category) + .set_unit_multiplier(_mult) + .set_is_incremental(_incr)); + if(config::get_is_continuous_integration()) + { + for(auto& itr : _missing) + { + const char* citr = std::get<1>(itr); + for(const auto& ditr : _name_data) + { + if(citr == ditr->c_str() && strcmp(citr, ditr->c_str()) == 0) + { + std::get<2>(itr) = true; + break; + } + } + if(!std::get<2>(itr)) + { + std::set _prev = {}; + std::set _curr = {}; + for(const auto& eitr : _missing) + _prev.emplace( + static_cast(const_cast(std::get<1>(eitr)))); + for(const auto& eitr : _name_data) + _curr.emplace(static_cast(const_cast(eitr->c_str()))); + std::stringstream _pss{}; + for(auto&& eitr : _prev) + _pss << " " << std::hex << std::setw(12) << std::left << eitr; + std::stringstream _css{}; + for(auto&& eitr : _curr) + _css << " " << std::hex << std::setw(12) << std::left << eitr; + OMNITRACE_THROW("perfetto_counter_track emplace method for '%s' (%p) " + "invalidated C-string '%s' (%p).\n%8s: %s\n%8s: %s\n", + _v.c_str(), (void*) _name->c_str(), + std::get<0>(itr).c_str(), + (void*) std::get<0>(itr).c_str(), "previous", + _pss.str().c_str(), "current", _css.str().c_str()); + } + } + } + return _index; +} + } // namespace omnitrace diff --git a/source/lib/omnitrace/library/rocm_smi.cpp b/source/lib/omnitrace/library/rocm_smi.cpp index f27dd2c7ff..bce7d222dd 100644 --- a/source/lib/omnitrace/library/rocm_smi.cpp +++ b/source/lib/omnitrace/library/rocm_smi.cpp @@ -442,20 +442,7 @@ post_process() uint32_t device_count() { - uint32_t _num_devices = 0; - try - { - static auto _rsmi_init_once = []() { OMNITRACE_ROCM_SMI_CALL(rsmi_init(0)); }; - static std::once_flag _once{}; - std::call_once(_once, _rsmi_init_once); - - OMNITRACE_ROCM_SMI_CALL(rsmi_num_monitor_devices(&_num_devices)); - } catch(std::exception& _e) - { - OMNITRACE_BASIC_VERBOSE(1, "Exception thrown getting the rocm-smi devices: %s\n", - _e.what()); - } - return _num_devices; + return gpu::rsmi_device_count(); } } // namespace rocm_smi } // namespace omnitrace diff --git a/source/lib/omnitrace/library/rocm_smi.hpp b/source/lib/omnitrace/library/rocm_smi.hpp index b22658e39a..7cdc915670 100644 --- a/source/lib/omnitrace/library/rocm_smi.hpp +++ b/source/lib/omnitrace/library/rocm_smi.hpp @@ -82,7 +82,7 @@ struct data using mem_usage_t = uint64_t; using temp_t = int64_t; - TIMEMORY_DEFAULT_OBJECT(data) + OMNITRACE_DEFAULT_OBJECT(data) explicit data(uint32_t _dev_id); diff --git a/source/lib/omnitrace/library/rocprofiler.cpp b/source/lib/omnitrace/library/rocprofiler.cpp index a318f6fb61..d0a7f12ddc 100644 --- a/source/lib/omnitrace/library/rocprofiler.cpp +++ b/source/lib/omnitrace/library/rocprofiler.cpp @@ -660,7 +660,7 @@ post_process_timemory() rocm_event* parent = nullptr; mutable std::vector children = {}; - TIMEMORY_DEFAULT_OBJECT(local_event) + OMNITRACE_DEFAULT_OBJECT(local_event) explicit local_event(rocm_event* _v) : parent{ _v } diff --git a/source/lib/omnitrace/library/roctracer.cpp b/source/lib/omnitrace/library/roctracer.cpp index 43230fdd61..0644db61b8 100644 --- a/source/lib/omnitrace/library/roctracer.cpp +++ b/source/lib/omnitrace/library/roctracer.cpp @@ -21,6 +21,7 @@ // SOFTWARE. #include "library/roctracer.hpp" +#include "library/components/category_region.hpp" #include "library/components/fwd.hpp" #include "library/config.hpp" #include "library/critical_trace.hpp" @@ -99,7 +100,7 @@ get_roctracer_kernels() auto& get_roctracer_hip_data(int64_t _tid = threading::get_id()) { - using data_t = std::unordered_map; + using data_t = std::unordered_map; using thread_data_t = thread_data; static auto& _v = thread_data_t::instances(construct_on_init{}); return _v.at(_tid); @@ -124,7 +125,7 @@ struct cid_data : cid_tuple_t { using cid_tuple_t::cid_tuple_t; - TIMEMORY_DEFAULT_OBJECT(cid_data) + OMNITRACE_DEFAULT_OBJECT(cid_data) auto& cid() { return std::get<0>(*this); } auto& pcid() { return std::get<1>(*this); } @@ -454,20 +455,12 @@ roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, { case ROCTX_API_ID_roctxRangePushA: { - if(get_use_perfetto()) - tracing::push_perfetto(category::rocm_roctx{}, _data->args.message); - - if(get_use_timemory()) - tracing::push_timemory(category::rocm_roctx{}, _data->args.message); - + component::category_region::start(_data->args.message); break; } case ROCTX_API_ID_roctxRangePop: { - if(get_use_timemory()) - tracing::pop_timemory(category::rocm_roctx{}, _data->args.message); - if(get_use_perfetto()) - tracing::pop_perfetto(category::rocm_roctx{}, _data->args.message); + component::category_region::stop(_data->args.message); break; } case ROCTX_API_ID_roctxRangeStartA: @@ -479,11 +472,7 @@ roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, std::string_view{ _data->args.message }); } - if(get_use_perfetto()) - tracing::push_perfetto(category::rocm_roctx{}, _data->args.message); - - if(get_use_timemory()) - tracing::push_timemory(category::rocm_roctx{}, _data->args.message); + component::category_region::start(_data->args.message); break; } case ROCTX_API_ID_roctxRangeStop: @@ -510,10 +499,7 @@ roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, if(!_message.empty()) { - if(get_use_timemory()) - tracing::pop_timemory(category::rocm_roctx{}, _message.data()); - if(get_use_perfetto()) - tracing::pop_perfetto(category::rocm_roctx{}, _message.data()); + component::category_region::stop(_message.data()); } break; @@ -733,8 +719,8 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* } if(get_use_timemory()) { - auto itr = get_roctracer_hip_data()->emplace(_corr_id, - roctracer_bundle_t{ op_name }); + auto itr = get_roctracer_hip_data()->emplace( + _corr_id, roctracer_hip_bundle_t{ op_name }); if(itr.second) { itr.first->second.start(); @@ -983,7 +969,7 @@ hip_activity_callback(const char* begin, const char* end, void* arg) if(_found && _name != nullptr && get_use_timemory()) { auto _func = [_beg_ns, _end_ns, _name]() { - roctracer_bundle_t _bundle{ _name }; + roctracer_hip_bundle_t _bundle{ _name }; _bundle.start() .store(std::plus{}, static_cast(_end_ns - _beg_ns)) .stop() diff --git a/source/lib/omnitrace/library/roctracer.hpp b/source/lib/omnitrace/library/roctracer.hpp index cf6085802e..b555b43b91 100644 --- a/source/lib/omnitrace/library/roctracer.hpp +++ b/source/lib/omnitrace/library/roctracer.hpp @@ -46,10 +46,10 @@ namespace omnitrace { -using roctracer_bundle_t = - tim::component_bundle; +using roctracer_hip_bundle_t = + tim::component_bundle; using roctracer_hsa_bundle_t = - tim::component_bundle; + tim::component_bundle; using roctracer_functions_t = std::vector>>; // HSA API callback function diff --git a/source/lib/omnitrace/library/runtime.cpp b/source/lib/omnitrace/library/runtime.cpp index 5a61e92154..61e6eee0c0 100644 --- a/source/lib/omnitrace/library/runtime.cpp +++ b/source/lib/omnitrace/library/runtime.cpp @@ -89,35 +89,6 @@ sampling_on_child_threads() } } // namespace -int -get_realtime_signal() -{ - return SIGRTMIN + config::get_sampling_rtoffset(); -} - -int -get_cputime_signal() -{ - return SIGPROF; -} - -std::set get_sampling_signals(int64_t) -{ - auto _v = std::set{}; - if(config::get_use_causal()) - { - _v.emplace(get_cputime_signal()); - _v.emplace(get_realtime_signal()); - } - else - { - if(config::get_use_sampling_cputime()) _v.emplace(get_cputime_signal()); - if(config::get_use_sampling_realtime()) _v.emplace(get_realtime_signal()); - } - - return _v; -} - std::atomic& get_cpu_cid() { diff --git a/source/lib/omnitrace/library/runtime.hpp b/source/lib/omnitrace/library/runtime.hpp index 6bfe6f3502..da1b2cbd0a 100644 --- a/source/lib/omnitrace/library/runtime.hpp +++ b/source/lib/omnitrace/library/runtime.hpp @@ -78,15 +78,6 @@ get_init_bundle(); std::unique_ptr& get_preinit_bundle(); -int -get_realtime_signal(); - -int -get_cputime_signal(); - -std::set -get_sampling_signals(int64_t _tid = 0); - std::atomic& get_cpu_cid() TIMEMORY_HOT; diff --git a/source/lib/omnitrace/library/sampling.cpp b/source/lib/omnitrace/library/sampling.cpp index aa81055645..a32433f6a9 100644 --- a/source/lib/omnitrace/library/sampling.cpp +++ b/source/lib/omnitrace/library/sampling.cpp @@ -854,11 +854,19 @@ void post_process_perfetto(int64_t _tid, const bundle_t* _init, const std::vector& _data) { + auto _valid_metrics = backtrace_metrics::valid_array_t{}; + + for(const auto& itr : _data) + { + const auto* _bt_mt = itr->get(); + if(_bt_mt) _valid_metrics |= _bt_mt->get_valid(); + } + if(trait::runtime_enabled::get()) { OMNITRACE_VERBOSE(3 || get_debug_sampling(), "[%li] Post-processing metrics for perfetto...\n", _tid); - backtrace_metrics::init_perfetto(_tid); + backtrace_metrics::init_perfetto(_tid, _valid_metrics); for(const auto& itr : _data) { const auto* _bt_metrics = itr->get(); @@ -867,8 +875,7 @@ post_process_perfetto(int64_t _tid, const bundle_t* _init, if(_bt_time->get_tid() != _tid) continue; _bt_metrics->post_process_perfetto(_tid, _bt_time->get_timestamp()); } - - backtrace_metrics::fini_perfetto(_tid); + backtrace_metrics::fini_perfetto(_tid, _valid_metrics); } OMNITRACE_VERBOSE(3 || get_debug_sampling(), @@ -936,6 +943,12 @@ post_process_perfetto(int64_t _tid, const bundle_t* _init, _bt_mt->get_hw_counters().size() == _last->get()->get_hw_counters().size(); + auto _hw_counters_enabled = [](const auto* _bt_v) { + return (_bt_v != nullptr) && + (*_bt_v)(type_list{}) && + (*_bt_v)(category::thread_hardware_counter{}); + }; + // annotations common to both modes auto _common_annotate = [&](::perfetto::EventContext& ctx, bool _is_last) { if(_include_common && _is_last) @@ -943,7 +956,9 @@ post_process_perfetto(int64_t _tid, const bundle_t* _init, tracing::add_perfetto_annotation(ctx, "begin_ns", _beg); tracing::add_perfetto_annotation(ctx, "end_ns", _end); } - if(_include_hw && _is_last) + if(_include_hw && _is_last && _last && + _hw_counters_enabled(_last->get()) && + _hw_counters_enabled(_bt_mt)) { // current values when read auto _hw_cnt_vals = _bt_mt->get_hw_counters(); @@ -1048,16 +1063,15 @@ post_process_timemory(int64_t _tid, const bundle_t* _init, using bundle_t = tim::lightweight_tuple; - auto* _bt_data = itr->get(); - auto* _bt_time = itr->get(); - auto* _bt_metrics = itr->get(); + auto* _bt_data = itr->get(); + auto* _bt_time = itr->get(); + auto* _bt_metrics = itr->get(); + const auto* _last_metrics = _last->get(); - if(!_bt_data || !_bt_time || !_bt_metrics) continue; + if(!_bt_data || !_bt_time) continue; double _elapsed_wc = (_bt_time->get_timestamp() - _last->get()->get_timestamp()); - double _elapsed_cc = (_bt_metrics->get_cpu_timestamp() - - _last->get()->get_cpu_timestamp()); std::vector _tc{}; _tc.reserve(_bt_data->size()); @@ -1090,31 +1104,45 @@ post_process_timemory(int64_t _tid, const bundle_t* _init, if constexpr(tim::trait::is_available::value) { auto* _cc = iitr.get(); - if(_cc) + + if(_cc && _bt_metrics && _last_metrics && + (*_bt_metrics)(category::thread_cpu_time{}) && + (*_last_metrics)(category::thread_cpu_time{})) { + double _elapsed_cc = (_bt_metrics->get_cpu_timestamp() - + _last_metrics->get_cpu_timestamp()); + _cc->set_value(_elapsed_cc / sampling_cpu_clock::get_unit()); _cc->set_accum(_elapsed_cc / sampling_cpu_clock::get_unit()); } } if constexpr(tim::trait::is_available::value) { - auto _hw_cnt_vals = _bt_metrics->get_hw_counters(); - if(_last && _bt_metrics->get_hw_counters().size() == - _last->get()->get_hw_counters().size()) + auto _hw_counters_enabled = [](const auto* _bt_v) { + return (_bt_v != nullptr) && + (*_bt_v)(type_list{}) && + (*_bt_v)(category::thread_hardware_counter{}); + }; + + if(_bt_metrics && _last_metrics && _hw_counters_enabled(_bt_metrics) && + _hw_counters_enabled(_last_metrics)) { - for(size_t k = 0; k < _bt_metrics->get_hw_counters().size(); ++k) + auto _hw_cnt_vals = _bt_metrics->get_hw_counters(); + if(_bt_metrics->get_hw_counters().size() == + _last_metrics->get_hw_counters().size()) { - if(_last->get()->get_hw_counters()[k] > - _hw_cnt_vals[k]) - _hw_cnt_vals[k] -= - _last->get()->get_hw_counters()[k]; + for(size_t k = 0; k < _bt_metrics->get_hw_counters().size(); ++k) + { + if(_last_metrics->get_hw_counters()[k] > _hw_cnt_vals[k]) + _hw_cnt_vals[k] -= _last_metrics->get_hw_counters()[k]; + } + } + auto* _hw_counter = iitr.get(); + if(_hw_counter) + { + _hw_counter->set_value(_hw_cnt_vals); + _hw_counter->set_accum(_hw_cnt_vals); } - } - auto* _hw_counter = iitr.get(); - if(_hw_counter) - { - _hw_counter->set_value(_hw_cnt_vals); - _hw_counter->set_accum(_hw_cnt_vals); } } iitr.pop(); diff --git a/source/lib/omnitrace/library/thread_info.cpp b/source/lib/omnitrace/library/thread_info.cpp index ee3280df55..852e415a1c 100644 --- a/source/lib/omnitrace/library/thread_info.cpp +++ b/source/lib/omnitrace/library/thread_info.cpp @@ -98,6 +98,15 @@ init_index_data(int64_t _tid, bool _offset = false) const auto unknown_thread = std::optional{}; } // namespace +std::string +thread_index_data::as_string() const +{ + auto _ss = std::stringstream{}; + _ss << sequent_value << " [" << as_hex(system_value) << "] (#" << internal_value + << ")"; + return _ss.str(); +} + int64_t grow_data(int64_t _tid) { diff --git a/source/lib/omnitrace/library/thread_info.hpp b/source/lib/omnitrace/library/thread_info.hpp index 89c2163a7c..07738bbfb7 100644 --- a/source/lib/omnitrace/library/thread_info.hpp +++ b/source/lib/omnitrace/library/thread_info.hpp @@ -64,6 +64,8 @@ struct thread_index_data int64_t internal_value = utility::get_thread_index(); int64_t system_value = tim::threading::get_sys_tid(); int64_t sequent_value = tim::threading::get_id(); + + std::string as_string() const; }; int64_t grow_data(int64_t); diff --git a/source/lib/omnitrace/library/tracing.cpp b/source/lib/omnitrace/library/tracing.cpp index 365da846d3..5956b5ce29 100644 --- a/source/lib/omnitrace/library/tracing.cpp +++ b/source/lib/omnitrace/library/tracing.cpp @@ -34,20 +34,6 @@ bool debug_pop = tim::get_env("OMNITRACE_DEBUG_POP", false) || get_debug_env(); bool debug_mark = tim::get_env("OMNITRACE_DEBUG_MARK", false) || get_debug_env(); bool debug_user = tim::get_env("OMNITRACE_DEBUG_USER_REGIONS", false) || get_debug_env(); -perfetto::TraceConfig& -get_perfetto_config() -{ - static auto _v = ::perfetto::TraceConfig{}; - return _v; -} - -std::unique_ptr& -get_perfetto_session() -{ - static auto _v = std::unique_ptr{}; - return _v; -} - std::unordered_map& get_perfetto_track_uuids() { @@ -114,7 +100,6 @@ thread_init() process::get_id(), "thread", threading::get_id()), quirk::config{}); - get_interval_data()->reserve(512); // save the hash maps get_timemory_hash_ids() = tim::get_hash_ids(); get_timemory_hash_aliases() = tim::get_hash_aliases(); diff --git a/source/lib/omnitrace/library/tracing.hpp b/source/lib/omnitrace/library/tracing.hpp index 0cd61932e1..586a900f59 100644 --- a/source/lib/omnitrace/library/tracing.hpp +++ b/source/lib/omnitrace/library/tracing.hpp @@ -40,6 +40,7 @@ #include #include +#include #include #include @@ -70,12 +71,6 @@ extern OMNITRACE_HIDDEN_API bool debug_mark; std::unordered_map& get_perfetto_track_uuids(); -perfetto::TraceConfig& -get_perfetto_config(); - -std::unique_ptr& -get_perfetto_session(); - tim::hash_map_ptr_t& get_timemory_hash_ids(int64_t _tid = threading::get_id()); @@ -91,6 +86,46 @@ record_thread_start_time(); void thread_init(); +template +auto& +get_category_stack(); + +template +inline void +push_perfetto(CategoryT, const char*, Args&&...); + +template +inline void +pop_perfetto(CategoryT, const char*, Args&&...); + +template +inline void +push_perfetto_ts(CategoryT, const char*, uint64_t _ts, Args&&...); + +template +inline void +pop_perfetto_ts(CategoryT, const char*, uint64_t, Args&&...); + +template +inline void +push_perfetto_track(CategoryT, const char*, perfetto::Track, uint64_t, Args&&...); + +template +inline void +pop_perfetto_track(CategoryT, const char*, perfetto::Track, uint64_t, Args&&...); + +template +inline void +mark_perfetto(CategoryT, const char*, Args&&...); + +template +inline void +mark_perfetto_ts(CategoryT, const char*, uint64_t, Args&&...); + +template +inline void +mark_perfetto_track(CategoryT, const char*, perfetto::Track, uint64_t, Args&&...); + // // definitions // @@ -147,13 +182,6 @@ now() return ::tim::get_clock_real_now(); } -inline auto& -get_interval_data(int64_t _tid = threading::get_id()) -{ - static auto& _v = interval_data_instances::instances(construct_on_init{}); - return _v.at(_tid); -} - inline auto& get_instrumentation_bundles(int64_t _tid = threading::get_id()) { @@ -174,44 +202,128 @@ pop_count() return _v; } -template -inline void -push_timemory(CategoryT, const char* name, Args&&... args) +struct category_stack { - // skip if category is disabled - if(!trait::runtime_enabled::get()) return; + int32_t profile = 0; // use signed so compiler doesn't have to + int32_t tracing = 0; // account for underflow/overflow +}; - auto& _data = tracing::get_instrumentation_bundles(); - // this generates a hash for the raw string array - auto _hash = tim::add_hash_id(tim::string_view_t{ name }); - _data.construct(_hash)->start(std::forward(args)...); +template +auto& +get_category_stack() +{ + static thread_local auto _v = category_stack{}; + return _v; +} + +template +auto& +get_tracing_stack() +{ + return get_category_stack().tracing; +} + +template +auto& +get_profile_stack() +{ + return get_category_stack().profile; +} + +template +auto +category_push_disabled() +{ + return !trait::runtime_enabled::get(); +} + +template +auto +category_mark_disabled() +{ + return !trait::runtime_enabled::get(); +} + +template +auto +category_pop_disabled() +{ + return !trait::runtime_enabled::get() && + (get_profile_stack() + get_tracing_stack()) <= 0; +} + +template +auto +tracing_pop_disabled() +{ + return !trait::runtime_enabled::get() && + get_tracing_stack() <= 0; +} + +template +auto +profile_pop_disabled() +{ + return !trait::runtime_enabled::get() && + get_profile_stack() <= 0; } template inline void -pop_timemory(CategoryT, const char* name, Args&&... args) +push_timemory(CategoryT, std::string_view name, Args&&... args) { // skip if category is disabled - if(!trait::runtime_enabled::get()) return; + if(category_push_disabled()) return; - auto _hash = tim::hash::get_hash_id(tim::string_view_t{ name }); auto& _data = tracing::get_instrumentation_bundles(); - if(_data.bundles.empty()) + // this generates a hash for the raw string array + auto _hash = tim::add_hash_id(name); + _data.construct(_hash)->start(std::forward(args)...); + // increment the profile stack + ++get_profile_stack(); +} + +template +inline void +pop_timemory(CategoryT, std::string_view name, Args&&... args) +{ + // skip if category is disabled and not pushed on this thread + if(profile_pop_disabled()) return; + + auto _hash = tim::hash::get_hash_id(name); + auto& _data = tracing::get_instrumentation_bundles(); + if(OMNITRACE_UNLIKELY(_data.bundles.empty())) { OMNITRACE_DEBUG("[%s] skipped %s :: empty bundle stack\n", "omnitrace_pop_trace", - name); + name.data()); return; } - for(size_t i = _data.bundles.size(); i > 0; --i) + + auto*& _v_back = _data.bundles.back(); + if(OMNITRACE_LIKELY(_v_back->get_hash() == _hash)) { - auto*& _v = _data.bundles.at(i - 1); - if(_v->get_hash() == _hash) + // decrement the profile stack + --get_profile_stack(); + _v_back->stop(std::forward(args)...); + _data.allocator.destroy(_v_back); + _data.allocator.deallocate(_v_back, 1); + _data.bundles.erase(--_data.bundles.end()); + } + else if(_data.bundles.size() > 1) + { + for(size_t i = _data.bundles.size() - 1; i > 0; --i) { - _v->stop(std::forward(args)...); - _data.allocator.destroy(_v); - _data.allocator.deallocate(_v, 1); - _data.bundles.erase(_data.bundles.begin() + (i - 1)); - break; + auto*& _v = _data.bundles.at(i - 1); + if(_v->get_hash() == _hash) + { + // decrement the profile stack + --get_profile_stack(); + _v->stop(std::forward(args)...); + _data.allocator.destroy(_v); + _data.allocator.deallocate(_v, 1); + _data.bundles.erase(_data.bundles.begin() + (i - 1)); + break; + } } } } @@ -221,12 +333,13 @@ inline void push_perfetto(CategoryT, const char* name, Args&&... args) { // skip if category is disabled - if(!trait::runtime_enabled::get()) return; + if(category_push_disabled()) return; - uint64_t _ts = comp::wall_clock::record(); if constexpr(sizeof...(Args) == 1 && std::is_invocable::value) { + ++get_tracing_stack(); + uint64_t _ts = now(); if(config::get_perfetto_annotations()) { TRACE_EVENT_BEGIN(trait::name::value, perfetto::StaticString(name), @@ -240,28 +353,48 @@ push_perfetto(CategoryT, const char* name, Args&&... args) } else { - TRACE_EVENT_BEGIN(trait::name::value, perfetto::StaticString(name), - _ts, std::forward(args)..., - [&](perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "begin_ns", _ts); - } - }); + using tuple_type = std::tuple...>; + using arg0_type = concepts::tuple_element_t<0, tuple_type>; + using arg1_type = concepts::tuple_element_t<1, tuple_type>; + + if constexpr(std::is_same::value && + std::is_same::value) + { + push_perfetto_track(CategoryT{}, name, std::forward(args)...); + } + else if constexpr(std::is_same::value) + { + push_perfetto_ts(CategoryT{}, name, std::forward(args)...); + } + else + { + ++get_tracing_stack(); + uint64_t _ts = now(); + TRACE_EVENT_BEGIN( + trait::name::value, perfetto::StaticString(name), _ts, + std::forward(args)..., [&](perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "begin_ns", _ts); + } + }); + } } } template inline void -pop_perfetto(CategoryT, const char*, Args&&... args) +pop_perfetto(CategoryT, const char* name, Args&&... args) { - // skip if category is disabled - if(!trait::runtime_enabled::get()) return; + // skip if category is disabled and not pushed on this thread + if(tracing_pop_disabled()) return; - uint64_t _ts = comp::wall_clock::record(); if constexpr(sizeof...(Args) == 1 && std::is_invocable::value) { + // decrement tracing stack + --get_tracing_stack(); + uint64_t _ts = now(); if(config::get_perfetto_annotations()) { TRACE_EVENT_END(trait::name::value, _ts, "end_ns", _ts, @@ -275,14 +408,35 @@ pop_perfetto(CategoryT, const char*, Args&&... args) } else { - TRACE_EVENT_END(trait::name::value, _ts, std::forward(args)..., - [&](perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "end_ns", _ts); - } - }); + using tuple_type = std::tuple...>; + using arg0_type = concepts::tuple_element_t<0, tuple_type>; + using arg1_type = concepts::tuple_element_t<1, tuple_type>; + + if constexpr(std::is_same::value && + std::is_same::value) + { + pop_perfetto_track(CategoryT{}, name, std::forward(args)...); + } + else if constexpr(std::is_same::value) + { + pop_perfetto_ts(CategoryT{}, name, std::forward(args)...); + } + else + { + // decrement tracing stack + --get_tracing_stack(); + uint64_t _ts = now(); + TRACE_EVENT_END(trait::name::value, _ts, + std::forward(args)..., [&](perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "end_ns", _ts); + } + }); + } } + + (void) name; } template @@ -290,8 +444,9 @@ inline void push_perfetto_ts(CategoryT, const char* name, uint64_t _ts, Args&&... args) { // skip if category is disabled - if(!trait::runtime_enabled::get()) return; + if(category_push_disabled()) return; + ++get_tracing_stack(); TRACE_EVENT_BEGIN(trait::name::value, perfetto::StaticString(name), _ts, std::forward(args)...); } @@ -300,8 +455,11 @@ template inline void pop_perfetto_ts(CategoryT, const char*, uint64_t _ts, Args&&... args) { - // skip if category is disabled - if(!trait::runtime_enabled::get()) return; + // skip if category is disabled and not pushed on this thread + if(tracing_pop_disabled()) return; + + // decrement tracing stack + --get_tracing_stack(); TRACE_EVENT_END(trait::name::value, _ts, std::forward(args)...); } @@ -311,6 +469,10 @@ inline void push_perfetto_track(CategoryT, const char* name, perfetto::Track _track, uint64_t _ts, Args&&... args) { + // skip if category is disabled + if(category_push_disabled()) return; + + ++get_tracing_stack(); TRACE_EVENT_BEGIN(trait::name::value, perfetto::StaticString(name), _track, _ts, std::forward(args)...); } @@ -320,8 +482,91 @@ inline void pop_perfetto_track(CategoryT, const char*, perfetto::Track _track, uint64_t _ts, Args&&... args) { + // skip if category is disabled and not pushed on this thread + if(tracing_pop_disabled()) return; + + // decrement tracing stack + --get_tracing_stack(); + TRACE_EVENT_END(trait::name::value, _track, _ts, std::forward(args)...); } + +template +inline void +mark_perfetto(CategoryT, const char* name, Args&&... args) +{ + // skip if category is disabled + if(category_mark_disabled()) return; + + if constexpr(sizeof...(Args) == 1 && + std::is_invocable::value) + { + uint64_t _ts = now(); + if(config::get_perfetto_annotations()) + { + TRACE_EVENT_INSTANT(trait::name::value, + perfetto::StaticString(name), _ts, "ns", _ts, + std::forward(args)...); + } + else + { + TRACE_EVENT_INSTANT(trait::name::value, + perfetto::StaticString(name), _ts, + std::forward(args)...); + } + } + else + { + using tuple_type = std::tuple...>; + using arg0_type = concepts::tuple_element_t<0, tuple_type>; + using arg1_type = concepts::tuple_element_t<1, tuple_type>; + + if constexpr(std::is_same::value && + std::is_same::value) + { + mark_perfetto_track(CategoryT{}, name, std::forward(args)...); + } + else if constexpr(std::is_same::value) + { + mark_perfetto_ts(CategoryT{}, name, std::forward(args)...); + } + else + { + uint64_t _ts = now(); + TRACE_EVENT_INSTANT( + trait::name::value, perfetto::StaticString(name), _ts, + std::forward(args)..., [&](perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "ns", _ts); + } + }); + } + } +} + +template +inline void +mark_perfetto_ts(CategoryT, const char* name, uint64_t _ts, Args&&... args) +{ + // skip if category is disabled + if(category_mark_disabled()) return; + + TRACE_EVENT_INSTANT(trait::name::value, perfetto::StaticString(name), _ts, + std::forward(args)...); +} + +template +inline void +mark_perfetto_track(CategoryT, const char*, perfetto::Track _track, uint64_t _ts, + Args&&... args) +{ + // skip if category is disabled + if(category_mark_disabled()) return; + + TRACE_EVENT_INSTANT(trait::name::value, _track, _ts, + std::forward(args)...); +} } // namespace tracing } // namespace omnitrace diff --git a/source/lib/omnitrace/library/utility.hpp b/source/lib/omnitrace/library/utility.hpp index 4a372dcb97..17e69c6c90 100644 --- a/source/lib/omnitrace/library/utility.hpp +++ b/source/lib/omnitrace/library/utility.hpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -226,5 +227,16 @@ get_regex_or(const ContainerT& _container, PredicateT&& _predicate return get_regex_or(_dest, _fallback); } + +template +Tp +convert(std::string_view _inp) +{ + auto _iss = std::stringstream{}; + auto _ret = Tp{}; + _iss << _inp; + _iss >> _ret; + return _ret; +} } // namespace utility } // namespace omnitrace diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bd95a8c453..35c5e4765c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -504,7 +504,7 @@ set(_ompt_preload_environ "OMNITRACE_SAMPLING_REALTIME=ON" "OMNITRACE_SAMPLING_CPUTIME_FREQ=1000" "OMNITRACE_SAMPLING_REALTIME_FREQ=500" - "OMNITRACE_COLORIZED_LOG=OFF") + "OMNITRACE_MONOCHROME=ON") set(_ompt_sample_no_tmpfiles_environ "${_ompt_environment}" @@ -516,7 +516,7 @@ set(_ompt_sample_no_tmpfiles_environ "OMNITRACE_SAMPLING_REALTIME=OFF" "OMNITRACE_SAMPLING_CPUTIME_FREQ=700" "OMNITRACE_USE_TEMPORARY_FILES=OFF" - "OMNITRACE_COLORIZED_LOG=OFF") + "OMNITRACE_MONOCHROME=ON") set(_ompt_preload_samp_regex "Sampler for thread 0 will be triggered 1000.0x per second of CPU-time(.*)Sampler for thread 0 will be triggered 500.0x per second of wall-time(.*)Sampling will be disabled after 0.250000 seconds(.*)Sampling duration of 0.250000 seconds has elapsed. Shutting down sampling" @@ -684,6 +684,111 @@ omnitrace_add_test( RUNTIME_PASS_REGEX "(\\\[[0-9]+\\\]) function coverage :: 66.67%" REWRITE_RUN_PASS_REGEX "(\\\[[0-9]+\\\]) function coverage :: 66.67%") +omnitrace_add_test( + SKIP_BASELINE SKIP_SAMPLING SKIP_PRELOAD + NAME trace-time-window + TARGET trace-time-window + REWRITE_ARGS -e -v 2 --caller-include inner -i 4096 + RUNTIME_ARGS -e -v 1 --caller-include inner -i 4096 + LABELS "time-window" + ENVIRONMENT "${_window_environment};OMNITRACE_TRACE_DURATION=1.25") + +omnitrace_add_validation_test( + NAME trace-time-window-binary-rewrite + TIMEMORY_METRIC "wall_clock" + TIMEMORY_FILE "wall_clock.json" + PERFETTO_METRIC "host" + PERFETTO_FILE "perfetto-trace.proto" + LABELS "time-window" + FAIL_REGEX "outer_d" + ARGS -l + main + outer_a + outer_b + outer_c + -c + 1 + 1 + 1 + 1 + -d + 0 + 1 + 1 + 1 + -p) + +omnitrace_add_validation_test( + NAME trace-time-window-runtime-instrument + TIMEMORY_METRIC "wall_clock" + TIMEMORY_FILE "wall_clock.json" + PERFETTO_METRIC "host" + PERFETTO_FILE "perfetto-trace.proto" + LABELS "time-window" + FAIL_REGEX "outer_d" + ARGS -l + main + outer_a + outer_b + outer_c + -c + 1 + 1 + 1 + 1 + -d + 0 + 1 + 1 + 1 + -p) + +omnitrace_add_test( + SKIP_BASELINE SKIP_SAMPLING SKIP_PRELOAD + NAME trace-time-window-delay + TARGET trace-time-window + REWRITE_ARGS -e -v 2 --caller-include inner -i 4096 + RUNTIME_ARGS -e -v 1 --caller-include inner -i 4096 + LABELS "time-window" + ENVIRONMENT + "${_window_environment};OMNITRACE_TRACE_DELAY=0.75;OMNITRACE_TRACE_DURATION=0.75") + +omnitrace_add_validation_test( + NAME trace-time-window-delay-binary-rewrite + TIMEMORY_METRIC "wall_clock" + TIMEMORY_FILE "wall_clock.json" + PERFETTO_METRIC "host" + PERFETTO_FILE "perfetto-trace.proto" + LABELS "time-window" + ARGS -l + outer_c + outer_d + -c + 1 + 1 + -d + 0 + 0 + -p) + +omnitrace_add_validation_test( + NAME trace-time-window-delay-runtime-instrument + TIMEMORY_METRIC "wall_clock" + TIMEMORY_FILE "wall_clock.json" + PERFETTO_METRIC "host" + PERFETTO_FILE "perfetto-trace.proto" + LABELS "time-window" + ARGS -l + outer_c + outer_d + -c + 1 + 1 + -d + 0 + 0 + -p) + # -------------------------------------------------------------------------------------- # # # critical-trace tests @@ -823,6 +928,10 @@ foreach(_TARGET ${RCCL_TEST_TARGETS}) line return args + -ME + sysdeps + --log-file + rccl-test-${_NAME}.log RUN_ARGS -t 1 -g @@ -910,7 +1019,7 @@ omnitrace_add_causal_test( ) set(_causal_common_args - "-n 10 -e -s 0 10 20 30 -B $") + "-n 20 -e -s 0 10 20 30 -B $") macro( causal_e2e_args_and_validation @@ -945,7 +1054,7 @@ omnitrace_add_causal_test( SKIP_BASELINE NAME cpu-omni-slow-func-e2e TARGET causal-cpu-omni - RUN_ARGS 80 30 432525 200000000 + RUN_ARGS 80 12 432525 250000000 CAUSAL_MODE "func" CAUSAL_ARGS ${_causal_slow_func_args} CAUSAL_VALIDATE_ARGS ${_causal_slow_func_valid} @@ -957,7 +1066,7 @@ omnitrace_add_causal_test( SKIP_BASELINE NAME cpu-omni-fast-func-e2e TARGET causal-cpu-omni - RUN_ARGS 80 30 432525 200000000 + RUN_ARGS 80 12 432525 250000000 CAUSAL_MODE "func" CAUSAL_ARGS ${_causal_fast_func_args} CAUSAL_VALIDATE_ARGS ${_causal_fast_func_valid} @@ -969,7 +1078,7 @@ omnitrace_add_causal_test( SKIP_BASELINE NAME cpu-omni-line-155-e2e TARGET causal-cpu-omni - RUN_ARGS 80 30 432525 200000000 + RUN_ARGS 80 12 432525 250000000 CAUSAL_MODE "line" CAUSAL_ARGS ${_causal_line_155_args} CAUSAL_VALIDATE_ARGS ${_causal_line_155_valid} @@ -981,7 +1090,7 @@ omnitrace_add_causal_test( SKIP_BASELINE NAME cpu-omni-line-165-e2e TARGET causal-cpu-omni - RUN_ARGS 80 30 432525 200000000 + RUN_ARGS 80 12 432525 250000000 CAUSAL_MODE "line" CAUSAL_ARGS ${_causal_line_165_args} CAUSAL_VALIDATE_ARGS ${_causal_line_165_valid} diff --git a/tests/omnitrace-testing.cmake b/tests/omnitrace-testing.cmake index c3907b8b26..d733fb2ee3 100644 --- a/tests/omnitrace-testing.cmake +++ b/tests/omnitrace-testing.cmake @@ -164,6 +164,17 @@ set(_rccl_environment "${_test_openmp_env}" "${_test_library_path}") +set(_window_environment + "OMNITRACE_USE_PERFETTO=ON" + "OMNITRACE_USE_TIMEMORY=ON" + "OMNITRACE_USE_SAMPLING=OFF" + "OMNITRACE_USE_PROCESS_SAMPLING=OFF" + "OMNITRACE_TIME_OUTPUT=OFF" + "OMNITRACE_FILE_OUTPUT=ON" + "OMNITRACE_VERBOSE=2" + "${_test_openmp_env}" + "${_test_library_path}") + # -------------------------------------------------------------------------------------- # set(MPIEXEC_EXECUTABLE_ARGS) @@ -231,7 +242,7 @@ endif() function(OMNITRACE_WRITE_TEST_CONFIG _FILE _ENV) set(_ENV_ONLY - "OMNITRACE_(MODE|USE_MPIP|DEBUG_SETTINGS|FORCE_ROCPROFILER_INIT|DEFAULT_MIN_INSTRUCTIONS|COLORIZED_LOG)=" + "OMNITRACE_(MODE|USE_MPIP|DEBUG_SETTINGS|FORCE_ROCPROFILER_INIT|DEFAULT_MIN_INSTRUCTIONS|MONOCHROME)=" ) set(_FILE_CONTENTS) set(_ENV_CONTENTS) @@ -436,7 +447,7 @@ function(OMNITRACE_ADD_TEST) set(_environ "OMNITRACE_DEFAULT_MIN_INSTRUCTIONS=64" "${TEST_ENVIRONMENT}" - "OMNITRACE_OUTPUT_PATH=omnitrace-tests-output" + "OMNITRACE_OUTPUT_PATH=${PROJECT_BINARY_DIR}/omnitrace-tests-output" "OMNITRACE_OUTPUT_PREFIX=${_prefix}") set(_timeout ${TEST_REWRITE_TIMEOUT}) @@ -575,7 +586,7 @@ function(OMNITRACE_ADD_CAUSAL_TEST) set(_environ "${_causal_environment}" - "OMNITRACE_OUTPUT_PATH=omnitrace-tests-output" + "OMNITRACE_OUTPUT_PATH=${PROJECT_BINARY_DIR}/omnitrace-tests-output" "OMNITRACE_OUTPUT_PREFIX=${_prefix}" "OMNITRACE_CI=ON" "OMNITRACE_USE_PID=OFF" @@ -739,3 +750,146 @@ function(OMNITRACE_ADD_PYTHON_TEST) ${_TEST_PROPERTIES}) endforeach() endfunction() + +# -------------------------------------------------------------------------------------- # +# +# Find Python3 interpreter for output validation +# +# -------------------------------------------------------------------------------------- # + +if(NOT OMNITRACE_USE_PYTHON) + find_package(Python3 QUIET COMPONENTS Interpreter) + + if(Python3_FOUND) + set(OMNITRACE_VALIDATION_PYTHON ${Python3_EXECUTABLE}) + execute_process(COMMAND ${Python3_EXECUTABLE} -c "import perfetto" + RESULT_VARIABLE OMNITRACE_VALIDATION_PYTHON_PERFETTO) + + if(NOT OMNITRACE_VALIDATION_PYTHON_PERFETTO EQUAL 0) + omnitrace_message(AUTHOR_WARNING + "Python3 found but perfetto support is disabled") + endif() + endif() +else() + set(_INDEX 0) + foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS}) + if(NOT OMNITRACE_USE_PYTHON) + continue() + endif() + + list(GET OMNITRACE_PYTHON_ROOT_DIRS ${_INDEX} _PYTHON_ROOT_DIR) + + omnitrace_find_python( + _PYTHON + ROOT_DIR "${_PYTHON_ROOT_DIR}" + COMPONENTS Interpreter) + + if(_PYTHON_EXECUTABLE) + set(OMNITRACE_VALIDATION_PYTHON ${_PYTHON_EXECUTABLE}) + execute_process(COMMAND ${_PYTHON_EXECUTABLE} -c "import perfetto" + RESULT_VARIABLE OMNITRACE_VALIDATION_PYTHON_PERFETTO) + + # prefer Python3 with perfetto support + if(OMNITRACE_VALIDATION_PYTHON_PERFETTO EQUAL 0) + break() + else() + omnitrace_message( + AUTHOR_WARNING + "${_PYTHON_EXECUTABLE} found but perfetto support is disabled") + endif() + endif() + + math(EXPR _INDEX "${_INDEX} + 1") + endforeach() +endif() + +if(NOT OMNITRACE_VALIDATION_PYTHON) + omnitrace_message(AUTHOR_WARNING + "Python3 interpreter not found. Validation tests will be disabled") +endif() + +# -------------------------------------------------------------------------------------- # +# +# Output validation test function +# +# -------------------------------------------------------------------------------------- # + +function(OMNITRACE_ADD_VALIDATION_TEST) + + if(NOT OMNITRACE_VALIDATION_PYTHON) + return() + endif() + + cmake_parse_arguments( + TEST + "" + "NAME;TIMEOUT;TIMEMORY_METRIC;TIMEMORY_FILE;PERFETTO_METRIC;PERFETTO_FILE" + "ENVIRONMENT;LABELS;PROPERTIES;PASS_REGEX;FAIL_REGEX;SKIP_REGEX;DEPENDS;ARGS" + ${ARGN}) + + if(NOT TEST_TIMEOUT) + set(TEST_TIMEOUT 30) + endif() + + set(PYTHON_EXECUTABLE "${OMNITRACE_VALIDATION_PYTHON}") + + list(APPEND TEST_LABELS "validate") + foreach(_DEP ${TEST_DEPENDS}) + list(APPEND TEST_LABELS "validate-${_DEP}") + endforeach() + + list(APPEND TEST_DEPENDS "${TEST_NAME}") + + if(NOT TEST_PASS_REGEX) + set(TEST_PASS_REGEX + "omnitrace-tests-output/${TEST_NAME}/(${TEST_TIMEMORY_FILE}|${TEST_PERFETTO_FILE}) validated" + ) + endif() + + add_test( + NAME validate-${TEST_NAME}-timemory + COMMAND + ${OMNITRACE_VALIDATION_PYTHON} + ${CMAKE_CURRENT_LIST_DIR}/validate-timemory-json.py -m ${TEST_TIMEMORY_METRIC} + ${TEST_ARGS} -i + ${PROJECT_BINARY_DIR}/omnitrace-tests-output/${TEST_NAME}/${TEST_TIMEMORY_FILE} + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) + + if(OMNITRACE_VALIDATION_PYTHON_PERFETTO EQUAL 0) + add_test( + NAME validate-${TEST_NAME}-perfetto + COMMAND + ${OMNITRACE_VALIDATION_PYTHON} + ${CMAKE_CURRENT_LIST_DIR}/validate-perfetto-proto.py -m + ${TEST_PERFETTO_METRIC} ${TEST_ARGS} -i + ${PROJECT_BINARY_DIR}/omnitrace-tests-output/${TEST_NAME}/${TEST_PERFETTO_FILE} + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) + endif() + + foreach(_TEST validate-${TEST_NAME}-timemory validate-${TEST_NAME}-perfetto) + + if(NOT TEST "${_TEST}") + continue() + endif() + + set_tests_properties( + ${_TEST} + PROPERTIES ENVIRONMENT + "${_TEST_ENV}" + TIMEOUT + ${TEST_TIMEOUT} + LABELS + "${TEST_LABELS}" + DEPENDS + "${TEST_DEPENDS};${TEST_NAME}" + PASS_REGULAR_EXPRESSION + "${TEST_PASS_REGEX}" + FAIL_REGULAR_EXPRESSION + "${TEST_FAIL_REGEX}" + SKIP_REGULAR_EXPRESSION + "${TEST_SKIP_REGEX}" + REQUIRED_FILES + "${TEST_FILE}" + ${TEST_PROPERTIES}) + endforeach() +endfunction() diff --git a/tests/validate-causal-json.py b/tests/validate-causal-json.py index d2650f6efb..85d867b570 100755 --- a/tests/validate-causal-json.py +++ b/tests/validate-causal-json.py @@ -274,7 +274,6 @@ def compute_speedups(_data, args): def get_validations(args): - data = [] _len = len(args.validate) if _len == 0: @@ -297,7 +296,6 @@ def get_validations(args): def main(): - import argparse parser = argparse.ArgumentParser() diff --git a/tests/validate-timemory-json.py b/tests/validate-timemory-json.py index 78b930dcbf..7a52a2dde4 100755 --- a/tests/validate-timemory-json.py +++ b/tests/validate-timemory-json.py @@ -42,6 +42,9 @@ if __name__ == "__main__": parser.add_argument( "-d", "--depths", nargs="+", type=int, help="Expected depths", default=[] ) + parser.add_argument( + "-p", "--print", action="store_true", help="Print the processed perfetto data" + ) parser.add_argument("-i", "--input", type=str, help="Input file", required=True) args = parser.parse_args() @@ -54,6 +57,19 @@ if __name__ == "__main__": ret = 0 with open(args.input) as f: data = json.load(f) + + # demo display of data + if args.print: + for itr in data["timemory"][args.metric]["ranks"][0]["graph"]: + _prefix = itr["prefix"] + _depth = itr["depth"] + _count = itr["entry"]["laps"] + _idx = _prefix.find(">>>") + if _idx is not None: + _prefix = _prefix[(_idx + 4) :] + + print("| {:40} | {:6} | {:6} |".format(_prefix, _count, _depth)) + try: validate_json( data["timemory"][args.metric]["ranks"][0]["graph"],