From 9618ddefba0a925a821c09c311b535fe4448254f Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Tue, 24 Jan 2023 18:53:23 -0600 Subject: [PATCH] Causal profiling (#229) * Addition of basic structure * Reworked categories * More causal integration additions * Causal implementation * Update examples * delete virtual_speedup files * Update perfetto submodule to v31.0 * Update dyninst submodule * Update timemory submodule * ElfUtils build for libdw * OMNITRACE_LIKELY and OMNITRACE_UNLIKELY * Update common lib join * Examples updates for causal profiling * config updates with causal options - OMNITRACE_CAUSAL_FIXED_LINE - OMNITRACE_CAUSAL_FIXED_SPEEDUP - OMNITRACE_CAUSAL_FILE - OMNITRACE_CAUSAL_BINARY_SCOPE - OMNITRACE_CAUSAL_SOURCE_SCOPE - version info in banner - support increments in parse_numeric_range - fix occasional deadlock in first call to get_config * PTL general task group * Always include PID in debug/verbose messages * Add blocking/unblocking gotchas to runtime init bundle * CausalState * thread_data updates - generic component_bundle_cache * Improve handling of causal in category_region * components updates - backtrace_causal component - backtrace::get_data member func - decrease ignore_depth in backtrace::sample(int) - handle "omnitrace_main" in backtrace::filter_and_patch(...) 
- tweak internal thread state scope for pthread_mutex_gotcha wrappers * simplify tracing get_instrumentation_bundles usage * sampling updates - include backtrace_causal component - disable backtrace_metrics if using causal and not using perfetto - disable backtrace and backtrace_timestamp when using causal - post_process_causal * causal updates - more checks in blocking_gotcha and unblocking_gotcha start/stop - miscellaneous overhaul of data - experiment update * Remove virtual speedup * libomnitrace code_object * causal-profiling test * libomnitrace library.cpp updates - handle causal profiling - fini_bundle * Disable causal profiling by default * Updated causal code and example - example: three execution variants: cpu + rng, cpu, rng - example: three instrumentation variants: none, omni, coz - fix blocking gotcha credit - rework perform_experiment_impl - get_eligible_address_ranges - compute_eligible_lines - support fixed lines/speedups/functions - update selected_entry to support function mode - fix causal::delay - experiment updates * omnitrace_progress / omnitrace_user_progress - with accompanying omnitrace_annotated_progress / omnitrace_user_annotated_progress * Update timemory submodule * CausalMode - mode indicated whether causal predictions should be at line-level or function-level * code_object, config, runtime, sampling, thread_data - code_object: address_range - code_object: basic::line_info serialize(), name(), hash() - config updates - two signals for causal sampling - thread_data init fixes * pthread updates - pthread_create_gotcha processes delays - pthread_mutex_gotcha does not wrap pthread_join in causal mode * backtrace_causal update - dynamic delay period stats * main wrapper uses basename of argv[0] * update elfio submodule * perf support (currently unused) * Fix experiment JSON serialization - static_vector.hpp (unused) * causal executable + config options updates - omnitrace-causal exe simplifies running multiple causal configs - changed the 
causal config option names * Support both throughput and latency points * process-causal-json.py script - will be used later for testing * stable_vector * Rework thread_data * Improve omnitrace-causal exe - better verbosity handling - correct diagnosis of status for child process - execvpe when only one iteration (debugging) * Update timemory submodule * exe --version - omnitrace, omnitrace-avail, and omnitrace-sample all support --version on command-line * OMNITRACE_INTERNAL_API + OMNITRACE_{LIKELY,UNLIKELY} * omnitrace-causal cmake format * omnitrace config update - OMNITRACE_CAUSAL_FILE_CLOBBER * custom exception - wraps STL exception and gets stacktrace during construction * exit_gotcha supports _Exit * use global construct_on_init + max threads - add some safety when exceeding max # of threads * update code_object binary filter - exclude dyninst and tbbmalloc library * containers: c_array, static_vector, stable_vector - moved utility::c_array to container::c_array - created static_vector: std::vector bound to std::array - created stable_vector: vector with stable references * grow thread_data when new thread created * causal updates - data: improve compute_eligible_lines to ignore lambdas - data: use new thread_data - delay: use new thread_data - experiment: properly support latency points - experiment: support file clobber - experiment: ensure non-zero experiment time - progress_point: use new thread_data - backtrace_causal: use new thread_data * Update causal-profiling tests * fix omnitrace-causal backslash escaping * process-causal-json script * restructure causal implementation - update verbose messages for omnitrace-causal diagnose_status - migrated causal implementation in sampling.cpp to causal/sampling.cpp - OMNITRACE_USE_CAUSAL does not require OMNITRACE_USE_SAMPLING - added Mode::Causal - causal sampling uses same signals as regular sampling - moved tracing::thread_init to implementation file - combined tracing::thread_init and 
tracing::thread_init_sampling - added causal/components folder - pthread_create_gotcha::wrapper_config - omnitrace_preload checks OMNITRACE_USE_CAUSAL - updates mode accordingly * update timemory submodule * update timemory submodule * causal example updates - causal for lulesh * perf code + utility - helpers - relocated causal perf code - placement new when generating unique ptr trait for potentially allocating during sampling - additions to utility header - removed previously added helpers.hpp * update timemory submodule * Default env variables for omnitrace-causal - activate OMNITRACE_USE_KOKKOSP, etc. * update stable_vector and static_vector - static vector can use atomic for size tracking for thread-safe situations * update causal example header - CAUSAL_PROGRESS_NAMED - use CAUSAL_ prefix for some macros * Tweak lulesh example - use CAUSAL_PROGRESS instead of CAUSAL_BEGIN and CAUSAL_END * omnitrace-sample support for causal mode - set OMNITRACE_USE_SAMPLING to off when OMNITRACE_MODE=causal * refactor and cleanup code_object - scope filter - fixes to address_range * overhaul causal data + causal config options - full support for function and line mode - support static vector of instruction pointers - improve line info mapping resolution - remove thread-locality from miscellaneous functions where unnecessary - causal options for {binary,source,function,fileline} exclusion * causal experiment, sampling, and backtrace updates - is_selected + unwind address array - experiment warning about progress points - increased buffer size for backtrace_causal sampler - backtrace_causal only stores IP addresses instead of full unwind info * category_region updates - minor refactor - local_category_region::mark * Update causal tests * Bump version to 1.8.0 * omnitrace-causal args + CLOBBER -> RESET - renamed OMNITRACE_CAUSAL_FILE_CLOBBER to OMNITRACE_CAUSAL_FILE_RESET - updated omnitrace-causal exe to support recently added configuration options - other miscellaneous tweaks 
to data.cpp, experiment.cpp, and sampling.cpp * Refactor causal and code_object - code_object.hpp and code_object.cpp moved into binary folder - causal components namespaced into omnitrace::causal::component - moved sample_data out of backtrace_causal and into own file - renamed backtrace_causal to causal::component::backtrace * preload omnitrace_init + OMNITRACE_DEBUG_MARK - env OMNITRACE_DEBUG_MARK - fix omnitrace_init call when LD_PRELOAD-ing omnitrace * Fix fileline support + line-info output names + experiment log - line-info log files are prefixed with experiment name - don't print experiment duration when E2E - account for fileline scope in analysis * KokkosP: OMNITRACE_KOKKOSP_NAME_LENGTH_MAX - config option to limit the name of kokkos tool callbacks - remove [kokkos] from KokkosP names * Update causal example - minor tweaks to decrease probability of overlapping regions in binary * omnitrace-causal update - prefix N / Ntot in environment printout * Miscellaneous updates - causal::finish_experimenting() - OMNITRACE_CAUSAL_RANDOM_SEED - KokkosP causal updates - exclude some callbacks, make some callbacks unique, etc. 
- address_range::operator+=(address_range) - combine contiguous ranges in binary/analysis.cpp when file, func, line is same and address range is contiguous - bfd_line_info reads inline info - wait for perform_experiment_impl to complete - causal::delay updates - delay::process checks if experiment is active - uses threading::get_id() - experiment scales duration up for larger speedup experiments - line info samples includes excluded lines - sampler uses CLOCK_REALTIME - blocking_gotcha updates - is no longer fully static - adds audit routine which sets the postblock value to zero if try/timed routine fails - category::host was added to causal_throughput_categories_t - pthread_create_gotcha sets new threads local parent delay - was using internal value, now uses sequent value * Causal improvements to KokkosP * Updates to experiment time scaling - use stats instead of just max * binary/link_map.{hpp,cpp} * update process-causal-json.py * Folded fileline scope into source scope * Update documentation - Add documentation for causal profiling - Replace 'Omnitrace' with 'OmniTrace' everywhere * Update causal-helpers.cmake + omnitrace-testing.cmake - split tests/CMakeLists.txt partially into omnitrace-testing.cmake * omnitrace/causal.h - OMNITRACE_CAUSAL_PROGRESS - OMNITRACE_CAUSAL_PROGRESS_NAMED - OMNITRACE_CAUSAL_BEGIN - OMNITRACE_CAUSAL_END * selected_entry + remove default filters for lambdas and operator() - selected entry stores range and binary load address * update process-causal-json.py * format examples/lulesh/CMakeLists.txt * causal-helpers find_package(Threads) * OMNITRACE_KOKKOSP_KERNEL_LOGGER - was OMNITRACE_KOKKOS_KERNEL_LOGGER * quiet find of coz-profiler * Fix rocm_smi exception handling * Update timemory submodule (binutils) - fix binutils compile error on some systems - bump binutils to v2.40 * Fix miscellaneous tests * OMNITRACE_KOKKOSP_PREFIX * revert rocm_smi handling * ElfUtils updates - default to download version 0.188 - add 
-Wno-error=null-dereference due to GCC 12 compiler error * Update causal example * Remove OMNITRACE_VERBOSE from global workflow envs * Reliable causal test * disable compilation of causal perf files * Remove set_current_selection with unwind stack * update timemory submodule * fix for segfault on bionic - locking in TLS dtor was causing segfault * remove experiment::is_selected(unwind_stack_t) * update default init of selected_entry * Fix for when IP is not offset by load address * Update CMakeLists.txt * Miscellaneous updates - OMNITRACE_WARNING_OR_CI_THROW - OMNITRACE_REQUIRE - OMNITRACE_PREFER - fixed issues with no ASLR - added load address variable and ipaddr() func to basic/bfd line info - removed get_basic() from dwarf_line_info - TIMEMORY_PREFER -> OMNITRACE_PREFER - removed previously added binary_address and range variables from selected_entry * Removed superfluous CausalState * Additional causal tests (lulesh + kokkos) * filter, prefer, analysis ASLR handling - removed default filter on cold functions - fixed OMNITRACE_PREFER - fixed analysis ASLR handling * Tweak line-info output * Removed some superfluous code - causal/delay - causal/selected_entry * Exclude main.cold in function mode * Update validate-perfetto-proto.py - account for occasional http errors * Add sampling test disabling tmp files * argparser for process-causal-json - support validation - support filtering * Avoid pthread_{lock,unlock} in sampling offload - use homemade atomic_mutex/atomic_lock since contention will be low and using pthread tools might trigger our wrappers * Rename process-causal-json.py - validate-causal-json.py * rework omnitrace_add_causal_test - capable of performing validation - added validation tests * Fix kokkosp_begin_deep_copy + causal * Tweak address range in bfd_line_info::read_pc * Tweak analysis and data IP handling - look for gaps * Disable scaling experiment time by speedup * Revert change in max threads during CI * binary updates - significant overhaul 
of binary analysis implementation - removed "basic_line_info" and "bfd_line_info" in lieu of "symbol" class - symbol class has basic BFD info + vector of inlines + vector of dwarf info * Updated causal to use new binary analysis - Fix symbol.cpp includes * Updated formatting target - include *.cmake files * Updated causal tests - causal tests should be stable now * Update timemory and dyninst submodules - TPLs are stripped + built w/o debug info * Increase tolerance for causal validation speedups - higher speedups have more variance (increased to +/- 5 from 3) * Support causal output for MPI - i.e. tag with MPI rank * omnitrace-causal launcher argument * improve experiment sampling output * causal data updates - call compute lines once - fixed filtered cached binary info - debugging info when experiment fails to start * Tweaked causal validation tests * dwarf_entry ranges * CI updates - increase max threads to 64 * Tweak causal E2E validation tests - more threads - shorter thread runtime - more iterations * Fix shadowed variable * fix symbol read_bfd last PC calculation * fix maybe-uninitialized warning * omnitrace-causal launcher update - only inject "omnitrace-causal --" once - throw error if no matches found * Update causal profiling docs for launcher * fix address range boundaries --- .cmake-format.yaml | 29 + .github/workflows/opensuse.yml | 3 +- .github/workflows/ubuntu-bionic.yml | 3 +- .github/workflows/ubuntu-focal.yml | 10 +- .github/workflows/ubuntu-jammy.yml | 3 +- README.md | 3 + VERSION | 2 +- cmake/BuildSettings.cmake | 2 +- cmake/ElfUtils.cmake | 141 +++ cmake/Formatting.cmake | 3 + cmake/Packages.cmake | 14 + examples/CMakeLists.txt | 9 +- examples/causal-helpers.cmake | 104 ++ examples/causal/CMakeLists.txt | 42 + examples/causal/causal.cpp | 167 +++ examples/causal/causal.hpp | 46 + examples/causal/impl.cpp | 135 +++ examples/lulesh/CMakeLists.txt | 14 +- examples/lulesh/lulesh.cc | 9 + examples/parallel-overhead/CMakeLists.txt | 10 +- 
external/dyninst | 2 +- external/elfio | 2 +- external/perfetto | 2 +- external/timemory | 2 +- source/bin/CMakeLists.txt | 1 + source/bin/omnitrace-avail/avail.cpp | 5 + source/bin/omnitrace-causal/CMakeLists.txt | 27 + source/bin/omnitrace-causal/impl.cpp | 985 ++++++++++++++++++ .../bin/omnitrace-causal/omnitrace-causal.cpp | 136 +++ .../bin/omnitrace-causal/omnitrace-causal.hpp | 93 ++ source/bin/omnitrace-sample/impl.cpp | 8 +- .../bin/omnitrace-sample/omnitrace-sample.cpp | 3 +- source/bin/omnitrace/omnitrace.cpp | 12 +- source/bin/tests/CMakeLists.txt | 1 + source/docs/about.md | 24 +- source/docs/causal_profiling.md | 480 +++++++++ source/docs/conf.py | 1 + source/docs/features.md | 5 +- source/docs/getting_started.md | 2 +- source/docs/images/causal-foobar.png | Bin 0 -> 27358 bytes source/docs/index.md | 3 +- source/docs/installation.md | 20 +- source/docs/instrumenting.md | 4 +- source/docs/omnitrace.dox.in | 3 +- source/docs/output.md | 6 +- source/docs/python.md | 12 +- source/docs/runtime.md | 14 +- source/docs/sampling.md | 2 +- source/docs/user_api.md | 21 + source/lib/common/defines.h.in | 3 + source/lib/common/join.hpp | 40 + source/lib/omnitrace-dl/dl.cpp | 44 +- source/lib/omnitrace-dl/dl.hpp | 6 + source/lib/omnitrace-dl/main.c | 4 +- source/lib/omnitrace-user/CMakeLists.txt | 2 + source/lib/omnitrace-user/omnitrace/causal.h | 86 ++ source/lib/omnitrace-user/omnitrace/types.h | 8 +- source/lib/omnitrace-user/omnitrace/user.h | 7 + source/lib/omnitrace-user/user.cpp | 15 + source/lib/omnitrace/CMakeLists.txt | 4 +- source/lib/omnitrace/api.cpp | 13 + source/lib/omnitrace/api.hpp | 10 + source/lib/omnitrace/library.cpp | 158 ++- source/lib/omnitrace/library/CMakeLists.txt | 26 +- .../omnitrace/library/binary/CMakeLists.txt | 22 + .../library/binary/address_multirange.cpp | 75 ++ .../library/binary/address_multirange.hpp | 76 ++ .../library/binary/address_range.cpp | 190 ++++ .../library/binary/address_range.hpp | 104 ++ 
.../lib/omnitrace/library/binary/analysis.cpp | 194 ++++ .../lib/omnitrace/library/binary/analysis.hpp | 59 ++ .../omnitrace/library/binary/binary_info.hpp | 76 ++ .../omnitrace/library/binary/dwarf_entry.cpp | 201 ++++ .../omnitrace/library/binary/dwarf_entry.hpp | 62 ++ source/lib/omnitrace/library/binary/fwd.hpp | 62 ++ .../lib/omnitrace/library/binary/link_map.cpp | 133 +++ .../lib/omnitrace/library/binary/link_map.hpp | 54 + .../omnitrace/library/binary/scope_filter.cpp | 46 + .../omnitrace/library/binary/scope_filter.hpp | 75 ++ .../lib/omnitrace/library/binary/symbol.cpp | 356 +++++++ .../lib/omnitrace/library/binary/symbol.hpp | 96 ++ .../omnitrace/library/causal/CMakeLists.txt | 22 + .../library/causal/components/CMakeLists.txt | 16 + .../library/causal/components/backtrace.cpp | 227 ++++ .../library/causal/components/backtrace.hpp | 91 ++ .../causal/components/blocking_gotcha.cpp | 154 +++ .../causal/components/blocking_gotcha.hpp | 76 ++ .../causal/components/causal_gotcha.cpp | 95 ++ .../causal/components/causal_gotcha.hpp | 54 + .../causal/components/progress_point.cpp | 241 +++++ .../causal/components/progress_point.hpp | 157 +++ .../causal/components/unblocking_gotcha.cpp | 135 +++ .../causal/components/unblocking_gotcha.hpp | 75 ++ source/lib/omnitrace/library/causal/data.cpp | 949 +++++++++++++++++ source/lib/omnitrace/library/causal/data.hpp | 76 ++ source/lib/omnitrace/library/causal/delay.cpp | 193 ++++ source/lib/omnitrace/library/causal/delay.hpp | 62 ++ .../omnitrace/library/causal/experiment.cpp | 671 ++++++++++++ .../omnitrace/library/causal/experiment.hpp | 139 +++ source/lib/omnitrace/library/causal/fwd.hpp | 52 + source/lib/omnitrace/library/causal/perf.cpp | 555 ++++++++++ source/lib/omnitrace/library/causal/perf.hpp | 256 +++++ .../omnitrace/library/causal/sample_data.cpp | 71 ++ .../omnitrace/library/causal/sample_data.hpp | 64 ++ .../lib/omnitrace/library/causal/sampling.cpp | 375 +++++++ .../lib/omnitrace/library/causal/sampling.hpp 
| 62 ++ .../library/causal/selected_entry.cpp | 52 + .../library/causal/selected_entry.hpp | 70 ++ source/lib/omnitrace/library/common.hpp | 7 + .../library/components/backtrace.cpp | 3 +- .../library/components/backtrace.hpp | 1 + .../library/components/backtrace_metrics.cpp | 6 +- .../library/components/category_region.hpp | 151 ++- .../library/components/comm_data.cpp | 52 +- .../library/components/exit_gotcha.cpp | 4 + .../library/components/exit_gotcha.hpp | 4 +- .../components/pthread_create_gotcha.cpp | 87 +- .../components/pthread_create_gotcha.hpp | 20 +- .../library/components/pthread_gotcha.cpp | 1 - .../components/pthread_mutex_gotcha.cpp | 7 +- .../library/components/rocprofiler.cpp | 2 +- source/lib/omnitrace/library/concepts.hpp | 13 + source/lib/omnitrace/library/config.cpp | 309 +++++- source/lib/omnitrace/library/config.hpp | 38 +- .../library/containers/CMakeLists.txt | 8 + .../omnitrace/library/containers/c_array.hpp | 132 +++ .../library/containers/operators.hpp | 240 +++++ .../library/containers/stable_vector.hpp | 391 +++++++ .../library/containers/static_vector.hpp | 194 ++++ source/lib/omnitrace/library/coverage.cpp | 3 +- source/lib/omnitrace/library/cpu_freq.cpp | 10 +- source/lib/omnitrace/library/debug.cpp | 41 +- source/lib/omnitrace/library/debug.hpp | 94 +- source/lib/omnitrace/library/exception.cpp | 128 +++ source/lib/omnitrace/library/exception.hpp | 53 + source/lib/omnitrace/library/kokkosp.cpp | 178 +++- source/lib/omnitrace/library/locking.cpp | 102 ++ source/lib/omnitrace/library/locking.hpp | 78 ++ source/lib/omnitrace/library/ptl.cpp | 47 +- source/lib/omnitrace/library/ptl.hpp | 12 + .../library/rocm/hsa_rsrc_factory.hpp | 6 +- source/lib/omnitrace/library/rocm_smi.cpp | 6 +- source/lib/omnitrace/library/roctracer.cpp | 6 +- source/lib/omnitrace/library/runtime.cpp | 37 +- source/lib/omnitrace/library/runtime.hpp | 4 +- source/lib/omnitrace/library/sampling.cpp | 153 ++- source/lib/omnitrace/library/state.cpp | 18 + 
source/lib/omnitrace/library/state.hpp | 11 + source/lib/omnitrace/library/thread_data.hpp | 561 +++++++--- .../{thread_data.cpp => thread_deleter.cpp} | 9 +- .../lib/omnitrace/library/thread_deleter.hpp | 52 + source/lib/omnitrace/library/thread_info.cpp | 113 +- source/lib/omnitrace/library/thread_info.hpp | 9 +- source/lib/omnitrace/library/timemory.hpp | 1 + source/lib/omnitrace/library/tracing.cpp | 82 +- source/lib/omnitrace/library/tracing.hpp | 121 +-- .../omnitrace/library/tracing/annotation.hpp | 3 +- source/lib/omnitrace/library/utility.hpp | 139 +++ source/lib/omnitrace/progress.cpp | 45 + tests/CMakeLists.txt | 787 ++++---------- tests/omnitrace-testing.cmake | 741 +++++++++++++ tests/validate-causal-json.py | 403 +++++++ tests/validate-perfetto-proto.py | 30 +- 163 files changed, 14418 insertions(+), 1257 deletions(-) create mode 100644 cmake/ElfUtils.cmake create mode 100644 examples/causal-helpers.cmake create mode 100644 examples/causal/CMakeLists.txt create mode 100644 examples/causal/causal.cpp create mode 100644 examples/causal/causal.hpp create mode 100644 examples/causal/impl.cpp create mode 100644 source/bin/omnitrace-causal/CMakeLists.txt create mode 100644 source/bin/omnitrace-causal/impl.cpp create mode 100644 source/bin/omnitrace-causal/omnitrace-causal.cpp create mode 100644 source/bin/omnitrace-causal/omnitrace-causal.hpp create mode 100644 source/docs/causal_profiling.md create mode 100644 source/docs/images/causal-foobar.png create mode 100644 source/lib/omnitrace-user/omnitrace/causal.h create mode 100644 source/lib/omnitrace/library/binary/CMakeLists.txt create mode 100644 source/lib/omnitrace/library/binary/address_multirange.cpp create mode 100644 source/lib/omnitrace/library/binary/address_multirange.hpp create mode 100644 source/lib/omnitrace/library/binary/address_range.cpp create mode 100644 source/lib/omnitrace/library/binary/address_range.hpp create mode 100644 source/lib/omnitrace/library/binary/analysis.cpp create mode 
100644 source/lib/omnitrace/library/binary/analysis.hpp create mode 100644 source/lib/omnitrace/library/binary/binary_info.hpp create mode 100644 source/lib/omnitrace/library/binary/dwarf_entry.cpp create mode 100644 source/lib/omnitrace/library/binary/dwarf_entry.hpp create mode 100644 source/lib/omnitrace/library/binary/fwd.hpp create mode 100644 source/lib/omnitrace/library/binary/link_map.cpp create mode 100644 source/lib/omnitrace/library/binary/link_map.hpp create mode 100644 source/lib/omnitrace/library/binary/scope_filter.cpp create mode 100644 source/lib/omnitrace/library/binary/scope_filter.hpp create mode 100644 source/lib/omnitrace/library/binary/symbol.cpp create mode 100644 source/lib/omnitrace/library/binary/symbol.hpp create mode 100644 source/lib/omnitrace/library/causal/CMakeLists.txt create mode 100644 source/lib/omnitrace/library/causal/components/CMakeLists.txt create mode 100644 source/lib/omnitrace/library/causal/components/backtrace.cpp create mode 100644 source/lib/omnitrace/library/causal/components/backtrace.hpp create mode 100644 source/lib/omnitrace/library/causal/components/blocking_gotcha.cpp create mode 100644 source/lib/omnitrace/library/causal/components/blocking_gotcha.hpp create mode 100644 source/lib/omnitrace/library/causal/components/causal_gotcha.cpp create mode 100644 source/lib/omnitrace/library/causal/components/causal_gotcha.hpp create mode 100644 source/lib/omnitrace/library/causal/components/progress_point.cpp create mode 100644 source/lib/omnitrace/library/causal/components/progress_point.hpp create mode 100644 source/lib/omnitrace/library/causal/components/unblocking_gotcha.cpp create mode 100644 source/lib/omnitrace/library/causal/components/unblocking_gotcha.hpp create mode 100644 source/lib/omnitrace/library/causal/data.cpp create mode 100644 source/lib/omnitrace/library/causal/data.hpp create mode 100644 source/lib/omnitrace/library/causal/delay.cpp create mode 100644 source/lib/omnitrace/library/causal/delay.hpp 
create mode 100644 source/lib/omnitrace/library/causal/experiment.cpp create mode 100644 source/lib/omnitrace/library/causal/experiment.hpp create mode 100644 source/lib/omnitrace/library/causal/fwd.hpp create mode 100644 source/lib/omnitrace/library/causal/perf.cpp create mode 100644 source/lib/omnitrace/library/causal/perf.hpp create mode 100644 source/lib/omnitrace/library/causal/sample_data.cpp create mode 100644 source/lib/omnitrace/library/causal/sample_data.hpp create mode 100644 source/lib/omnitrace/library/causal/sampling.cpp create mode 100644 source/lib/omnitrace/library/causal/sampling.hpp create mode 100644 source/lib/omnitrace/library/causal/selected_entry.cpp create mode 100644 source/lib/omnitrace/library/causal/selected_entry.hpp create mode 100644 source/lib/omnitrace/library/containers/CMakeLists.txt create mode 100644 source/lib/omnitrace/library/containers/c_array.hpp create mode 100644 source/lib/omnitrace/library/containers/operators.hpp create mode 100644 source/lib/omnitrace/library/containers/stable_vector.hpp create mode 100644 source/lib/omnitrace/library/containers/static_vector.hpp create mode 100644 source/lib/omnitrace/library/exception.cpp create mode 100644 source/lib/omnitrace/library/exception.hpp create mode 100644 source/lib/omnitrace/library/locking.cpp create mode 100644 source/lib/omnitrace/library/locking.hpp rename source/lib/omnitrace/library/{thread_data.cpp => thread_deleter.cpp} (90%) create mode 100644 source/lib/omnitrace/library/thread_deleter.hpp create mode 100644 source/lib/omnitrace/progress.cpp create mode 100644 tests/omnitrace-testing.cmake create mode 100755 tests/validate-causal-json.py diff --git a/.cmake-format.yaml b/.cmake-format.yaml index ef47853ee8..99488f39ba 100644 --- a/.cmake-format.yaml +++ b/.cmake-format.yaml @@ -50,6 +50,28 @@ parse: BASELINE_FAIL_REGEX: '*' REWRITE_RUN_PASS_REGEX: '*' REWRITE_RUN_FAIL_REGEX: '*' + omnitrace_add_causal_test: + flags: + - SKIP_BASELINE + kwargs: + NAME: '*' + 
TARGET: '*' + CAUSAL_TIMEOUT: '*' + CAUSAL_VALIDATE_TIMEOUT: '*' + CAUSAL_MODE: '*' + CAUSAL_ARGS: '*' + CAUSAL_VALIDATE_ARGS: '*' + RUNTIME_ARGS: '*' + RUN_ARGS: '*' + ENVIRONMENT: '*' + LABELS: '*' + PROPERTIES: '*' + CAUSAL_PASS_REGEX: '*' + CAUSAL_FAIL_REGEX: '*' + BASELINE_PASS_REGEX: '*' + BASELINE_FAIL_REGEX: '*' + CAUSAL_VALIDATE_PASS_REGEX: '*' + CAUSAL_VALIDATE_FAIL_REGEX: '*' omnitrace_target_compile_definitions: kwargs: PUBLIC: '*' @@ -186,6 +208,13 @@ parse: PATHS: '*' PATH_SUFFIXES: '*' DOC: '*' + omnitrace_causal_example_executable: + kwargs: + TAG: '*' + SOURCES: '*' + DEFINITIONS: '*' + LINK_LIBRARIES: '*' + INCLUDE_DIRECTORIES: '*' override_spec: {} vartags: [] proptags: [] diff --git a/.github/workflows/opensuse.yml b/.github/workflows/opensuse.yml index 578533f309..7a179ce564 100644 --- a/.github/workflows/opensuse.yml +++ b/.github/workflows/opensuse.yml @@ -18,7 +18,6 @@ concurrency: cancel-in-progress: true env: - OMNITRACE_VERBOSE: 1 OMNITRACE_CI: ON OMNITRACE_TMPDIR: "%env{PWD}%/testing-tmp" @@ -78,7 +77,7 @@ jobs: -DOMNITRACE_PYTHON_PREFIX=/opt/conda/envs -DOMNITRACE_PYTHON_ENVS="py3.6;py3.7;py3.8;py3.9;py3.10" -DOMNITRACE_CI_MPI_RUN_AS_ROOT=ON - -DOMNITRACE_MAX_THREADS=32 + -DOMNITRACE_MAX_THREADS=64 -DOMNITRACE_DISABLE_EXAMPLES="transpose;rccl" -DOMNITRACE_BUILD_NUMBER=${{ github.run_attempt }} diff --git a/.github/workflows/ubuntu-bionic.yml b/.github/workflows/ubuntu-bionic.yml index 07d8eb72e1..75f8329c8b 100644 --- a/.github/workflows/ubuntu-bionic.yml +++ b/.github/workflows/ubuntu-bionic.yml @@ -20,7 +20,6 @@ concurrency: env: BUILD_TYPE: Release ELFUTILS_DOWNLOAD_VERSION: 0.183 - OMNITRACE_VERBOSE: 1 OMNITRACE_CI: ON GIT_DISCOVERY_ACROSS_FILESYSTEM: 1 OMNITRACE_TMPDIR: "%env{PWD}%/testing-tmp" @@ -111,7 +110,7 @@ jobs: -DOMNITRACE_PYTHON_PREFIX=/opt/conda/envs -DOMNITRACE_PYTHON_ENVS="py3.6;py3.7;py3.8;py3.9;py3.10" -DLULESH_BUILD_KOKKOS=OFF - -DOMNITRACE_MAX_THREADS=32 + -DOMNITRACE_MAX_THREADS=64 
-DOMNITRACE_DISABLE_EXAMPLES="transpose;rccl" -DOMNITRACE_BUILD_NUMBER=${{ github.run_attempt }} diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml index b14d497138..069330e208 100644 --- a/.github/workflows/ubuntu-focal.yml +++ b/.github/workflows/ubuntu-focal.yml @@ -18,7 +18,6 @@ concurrency: cancel-in-progress: true env: - OMNITRACE_VERBOSE: 1 OMNITRACE_CI: ON OMNITRACE_TMPDIR: "%env{PWD}%/testing-tmp" @@ -136,7 +135,7 @@ jobs: -DOMNITRACE_BUILD_STATIC_LIBSTDCXX=${{ matrix.static-libstdcxx }} -DOMNITRACE_PYTHON_PREFIX=/opt/conda/envs -DOMNITRACE_PYTHON_ENVS="py3.6;py3.7;py3.8;py3.9;py3.10" - -DOMNITRACE_MAX_THREADS=32 + -DOMNITRACE_MAX_THREADS=64 -DOMNITRACE_DISABLE_EXAMPLES="transpose;rccl" -DOMNITRACE_BUILD_NUMBER=${{ github.run_attempt }} -DMPI_HEADERS_ALLOW_MPICH=OFF @@ -316,7 +315,7 @@ jobs: -DOMNITRACE_BUILD_LTO=OFF -DOMNITRACE_USE_MPI=OFF -DOMNITRACE_USE_HIP=ON - -DOMNITRACE_MAX_THREADS=32 + -DOMNITRACE_MAX_THREADS=64 -DOMNITRACE_USE_PAPI=OFF -DOMNITRACE_USE_OMPT=OFF -DOMNITRACE_USE_PYTHON=ON @@ -403,7 +402,6 @@ jobs: env: ELFUTILS_DOWNLOAD_VERSION: 0.186 - OMNITRACE_VERBOSE: 1 steps: - uses: actions/checkout@v3 @@ -468,7 +466,7 @@ jobs: -DDYNINST_BUILD_SHARED_LIBS=ON -DDYNINST_BUILD_STATIC_LIBS=OFF -DDYNINST_ELFUTILS_DOWNLOAD_VERSION=${{ env.ELFUTILS_DOWNLOAD_VERSION }} - -DOMNITRACE_MAX_THREADS=32 + -DOMNITRACE_MAX_THREADS=64 -DOMNITRACE_DISABLE_EXAMPLES="transpose;rccl" -DOMNITRACE_BUILD_NUMBER=${{ github.run_attempt }} -DMPI_HEADERS_ALLOW_MPICH=ON @@ -597,6 +595,6 @@ jobs: -DOMNITRACE_USE_PAPI=ON -DOMNITRACE_USE_HIP=OFF -DOMNITRACE_USE_RCCL=OFF - -DOMNITRACE_MAX_THREADS=32 + -DOMNITRACE_MAX_THREADS=64 -DOMNITRACE_DISABLE_EXAMPLES="transpose;rccl" -DOMNITRACE_BUILD_NUMBER=${{ github.run_attempt }} diff --git a/.github/workflows/ubuntu-jammy.yml b/.github/workflows/ubuntu-jammy.yml index 94ae96e86c..fb8a05d46a 100644 --- a/.github/workflows/ubuntu-jammy.yml +++ b/.github/workflows/ubuntu-jammy.yml @@ -18,7 +18,6 @@ 
concurrency: cancel-in-progress: true env: - OMNITRACE_VERBOSE: 1 OMNITRACE_CI: ON OMNITRACE_TMPDIR: "%env{PWD}%/testing-tmp" @@ -163,7 +162,7 @@ jobs: -DOMNITRACE_PYTHON_PREFIX=/opt/conda/envs -DOMNITRACE_PYTHON_ENVS="py3.7;py3.8;py3.9;py3.10" -DOMNITRACE_STRIP_LIBRARIES=${{ matrix.strip }} - -DOMNITRACE_MAX_THREADS=32 + -DOMNITRACE_MAX_THREADS=64 -DOMNITRACE_DISABLE_EXAMPLES="transpose;rccl" -DOMNITRACE_BUILD_NUMBER=${{ github.run_attempt }} -DUSE_CLANG_OMP=OFF diff --git a/README.md b/README.md index 1720aab896..ac117a16cf 100755 --- a/README.md +++ b/README.md @@ -32,6 +32,8 @@ such as the memory usage, page-faults, and context-switches, and thread-level me - Periodic software interrupts per-thread - Process-level sampling - Background thread records process-, system- and device-level metrics while the application executes +- Causal profiling + - Quantifies the potential impact of optimizations in parallel codes - Critical trace generation ### Data Analysis @@ -41,6 +43,7 @@ such as the memory usage, page-faults, and context-switches, and thread-level me - Ideal for running at scale - Comprehensive traces - Every individual event/measurement +- Application speedup predictions resulting from potential optimizations in functions and lines of code (causal profiling) - Critical trace analysis (alpha) ### Parallelism API Support diff --git a/VERSION b/VERSION index 10c088013f..27f9cd322b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.7.4 +1.8.0 diff --git a/cmake/BuildSettings.cmake b/cmake/BuildSettings.cmake index c91f277cb2..2ed03f4203 100644 --- a/cmake/BuildSettings.cmake +++ b/cmake/BuildSettings.cmake @@ -145,7 +145,7 @@ endif() # non-debug optimizations # omnitrace_add_interface_library(omnitrace-compile-extra "Extra optimization flags") -if(NOT OMNITRACE_USE_COVERAGE) +if(NOT OMNITRACE_USE_COVERAGE AND OMNITRACE_BUILD_EXTRA_OPTIMIZATIONS) add_target_flag_if_avail( omnitrace-compile-extra "-finline-functions" "-funroll-loops" "-ftree-vectorize" 
"-ftree-loop-optimize" "-ftree-loop-vectorize") diff --git a/cmake/ElfUtils.cmake b/cmake/ElfUtils.cmake new file mode 100644 index 0000000000..ec17e542bd --- /dev/null +++ b/cmake/ElfUtils.cmake @@ -0,0 +1,141 @@ +# ====================================================================================== +# elfutils.cmake +# +# Configure elfutils for omnitrace +# +# ---------------------------------------- +# +# Accepts the following CMake variables +# +# ElfUtils_ROOT_DIR - Base directory of the elfutils installation +# ElfUtils_INCLUDEDIR - Hint directory that contains the elfutils header files +# ElfUtils_LIBRARYDIR - Hint directory that contains the elfutils library files +# ElfUtils_MIN_VERSION - Minimum acceptable version of elfutils +# +# Directly exports the following CMake variables +# +# ElfUtils_ROOT_DIR - Computed base directory of the elfutils installation +# ElfUtils_INCLUDE_DIRS - elfutils include directories ElfUtils_LIBRARY_DIRS - Link +# directories for elfutils libraries ElfUtils_LIBRARIES - elfutils library files +# +# NOTE: The exported ElfUtils_ROOT_DIR can be different from the value provided by the +# user in the case that it is determined to build elfutils from source. In such a case, +# ElfUtils_ROOT_DIR will contain the directory of the from-source installation. 
+# +# See Modules/FindLibElf.cmake and Modules/FindLibDwarf.cmake for details +# +# ====================================================================================== + +include_guard(GLOBAL) +include(ExternalProject) + +# Minimum acceptable version of elfutils NB: We need >=0.178 because libdw isn't +# thread-safe before then +set(_min_version 0.178) + +set(ElfUtils_MIN_VERSION + ${_min_version} + CACHE STRING "Minimum acceptable elfutils version") + +if(${ElfUtils_MIN_VERSION} VERSION_LESS ${_min_version}) + omnitrace_message( + FATAL_ERROR + "Requested version ${ElfUtils_MIN_VERSION} is less than minimum supported version (${_min_version})" + ) +endif() + +# If we didn't find a suitable version on the system, then download one from the web +set(ElfUtils_DOWNLOAD_VERSION + "0.188" + CACHE STRING "Version of elfutils to download and install") + +# make sure we are not downloading a version less than minimum +if(${ElfUtils_DOWNLOAD_VERSION} VERSION_LESS ${ElfUtils_MIN_VERSION}) + omnitrace_message( + FATAL_ERROR + "elfutils download version is set to ${ElfUtils_DOWNLOAD_VERSION} but elfutils minimum version is set to ${ElfUtils_MIN_VERSION}" + ) +endif() + +if(CMAKE_C_COMPILER_ID MATCHES "GNU") + set(ElfUtils_C_COMPILER + "${CMAKE_C_COMPILER}" + CACHE FILEPATH "C compiler used to compiler ElfUtils") +else() + find_program( + ElfUtils_C_COMPILER + NAMES gcc + PATH_SUFFIXES bin) +endif() + +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + set(ElfUtils_CXX_COMPILER + "${CMAKE_CXX_COMPILER}" + CACHE FILEPATH "C++ compiler used to compiler ElfUtils") +else() + find_program( + ElfUtils_CXX_COMPILER + NAMES g++ + PATH_SUFFIXES bin) +endif() + +find_program( + MAKE_COMMAND + NAMES make gmake + PATH_SUFFIXES bin) + +if(NOT ElfUtils_C_COMPILER OR NOT ElfUtils_CXX_COMPILER) + omnitrace_message( + FATAL_ERROR + "ElfUtils requires the GNU C and C++ compilers. 
ElfUtils_C_COMPILER: ${ElfUtils_C_COMPILER}, ElfUtils_CXX_COMPILER: ${ElfUtils_CXX_COMPILER}" + ) +endif() + +set(_eu_root ${PROJECT_BINARY_DIR}/external/elfutils) +set(_eu_inc_dirs $) +set(_eu_lib_dirs $) +set(_eu_libs $ + $) +set(_eu_build_byproducts "${_eu_root}/lib/libdw${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${_eu_root}/lib/libelf${CMAKE_STATIC_LIBRARY_SUFFIX}") + +externalproject_add( + omnitrace-elfutils-build + PREFIX ${PROJECT_BINARY_DIR}/external/elfutils + URL https://sourceware.org/elfutils/ftp/${ElfUtils_DOWNLOAD_VERSION}/elfutils-${ElfUtils_DOWNLOAD_VERSION}.tar.bz2 + BUILD_IN_SOURCE 1 + CONFIGURE_COMMAND + ${CMAKE_COMMAND} -E env CC=${ElfUtils_C_COMPILER} + CFLAGS=-fPIC\ -O3\ -Wno-error=null-dereference CXX=${ElfUtils_CXX_COMPILER} + CXXFLAGS=-fPIC\ -O3\ -Wno-error=null-dereference + [=[LDFLAGS=-Wl,-rpath='$$ORIGIN']=] /configure --enable-install-elfh + --prefix=${_eu_root} --disable-libdebuginfod --disable-debuginfod + --enable-thread-safety + BUILD_COMMAND ${MAKE_COMMAND} install + BUILD_BYPRODUCTS "${_eu_build_byproducts}" + INSTALL_COMMAND "") + +# target for re-executing the installation +add_custom_target( + omnitrace-elfutils-install + COMMAND ${MAKE_COMMAND} install + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/external/elfutils/src/ElfUtils-External + COMMENT "Installing ElfUtils...") + +# -------------- EXPORT VARIABLES --------------------------------------------- + +set(ElfUtils_ROOT_DIR + ${_eu_root} + CACHE PATH "Base directory the of elfutils installation" FORCE) +set(ElfUtils_INCLUDE_DIRS + ${_eu_inc_dirs} + CACHE PATH "elfutils include directory" FORCE) +set(ElfUtils_LIBRARY_DIRS + ${_eu_lib_dirs} + CACHE PATH "elfutils library directory" FORCE) +set(ElfUtils_INCLUDE_DIR + ${ElfUtils_INCLUDE_DIRS} + CACHE PATH "elfutils include directory" FORCE) +set(ElfUtils_LIBRARIES + ${_eu_libs} + CACHE FILEPATH "elfutils library files" FORCE) diff --git a/cmake/Formatting.cmake b/cmake/Formatting.cmake index a8df105042..6e8be8bd61 100644 --- 
a/cmake/Formatting.cmake +++ b/cmake/Formatting.cmake @@ -71,6 +71,9 @@ if(OMNITRACE_CLANG_FORMAT_EXE ${PROJECT_SOURCE_DIR}/source/*CMakeLists.txt ${PROJECT_SOURCE_DIR}/examples/*CMakeLists.txt ${PROJECT_SOURCE_DIR}/tests/*CMakeLists.txt + ${PROJECT_SOURCE_DIR}/source/*.cmake + ${PROJECT_SOURCE_DIR}/examples/*.cmake + ${PROJECT_SOURCE_DIR}/tests/*.cmake ${PROJECT_SOURCE_DIR}/cmake/*.cmake ${PROJECT_SOURCE_DIR}/source/*.cmake) list(APPEND cmake_files ${PROJECT_SOURCE_DIR}/CMakeLists.txt) diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake index 7adfac2cf0..e4b2f59ce2 100644 --- a/cmake/Packages.cmake +++ b/cmake/Packages.cmake @@ -28,6 +28,7 @@ omnitrace_add_interface_library(omnitrace-ptl "Enables PTL support (tasking)") omnitrace_add_interface_library(omnitrace-papi "Enable PAPI support") omnitrace_add_interface_library(omnitrace-ompt "Enable OMPT support") omnitrace_add_interface_library(omnitrace-python "Enables Python support") +omnitrace_add_interface_library(omnitrace-elfutils "Provides ElfUtils") omnitrace_add_interface_library(omnitrace-perfetto "Enables Perfetto support") omnitrace_add_interface_library(omnitrace-timemory "Provides timemory libraries") omnitrace_add_interface_library(omnitrace-timemory-config @@ -258,6 +259,19 @@ endif() omnitrace_target_compile_definitions( omnitrace-ompt INTERFACE OMNITRACE_USE_OMPT=$) +# ----------------------------------------------------------------------------------------# +# +# ElfUtils +# +# ----------------------------------------------------------------------------------------# + +include(ElfUtils) + +target_include_directories(omnitrace-elfutils SYSTEM INTERFACE ${ElfUtils_INCLUDE_DIRS}) +target_compile_definitions(omnitrace-elfutils INTERFACE ${ElfUtils_DEFINITIONS}) +target_link_directories(omnitrace-elfutils INTERFACE ${ElfUtils_LIBRARY_DIRS}) +target_link_libraries(omnitrace-elfutils INTERFACE ${ElfUtils_LIBRARIES}) + # 
----------------------------------------------------------------------------------------# # # Dyninst diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index d823cfa45c..8c3b8ac360 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -18,7 +18,7 @@ set(CMAKE_CXX_CLANG_TIDY) set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME examples) if(OMNITRACE_BUILD_DEBUG) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -fno-omit-frame-pointer") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g3 -fno-omit-frame-pointer") endif() option(BUILD_SHARED_LIBS "Build dynamic libraries" ON) @@ -36,6 +36,12 @@ if(OMNITRACE_INSTALL_EXAMPLES) include(GNUInstallDirs) endif() +set(OMNITRACE_EXAMPLE_ROOT_DIR + ${CMAKE_CURRENT_LIST_DIR} + CACHE INTERNAL "") +# defines function for creating causal profiling exes +include(${CMAKE_CURRENT_LIST_DIR}/causal-helpers.cmake) + add_subdirectory(transpose) add_subdirectory(parallel-overhead) add_subdirectory(code-coverage) @@ -46,3 +52,4 @@ add_subdirectory(python) add_subdirectory(lulesh) add_subdirectory(rccl) add_subdirectory(rewrite-caller) +add_subdirectory(causal) diff --git a/examples/causal-helpers.cmake b/examples/causal-helpers.cmake new file mode 100644 index 0000000000..342d1aa63c --- /dev/null +++ b/examples/causal-helpers.cmake @@ -0,0 +1,104 @@ +# +# function for +# +include_guard(DIRECTORY) + +if(NOT TARGET omnitrace::omnitrace-user-library) + find_package(omnitrace REQUIRED COMPONENTS user) +endif() + +if(NOT coz-profiler_FOUND) + find_package(coz-profiler QUIET) +endif() + +if(NOT TARGET omni-causal-examples) + add_custom_target(omni-causal-examples) +endif() + +function(omnitrace_causal_example_executable _NAME) + cmake_parse_arguments( + CAUSAL "" "" "SOURCES;DEFINITIONS;INCLUDE_DIRECTORIES;LINK_LIBRARIES" ${ARGN}) + + function(omnitrace_causal_example_interface _TARGET) + if(NOT TARGET ${_TARGET}) + find_package(Threads REQUIRED) + add_library(${_TARGET} INTERFACE) + target_link_libraries(${_TARGET} INTERFACE 
Threads::Threads ${CMAKE_DL_LIBS}) + endif() + endfunction() + + omnitrace_causal_example_interface(omni-causal-example-lib-debug) + omnitrace_causal_example_interface(omni-causal-example-lib-no-debug) + + target_compile_options(omni-causal-example-lib-debug + INTERFACE -g3 -fno-omit-frame-pointer) + target_compile_options(omni-causal-example-lib-no-debug INTERFACE -g0) + + add_executable(${_NAME} ${CAUSAL_SOURCES}) + target_compile_definitions(${_NAME} PRIVATE USE_COZ=0 USE_OMNI=0 + ${CAUSAL_DEFINITIONS}) + target_include_directories(${_NAME} PRIVATE ${OMNITRACE_EXAMPLE_ROOT_DIR}/causal + ${CAUSAL_INCLUDE_DIRECTORIES}) + target_link_libraries( + ${_NAME} PRIVATE ${CAUSAL_LINK_LIBRARIES} omnitrace::omnitrace-user-library + omni-causal-example-lib-debug) + + add_executable(${_NAME}-omni ${CAUSAL_SOURCES}) + target_compile_definitions(${_NAME}-omni PRIVATE USE_COZ=0 USE_OMNI=1 + ${CAUSAL_DEFINITIONS}) + target_include_directories(${_NAME}-omni PRIVATE ${OMNITRACE_EXAMPLE_ROOT_DIR}/causal + ${CAUSAL_INCLUDE_DIRECTORIES}) + target_link_libraries( + ${_NAME}-omni PRIVATE ${CAUSAL_LINK_LIBRARIES} omnitrace::omnitrace-user-library + omni-causal-example-lib-debug) + + add_executable(${_NAME}-ndebug ${CAUSAL_SOURCES}) + target_compile_definitions(${_NAME}-ndebug PRIVATE USE_COZ=0 USE_OMNI=0 + ${CAUSAL_DEFINITIONS}) + target_include_directories( + ${_NAME}-ndebug PRIVATE ${OMNITRACE_EXAMPLE_ROOT_DIR}/causal + ${CAUSAL_INCLUDE_DIRECTORIES}) + target_link_libraries( + ${_NAME}-ndebug + PRIVATE ${CAUSAL_LINK_LIBRARIES} omnitrace::omnitrace-user-library + omni-causal-example-lib-no-debug) + + add_executable(${_NAME}-omni-ndebug ${CAUSAL_SOURCES}) + target_compile_definitions(${_NAME}-omni-ndebug PRIVATE USE_COZ=0 USE_OMNI=1 + ${CAUSAL_DEFINITIONS}) + target_include_directories( + ${_NAME}-omni-ndebug PRIVATE ${OMNITRACE_EXAMPLE_ROOT_DIR}/causal + ${CAUSAL_INCLUDE_DIRECTORIES}) + target_link_libraries( + ${_NAME}-omni-ndebug + PRIVATE ${CAUSAL_LINK_LIBRARIES} 
omnitrace::omnitrace-user-library + omni-causal-example-lib-no-debug) + + add_dependencies(omni-causal-examples ${_NAME} ${_NAME}-omni ${_NAME}-ndebug + ${_NAME}-omni-ndebug) + + if(coz-profiler_FOUND) + omnitrace_causal_example_interface(omni-causal-example-lib-coz) + target_compile_options(omni-causal-example-lib-coz + INTERFACE -g3 -gdwarf-3 -fno-omit-frame-pointer) + + add_executable(${_NAME}-coz ${CAUSAL_SOURCES}) + target_compile_definitions(${_NAME}-coz PRIVATE USE_COZ=1 USE_OMNI=0 + ${CAUSAL_DEFINITIONS}) + target_include_directories( + ${_NAME}-coz PRIVATE ${OMNITRACE_EXAMPLE_ROOT_DIR}/causal + ${CAUSAL_INCLUDE_DIRECTORIES}) + target_link_libraries(${_NAME}-coz PRIVATE ${CAUSAL_LINK_LIBRARIES} + omni-causal-example-lib-coz coz::coz) + + add_dependencies(omni-causal-examples ${_NAME}-coz) + endif() + + if(OMNITRACE_INSTALL_EXAMPLES) + install( + TARGETS ${_NAME} ${_NAME}-omni ${_NAME}-coz + DESTINATION bin + COMPONENT omnitrace-examples + OPTIONAL) + endif() +endfunction() diff --git a/examples/causal/CMakeLists.txt b/examples/causal/CMakeLists.txt new file mode 100644 index 0000000000..a3bba08bf6 --- /dev/null +++ b/examples/causal/CMakeLists.txt @@ -0,0 +1,42 @@ +cmake_minimum_required(VERSION 3.16 FATAL_ERROR) + +project(omnitrace-causal-example LANGUAGES CXX) + +if(OMNITRACE_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if(${PROJECT_NAME} IN_LIST OMNITRACE_DISABLE_EXAMPLES OR ${_DIR} IN_LIST + OMNITRACE_DISABLE_EXAMPLES) + return() + endif() +endif() + +set(CMAKE_BUILD_TYPE "Release") +find_package(Threads REQUIRED) +if(NOT TARGET omnitrace::omnitrace-user-library) + find_package(omnitrace REQUIRED COMPONENTS user) +endif() + +add_library(causal-interface-library INTERFACE) +target_compile_options(causal-interface-library INTERFACE -g3 -gdwarf-3 + -fno-omit-frame-pointer) +target_link_libraries(causal-interface-library INTERFACE Threads::Threads + ${CMAKE_DL_LIBS}) + +omnitrace_causal_example_executable( + 
"causal-both" + SOURCES causal.cpp impl.cpp + LINK_LIBRARIES causal-interface-library + DEFINITIONS USE_RNG=1 USE_CPU=1) + +omnitrace_causal_example_executable( + "causal-rng" + SOURCES causal.cpp impl.cpp + LINK_LIBRARIES causal-interface-library + DEFINITIONS USE_RNG=1 USE_CPU=0) + +omnitrace_causal_example_executable( + "causal-cpu" + SOURCES causal.cpp impl.cpp + LINK_LIBRARIES causal-interface-library + DEFINITIONS USE_RNG=0 USE_CPU=1) diff --git a/examples/causal/causal.cpp b/examples/causal/causal.cpp new file mode 100644 index 0000000000..807eb1c970 --- /dev/null +++ b/examples/causal/causal.cpp @@ -0,0 +1,167 @@ +#include "causal.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using mutex_t = std::timed_mutex; +using auto_lock_t = std::unique_lock; +using clock_type = std::chrono::high_resolution_clock; +using nanosec = std::chrono::nanoseconds; + +namespace +{ +std::chrono::duration t_ms; +std::chrono::duration slow_ms; +std::chrono::duration fast_ms; + +template +inline void +consume_variables(Args&&...) +{} +} // namespace + +template +bool +rng_func_impl(int64_t n, uint64_t rseed); + +template +bool +cpu_func_impl(int64_t n, int nloop); + +void +rng_slow_func(int64_t n, uint64_t rseed) __attribute__((noinline)); + +void +rng_fast_func(int64_t n, uint64_t rseed) __attribute__((noinline)); + +void +cpu_slow_func(int64_t n, int nloop) __attribute__((noinline)); + +void +cpu_fast_func(int64_t n, int nloop) __attribute__((noinline)); + +#if USE_CPU > 0 +# define CPU_SLOW_FUNC(...) cpu_slow_func(__VA_ARGS__) +# define CPU_FAST_FUNC(...) cpu_fast_func(__VA_ARGS__) +#else +# define CPU_SLOW_FUNC(...) consume_variables(__VA_ARGS__) +# define CPU_FAST_FUNC(...) consume_variables(__VA_ARGS__) +#endif + +#if USE_RNG > 0 +# define RNG_SLOW_FUNC(...) rng_slow_func(__VA_ARGS__) +# define RNG_FAST_FUNC(...) rng_fast_func(__VA_ARGS__) +#else +# define RNG_SLOW_FUNC(...) 
consume_variables(__VA_ARGS__) +# define RNG_FAST_FUNC(...) consume_variables(__VA_ARGS__) +#endif + +int +main(int argc, char** argv) +{ + uint64_t rseed = std::random_device{}(); + int nitr = 200; + double frac = 70; + int64_t slow_val = 100000000L; + + if(argc > 1) frac = std::stod(argv[1]); + if(argc > 2) nitr = std::stoi(argv[2]); + if(argc > 3) rseed = std::stoul(argv[3]); + if(argc > 4) slow_val = std::stol(argv[4]); + + int64_t fast_val = (frac / 100.0) * slow_val; + double rfrac = (fast_val / static_cast(slow_val)); + if(argc > 5) fast_val = std::stol(argv[5]); + + printf("\nIterations: %i, fraction: %6.2f, random seed: %lu :: slow = %zu, " + "fast = %zu, expected ratio = %6.2f\n", + nitr, frac, rseed, slow_val, fast_val, rfrac * 100.0); + + auto _t = clock_type::now(); + for(int i = 0; i < nitr; ++i) + { + if(i == 0 || i + 1 == nitr || i % (nitr / 5) == 0) + printf("executing iteration: %i\n", i); + // + auto&& _slow_func = [](auto _nsec, auto _seed, auto _nloop) { + auto _t = clock_type::now(); + CPU_SLOW_FUNC(_nsec, _nloop); + RNG_SLOW_FUNC(_nsec / 5, _seed); + slow_ms += (clock_type::now() - _t); + }; + // + auto&& _fast_func = [](auto _nsec, auto _seed, auto _nloop) { + auto _t = clock_type::now(); + CPU_FAST_FUNC(_nsec, _nloop); + RNG_FAST_FUNC(_nsec / 5, _seed); + fast_ms += (clock_type::now() - _t); + }; + // + CAUSAL_BEGIN("main_iteration"); + // + auto _threads = std::vector{}; + _threads.emplace_back(std::move(_slow_func), slow_val, rseed, 10000); + _threads.emplace_back(std::move(_fast_func), fast_val, rseed, 10000); + for(auto& itr : _threads) + itr.join(); + CAUSAL_END("main_iteration"); + CAUSAL_PROGRESS; + } + t_ms += clock_type::now() - _t; + auto rms = (fast_ms.count() / slow_ms.count()); + printf("slow_func() took %10.3f ms\n", slow_ms.count()); + printf("fast_func() took %10.3f ms\n", fast_ms.count()); + printf("total is %18.3f ms\n", t_ms.count()); + printf("ratio is %18.3f %s\n", 100.0 * rms, "%"); + printf("rdiff is %18.3f %s\n", 
100.0 * (rms - rfrac), "%"); +} +// +// +// +void +rng_slow_func(int64_t n, uint64_t rseed) +{ + // clang-format off + while(rng_func_impl(n, rseed) != false) {} + // clang-format on +} +// +// +// +void +rng_fast_func(int64_t n, uint64_t rseed) +{ + // clang-format off + while(rng_func_impl(n, rseed) != true) {} + // clang-format on +} +// +// +// +void +cpu_slow_func(int64_t n, int nloop) +{ + // clang-format off + while(cpu_func_impl(n, nloop) != false) {} + // clang-format on +} +// +// +// +void +cpu_fast_func(int64_t n, int nloop) +{ + // clang-format off + while(cpu_func_impl(n, nloop) != true) {} + // clang-format on +} diff --git a/examples/causal/causal.hpp b/examples/causal/causal.hpp new file mode 100644 index 0000000000..8094cebf77 --- /dev/null +++ b/examples/causal/causal.hpp @@ -0,0 +1,46 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#define CAUSAL_STR2(x) #x +#define CAUSAL_STR(x) CAUSAL_STR2(x) +#define CAUSAL_LABEL __FILE__ ":" CAUSAL_STR(__LINE__) + +#if defined(USE_OMNI) && USE_OMNI > 0 +# include +# define CAUSAL_PROGRESS OMNITRACE_CAUSAL_PROGRESS +# define CAUSAL_PROGRESS_NAMED(LABEL) OMNITRACE_CAUSAL_PROGRESS_NAMED(LABEL) +# define CAUSAL_BEGIN(LABEL) OMNITRACE_CAUSAL_BEGIN(LABEL) +# define CAUSAL_END(LABEL) OMNITRACE_CAUSAL_END(LABEL) +#elif defined(USE_COZ) && USE_COZ > 0 +# include +# define CAUSAL_PROGRESS COZ_PROGRESS_NAMED(CAUSAL_LABEL) +# define CAUSAL_PROGRESS_NAMED(LABEL) COZ_PROGRESS_NAMED(LABEL) +# define CAUSAL_BEGIN(LABEL) COZ_BEGIN(LABEL) +# define CAUSAL_END(LABEL) COZ_END(LABEL) +#else +# define CAUSAL_PROGRESS +# define CAUSAL_PROGRESS_NAMED(LABEL) +# define CAUSAL_BEGIN(LABEL) +# define CAUSAL_END(LABEL) +#endif diff --git a/examples/causal/impl.cpp b/examples/causal/impl.cpp new file mode 100644 index 0000000000..e217ea9eaa --- /dev/null +++ b/examples/causal/impl.cpp @@ -0,0 +1,135 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using mutex_t = std::timed_mutex; +using auto_lock_t = std::unique_lock; +using clock_type = std::chrono::high_resolution_clock; +using nanosec = std::chrono::nanoseconds; + +namespace +{ +inline __attribute__((always_inline)) int64_t +clock_tick() noexcept; + +template +inline __attribute__((always_inline)) Ret +clock_tick() noexcept; + +template +inline __attribute__((always_inline)) Tp +get_clock_now(clockid_t clock_id) noexcept; + +template +inline __attribute__((always_inline)) Tp +get_clock_cpu_now() noexcept; +} // namespace + +// +// This implementation works well for Omnitrace +// while COZ makes poor predictions +// +template +bool +rng_func_impl(int64_t n, uint64_t rseed) +{ + int64_t _n = 0; + auto _rng = std::mt19937_64{ rseed }; + auto _dist = std::uniform_int_distribution{ 1, 1 }; + // clang-format off + while(_n < n) _n += _dist(_rng); + // clang-format on + return V; +} + +template bool rng_func_impl(int64_t, uint64_t); +template bool rng_func_impl(int64_t, uint64_t); + +// +// This implementation works well for COZ +// while Omnitrace makes poor predictions +// +template +bool +cpu_func_impl(int64_t n, int nloop) +{ + auto _t = clock_type::now(); + auto _cpu_now = get_clock_cpu_now(); + auto _cpu_end = _cpu_now + n; + // clang-format off + while(get_clock_cpu_now() < _cpu_end) { for(volatile int i = 0; i < nloop; ++i) {} 
} + // clang-format on + return V; +} + +template bool +cpu_func_impl(int64_t, int); +template bool +cpu_func_impl(int64_t, int); + +namespace +{ +int64_t +clock_tick() noexcept +{ + static int64_t _val = ::sysconf(_SC_CLK_TCK); + return _val; +} + +template +Ret +clock_tick() noexcept +{ + return static_cast(Precision::den) / static_cast(clock_tick()); +} + +template +Tp +get_clock_now(clockid_t clock_id) noexcept +{ + constexpr Tp factor = Precision::den / static_cast(std::nano::den); + struct timespec ts; + clock_gettime(clock_id, &ts); + return (ts.tv_sec * std::nano::den + ts.tv_nsec) * factor; +} + +template +Tp +get_clock_cpu_now() noexcept +{ + return get_clock_now(CLOCK_THREAD_CPUTIME_ID); +} +} // namespace diff --git a/examples/lulesh/CMakeLists.txt b/examples/lulesh/CMakeLists.txt index 9b42d1f1f5..8dd45dd287 100644 --- a/examples/lulesh/CMakeLists.txt +++ b/examples/lulesh/CMakeLists.txt @@ -12,6 +12,8 @@ if(OMNITRACE_DISABLE_EXAMPLES) endif() set(CMAKE_BUILD_TYPE "RelWithDebInfo") +string(REGEX REPLACE " -g(|[0-2]) " " -g3 " CMAKE_CXX_FLAGS_RELWITHDEBINFO + "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") list(INSERT CMAKE_MODULE_PATH 0 ${PROJECT_SOURCE_DIR}/cmake/Modules) @@ -63,9 +65,11 @@ endif() file(GLOB headers ${PROJECT_SOURCE_DIR}/*.h ${PROJECT_SOURCE_DIR}/*.hxx) file(GLOB sources ${PROJECT_SOURCE_DIR}/*.cc) -add_executable(lulesh ${sources} ${headers}) -target_include_directories(lulesh PRIVATE ${PROJECT_SOURCE_DIR}/includes) -target_link_libraries(lulesh PRIVATE Kokkos::kokkos lulesh-mpi) +omnitrace_causal_example_executable( + "lulesh" + SOURCES ${sources} ${headers} + LINK_LIBRARIES Kokkos::kokkos lulesh-mpi + INCLUDE_DIRECTORIES ${PROJECT_SOURCE_DIR}/includes) if(OMNITRACE_INSTALL_EXAMPLES) if(LULESH_BUILD_KOKKOS) @@ -76,8 +80,4 @@ if(OMNITRACE_INSTALL_EXAMPLES) set_target_properties(lulesh PROPERTIES INSTALL_RPATH "\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}") endif() - install( - TARGETS lulesh - DESTINATION bin - COMPONENT omnitrace-examples) endif() diff 
--git a/examples/lulesh/lulesh.cc b/examples/lulesh/lulesh.cc index f1b65b1120..b632ad27ab 100644 --- a/examples/lulesh/lulesh.cc +++ b/examples/lulesh/lulesh.cc @@ -13,6 +13,8 @@ #include "lulesh.h" +#include "causal.hpp" + static Kokkos::View buffer; static size_t buffer_size; static size_t buffer_offset; @@ -46,6 +48,7 @@ TimeIncrement(Domain& domain) if((domain.dtfixed() <= Real_t(0.0)) && (domain.cycle() != Int_t(0))) { + // CAUSAL_BEGIN("TimeIncrement_Iteration") Real_t ratio; Real_t olddt = domain.deltatime(); @@ -86,6 +89,8 @@ TimeIncrement(Domain& domain) newdt = domain.dtmax(); } domain.deltatime() = newdt; + CAUSAL_PROGRESS_NAMED("TimeIncrement_Iteration"); + // CAUSAL_END("TimeIncrement_Iteration") } if((targetdt > domain.deltatime()) && @@ -2241,12 +2246,15 @@ main(int argc, char* argv[]) while((locDom.time() < locDom.stoptime()) && (locDom.cycle() < opts.its)) { Kokkos::Tools::startSection(_time_incrp); + //CAUSAL_BEGIN("Iteration") TimeIncrement(locDom); Kokkos::Tools::stopSection(_time_incrp); Kokkos::Tools::startSection(_leap_frogp); LagrangeLeapFrog(locDom); Kokkos::Tools::stopSection(_leap_frogp); + CAUSAL_PROGRESS_NAMED("Iteration") + //CAUSAL_END("Iteration") if((opts.showProg != 0) && (opts.quiet == 0) && (myRank == 0)) { @@ -2254,6 +2262,7 @@ main(int argc, char* argv[]) double(locDom.time()), double(locDom.deltatime())); } Kokkos::Tools::markEvent("completed_timestep"); + CAUSAL_PROGRESS } Kokkos::Tools::destroyProfileSection(_time_incrp); diff --git a/examples/parallel-overhead/CMakeLists.txt b/examples/parallel-overhead/CMakeLists.txt index ade34befb8..1458e138a7 100644 --- a/examples/parallel-overhead/CMakeLists.txt +++ b/examples/parallel-overhead/CMakeLists.txt @@ -13,11 +13,17 @@ endif() set(CMAKE_BUILD_TYPE "Release") find_package(Threads REQUIRED) + +add_library(parallel-overhead-compile-options INTERFACE) +target_compile_options(parallel-overhead-compile-options INTERFACE -g) + add_executable(parallel-overhead parallel-overhead.cpp) 
-target_link_libraries(parallel-overhead PRIVATE Threads::Threads) +target_link_libraries(parallel-overhead PRIVATE Threads::Threads + parallel-overhead-compile-options) add_executable(parallel-overhead-locks parallel-overhead.cpp) -target_link_libraries(parallel-overhead-locks PRIVATE Threads::Threads) +target_link_libraries(parallel-overhead-locks PRIVATE Threads::Threads + parallel-overhead-compile-options) target_compile_definitions(parallel-overhead-locks PRIVATE USE_LOCKS=1) if(OMNITRACE_INSTALL_EXAMPLES) diff --git a/external/dyninst b/external/dyninst index 5d6b3711d9..e4d2eb36ae 160000 --- a/external/dyninst +++ b/external/dyninst @@ -1 +1 @@ -Subproject commit 5d6b3711d90a3fb0f208cd5dcceccbc4559aceb0 +Subproject commit e4d2eb36ae2de522f27e5c9f77de8b30e92630c7 diff --git a/external/elfio b/external/elfio index 637221d267..d00cc32f8b 160000 --- a/external/elfio +++ b/external/elfio @@ -1 +1 @@ -Subproject commit 637221d26748171ae2a29d4abea9571036b05d66 +Subproject commit d00cc32f8b1ed85d22309fac10576a1baf8a4736 diff --git a/external/perfetto b/external/perfetto index 7e8d6801db..b8da070959 160000 --- a/external/perfetto +++ b/external/perfetto @@ -1 +1 @@ -Subproject commit 7e8d6801dbf73936a916dbcd8ed06a758c8d989e +Subproject commit b8da07095979310818f0efde2ef3c69ea70d62c5 diff --git a/external/timemory b/external/timemory index 04a8679c09..64bf1067a4 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit 04a8679c09a3e6bf36968fe4fd93573ac46cbe3b +Subproject commit 64bf1067a49e16733c67e0fde47f343ae85335cd diff --git a/source/bin/CMakeLists.txt b/source/bin/CMakeLists.txt index c00f45b990..ec9f6d6978 100644 --- a/source/bin/CMakeLists.txt +++ b/source/bin/CMakeLists.txt @@ -16,6 +16,7 @@ endif() # executables add_subdirectory(omnitrace-avail) add_subdirectory(omnitrace-critical-trace) +add_subdirectory(omnitrace-causal) add_subdirectory(omnitrace-sample) add_subdirectory(omnitrace) diff --git 
a/source/bin/omnitrace-avail/avail.cpp b/source/bin/omnitrace-avail/avail.cpp index 14a877818a..f7dcbd0581 100644 --- a/source/bin/omnitrace-avail/avail.cpp +++ b/source/bin/omnitrace-avail/avail.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -113,6 +114,7 @@ int gpu_count = 0; int main(int argc, char** argv) { + tim::unwind::set_bfd_verbose(3); tim::set_env("OMNITRACE_INIT_TOOLING", "OFF", 1); omnitrace_init_library(); @@ -172,6 +174,9 @@ main(int argc, char** argv) parser_t parser("omnitrace-avail"); parser.enable_help(); + parser.enable_version("omnitrace-avail", "v" OMNITRACE_VERSION_STRING, + OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION); + parser.set_help_width(40); parser.add_argument({ "--debug" }, "Enable debug messages") .max_count(1) diff --git a/source/bin/omnitrace-causal/CMakeLists.txt b/source/bin/omnitrace-causal/CMakeLists.txt new file mode 100644 index 0000000000..ca8e59f117 --- /dev/null +++ b/source/bin/omnitrace-causal/CMakeLists.txt @@ -0,0 +1,27 @@ +# ------------------------------------------------------------------------------# +# +# omnitrace-causal target +# +# ------------------------------------------------------------------------------# + +add_executable( + omnitrace-causal + ${CMAKE_CURRENT_LIST_DIR}/omnitrace-causal.cpp + ${CMAKE_CURRENT_LIST_DIR}/omnitrace-causal.hpp ${CMAKE_CURRENT_LIST_DIR}/impl.cpp) + +target_compile_definitions(omnitrace-causal PRIVATE TIMEMORY_CMAKE=1) +target_include_directories(omnitrace-causal PRIVATE ${CMAKE_CURRENT_LIST_DIR}) +target_link_libraries( + omnitrace-causal + PRIVATE omnitrace::omnitrace-compile-definitions omnitrace::omnitrace-headers + omnitrace::omnitrace-common-library) +set_target_properties( + omnitrace-causal PROPERTIES BUILD_RPATH "\$ORIGIN:\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}" + INSTALL_RPATH "${OMNITRACE_EXE_INSTALL_RPATH}") + +omnitrace_strip_target(omnitrace-causal) + +install( + TARGETS omnitrace-causal + DESTINATION ${CMAKE_INSTALL_BINDIR} + 
OPTIONAL) diff --git a/source/bin/omnitrace-causal/impl.cpp b/source/bin/omnitrace-causal/impl.cpp new file mode 100644 index 0000000000..d757590c75 --- /dev/null +++ b/source/bin/omnitrace-causal/impl.cpp @@ -0,0 +1,985 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "omnitrace-causal.hpp" + +#include "common/defines.h" +#include "common/delimit.hpp" +#include "common/environment.hpp" +#include "common/join.hpp" +#include "common/setup.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace color = ::tim::log::color; +namespace filepath = ::tim::filepath; +namespace console = ::tim::utility::console; +namespace argparse = ::tim::argparse; +using namespace timemory::join; +using tim::get_env; +using tim::log::colorized; +using tim::log::stream; + +namespace std +{ +std::string +to_string(bool _v) +{ + return (_v) ? "true" : "false"; +} +} // namespace std + +namespace +{ +int verbose = 0; +auto updated_envs = std::set{}; +auto original_envs = std::set{}; +auto child_pids = std::set{}; +auto launcher = std::string{}; + +inline signal_handler& +get_signal_handler(int _sig) +{ + static auto _v = std::unordered_map{}; + auto itr = _v.emplace(_sig, signal_handler{}); + return itr.first->second; +} + +void +create_signal_handler(int sig, signal_handler& sh, void (*func)(int)) +{ + if(sig < 1) return; + sh.m_custom_sigaction.sa_handler = func; + sigemptyset(&sh.m_custom_sigaction.sa_mask); + sh.m_custom_sigaction.sa_flags = SA_RESTART; + if(sigaction(sig, &sh.m_custom_sigaction, &sh.m_original_sigaction) == -1) + { + std::cerr << "Failed to create signal handler for " << sig << std::endl; + } +} + +void +forward_signal(int sig) +{ + for(auto itr : child_pids) + { + TIMEMORY_PRINTF_WARNING(stderr, "Killing pid=%i with signal %i...\n", itr, sig); + kill(itr, sig); + diagnose_status(itr, wait_pid(itr)); + } + signal(sig, SIG_DFL); + kill(getpid(), sig); +} +} // namespace + +int +get_verbose() +{ + verbose = get_env("OMNITRACE_CAUSAL_VERBOSE", + get_env("OMNITRACE_VERBOSE", verbose, false)); + auto _debug = + get_env("OMNITRACE_CAUSAL_DEBUG", 
get_env("OMNITRACE_DEBUG", false, false)); + if(_debug) verbose += 8; + return verbose; +} + +void +forward_signals(const std::set& _signals) +{ + for(auto itr : _signals) + create_signal_handler(itr, get_signal_handler(itr), &forward_signal); +} + +void +add_child_pid(pid_t _v) +{ + child_pids.emplace(_v); +} + +void +remove_child_pid(pid_t _v) +{ + child_pids.erase(_v); +} + +int +wait_pid(pid_t _pid, int _opts) +{ + int _status = 0; + pid_t _pid_v = -1; + _opts |= WUNTRACED; + do + { + if((_opts & WNOHANG) > 0) + std::this_thread::sleep_for(std::chrono::milliseconds{ 100 }); + _pid_v = waitpid(_pid, &_status, _opts); + } while(_pid <= 0); + return _status; +} + +int +diagnose_status(pid_t _pid, int _status) +{ + auto _verbose = get_verbose(); + if(_verbose >= 3) + { + fflush(stderr); + fflush(stdout); + std::cout << std::flush; + std::cerr << std::flush; + } + + bool _normal_exit = (WIFEXITED(_status) > 0); + bool _unhandled_signal = (WIFSIGNALED(_status) > 0); + bool _core_dump = (WCOREDUMP(_status) > 0); + bool _stopped = (WIFSTOPPED(_status) > 0); + int _exit_status = WEXITSTATUS(_status); + int _stop_signal = (_stopped) ? WSTOPSIG(_status) : 0; + int _ec = (_unhandled_signal) ? WTERMSIG(_status) : 0; + + if(_verbose >= 4) + { + TIMEMORY_PRINTF_INFO( + stderr, + "diagnosing status for process %i :: status: %i... normal exit: %s, " + "unhandled signal: %s, core dump: %s, stopped: %s, exit status: %i, stop " + "signal: %i, exit code: %i\n", + _pid, _status, std::to_string(_normal_exit).c_str(), + std::to_string(_unhandled_signal).c_str(), std::to_string(_core_dump).c_str(), + std::to_string(_stopped).c_str(), _exit_status, _stop_signal, _ec); + } + else if(_verbose >= 3) + { + TIMEMORY_PRINTF_INFO(stderr, + "diagnosing status for process %i :: status: %i ...\n", _pid, + _status); + } + + if(!_normal_exit) + { + if(_ec == 0) _ec = EXIT_FAILURE; + if(_verbose >= 0) + { + TIMEMORY_PRINTF_FATAL( + stderr, "process %i terminated abnormally. 
exit code: %i\n", _pid, _ec); + } + } + + if(_stopped) + { + if(_verbose >= 0) + { + TIMEMORY_PRINTF_FATAL(stderr, + "process %i stopped with signal %i. exit code: %i\n", + _pid, _stop_signal, _ec); + } + } + + if(_core_dump) + { + if(_verbose >= 0) + { + TIMEMORY_PRINTF_FATAL( + stderr, "process %i terminated and produced a core dump. exit code: %i\n", + _pid, _ec); + } + } + + if(_unhandled_signal) + { + if(_verbose >= 0) + { + TIMEMORY_PRINTF_FATAL(stderr, + "process %i terminated because it received a signal " + "(%i) that was not handled. exit code: %i\n", + _pid, _ec, _ec); + } + } + + if(!_normal_exit && _exit_status > 0) + { + if(_verbose >= 0) + { + if(_exit_status == 127) + { + TIMEMORY_PRINTF_FATAL( + stderr, "execv in process %i failed. exit code: %i\n", _pid, _ec); + } + else + { + TIMEMORY_PRINTF_FATAL( + stderr, + "process %i terminated with a non-zero status. exit code: %i\n", _pid, + _ec); + } + } + } + + return _ec; +} + +std::string +get_realpath(const std::string& _v) +{ + auto* _tmp = realpath(_v.c_str(), nullptr); + auto _ret = std::string{ _tmp }; + free(_tmp); + return _ret; +} + +void +print_command(const std::vector& _argv, std::string_view _prefix) +{ + if(verbose >= 1) + stream(std::cout, color::info()) + << _prefix << "Executing '" << join(array_config{ " " }, _argv) << "'...\n"; + + std::cerr << color::end() << std::flush; +} + +std::vector +get_initial_environment() +{ + std::vector _env; + if(environ != nullptr) + { + int idx = 0; + while(environ[idx] != nullptr) + { + auto* _v = environ[idx++]; + original_envs.emplace(_v); + _env.emplace_back(strdup(_v)); + } + } + + update_env(_env, "OMNITRACE_MODE", "causal"); + update_env(_env, "OMNITRACE_USE_CAUSAL", true); + update_env(_env, "OMNITRACE_USE_SAMPLING", false); + update_env(_env, "OMNITRACE_USE_PERFETTO", false); + update_env(_env, "OMNITRACE_USE_TIMEMORY", false); + update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", false); + update_env(_env, "OMNITRACE_CRITICAL_TRACE", false); + 
+ + return _env; +} + +void +prepare_command_for_run(char* _exe, std::vector& _argv) +{ + if(!launcher.empty()) + { + bool _injected = false; + auto _new_argv = std::vector{}; + for(auto* itr : _argv) + { + if(!_injected && std::regex_search(itr, std::regex{ launcher })) + { + _new_argv.emplace_back(_exe); + _new_argv.emplace_back(strdup("--")); + _injected = true; + } + _new_argv.emplace_back(itr); + } + + if(!_injected) + { + throw std::runtime_error( + join("", "omnitrace-causal was unable to match \"", launcher, + "\" to any arguments on the command line: \"", + join(array_config{ " ", "", "" }, _argv), "\"")); + } + + std::swap(_argv, _new_argv); + } +} + +void +prepare_environment_for_run(std::vector& _env) +{ + if(launcher.empty()) + { + update_env(_env, "LD_PRELOAD", + get_realpath(get_internal_libpath("libomnitrace-dl.so")), true); + } +} + +// Builds "<directory of the running executable>/../lib/<_lib>". The directory +// falls back to "./" when /proc/self/exe cannot be resolved or contains no '/'. +std::string +get_internal_libpath(const std::string& _lib) +{ + auto* _exe_path = realpath("/proc/self/exe", nullptr); + // copy into an owning string: realpath() may return null on failure + auto _exe = std::string{ (_exe_path != nullptr) ? _exe_path : "" }; + // realpath(..., nullptr) hands back a malloc'd buffer that the caller must free + free(_exe_path); + auto _pos = _exe.find_last_of('/'); + auto _dir = std::string{ "./" }; + if(_pos != std::string::npos) _dir = _exe.substr(0, _pos); + return omnitrace::common::join("/", _dir, "..", "lib", _lib); +} + +void +print_updated_environment(std::vector _env, std::string_view _prefix) +{ + if(get_verbose() < 0) return; + + std::sort(_env.begin(), _env.end(), [](auto* _lhs, auto* _rhs) { + if(!_lhs) return false; + if(!_rhs) return true; + return std::string_view{ _lhs } < std::string_view{ _rhs }; + }); + + std::vector _updates = {}; + std::vector _general = {}; + + for(auto* itr : _env) + { + if(itr == nullptr) continue; + + auto _is_omni = (std::string_view{ itr }.find("OMNITRACE") == 0); + auto _updated = false; + for(const auto& vitr : updated_envs) + { + if(std::string_view{ itr }.find(vitr) == 0) + { + _updated = true; + break; + } + } + + if(_updated) + _updates.emplace_back(itr); + else if(verbose >= 1 && _is_omni) + _general.emplace_back(itr); + } + + if(_general.size() + 
_updates.size() == 0 || verbose < 0) return; + + std::cerr << std::endl; + + for(auto& itr : _general) + stream(std::cerr, color::source()) << _prefix << itr << "\n"; + for(auto& itr : _updates) + stream(std::cerr, color::source()) << _prefix << itr << "\n"; + + std::cerr << color::end() << std::flush; +} + +template +void +update_env(std::vector& _environ, std::string_view _env_var, Tp&& _env_val, + bool _append, std::string_view _join_delim) +{ + updated_envs.emplace(_env_var); + + auto _key = join("", _env_var, "="); + for(auto& itr : _environ) + { + if(!itr) continue; + if(std::string_view{ itr }.find(_key) == 0) + { + if(_append) + { + if(std::string_view{ itr }.find(join("", _env_val)) == + std::string_view::npos) + { + auto _val = std::string{ itr }.substr(_key.length()); + free(itr); + itr = strdup( + join('=', _env_var, join(_join_delim, _env_val, _val)).c_str()); + } + } + else + { + free(itr); + itr = strdup(omnitrace::common::join('=', _env_var, _env_val).c_str()); + } + return; + } + } + _environ.emplace_back( + strdup(omnitrace::common::join('=', _env_var, _env_val).c_str())); +} + +template +void +add_default_env(std::vector& _environ, std::string_view _env_var, Tp&& _env_val) +{ + auto _key = join("", _env_var, "="); + for(auto& itr : _environ) + { + if(!itr) continue; + if(std::string_view{ itr }.find(_key) == 0) return; + } + + updated_envs.emplace(_env_var); + _environ.emplace_back( + strdup(omnitrace::common::join('=', _env_var, _env_val).c_str())); +} + +void +remove_env(std::vector& _environ, std::string_view _env_var) +{ + auto _key = join("", _env_var, "="); + auto _match = [&_key](auto itr) { return std::string_view{ itr }.find(_key) == 0; }; + + _environ.erase(std::remove_if(_environ.begin(), _environ.end(), _match), + _environ.end()); + + for(const auto& itr : original_envs) + { + if(std::string_view{ itr }.find(_key) == 0) + _environ.emplace_back(strdup(itr.c_str())); + } +} + +std::vector +parse_args(int argc, char** argv, std::vector& 
_env, + std::vector>& _causal_envs) +{ + using parser_t = argparse::argument_parser; + using parser_err_t = typename parser_t::result_type; + + auto help_check = [](parser_t& p, int _argc, char** _argv) { + std::set help_args = { "-h", "--help", "-?" }; + return (p.exists("help") || _argc == 1 || + (_argc > 1 && help_args.find(_argv[1]) != help_args.end())); + }; + + auto _pec = EXIT_SUCCESS; + auto help_action = [&_pec, argc, argv](parser_t& p) { + if(_pec != EXIT_SUCCESS) + { + std::stringstream msg; + msg << "Error in command:"; + for(int i = 0; i < argc; ++i) + msg << " " << argv[i]; + msg << "\n\n"; + stream(std::cerr, color::fatal()) << msg.str(); + std::cerr << std::flush; + } + + p.print_help(); + exit(_pec); + }; + + const auto* _desc = R"desc( + Causal profiling usually requires multiple runs to reliably resolve the speedup estimates. + This executable is designed to streamline that process. + For example (assume all commands end with '-- '): + + omnitrace-causal -n 5 -- # runs 5x with causal profiling enabled + + omnitrace-causal -s 0 5,10,15,20 # runs 2x with virtual speedups: + # - 0 + # - randomly selected from 5, 10, 15, and 20 + + omnitrace-causal -F func_A func_B func_(A|B) # runs 3x with the function scope limited to: + # 1. func_A + # 2. func_B + # 3. 
func_A or func_B + General tips: + - Insert progress points at hotspots in your code or use omnitrace's runtime instrumentation + - Note: binary rewrite will produce a incompatible new binary + - Collect a flat profile via sampling + - E.g., omnitrace-sample -F -- + - Inspect sampling_wall_clock.txt and sampling_cpu_clock.txt for functions to target + - Run omnitrace-causal in "function" mode first (does not require debug info) + - Run omnitrace-causal in "line" mode when you are targeting one function (requires debug info) + - Preferably, use predictions from the "function" mode to determine which function to target + - Limit the virtual speedups to a smaller pool, e.g., 0,5,10,25,50, to get reliable predictions quicker + - Make use of the binary, source, and function scope to limit the functions/lines selected for experiments + - Note: source scope requires debug info + )desc"; + + auto parser = parser_t{ basename(argv[0]), _desc }; + + parser.on_error([](parser_t&, const parser_err_t& _err) { + stream(std::cerr, color::fatal()) << _err << "\n"; + exit(EXIT_FAILURE); + }); + + auto _add_separator = [&](std::string _v, const std::string& _desc) { + parser.add_argument({ "" }, ""); + parser + .add_argument({ join("", "[", _v, "]") }, + (_desc.empty()) ? 
_desc : join({ "", "(", ")" }, _desc)) + .color(color::info()); + parser.add_argument({ "" }, ""); + }; + + parser.enable_help(); + parser.enable_version("omnitrace-causal", "v" OMNITRACE_VERSION_STRING, + OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION); + + auto _cols = std::get<0>(console::get_columns()); + if(_cols > parser.get_help_width() + 8) + parser.set_description_width( + std::min(_cols - parser.get_help_width() - 8, 120)); + + _add_separator("DEBUG OPTIONS", ""); + parser.add_argument({ "--monochrome" }, "Disable colorized output") + .max_count(1) + .dtype("bool") + .action([&](parser_t& p) { + auto _colorized = !p.get("monochrome"); + colorized() = _colorized; + p.set_use_color(_colorized); + update_env(_env, "OMNITRACE_COLORIZED_LOG", (_colorized) ? "1" : "0"); + update_env(_env, "COLORIZED_LOG", (_colorized) ? "1" : "0"); + }); + parser.add_argument({ "--debug" }, "Debug output") + .max_count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_DEBUG", p.get("debug")); + }); + parser.add_argument({ "-v", "--verbose" }, "Verbose output") + .count(1) + .action([&](parser_t& p) { + auto _v = p.get("verbose"); + verbose = _v; + update_env(_env, "OMNITRACE_VERBOSE", _v); + }); + + std::string _config_file = {}; + std::string _config_folder = "omnitrace-causal-config"; + bool _generate_configs = false; + bool _add_defaults = true; + + _add_separator("GENERAL OPTIONS", ""); + parser.add_argument({ "-c", "--config" }, "Base configuration file") + .min_count(0) + .dtype("filepath") + .action([&](parser_t& p) { + _config_file = + join(array_config{ ":" }, p.get>("config")); + }); + parser + .add_argument( + { "-l", "--launcher" }, + "When running MPI jobs, omnitrace-causal needs to be *before* the executable " + "which launches the MPI processes (i.e. before `mpirun`, `srun`, etc.). 
Pass " + "the name of the target executable (or a regex for matching to the name of " + "the target) for causal profiling, e.g., `omnitrace-causal -l foo -- mpirun " + "-n 4 foo`. This ensures that the omnitrace library is LD_PRELOADed on the " + "proper target") + .count(1) + .dtype("executable") + .action([&](parser_t& p) { launcher = p.get("launcher"); }); + parser + .add_argument({ "-g", "--generate-configs" }, + "Generate config files instead of passing environment variables " + "directly. If no arguments are provided, the config files will be " + "placed in ${PWD}/omnitrace-causal-config folder") + .min_count(0) + .max_count(1) + .dtype("folder") + .action([&](parser_t& p) { + _generate_configs = true; + auto _dir = p.get("generate-configs"); + if(!_dir.empty()) _config_folder = std::move(_dir); + if(!filepath::exists(_config_folder)) filepath::makedir(_config_folder); + }); + parser + .add_argument({ "--no-defaults" }, + "Do not activate default features which are recommended for causal " + "profiling. For example: PID-tagging of output files and " + "timestamped subdirectories are disabled by default. Kokkos tools " + "support is added by default (OMNITRACE_USE_KOKKOSP=ON) because, " + "for Kokkos applications, the Kokkos-Tools callbacks are used for " + "progress points. 
Activation of OpenMP tools support is similar") + .min_count(0) + .max_count(1) + .dtype("bool") + .action([&](parser_t& p) { _add_defaults = !p.get("no-defaults"); }); + + _add_separator("CAUSAL PROFILING OPTIONS (General)", + "These settings will be applied to all causal profiling runs"); + parser.add_argument({ "-m", "--mode" }, "Causal profiling mode") + .count(1) + .dtype("string") + .choices({ "function", "line" }) + .choice_alias("function", { "func" }) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_CAUSAL_MODE", p.get("mode")); + }); + + parser + .add_argument({ "-o", "--output-name" }, + "Output filename of causal profiling data w/o extension") + .min_count(1) + .dtype("filename") + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_CAUSAL_FILE", p.get("output-name")); + }); + + bool _reset = false; + + parser + .add_argument({ "-r", "--reset" }, + "Overwrite any existing experiment results during the first run") + .max_count(1) + .dtype("bool") + .action([&](parser_t& p) { _reset = p.get("reset"); }); + + parser + .add_argument({ "-e", "--end-to-end" }, + "Single causal experiment for the entire application runtime") + .max_count(1) + .dtype("bool") + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_CAUSAL_END_TO_END", p.get("end-to-end")); + }); + + parser + .add_argument({ "-w", "--wait" }, + "Set the wait time (i.e. delay) before starting the first causal " + "experiment (in seconds)") + .count(1) + .dtype("seconds") + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_CAUSAL_DELAY", p.get("wait")); + }); + + // NOTE: adjacent string literals are concatenated verbatim, so every fragment + // of the help text below must carry its own separating space + parser + .add_argument( + { "-d", "--duration" }, + "Set the length of time (in seconds) to perform causal experimentation after " + "the first experiment is started. 
Once this amount of time has elapsed, no " + "more causal experiments will be started but any currently running " + "experiment will be allowed to finish.") + .count(1) + .dtype("seconds") + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_CAUSAL_DURATION", p.get("duration")); + }); + + int64_t _niterations = 1; + auto _virtual_speedups = std::vector{}; + auto _function_scopes = std::vector{}; + auto _binary_scopes = std::vector{}; + auto _source_scopes = std::vector{}; + auto _function_excludes = std::vector{}; + auto _binary_excludes = std::vector{}; + auto _source_excludes = std::vector{}; + + parser + .add_argument({ "-n", "--iterations" }, + "Number of times to repeat the combination of run configurations") + .count(1) + .dtype("int") + .action([&](parser_t& p) { _niterations = p.get("iterations"); }); + + _add_separator( + "CAUSAL PROFILING OPTIONS (Combinatorial)", + "Each individual argument to these options will multiply the number runs by the " + "number of arguments and the number of iterations. E.g. -n 2 -B \"MAIN\" -F " + "\"foo\" \"bar\" will produce 4 runs: 2 iterations x 1 binary scope x 2 function " + "scopes (MAIN+foo, MAIN+bar, MAIN+foo, MAIN+bar)"); + + parser + .add_argument({ "-s", "--speedups" }, + "Pool of virtual speedups to sample from during experimentation. " + "Each space designates a group and multiple speedups can be " + "grouped together by commas, e.g. -s 0 0,10,20-50 is two groups: " + "group #1 is '0' and group #2 is '0 10 20 25 30 35 40 45 50'") + .min_count(0) + .max_count(-1) + .dtype("integers") + .action([&](parser_t& p) { + _virtual_speedups = p.get>("speedups"); + }); + + parser + .add_argument({ "-B", "--binary-scope" }, + "Restricts causal experiments to the binaries matching the list of " + "regular expressions. 
Each space designates a group and multiple " + "scopes can be grouped together with a semi-colon") + .min_count(0) + .max_count(-1) + .dtype("regex-list") + .action([&](parser_t& p) { + _binary_scopes = p.get>("binary-scope"); + }); + + // NOTE: all scope/exclude options take regular expressions, so they advertise + // the same "regex-list" dtype label ("integers" only applies to --speedups) + parser + .add_argument({ "-S", "--source-scope" }, + "Restricts causal experiments to the source files or source file + " + "lineno pairs (i.e. or :) matching the list of " + "regular expressions. Each space designates a group and multiple " + "scopes can be grouped together with a semi-colon") + .min_count(0) + .max_count(-1) + .dtype("regex-list") + .action([&](parser_t& p) { + _source_scopes = p.get>("source-scope"); + }); + + parser + .add_argument( + { "-F", "--function-scope" }, + "Restricts causal experiments to the functions matching the list of " + "regular expressions. Each space designates a group and multiple " + "scopes can be grouped together with a semi-colon") + .min_count(0) + .max_count(-1) + .dtype("regex-list") + .action([&](parser_t& p) { + _function_scopes = p.get>("function-scope"); + }); + + parser + .add_argument( + { "-BE", "--binary-exclude" }, + "Excludes causal experiments from being performed on the binaries matching " + "the list of regular expressions. Each space designates a group and multiple " + "excludes can be grouped together with a semi-colon") + .min_count(0) + .max_count(-1) + .dtype("regex-list") + .action([&](parser_t& p) { + _binary_excludes = p.get>("binary-exclude"); + }); + + parser + .add_argument( + { "-SE", "--source-exclude" }, + "Excludes causal experiments from being performed on the code from the " + "source files or source file + lineno pair (i.e. or :) " + "matching the list of regular expressions. 
Each space designates a group and " + "multiple excludes can be grouped together with a semi-colon") + .min_count(0) + .max_count(-1) + .dtype("regex-list") + .action([&](parser_t& p) { + _source_excludes = p.get>("source-exclude"); + }); + + parser + .add_argument( + { "-FE", "--function-exclude" }, + "Excludes causal experiments from being performed on the functions matching " + "the list of regular expressions. Each space designates a group and multiple " + "excludes can be grouped together with a semi-colon") + .min_count(0) + .max_count(-1) + .dtype("regex-list") + .action([&](parser_t& p) { + _function_excludes = p.get>("function-exclude"); + }); + +#if OMNITRACE_HIP_VERSION > 0 && OMNITRACE_HIP_VERSION < 50300 + update_env(_env, "HSA_ENABLE_INTERRUPT", 0); +#endif + + auto _inpv = std::vector{}; + auto _outv = std::vector{}; + bool _hash = false; + for(int i = 0; i < argc; ++i) + { + if(_hash) + { + _outv.emplace_back(argv[i]); + } + else if(std::string_view{ argv[i] } == "--") + { + _hash = true; + } + else + { + _inpv.emplace_back(argv[i]); + } + } + + auto _cerr = parser.parse_args(_inpv.size(), _inpv.data()); + if(help_check(parser, argc, argv)) + help_action(parser); + else if(_cerr) + throw std::runtime_error(_cerr.what()); + + if(_niterations < 1) _niterations = 1; + auto _get_size = [](const auto& _v) { return std::max(_v.size(), 1); }; + + auto _causal_envs_tmp = std::vector>{}; + auto _fill = [&_causal_envs_tmp](std::string_view _env_var, const auto& _data, + bool _quote) { + if(_data.empty()) return; + if(_causal_envs_tmp.empty()) _causal_envs_tmp.emplace_back(); + auto _tmp = _causal_envs_tmp; + _causal_envs_tmp.clear(); + _causal_envs_tmp.reserve(_data.size() * _tmp.size()); + for(auto ditr : _data) + { + if(_quote) + { + ditr.insert(0, "\""); + ditr += "\""; + } + + // duplicate the env, add the env variable, emplace back + for(auto itr : _tmp) + { + itr[_env_var] = ditr; + _causal_envs_tmp.emplace_back(itr); + } + } + }; + + if(_add_defaults) + 
{ + add_default_env(_env, "OMNITRACE_TIME_OUTPUT", false); + add_default_env(_env, "OMNITRACE_USE_PID", false); + add_default_env(_env, "OMNITRACE_USE_KOKKOSP", true); + +#if defined(OMNITRACE_USE_OMPT) && OMNITRACE_USE_OMPT > 0 + add_default_env(_env, "OMNITRACE_USE_OMPT", true); +#endif + +#if(defined(OMNITRACE_USE_MPI) && OMNITRACE_USE_MPI > 0) || \ + (defined(OMNITRACE_USE_MPI_HEADERS) && OMNITRACE_USE_MPI_HEADERS > 0) + add_default_env(_env, "OMNITRACE_USE_MPIP", true); +#endif + +#if defined(OMNITRACE_USE_ROCTRACER) && OMNITRACE_USE_ROCTRACER > 0 + add_default_env(_env, "OMNITRACE_ROCTRACER_HIP_API", true); + add_default_env(_env, "OMNITRACE_ROCTRACER_HSA_API", true); +#endif + +#if defined(OMNITRACE_USE_RCCL) && OMNITRACE_USE_RCCL > 0 + add_default_env(_env, "OMNITRACE_USE_RCCLP", true); +#endif + } + + _fill("OMNITRACE_CAUSAL_BINARY_EXCLUDE", _binary_excludes, _generate_configs); + _fill("OMNITRACE_CAUSAL_SOURCE_EXCLUDE", _source_excludes, _generate_configs); + _fill("OMNITRACE_CAUSAL_FUNCTION_EXCLUDE", _function_excludes, _generate_configs); + + _fill("OMNITRACE_CAUSAL_BINARY_SCOPE", _binary_scopes, _generate_configs); + _fill("OMNITRACE_CAUSAL_SOURCE_SCOPE", _source_scopes, _generate_configs); + _fill("OMNITRACE_CAUSAL_FUNCTION_SCOPE", _function_scopes, _generate_configs); + + _fill("OMNITRACE_CAUSAL_FIXED_SPEEDUP", _virtual_speedups, false); + + // make sure at least one env exists + if(_causal_envs_tmp.empty()) _causal_envs_tmp.emplace_back(); + + // duplicate for the number of iterations + _causal_envs.clear(); + _causal_envs.reserve(_niterations * _causal_envs_tmp.size()); + for(int64_t i = 0; i < _niterations; ++i) + { + for(const auto& itr : _causal_envs_tmp) + _causal_envs.emplace_back(itr); + } + + if(_generate_configs) + { + auto _is_omni_cfg = [](std::string_view itr) { + return (itr.find("OMNITRACE") == 0 && itr.find("OMNITRACE_MODE") != 0 && + itr.find("OMNITRACE_CONFIG_FILE") != 0 && + itr.find('=') < itr.length()); + }; + + auto _omni_env = 
std::map{}; + for(auto* itr : _env) + { + if(_is_omni_cfg(itr)) + { + auto _env_var = std::string{ itr }; + auto _pos = _env_var.find('='); + auto _env_val = _env_var.substr(_pos + 1); + _env_var = _env_var.substr(0, _pos); + _omni_env.emplace(_env_var, _env_val); + } + } + + _env.erase(std::remove_if(_env.begin(), _env.end(), _is_omni_cfg), _env.end()); + + _causal_envs_tmp = std::move(_causal_envs); + _causal_envs.clear(); + auto _write_config = + [_omni_env](std::ostream& _os, + const std::map& _data) { + size_t _width = 0; + for(const auto& itr : _omni_env) + _width = std::max(_width, itr.first.length()); + + for(const auto& itr : _data) + _width = std::max(_width, itr.first.length()); + + _os << "# omnitrace common settings\n"; + for(const auto& itr : _omni_env) + _os << std::setw(_width + 1) << std::left << itr.first << " = " + << itr.second << "\n"; + + _os << "\n# omnitrace causal settings\n"; + for(const auto& itr : _data) + _os << std::setw(_width + 1) << std::left << itr.first << " = " + << itr.second << "\n"; + }; + + int nwidth = (std::log10(_causal_envs_tmp.size()) + 1); + for(size_t i = 0; i < _causal_envs_tmp.size(); ++i) + { + std::stringstream fname{}; + fname.fill('0'); + fname << _config_folder << "/causal-" << std::setw(nwidth) << i << ".cfg"; + std::ofstream _ofs{ fname.str() }; + _write_config(_ofs, _causal_envs_tmp.at(i)); + auto _cfg_name = (_config_file.empty()) + ? 
fname.str() + : join(array_config{ ":" }, _config_file, fname.str()); + auto _cfg = + std::map{ { "OMNITRACE_CONFIG_FILE", + _cfg_name } }; + _causal_envs.emplace_back(_cfg); + } + } + + if(_reset) + _causal_envs.front().emplace(std::string_view{ "OMNITRACE_CAUSAL_FILE_RESET" }, + std::string{ "true" }); + + return _outv; +} + +// explicit instantiation for usage in omnitrace-causal.cpp +template void +update_env(std::vector&, std::string_view, const std::string& _env_val, + bool _append, std::string_view); diff --git a/source/bin/omnitrace-causal/omnitrace-causal.cpp b/source/bin/omnitrace-causal/omnitrace-causal.cpp new file mode 100644 index 0000000000..3759d7eb5c --- /dev/null +++ b/source/bin/omnitrace-causal/omnitrace-causal.cpp @@ -0,0 +1,136 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "omnitrace-causal.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +int +main(int argc, char** argv) +{ + auto _base_env = get_initial_environment(); + auto _causal_env = std::vector>{}; + + bool _has_double_hyphen = false; + for(int i = 1; i < argc; ++i) + { + auto _arg = std::string_view{ argv[i] }; + if(_arg == "--" || _arg == "-?" || _arg == "-h" || _arg == "--help" || + _arg == "--version") + _has_double_hyphen = true; + } + + std::vector _argv = {}; + if(_has_double_hyphen) + { + _argv = parse_args(argc, argv, _base_env, _causal_env); + } + else + { + _argv.reserve(argc); + for(int i = 1; i < argc; ++i) + _argv.emplace_back(argv[i]); + _causal_env.resize(1); + } + + prepare_command_for_run(argv[0], _argv); + prepare_environment_for_run(_base_env); + + if(get_verbose() >= 3) + { + TIMEMORY_PRINTF_INFO(stderr, "causal environments to be executed:\n"); + size_t _n = 0; + for(auto& citr : _causal_env) + { + auto _env = _base_env; + for(const auto& eitr : citr) + update_env(_env, eitr.first, eitr.second); + auto _prefix = std::to_string(_n++) + ": "; + print_updated_environment(_env, _prefix); + } + } + + if(!_argv.empty()) + { + if(_causal_env.size() == 1) + { + auto _env = _base_env; + for(const auto& eitr : _causal_env.front()) + update_env(_env, eitr.first, eitr.second); + print_updated_environment(_env, "0: "); + print_command(_argv, "0: "); + _argv.emplace_back(nullptr); + _env.emplace_back(nullptr); + return execvpe(_argv.front(), _argv.data(), _env.data()); + } + + forward_signals({ SIGINT, SIGTERM, SIGQUIT }); + size_t _ncount = 0; + size_t _width = std::log10(_causal_env.size()) + 1; + for(auto& citr : _causal_env) + { + auto _n = _ncount++; + auto _main_pid = getpid(); + auto _pid = fork(); + + if(get_verbose() >= 3) + { + TIMEMORY_PRINTF_INFO(stderr, "process %i returned %i from fork...\n", + getpid(), _pid); + } + + if(_pid == 0) + { + auto _prefix = std::stringstream{}; + _prefix << 
std::setw(_width) << std::right << _n << "/" + << std::setw(_width) << std::left << _causal_env.size() << ": [" + << _main_pid << " -> " << getpid() << "] "; + + auto _env = _base_env; + for(const auto& eitr : citr) + update_env(_env, eitr.first, eitr.second); + print_updated_environment(_env, _prefix.str()); + print_command(_argv, _prefix.str()); + _argv.emplace_back(nullptr); + _env.emplace_back(nullptr); + return execvpe(_argv.front(), _argv.data(), _env.data()); + } + else + { + add_child_pid(_pid); + auto _status = wait_pid(_pid); + auto _ret = diagnose_status(_pid, _status); + remove_child_pid(_pid); + if(_ret != 0) return _ret; + } + } + } +} diff --git a/source/bin/omnitrace-causal/omnitrace-causal.hpp b/source/bin/omnitrace-causal/omnitrace-causal.hpp new file mode 100644 index 0000000000..9e0dc529ff --- /dev/null +++ b/source/bin/omnitrace-causal/omnitrace-causal.hpp @@ -0,0 +1,93 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE.
+
+#pragma once
+
+#define TIMEMORY_PROJECT_NAME "omnitrace-causal"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+int
+get_verbose();  // verbosity level; main() prints extra diagnostics when it is >= 3
+
+std::string
+get_realpath(const std::string&);
+
+void
+print_command(const std::vector& _argv, std::string_view);  // second argument is the per-run prefix string passed by main()
+
+void print_updated_environment(std::vector, std::string_view);  // takes the environment by value; second argument is the per-run prefix
+
+std::vector
+get_initial_environment();  // its result seeds the base environment in main()
+
+void
+prepare_command_for_run(char*, std::vector&);  // main() calls this as (argv[0], command); the command is taken by non-const reference
+
+void
+prepare_environment_for_run(std::vector&);
+
+std::string
+get_internal_libpath(const std::string& _lib);
+
+template
+void
+update_env(std::vector&, std::string_view, Tp&&, bool _append = false,
+           std::string_view _join_delim = ":");  // presumably sets one variable, or with _append extends it using _join_delim -- TODO confirm in impl
+
+template
+void
+add_default_env(std::vector&, std::string_view, Tp&&);
+
+void
+remove_env(std::vector&, std::string_view);
+
+std::vector
+parse_args(int argc, char** argv, std::vector&,
+           std::vector>&);  // main() uses the result as the command to run; both containers are in/out parameters
+
+using sigaction_t = struct sigaction;
+
+struct signal_handler  // going by the member names, pairs an installed sigaction with the one it displaced -- confirm in impl
+{
+    sigaction_t m_custom_sigaction   = {};
+    sigaction_t m_original_sigaction = {};
+};
+
+void
+forward_signals(const std::set&);  // main() passes { SIGINT, SIGTERM, SIGQUIT }
+
+void add_child_pid(pid_t);  // main() registers each forked child here before waiting on it
+
+void remove_child_pid(pid_t);  // main() calls this once the child's status has been diagnosed
+
+int
+wait_pid(pid_t, int = 0);  // main() hands the result to diagnose_status()
+
+int
+diagnose_status(pid_t, int);  // a non-zero result makes main() return that value immediately
diff --git a/source/bin/omnitrace-sample/impl.cpp b/source/bin/omnitrace-sample/impl.cpp index 0a0e85cf98..de6112e432 100644 --- a/source/bin/omnitrace-sample/impl.cpp +++ b/source/bin/omnitrace-sample/impl.cpp @@ -104,7 +104,9 @@ get_initial_environment() auto* _omni_libpath = realpath(get_internal_libpath("libomnitrace.so").c_str(), nullptr); - update_env(_env, "OMNITRACE_USE_SAMPLING", true); + auto _mode = get_env("OMNITRACE_MODE", "sampling", false); + + update_env(_env, 
"OMNITRACE_USE_SAMPLING", (_mode != "causal")); update_env(_env, "OMNITRACE_CRITICAL_TRACE", false); update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", false); @@ -147,6 +149,8 @@ get_internal_libpath(const std::string& _lib) void print_updated_environment(std::vector _env) { + if(get_env("OMNITRACE_VERBOSE", 0) < 0) return; + std::sort(_env.begin(), _env.end(), [](auto* _lhs, auto* _rhs) { if(!_lhs) return false; if(!_rhs) return true; @@ -335,6 +339,8 @@ parse_args(int argc, char** argv, std::vector& _env) }; parser.enable_help(); + parser.enable_version("omnitrace-sample", "v" OMNITRACE_VERSION_STRING, + OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION); auto _cols = std::get<0>(tim::utility::console::get_columns()); if(_cols > parser.get_help_width() + 8) diff --git a/source/bin/omnitrace-sample/omnitrace-sample.cpp b/source/bin/omnitrace-sample/omnitrace-sample.cpp index 662b9a0ec7..f1cf7ac5a2 100644 --- a/source/bin/omnitrace-sample/omnitrace-sample.cpp +++ b/source/bin/omnitrace-sample/omnitrace-sample.cpp @@ -36,7 +36,8 @@ main(int argc, char** argv) for(int i = 1; i < argc; ++i) { auto _arg = std::string_view{ argv[i] }; - if(_arg == "--" || _arg == "-?" || _arg == "-h" || _arg == "--help") + if(_arg == "--" || _arg == "-?" || _arg == "-h" || _arg == "--help" || + _arg == "--version") _has_double_hyphen = true; } diff --git a/source/bin/omnitrace/omnitrace.cpp b/source/bin/omnitrace/omnitrace.cpp index 92704e07b2..73563a7a4a 100644 --- a/source/bin/omnitrace/omnitrace.cpp +++ b/source/bin/omnitrace/omnitrace.cpp @@ -21,6 +21,7 @@ // SOFTWARE. #include "omnitrace.hpp" +#include "common/defines.h" #include "fwd.hpp" #include "log.hpp" @@ -349,10 +350,6 @@ main(int argc, char** argv) << std::endl; } - verbprintf(0, "\n"); - verbprintf(0, "command :: '%s'...\n", cmd_string(_cmdc, _cmdv).c_str()); - verbprintf(0, "\n"); - if(_cmdc > 0) cmdv0 = _cmdv[0]; // now can loop through the options. 
If the first character is '-', then we know @@ -364,6 +361,9 @@ main(int argc, char** argv) string_t extra_help = "-- "; parser.enable_help(); + parser.enable_version("omnitrace", "v" OMNITRACE_VERSION_STRING, + OMNITRACE_GIT_DESCRIBE, OMNITRACE_GIT_REVISION); + parser.add_argument({ "" }, ""); parser.add_argument({ "[DEBUG OPTIONS]" }, ""); parser.add_argument({ "" }, ""); @@ -875,6 +875,10 @@ main(int argc, char** argv) return 0; } + verbprintf(0, "\n"); + verbprintf(0, "command :: '%s'...\n", cmd_string(_cmdc, _cmdv).c_str()); + verbprintf(0, "\n"); + if(err) { std::cerr << err << std::endl; diff --git a/source/bin/tests/CMakeLists.txt b/source/bin/tests/CMakeLists.txt index c2e8f1c815..df23b718a0 100644 --- a/source/bin/tests/CMakeLists.txt +++ b/source/bin/tests/CMakeLists.txt @@ -296,6 +296,7 @@ omnitrace_add_bin_test( _P ~PERFETTO ~PROCESS_SAMPLING + ~KOKKOSP --csv --brief --advanced diff --git a/source/docs/about.md b/source/docs/about.md index acb475c01a..eff87c4a82 100644 --- a/source/docs/about.md +++ b/source/docs/about.md @@ -8,11 +8,11 @@ ## Overview -> ***[Omnitrace](https://github.com/AMDResearch/omnitrace) is an AMD open source research project and is not supported as part of the ROCm software stack.*** +> ***[OmniTrace](https://github.com/AMDResearch/omnitrace) is an AMD open source research project and is not supported as part of the ROCm software stack.*** -[Browse Omnitrace source code on Github](https://github.com/AMDResearch/omnitrace) +[Browse OmniTrace source code on Github](https://github.com/AMDResearch/omnitrace) -[Omnitrace](https://github.com/AMDResearch/omnitrace) is designed for both high-level profiling and +[OmniTrace](https://github.com/AMDResearch/omnitrace) is designed for both high-level profiling and comprehensive tracing of applications running on the CPU or the CPU+GPU via dynamic binary instrumentation, call-stack sampling, and various other means for determining currently executing function and line information. 
@@ -24,17 +24,17 @@ The JSON output files are compatible with the python package [hatchet](https://g the performance data into pandas dataframes and facilitate multi-run comparisons, filtering, visualization in Jupyter notebooks, and much more. -[Omnitrace](https://github.com/AMDResearch/omnitrace) has two distinct configuration steps when instrumenting: +[OmniTrace](https://github.com/AMDResearch/omnitrace) has two distinct configuration steps when instrumenting: 1. Configuring which functions and modules are instrumented in the target binaries (i.e. executable and/or libraries) - - [Instrumenting with Omnitrace](instrumenting.md) + - [Instrumenting with OmniTrace](instrumenting.md) 2. Configuring what the instrumentation does happens when the instrumented binaries are executed - - [Customizing Omnitrace Runtime](runtime.md) + - [Customizing OmniTrace Runtime](runtime.md) -## Omnitrace Use Cases +## OmniTrace Use Cases When analyzing the performance of an application, ***it is always best to NOT assume you know where the performance bottlenecks are*** -***and why they are happening.*** Omnitrace is a ***tool for the entire execution of application***. It is the sort of tool which is +***and why they are happening.*** OmniTrace is a ***tool for the entire execution of application***. It is the sort of tool which is ideal for *characterizing* where optimization would have the greatest impact on the end-to-end execution of the application and/or viewing what else is happening on the system during a performance bottleneck. @@ -44,10 +44,10 @@ to 1 microsecond (1000x speed-up) but the original application *never spent time you will see zero statistically significant speed-up in end-to-end runtime of your application. In other words, it does not matter how fast or slow the code on GPU is if the application is not bottlenecked waiting on the GPU. -Use Omnitrace to obtain a high-level view of the entire application. 
Use it to determine where the performance bottlenecks are and +Use OmniTrace to obtain a high-level view of the entire application. Use it to determine where the performance bottlenecks are and obtain clues to why these bottlenecks are happening. If you want ***extensive*** insight into the execution of individual kernels on the GPU, AMD Research is working on another tool for this but you should start with the tool which characterizes the -broad picture: Omnitrace. +broad picture: OmniTrace. -With regard to the CPU, Omnitrace does not target any specific vendor, it works just as well with non-AMD CPUs as with AMD CPUs. -With regard to the GPU, Omnitrace is currently restricted to the HIP and HSA APIs and kernels executing on AMD GPUs. +With regard to the CPU, OmniTrace does not target any specific vendor, it works just as well with non-AMD CPUs as with AMD CPUs. +With regard to the GPU, OmniTrace is currently restricted to the HIP and HSA APIs and kernels executing on AMD GPUs. diff --git a/source/docs/causal_profiling.md b/source/docs/causal_profiling.md new file mode 100644 index 0000000000..adb3a28df2 --- /dev/null +++ b/source/docs/causal_profiling.md @@ -0,0 +1,480 @@ +# Causal Profiling + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 3 +``` + +## What is "Causal Profiling"? + +> ***If you speed up a given block of code by X%, the application will execute Y% faster*** + +Causal profiling directs parallel application developers to where they should focus their optimization +efforts by quantifying the potential impact of optimizations. Causal profiling is rooted in the concept +that *software execution speed is relative*: speeding up a block of code by X% is mathematically equivalent +to that block of code running at its current speed if all the other code running slower by X%. +Thus, causal profiling works by performing experiments on blocks of code during program execution which +insert pauses to slow down all other concurrently running code. 
During post-processing, these experiments +are translated into calculations for the potential impact of speeding up this block of code. + +Consider the following C++ code executing `foo` and `bar` concurrently in two different threads +where `foo` is 30% faster than `bar` (ideally): + +```cpp +constexpr size_t FOO_N = 7 * 1000000000UL; +constexpr size_t BAR_N = 10 * 1000000000UL; + +void foo() +{ + for(volatile size_t i = 0; i < FOO_N; ++i) {} +} + +void bar() +{ + for(volatile size_t i = 0; i < BAR_N; ++i) {} +} + +int main() +{ + auto _threads = { std::thread{ foo }, + std::thread{ bar } }; + + for(auto& itr : _threads) + itr.join(); +} +``` + +No matter how many optimizations are applied to `foo`, the application will always require the same amount of time +because the end-to-end performance is limited by `bar`. However, a 5% speedup in `bar` will result in the +end-to-end performance improving by 5% and this trend will continue linearly (10% speedup in `bar` yields 10% speedup in +end-to-end performance, and so on) up to 30% speedup, at which point, `bar` executes as fast as `foo`; +any speedup to `bar` beyond 30% will still only yield an end-to-end performance speedup of 30% since the application +will be limited by performance of `foo`, as demonstrated below in the causal profiling visualization: + +![foobar-causal-plot](images/causal-foobar.png) + +The full details of the causal profiling methodology can be found in the paper [Coz: Finding Code that Counts with Causal Profiling](http://arxiv.org/pdf/1608.03676v1.pdf). +The author's implementation is publicly available on [GitHub](https://github.com/plasma-umass/coz). + +## Getting Started + +### Progress Points + +Causal profiling requires "progress points" to track progress through the code in between samples. Progress points must be triggered deterministically via instrumentation. +This can happen in three different ways: + +1. 
OmniTrace can leverage the callbacks from Kokkos-Tools, OpenMP-Tools, roctracer, etc. and the wrappers around functions for MPI, NUMA, RCCL, etc. to act as progress-points +2. User can leverage the [runtime instrumentation capabilities](instrumenting.md#runtime-instrumentation) to insert progress-points (NOTE: binary rewrite to insert progress-points is not supported) +3. User can leverage the [User API](user_api.md), e.g. `OMNITRACE_CAUSAL_PROGRESS` + +Please note with regard to #2, binary rewrite to insert progress-points is not supported: when a rewritten binary is executed, Dyninst translates the instruction pointer address in order +to execute the instrumentation and, as a result, call-stack samples never return instruction pointer addresses in the ranges defined as valid by OmniTrace. Hopefully, a work-around will +be found in the future. + +### Key Concepts + +| Concept | Setting | Options | Description | +|------------------|-----------------------------------|----------------------------------|--------------------------------------------------------------------------------------------------------------------| +| Mode | `OMNITRACE_CAUSAL_MODE` | `function`, `line` | Select entire function or individual line of code for causal experiments | +| End-to-End | `OMNITRACE_CAUSAL_END_TO_END` | boolean | Perform a single experiment during the entire run (does not require progress-points) | +| Fixed speedup(s) | `OMNITRACE_CAUSAL_FIXED_SPEEDUP` | one or more values from [0, 100] | Virtual speedup or pool of virtual speedups to randomly select | +| Binary scope | `OMNITRACE_CAUSAL_BINARY_SCOPE` | regular expression(s) | Dynamic binaries containing code for experiments | +| Source scope | `OMNITRACE_CAUSAL_SOURCE_SCOPE` | regular expression(s) | `` and/or `:` containing code to include in experiments | +| Function scope | `OMNITRACE_CAUSAL_FUNCTION_SCOPE` | regular expression(s) | Restricts experiments to matching functions (function mode) or lines of code within 
matching functions (line mode) | + +#### Notes + +1. Binary scope defaults to `%MAIN%` (executable). Scope can be expanded to include linked libraries +2. `<file>` and `<file>:<line>` support requires debug info (i.e. code was compiled with `-g` or, preferably, `-g3`) +3. Function mode does not require debug info but does not support stripped binaries + +### Speedup Prediction Variability and `omnitrace-causal` Executable + +Causal profiling typically requires executing the application several times in order to adequately sample all the domains of executing code, experiment speedups, etc. and resolve statistical fluctuations. +The `omnitrace-causal` executable is designed to simplify running this procedure: + +```console +$ omnitrace-causal --help +[omnitrace-causal] Usage: ./bin/omnitrace-causal [ --help (count: 0, dtype: bool) + --version (count: 0, dtype: bool) + --monochrome (max: 1, dtype: bool) + --debug (max: 1, dtype: bool) + --verbose (count: 1) + --config (min: 0, dtype: filepath) + --launcher (count: 1, dtype: executable) + --generate-configs (min: 0, dtype: folder) + --no-defaults (min: 0, dtype: bool) + --mode (count: 1, dtype: string) + --output-name (min: 1, dtype: filename) + --reset (max: 1, dtype: bool) + --end-to-end (max: 1, dtype: bool) + --wait (count: 1, dtype: seconds) + --duration (count: 1, dtype: seconds) + --iterations (count: 1, dtype: int) + --speedups (min: 0, dtype: integers) + --binary-scope (min: 0, dtype: integers) + --source-scope (min: 0, dtype: integers) + --function-scope (min: 0, dtype: regex-list) + --binary-exclude (min: 0, dtype: integers) + --source-exclude (min: 0, dtype: integers) + --function-exclude (min: 0, dtype: regex-list) + ] + + Causal profiling usually requires multiple runs to reliably resolve the speedup estimates. + This executable is designed to streamline that process. 
+ For example (assume all commands end with '-- '): + + omnitrace-causal -n 5 -- # runs 5x with causal profiling enabled + + omnitrace-causal -s 0 5,10,15,20 # runs 2x with virtual speedups: + # - 0 + # - randomly selected from 5, 10, 15, and 20 + + omnitrace-causal -F func_A func_B func_(A|B) # runs 3x with the function scope limited to: + # 1. func_A + # 2. func_B + # 3. func_A or func_B + General tips: + - Insert progress points at hotspots in your code or use omnitrace's runtime instrumentation + - Note: binary rewrite will produce a incompatible new binary + - Run omnitrace-causal in "function" mode first (does not require debug info) + - Run omnitrace-causal in "line" mode when you are targeting one function (requires debug info) + - Preferably, use predictions from the "function" mode to determine which function to target + - Limit the virtual speedups to a smaller pool, e.g., 0,5,10,25,50, to get reliable predictions quicker + - Make use of the binary, source, and function scope to limit the functions/lines selected for experiments + - Note: source scope requires debug info + + +Options: + -h, -?, --help Shows this page + --version Prints the version and exit + + [DEBUG OPTIONS] + + --monochrome Disable colorized output + --debug Debug output + -v, --verbose Verbose output + + [GENERAL OPTIONS] + + -c, --config Base configuration file + -l, --launcher When running MPI jobs, omnitrace-causal needs to be *before* the executable which launches the MPI processes (i.e. + before `mpirun`, `srun`, etc.). Pass the name of the target executable (or a regex for matching to the name of the + target) for causal profiling, e.g., `omnitrace-causal -l foo -- mpirun -n 4 foo`. This ensures that the omnitrace + library is LD_PRELOADed on the proper target + -g, --generate-configs Generate config files instead of passing environment variables directly. 
If no arguments are provided, the config files + will be placed in ${PWD}/omnitrace-causal-config folder + --no-defaults Do not activate default features which are recommended for causal profiling. For example: PID-tagging of output files + and timestamped subdirectories are disabled by default. Kokkos tools support is added by default + (OMNITRACE_USE_KOKKOSP=ON) because, for Kokkos applications, the Kokkos-Tools callbacks are used for progress points. + Activation of OpenMP tools support is similar + + [CAUSAL PROFILING OPTIONS (General)] + (These settings will be applied to all causal profiling runs) + + -m, --mode [ function (func) | line ] + Causal profiling mode + -o, --output-name Output filename of causal profiling data w/o extension + -r, --reset Overwrite any existing experiment results during the first run + -e, --end-to-end Single causal experiment for the entire application runtime + -w, --wait Set the wait time (i.e. delay) before starting the first causal experiment (in seconds) + -d, --duration Set the length of time (in seconds) to perform causal experimentationafter the first experiment is started. Once this + amount of time has elapsed, no more causal experiments will be started but any currently running experiment will be + allowed to finish. + -n, --iterations Number of times to repeat the combination of run configurations + + [CAUSAL PROFILING OPTIONS (Combinatorial)] + (Each individual argument to these options will multiply the number runs by the number of arguments and the number of + iterations. E.g. -n 2 -B "MAIN" -F "foo" "bar" will produce 4 runs: 2 iterations x 1 binary scope x 2 function scopes + (MAIN+foo, MAIN+bar, MAIN+foo, MAIN+bar)) + + -s, --speedups Pool of virtual speedups to sample from during experimentation. Each space designates a group and multiple speedups can + be grouped together by commas, e.g. 
-s 0 0,10,20-50 is two groups: group #1 is '0' and group #2 is '0 10 20 25 30 35 40 + 45 50' + -B, --binary-scope Restricts causal experiments to the binaries matching the list of regular expressions. Each space designates a group + and multiple scopes can be grouped together with a semi-colon + -S, --source-scope Restricts causal experiments to the source files or source file + lineno pairs (i.e. or :) matching + the list of regular expressions. Each space designates a group and multiple scopes can be grouped together with a + semi-colon + -F, --function-scope Restricts causal experiments to the functions matching the list of regular expressions. Each space designates a group + and multiple scopes can be grouped together with a semi-colon + -BE, --binary-exclude Excludes causal experiments from being performed on the binaries matching the list of regular expressions. Each space + designates a group and multiple excludes can be grouped together with a semi-colon + -SE, --source-exclude Excludes causal experiments from being performed on the code from the source files or source file + lineno pair (i.e. + or :) matching the list of regular expressions. Each space designates a group and multiple excludes + can be grouped together with a semi-colon + -FE, --function-exclude Excludes causal experiments from being performed on the functions matching the list of regular expressions. Each space + designates a group and multiple excludes can be grouped together with a semi-colon +``` + +#### Examples + +```bash +#!/bin/bash -e + +module load omnitrace + +N=20 +I=3 + +# when providing speedups to omnitrace-causal, speedup +# groups are separated by a space so "0,10" results in +# one speedup group where omnitrace samples from +# the speedup set of {0, 10}. 
Passing "0 10" (without +# quotes) to omnitrace-causal multiplies the +# number of runs by 2, where the first half of the +# runs instruct omnitrace to only use 0 as the +# speedup and the second half of the runs instruct +# omnitrace to only use 10 as the speedup. +SPEEDUPS="0,0,0,10,20,30,40,50,50,75,75,75,90,90,90" +# thus, -s ${SPEEDUPS} only multiplies the number +# of runs by 1 whereas -s ${SPEEDUPS_E2E} multiplies +# the number of runs by 15: +# - 3 runs with speedup of 0 +# - 1 run for each of the speedups 10, 20, 30, and 40 +# - 2 runs with speedup of 50 +# - 3 runs with speedup of 75 +# - 3 runs with speedup of 90 +SPEEDUPS_E2E=$(echo "${SPEEDUPS}" | sed 's/,/ /g') + + +# 20 iterations in function mode with 1 speedup group +# and source scope set to .cpp files +# +# outputs to files: +# - causal/experiments.func.coz +# - causal/experiments.func.json +# +# total executions: 20 +# +omnitrace-causal \ + -n ${N} \ + -s ${SPEEDUPS} \ + -m function \ + -o experiments.func \ + -S ".*\\.cpp" \ + -- \ + ./causal-omni-cpu "${@}" + + +# 20 iterations in line mode with 1 speedup group +# and source scope restricted to lines 155 and 165 +# in the causal.cpp file. +# +# outputs to files: +# - causal/experiments.line.coz +# - causal/experiments.line.json +# +# total executions: 20 +# +omnitrace-causal \ + -n ${N} \ + -s ${SPEEDUPS} \ + -m line \ + -o experiments.line \ + -S "causal\\.cpp:(155|165)" \ + -- \ + ./causal-omni-cpu "${@}" + + +# 3 iterations in function mode of 15 singular speedups +# in end-to-end mode with 2 different function scopes +# where one is restricted to "cpu_slow_func" and +# another is restricted to "cpu_fast_func". 
+# +# outputs to files: +# - causal/experiments.func.e2e.coz +# - causal/experiments.func.e2e.json +# +# total executions: 90 +# +omnitrace-causal \ + -n ${I} \ + -s ${SPEEDUPS_E2E} \ + -m func \ + -e \ + -o experiments.func.e2e \ + -F "cpu_slow_func" \ + "cpu_fast_func" \ + -- \ + ./causal-omni-cpu "${@}" + +# 3 iterations in line mode of 15 singular speedups +# in end-to-end mode with 2 different source scopes +# where one is restricted to line 155 in causal.cpp +# and another is restricted to line 165 in causal.cpp. +# +# outputs to files: +# - causal/experiments.line.e2e.coz +# - causal/experiments.line.e2e.json +# +# total executions: 90 +# +omnitrace-causal \ + -n ${I} \ + -s ${SPEEDUPS_E2E} \ + -m line \ + -e \ + -o experiments.line.e2e \ + -S "causal\\.cpp:155" \ + "causal\\.cpp:165" \ + -- \ + ./causal-omni-cpu "${@}" + + +export OMP_NUM_THREADS=8 +export OMP_PROC_BIND=spread +export OMP_PLACES=threads + +# set number of iterations to 5 +N=5 + +# 5 iterations in function mode of 1 speedup +# group with the source scope restricted +# to files containing "lulesh" in their filename +# and exclude functions which start with "Kokkos::" +# or "std::enable_if". +# +# outputs to files: +# - causal/experiments.func.coz +# - causal/experiments.func.json +# +# total executions: 5 +# +# First of 5 executions overwrites any +# existing causal/experiments.func.(coz|json) +# file due to "--reset" argument +# +omnitrace-causal \ + --reset \ + -n ${N} \ + -s ${SPEEDUPS} \ + -m func \ + -o experiments.func \ + -S "lulesh.*" \ + -FE "^(Kokkos::|std::enable_if)" \ + -- \ + ./lulesh-omni -i 50 -s 200 -r 20 -b 5 -c 5 -p + + +# 5 iterations in line mode of 1 speedup +# group with the source scope restricted +# to files containing "lulesh" in their filename +# and exclude functions which start with "exec_range" +# or "execute" and which contain either +# "construct_shared_allocation" or "._omp_fn." in +# the function name. 
+# +# outputs to files: +# - causal/experiments.line.coz +# - causal/experiments.line.json +# +# total executions: 5 +# +# First of 5 executions overwrites any +# existing causal/experiments.line.(coz|json) +# file due to "--reset" argument +# +omnitrace-causal \ + --reset \ + -n ${N} \ + -s ${SPEEDUPS} \ + -m line \ + -o experiments.line \ + -S "lulesh.*" \ + -FE "^(exec_range|execute);construct_shared_allocation;\\._omp_fn\\." \ + -- \ + ./lulesh-omni -i 50 -s 200 -r 20 -b 5 -c 5 -p + + +# 5 iterations in line mode of 1 speedup +# group with the source scope restricted +# to files whose basename is "lulesh.cc" +# for 3 different functions: +# - ApplyMaterialPropertiesForElems +# - CalcHourglassControlForElems +# - CalcVolumeForceForElems +# +# outputs to files: +# - causal/experiments.line.targeted.coz +# - causal/experiments.line.targeted.json +# +# total executions: 15 +# +# First of 5 executions overwrites any +# existing causal/experiments.line.(coz|json) +# file due to "--reset" argument +# +omnitrace-causal \ + --reset \ + -n ${N} \ + -s ${SPEEDUPS} \ + -m line \ + -o experiments.line.targeted \ + -F "ApplyMaterialPropertiesForElems" \ + "CalcHourglassControlForElems" \ + "CalcVolumeForceForElems" \ + -S "lulesh\\.cc" \ + -- \ + ./lulesh-omni -i 50 -s 200 -r 20 -b 5 -c 5 -p +``` + +#### Using `omnitrace-causal` with other launchers (e.g. `mpirun`) + +The `omnitrace-causal` executable is intended to assist with application replay and is designed to always be at the start of the command-line (i.e. the primary process). +`omnitrace-causal` typically adds a `LD_PRELOAD` of the OmniTrace libraries into the environment before launching the command in order to inject the functionality +required to start the causal profiling tooling. However, this is problematic when the target application for causal profiling requires another command-line +tool in order to run, e.g. `foo` is the target application but executing `foo` requires `mpirun -n 2 foo`. 
If one were to simply do `omnitrace-causal -- mpirun -n 2 foo`, +then the causal profiling would be applied to `mpirun` instead of `foo`. `omnitrace-causal` remedies this by providing a command-line option `-l` / `--launcher` +to indicate the target application is using a launcher script/executable. The argument to the command-line option is the name of (or regex for) the target application +on the command-line. When `--launcher` is used, `omnitrace-causal` will generate all the replay configurations and execute them but delay adding the `LD_PRELOAD`; instead, it +will inject a call to itself into the command-line right before the target application. This recursive call to itself will inherit the configuration from the +parent `omnitrace-causal` executable, insert an `LD_PRELOAD` into the environment, and then invoke an `execv` to replace itself with the new process launched by the target +application. + +In other words, the following command: + +```console +omnitrace-causal -l foo -n 3 -- mpirun -n 2 foo +``` + +Effectively results in: + +```console +mpirun -n 2 omnitrace-causal -- foo +mpirun -n 2 omnitrace-causal -- foo +mpirun -n 2 omnitrace-causal -- foo +``` + +### Visualizing the Causal Output + +OmniTrace generates a `causal/experiments.json` and `causal/experiments.coz` in `${OMNITRACE_OUTPUT_PATH}/${OMNITRACE_OUTPUT_PREFIX}`. A standalone GUI for viewing the causal profiling +results is under development but until this is available, visit [plasma-umass.org/coz/](https://plasma-umass.org/coz/) and open the `*.coz` file. + +## OmniTrace vs. Coz + +This section is intended for readers who are familiar with the [Coz profiler](https://github.com/plasma-umass/coz). 
+OmniTrace provides several additional features and utilities for causal profiling: + +| | [Coz](https://github.com/plasma-umass/coz) | [OmniTrace](https://github.com/AMDResearch/omnitrace) | Notes | +|----------------------|:-------------------------------------------------------------------:|:----------------------------------------------------------:|-------------------------------| +| Debug info | requires debug info in DWARF v3 format (`-gdwarf-3`) | optional, supports any DWARF format version | See Note #1 below | +| Experiment selection | `<file>:<line>` | `<function>` or `<file>:<line>` | See Note #2 below | +| Experiment speedups | Randomly samples b/t 0..100 in increments of 5 or one fixed speedup | Supports specifying smaller subset | See Note #3 below | +| Scope options | Supports binary and source scopes | Supports binary, source, and function scopes | See Note #4, #5, and #6 below | +| Scope inclusion | Uses `%` as wildcard for binary and source scopes | Full regex support for binary, source, and function scopes | | +| Scope exclusion | Not supported | Supports regexes for excluding binary/source/function | See Note #7 below | +| Call-stack sampling | Linux perf | libunwind | See Note #8 below | + +### Notes + +1. OmniTrace supports a "function" mode which does not require debug info +2. OmniTrace supports selecting the entire range of instruction pointers for a function instead of the instruction pointer for one line. In large codes, "function" mode + can resolve in fewer iterations and once a target function is identified, one can switch to line mode and limit the function scope to the target function +3. OmniTrace supports randomly sampling from subsets, e.g. { 0, 0, 5, 10 } where 0% is randomly selected 50% of time and 5% and 10% are randomly selected 25% of the time +4. OmniTrace and COZ have the same definition for binary scope: the binaries loaded at runtime (e.g. executable and linked libraries) +5. 
OmniTrace "source scope" supports both `` and `:` formats in contrast to COZ "source scope" which requires `:` format +6. OmniTrace supports a "function" scope which narrows the functions/lines which are eligible for causal experiments to those within the matching functions +7. OmniTrace supports a second filter on scopes for removing binary/source/function caught by inclusive match, e.g. `BINARY_SCOPE=.*` + `BINARY_EXCLUDE=libmpi.*` + initially includes all binaries but exclude regex removes MPI libraries diff --git a/source/docs/conf.py b/source/docs/conf.py index 7187cfaaaa..43dcbd25a7 100644 --- a/source/docs/conf.py +++ b/source/docs/conf.py @@ -137,6 +137,7 @@ breathe_projects_source = { "omnitrace/types.h", "omnitrace/categories.h", "omnitrace/user.h", + "omnitrace/causal.h", ], ) } diff --git a/source/docs/features.md b/source/docs/features.md index 795f75e356..a9c57fd691 100644 --- a/source/docs/features.md +++ b/source/docs/features.md @@ -8,7 +8,7 @@ ## Overview -[Omnitrace](https://github.com/AMDResearch/omnitrace) is designed to be highly extensible. Internally, it leverages the +[OmniTrace](https://github.com/AMDResearch/omnitrace) is designed to be highly extensible. Internally, it leverages the [timemory performance analysis toolkit](https://github.com/NERSC/timemory) to manage extensions, resources, data, etc. @@ -23,6 +23,8 @@ manage extensions, resources, data, etc. - Periodic software interrupts per-thread - Process-level sampling - Background thread records process-, system- and device-level metrics while the application executes +- Causal profiling + - Quantifies the potential impact of optimizations in parallel codes - Critical trace generation ### Data Analysis @@ -32,6 +34,7 @@ manage extensions, resources, data, etc. 
- Ideal for running at scale - Comprehensive traces - Every individual event/measurement +- Application speedup predictions resulting from potential optimizations in functions and lines of code (causal profiling) - Critical trace analysis (alpha) ### Parallelism API Support diff --git a/source/docs/getting_started.md b/source/docs/getting_started.md index 9dbdcdab2e..d64e9a17aa 100644 --- a/source/docs/getting_started.md +++ b/source/docs/getting_started.md @@ -101,7 +101,7 @@ e.g., omnitrace's meaning of the term "module" when instrumenting Python. ## Data Collection Mode(s) -Omnitrace supports several modes of recording trace and profiling data for your application: +OmniTrace supports several modes of recording trace and profiling data for your application: | Mode | Descriptions | |-----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| diff --git a/source/docs/images/causal-foobar.png b/source/docs/images/causal-foobar.png new file mode 100644 index 0000000000000000000000000000000000000000..a887b126aefb474c4130451c94e47bab9135edf1 GIT binary patch literal 27358 zcmcG01yq$^`{lJj2~k2oN};x?!4m1L}EuJ=Yu3OZ6=qX%j)d+gCG0C-t89?W?_8IN;IK+ZVn6a}46!mj(_q=|5M~ z89@JD?5#c&_2loxO+V>h{=N9CxaRx67o()Mdj7o_N8msC!4=6%A#C-?p}##Z2xyDT ze%11ng?+Ge?CV?THdPTxTbnc5HdISi@3n`0HpK<^`%xM{TjO@^W5DkgiANZape3&| z?W)tz6Injk`M#dy`c<(Sr)zCDf$;9m>4~}Hv}?IH&U-MX3X|imG-%WzyKW56Z_5vp z^Dc%cz1MlTx;&1-d%}$3xZt@Kz8LWavt7wwnN9{RJ*$}CT67K_Cw3@XZp!5cNWPzbiv~84Yh=PbM=h<6`H5-88Fju&pD6 zH0II<<(dGA_v#?OYf@Dr@(y3~3$P$l1xTMR=Gf1tM=(`q)2EK>ckF2Pc$btN(i>cS z<9Ga*`>Prs?(fxiH${>fZ?E(h`kk_rIjKE0ynf<6uGKiPBF{o}8TLDfukTkAqmwww zaZWhQxc{XP_x9@*S5D2Ef{3Hi>*b)j?TuQzC&tfp+#-;%0U)5l!dmXa2@lc`*Fui;!elG02n7LNSzA!H4*(aMqrv6*@4Qnzom7}0kOpOCb7jmcI|6jS20dC){6%d_ohT_<`!rz zNgT^$uQh3ZO>WsZ7IJIbuvXPY`)bxd)^V(F+8GN9n*hI0cJ97L7!y$|`?YCEWEZsK 
z#T!&=N>Mb<_>4!(RE=SryRx^2u2w1Yat#k{j9Y}X$~OPOaiS3d{$sEf6{P%$E#n_t zH5081hD>PB`mge5y(Xri(nC*3RUMZsV>c_7ueV3lj6QWx9_N{iiJ&5mN5*GG zWt>(;ZYt-Db-UM<<#Q8*h`KhP(!roPnGiQwLXdQ{CGuN)8$;OJCXcH~Xvn(VcvwD50lDlbe3zCZm2F$HE9}1R^^}KT zxLjYpe88{zbasSq#COmC>?8d6m<(W|ikLKm57Xm(rTVZ(gB|*29qYF-@uP#R8D{9)_ z%)kUulAjf<8ZXUmh-$EY+h5wm%26os4VrQKx;7H9`-Z!B-I8BBgWhPB%`#7evB6}$ z`SgdW+`))ZI`8!xW21}lGi4qd$$O0?jRi1bkF;)Mt3wv`-s`}4@=7u(KECFo_?1xY zjrJnV(^9C8g2-P_9RYiX*)tj|DdGx@yoUDZ-G)-U%9g7|dBG>M#IXMgvd&|C-ARQxuzEASg_!A--(=QO!zN%9;KRz!VA?ylZcA4=&W ze)ng17HnP)hqa%4+b`5?ybLsfP3&kOHsCuDos^V9UgMu#Jvn2>HSC?2Yqj5udW3@X z6ZX1gNleWRk5uS4TTF7RXFDp$n!$MC@xEwrOwp9J3)xE$SE45^nv1J@>b`j5^t8oh zXSzoAM=!Z(ia&*BU?*UeihoqkB!H^eIZE$c73`nc;CXqqjMfN)@c zSAQ-TFBsO!srlaco>Q%{bJDLvx~1jJ%d0Z{!;%2V+U=+v$Ahv)pQ`M5Ub?pFlqME? z{0>m$^lX^1wZHIFG9o1+Ji8#P<|IkrYU088$!j;Sxl(nzQLUWch^j;w zYOBzhaf+E0>5k}X%Q5=-#la-sRxj^zhMR>tPTA^$EgrxeFYZXN)ctNBV?9g8gu0;Z zYt<&mJUrIdV6 zF>`&D4@I;zm4m3D#`Mjz-ow4_70JQJ3dDQsff#kw&)Lj9=AQ0yb$`}|_u6!Y%t&=# zF(%qQ8*yO0DjaZ@$YExC?i`T@@mqqT1;reIDgI$nwH!{CYyCWXk*J~jj zwtW0X<&4KuJn;uzM%8F|840*QE34zN!NDt-pc{5L zlMR~E*_N*od>`g8E^|VM81`bc}a<;N3+kdo(nJ;lFYEO2*&qh|)Tw&&Z?V1~&Eo=F} z3`bs4-+uck?mS-HbWg?aG;Q=a!7e<`&407a5K8UdNd)w(cE`sunw) zS+tG$`=g|!rs_|X(;uw%BF-&Gs*-YW#7tM&4*t@Ygy{6deUgws=d{^;!O9xBJzX_e zYOIKWMz}d9V`N0t^c9tafuYF7R*sJkWpA%`k?NcxK7X+2k?AppxVpQ){jLnGddfeL zw1kh@3xv@I!!P=$=;)2%Vr1YhTa#rN&z~c` zm6E~)-|B)n?@Hjtb3?^rp|jgnd;j6XBUDuFdbc|{s&zBse6L7JEq3SL$>b_XyK+(x z6Mywly&~weCA5TQXsrZ?L<>rEn2!Y1igo+oc^V*rl^y3K&=uv|fgL;X|W~|e0 z1LBnH^=07QyDzAy{7t4S3pHANkv3wX=@ezLp`lG0-mEG}}DwfrBrJHs36${HVS65ev zXd+K{p=R6ZmZ__v{%k18sJE-o&I+tabGmm8|Ec{JA-w6sCP*|MBHPR(s? 
z7z6|&*H@Pt*4KO{I`(HRKk#{*upKSSdxyUN9JwPPSh=q3p!ne<32kHS8U5b8=r6q` zZL41t@w61lNn--s!Brd%-6kUX+^OyBSTcBgw%Zkdh2-2Ta5F^ zto>yXtur&YgU^$0g6D{hUvmKrUF&eIKX5&mM3BLDTdB>;#)btF@SHj8m&x0=Zkqmy ziA0NSL1ZK(9_{TPr>3TG=r!(t_=@t9ojp37QZ^(crC5{rXnE1O4Yv%&xsN0SvD4h> z^<+KJ9I2&d98;-AmFu2eTO%-?6&!%V@^&Pr~!dnm9_T z5}bk-$mBF7mLr!;{Anym!b0E&$)(fj6&zKb>xFaegMB|C0@SaK=y(rW(nd?_#&s!q z4|gL26FO`s(*1|xZdZO0Gr*^f(9wU=q$*%)N+~2XG@8|v(B9r21d$AmZAB+Jt@6tH z`fu3T0RdX^m!e{_!oosmJV*H4oLU5xeB;vZ(BB;$DS|havE#jE#LyY1doeJ8R&+BGlxZolFfbH>qjxnITDd$1WpMA<;?{^=GQt7h3}sGcyv1IXEWv3bNwj;)FILdMSB3#l!Un)8}_~dcB{L zj8#~Y(bLmAfj{Bn=ih6_5TkQA)Qg}|R#>i7S67$qR*k0@Aiq{OKlX>;aR-NxJG{4i zd3MiQ!N%)M)8Ut9mq*$2usmo%|IeH)4yeWxvcF!~LzYHlnMpo_GIxeE!ln!9GXF#qZc>VxE1;@1MF;=S* z48LHU#wfOkH`26hNXG+t14~&C1v7Hu4RwjQM#km|a=^C*2|TaYr3OTUFql(yb>3yy zRlnqFemP9ag&a|IeD_xbugg!EUu@cD(6a)$Ig3a82M2fZC^ekr%6{I$O6t^0o+PpD5c*it&AAo9-?3joV0#N!e!GsQVTvA!2^1 z^cp_*1Wg;snsS^)70Zw>%-+987t4CM(DJP*-|%7q_D_n%!$?a@lcNgv>j6pfp4mn&cJDdQ z@t7u+T!9;jU|?U7wlHL1aPX6;C@Bric2VL}JiN~KV3PHz3d``$t>bLv5GvN}YM2IT zzQ}y%P$IjcT!-;XveTxT+_mY<%;XBuRqSG`i+)O%y~MMjicqdwd(x5#m4@l@6t#AMM;6=0cmPPh$+D-^)y2znlaE*X z3NvXz0p%J@jaR{PxF;?G$Lp0S#?b2h4oai(A}>&-p5o&}LGi$2HCbvx<6TPrMhM@S z^v4ye37YEzX@~2B->_Be?zly&x|_{ioQZB1_wmg3vmR{as|K%t>S_JW zMfVn?#u}JkY1(ElspL5Kj85pI;>=B&yT#*D$5p>Q$DP!#yp~sxHAS#n6}nb@H!d~& z&q9tMR5}=BVhQy|4E=`@iZuQeR`L72_d31wLoi6>ww7AubOmn^gK2?f{eHnw)qt(v3BUO@;4z>n=&?A*QO8zysRvRM$ z@tn2{U?N20*xX}d@!;@dBuCz#kh`+cBNLtuEi4%J_V!TlSh@U!Zm;RUm+vlF|=c_MP2wW$3cUSGeL(1ikKXe*!b5@oxTE zF8V5xFq;_+MPGjPV=s@O!w7p(FV?UgURr5(VmM{tq+2H|X$Ni7N%@jdc?QTi=#FK6 zkrR$`U3TU2X0&y$W(^vf49w&BZR6(a%yec9$g6T(4;rWzB3)&dbuS#`&gzoIMUO~5 zfBm`(X>9bbAx@_$(OYV1cA2lPF5Qp4@5G(+3Z-(|O^UeW!@mF;0fF*7O6#!aw{OKD z%nSlhQBamZ<&&oI^YM8}N}4{ANXZ&m4O{y67O17^(hr7(7?vO|?+_ z?|!KqpG9uY=yN7w*ywR-w>2mw!La{Oe~I6&fR~mBKX3ARb6RFPP>Ri!MbCDHpOS1< zb$q~c&t{?N!3^6KadGE3CQe!iCzXuHv>?*Pp@^M_<{w(6s&SkL72XVzgFHa}K(r4?8%81km3^l;}y zVqXv`JaE8xw)8NN* z>?o_O9n1R)pUcK|%+0pd*+i|TTBu7GkE*_ZRZABEHJ*i-oLac7>`*%V>AeOao|)vg 
z`aBI>QKA57vnApY8lAofl4|-@*Lb6Z39^2B6ya9E_?EwuEjGJ!^`}(fj~^;akQaLT z1cqH6`~W!F+q=CR3jVxv_rPLt_?_}cVc{ye#b?iq*}EDrux7L7P3SSEX}quEW6k{% zGwyuizZthuB;Zf7=II4t%!n4_q1GvlPP}52H5|-owi)TC4r_w&$_LoA!rEk5JC+eo zR7{Wq#mHYb5NAj8A19cMC7)T^m_~PsvlIhYz12t_}Q@oz7U%vbAzt?bqYm_wh zWY>+xY^xiqFPdO>ud1J_8wlMzvG)+ihgn^_(_M8HyZHx|Hd{h~JUv1}N>MB`$&oKk z9~fI7%+O?rrnoeiDmV8s2PCk$rR6CmCZCxZT_~wY*00E*L(V9~?KOvYZC65xGK*2c|Vg&+Z{U)>>*0j^nfqY#on(>p6kVab|!P3ggK#imAM5%G-T!ROJ zriP{_E}(3|!NJyLmJ196MLqNfi}#iiXl=TxYWP1SxXj5^Pf`Df=Qj5$;pc0{{dv1p zzp{`5_SwGHpGt{&7d%3mWQHh!)FmCn3wv!uZ3Z2cFQa~toA0>E!c>V zN`@a;T3Fz4+E7`p_O!IMtxmk}L|s&!43yFnI<=EK+i04HuT}Yre?G94Pg+PwXspy2 z4}=RAO+M9Vxw+sS#dfVkPtI!6NJQDD2HB9pHolCM$&@eT?i z12p>T@?x|t&l@B5;OwQ$HCYq=G;w`ww3roXb#$V8CRi$#+k}QHdLk1Tsp8-Rlm#Bx zZWGeMhxhO8uFh6{{QaAonmlq9%l6MV@3eEb+e4R*SxrKR-*7uN?ADiQ&I zwf65XUy+d&TPzDnNJ#jPQJbR!D)h=Jq+=#glyhZzCgo-CvC+4xmi2QhVqyz|+ZaIz zoi-`MBS@KQ^g@QOE|3Sf@&U+@ZZjP#aOZJ8r_<@e(dkWa9yjOb=eJrP@DfiIc#4hP z7V=s+ip!osUS1wBL=V7sF)%TumMiu3^<}$uuP$~N_4<;sb+8uWDU0ZLHWsy(l%3kO zrG!-zHu2TeD4qs)G=hQ@IJbX03{jDJrVQ^{`&ELkXhfn;PEI9A58b{II(U|f_#xbfUXYd9)s z1Wgn3Ld;(CvQR38U*5?dzA+dK{iIakkO$jRf97JRPE1tv@yU$ij`pZuK!7&bzE;~) z6&=f>R%Cud?p=cqpfEimd0k|JgM_XAya^G|a5X2$A_wV6DKuFYnj=^E1x!<8Qe4+1b56`9Le7H=BDqQDzzhVrR0{So3$t>t9CWAwZmvskm(M!wyYI=!s!A z#6NI2-u&I{gCSpLLICzuS1fDqo~Lm|AuP%{kbL9VGCir8-qdO5F<+^X9$3|I zk4z5m0WqslHaQNcNI05fM_1PmKoTK`Ta%Guyq94h35kFH{PyF=hfUwmR`XpiDMf&o z^~(eZ41i^l(9=IPH8pL6fnYCEYq%>A@SI;p<{28#=?lx1uFR@<%n$fn<`J?ZJ7je~ zhv;{DojLm0`M?AO;|1R`eQiBW;+HSoJv}`Q0+h?m)B{QhaB$jxhmvJ0J;T8G0EI#W zO3CsT=X{Ex^0DvLeY)vQHR{Yb9$er?3Gwm$3HhrOVWQPxW>WL&Bxd)8nUzIF?|w$S zk!-;n$Avw|uxMhlp-2Q1AzLo7(Cf@Okerzh-`4Qgk`Rh3<+qQ1uOOZ&=nHD>)Aa1MX zH$3^7%ZHgFf)yPdk@2h(1GCMA1}a7;rS>6a0HFv zN*m(G%QJt?bD#Dn+osrBYQ(oSFq0e%<#qQZ>`7JN5=VMf39?3C{7VE<^qaSX4dDZc z3$F^?TTZ5Gv(oDP>c2M9TF-YoOp9EAf+F`@8mO*n4x%(Gr>SfGccvTu>)YJ$w+m@W zf>>DyETCloEF7Ty10WVU;BT6-ZQvJ&)Y?K>_ia|aT;}a66jA<}gwf8v)@HE7?dQF) z*Ct9RL78~<=8ZR8pwsD&p=kTRl1M`@Lnf^Ep)d1uX2sr_lS2SQA?3jjtc#3o0Hw28 
z{$Eu~CP%J8(X@si$dIlVle4EAs@EOSjJj|s*0g$0z^f1!7ne^^Ff<~f73f9LyskXo z9uoix76T+P&erZY_Rt?cUfFDpHJl%9*k2xvo{iRBNvkUYMG4Rc|MvEFhjFnAn^ixd zs7e!FT_hp=gM^*IU^Q2LDZfs=c6^-}wlI zE`hJ0nVLm-B9$loUV@_S011C^(V=8@gp}uI{dI41r!}ra3tzTOE(PWjh>D5|po-?O zQ*$GhSWP$ss=qv$-Ffo@(2K^kkz71VnVg>msG+~5mRFaTmgsCY( zuJ6RhcOQsuuW9o)k&Sj{YV1#@ZP_iB9y*?EYajg?KKwI`>~?x;2XyT9**e$Dt?x>q zC5{Z^VGaZI+)2W8XunJ~;PGzGe4Cv-lFKZlmO=Z2n3_5OjThEOPs_=P=Oz$9kw}g9 z@S&Oi-ob(9|GK{Z(G^^Z+2XSBZO(VKf(m*)*uSV!cpmi; zg6ow%0t(J&{j+77jB2asjEs!McQVVqzP{cdum2g!ypM=jto0k6R4f4s(j2$Lq1qMf z?EZrX(iPLce*MZ`$?L&MWp`}dcfz&juRdNj_cgYGn4lhNy*2ccT(bR&BD=T=FDh)V z0=m73J&FC}2cNcpxM%xs#l69)j=dhv9Xh8|vxUV)`{i(13Myn2l=~gqtFW|v`(<7B zEC%BVCfMaAE|7FNZMT6y`W28^Gk{+$0Epstu%h1fQOd6VBC%r1Ij<+gbYQKyrd;X# zfsfBX;ATk;&3!3JXW$k6F+sQ7#Tf%d+>Xb6Dg4NwzmYX% zV_-n>=+PsfZ9FI*hk}*^Sa?t@u-!nEgHLc6H~N=)xU;}+*qV>FTHvz!5>C)!uC*jI zY}z`Jzc_J6?YA4BI#%Ek^sfiKEEk1O2*=ma51=)(w%K_k4kqQ0(dANU8RyG^uGO(q zlBONE6^h|E%3;i}s`Q0={lxI${uEEe&tJ{z&#GNKj+T_dHVImC3y(*(&wMa+k@jrLcHhqi ziuQ*A>NuKwgGiXcu4?whqVxuwnu{xLZA~wNR#ipa9%%gp`Bx6x6{~WUR^-m-hamZ> zKyKPHFJ8Dk!XTCUKPb?dM4(du8*R*8!U~g$QZ?}S6{?F1b{(}#0zw*S5y}3uvnQg) zDy=C&7smPGm>wWDp-x6dM*WFj?)8$5Z;!VOrOqqpy>hJlxAIAT#U?`PPqnk!ds1X8CncMfzDTXaWM1XbwGZN)q4?)#?N(XA~b zL4lir4AGlYSKlLY@gk6`J9tv}80|IJ*mYK!{Vn5b58w}Yl!={h%GG`)JX5-tfoSrY zg6fHzBAbX@R9?u9jPSGqcU|Ujrsa;arDtwrHSwWsok2s5@m*ASa)iHnfVV7D5)UnI zu4#t>f3UxdWu<>I77sp=@K>AJawBjRi_ZA+j<1k0JyOzisGOFn9UWhG4)jTfqOxh!9fP^627`r|UDcbJ1-UYiz!^!aeP z`6ALH5;C%npC1XR-k_5LFujz%A}E%PZEfF_t89jU^GxR;2^&Eb>k>nCk?sJXWxt*L znCa+%i7ndOdkW>2Bam*K%(|kz?Tnz_+uzSRfq){6016w|`C4lA*-ES|L7mY!L)VJm zB_P!AGPLw0#aIy_gWNV-&So46*W)nxJI2!a=WwO2PV0&$a3bd|@|I?zo z^M)851B2ae_XVh=StZU-!Ln^cx%{c6+t%yvgM%gsRe%))LGpY3TDxrTh8hOP z6o^LAxi>bx2X(!z(t2a8+MW^Qp(*n=f+O^~0YnoUjbxjRz)fAX;c3v{FZve;d zA5WStE~YY`m1qxdCyte0>$V-b(KK+4CHuJh0OHAGJn_q?jmR}_Bv&yT&eVgSGm0hh z#E%#2hK7YT1K_=Tc=(EkXJ$<)60G5Wp7-K?hqWF89hhp`Wr+SVTjmbzDlY<#D}v7> z0J|vC?!2Ab(Uis`zp_~u&R(tq6v=cbJw>m!m#610?4TC|klSzH5P?Fn4BFN3bOfRW 
z$lfURr+bUP+uL6-Frc%tvI_X)G<<*g;iIUi&T>aM;B+|EWUPTT&?r?-?~Q4^P78=B5Lx#qp-Rle4q^`Fe)^R>^2q zNe{?=Yl9hRnyvoo$D8Ak1faXi7fGXx!)$<3zX+fKNajvUq$Qso?Xbck-sUu)Az$YTzA{>YsT;{b`4M)stXF}Qb~r8 zhB2|=>m7W)Lcz?RW*Q8DRFZ0)i-1;U;eUz;ABCN^npGx+xjJ1Ir;^WasH-n=iwuLp z6O8otvE{%rZ`pm822O+M=x8dX^1eMS_K&l6&k6t z@#u*TyV{?g71_tP6&UoupoD!s5qBb8&E%{ZW`HMxrd;ShRAHs9oJz*?!NjX(qN`m* zfEuMFf2Pxg6<_;Tb$vQk34T7AUet`FUMhJ zEkin(alLj1K%^YdW7;YeU_}7>%MYOINYk)7)G2l6{E%W0#b!%yJ2tXaz9GiWfH2C+zG%0Dba~n`_xmqW72pZQeiO9TROVBj3!fTAhA$ZCsrGd=D}NF zK%WT?4{z~6b|K>64AF+T#LGHYA=$Dn|p)Zmo3#a zXr*Y^&ks$cBDaEAOKGbA`<9fXht*D(ZFM{U40;$YU$G;nn0~Jaefaod;;=t|7h-v zhhVQj<%-z=2wRrVd7R1@dvB2bkMta1YO#eHO;?83I$!Kh80O>ReDaTw{oEKw?#d`MD^os^4<3!bwWb-L*2 zLWaunUD{V_Oas}{U56>$UWzB*5I$K8eQI&h8}u#}X}vL$8wDl3d-v``RMeK~>FG77 zM34l-!RduABEg_>>?vKTE+-HiyuGu>E3LEtHRC`h63~6(0Sk-m&H1rIFxYUFz;*H& z@#a!W32Jyf9Zel*qFrA3Zkgwuxp5q-Duv`$7l}uEh}_oJWMIZ{t4vfx%PlT0W-ERF z{{1uU9e^=9WL2G80tqOGk<@lhWiUV^?sp+h!p58bfNlHqJ-fS&5Sa~&2!5~IJ*9A8Q4l5zx2)l=XqIP+H z6i7@=Pao`oh*o4UoK-o?PIh?S>ap3|WeE*h8~N(Ja&;RYqo5xEpHmL2wHuodpF!6Y z@PmQm^#Smfv%^8Ly`v*-Aj}?N*DKYuDg7I3l_p$_pKJ%h?dDivS3D;+5mBNc+7|J# z_R`*7!=RJXF4G0WzTHkNiIGUB4Hc1B+|ZwfdQ zR3-F%9lI)>rX&I*i*}!B4|W&q6aw5gkQ7tnE{x!p2xM45WOQ%Q$Yc_`-6*nYvUG7A z*1n9oJvhP3MMcG(J^QRc$Q>vGP4_mqf3G!&NK8Ng0W?m^D=O$Unjd~e!F>-J$&|8r zKx|G^DaKO=CMaN?s~LcSzB;?rK8o1X_?O_5vfOJmkkKS>q#)v9@nm{)z`uc@(iv|% zqc4>}Svj>F7D;n(c&G*Rg1~R`cQCQA;C6?hOvy)&AL{^B7n;Boos&c5aJbe0;_?wv z)_KI&8%ZInC+a-JKE1pnov#Aiw1T95TPnJtB5;2B&Ek>evD2+geWNI>`nmvYi&Vz% zX5gDut{`I0-je_%26u)Lgn#4)_Zu>u*UVwL0^PLXyg1p>9WT-z+p^K=ic+`AL%MtK zo_o;+yKAPC%9(s%7BrRx6&T{+z^x~q6Y$9S)F2*Wpk0wA*nR0w6)Xmg28vNsWaKJ? 
z+CTo8w1_7Jb$_CUie=9E*~L~sk0gWRYn{{;FTQp)&$(l*3t;%YN!3c3Kb z4xNxNFJ=KDi`?ZC@8W2$bl}TdapbQSP%a^&9#5zKMROs+aT^ZorhwB1K_IJZb~bUP zI~E}N1ffpw>kj}N?j9UuTTBlPNui^oj}>Vn1H=!6JrG@MlV!9J00L16IDUy9_yMNU zUTL-72s$GaUAHA{So`u%c0S-lo?c%4KR#ik9w?2#HH9w^kb$P~kXo_y{tIsI_@yN+ zVAi4eZ*ff7bZdr z`yL(r>D{|K5J0UofZh+`+O5AA*h!w~xXlg>SVDMD zfLTVqSSLVJ1$c|1+02PS;wJ%X0c0BXgaT(0z5aH;$q&zqF62zYJ9kV7>bXIq)B7W} z;lK}{`R<2*am*9|Nj5t(sP&+O)D=c9$p#t>H4ma#jGucTplX2L6a?U#h<6?cKxTL$ zAt_0)_}5MWC>&f5Rd4$G^x3mkF!}m$jo}|`1AhMY-+3xhq_bt1jK*#~6@eq6+MIz4 zI`o4>y~P7Xz~oV2_EGkpgpYT7fYJWa?4~QJcXSbZ6=31u#fAo;g zr%=-VJY~+CV%=WybGPy{$j2b4J3xUC=pY%@1D2*U!F{yA^}ueJ-QN7JaDrZTI{Z%q zz`~hbY?Uo8FE2;^MG>8_PF%px<^jtT&yd#j9930B10`0P zUG+8#0W5L99(3V0IKL^S0%rwOxh(O^f&>KEAncM=*=Dv!vUA7*!92Mg;joke)Cm+W zpb9?^<~R6R(A+v?eE}Rj_uW9>U-jJPRg!xZToC8Gg&#*!dQkZK0k|C`NNC^ulS|@W zzA1G1V^0gCI3Uh~osb>M98lUNE;>o zQN3=@BdG{nzqS>l{D=Bl6#M%7!;P|k8Mp2OxoSIl(oi3l%k#Y;SC~ir+nR=kXy>cT zM6NlcFUI;Pkx;?$uOQQr65##E2Kk+%%WPY z#gbCNOJ8SVd^3CF&Y*UK7I#QYOx6u_Gw5;{Rkp>Af0wlmoTxglop+*4X8Qb2Rkm*N zZaRq2o8~~Up}GmEr&YE)2y?56t~woIcFj!oY@EUF5=h#*eLve7;%>K&i}G zt1O5-2S)pSt?jBEp1%P?myw<>@%c04EiiH;W7B9ge0hM-7NQzcTf1QR?s_WYp1T9} zPhco^2XO_^yw}ZEuXu(fS{B=_L+E@c4xGb!QiPd}vpk<(Q*Xp+p05*@p+6tH!HmS% z#=s$FT6bC6$n|$2!ng0~4W^@lu0A7iCzePe4*`(q*JVMgjnbczis}^&O<+>ei!UA? 
z(M>hBR{}-(mk>_WNKJF z*I?x)`UFcrIHMGwY4El*HKjK0|6AMxpl>UK(HI>u3CRZaNy(VS8$anPR}1LuERDCz z)9xD;MuPFS$Qw{Flh&TC-~q%H&@Mq8#CydiV;Fvk31KlBs|Q^S(D``H#1wYt9zuc3 zrGwy8V8m(%5Jw)^0i5}3Ni%s9)q7UI;SbJt!@RftcCVOkO!9nLsg1e#fH^JKoXji+ z3lh>(h?OX9ZTkJz%Cc|)?N`u&&<0J{)6>%-{$C2_3(%L0Jhi6^ed#U<%_p}TF}PD$ zSUCG1I1q`e1{M=;z60+4sPEsO0Nb5TFNChC91AGVo-6GNTDWbc(yTED|Fo$}AI86Zb36KD|-kV={1(&h`N$~@6 z9OQqEPg>@H0t`f7OG z_W=!Fb3x$5QU(IX^!)1T${&aR3$XZsQ8x(^IrD+mo*38O{Zzpq9pIRh&Qpx9Yo9pn zRJlwOUKipela)_)!fAoKf96At50E z#ljT;2=D>XTdnn-`zofz3ySXL8w&!Vw**P;Gc^Rj*NqgOgpfu%H1{42mwt>&p|c`t!h8P_yX(+L8YF>>&V#$yQ96l#Lqn z#GKP#;ODzK^wATHl;7z+9>QY-Jj7ZGRJvQlL1XCR!UNnOo6}X1o}Tw-TrP|ie(5#% z(bno&l_CKXP77#Q>Vp(%Vq#)6SsL(KI0A0UZ4D$)T;}k@rX>ZChuLt1ZRKclJk$q+ ztjJ`V1CW#`R?~#VN0Ud?Cr@V7 z4VZXtU7K*c$T{a|-v;)PXn zuVnrMEYQqhFdU)cu-WuBK+?RK99h|Q#a)shu1266`nOx~gXEsme_CJvRy2W2ez^$v zF*GzZ3`jS8Gf2E$jX)$>D14Ry`C|lUG*b7cc1!U)G=S)fKh=ak{*M=6@T3fR zA^ER9VByTbzyMrCd31kCOyp6tZbEWF!FSLHDzrb)*)Csve7rr~iA-DlS$z(`ITT;J zMc^MmvE<=hih69iB4|{`a>EHLT!7pGr&mD7yiEj}Bcm#gxsCpy07bxy(7;=)r=zRQ z(eHJfU^SYRnTce3HCFc7K>Mr(^56CBOnh6L)wV0-x=SnMH;d1UFW!7VlayY+w|Z$T z^rXU9I6vKGcp&A`gzs{xLRwbIFCppaEah^F+|PzqPmROz#A_&C^I>*>;d?e7xwC`T z-5wR)&J%rFt(tFdo^a5`-fOQC9V(V!-@`su<5;G`&iDYx3({Zzbc9o;n|w%(RFw1g zx^NK`eMb+e7tL+l_`uOL;wv`yf|&TAjSAjaVoo*~Ob^JM>|3J_@D&u2skO0}al=;7 z8zMD=H>bDMBfzyOF`w_gH8G(99O3il&uV{;nK-gjF`h)mr70(j^W2t)33Q zI!gj50neTXAb?8iR$R=;b9JgIS(e%F?JL|ef6Cd*jkY<06y#V!rw)>x_a6aZi-{r! 
z-~HqWQ!F)l2YLv*r;9-!_~<})84f;%wiq1Ef6L3ANT%3S#AcJUo>#^ez4{(;DBuw;9H5 zZfCV;dwQl3lNl?>H}v97hDa=eqM{?M9`%DhprOcg!8~^ggH7l0A5R?{nZJ|G0hkiRE&9vH+4Iy$|Gdr)+SUn+$Sd@x z)A?bbObwI2p{Di{7^F*lZVrThz~e98fde?eU5E;?TaCBv%U9(Eq5~3~K|-ihs&N1qgUe!ITf+k* z6}Uve_8u_-9PN`GIOy32DzJi~suj$7lRG{~nQ)|rnb0xGKG!%A#8fhvNbuikJWrGD zfxZh6KWnSM4vz4PYt(?e1-s(vW9M+DDSv?Jn+&CbH}^; zgoL2ubJ;xsvB4_s=goc}!KVTU6k;}4e+Sqqnx>}- zwzs!WFJ`X;FE36EKQx0yGzSecJ^=xlDr&F|PS++6yuE9_;)J4-$6!y_Y|g@|eKk#Dl5D-8|jTnId4=%x>BGNp)AI%(Z{&SFu0xn$;h z0)agjMf-mAu44FnZhJv7E@mm4#TJ_y&lf_9=Qiw7?^TtX%{B8C?farU7J|H$Qlfxw zql$NEF-h9i8&HX7q$cd{+1iZ08u?8P3N7!d6_PATfofJ?VmL|%__<_B+fM+K8hd-i z@LoXb2M000+_iy&B$=pBAxWR#pqP+wa}&T@yb~pc>ZYnS!c^d1yS>b9RxadAm=P~B z3hv9F@#FN3KafncOH!C5B_?JN1QKm4D2*wStx$8*uMGS$F7zXpVcwe9gS&~?<W) z=DScNQWKwHazEjo#wg~tf+eD9`i`mq9|fB>-ckI)vvgj{I-IA4vW3@H3@lu;c&GgBFg&;0|Bd>)&%pe9=dOwCYm%e|n=r zQzXXZD6%iFD%FhW(v6<cF4*ITIylDn>|0hcr*6#uo@# z8@WJC!K=MIR@Bng1}AnwSLx0}G_tT!IN3G9`DoPwMyILIQRV3c=E2{ zcxkh;{o-dR6;>pO6|UTuo3Ii^4iEI@Wz2}V<=s#{)y|WPfoS)>AsNoahMtbQ+qReD zS85xR)@TWz-i-yS1}mGbmM6H5-OJ}0?_Zmf1Y4g57l-SNgc_0Em4Fwb)Z5b*py)NR zEUaJAo||;N9ijNye7X_6p>~kGKynoJ$a1^eNL^R@%<@a|%CR;@mFT7Z;VLEZF~#OE zg_X5+j`>B$wb3x2&dQc!4}0tKZ0d?}$uh!)O}l6P{r#svk$ZxTEt^`3F9yvwyS(n0 zh5@L(=F$S9@}vWEQcS5~hkTrQLQq6&1zQ@Yv3TEaZ|eMd&~NCr6$I_=l77)>*Ya&q24NSFsDo*%?xq4}Z!IKN87YBE(HD#qgjj?p!ZkB`sU zU{fpnC^iaRMcJ{ab3Neqijxr53-ci_A(#9SKBIsl(P;5^PHn1~49qE|t zWbq|7u81$1N)qnaaxJ=cxKrO^F*zwIyhUBS)>cze-SS8L-Y!k4SFwU^x}eV+@vA%> zl`@*U<0+D4vWM6HZz!abem+B>+g?c@f?)YuDqu%8lBUgb*+@I$H6y^B(@>L)6LX99 z8I3yR!WG_Y>Kj_sNZhg0xx6!^#_-W46Fn^&RoAV`I+GwKf^$}mLTEl|i2IsawH9_f z>xt^gu8mZT6Qwt@dE6a7FnCla|AvzCBBJR#(7wO5?+dWN4NJuyNDqm%{XXB*7D?@Zqurl_o zBsGU9(}Yfv$)pRDxy{nPyAaJV&{>YNi(E3QrQS0&l|)Hd+5G%azs5R4Eb%MwX#?Mb zm?5xIwhg2S?*T*IyG$t{nIi$)R1%0cEDDepA0MjU zI0_Vg^d3Al+(lXnR5gH_)Pb0*H}Na8Uq1*O;jd2h+k)(7J`$pyWx1<(-zM~^gb z5IKb;E;2GIJs)twh&RCxFCdLFO2U8Om-6xXFRfj9JQV!boh&V$lwDaOEwU%Al1i2m zS&LMbtSv}cLY7EGlB|;?L?pzMQ8Zby6cVy)WJ$K9Xb6?}eCzlAKA-phcm9#*8Dr+V 
z+%5wDA|EMaX7O>VjqE7pvfXJYPI*{ln6FQJsqrP>vW zw?`g26=(0(kTu{mI&~^DGhq+@X{YlI6i1kLTGO${n2>kCCzX-CIO zlH~91@?P4y8?y_bgXBOQ-M)Rh_=XLOVR{7Is7>#B#u)zaBH zB1%%P^|cGN2>_zux#xuqd>l}6YB>8K6++dzR8UZ`y}kXO$9Q*piC>91o$T(*m#l{l z9Rf_%)YQCMKZ|xPx<2f+aSFNrT4y@%*syYBd9F?H}Aq+6=IkfjVd2@o#=tuxUj&$z#lU+ z2e9F^ZeBf=)wpaoF=zul2EhU|Ip|zFnQ+SVef}(qu^pQ#t(wC?l7JNpgJIznFqLg( zD1TF`*7617Xc9JO&Vj!Rh3XRikE1BQ)~To*S9Ti#Yb$^2&Yefu+x&iC8g57qJYM8_ z;83FSIxH*XRRW2QGjnsbq-U*AZTdkoVwGIBSpg3vAN75r8$8q#?bZ?3@J?kdUK|Gv zYm0k*(#mS}hf9;;v~F;c6-`Zs%fwV=VK8q3hfhVnte*DiZIS}ri+KwTE3X|nMed`m zayT2@_*7`*;!=}%N{fU2S5DOrW zExqf;5R z1Nl&66JSEbv5Ogn?g?rqPo{hv92|`X`>2imyN}bXJOJq$H;CY8uEV!zva46`HI|Mc zX6?>=@W7^nflRPa2hN>)?C$w4UAGBM^VjwDIRmMRbV%B+X{JUpN85y#a$ES$q)~2u z{d&&vsFhVRKJV*&%Xi(E^9D{#^PB(OIe@2yWo2c#?@(1WhhmNRa@{;OGG%3Db(&8X zptv?Rjqq_e4&Jq*rRDJ8d&t$+!iW??cB&Mv1nBf0U0}`i`#qWTI4`e^6c0ug$@(kalifpL zy-Z&Cl(+61#DUWVW)JGBsum)d?muv#@tM&a!dvg);35wX4@b!5C1h7Ft*p5wf*esNIAxS$eKr{7n zK|ukHcn&s5OP52->e@YD{*MA=gr!4IMvLhX`8_Ty)Y`v)fA*7-7g8}XbNps8zxzD% zU2xWf>_R(V>KORc@f8G8Q%=WA{dlUstK`>|Foh^pln0I+Ir4R4qAABDtA)5qj^rG_ zR#&G36fwUS8c@jM32*~J+6Ukkl$CXcE*6-{FDTfLGzPCl!qmQ4D)kz=1|v+FbH%y2 z$DgIpySIvqQ}E)Ih(h1GoJ)Ff!>T~XE}(#>8*0lH3kV1p8TTJKU~0MovW>uo2K`hw zh+($5jx;Yp>{A}F{be>axsr>^AFOyE1fmr~KJ6C(>ZPeAA zmFuyvr8)+?>iAl!bqsdKOO0q+*Yimu4+KU`&B9hXdTEf;07!yT`ug<&=M#A2F1_XK zV-t|qD!TNk!%Mgju~6;e#qE%?_@i_661AB^$(fAg zYI?RJTc}LGzrg>+9-i*LFJI*0mbe8`2Mo3Gz)$~T;}ukFUhl}e|Y);|9^Q9IO4 z-$?=mK6yVf%N|$P&FBS(pr6uwpjbEnsy(Z|ifd?(y@aFJt(B2wvB0>!aGXB9N<>tY zl)?DWCxLx_fF>jZD9eOh`8@7XZo1J&i+TU0Uq%t$hA9nL@0#S#&DFuJ(d7?a4A{Y=8meJ=F{bH^x@ ziCTq{=HAHGL=fB|YRMaa%zU4m-`Te3o5Ffz3LzNvIP z-DqUI{J*&z7rwdf*KH}GX~y1%kVP*e{!5w@BNR@F8VacyI zJzf{a+i?4PP!hFy%AF|UqkP4R_T$@Nl)?U8KJf_&Sr;T3_}!REl8UZ5tO=W+Mv#IW zP&aIm6#~G9(T)e!2f6z~R@KqEQWu2Wu)e&NCQi9yX z-+{WRGm;79j?8ruX+qg;+u3%CjEoH1!i7Tbt5G;}!-No_I=jDkteEP(TgdRg*0M5EceIdS7HIR&zGe?TL#NGUVq$VFA|ehPeJH}@B@)0~7Bj(_s_}6Iso<#RTURfmo6MY<1g$8I zQWuL@^a|I;7HkeYX!fTXQ-UBRT>1Vt);6CT0c@kxJ5=~Z8Qp=2XRtoz0 
zNMw8JjtlAC1<{{A0fYcP4nQ*z34lp-PHAYM;y=0Nyt5G#nyx{2hywTEl}k|F=KIXK z&wzuMr`SLps_X`Rog8pKPOl9_XFLJ|>+v1_kM+4O+Yifppm|pw*t>V{CWy!m=;_5F z-sC-g+%PXB|xB;mU3@T^66b?%qL-f-ye2@lxNBvQ{Z+ zFf7wE$VOp@fX2E~Mf=9a6s0C$+F=+}W+*gW+}&dlh)z8)VT|Db z#-TEHc(67G*ETxS=mDw0w(s0o3lJ*I2!B|UF0|5{!H|U2C=!bnhFAbxdeGAH-kn|E ztvxj7_fJ=o-9lYGnmq?Y3)z~KMu(3Wq6Rf^wDJyBBSirTs9L+9O97bjp|Uo}YH7+c zQ6TxI(Q#&GX81i@Rvnn+G%VT4GiPKlg>d4-{O`ge5oLV7v%`|Gs`BFQku(KeiQx^g z-8m+x08Cw6@*bJf&EVgc0)2xv5btnHRvedozUWIJns$fur8sX-*ikD$-kb4S2y}@0r|Oy- z4$9xwZX#;m*dMSZ7qEjb2%lJ%PS_KVXzg|XX`vcJ2yzh_u_0Yom=ACoGCeiS?9gGz zBdu>z`uko)L(%7hm_Q`}%1_E3ga&SYes&Bc;N2EZ-qT*RWH$?OqTl|)$%XGM?mx0^& zChPaan3yZN8P%CXo{jrozlIYU`!9khB7&&_8B^yh171}aOY@JL>8aagf3}NLw=u(8 zw#hMu!rR|0yY*NeCMFO{X=xrR6HEw0C7PMw?RJSqp>XJszY(COe!m($9ZYJx3`R|f z-GH?ktP*GBX|N{?O*X1?ZIK0gobJTJ9k~}d9v^pO(Ovv*D>ScDu4;M?)bOwH0!?N$ zg|~Xl(`6z{1DDjv_D^#bWvE*Gt^p-7)>68W8>Dx#F%u=W1*k zN4O>{jy^79^^n+(nCyh4BxE_evX&Npg!QjeQ&pH(%IS2W;oe?|-@?s3wY53$sxZc9 z%gKBg2eYC+Sf7}DB&!kSI7VA6N2is4YPEs2jqJgVMqFD|>i7=l*6Qpvo;JR!x?SBy z6_9)(N)==mVSoYn2P=TFdOv==(q`dz;uxelKE$5`k=0l97*e30CHnJj_%;YafROS~| zimtBWa%J=hSoXjYoi0~=ejt=`6aUoq@+vyy1D*S+j{G$D9SRD1m|t=qLbh8;Nx4QI zjVT%|dqDWgoP7ymi{1Z@if4Yg7uZ-q3NBBI(cRm(rLkQ+D^_I4Vay3Xg-1x}VPWFx zeOE(3i-6;hZrgS7>pjX%fWMLEovuSSDO{A!A_)l`mcEgZhlPwkXb6MYnqp&^#W684 z$LU^&AN76q4=ZDL~LOGnPP{_0WL zLWf2&-l!6ZpkN$+=0y~!+;t!5B8Ff5M76(}m>7-^ZSu?s`$8GgiDPr}>u*BJ_omS| zE#wqp0|-dWCjtkyJoO1heXQcIE?T;{r%RDouIduejlNoR*tI+Cqo!+1SxY$anOmZ z%Nm>=5jJF6=G+Tv-+NRhyWaIy_M| zu>LeY9)S{=9|J59EbXpS%g>###_2vC@$mD$HfYyj)m)6cK<-Dw(o|ZiP zE;tuUdx}t4a3j)h>3orxD7w|L^U8y}UK|Kdl>nQK=wo2umAJh8GR&i>=%#yr1Jf+T zsOqZ>%k!Rg!nlkwBsHwrk538CT%w}cll!wLs`<>*LC<3PW&F`h%o!CZI&fX35eneOmysUWm!KA`b1oN??%#FW^T z*dvch&xhPV$KxPyd>ZmcXFgaaLMe~@{3fnGEdz#TtUrAz-4~7x4zV$2m9X=W<0Q;K z+>~4K@fk+G=(nM;j70%<4HtkAEVEN%GSGZBc&oHC5Or%kRGinfs*C{GC4yDqG zYp#Rlm_QJE*Nwt%uY(wYBK=t_rE2I)_4{ii*0KCqmuH4Qna`~Nq6LuZwOm{j!B00} zW*zKJq-}(I+K$eE&-Ey-NNNJGLTk>6FmPoV#$HkRb=~M^_kQ|xbzAqvmqXhO)u)BM 
zy}e1O1J}wzz#7QdfT7>XU|3?^%g{R$^Zk+RJ`xiY#10h0Dl9B)R1g8oBlHybg9IF9 z+rNB#)@xQntTO=B6%PwEjO&er2q4Z O<$%T^^&B ***If the system installed cmake is too old, installing a new version of cmake can be done through several methods.*** @@ -114,8 +114,8 @@ and Dyninst requires TBB), and the CMake option to build the package alongside o | Third-Party Library | Minimum Version | Required By | CMake Option | |---------------------|-----------------|-------------|-------------------------------------------| -| Dyninst | 10.0 | Omnitrace | `OMNITRACE_BUILD_DYNINST` (default: OFF) | -| Libunwind | | Omnitrace | `OMNITRACE_BUILD_LIBUNWIND` (default: ON) | +| Dyninst | 10.0 | OmniTrace | `OMNITRACE_BUILD_DYNINST` (default: OFF) | +| Libunwind | | OmniTrace | `OMNITRACE_BUILD_LIBUNWIND` (default: ON) | | TBB | 2018.6 | Dyninst | `DYNINST_BUILD_TBB` (default: OFF) | | ElfUtils | 0.178 | Dyninst | `DYNINST_BUILD_ELFUTILS` (default: OFF) | | LibIberty | | Dyninst | `DYNINST_BUILD_LIBIBERTY` (default: OFF) | @@ -144,7 +144,7 @@ and Dyninst requires TBB), and the CMake option to build the package alongside o ### Installing DynInst -#### Building Dyninst alongside Omnitrace +#### Building Dyninst alongside OmniTrace The easiest way to install Dyninst is to configure omnitrace with `OMNITRACE_BUILD_DYNINST=ON`. Depending on the version of Ubuntu, the apt package manager may have current enough versions of Dyninst's Boost, TBB, and LibIberty dependencies (i.e. 
`apt-get install libtbb-dev libiberty-dev libboost-dev`); however, it is possible to request Dyninst to install @@ -173,7 +173,7 @@ spack load -r dyninst ### Installing omnitrace -Omnitrace has cmake configuration options for supporting MPI (`OMNITRACE_USE_MPI` or `OMNITRACE_USE_MPI_HEADERS`), HIP kernel tracing (`OMNITRACE_USE_ROCTRACER`), +OmniTrace has cmake configuration options for supporting MPI (`OMNITRACE_USE_MPI` or `OMNITRACE_USE_MPI_HEADERS`), HIP kernel tracing (`OMNITRACE_USE_ROCTRACER`), sampling ROCm devices (`OMNITRACE_USE_ROCM_SMI`), OpenMP-Tools (`OMNITRACE_USE_OMPT`), hardware counters via PAPI (`OMNITRACE_USE_PAPI`), among others. Various additional features can be enabled via the [`TIMEMORY_USE_*` CMake options](https://timemory.readthedocs.io/en/develop/installation.html#cmake-options). Any `OMNITRACE_USE_` option which has a corresponding `TIMEMORY_USE_` option means that the support within timemory for this feature has been integrated @@ -204,9 +204,9 @@ cmake --build omnitrace-build --target install source /opt/omnitrace/share/omnitrace/setup-env.sh ``` -#### MPI Support within Omnitrace +#### MPI Support within OmniTrace -[Omnitrace](https://github.com/AMDResearch/omnitrace) can have full (`OMNITRACE_USE_MPI=ON`) or partial (`OMNITRACE_USE_MPI_HEADERS=ON`) MPI support. +[OmniTrace](https://github.com/AMDResearch/omnitrace) can have full (`OMNITRACE_USE_MPI=ON`) or partial (`OMNITRACE_USE_MPI_HEADERS=ON`) MPI support. The only difference between these two modes is whether or not the results collected via timemory and/or perfetto can be aggregated into a single output file during finalization. When full MPI support is enabled, combining the timemory results always occurs whereas combining the perfetto results is configurable via the `OMNITRACE_PERFETTO_COMBINE_TRACES` setting. 
diff --git a/source/docs/instrumenting.md b/source/docs/instrumenting.md index 5a54b65de0..9cbab2be38 100644 --- a/source/docs/instrumenting.md +++ b/source/docs/instrumenting.md @@ -311,7 +311,7 @@ omnitrace --simulate -o foo.inst -- foo ### Excluding and Including Modules and Functions -[Omnitrace](https://github.com/AMDResearch/omnitrace) has a set of 6 command-line options which each accept one or more regular expressions for customizing the scope of which module and/or functions are +[OmniTrace](https://github.com/AMDResearch/omnitrace) has a set of 6 command-line options which each accept one or more regular expressions for customizing the scope of which modules and/or functions are instrumented. Multiple regexes per option are treated as an OR operation, e.g. `--module-include libfoo libbar` is effectively that same as `--module-include 'libfoo|libbar'`. If you would like to force the inclusion of certain modules and/or function without changing any of the heuristics, use the `--module-include` and/or `--function-include` options. @@ -616,7 +616,7 @@ background system-level thread sampling by default. Tracing capabilities which do not rely on instrumentation, such as the HIP API and kernel tracing (which is collected via roctracer), will still be available. -[Omnitrace](https://github.com/AMDResearch/omnitrace)'s sampling capabilities are always available, even in trace mode, but is deactivated by default. +[OmniTrace](https://github.com/AMDResearch/omnitrace)'s sampling capabilities are always available, even in trace mode, but are deactivated by default. In order to activate sampling in trace mode, simply set `OMNITRACE_USE_SAMPLING=ON` in the environment or in an omnitrace configuration file. 
diff --git a/source/docs/omnitrace.dox.in b/source/docs/omnitrace.dox.in index 646f6ee71a..de5ec27f51 100644 --- a/source/docs/omnitrace.dox.in +++ b/source/docs/omnitrace.dox.in @@ -116,7 +116,8 @@ WARN_LOGFILE = doc/warnings.log INPUT = @SOURCE_DIR@/README.md \ @SOURCE_DIR@/source/lib/omnitrace-user/omnitrace/types.h \ @SOURCE_DIR@/source/lib/omnitrace-user/omnitrace/categories.h \ - @SOURCE_DIR@/source/lib/omnitrace-user/omnitrace/user.h + @SOURCE_DIR@/source/lib/omnitrace-user/omnitrace/user.h \ + @SOURCE_DIR@/source/lib/omnitrace-user/omnitrace/causal.h INPUT_ENCODING = UTF-8 FILE_PATTERNS = *.h \ *.hh \ diff --git a/source/docs/output.md b/source/docs/output.md index 030c2c485b..a4ae365b19 100644 --- a/source/docs/output.md +++ b/source/docs/output.md @@ -1,4 +1,4 @@ -# Omnitrace Output +# OmniTrace Output ```eval_rst .. toctree:: @@ -55,7 +55,7 @@ $ omnitrace -- ./foo ## Metadata -[Omnitrace](https://github.com/AMDResearch/omnitrace) will output a metadata.json file. This metadata file will contain +[OmniTrace](https://github.com/AMDResearch/omnitrace) will output a metadata.json file. 
This metadata file will contain information about the settings, environment variables, output files, and info about the system and the run: - Hardware cache sizes @@ -237,7 +237,7 @@ information about the settings, environment variables, output files, and info ab ### Core Configuration Settings -> ***See also: [Customizing Omnitrace Runtime](runtime.md)*** +> ***See also: [Customizing OmniTrace Runtime](runtime.md)*** | Setting | Value | Description | |---------------------------|--------------------|---------------------------------------------------------------------------------------------------| diff --git a/source/docs/python.md b/source/docs/python.md index 0d7fae4f7c..9431600936 100644 --- a/source/docs/python.md +++ b/source/docs/python.md @@ -6,7 +6,7 @@ :maxdepth: 3 ``` -[Omnitrace](https://github.com/AMDResearch/omnitrace) supports profiling Python code at the source-level and/or the script-level. +[OmniTrace](https://github.com/AMDResearch/omnitrace) supports profiling Python code at the source-level and/or the script-level. Python support is enabled via the `OMNITRACE_USE_PYTHON` and the `OMNITRACE_PYTHON_VERSIONS=".` CMake options. Alternatively, to build multiple python versions, use `OMNITRACE_PYTHON_VERSIONS=".;[.]"`, and `OMNITRACE_PYTHON_ROOT_DIRS="/path/to/version;[/path/to/version]"` instead of `OMNITRACE_PYTHON_VERSION`. @@ -30,9 +30,9 @@ export PYTHONPATH=/opt/omnitrace/lib/python3.8/site-packages:${PYTHONPATH} If using either the `share/omnitrace/setup-env.sh` script or the modulefile in `share/modulefiles/omnitrace`, prefixing the `PYTHONPATH` environment variable is automatically handled. -## Running Omnitrace on a Python Script +## Running OmniTrace on a Python Script -Omnitrace provides an `omnitrace-python` helper bash script which effectively handles ensuring `PYTHONPATH` is properly set and the correct python interpreter is used. 
+OmniTrace provides an `omnitrace-python` helper bash script which effectively handles ensuring `PYTHONPATH` is properly set and the correct python interpreter is used. Thus the following are effectively equivalent: ```bash @@ -57,7 +57,7 @@ optional arguments: Logging verbosity -b, --builtin Put 'profile' in the builtins. Use '@profile' to decorate a single function, or 'with profile:' to profile a single section of code. -c FILE, --config FILE - Omnitrace configuration file + OmniTrace configuration file -s FILE, --setup FILE Code to execute before the code to profile -F [BOOL], --full-filepath [BOOL] @@ -168,7 +168,7 @@ And executed with `omnitrace-python -b -- ./example.py`, omnitrace would produce |-----------------------------------------------------------| ``` -## Omnitrace Python Source Instrumentation +## OmniTrace Python Source Instrumentation Starting from the unmodified `example.py` script above, we start by importing the `omnitrace` module: @@ -232,7 +232,7 @@ The results for both of the source-level instrumentation modes are identical to > ***When `omnitrace-python` is used without built-ins, the profiling results will likely be cluttered by*** > ***numerous functions called during the importing of more complex modules, e.g. `import numpy`.*** -### Omnitrace Python Source Instrumentation Configuration +### OmniTrace Python Source Instrumentation Configuration Within the Python source code, the profiler can be configured by directly modifying the `omnitrace.profiler.config` data fields. diff --git a/source/docs/runtime.md b/source/docs/runtime.md index 070e12b7de..7586a52b79 100644 --- a/source/docs/runtime.md +++ b/source/docs/runtime.md @@ -1,4 +1,4 @@ -# Configuring Omnitrace Runtime +# Configuring OmniTrace Runtime ```eval_rst .. 
toctree:: @@ -49,7 +49,7 @@ match to nearly all common expressions for boolean logic: ON, OFF, YES, NO, TRUE ### Exploring Components -[Omnitrace](https://github.com/AMDResearch/omnitrace) uses [timemory](https://github.com/NERSC/timemory) extensively to provide various capabilities and manage +[OmniTrace](https://github.com/AMDResearch/omnitrace) uses [timemory](https://github.com/NERSC/timemory) extensively to provide various capabilities and manage data and resources. By default, when `OMNITRACE_USE_TIMEMORY=ON`, omnitrace will only collect wall-clock timing values; however, by modifying the `OMNITRACE_TIMEMORY_COMPONENTS` setting, omnitrace can be configured to collect hardware counters, CPU-clock timers, memory usage, context-switches, page-faults, network statistics, @@ -72,7 +72,7 @@ omnitrace-avail --components --available --string --brief ### Exploring Hardware Counters -[Omnitrace](https://github.com/AMDResearch/omnitrace) supports collecting hardware counters via PAPI and ROCm. +[OmniTrace](https://github.com/AMDResearch/omnitrace) supports collecting hardware counters via PAPI and ROCm. Generally, PAPI is used to collect CPU-based hardware counters and ROCm is used to collect GPU-based hardware counters; although it is possible to install PAPI with ROCm support and collect GPU-based hardware counters via PAPI but this is not recommended because CPU hardware counters via PAPI cannot be collected simultaneously. @@ -143,7 +143,7 @@ OMNITRACE_PAPI_EVENTS = PAPI_TOT_INS perf::CACHE-REFERENCES perf::CACHE- #### OMNITRACE_ROCM_EVENTS -Omnitrace reads the ROCm events from the `${ROCM_PATH}/lib/rocprofiler/metrics.xml` file. Use the `ROCP_METRICS` environment +OmniTrace reads the ROCm events from the `${ROCM_PATH}/lib/rocprofiler/metrics.xml` file. Use the `ROCP_METRICS` environment variable to point omnitrace to a different XML metrics file, e.g., `export ROCP_METRICS=${PWD}/custom_metrics.xml`. 
`omnitrace-avail -H -c GPU` will show event names with a suffix of `:device=N` where `N` is the device number. For example, if you have two devices, you will see: @@ -201,7 +201,7 @@ OMNITRACE_CRITICAL_TRACE_SERIALIZE_NAMES = false OMNITRACE_DEBUG = false OMNITRACE_DL_VERBOSE = 0 OMNITRACE_INSTRUMENTATION_INTERVAL = 1 -OMNITRACE_KOKKOS_KERNEL_LOGGER = false +OMNITRACE_KOKKOSP_KERNEL_LOGGER = false OMNITRACE_PAPI_EVENTS = PAPI_TOT_CYC OMNITRACE_PERFETTO_BACKEND = inprocess OMNITRACE_PERFETTO_BUFFER_SIZE_KB = 1024000 @@ -302,7 +302,7 @@ $ omnitrace-avail -S -bd | OMNITRACE_INPUT_PREFIX | Explicitly specify the prefix for in... | | OMNITRACE_INSTRUMENTATION_INTERVAL | Instrumentation only takes measureme... | | OMNITRACE_JSON_OUTPUT | Write json output files | -| OMNITRACE_KOKKOS_KERNEL_LOGGER | Enables kernel logging | +| OMNITRACE_KOKKOSP_KERNEL_LOGGER | Enables kernel logging | | OMNITRACE_MAX_DEPTH | Set the maximum depth of label hiera... | | OMNITRACE_MAX_THREAD_BOOKMARKS | Maximum number of times a worker thr... | | OMNITRACE_MAX_WIDTH | Set the maximum width for component ... | @@ -1161,7 +1161,7 @@ $ omnitrace-avail -H -bd ## Creating a Configuration File -[Omnitrace](https://github.com/AMDResearch/omnitrace) supports 3 configuration file formats: JSON, XML, and plain text. +[OmniTrace](https://github.com/AMDResearch/omnitrace) supports 3 configuration file formats: JSON, XML, and plain text. Use `omnitrace-avail -G -F txt json xml` to generate default configuration files of each format and, optionally, include the `--all` flag for descriptions, etc. Configuration files are specified via the `OMNITRACE_CONFIG_FILE` environment variable diff --git a/source/docs/sampling.md b/source/docs/sampling.md index ba4caf5867..0b65dd898a 100644 --- a/source/docs/sampling.md +++ b/source/docs/sampling.md @@ -192,7 +192,7 @@ are interpreted as belonging to omnitrace and all arguments following the double application and it's arguments. 
The double-hyphen is only necessary when passing command line arguments to the target which also use hyphens. E.g. `omnitrace-sample ls` works but, in order to run `ls -la`, use `omnitrace-sample -- ls -la`. -[Configuring Omnitrace Runtime](runtime.md) establish the precedence of environment variable values over values specified in the configuration files. This enables +[Configuring OmniTrace Runtime](runtime.md) establishes the precedence of environment variable values over values specified in the configuration files. This enables the user to configure the omnitrace runtime to their preferred default behavior in a file such as `~/.omnitrace.cfg` and then easily override those settings via something like `OMNITRACE_ENABLED=OFF omnitrace-sample -- foo`. Similarly, the command line arguments passed to `omnitrace-sample` take precedence over environment variables. diff --git a/source/docs/user_api.md b/source/docs/user_api.md index 75d2113ada..24848a045a 100644 --- a/source/docs/user_api.md +++ b/source/docs/user_api.md @@ -4,6 +4,7 @@ .. doxygenfile:: omnitrace/types.h .. doxygenfile:: omnitrace/categories.h .. doxygenfile:: omnitrace/user.h +.. 
doxygenfile:: omnitrace/causal.h ``` By default, when omnitrace detects any `omnitrace_user_start_*` or `omnitrace_user_stop_*` function, instrumentation @@ -13,6 +14,26 @@ recorded, regardless of whether whether `omnitrace_user_start_*` or `omnitrace_u ## Example +### Compilation + +#### CMake + +```cmake +find_package(omnitrace REQUIRED COMPONENTS user) + +add_executable(foo foo.cpp) + +target_link_libraries(foo PRIVATE omnitrace::omnitrace-user-library) +``` + +#### General + +Assuming omnitrace is installed in `/opt/omnitrace`: + +```bash +g++ -I/opt/omnitrace/include -L/opt/omnitrace/lib foo.cpp -o foo -lomnitrace-user +``` + ### User API Implementation ```cpp diff --git a/source/lib/common/defines.h.in b/source/lib/common/defines.h.in index 72f0bf8439..2a8f87c902 100644 --- a/source/lib/common/defines.h.in +++ b/source/lib/common/defines.h.in @@ -71,6 +71,7 @@ #define OMNITRACE_VISIBILITY(MODE) OMNITRACE_ATTRIBUTE(visibility(MODE)) #define OMNITRACE_PUBLIC_API OMNITRACE_VISIBILITY("default") #define OMNITRACE_HIDDEN_API OMNITRACE_VISIBILITY("hidden") +#define OMNITRACE_INTERNAL_API OMNITRACE_VISIBILITY("internal") #define OMNITRACE_INLINE OMNITRACE_ATTRIBUTE(always_inline) inline #define OMNITRACE_NOINLINE OMNITRACE_ATTRIBUTE(noinline) #define OMNITRACE_HOT OMNITRACE_ATTRIBUTE(hot) @@ -80,6 +81,8 @@ #define OMNITRACE_WEAK OMNITRACE_ATTRIBUTE(weak) #define OMNITRACE_PACKED OMNITRACE_ATTRIBUTE(__packed__) #define OMNITRACE_PACKED_ALIGN(VAL) OMNITRACE_PACKED OMNITRACE_ATTRIBUTE(__aligned__(VAL)) +#define OMNITRACE_LIKELY(...) __builtin_expect((__VA_ARGS__), 1) +#define OMNITRACE_UNLIKELY(...) 
__builtin_expect((__VA_ARGS__), 0) #if defined(OMNITRACE_CI) && OMNITRACE_CI > 0 # if defined(NDEBUG) diff --git a/source/lib/common/join.hpp b/source/lib/common/join.hpp index de1e5d055b..42babe7d8d 100644 --- a/source/lib/common/join.hpp +++ b/source/lib/common/join.hpp @@ -22,9 +22,13 @@ #pragma once +#include +#include #include #include #include +#include +#include #include #if !defined(OMNITRACE_FOLD_EXPRESSION) @@ -127,6 +131,42 @@ join(QuoteStrings&&, DelimT&& _delim, Args&&... _args) return (_ret.length() > _len) ? _ret.substr(_len) : std::string{}; } } + +template +auto +join(std::array&& _delim, Args&&... _args) +{ + return join("", std::get<0>(_delim), + join(std::get<1>(_delim), std::forward(_args)...), + std::get<2>(_delim)); +} + +template +auto +join(QuoteStrings&&, std::array&& _delim, Args&&... _args) +{ + return join(QuoteStrings{}, "", std::get<0>(_delim), + join(std::get<1>(_delim), std::forward(_args)...), + std::get<2>(_delim)); +} + +template +auto +join(std::tuple&& _delim, Args&&... _args) +{ + return join("", std::get<0>(_delim), + join(std::get<1>(_delim), std::forward(_args)...), + std::get<2>(_delim)); +} + +template +auto +join(QuoteStrings&&, std::tuple&& _delim, Args&&... 
_args) +{ + return join(QuoteStrings{}, "", std::get<0>(_delim), + join(std::get<1>(_delim), std::forward(_args)...), + std::get<2>(_delim)); +} } // namespace } // namespace common } // namespace omnitrace diff --git a/source/lib/omnitrace-dl/dl.cpp b/source/lib/omnitrace-dl/dl.cpp index ed700872d8..8b918427a8 100644 --- a/source/lib/omnitrace-dl/dl.cpp +++ b/source/lib/omnitrace-dl/dl.cpp @@ -266,6 +266,9 @@ struct OMNITRACE_HIDDEN_API indirect "omnitrace_register_source"); OMNITRACE_DLSYM(omnitrace_register_coverage_f, m_omnihandle, "omnitrace_register_coverage"); + OMNITRACE_DLSYM(omnitrace_progress_f, m_omnihandle, "omnitrace_progress"); + OMNITRACE_DLSYM(omnitrace_annotated_progress_f, m_omnihandle, + "omnitrace_annotated_progress"); OMNITRACE_DLSYM(kokkosp_print_help_f, m_omnihandle, "kokkosp_print_help"); OMNITRACE_DLSYM(kokkosp_parse_args_f, m_omnihandle, "kokkosp_parse_args"); @@ -343,8 +346,10 @@ struct OMNITRACE_HIDDEN_API indirect _cb.stop_thread_trace = &omnitrace_user_stop_thread_trace_dl; _cb.push_region = &omnitrace_user_push_region_dl; _cb.pop_region = &omnitrace_user_pop_region_dl; + _cb.progress = &omnitrace_user_progress_dl; _cb.push_annotated_region = &omnitrace_user_push_annotated_region_dl; _cb.pop_annotated_region = &omnitrace_user_pop_annotated_region_dl; + _cb.annotated_progress = &omnitrace_user_annotated_progress_dl; (*omnitrace_user_configure_f)(OMNITRACE_USER_REPLACE_CONFIG, _cb, nullptr); } } @@ -370,6 +375,9 @@ public: omnitrace_annotation_t*, size_t) = nullptr; int (*omnitrace_pop_category_region_f)(omnitrace_category_t, const char*, omnitrace_annotation_t*, size_t) = nullptr; + void (*omnitrace_progress_f)(const char*) = nullptr; + void (*omnitrace_annotated_progress_f)(const char*, omnitrace_annotation_t*, + size_t) = nullptr; // libomnitrace-user functions int (*omnitrace_user_configure_f)(int, user_cb_t, user_cb_t*) = nullptr; @@ -772,6 +780,12 @@ extern "C" return OMNITRACE_DL_INVOKE(get_indirect().omnitrace_pop_region_f, 
name); } + int omnitrace_user_progress_dl(const char* name) + { + OMNITRACE_DL_INVOKE(get_indirect().omnitrace_progress_f, name); + return 0; + } + int omnitrace_user_push_annotated_region_dl(const char* name, omnitrace_annotation_t* _annotations, size_t _annotation_count) @@ -792,6 +806,28 @@ extern "C" _annotation_count); } + int omnitrace_user_annotated_progress_dl(const char* name, + omnitrace_annotation_t* _annotations, + size_t _annotation_count) + { + OMNITRACE_DL_INVOKE(get_indirect().omnitrace_annotated_progress_f, name, + _annotations, _annotation_count); + return 0; + } + + void omnitrace_progress(const char* _name) + { + return OMNITRACE_DL_INVOKE(get_indirect().omnitrace_progress_f, _name); + } + + void omnitrace_annotated_progress(const char* _name, + omnitrace_annotation_t* _annotations, + size_t _annotation_count) + { + return OMNITRACE_DL_INVOKE(get_indirect().omnitrace_annotated_progress_f, _name, + _annotations, _annotation_count); + } + //----------------------------------------------------------------------------------// // // KokkosP @@ -1033,11 +1069,13 @@ omnitrace_preload() { // reset_omnitrace_preload(); omnitrace_preinit_library(); + auto _causal = get_env("OMNITRACE_USE_CAUSAL", false); + auto _mode = get_env("OMNITRACE_MODE", (_causal) ? 
"causal" : "sampling"); OMNITRACE_DL_LOG(1, "[%s] invoking %s(%s)\n", __FUNCTION__, "omnitrace_init", - ::omnitrace::join(::omnitrace::QuoteStrings{}, ", ", "sampling", - false, "main") + ::omnitrace::join(::omnitrace::QuoteStrings{}, ", ", _mode, + false, "omnitrace") .c_str()); - omnitrace_init("sampling", false, "omnitrace"); + omnitrace_init(_mode.c_str(), false, nullptr); omnitrace_init_tooling(); } diff --git a/source/lib/omnitrace-dl/dl.hpp b/source/lib/omnitrace-dl/dl.hpp index bf60b0ecf2..d4c3c2b4b1 100644 --- a/source/lib/omnitrace-dl/dl.hpp +++ b/source/lib/omnitrace-dl/dl.hpp @@ -92,6 +92,9 @@ extern "C" const char* source) OMNITRACE_PUBLIC_API; void omnitrace_register_coverage(const char* file, const char* func, size_t address) OMNITRACE_PUBLIC_API; + void omnitrace_progress(const char*) OMNITRACE_PUBLIC_API; + void omnitrace_annotated_progress(const char*, omnitrace_annotation_t*, + size_t) OMNITRACE_PUBLIC_API; #if defined(OMNITRACE_DL_SOURCE) && (OMNITRACE_DL_SOURCE > 0) void omnitrace_preinit_library(void) OMNITRACE_HIDDEN_API; @@ -111,6 +114,9 @@ extern "C" int omnitrace_user_pop_annotated_region_dl(const char*, omnitrace_annotation_t*, size_t) OMNITRACE_HIDDEN_API; + int omnitrace_user_progress_dl(const char* name) OMNITRACE_HIDDEN_API; + int omnitrace_user_annotated_progress_dl(const char*, omnitrace_annotation_t*, + size_t) OMNITRACE_HIDDEN_API; // KokkosP struct OMNITRACE_HIDDEN_API SpaceHandle { diff --git a/source/lib/omnitrace-dl/main.c b/source/lib/omnitrace-dl/main.c index bf882eaaa1..4eadbb6c46 100644 --- a/source/lib/omnitrace-dl/main.c +++ b/source/lib/omnitrace-dl/main.c @@ -97,11 +97,11 @@ omnitrace_main(int argc, char** argv, char** envp) const char* mode = getenv("OMNITRACE_MODE"); omnitrace_init(mode ? 
mode : "sampling", false, argv[0]); omnitrace_init_tooling(); - omnitrace_push_trace("main(int argc, char** argv)"); + omnitrace_push_trace(basename(argv[0])); int ret = main_real(argc, argv, envp); - omnitrace_pop_trace("main(int argc, char** argv)"); + omnitrace_pop_trace(basename(argv[0])); omnitrace_finalize(); return ret; diff --git a/source/lib/omnitrace-user/CMakeLists.txt b/source/lib/omnitrace-user/CMakeLists.txt index 0d668c9459..7358fc4bab 100644 --- a/source/lib/omnitrace-user/CMakeLists.txt +++ b/source/lib/omnitrace-user/CMakeLists.txt @@ -17,6 +17,7 @@ target_sources( omnitrace-user-library PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/user.cpp ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/user.h + ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/causal.h ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/types.h) target_include_directories( omnitrace-user-library PUBLIC $ @@ -38,6 +39,7 @@ omnitrace_strip_target(omnitrace-user-library) install( FILES ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/user.h ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/types.h + ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/causal.h ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/categories.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/omnitrace) diff --git a/source/lib/omnitrace-user/omnitrace/causal.h b/source/lib/omnitrace-user/omnitrace/causal.h new file mode 100644 index 0000000000..b5b1b5149d --- /dev/null +++ b/source/lib/omnitrace-user/omnitrace/causal.h @@ -0,0 +1,86 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +/** @file causal.h */ + +#ifndef OMNITRACE_CAUSAL_H_ +#define OMNITRACE_CAUSAL_H_ + +/** + * @defgroup OMNITRACE_CAUSAL_GROUP OmniTrace Causal Profiling Defines + * + * @{ + */ + +#if !defined(OMNITRACE_CAUSAL_ENABLED) +/** Preprocessor switch to enable/disable instrumentation for causal profiling */ +# define OMNITRACE_CAUSAL_ENABLED 1 +#endif + +#if OMNITRACE_CAUSAL_ENABLED > 0 +# include + +# if !defined(OMNITRACE_CAUSAL_LABEL) +/** @cond OMNITRACE_HIDDEN_DEFINES */ +# define OMNITRACE_CAUSAL_STR2(x) # x +# define OMNITRACE_CAUSAL_STR(x) OMNITRACE_CAUSAL_STR2(x) +/** @endcond */ +/** Default label for a causal progress point */ +# define OMNITRACE_CAUSAL_LABEL __FILE__ ":" OMNITRACE_CAUSAL_STR(__LINE__) +# endif +# if !defined(OMNITRACE_CAUSAL_PROGRESS) +/** Adds a throughput progress point with label `<file>:<line>` */ +# define OMNITRACE_CAUSAL_PROGRESS omnitrace_user_progress(OMNITRACE_CAUSAL_LABEL); +# endif +# if !defined(OMNITRACE_CAUSAL_PROGRESS_NAMED) +/** Adds a throughput progress point with user defined label. Each instance should use a + * unique label. */ +# define OMNITRACE_CAUSAL_PROGRESS_NAMED(LABEL) omnitrace_user_progress(LABEL); +# endif +# if !defined(OMNITRACE_CAUSAL_BEGIN) +/** Starts a latency progress point (region of interest) with user defined label. Each + * instance should use a unique label. */ +# define OMNITRACE_CAUSAL_BEGIN(LABEL) omnitrace_user_push_region(LABEL); +# endif +# if !defined(OMNITRACE_CAUSAL_END) +/** End the latency progress point (region of interest) for the matching user defined + * label. 
*/ +# define OMNITRACE_CAUSAL_END(LABEL) omnitrace_user_pop_region(LABEL); +# endif +#else +# if !defined(OMNITRACE_CAUSAL_PROGRESS) +# define OMNITRACE_CAUSAL_PROGRESS +# endif +# if !defined(OMNITRACE_CAUSAL_PROGRESS_NAMED) +# define OMNITRACE_CAUSAL_PROGRESS_NAMED(LABEL) +# endif +# if !defined(OMNITRACE_CAUSAL_BEGIN) +# define OMNITRACE_CAUSAL_BEGIN(LABEL) +# endif +# if !defined(OMNITRACE_CAUSAL_END) +# define OMNITRACE_CAUSAL_END(LABEL) +# endif +#endif + +/** @} */ + +#endif // OMNITRACE_CAUSAL_H_ diff --git a/source/lib/omnitrace-user/omnitrace/types.h b/source/lib/omnitrace-user/omnitrace/types.h index 8b026e7015..1a8498fe97 100644 --- a/source/lib/omnitrace-user/omnitrace/types.h +++ b/source/lib/omnitrace-user/omnitrace/types.h @@ -49,8 +49,10 @@ extern "C" omnitrace_trace_func_t stop_thread_trace; omnitrace_region_func_t push_region; omnitrace_region_func_t pop_region; + omnitrace_region_func_t progress; omnitrace_annotated_region_func_t push_annotated_region; omnitrace_annotated_region_func_t pop_annotated_region; + omnitrace_annotated_region_func_t annotated_progress; /// @var start_trace /// @brief callback for enabling tracing globally @@ -64,10 +66,14 @@ extern "C" /// @brief callback for starting a trace region /// @var pop_region /// @brief callback for ending a trace region + /// @var progress + /// @brief callback for marking a causal profiling event /// @var push_annotated_region /// @brief callback for starting a trace region + annotations /// @var pop_annotated_region /// @brief callback for ending a trace region + annotations + /// @var annotated_progress + /// @brief callback for marking a causal profiling event + annotations } omnitrace_user_callbacks_t; /// @enum OMNITRACE_USER_CONFIGURE_MODE @@ -104,7 +110,7 @@ extern "C" #ifndef OMNITRACE_USER_CALLBACKS_INIT # define OMNITRACE_USER_CALLBACKS_INIT \ { \ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL \ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL \ } #endif diff --git 
a/source/lib/omnitrace-user/omnitrace/user.h b/source/lib/omnitrace-user/omnitrace/user.h index 2b6db9bdac..64e7aa6100 100644 --- a/source/lib/omnitrace-user/omnitrace/user.h +++ b/source/lib/omnitrace-user/omnitrace/user.h @@ -104,6 +104,13 @@ extern "C" extern int omnitrace_user_pop_annotated_region(const char*, omnitrace_annotation_t*, size_t) OMNITRACE_PUBLIC_API; + /// mark causal progress + extern int omnitrace_user_progress(const char*) OMNITRACE_PUBLIC_API; + + /// mark causal progress with annotations + extern int omnitrace_user_annotated_progress(const char*, omnitrace_annotation_t*, + size_t) OMNITRACE_PUBLIC_API; + /// @fn int omnitrace_user_configure(omnitrace_user_configure_mode_t mode, /// omnitrace_user_callbacks_t inp, /// omnitrace_user_callbacks_t* out) diff --git a/source/lib/omnitrace-user/user.cpp b/source/lib/omnitrace-user/user.cpp index 0b55a530df..72fa03fe87 100644 --- a/source/lib/omnitrace-user/user.cpp +++ b/source/lib/omnitrace-user/user.cpp @@ -80,6 +80,11 @@ extern "C" return invoke(_callbacks.pop_region, id); } + int omnitrace_user_progress(const char* id) + { + return invoke(_callbacks.progress, id); + } + int omnitrace_user_push_annotated_region(const char* id, annotation_t* _annotations, size_t _annotation_count) { @@ -94,6 +99,12 @@ extern "C" _annotation_count); } + int omnitrace_user_annotated_progress(const char* id, annotation_t* _annotations, + size_t _annotation_count) + { + return invoke(_callbacks.annotated_progress, id, _annotations, _annotation_count); + } + int omnitrace_user_configure(omnitrace_user_configure_mode_t mode, omnitrace_user_callbacks_t inp, omnitrace_user_callbacks_t* out) @@ -121,8 +132,10 @@ extern "C" _update(_v.stop_thread_trace, inp.stop_thread_trace); _update(_v.push_region, inp.push_region); _update(_v.pop_region, inp.pop_region); + _update(_v.progress, inp.progress); _update(_v.push_annotated_region, inp.push_annotated_region); _update(_v.pop_annotated_region, inp.pop_annotated_region); + 
_update(_v.annotated_progress, inp.annotated_progress); _callbacks = _v; break; @@ -141,8 +154,10 @@ extern "C" _update(_v.stop_thread_trace, inp.stop_thread_trace); _update(_v.push_region, inp.push_region); _update(_v.pop_region, inp.pop_region); + _update(_v.progress, inp.progress); _update(_v.push_annotated_region, inp.push_annotated_region); _update(_v.pop_annotated_region, inp.pop_annotated_region); + _update(_v.annotated_progress, inp.annotated_progress); _callbacks = _v; break; diff --git a/source/lib/omnitrace/CMakeLists.txt b/source/lib/omnitrace/CMakeLists.txt index 2356c720f3..476a844b9a 100644 --- a/source/lib/omnitrace/CMakeLists.txt +++ b/source/lib/omnitrace/CMakeLists.txt @@ -32,6 +32,7 @@ target_link_libraries( $ $ $ + $ $ $ $ @@ -57,7 +58,8 @@ add_library(omnitrace::omnitrace-object-library ALIAS omnitrace-object-library) target_sources( omnitrace-object-library PRIVATE ${CMAKE_CURRENT_LIST_DIR}/library.cpp ${CMAKE_CURRENT_LIST_DIR}/regions.cpp - ${CMAKE_CURRENT_LIST_DIR}/api.cpp ${CMAKE_CURRENT_LIST_DIR}/api.hpp) + ${CMAKE_CURRENT_LIST_DIR}/progress.cpp ${CMAKE_CURRENT_LIST_DIR}/api.cpp + ${CMAKE_CURRENT_LIST_DIR}/api.hpp) add_subdirectory(library) diff --git a/source/lib/omnitrace/api.cpp b/source/lib/omnitrace/api.cpp index aa39254dfe..ae04bd3e21 100644 --- a/source/lib/omnitrace/api.cpp +++ b/source/lib/omnitrace/api.cpp @@ -100,6 +100,19 @@ omnitrace_pop_category_region(omnitrace_category_t _category, const char* _name, return 0; } +extern "C" void +omnitrace_progress(const char* _name) +{ + omnitrace_progress_hidden(_name); +} + +extern "C" void +omnitrace_annotated_progress(const char* _name, omnitrace_annotation_t* _annotations, + size_t _annotation_count) +{ + omnitrace_annotated_progress_hidden(_name, _annotations, _annotation_count); +} + extern "C" void omnitrace_init_library(void) { diff --git a/source/lib/omnitrace/api.hpp b/source/lib/omnitrace/api.hpp index 483ce730fd..e4d75fee32 100644 --- a/source/lib/omnitrace/api.hpp +++ 
b/source/lib/omnitrace/api.hpp @@ -86,6 +86,13 @@ extern "C" void omnitrace_register_coverage(const char* file, const char* func, size_t address) OMNITRACE_PUBLIC_API; + /// mark causal progress + void omnitrace_progress(const char*) OMNITRACE_PUBLIC_API; + + /// mark causal progress with annotations + void omnitrace_annotated_progress(const char*, omnitrace_annotation_t*, + size_t) OMNITRACE_PUBLIC_API; + // these are the real implementations for internal calling convention void omnitrace_init_library_hidden(void) OMNITRACE_HIDDEN_API; bool omnitrace_init_tooling_hidden(void) OMNITRACE_HIDDEN_API; @@ -108,4 +115,7 @@ extern "C" const char*) OMNITRACE_HIDDEN_API; void omnitrace_register_coverage_hidden(const char*, const char*, size_t) OMNITRACE_HIDDEN_API; + void omnitrace_progress_hidden(const char*) OMNITRACE_HIDDEN_API; + void omnitrace_annotated_progress_hidden(const char*, omnitrace_annotation_t*, + size_t) OMNITRACE_HIDDEN_API; } diff --git a/source/lib/omnitrace/library.cpp b/source/lib/omnitrace/library.cpp index 1e3cbe805c..00bd6cf4d0 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -26,12 +26,16 @@ #include "api.hpp" #include "common/setup.hpp" +#include "library/causal/data.hpp" +#include "library/causal/experiment.hpp" +#include "library/causal/sampling.hpp" #include "library/components/exit_gotcha.hpp" #include "library/components/fork_gotcha.hpp" #include "library/components/fwd.hpp" #include "library/components/mpi_gotcha.hpp" #include "library/components/pthread_gotcha.hpp" #include "library/components/rocprofiler.hpp" +#include "library/concepts.hpp" #include "library/config.hpp" #include "library/coverage.hpp" #include "library/critical_trace.hpp" @@ -52,6 +56,7 @@ #include "library/utility.hpp" #include "omnitrace/categories.h" // in omnitrace-user +#include #include #include #include @@ -67,6 +72,8 @@ #include #include #include +#include +#include using namespace omnitrace; @@ -92,9 +99,39 @@ namespace 
auto _timemory_manager = tim::manager::instance(); auto _timemory_settings = tim::settings::shared_instance(); +bool +ensure_initialization(bool _offset, int64_t _glob_n, int64_t _offset_n) +{ + auto _exit_info = component::exit_gotcha::get_exit_info(); + if(_exit_info.is_known && _exit_info.exit_code != EXIT_SUCCESS) return _offset; + + auto _tid = utility::get_thread_index(); + auto _max_threads = grow_data(_tid + 1); + + if(_tid > 0 && _tid < _max_threads) + { + const auto& _info = thread_info::get(); + OMNITRACE_BASIC_VERBOSE_F(3, + "thread info: %s, offset: %s, global counter: %li, " + "offset counter: %li, max threads: %li\n", + std::to_string(static_cast(_info)).c_str(), + std::to_string(_offset).c_str(), _glob_n, _offset_n, + _max_threads); + } + + return _offset; +} + auto ensure_finalization(bool _static_init = false) { + if(_static_init) + { + auto _idx = threading::add_callback(&ensure_initialization); + if(_idx < 0) + throw exception("failure adding threading callback"); + } + const auto& _info = thread_info::init(); const auto& _tid = _info->index_data; if(_tid) @@ -149,6 +186,51 @@ is_system_backend() using Device = critical_trace::Device; using Phase = critical_trace::Phase; + +template +struct fini_bundle +{ + using data_type = std::tuple; + + TIMEMORY_DEFAULT_OBJECT(fini_bundle) + + fini_bundle(std::string_view _label) + : m_label{ _label } + {} + + template + void start(Args&&... _args) + { + TIMEMORY_FOLD_EXPRESSION(tim::operation::start{}( + std::get(m_data), std::forward(_args)...)); + } + + template + void stop(Args&&... 
_args) + { + TIMEMORY_FOLD_EXPRESSION(tim::operation::stop{}( + std::get(m_data), std::forward(_args)...)); + } + + std::string as_string(bool _print_prefix = true) const + { + std::stringstream _ss; + if(_print_prefix && m_label.length() > 0) _ss << m_label << " : "; + _ss << timemory::join::join(", ", std::get(m_data)...); + return _ss.str(); + } + + std::string_view m_label = {}; + data_type m_data = {}; +}; + +template +struct fini_bundle> +{ + using base_type = fini_bundle; +}; + +using fini_bundle_t = typename fini_bundle::base_type; } // namespace //======================================================================================// @@ -365,13 +447,21 @@ omnitrace_init_tooling_hidden() OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); process_sampler::setup(); } - if(get_use_sampling()) + if(get_use_causal()) { - OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); - sampling::setup(); + { + OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); + causal::sampling::setup(); + } + push_enable_sampling_on_child_threads(get_use_causal()); + sampling::unblock_signals(); } - if(get_use_sampling()) + else if(get_use_sampling()) { + { + OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); + sampling::setup(); + } push_enable_sampling_on_child_threads(get_use_sampling()); sampling::unblock_signals(); } @@ -400,6 +490,8 @@ omnitrace_init_tooling_hidden() tasking::setup(); + if(get_use_causal()) causal::start_experimenting(); + if(get_use_timemory()) { comp::user_global_bundle::global_init(); @@ -457,13 +549,14 @@ omnitrace_init_tooling_hidden() //======================================================================================// extern "C" void -omnitrace_init_hidden(const char* _mode, bool _is_binary_rewrite, const char* _argv0) +omnitrace_init_hidden(const char* _mode, bool _is_binary_rewrite, const char* _argv0_c) { static int _total_count = 0; static auto _args = std::make_pair(std::string_view{ _mode }, _is_binary_rewrite); auto _count = _total_count++; auto 
_mode_sv = std::string_view{ _mode }; + auto _argv0 = (_argv0_c) ? std::string{ _argv0_c } : config::get_exe_name(); // this function may be called multiple times if multiple libraries are instrumented // we want to guard against multiple calls which with different arguments if(_count > 0 && @@ -495,21 +588,23 @@ omnitrace_init_hidden(const char* _mode, bool _is_binary_rewrite, const char* _a "called after omnitrace was initialized. state = %s. Mode-based settings " "(via -M passed to omnitrace exe) may not be properly " "configured.\n", - _mode, std::to_string(_is_binary_rewrite).c_str(), _argv0, + _mode, std::to_string(_is_binary_rewrite).c_str(), _argv0.c_str(), std::to_string(get_state()).c_str()); } } - tracing::get_finalization_functions().emplace_back([_argv0]() { + tracing::get_finalization_functions().emplace_back([_argv0_c]() { OMNITRACE_CI_THROW(get_state() != State::Active, "Finalizer function for popping main invoked in non-active " "state :: state = %s\n", std::to_string(get_state()).c_str()); if(get_state() == State::Active) { + auto _name = (_argv0_c) ? std::string{ _argv0_c } : config::get_exe_name(); // if main hasn't been popped yet, pop it - OMNITRACE_BASIC_VERBOSE(2, "Running omnitrace_pop_trace(%s)...\n", _argv0); - omnitrace_pop_trace_hidden(_argv0); + OMNITRACE_BASIC_VERBOSE(2, "Running omnitrace_pop_trace(%s)...\n", + _name.c_str()); + omnitrace_pop_trace_hidden(_name.c_str()); } }); @@ -521,7 +616,7 @@ omnitrace_init_hidden(const char* _mode, bool _is_binary_rewrite, const char* _a OMNITRACE_CONDITIONAL_BASIC_PRINT_F( get_debug_env() || get_verbose_env() > 2, "mode: %s | is binary rewrite: %s | command: %s\n", _mode, - (_is_binary_rewrite) ? "y" : "n", _argv0); + (_is_binary_rewrite) ? 
"y" : "n", _argv0.c_str()); tim::set_env("OMNITRACE_MODE", _mode, 0); config::is_binary_rewrite() = _is_binary_rewrite; @@ -561,6 +656,8 @@ omnitrace_finalize_hidden(void) { // disable thread id recycling during finalization threading::recycle_ids() = false; + // disable initialization callback + threading::remove_callback(&ensure_initialization); set_thread_state(ThreadState::Completed); @@ -629,13 +726,6 @@ omnitrace_finalize_hidden(void) tim::signals::enable_signal_detection({ tim::signals::sys_signal::Interrupt }, [](int) {}); - std::string _bundle_name = OMNITRACE_FUNCTION; - comp::user_global_bundle _bundle{ _bundle_name.c_str() }; - _bundle.clear(); - _bundle.insert(); - _bundle.start(); - OMNITRACE_DEBUG_F("Copying over all timemory hash information to main thread...\n"); // copy these over so that all hashes are known auto& _hzero = tracing::get_timemory_hash_ids(0); @@ -663,6 +753,9 @@ omnitrace_finalize_hidden(void) get_main_bundle()->stop(); } + fini_bundle_t _finalization{}; + _finalization.start(); + if(get_use_rcclp()) { OMNITRACE_VERBOSE_F(1, "Shutting down RCCLP...\n"); @@ -741,6 +834,12 @@ omnitrace_finalize_hidden(void) rocprofiler::rocm_cleanup(); } + if(get_use_causal()) + { + OMNITRACE_VERBOSE_F(1, "Shutting down causal sampling...\n"); + causal::sampling::shutdown(); + } + if(get_use_sampling()) { OMNITRACE_VERBOSE_F(1, "Shutting down sampling...\n"); @@ -764,6 +863,7 @@ omnitrace_finalize_hidden(void) // if they are still running (e.g. thread-pool still alive), the // thread-specific data will be wrong if try to stop them from // the main thread. + auto _thr_verbose = (config::get_use_causal()) ? 
1 : 0; for(auto& itr : thread_data::instances()) { if(itr && itr->get() && @@ -772,7 +872,7 @@ omnitrace_finalize_hidden(void) std::string _msg = JOIN("", *itr); auto _pos = _msg.find(">>> "); if(_pos != std::string::npos) _msg = _msg.substr(_pos + 5); - OMNITRACE_VERBOSE_F(0, "%s\n", _msg.c_str()); + OMNITRACE_VERBOSE_F(_thr_verbose, "%s\n", _msg.c_str()); } } @@ -785,6 +885,12 @@ omnitrace_finalize_hidden(void) sampling::post_process(); } + if(get_use_causal()) + { + OMNITRACE_VERBOSE_F(1, "Finishing the causal experiments...\n"); + causal::finish_experimenting(); + } + if(get_use_critical_trace() || (get_use_rocm_smi() && get_use_roctracer())) { OMNITRACE_VERBOSE_F(1, "Generating the critical trace...\n"); @@ -929,18 +1035,6 @@ omnitrace_finalize_hidden(void) } } - _bundle.stop(); - auto _get_metric = [](auto* _v, std::string_view _tail) -> std::string { - return (_v) ? JOIN("", *_v, _tail) : std::string{}; - }; - - OMNITRACE_VERBOSE_F(0, "Finalization metrics: %s%s%s%s%s\n", - _get_metric(_bundle.get(), ", ").c_str(), - _get_metric(_bundle.get(), ", ").c_str(), - _get_metric(_bundle.get(), ", ").c_str(), - _get_metric(_bundle.get(), ", ").c_str(), - _get_metric(_bundle.get(), "").c_str()); - if(_timemory_manager && _timemory_manager != nullptr) { _timemory_manager->add_metadata([](auto& ar) { @@ -967,6 +1061,8 @@ omnitrace_finalize_hidden(void) "omnitrace", _cfg); } + _finalization.stop(); + if(_perfetto_output_error) { OMNITRACE_THROW("Error opening perfetto output file: %s", @@ -984,7 +1080,7 @@ omnitrace_finalize_hidden(void) config::finalize(); - OMNITRACE_VERBOSE_F(0, "Finalized\n"); + OMNITRACE_VERBOSE_F(0, "Finalized: %s\n", _finalization.as_string().c_str()); } //======================================================================================// diff --git a/source/lib/omnitrace/library/CMakeLists.txt b/source/lib/omnitrace/library/CMakeLists.txt index 518af62a1b..50d7f84420 100644 --- a/source/lib/omnitrace/library/CMakeLists.txt +++ 
b/source/lib/omnitrace/library/CMakeLists.txt @@ -9,8 +9,10 @@ set(library_sources ${CMAKE_CURRENT_LIST_DIR}/critical_trace.cpp ${CMAKE_CURRENT_LIST_DIR}/debug.cpp ${CMAKE_CURRENT_LIST_DIR}/dynamic_library.cpp - ${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp + ${CMAKE_CURRENT_LIST_DIR}/exception.cpp ${CMAKE_CURRENT_LIST_DIR}/gpu.cpp + ${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp + ${CMAKE_CURRENT_LIST_DIR}/locking.cpp ${CMAKE_CURRENT_LIST_DIR}/mproc.cpp ${CMAKE_CURRENT_LIST_DIR}/ompt.cpp ${CMAKE_CURRENT_LIST_DIR}/perfetto.cpp @@ -19,28 +21,30 @@ set(library_sources ${CMAKE_CURRENT_LIST_DIR}/runtime.cpp ${CMAKE_CURRENT_LIST_DIR}/sampling.cpp ${CMAKE_CURRENT_LIST_DIR}/state.cpp - ${CMAKE_CURRENT_LIST_DIR}/thread_data.cpp + ${CMAKE_CURRENT_LIST_DIR}/thread_deleter.cpp ${CMAKE_CURRENT_LIST_DIR}/thread_info.cpp ${CMAKE_CURRENT_LIST_DIR}/timemory.cpp ${CMAKE_CURRENT_LIST_DIR}/tracing.cpp) set(library_headers ${CMAKE_CURRENT_LIST_DIR}/categories.hpp - ${CMAKE_CURRENT_LIST_DIR}/config.hpp ${CMAKE_CURRENT_LIST_DIR}/common.hpp ${CMAKE_CURRENT_LIST_DIR}/concepts.hpp + ${CMAKE_CURRENT_LIST_DIR}/config.hpp ${CMAKE_CURRENT_LIST_DIR}/coverage.hpp ${CMAKE_CURRENT_LIST_DIR}/cpu_freq.hpp ${CMAKE_CURRENT_LIST_DIR}/critical_trace.hpp ${CMAKE_CURRENT_LIST_DIR}/debug.hpp ${CMAKE_CURRENT_LIST_DIR}/dynamic_library.hpp ${CMAKE_CURRENT_LIST_DIR}/gpu.hpp + ${CMAKE_CURRENT_LIST_DIR}/locking.hpp ${CMAKE_CURRENT_LIST_DIR}/mproc.hpp ${CMAKE_CURRENT_LIST_DIR}/ompt.hpp ${CMAKE_CURRENT_LIST_DIR}/perfetto.hpp ${CMAKE_CURRENT_LIST_DIR}/process_sampler.hpp ${CMAKE_CURRENT_LIST_DIR}/ptl.hpp ${CMAKE_CURRENT_LIST_DIR}/rcclp.hpp + ${CMAKE_CURRENT_LIST_DIR}/redirect.hpp ${CMAKE_CURRENT_LIST_DIR}/rocm.hpp ${CMAKE_CURRENT_LIST_DIR}/rocm_smi.hpp ${CMAKE_CURRENT_LIST_DIR}/rocprofiler.hpp @@ -49,6 +53,7 @@ set(library_headers ${CMAKE_CURRENT_LIST_DIR}/sampling.hpp ${CMAKE_CURRENT_LIST_DIR}/state.hpp ${CMAKE_CURRENT_LIST_DIR}/thread_data.hpp + ${CMAKE_CURRENT_LIST_DIR}/thread_deleter.hpp 
${CMAKE_CURRENT_LIST_DIR}/thread_info.hpp ${CMAKE_CURRENT_LIST_DIR}/timemory.hpp ${CMAKE_CURRENT_LIST_DIR}/tracing.hpp @@ -81,7 +86,22 @@ if(OMNITRACE_USE_ROCM_SMI) PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rocm_smi.cpp) endif() +add_subdirectory(binary) +add_subdirectory(causal) add_subdirectory(components) +add_subdirectory(containers) add_subdirectory(coverage) add_subdirectory(rocm) add_subdirectory(tracing) + +set(ndebug_sources + ${CMAKE_CURRENT_LIST_DIR}/components/mpi_gotcha.cpp + ${CMAKE_CURRENT_LIST_DIR}/components/backtrace_metrics.cpp + ${CMAKE_CURRENT_LIST_DIR}/rcclp.cpp + ${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp + ${CMAKE_CURRENT_LIST_DIR}/rocm_smi.cpp + ${CMAKE_CURRENT_LIST_DIR}/ompt.cpp) + +set_source_files_properties( + ${ndebug_sources} DIRECTORY ${PROJECT_SOURCE_DIR}/source/lib/omnitrace + PROPERTIES COMPILE_DEFINITIONS NDEBUG COMPILE_OPTIONS "-g0;-O3") diff --git a/source/lib/omnitrace/library/binary/CMakeLists.txt b/source/lib/omnitrace/library/binary/CMakeLists.txt new file mode 100644 index 0000000000..9a022fa363 --- /dev/null +++ b/source/lib/omnitrace/library/binary/CMakeLists.txt @@ -0,0 +1,22 @@ +# +set(binary_sources + ${CMAKE_CURRENT_LIST_DIR}/address_multirange.cpp + ${CMAKE_CURRENT_LIST_DIR}/address_range.cpp + ${CMAKE_CURRENT_LIST_DIR}/analysis.cpp + ${CMAKE_CURRENT_LIST_DIR}/dwarf_entry.cpp + ${CMAKE_CURRENT_LIST_DIR}/link_map.cpp + ${CMAKE_CURRENT_LIST_DIR}/scope_filter.cpp + ${CMAKE_CURRENT_LIST_DIR}/symbol.cpp) + +set(binary_headers + ${CMAKE_CURRENT_LIST_DIR}/address_multirange.hpp + ${CMAKE_CURRENT_LIST_DIR}/address_range.hpp + ${CMAKE_CURRENT_LIST_DIR}/analysis.hpp + ${CMAKE_CURRENT_LIST_DIR}/dwarf_entry.hpp + ${CMAKE_CURRENT_LIST_DIR}/fwd.hpp + ${CMAKE_CURRENT_LIST_DIR}/binary_info.hpp + ${CMAKE_CURRENT_LIST_DIR}/link_map.hpp + ${CMAKE_CURRENT_LIST_DIR}/scope_filter.hpp + ${CMAKE_CURRENT_LIST_DIR}/symbol.hpp) + +target_sources(omnitrace-object-library PRIVATE ${binary_sources} ${binary_headers}) diff --git 
a/source/lib/omnitrace/library/binary/address_multirange.cpp b/source/lib/omnitrace/library/binary/address_multirange.cpp new file mode 100644 index 0000000000..893eaaed06 --- /dev/null +++ b/source/lib/omnitrace/library/binary/address_multirange.cpp @@ -0,0 +1,75 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/binary/address_multirange.hpp" + +#include +#include +#include +#include + +namespace omnitrace +{ +namespace binary +{ +address_multirange& +address_multirange::operator+=(std::pair&& _v) +{ + coarse_range = address_range{ std::min(coarse_range.low, _v.second), + std::max(coarse_range.high, _v.second) }; + return *this; +} + +address_multirange& +address_multirange::operator+=(std::pair&& _v) +{ + coarse_range = address_range{ std::min(coarse_range.low, _v.second.low), + std::max(coarse_range.high, _v.second.high) }; + + return *this; +} + +address_multirange& +address_multirange::operator+=(uintptr_t _v) +{ + *this += std::make_pair(coarse{}, _v); + + for(auto&& itr : m_fine_ranges) + if(itr.contains(_v)) return *this; + + m_fine_ranges.emplace(address_range{ _v }); + return *this; +} + +address_multirange& +address_multirange::operator+=(address_range _v) +{ + *this += std::make_pair(coarse{}, _v); + + for(auto&& itr : m_fine_ranges) + if(itr.contains(_v)) return *this; + + m_fine_ranges.emplace(_v); + return *this; +} +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/address_multirange.hpp b/source/lib/omnitrace/library/binary/address_multirange.hpp new file mode 100644 index 0000000000..24fabcb8b4 --- /dev/null +++ b/source/lib/omnitrace/library/binary/address_multirange.hpp @@ -0,0 +1,76 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/binary/address_range.hpp" +#include "library/binary/fwd.hpp" + +#include + +#include +#include + +namespace omnitrace +{ +namespace binary +{ +struct address_multirange +{ + struct coarse + {}; + + TIMEMORY_DEFAULT_OBJECT(address_multirange) + + address_multirange& operator+=(std::pair&&); + address_multirange& operator+=(std::pair&& _v); + address_multirange& operator+=(uintptr_t _v); + address_multirange& operator+=(address_range _v); + + template + bool contains(Tp&& _v) const; + + address_range coarse_range = {}; + + auto size() const { return m_fine_ranges.size(); } + auto empty() const { return m_fine_ranges.empty(); } + auto range_size() const { return coarse_range.size(); } + +private: + std::set m_fine_ranges = {}; +}; + +template +OMNITRACE_INLINE bool +address_multirange::contains(Tp&& _v) const +{ + using type = concepts::unqualified_type_t; + static_assert(std::is_integral::value || + std::is_same::value, + "Error! operator+= supports only integrals or address_ranges"); + + if(!coarse_range.contains(_v)) return false; + return std::any_of(m_fine_ranges.begin(), m_fine_ranges.end(), + [_v](auto&& itr) { return itr.contains(_v); }); +} +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/address_range.cpp b/source/lib/omnitrace/library/binary/address_range.cpp new file mode 100644 index 0000000000..2c43db1cad --- /dev/null +++ b/source/lib/omnitrace/library/binary/address_range.cpp @@ -0,0 +1,190 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "library/binary/address_range.hpp" +#include "library/debug.hpp" + +namespace omnitrace +{ +namespace binary +{ +address_range::address_range(uintptr_t _v) +: low{ _v } +, high{ _v } +{} + +address_range::address_range(uintptr_t _low, uintptr_t _high) +: low{ _low } +, high{ _high } +{ + TIMEMORY_REQUIRE(high >= low) + << "Error! address_range high must be >= low. low=" << as_hex(low) + << ", high=" << as_hex(high) << "\n"; +} + +bool +address_range::is_range() const +{ + return (low < high); +} + +std::string +address_range::as_string(int _depth) const +{ + std::stringstream _ss{}; + _ss << std::hex; + _ss << std::setw(2 * _depth) << ""; + _ss.fill('0'); + _ss << "0x" << std::setw(16) << low << "-" + << "0x" << std::setw(16) << high; + return _ss.str(); +} + +uintptr_t +address_range::size() const +{ + return (low == high) ? 1 : (high > low) ? 
(high - low + 1) : (low - high + 1); +} + +bool +address_range::is_valid() const +{ + return (low <= high && (low + 1) > 1); +} + +bool +address_range::contains(uintptr_t _v) const +{ + return (is_range()) ? (low <= _v && high > _v) : (_v == low); +} + +bool +address_range::contains(address_range _v) const +{ + return (*this == _v) || (contains(_v.low) && contains(_v.high)); +} + +bool +address_range::overlaps(address_range _v) const +{ + if(contains(_v)) return false; + int64_t _lhs_diff = (high - low); + int64_t _rhs_diff = (_v.high - _v.low); + int64_t _diff = (std::max(high, _v.high) - std::min(low, _v.low)); + return (_diff < (_lhs_diff + _rhs_diff)); +} + +bool +address_range::contiguous_with(address_range _v) const +{ + return (_v.low == high || low == _v.high); +} + +bool +address_range::operator==(address_range _v) const +{ + // if arg is range and this is not range, call this function with arg + // if(_v.is_range() && !is_range()) return false; + // check if arg is in range + // if(is_range() && !_v.is_range()) return false; + // both are ranges or both are just address + return std::tie(low, high) == std::tie(_v.low, _v.high); +} + +bool +address_range::operator<(address_range _v) const +{ + if(is_range() && !_v.is_range()) + { + return (low == _v.low) ? true : (low < _v.low); + } + else if(!is_range() && _v.is_range()) + { + return (low == _v.low) ? false : (low < _v.low); + } + else if(!is_range() && !_v.is_range()) + { + return (low < _v.low); + } + return std::tie(low, high) < std::tie(_v.low, _v.high); + // if(_v.low == _v.high && _v.low >= low && _v.low < high) return false; + // return (low == _v.low) ? 
(high > _v.high) : (low < _v.low); +} + +bool +address_range::operator>(address_range _v) const +{ + return !(*this < _v) && !(*this == _v); +} + +address_range& +address_range::operator+=(uintptr_t _v) +{ + if(is_valid()) + { + low += _v; + high += _v; + } + else + { + low = _v; + high = _v; + } + return *this; +} + +address_range& +address_range::operator-=(uintptr_t _v) +{ + if(is_valid()) + { + low -= _v; + high -= _v; + } + else + { + low = _v; + high = _v; + } + return *this; +} + +address_range& +address_range::operator+=(address_range _v) +{ + if(!contiguous_with(_v)) + throw exception( + "attempting to add two address ranges that are not contiguous"); + + low = std::min(low, _v.low); + high = std::max(high, _v.high); + return *this; +} + +hash_value_t +address_range::hash() const +{ + return (is_range()) ? tim::get_combined_hash_id(hash_value_t{ low }, high) + : hash_value_t{ low }; +} +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/address_range.hpp b/source/lib/omnitrace/library/binary/address_range.hpp new file mode 100644 index 0000000000..ffd0de2717 --- /dev/null +++ b/source/lib/omnitrace/library/binary/address_range.hpp @@ -0,0 +1,104 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/binary/fwd.hpp" +#include "library/common.hpp" +#include "library/timemory.hpp" + +#include +#include + +#include +#include + +namespace omnitrace +{ +namespace binary +{ +struct address_range +{ + // set to low to max and high to min to support std::min(...) + // and std::max(...) assignment + uintptr_t low = std::numeric_limits::max(); + uintptr_t high = std::numeric_limits::min(); + + TIMEMORY_DEFAULT_OBJECT(address_range) + + explicit address_range(uintptr_t _v); + address_range(uintptr_t _low, uintptr_t _high); + + bool contains(uintptr_t) const; + bool contains(address_range) const; + bool overlaps(address_range) const; + bool contiguous_with(address_range) const; + + bool operator==(address_range _v) const; + bool operator!=(address_range _v) const { return !(*this == _v); } + bool operator<(address_range _v) const; + bool operator>(address_range _v) const; + + address_range& operator+=(uintptr_t); + address_range& operator-=(uintptr_t); + address_range& operator+=(address_range); + + bool is_range() const; + hash_value_t hash() const; + std::string as_string(int _depth = 0) const; + bool is_valid() const; + uintptr_t size() const; + explicit operator bool() const { return is_valid(); } + + template + void serialize(ArchiveT& ar, const unsigned) + { + ar(cereal::make_nvp("low", low)); + ar(cereal::make_nvp("high", high)); + } +}; +} // namespace binary + +inline binary::address_range +operator+(binary::address_range _lhs, uintptr_t _v) +{ 
+ return (_lhs += _v); +} + +inline binary::address_range +operator+(uintptr_t _v, binary::address_range _lhs) +{ + return (_lhs += _v); +} +} // namespace omnitrace + +namespace std +{ +template <> +struct hash<::omnitrace::binary::address_range> +{ + using address_range_t = ::omnitrace::binary::address_range; + + auto operator()(const address_range_t& _v) const { return _v.hash(); } + auto operator()(address_range_t&& _v) const { return _v.hash(); } +}; +} // namespace std diff --git a/source/lib/omnitrace/library/binary/analysis.cpp b/source/lib/omnitrace/library/binary/analysis.cpp new file mode 100644 index 0000000000..6a8083eb9f --- /dev/null +++ b/source/lib/omnitrace/library/binary/analysis.cpp @@ -0,0 +1,194 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/config.hpp" + +#if !defined(TIMEMORY_USE_BFD) +# error "BFD support not enabled" +#endif + +#define PACKAGE "omnitrace" + +#include + +#include "library/binary/address_range.hpp" +#include "library/binary/analysis.hpp" +#include "library/binary/binary_info.hpp" +#include "library/binary/dwarf_entry.hpp" +#include "library/binary/fwd.hpp" +#include "library/binary/scope_filter.hpp" +#include "library/binary/symbol.hpp" +#include "library/common.hpp" +#include "library/config.hpp" +#include "library/debug.hpp" +#include "library/state.hpp" +#include "library/utility.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace binary +{ +namespace +{ +binary_info +parse_line_info(const std::string& _name) +{ + auto _info = binary_info{}; + + auto& _bfd = _info.bfd; + _bfd = std::make_shared(_name); + + OMNITRACE_VERBOSE(0, "[binary] Reading line info for '%s'...\n", _name.c_str()); + + if(_bfd && _bfd->is_good()) + { + auto& _section_map = _info.sections; + auto _section_set = std::set{}; + auto _processed = std::set{}; + for(auto&& itr : _bfd->get_symbols()) + { + if(itr.symsize == 0) continue; + auto& _sym = _info.symbols.emplace_back(symbol{ itr }); + // if(itr.symsize == 0) continue; + auto* _section = static_cast(itr.section); + _section_set.emplace(_section); + _processed.emplace(itr.address); + _info.ranges.emplace_back( + address_range{ itr.address, itr.address + itr.symsize }); + _sym.read_bfd(*_bfd); + } + + for(auto* itr : _section_set) + { + auto* _section = const_cast(itr); + bfd_vma _section_vma = bfd_section_vma(_section); + bfd_size_type _section_len = bfd_section_size(_section); + auto _section_range = + address_range{ _section_vma, _section_vma + _section_len }; + _section_map[_section_range] = _section; + } + + TIMEMORY_REQUIRE(_section_set.size() == _section_map.size()) + << "section set size (" << _section_set.size() << ") 
!= section map size (" + << _section_map.size() << ")\n"; + + _info.debug_info = dwarf_entry::process_dwarf(_bfd->fd, _info.ranges); + + for(auto& itr : _info.symbols) + itr.read_dwarf(_info.debug_info); + + _info.sort(); + } + + OMNITRACE_VERBOSE(1, "[binary] Reading line info for '%s'... %zu entries\n", + _bfd->name.c_str(), _info.symbols.size()); + + return _info; +} +} // namespace + +std::vector +get_binary_info(const std::vector& _files, + const std::vector& _filters) +{ + auto _satisfies_filter = [&_filters](auto _scope, const std::string& _value) { + for(const auto& itr : _filters) // NOLINT + { + // if the filter is for the specified scope and itr does not satisfy the + // include/exclude mode, return false + if((itr.scope & _scope) == _scope && !itr(_value)) return false; + } + return true; + }; + + auto _satisfies_binary_filter = [&_satisfies_filter](const std::string& _value) { + return _satisfies_filter(scope_filter::BINARY_FILTER, _value); + }; + + // filter function used by procfs::get_contiguous_maps + // ensures that we do not process omnitrace/gotcha/libunwind libraries + // and do not process the libraries outside of the binary scope + auto _filter = [&_satisfies_binary_filter](const procfs::maps& _v) { + if(_v.pathname.empty()) return false; + auto _path = filepath::realpath(_v.pathname, nullptr, false); + return (filepath::exists(_path) && _satisfies_binary_filter(_path)); + }; + + auto _data = std::vector{}; + _data.reserve(_files.size()); + { + auto _exists = std::set{}; + for(const auto& itr : _files) + { + auto _filename = filepath::realpath(itr, nullptr, false); + if(filepath::exists(_filename) && _satisfies_binary_filter(_filename) && + _exists.find(_filename) == _exists.end()) + { + _data.emplace_back(parse_line_info(_filename)); + _exists.emplace(_filename); + } + } + } + + // get the memory maps + auto _maps = procfs::get_contiguous_maps(process::get_id(), _filter, true); + + for(auto& itr : _data) + { + for(const auto& mitr : _maps) + 
if(itr.bfd->name == mitr.pathname) itr.mappings.emplace_back(mitr); + } + + for(auto& itr : _data) + { + for(const auto& mitr : itr.mappings) + { + auto mrange = address_range{ mitr.load_address, mitr.last_address }; + for(auto& sitr : itr.symbols) + { + auto _addr = sitr.address + mitr.load_address; + if(mrange.contains(_addr)) sitr.load_address = mitr.load_address; + } + } + } + + for(auto& itr : _data) + itr.sort(); + + return _data; +} +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/analysis.hpp b/source/lib/omnitrace/library/binary/analysis.hpp new file mode 100644 index 0000000000..6121898686 --- /dev/null +++ b/source/lib/omnitrace/library/binary/analysis.hpp @@ -0,0 +1,59 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "common/defines.h" +#include "library/binary/fwd.hpp" +#include "library/common.hpp" +#include "library/defines.hpp" +#include "library/exception.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace binary +{ +namespace procfs = ::tim::procfs; // NOLINT + +using bfd_file = ::tim::unwind::bfd_file; +using hash_value_t = ::tim::hash_value_t; + +std::vector +get_binary_info(const std::vector&, const std::vector&); +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/binary_info.hpp b/source/lib/omnitrace/library/binary/binary_info.hpp new file mode 100644 index 0000000000..ebd45c26da --- /dev/null +++ b/source/lib/omnitrace/library/binary/binary_info.hpp @@ -0,0 +1,76 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/binary/address_range.hpp" +#include "library/binary/dwarf_entry.hpp" +#include "library/binary/fwd.hpp" +#include "library/binary/symbol.hpp" +#include "library/utility.hpp" + +#include + +#include +#include +#include + +namespace omnitrace +{ +namespace binary +{ +struct binary_info +{ + std::shared_ptr bfd = {}; + std::vector mappings = {}; + std::deque symbols = {}; + std::deque debug_info = {}; + std::vector ranges = {}; + std::unordered_map sections = {}; + + void sort(); + + template + RetT* find_section(uintptr_t); +}; + +inline void +binary_info::sort() +{ + utility::filter_sort_unique(mappings); + utility::filter_sort_unique(symbols); + utility::filter_sort_unique(ranges); + utility::filter_sort_unique(debug_info); +} + +template +inline RetT* +binary_info::find_section(uintptr_t _addr) +{ + for(const auto& sitr : sections) + { + if(sitr.first.contains(_addr)) return static_cast(sitr.second); + } + return nullptr; +} +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/dwarf_entry.cpp b/source/lib/omnitrace/library/binary/dwarf_entry.cpp new file mode 100644 index 0000000000..be29ca228c --- /dev/null +++ b/source/lib/omnitrace/library/binary/dwarf_entry.cpp @@ -0,0 +1,201 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/binary/dwarf_entry.hpp" +#include "library/binary/fwd.hpp" +#include "library/timemory.hpp" +#include "library/utility.hpp" + +#include +#include + +namespace omnitrace +{ +namespace binary +{ +namespace +{ +using utility::combine; + +auto +get_dwarf_address_ranges(Dwarf_Die* _die) +{ + auto _ranges = std::vector{}; + + if(dwarf_tag(_die) != DW_TAG_compile_unit) return _ranges; + + Dwarf_Addr _low_pc; + Dwarf_Addr _high_pc; + dwarf_lowpc(_die, &_low_pc); + dwarf_highpc(_die, &_high_pc); + + _ranges.emplace_back(address_range{ _low_pc, _high_pc }); + + Dwarf_Addr _base_addr; + ptrdiff_t _offset = 0; + do + { + _ranges.emplace_back(address_range{ 0, 0 }); + } while((_offset = dwarf_ranges(_die, _offset, &_base_addr, &_ranges.back().low, + &_ranges.back().high)) > 0); + // will always have one extra + _ranges.pop_back(); + + return _ranges; +} + +auto +get_dwarf_entry(Dwarf_Die* _die) +{ + auto _line_info = std::deque{}; + + if(dwarf_tag(_die) != DW_TAG_compile_unit) return _line_info; + + Dwarf_Lines* _lines = nullptr; + size_t _num_lines = 0; + if(dwarf_getsrclines(_die, &_lines, &_num_lines) == 0) + { + _line_info.resize(_num_lines); + for(size_t j = 0; j < _num_lines; ++j) + { + auto& itr = _line_info.at(j); + auto* _line = dwarf_onesrcline(_lines, j); + if(_line) + { + int _lineno = 0; + uintptr_t _address = 0; + dwarf_lineno(_line, &_lineno); + dwarf_linecol(_line, &itr.col); + dwarf_linebeginstatement(_line, &itr.begin_statement); + dwarf_lineendsequence(_line, &itr.end_sequence); + dwarf_lineblock(_line, &itr.line_block); + dwarf_lineepiloguebegin(_line, &itr.epilogue_begin); + dwarf_lineprologueend(_line, &itr.prologue_end); + dwarf_lineisa(_line, &itr.isa); + dwarf_linediscriminator(_line, &itr.discriminator); + dwarf_lineaddr(_line, &_address); + itr.address = address_range{ _address }; + if(_lineno > 0) itr.line = _lineno; + const auto* _file = dwarf_linesrc(_line, nullptr, nullptr); + if(!_file) _file = dwarf_diename(_die); + itr.file 
= filepath::realpath(_file, nullptr, false); + } + } + } + + return _line_info; +} +} // namespace + +bool +dwarf_entry::operator<(const dwarf_entry& _rhs) const +{ + return std::tie(address, line, col, discriminator) < + std::tie(_rhs.address, _rhs.line, _rhs.col, _rhs.discriminator); +} + +bool +dwarf_entry::operator==(const dwarf_entry& _rhs) const +{ + return std::tie(address, line, col, discriminator, vliw_op_index, isa, file) == + std::tie(_rhs.address, _rhs.line, _rhs.col, _rhs.discriminator, + _rhs.vliw_op_index, _rhs.isa, _rhs.file); +} + +bool +dwarf_entry::operator!=(const dwarf_entry& _rhs) const +{ + return !(*this == _rhs); +} + +bool +dwarf_entry::is_valid() const +{ + return (*this != dwarf_entry{} && !file.empty()); +} + +std::deque +dwarf_entry::process_dwarf(int _fd, std::vector& _ranges) +{ + auto* _dwarf_v = dwarf_begin(_fd, DWARF_C_READ); + auto _line_info = std::deque{}; + + size_t cu_header_size = 0; + Dwarf_Off cu_off = 0; + Dwarf_Off next_cu_off = 0; + for(; dwarf_nextcu(_dwarf_v, cu_off, &next_cu_off, &cu_header_size, nullptr, nullptr, + nullptr) == 0; + cu_off = next_cu_off) + { + Dwarf_Off cu_die_off = cu_off + cu_header_size; + Dwarf_Die cu_die; + if(dwarf_offdie(_dwarf_v, cu_die_off, &cu_die) != nullptr) + { + Dwarf_Die* _die = &cu_die; + if(dwarf_tag(_die) == DW_TAG_compile_unit) + { + combine(_line_info, get_dwarf_entry(_die)); + combine(_ranges, get_dwarf_address_ranges(_die)); + } + } + } + + dwarf_end(_dwarf_v); + utility::filter_sort_unique(_line_info); + utility::filter_sort_unique(_ranges); + + return _line_info; +} + +template +void +dwarf_entry::serialize(ArchiveT& ar, const unsigned int) +{ +#define OMNITRACE_SERIALIZE_MEMBER(MEMBER) ar(::tim::cereal::make_nvp(#MEMBER, MEMBER)); + + OMNITRACE_SERIALIZE_MEMBER(file) + OMNITRACE_SERIALIZE_MEMBER(line) + OMNITRACE_SERIALIZE_MEMBER(col) + OMNITRACE_SERIALIZE_MEMBER(address) + OMNITRACE_SERIALIZE_MEMBER(discriminator) + // OMNITRACE_SERIALIZE_MEMBER(begin_statement) + // 
OMNITRACE_SERIALIZE_MEMBER(end_sequence) + // OMNITRACE_SERIALIZE_MEMBER(line_block) + // OMNITRACE_SERIALIZE_MEMBER(prologue_end) + // OMNITRACE_SERIALIZE_MEMBER(epilogue_begin) + // OMNITRACE_SERIALIZE_MEMBER(vliw_op_index) + // OMNITRACE_SERIALIZE_MEMBER(isa) +} + +template void +dwarf_entry::serialize(cereal::JSONInputArchive&, + const unsigned int); + +template void +dwarf_entry::serialize( + cereal::MinimalJSONOutputArchive&, const unsigned int); + +template void +dwarf_entry::serialize(cereal::PrettyJSONOutputArchive&, + const unsigned int); +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/dwarf_entry.hpp b/source/lib/omnitrace/library/binary/dwarf_entry.hpp new file mode 100644 index 0000000000..0abb5366ad --- /dev/null +++ b/source/lib/omnitrace/library/binary/dwarf_entry.hpp @@ -0,0 +1,62 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/binary/address_range.hpp" +#include "library/binary/fwd.hpp" + +namespace omnitrace +{ +namespace binary +{ +struct dwarf_entry +{ + TIMEMORY_DEFAULT_OBJECT(dwarf_entry) + + bool begin_statement = false; + bool end_sequence = false; + bool line_block = false; + bool prologue_end = false; + bool epilogue_begin = false; + unsigned int line = 0; + int col = 0; + unsigned int vliw_op_index = 0; + unsigned int isa = 0; + unsigned int discriminator = 0; + address_range address = { 0, 0 }; + std::string file = {}; + + bool is_valid() const; + + bool operator<(const dwarf_entry&) const; + bool operator==(const dwarf_entry&) const; + bool operator!=(const dwarf_entry&) const; + explicit operator bool() const { return is_valid(); } + + static std::deque process_dwarf(int _fd, std::vector&); + + template + void serialize(ArchiveT&, const unsigned int); +}; +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/fwd.hpp b/source/lib/omnitrace/library/binary/fwd.hpp new file mode 100644 index 0000000000..e4c9b7ca61 --- /dev/null +++ b/source/lib/omnitrace/library/binary/fwd.hpp @@ -0,0 +1,62 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
namespace omnitrace
{
namespace binary
{
// short alias for timemory's procfs namespace
namespace procfs = ::tim::procfs;  // NOLINT

// timemory types re-exported under the binary namespace
using bfd_file     = ::tim::unwind::bfd_file;
using hash_value_t = ::tim::hash_value_t;

// forward declarations of the binary-analysis types; each is defined in its
// own header under library/binary/
struct address_range;
struct address_multirange;
struct scope_filter;
struct symbol;
struct dwarf_entry;
struct binary_info;
}  // namespace binary
}  // namespace omnitrace
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "library/binary/link_map.hpp" +#include "library/common.hpp" +#include "library/config.hpp" +#include "library/debug.hpp" +#include "library/timemory.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace binary +{ +std::set +get_link_map(const char* _lib, const std::string& _exclude_linked_by, + const std::string& _exclude_re) +{ + auto _get_chain = [](const char* _name) { + void* _handle = dlopen(_name, RTLD_LAZY | RTLD_NOLOAD); + auto _chain = std::set{}; + if(_handle) + { + struct link_map* _link_map = nullptr; + dlinfo(_handle, RTLD_DI_LINKMAP, &_link_map); + struct link_map* _next = _link_map; + while(_next) + { + if(_name == nullptr && _next == _link_map && + std::string_view{ _next->l_name }.empty()) + { + // only insert exe name if dlopened the exe and + // empty name is first entry + _chain.emplace(config::get_exe_realpath()); + } + else if(!std::string_view{ _next->l_name }.empty()) + { + _chain.emplace(_next->l_name); + } + _next = _next->l_next; + } + } + return _chain; + }; + + auto _full_chain = _get_chain(_lib); + auto _excl_chain = (_exclude_linked_by.empty()) + ? std::set{} + : _get_chain(_exclude_linked_by.c_str()); + auto _fini_chain = std::set{}; + + for(const auto& itr : _full_chain) + { + if(_excl_chain.find(itr) == _excl_chain.end()) + { + if(_exclude_re.empty() || !std::regex_search(itr, std::regex{ _exclude_re })) + _fini_chain.emplace(itr); + else + _excl_chain.emplace(itr); + } + } + + auto _name = (!_lib) ? 
config::get_exe_realpath() : std::string{ _lib }; + for(const auto& itr : _fini_chain) + { + OMNITRACE_VERBOSE(2, "[linkmap][%s]: %s\n", filepath::basename(_name), + itr.real().c_str()); + } + + for(const auto& itr : _excl_chain) + { + OMNITRACE_VERBOSE(3, "[linkmap][%s]: %s\n", _exclude_linked_by.c_str(), + link_file{ itr }.real().c_str()); + } + + return _fini_chain; +} + +bool +link_file::operator<(const link_file& _rhs) const +{ + if(name == _rhs.name) return false; + + auto _lhs_base = base(); + auto _lhs_real = real(); + auto _rhs_base = _rhs.base(); + auto _rhs_real = _rhs.real(); + + if(_lhs_base == _rhs_base || _lhs_real == _rhs_real) return false; + + return (_lhs_real < _rhs_real); +} + +std::string_view +link_file::base() const +{ + return std::string_view{ filepath::basename(name) }; +} + +std::string +link_file::real() const +{ + return filepath::realpath(name, nullptr, false); +} +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/link_map.hpp b/source/lib/omnitrace/library/binary/link_map.hpp new file mode 100644 index 0000000000..fecfebdbf0 --- /dev/null +++ b/source/lib/omnitrace/library/binary/link_map.hpp @@ -0,0 +1,54 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
namespace omnitrace
{
namespace binary
{
/// Path of one object in a link chain.
struct link_file
{
    // intentionally implicit: std::set<link_file>::emplace is called with path
    // strings. Taken by value so lvalue views are accepted as well as temporaries.
    link_file(std::string_view _v)
    : name{ _v }
    {}

    /// basename of `name`
    std::string_view base() const;
    /// resolved (real) path of `name`
    std::string real() const;
    /// ordering used for set membership; see the definition for the equivalences
    bool operator<(const link_file&) const;

    std::string name = {};
};

// default parameters: get the linked binaries for the exe but exclude the linked binaries
// from libomnitrace
// NOTE(review): the set element type was not legible in the reviewed copy; link_file
// is restored from the definition, which calls real() on the returned elements.
std::set<link_file>
get_link_map(const char*        _lib               = nullptr,
             const std::string& _exclude_linked_by = "libomnitrace.so",
             const std::string& _exclude_re = "libomnitrace-([a-zA-Z]+)\\.so");
}  // namespace binary
}  // namespace omnitrace
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "library/binary/scope_filter.hpp" +#include "library/exception.hpp" + +#include + +namespace omnitrace +{ +namespace binary +{ +bool +scope_filter::operator()(std::string_view _value) const +{ + if(mode == FILTER_INCLUDE) + return (expression.empty()) + ? true + : std::regex_search(_value.data(), std::regex{ expression }); + else if(mode == FILTER_EXCLUDE) + return (expression.empty()) + ? 
false + : !std::regex_search(_value.data(), std::regex{ expression }); + throw exception{ "invalid scope filter mode" }; +} +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/scope_filter.hpp b/source/lib/omnitrace/library/binary/scope_filter.hpp new file mode 100644 index 0000000000..a12b015a11 --- /dev/null +++ b/source/lib/omnitrace/library/binary/scope_filter.hpp @@ -0,0 +1,75 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/defines.hpp" + +#include +#include + +namespace omnitrace +{ +namespace binary +{ +struct scope_filter +{ + enum filter_mode : uint8_t + { + FILTER_INCLUDE = 0, + FILTER_EXCLUDE + }; + + enum filter_scope : uint8_t + { + UNIVERSAL_FILTER = (1 << 0), + BINARY_FILTER = (1 << 1), + SOURCE_FILTER = (1 << 2), + FUNCTION_FILTER = (1 << 3) + }; + + filter_mode mode = FILTER_INCLUDE; + filter_scope scope = UNIVERSAL_FILTER; + std::string expression = {}; + + bool operator()(std::string_view _value) const; + + template + static bool satisfies_filter(const ContainerT&, filter_scope, + std::string_view) OMNITRACE_PURE; +}; + +template +inline bool +scope_filter::satisfies_filter(const ContainerT& _filters, filter_scope _scope, + std::string_view _value) +{ + for(const auto& itr : _filters) // NOLINT + { + // if the filter is for the specified scope and itr does not satisfy the + // include/exclude mode, return false + if((itr.scope & _scope) > 0 && !itr(_value)) return false; + } + return true; +} +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/symbol.cpp b/source/lib/omnitrace/library/binary/symbol.cpp new file mode 100644 index 0000000000..b2fdff880a --- /dev/null +++ b/source/lib/omnitrace/library/binary/symbol.cpp @@ -0,0 +1,356 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/config.hpp" +#include "library/debug.hpp" + +#if !defined(TIMEMORY_USE_BFD) +# error "BFD support not enabled" +#endif + +#define PACKAGE "omnitrace" +#define L_LNNO_SIZE 4 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "library/binary/dwarf_entry.hpp" +#include "library/binary/fwd.hpp" +#include "library/binary/scope_filter.hpp" +#include "library/binary/symbol.hpp" +#include "library/timemory.hpp" +#include "library/utility.hpp" + +#include + +namespace omnitrace +{ +namespace binary +{ +namespace +{ +std::vector +read_inliner_info(bfd* _inp) +{ + auto _data = std::vector{}; + while(true) + { + const char* _file = nullptr; + const char* _func = nullptr; + unsigned int _line = 0; + if(bfd_find_inliner_info(_inp, &_file, &_func, &_line) != 0) + { + if(_file && _func && _line > 0) + _data.emplace_back(inlined_symbol{ + _line, filepath::realpath(_file, nullptr, false), _func }); + } + else + { + break; + } + } + + return _data; +} +} // namespace + +symbol::symbol(const base_type& _v) +: base_type{ _v } +, address{ _v.address, _v.address + _v.symsize } +{} + +bool +symbol::operator==(const symbol& _rhs) const +{ + return std::tie(address, base_type::name) == + std::tie(_rhs.address, _rhs.base_type::name); +} + +bool +symbol::operator<(const symbol& _rhs) const +{ + return std::tie(address, base_type::binding, base_type::visibility, base_type::name) < + std::tie(_rhs.address, _rhs.base_type::binding, base_type::visibility, + base_type::name); +} + +bool +symbol::operator()(const std::vector& _filters) const +{ + using sf = scope_filter; + + // apply filters to the main symbol + return (sf::satisfies_filter(_filters, sf::FUNCTION_FILTER, demangle(func)) && + (sf::satisfies_filter(_filters, sf::SOURCE_FILTER, file) || + sf::satisfies_filter(_filters, sf::SOURCE_FILTER, join(':', file, line)))); +} + +symbol& +symbol::operator+=(const symbol& _rhs) +{ + if(address.contiguous_with(_rhs.address) 
&& + std::tie(line, load_address, func, file) == + std::tie(_rhs.line, _rhs.load_address, _rhs.func, _rhs.file)) + { + address += _rhs.address; + utility::combine(inlines, _rhs.inlines); + utility::combine(dwarf_info, _rhs.dwarf_info); + } + else + { + throw exception("incompatible symbol+="); + } + + return *this; +} + +symbol::operator bool() const +{ + return address.is_valid() && (file.length() + func.length() + line) > 0; +} + +size_t +symbol::read_dwarf(const std::deque& _info) +{ + for(const auto& itr : _info) + { + if(address.contains(itr.address)) dwarf_info.emplace_back(itr); + } + + // make sure the dwarf info is sorted by address (low to high) + std::sort(dwarf_info.begin(), dwarf_info.end(), + [](const dwarf_entry& _lhs, const dwarf_entry& _rhs) { + return _lhs.address < _rhs.address; + }); + + // helper for getting the end address + auto _get_next_address = [&](auto nitr, uintptr_t _low) { + while(++nitr != dwarf_info.end()) + { + if(nitr->address.low > _low) + { + return nitr->address.low; + } + } + // return the end address of the symbol + return address.high; + }; + // convert the single addresses into ranges + for(auto itr = dwarf_info.begin(); itr != dwarf_info.end(); ++itr) + { + // if address is already a range, do not update it + if(!itr->address.is_range()) + itr->address = address_range{ itr->address.low, + _get_next_address(itr, itr->address.low) }; + } + + return dwarf_info.size(); +} + +bool +symbol::read_bfd(bfd_file& _bfd) +{ + auto* _section = static_cast(section); + bfd_vma _vma = bfd_section_vma(_section); + bfd_size_type _size = bfd_section_size(_section); + + auto& _pc = address.low; + auto& _pc_end = address.high; + + if(_pc < _vma || _pc >= _vma + _size) return false; + // add one to vma + size because address range is exclusive of last address + if(_pc_end > _vma + _size) _pc_end = (_vma + _size); + + auto* _inp = static_cast(_bfd.data); + auto* _syms = reinterpret_cast(_bfd.syms); + + { + const char* _file = nullptr; + const 
char* _func = nullptr; + unsigned int _line = 0; + unsigned int _discriminator = 0; + + // if(bfd_find_nearest_line(_inp, _section, _syms, _pc - _vma, &_file, + // &_func, &_line) != 0) + if(bfd_find_nearest_line_discriminator(_inp, _section, _syms, _pc - _vma, &_file, + &_func, &_line, &_discriminator) != 0) + { + if(_file) file = _file; + if(_func) func = _func; + if(_file && strnlen(_file, 1) > 0) + file = _file; + else if(!_file || strnlen(_file, 1) == 0) + file = bfd_get_filename(_inp); + if(!func.empty()) + { + file = filepath::realpath(file, nullptr, false); + line = _line; + inlines = read_inliner_info(_inp); + return true; + } + } + } + + return false; +} + +symbol +symbol::clone() const +{ + auto _sym = symbol{ static_cast(*this) }; + _sym.line = line; + _sym.load_address = load_address; + _sym.address = address; + _sym.func = func; + _sym.file = file; + + return _sym; +} + +template +Tp +symbol::get_inline_symbols(const std::vector& _filters) const +{ + using sf = scope_filter; + using value_type = typename Tp::value_type; + + auto _data = Tp{}; + + for(const auto& itr : inlines) + { + if(sf::satisfies_filter(_filters, sf::FUNCTION_FILTER, demangle(itr.func)) && + (sf::satisfies_filter(_filters, sf::SOURCE_FILTER, itr.file) || + sf::satisfies_filter(_filters, sf::SOURCE_FILTER, + join(':', itr.file, itr.line)))) + { + if constexpr(concepts::is_unqualified_same::value) + { + auto _sym = clone(); + _sym.func = itr.func; + _sym.line = itr.line; + _sym.file = itr.file; + _data.emplace_back(_sym); + } + else if constexpr(concepts::is_unqualified_same::value) + { + _data.emplace_back(itr); + } + } + } + + return _data; +} + +template +Tp +symbol::get_debug_line_info(const std::vector& _filters) const +{ + using sf = scope_filter; + using value_type = typename Tp::value_type; + + auto _data = Tp{}; + + if(sf::satisfies_filter(_filters, sf::FUNCTION_FILTER, demangle(func))) + { + for(const auto& itr : dwarf_info) + { + if(sf::satisfies_filter(_filters, 
sf::SOURCE_FILTER, itr.file) || + sf::satisfies_filter(_filters, sf::SOURCE_FILTER, + join(':', itr.file, itr.line))) + { + if constexpr(concepts::is_unqualified_same::value) + { + auto _sym = clone(); + _sym.address = itr.address; + _sym.file = itr.file; + _sym.line = itr.line; + _data.emplace_back(_sym); + } + else if constexpr(concepts::is_unqualified_same::value) + { + _data.emplace_back(itr); + } + } + } + } + + return _data; +} + +template +void +inlined_symbol::serialize(ArchiveT& ar, const unsigned int) +{ + using ::tim::cereal::make_nvp; + ar(make_nvp("func", func), make_nvp("file", file), make_nvp("line", line)); +} + +template +void +symbol::serialize(ArchiveT& ar, const unsigned int) +{ + using ::tim::cereal::make_nvp; + ar(make_nvp("address", address), make_nvp("load_address", load_address), + make_nvp("line", line), make_nvp("func", func), make_nvp("file", file), + make_nvp("inlines", inlines), make_nvp("dwarf_info", dwarf_info)); + if constexpr(concepts::is_output_archive::value) + ar(cereal::make_nvp("dfunc", demangle(func))); +} + +template void +symbol::serialize(cereal::JSONInputArchive&, + const unsigned int); + +template void +symbol::serialize(cereal::MinimalJSONOutputArchive&, + const unsigned int); + +template void +symbol::serialize(cereal::PrettyJSONOutputArchive&, + const unsigned int); + +template std::deque +symbol::get_inline_symbols>( + const std::vector& _filters) const; + +template std::vector +symbol::get_inline_symbols>( + const std::vector& _filters) const; + +template std::deque +symbol::get_debug_line_info>( + const std::vector& _filters) const; + +template std::vector +symbol::get_debug_line_info>( + const std::vector& _filters) const; +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/binary/symbol.hpp b/source/lib/omnitrace/library/binary/symbol.hpp new file mode 100644 index 0000000000..ebbfc62780 --- /dev/null +++ b/source/lib/omnitrace/library/binary/symbol.hpp @@ -0,0 +1,96 @@ +// 
MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/binary/address_range.hpp" +#include "library/binary/fwd.hpp" + +#include + +#include +#include +#include +#include + +namespace omnitrace +{ +namespace binary +{ +struct inlined_symbol +{ + unsigned int line = 0; + std::string file = {}; + std::string func = {}; + + template + void serialize(ArchiveT&, const unsigned int); +}; + +struct symbol : private tim::unwind::bfd_file::symbol +{ + using base_type = tim::unwind::bfd_file::symbol; + + symbol() = default; + symbol(const base_type& _v); + + ~symbol() = default; + symbol(const symbol&) = default; + symbol(symbol&&) noexcept = default; + + symbol& operator=(const symbol&) = default; + symbol& operator=(symbol&&) noexcept = default; + + bool operator==(const symbol&) const; + bool operator<(const symbol&) const; + bool operator()(const std::vector&) const; + symbol& operator+=(const symbol&); + explicit operator bool() const; + + bool read_bfd(bfd_file&); + size_t read_dwarf(const std::deque&); + address_range ipaddr() const { return address + load_address; } + symbol clone() const; + + template > + Tp get_inline_symbols(const std::vector&) const; + + template > + Tp get_debug_line_info(const std::vector&) const; + + template + void serialize(ArchiveT&, const unsigned int); + + using base_type::binding; + using base_type::section; + using base_type::visibility; + + unsigned int line = 0; + uintptr_t load_address = 0; + address_range address = {}; + std::string func = {}; + std::string file = {}; + std::vector inlines = {}; + std::vector dwarf_info = {}; +}; +} // namespace binary +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/CMakeLists.txt b/source/lib/omnitrace/library/causal/CMakeLists.txt new file mode 100644 index 0000000000..b4ab0062fc --- /dev/null +++ b/source/lib/omnitrace/library/causal/CMakeLists.txt @@ -0,0 +1,22 @@ +# +set(causal_sources + ${CMAKE_CURRENT_LIST_DIR}/data.cpp + ${CMAKE_CURRENT_LIST_DIR}/delay.cpp + 
${CMAKE_CURRENT_LIST_DIR}/experiment.cpp + # ${CMAKE_CURRENT_LIST_DIR}/perf.cpp + ${CMAKE_CURRENT_LIST_DIR}/sample_data.cpp + ${CMAKE_CURRENT_LIST_DIR}/sampling.cpp + ${CMAKE_CURRENT_LIST_DIR}/selected_entry.cpp) + +set(causal_headers + ${CMAKE_CURRENT_LIST_DIR}/data.hpp + ${CMAKE_CURRENT_LIST_DIR}/delay.hpp + ${CMAKE_CURRENT_LIST_DIR}/experiment.hpp + # ${CMAKE_CURRENT_LIST_DIR}/perf.hpp + ${CMAKE_CURRENT_LIST_DIR}/sample_data.hpp + ${CMAKE_CURRENT_LIST_DIR}/sampling.hpp + ${CMAKE_CURRENT_LIST_DIR}/selected_entry.hpp) + +target_sources(omnitrace-object-library PRIVATE ${causal_sources} ${causal_headers}) + +add_subdirectory(components) diff --git a/source/lib/omnitrace/library/causal/components/CMakeLists.txt b/source/lib/omnitrace/library/causal/components/CMakeLists.txt new file mode 100644 index 0000000000..ef71d103c5 --- /dev/null +++ b/source/lib/omnitrace/library/causal/components/CMakeLists.txt @@ -0,0 +1,16 @@ +# +set(component_sources + ${CMAKE_CURRENT_LIST_DIR}/backtrace.cpp + ${CMAKE_CURRENT_LIST_DIR}/blocking_gotcha.cpp + ${CMAKE_CURRENT_LIST_DIR}/causal_gotcha.cpp + ${CMAKE_CURRENT_LIST_DIR}/progress_point.cpp + ${CMAKE_CURRENT_LIST_DIR}/unblocking_gotcha.cpp) + +set(component_headers + ${CMAKE_CURRENT_LIST_DIR}/backtrace.hpp + ${CMAKE_CURRENT_LIST_DIR}/blocking_gotcha.hpp + ${CMAKE_CURRENT_LIST_DIR}/causal_gotcha.hpp + ${CMAKE_CURRENT_LIST_DIR}/progress_point.hpp + ${CMAKE_CURRENT_LIST_DIR}/unblocking_gotcha.hpp) + +target_sources(omnitrace-object-library PRIVATE ${component_sources} ${component_headers}) diff --git a/source/lib/omnitrace/library/causal/components/backtrace.cpp b/source/lib/omnitrace/library/causal/components/backtrace.cpp new file mode 100644 index 0000000000..7210383712 --- /dev/null +++ b/source/lib/omnitrace/library/causal/components/backtrace.cpp @@ -0,0 +1,227 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/causal/components/backtrace.hpp" +#include "library/causal/data.hpp" +#include "library/causal/delay.hpp" +#include "library/causal/experiment.hpp" +#include "library/concepts.hpp" +#include "library/config.hpp" +#include "library/debug.hpp" +#include "library/runtime.hpp" +#include "library/state.hpp" +#include "library/thread_data.hpp" +#include "library/thread_info.hpp" +#include "library/tracing.hpp" +#include "library/utility.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace component +{ +namespace +{ +using ::tim::backtrace::get_unw_signal_frame_stack_raw; + +auto& +get_delay_statistics() +{ + using thread_data_t = + thread_data>, category::sampling>; + + static_assert( + use_placement_new_when_generating_unique_ptr::value, + "delay statistics thread data should use placement new to allocate unique_ptr"); + + static auto& _v = thread_data_t::instance(construct_on_init{}); + return _v; +} + +auto& +get_in_use() +{ + using thread_data_t = thread_data, category::sampling>; + + static_assert( + use_placement_new_when_generating_unique_ptr::value, + "sampling is_use thread data should use placement new to allocate unique_ptr"); + + static auto& _v = thread_data_t::instance(construct_on_init{}); + return _v; +} + +struct scoped_in_use +{ + scoped_in_use(int64_t _tid = utility::get_thread_index()) + : value{ get_in_use()->at(_tid) } + { + value = true; + } + ~scoped_in_use() { value = false; } + + bool& value; +}; + +auto +is_in_use(int64_t _tid = threading::get_id()) +{ + return get_in_use()->at(_tid); +} + +} // namespace + +void +backtrace::start() +{ + // do not delete these lines. 
// Records one causal-profiling sample for the signal `_sig`.
//
// A sample that arrives while this thread's in-use flag is set is dropped, but a
// realtime-signal sample still refreshes the last-sample timestamp. Otherwise the
// current experiment index and the unwound stack are stored, and then:
//   - cputime signal:  selects a new target when no experiment is active, or
//                      processes pending delays when one is
//   - realtime signal: updates the per-thread sampling-period statistics and, if
//                      the stack hits the selected entry of an active experiment,
//                      credits a delay to the thread-local delay counter
// Any other signal is reported through OMNITRACE_THROW.
// NOTE(review): presumably invoked from the sampling signal handler -- confirm.
void
backtrace::sample(int _sig)
{
    // NOTE(review): depth / ignore_depth are not referenced below in the reviewed
    // copy; they look like template arguments of the unwind call whose argument
    // list was lost -- confirm against the upstream file.
    constexpr size_t  depth        = ::omnitrace::causal::unwind_depth;
    constexpr int64_t ignore_depth = ::omnitrace::causal::unwind_offset;

    // update the last sample for backtrace signal(s) even when in use
    static thread_local int64_t _last_sample = 0;

    if(is_in_use())
    {
        if(_sig == get_realtime_signal()) _last_sample = tracing::now();
        return;
    }
    // marks this thread as in use until the end of the function
    scoped_in_use _in_use{};

    m_index = causal::experiment::get_index();
    m_stack = get_unw_signal_frame_stack_raw();

    // the batch handler timer delivers a signal according to the thread CPU
    // clock, ensuring that setting the current selection and processing the
    // delays only happens when the thread is active
    if(_sig == get_cputime_signal())
    {
        if(!causal::experiment::is_active())
            causal::set_current_selection(m_stack);
        else
            causal::delay::process();
    }
    else if(_sig == get_realtime_signal())
    {
        auto  _this_sample = tracing::now();
        auto& _period_stat = get_delay_statistics()->at(threading::get_id());
        // the first sample has no predecessor, so no period is recorded for it
        if(_last_sample > 0) _period_stat += (_this_sample - _last_sample);
        _last_sample = _this_sample;

        if(causal::experiment::is_active() && causal::experiment::is_selected(m_stack))
        {
            m_selected = true;
            causal::experiment::add_selected();
            // compute the delay time based on the rate of taking samples,
            // unless we have taken less than 10, in which case, we just
            // use the pre-computed value.
            auto _delay =
                (_period_stat.get_count() < 10)
                    ? causal::experiment::get_delay()
                    : (_period_stat.get_mean() * causal::experiment::get_delay_scaling());
            causal::delay::get_local() += _delay;
        }
    }
    else
    {
        OMNITRACE_THROW("unhandled signal %i\n", _sig);
    }
}
@@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/causal/data.hpp" +#include "library/causal/sample_data.hpp" +#include "library/common.hpp" +#include "library/components/fwd.hpp" +#include "library/defines.hpp" +#include "library/timemory.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace component +{ +struct backtrace +: tim::component::empty_base +, tim::concepts::component +{ + using value_type = void; + using sample_data_set_t = std::set; + + static std::string label() { return "causal::backtrace"; } + static std::string description() + { + return "Causal profiling data collected in backtrace"; + } + + backtrace() = default; + ~backtrace() = default; + backtrace(const backtrace&) = default; + backtrace(backtrace&&) noexcept = default; + + backtrace& operator=(const backtrace&) = default; + backtrace& operator=(backtrace&&) noexcept = default; + + static void start(); + static void stop(); + + void sample(int = -1); + + auto get_selected() const { return m_selected; } + auto get_index() const { return m_index; } + auto get_stack() const { return m_stack; } + + template + static Tp get_period(uint64_t _units = units::nsec); + + static tim::statistics get_period_stats(); + static void reset_period_stats(); + +private: + bool m_selected = false; + uint32_t m_index = 0; + causal::unwind_addr_t m_stack = {}; +}; +} // namespace component +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/components/blocking_gotcha.cpp b/source/lib/omnitrace/library/causal/components/blocking_gotcha.cpp new file mode 100644 index 0000000000..d76c960e12 --- /dev/null +++ b/source/lib/omnitrace/library/causal/components/blocking_gotcha.cpp @@ -0,0 +1,154 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/causal/components/blocking_gotcha.hpp" +#include "library/causal/delay.hpp" +#include "library/causal/experiment.hpp" +#include "library/config.hpp" +#include "library/debug.hpp" +#include "library/runtime.hpp" +#include "library/state.hpp" + +#include +#include + +#include +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace component +{ +std::string +blocking_gotcha::label() +{ + return "causal_blocking_gotcha"; +} + +std::string +blocking_gotcha::description() +{ + return "Handles executing all necessary pauses before the thread performs some " + "blocking function"; +} + +void +blocking_gotcha::preinit() +{ + configure(); +} + +void +blocking_gotcha::configure() +{ + blocking_gotcha_t::get_initializer() = []() { + if(!config::get_use_causal()) return; + + blocking_gotcha_t::configure( + comp::gotcha_config<0, int, pthread_mutex_t*>{ "pthread_mutex_lock" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<1, int, pthread_mutex_t*>{ "pthread_mutex_trylock" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<2, int, pthread_rwlock_t*>{ "pthread_rwlock_wrlock" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<3, int, pthread_rwlock_t*>{ "pthread_rwlock_trywrlock" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<4, int, pthread_spinlock_t*>{ "pthread_spin_lock" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<5, int, pthread_spinlock_t*>{ "pthread_spin_trylock" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<6, int, pthread_t, void**>{ "pthread_join" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<7, int, pthread_cond_t*, pthread_mutex_t*>{ + "pthread_cond_wait" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<8, int, pthread_cond_t*, pthread_mutex_t*, + const struct timespec*>{ "pthread_cond_timedwait" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<9, int, const sigset_t*, int*>{ "sigwait" }); + + 
blocking_gotcha_t::configure( + comp::gotcha_config<10, int, const sigset_t*, int*, siginfo_t*>{ + "sigwaitinfo" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<11, int, const sigset_t*, int*, siginfo_t*, + const struct timespec*>{ "sigtimedwait" }); + + blocking_gotcha_t::configure( + comp::gotcha_config<12, int, const sigset_t*>{ "sigsuspend" }); + }; +} + +void +blocking_gotcha::shutdown() +{ + blocking_gotcha_t::disable(); +} + +void +blocking_gotcha::start() +{ + if(causal::experiment::is_active() && + get_thread_state() == ::omnitrace::ThreadState::Enabled && delay_value == 0) + delay_value = causal::delay::get_global().load(); +} + +void +blocking_gotcha::audit(const comp::gotcha_data& _data, audit::outgoing, int _ret) +{ + // if one of the try/timed functions did not succeed, reset the delay value to zero + if(_ret != 0 && _ret != ETIMEDOUT && + std::set{ 1, 3, 5, 8, 11 }.count(_data.index) > 0) + { + delay_value = 0; + } +} + +void +blocking_gotcha::stop() +{ + if(delay_value > 0 && causal::experiment::is_active() && + get_thread_state() == ::omnitrace::ThreadState::Enabled) + { + causal::delay::postblock(delay_value); + delay_value = 0; + } +} +} // namespace component +} // namespace causal +} // namespace omnitrace + +TIMEMORY_INVOKE_PREINIT(omnitrace::causal::component::blocking_gotcha) diff --git a/source/lib/omnitrace/library/causal/components/blocking_gotcha.hpp b/source/lib/omnitrace/library/causal/components/blocking_gotcha.hpp new file mode 100644 index 0000000000..d828459eca --- /dev/null +++ b/source/lib/omnitrace/library/causal/components/blocking_gotcha.hpp @@ -0,0 +1,76 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/common.hpp" +#include "library/defines.hpp" +#include "library/timemory.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace component +{ +using timespec_t = struct timespec; +// this is used to wrap pthread_mutex() +struct blocking_gotcha : comp::base +{ + static constexpr size_t gotcha_capacity = 13; + + TIMEMORY_DEFAULT_OBJECT(blocking_gotcha) + + // string id for component + static std::string label(); + static std::string description(); + static void preinit(); + + // generate the gotcha wrappers + static void configure(); + static void shutdown(); + + void start(); + void audit(const comp::gotcha_data&, audit::outgoing, int); + void stop(); + +private: + int64_t delay_value = 0; +}; + +using blocking_gotcha_t = + comp::gotcha, category::causal>; +} // namespace component +} // namespace causal +} // namespace omnitrace + +OMNITRACE_DEFINE_CONCRETE_TRAIT(prevent_reentry, causal::component::blocking_gotcha_t, + false_type) diff --git a/source/lib/omnitrace/library/causal/components/causal_gotcha.cpp b/source/lib/omnitrace/library/causal/components/causal_gotcha.cpp new file mode 100644 index 0000000000..0292b35b0b --- /dev/null +++ b/source/lib/omnitrace/library/causal/components/causal_gotcha.cpp @@ -0,0 +1,95 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/causal/components/causal_gotcha.hpp" +#include "library/causal/components/blocking_gotcha.hpp" +#include "library/causal/components/unblocking_gotcha.hpp" +#include "library/config.hpp" + +#include +#include +#include + +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace component +{ +namespace +{ +using bundle_t = tim::lightweight_tuple; + +auto& +get_bundle() +{ + static auto _v = std::unique_ptr{}; + if(!_v) _v = std::make_unique("causal_gotcha"); + return _v; +} + +bool is_configured = false; +} // namespace + +//--------------------------------------------------------------------------------------// + +void +causal_gotcha::configure() +{ + if(!is_configured) + { + blocking_gotcha::configure(); + unblocking_gotcha::configure(); + is_configured = true; + } +} + +void +causal_gotcha::shutdown() +{ + if(is_configured) + { + blocking_gotcha::shutdown(); + unblocking_gotcha::shutdown(); + is_configured = false; + } +} + +void +causal_gotcha::start() +{ + configure(); + get_bundle()->start(); +} + +void +causal_gotcha::stop() +{ + get_bundle()->stop(); + shutdown(); +} +} // namespace component +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/components/causal_gotcha.hpp b/source/lib/omnitrace/library/causal/components/causal_gotcha.hpp new file mode 100644 index 0000000000..f089d45edd --- /dev/null +++ b/source/lib/omnitrace/library/causal/components/causal_gotcha.hpp @@ -0,0 +1,54 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/common.hpp" +#include "library/defines.hpp" +#include "library/timemory.hpp" + +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace component +{ +struct causal_gotcha : tim::component::base +{ + TIMEMORY_DEFAULT_OBJECT(causal_gotcha) + + // string id for component + static std::string label() { return "causal_gotcha"; } + + // generate the gotcha wrappers + static void configure(); + static void shutdown(); + + static void start(); + static void stop(); +}; +} // namespace component +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/components/progress_point.cpp b/source/lib/omnitrace/library/causal/components/progress_point.cpp new file mode 100644 index 0000000000..f375ede750 --- /dev/null +++ b/source/lib/omnitrace/library/causal/components/progress_point.cpp @@ -0,0 +1,241 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "library/causal/components/progress_point.hpp" +#include "library/causal/experiment.hpp" +#include "library/common.hpp" +#include "library/concepts.hpp" +#include "library/debug.hpp" +#include "library/thread_data.hpp" +#include "library/timemory.hpp" + +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace component +{ +namespace +{ +using progress_allocator_t = tim::data::ring_buffer_allocator; +using progress_map_t = std::unordered_map; + +auto& +get_progress_map() +{ + using thread_data_t = thread_data>; + static auto& _v = thread_data_t::instance(construct_on_init{}); + return _v; +} + +progress_map_t& +get_progress_map(int64_t _tid) +{ + return get_progress_map()->at(_tid); +} + +auto& +get_progress_allocator(int64_t _tid) +{ + static auto& _v = thread_data::instances(construct_on_init{}); + return _v.at(_tid); +} +} // namespace + +std::unordered_map +progress_point::get_progress_points() +{ + auto _data = std::unordered_map{}; + if(!get_progress_map()) return _data; + for(const auto& titr : *get_progress_map()) + { + for(const auto& itr : titr) + { + if(itr.second) + { + auto& ditr = _data[itr.first]; + ditr += *itr.second; + ditr.set_hash(itr.second->get_hash()); + itr.second->set_value(0); + } + } + } + return _data; +} + +std::string +progress_point::label() +{ + return "progress_point"; +} + +std::string +progress_point::description() +{ + return "Tracks progress point latency and throughput for causal profiling"; +} + +void +progress_point::start() +{ + ++m_arrival; +} + +void +progress_point::stop() +{ + ++m_departure; +} + +void +progress_point::mark() +{ + ++m_delta; +} + +void +progress_point::set_value(int64_t _v) +{ + m_delta = _v; 
+ m_arrival = _v; + m_departure = _v; +} + +progress_point& +progress_point::operator+=(const progress_point& _v) +{ + if(this != &_v) + { + m_delta += _v.m_delta; + m_arrival += _v.m_arrival; + m_departure += _v.m_departure; + } + return *this; +} + +progress_point& +progress_point::operator-=(const progress_point& _v) +{ + if(this != &_v) + { + m_delta -= _v.m_delta; + m_arrival -= _v.m_arrival; + m_departure -= _v.m_departure; + } + return *this; +} + +bool +progress_point::is_throughput_point() const +{ + return (m_delta != 0); +} + +bool +progress_point::is_latency_point() const +{ + return (m_arrival != 0 || m_departure != 0); +} + +int64_t +progress_point::get_delta() const +{ + return m_delta; +} + +int64_t +progress_point::get_arrival() const +{ + if(!is_latency_point()) return m_arrival; + // when it is a latency point, we want the difference to be greater than zero + return (m_arrival >= m_departure) ? (m_arrival + 1) : m_arrival; +} + +int64_t +progress_point::get_departure() const +{ + // if(!is_latency_point()) return m_departure; + // return (m_departure <= m_arrival) ? 
m_departure : (m_departure + 1); + return m_departure; +} + +int64_t +progress_point::get_latency_delta() const +{ + return (get_arrival() - get_departure()); +} + +int64_t +progress_point::get_laps() const +{ + return std::max(get_delta(), get_latency_delta()); +} + +void +progress_point::print(std::ostream& os) const +{ + os << tim::get_hash_identifier(m_hash) << " :: "; + tim::operation::base_printer(os, *this); +} +} // namespace component +} // namespace causal +} // namespace omnitrace + +namespace tim +{ +namespace operation +{ +namespace causal = omnitrace::causal; + +void +push_node::operator()(type& _obj, scope::config, + hash_value_t _hash, + int64_t _tid) const +{ + auto itr = causal::component::get_progress_map(_tid).emplace(_hash, nullptr); + if(itr.second && !itr.first->second) + { + auto& _alloc = causal::component::get_progress_allocator(_tid); + auto* _val = _alloc->allocate(1); + _alloc->construct(_val); + _val->set_hash(_hash); + itr.first->second = _val; + } + _obj.set_hash(_hash); + _obj.set_iterator(itr.first->second); +} + +void +pop_node::operator()(type& _obj, int64_t) const +{ + auto* itr = _obj.get_iterator(); + if(itr && !(_obj.get_is_invalid() || _obj.get_is_running())) + { + *itr += _obj; + } +} +} // namespace operation +} // namespace tim diff --git a/source/lib/omnitrace/library/causal/components/progress_point.hpp b/source/lib/omnitrace/library/causal/components/progress_point.hpp new file mode 100644 index 0000000000..56d5b07e29 --- /dev/null +++ b/source/lib/omnitrace/library/causal/components/progress_point.hpp @@ -0,0 +1,157 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/common.hpp" +#include "library/components/fwd.hpp" +#include "library/defines.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace component +{ +struct progress_point : comp::base +{ + using base_type = comp::base; + using value_type = int64_t; + using hash_type = tim::hash_value_t; + using iterator_type = progress_point*; + + static std::string label(); + static std::string description(); + + TIMEMORY_DEFAULT_OBJECT(progress_point) + + void start(); + void stop(); + void mark(); + void set_value(int64_t); + progress_point& operator+=(const progress_point&); + progress_point& operator-=(const progress_point&); + + bool is_throughput_point() const; + bool is_latency_point() const; + void print(std::ostream& os) const; + + void set_hash(hash_type _v) { m_hash = _v; } + void set_iterator(iterator_type _v) { m_iterator = _v; } + auto get_iterator() const { return m_iterator; } + auto get_hash() const { return m_hash; } + int64_t get_delta() const; + int64_t get_arrival() const; + int64_t get_departure() const; + int64_t get_latency_delta() const; + int64_t get_laps() const; + + template + void load(ArchiveT& ar, const unsigned) + { + namespace cereal = ::tim::cereal; + auto _name = std::string{}; + + ar(cereal::make_nvp("name", _name)); + ar(cereal::make_nvp("delta", m_delta)); + ar(cereal::make_nvp("arrival", m_arrival)); + ar(cereal::make_nvp("departure", m_departure)); + m_hash = tim::hash::add_hash_id(_name); + } + + template + void save(ArchiveT& ar, const unsigned) const + { + namespace cereal = ::tim::cereal; + ar(cereal::make_nvp("hash", m_hash)); + ar(cereal::make_nvp("name", std::string{ tim::get_hash_identifier(m_hash) })); + ar(cereal::make_nvp("delta", m_delta)); + ar(cereal::make_nvp("arrival", m_arrival)); + ar(cereal::make_nvp("departure", m_departure)); + } + + static std::unordered_map get_progress_points(); + +private: + 
hash_type m_hash = 0; + int64_t m_delta = 0; + int64_t m_arrival = 0; + int64_t m_departure = 0; + progress_point* m_iterator = nullptr; +}; +} // namespace component +} // namespace causal +} // namespace omnitrace + +OMNITRACE_DEFINE_CONCRETE_TRAIT(uses_storage, causal::component::progress_point, + false_type) +OMNITRACE_DEFINE_CONCRETE_TRAIT(flat_storage, causal::component::progress_point, + true_type) +OMNITRACE_DEFINE_CONCRETE_TRAIT(uses_timing_units, causal::component::progress_point, + true_type) +OMNITRACE_DEFINE_CONCRETE_TRAIT(is_timing_category, causal::component::progress_point, + true_type) + +namespace tim +{ +namespace operation +{ +template <> +struct push_node +{ + using type = omnitrace::causal::component::progress_point; + + TIMEMORY_DEFAULT_OBJECT(push_node) + + push_node(type& _obj, scope::config _scope, hash_value_t _hash, + int64_t _tid = threading::get_id()) + { + (*this)(_obj, _scope, _hash, _tid); + } + + void operator()(type& _obj, scope::config, hash_value_t _hash, + int64_t _tid = threading::get_id()) const; +}; + +template <> +struct pop_node +{ + using type = omnitrace::causal::component::progress_point; + + TIMEMORY_DEFAULT_OBJECT(pop_node) + + pop_node(type& _obj, int64_t _tid = threading::get_id()) { (*this)(_obj, _tid); } + + void operator()(type& _obj, int64_t _tid = threading::get_id()) const; +}; +} // namespace operation +} // namespace tim diff --git a/source/lib/omnitrace/library/causal/components/unblocking_gotcha.cpp b/source/lib/omnitrace/library/causal/components/unblocking_gotcha.cpp new file mode 100644 index 0000000000..f29d1b3139 --- /dev/null +++ b/source/lib/omnitrace/library/causal/components/unblocking_gotcha.cpp @@ -0,0 +1,135 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/causal/components/unblocking_gotcha.hpp" +#include "library/causal/delay.hpp" +#include "library/causal/experiment.hpp" +#include "library/config.hpp" +#include "library/debug.hpp" +#include "library/runtime.hpp" + +#include +#include + +#include +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace component +{ +std::string +unblocking_gotcha::label() +{ + return "causal_unblocking_gotcha"; +} + +std::string +unblocking_gotcha::description() +{ + return "Handles executing all necessary pauses before the thread performs some " + "blocking function"; +} + +void +unblocking_gotcha::preinit() +{ + configure(); +} + +void +unblocking_gotcha::configure() +{ + unblocking_gotcha_t::get_initializer() = []() { + if(!config::get_use_causal()) return; + + unblocking_gotcha_t::configure( + comp::gotcha_config<0, int, pthread_mutex_t*>{ "pthread_mutex_unlock" }); + + unblocking_gotcha_t::configure( + comp::gotcha_config<1, int, pthread_rwlock_t*>{ "pthread_rwlock_unlock" }); + + unblocking_gotcha_t::configure( + comp::gotcha_config<2, int, pthread_spinlock_t*>{ "pthread_spin_unlock" }); + + unblocking_gotcha_t::configure( + comp::gotcha_config<3, int, pthread_barrier_t*>{ "pthread_barrier_wait" }); + + unblocking_gotcha_t::configure( + comp::gotcha_config<4, int, pthread_cond_t*>{ "pthread_cond_signal" }); + + unblocking_gotcha_t::configure( + comp::gotcha_config<5, int, pthread_cond_t*>{ "pthread_cond_broadcast" }); + + unblocking_gotcha_t::configure( + comp::gotcha_config<6, int, pthread_t, int>{ "pthread_kill" }); + + unblocking_gotcha_t::configure( + comp::gotcha_config<7, void, void*>{ "pthread_exit" }); + }; +} + +void +unblocking_gotcha::shutdown() +{ + unblocking_gotcha_t::disable(); +} + +void +unblocking_gotcha::start() +{ + if(causal::experiment::is_active() && + get_thread_state() == ::omnitrace::ThreadState::Enabled) + causal::delay::process(); +} + +void +unblocking_gotcha::stop() +{ + 
if(causal::experiment::is_active() && + get_thread_state() == ::omnitrace::ThreadState::Enabled) + causal::delay::credit(); +} + +void +unblocking_gotcha::set_data(const comp::gotcha_data& _data) +{ + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + auto _hash = tim::add_hash_id(_data.tool_id); + auto&& _ident = tim::get_hash_identifier(_hash); + if(_ident != _data.tool_id) + throw ::omnitrace::exception( + JOIN("", "Error! resolving hash for \"", _data.tool_id, "\" (", _hash, + ") returns ", _ident.c_str())); +#if defined(OMNITRACE_CI) + OMNITRACE_VERBOSE_F(3, "data set for '%s'...\n", _data.tool_id.c_str()); +#endif +} +} // namespace component +} // namespace causal +} // namespace omnitrace + +TIMEMORY_INVOKE_PREINIT(omnitrace::causal::component::unblocking_gotcha) diff --git a/source/lib/omnitrace/library/causal/components/unblocking_gotcha.hpp b/source/lib/omnitrace/library/causal/components/unblocking_gotcha.hpp new file mode 100644 index 0000000000..62a53db8e0 --- /dev/null +++ b/source/lib/omnitrace/library/causal/components/unblocking_gotcha.hpp @@ -0,0 +1,75 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/common.hpp" +#include "library/defines.hpp" +#include "library/timemory.hpp" + +#include +#include + +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace component +{ +using timespec_t = struct timespec; +// this is used to wrap pthread_mutex() +struct unblocking_gotcha : comp::base +{ + static constexpr size_t gotcha_capacity = 8; + + TIMEMORY_DEFAULT_OBJECT(unblocking_gotcha) + + // string id for component + static std::string label(); + static std::string description(); + static void preinit(); + + // generate the gotcha wrappers + static void configure(); + static void shutdown(); + + static void start(); + static void stop(); + + static void set_data(const comp::gotcha_data&); +}; + +using unblocking_gotcha_t = + comp::gotcha, category::causal>; +} // namespace component +} // namespace causal +} // namespace omnitrace + +OMNITRACE_DEFINE_CONCRETE_TRAIT(prevent_reentry, causal::component::unblocking_gotcha_t, + false_type) +OMNITRACE_DEFINE_CONCRETE_TRAIT(static_data, causal::component::unblocking_gotcha_t, + true_type) diff --git a/source/lib/omnitrace/library/causal/data.cpp b/source/lib/omnitrace/library/causal/data.cpp new file mode 100644 index 0000000000..d0c8fb454e --- /dev/null +++ b/source/lib/omnitrace/library/causal/data.cpp @@ -0,0 +1,949 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/causal/data.hpp" +#include "library/binary/address_multirange.hpp" +#include "library/binary/analysis.hpp" +#include "library/binary/binary_info.hpp" +#include "library/binary/fwd.hpp" +#include "library/binary/link_map.hpp" +#include "library/binary/scope_filter.hpp" +#include "library/causal/delay.hpp" +#include "library/causal/experiment.hpp" +#include "library/causal/sampling.hpp" +#include "library/causal/selected_entry.hpp" +#include "library/config.hpp" +#include "library/debug.hpp" +#include "library/ptl.hpp" +#include "library/runtime.hpp" +#include "library/state.hpp" +#include "library/thread_data.hpp" +#include "library/thread_info.hpp" +#include "library/utility.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace +{ +using random_engine_t = std::mt19937_64; +using progress_bundles_t = component_bundle_cache; + +auto speedup_seeds = std::vector{}; +auto speedup_divisions = get_env("OMNITRACE_CAUSAL_SPEEDUP_DIVISIONS", 5); +auto speedup_dist = []() { + size_t _n = std::max(1, 100 / speedup_divisions); + std::vector _v(_n, uint16_t{ 0 }); + std::generate(_v.begin(), _v.end(), + [_value = 0]() mutable { return (_value += speedup_divisions); }); + // approximately 25% of bins should be zero speedup + size_t _nzero = std::ceil(_v.size() / 4.0); + _v.resize(_v.size() + _nzero, 0); + std::sort(_v.begin(), _v.end()); + OMNITRACE_CI_THROW(_v.back() > 100, "Error! 
last value is too large: %i\n", + (int) _v.back()); + return _v; +}(); + +auto perform_experiment_impl_completed = std::unique_ptr>{}; +auto num_progress_points = std::atomic{ 0 }; + +template +auto& +get_engine() +{ + static auto _seed = []() -> hash_value_t { + auto _seed_v = + config::get_setting_value("OMNITRACE_CAUSAL_RANDOM_SEED").second; + if(_seed_v == 0) _seed_v = std::random_device{}(); + return _seed_v; + }(); + + static thread_local auto _v = + random_engine_t{ tim::get_combined_hash_id(_seed, utility::get_thread_index()) }; + return _v; +} + +binary::address_multirange& +get_eligible_address_ranges() +{ + static auto _v = binary::address_multirange{}; + return _v; +} + +using sf = binary::scope_filter; + +auto +get_filters(std::set _scopes = { + sf::BINARY_FILTER, sf::SOURCE_FILTER, sf::FUNCTION_FILTER }) +{ + auto _filters = std::vector{}; + + // exclude internal libraries used by omnitrace + if(_scopes.count(sf::BINARY_FILTER) > 0) + _filters.emplace_back( + sf{ sf::FILTER_EXCLUDE, sf::BINARY_FILTER, + "lib(omnitrace[-\\.]|dyninst|tbbmalloc|gotcha\\.|unwind\\.so\\.99)" }); + + // in function mode, it generally doesn't help to experiment on main function since + // telling the user to "make the main function" faster is literally useless since it + // contains everything that could be made faster + if(config::get_causal_mode() == CausalMode::Function && + _scopes.count(sf::FUNCTION_FILTER) > 0) + _filters.emplace_back(sf{ sf::FILTER_EXCLUDE, sf::FUNCTION_FILTER, + "( main\\(|^main$|^main\\.cold$)" }); + + bool _use_default_excludes = + config::get_setting_value("OMNITRACE_CAUSAL_FUNCTION_EXCLUDE_DEFAULTS") + .second; + + if(_use_default_excludes && _scopes.count(sf::FUNCTION_FILTER) > 0) + { + // symbols starting with leading underscore are generally system functions + _filters.emplace_back(sf{ sf::FILTER_EXCLUDE, sf::FUNCTION_FILTER, "^_" }); + + if(config::get_causal_mode() == CausalMode::Function) + { + // exclude STL implementation functions + 
_filters.emplace_back(sf{ sf::FILTER_EXCLUDE, sf::FUNCTION_FILTER, "::_M" }); + } + } + + // in function mode, it generally doesn't help to claim + // "make main function" faster since it contains everything + // that could be made faster + if(config::get_causal_mode() == CausalMode::Function && + _scopes.count(sf::FUNCTION_FILTER) > 0) + { + _filters.emplace_back(sf{ sf::FILTER_EXCLUDE, sf::FUNCTION_FILTER, + "(^main$|^main.cold$|int main\\()" }); + } + + using utility::get_regex_or; + + auto _source_end_converter = [](const std::string& _v) { return _v + "$"; }; + + // include handling + { + auto _binary_include = get_regex_or(config::get_causal_binary_scope(), ""); + auto _source_include = + get_regex_or(config::get_causal_source_scope(), _source_end_converter, ""); + auto _function_include = get_regex_or(config::get_causal_function_scope(), ""); + + auto _current_include = + std::make_tuple(_binary_include, _source_include, _function_include); + static auto _former_include = decltype(_current_include){}; + + if(_former_include != _current_include) + { + if(!_binary_include.empty()) + OMNITRACE_VERBOSE(0, "[causal] binary scope : %s\n", + _binary_include.c_str()); + if(!_source_include.empty()) + OMNITRACE_VERBOSE(0, "[causal] source scope : %s\n", + _source_include.c_str()); + if(!_function_include.empty()) + OMNITRACE_VERBOSE(0, "[causal] function scope : %s\n", + _function_include.c_str()); + _former_include = _current_include; + } + + if(!_binary_include.empty() && _scopes.count(sf::BINARY_FILTER) > 0) + _filters.emplace_back( + sf{ sf::FILTER_INCLUDE, sf::BINARY_FILTER, _binary_include }); + + if(!_source_include.empty() && _scopes.count(sf::SOURCE_FILTER) > 0) + _filters.emplace_back( + sf{ sf::FILTER_INCLUDE, sf::SOURCE_FILTER, _source_include }); + + if(!_function_include.empty() && _scopes.count(sf::FUNCTION_FILTER) > 0) + _filters.emplace_back( + sf{ sf::FILTER_INCLUDE, sf::FUNCTION_FILTER, _function_include }); + } + + // exclude handling + { + auto 
_binary_exclude = get_regex_or(config::get_causal_binary_exclude(), ""); + auto _source_exclude = + get_regex_or(config::get_causal_source_exclude(), _source_end_converter, ""); + auto _function_exclude = get_regex_or(config::get_causal_function_exclude(), ""); + + auto _current_exclude = + std::make_tuple(_binary_exclude, _source_exclude, _function_exclude); + static auto _former_exclude = decltype(_current_exclude){}; + + if(_former_exclude != _current_exclude) + { + if(!_binary_exclude.empty()) + OMNITRACE_VERBOSE(0, "[causal] binary exclude : %s\n", + _binary_exclude.c_str()); + if(!_source_exclude.empty()) + OMNITRACE_VERBOSE(0, "[causal] source exclude : %s\n", + _source_exclude.c_str()); + if(!_function_exclude.empty()) + OMNITRACE_VERBOSE(0, "[causal] function exclude : %s\n", + _function_exclude.c_str()); + _former_exclude = _current_exclude; + } + + if(!_binary_exclude.empty() && _scopes.count(sf::BINARY_FILTER) > 0) + _filters.emplace_back( + sf{ sf::FILTER_EXCLUDE, sf::BINARY_FILTER, _binary_exclude }); + + if(!_source_exclude.empty() && _scopes.count(sf::SOURCE_FILTER) > 0) + _filters.emplace_back( + sf{ sf::FILTER_EXCLUDE, sf::SOURCE_FILTER, _source_exclude }); + + if(!_function_exclude.empty() && _scopes.count(sf::FUNCTION_FILTER) > 0) + _filters.emplace_back( + sf{ sf::FILTER_EXCLUDE, sf::FUNCTION_FILTER, _function_exclude }); + } + + return _filters; +} + +using binary_info_t = std::vector; +std::pair& +get_cached_binary_info() +{ + static auto _v = []() { + // get the linked binaries for the exe (excluding ones from libomnitrace) + auto _link_map = binary::get_link_map(); + auto _files = std::vector{}; + _files.reserve(_link_map.size()); + for(const auto& itr : _link_map) + _files.emplace_back(itr.real()); + + auto _discarded = std::vector{}; + auto _requested = binary::get_binary_info(_files, get_filters()); + return std::make_pair(_requested, _discarded); + }(); + return _v; +} + +bool +satisfies_filter(const binary::scope_filter::filter_scope& 
_scope, + const std::string& _value) +{ + static auto _filters = get_filters(); + return binary::scope_filter::satisfies_filter(_filters, _scope, _value); +} + +auto +compute_eligible_lines_impl() +{ + const auto& _binary_info = get_cached_binary_info().first; + auto& _filter_info = get_cached_binary_info().second; + auto _filters = get_filters(); + + auto& _eligible_ar = get_eligible_address_ranges(); + for(const auto& litr : _binary_info) + { + for(const auto& ditr : litr.mappings) + { + _eligible_ar += + std::make_pair(binary::address_multirange::coarse{}, + address_range_t{ ditr.load_address, ditr.last_address }); + } + + for(const auto& ditr : litr.symbols) + { + _eligible_ar += ditr.address + ditr.load_address; + } + + auto& _filtered = _filter_info.emplace_back(); + _filtered.bfd = litr.bfd; + _filtered.mappings = litr.mappings; + _filtered.ranges = litr.ranges; + _filtered.sections = litr.sections; + + for(const auto& ditr : litr.symbols) + { + auto _sym = ditr.clone(); + + _sym.inlines = + ditr.get_inline_symbols>(_filters); + + _sym.dwarf_info = + ditr.get_debug_line_info>(_filters); + + if(ditr(_filters) || (_sym.inlines.size() + _sym.dwarf_info.size()) > 0) + { + _filtered.symbols.emplace_back(_sym); + } + } + + for(const auto& ditr : litr.debug_info) + { + if(sf::satisfies_filter(_filters, sf::SOURCE_FILTER, ditr.file) || + sf::satisfies_filter(_filters, sf::SOURCE_FILTER, + join(':', ditr.file, ditr.line))) + { + _filtered.debug_info.emplace_back(ditr); + } + } + + _filtered.sort(); + } + + OMNITRACE_VERBOSE( + 0, "[causal] eligible address ranges: %zu, coarse address range: %zu [%s]\n", + _eligible_ar.size(), _eligible_ar.range_size(), + _eligible_ar.coarse_range.as_string().c_str()); + + if(_eligible_ar.empty()) + { + auto _cfg = settings::compose_filename_config{}; + _cfg.subdirectory = "causal/binary-info"; + _cfg.use_suffix = config::get_use_pid(); + save_line_info(_cfg, config::get_verbose()); + } + + OMNITRACE_CONDITIONAL_THROW( + 
_eligible_ar.empty(), + "Error! binary analysis (after filters) resulted in zero eligible instruction " + "pointer addresses for causal experimentation"); +} + +void +save_maps_info_impl(std::ostream& _ofs) +{ + auto _maps_file = join("/", "/proc", process::get_id(), "maps"); + auto _ifs = std::ifstream{ _maps_file }; + auto _maps = std::stringstream{}; + if(_ifs) + { + _maps << _maps_file << "\n"; + while(_ifs) + { + std::string _line{}; + getline(_ifs, _line); + if(!_line.empty()) _maps << " " << _line << "\n"; + } + } + _ofs << _maps.str(); +} + +void +save_line_info_impl(std::ostream& _ofs, + const std::vector& _binary_data) +{ + auto _write_impl = [&_ofs](const binary::binary_info& _data) { + for(const auto& itr : _data.mappings) + { + _ofs << itr.pathname << " [" << as_hex(itr.load_address) << " - " + << as_hex(itr.last_address) << "]\n"; + } + + auto _emitted_dwarf_addresses = std::set{}; + for(const auto& itr : _data.symbols) + { + auto _addr = itr.address; + auto _addr_off = itr.address + itr.load_address; + _ofs << " " << as_hex(_addr_off) << " [" << as_hex(_addr) + << "] :: " << itr.file; + if(itr.line > 0) _ofs << ":" << itr.line; + if(!itr.func.empty()) _ofs << " [" << tim::demangle(itr.func) << "]"; + _ofs << "\n"; + + for(const auto& ditr : itr.inlines) + { + _ofs << " " << ditr.file << ":" << ditr.line; + if(!ditr.func.empty()) _ofs << " [" << tim::demangle(ditr.func) << "]"; + _ofs << "\n"; + } + + for(const auto& ditr : itr.dwarf_info) + { + _ofs << " " << as_hex(ditr.address) << " :: " << ditr.file << ":" + << ditr.line; + _ofs << "\n"; + _emitted_dwarf_addresses.emplace(ditr.address.low); + } + } + + for(const auto& itr : _data.debug_info) + { + if(_emitted_dwarf_addresses.count(itr.address.low) > 0) continue; + _ofs << " " << as_hex(itr.address) << " :: " << itr.file << ":" + << itr.line; + _ofs << "\n"; + } + + _ofs << "\n" << std::flush; + }; + + for(const auto& itr : _binary_data) + _write_impl(itr); +} + +void +compute_eligible_lines() +{ + 
static auto _once = std::once_flag{}; + std::call_once(_once, compute_eligible_lines_impl); +} + +void +perform_experiment_impl(std::shared_ptr> _started) // NOLINT +{ + using clock_type = std::chrono::high_resolution_clock; + using duration_nsec_t = std::chrono::duration; + using duration_sec_t = std::chrono::duration>; + + const auto& _thr_info = thread_info::init(true); + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + OMNITRACE_CONDITIONAL_THROW(!_thr_info->is_offset, + "Error! causal profiling thread should be offset"); + + if(!perform_experiment_impl_completed) + perform_experiment_impl_completed = std::make_unique>(); + + perform_experiment_impl_completed->set_value_at_thread_exit(); + + compute_eligible_lines(); + + // notify that thread has started + if(_started) _started->set_value(); + + // pause at least one second to determine sampling rate + // std::this_thread::sleep_for(std::chrono::seconds{ 1 }); + + if(!config::get_causal_end_to_end()) + { + // wait for at least one progress point to start + while(num_progress_points.load(std::memory_order_relaxed) == 0) + { + std::this_thread::sleep_for(std::chrono::milliseconds{ 1 }); + } + } + + double _delay_sec = + config::get_setting_value("OMNITRACE_CAUSAL_DELAY").second; + double _duration_sec = + config::get_setting_value("OMNITRACE_CAUSAL_DURATION").second; + auto _duration_nsec = duration_nsec_t{ _duration_sec * units::sec }; + + if(_delay_sec > 0.0) + { + OMNITRACE_VERBOSE(1, "[causal] delaying experimentation for %.2f seconds...\n", + _delay_sec); + uint64_t _delay_nsec = _delay_sec * units::sec; + std::this_thread::sleep_for(std::chrono::nanoseconds{ _delay_nsec }); + } + + auto _impl_count = 0; + auto _start_time = clock_type::now(); + auto _exceeded_duration = [&]() { + if(_duration_sec > 1.0e-3) + { + auto _elapsed = clock_type::now() - _start_time; + if(_elapsed >= _duration_nsec) + { + OMNITRACE_VERBOSE( + 1, + "[causal] stopping experimentation after %.2f seconds " + "(elapsed: %.2f 
seconds)...\n", + _duration_sec, + std::chrono::duration_cast(_elapsed).count()); + causal::sampling::post_process(); + return true; + } + } + return false; + }; + + while(get_state() < State::Finalized) + { + auto _impl_no = _impl_count++; + auto _experim = experiment{}; + + // loop until started or finalized + while(!_experim.start()) + { + if(get_state() == State::Finalized) + { + auto _memory = std::stringstream{}; + auto _binary = std::stringstream{}; + auto _scoped = std::stringstream{}; + auto _sample = std::stringstream{}; + save_maps_info_impl(_memory); + save_line_info_impl(_binary, get_cached_binary_info().first); + save_line_info_impl(_scoped, get_cached_binary_info().second); + + auto _samples = std::map{}; + for(const auto& itr : get_samples()) + { + for(const auto& iitr : itr.second) + { + _samples[iitr.address] += iitr.count; + } + } + + for(const auto& itr : _samples) + { + if(itr.second > 0) + { + auto _linfo = get_line_info(itr.first, true); + // if(_linfo.size() > 1) _linfo.pop_front(); + for(const auto& iitr : _linfo) + { + _sample << " " << std::setw(8) << itr.second + << " :: " << as_hex(itr.first) << " [" << iitr.file + << ":" << iitr.line << "][" << demangle(iitr.func) + << "]\n"; + } + + if(_linfo.empty()) + { + _sample << " " << std::setw(8) << itr.second + << " :: " << as_hex(itr.first) << "\n"; + } + } + } + + std::cerr << std::flush; + auto _cerr = tim::log::warning_stream(std::cerr); + _cerr << "\nmaps:\n\n" << _memory.str() << "\n"; + _cerr << "\nbinary:\n\n" << _binary.str() << "\n"; + _cerr << "\nscoped:\n\n" << _scoped.str() << "\n"; + _cerr << "\nsample:\n\n" << _sample.str() << "\n"; + std::cerr << std::flush; + + OMNITRACE_CONDITIONAL_THROW(_impl_no == 0, "experiment never started"); + return; + } + } + + // wait for the experiment to complete + if(config::get_causal_end_to_end()) + { + mark_progress_point(config::get_exe_name(), true); + while(get_state() < State::Finalized) + { + std::this_thread::yield(); + 
std::this_thread::sleep_for(std::chrono::milliseconds{ 100 }); + if(_exceeded_duration()) return; + } + } + else + { + _experim.wait(); + } + + while(!_experim.stop()) + { + if(get_state() == State::Finalized) return; + } + + if(_exceeded_duration()) return; + } +} + +// thread-safe read/write ring-buffer via atomics +using pc_ring_buffer_t = tim::data_storage::atomic_ring_buffer; +// latest_eligible_pcs is an array of unwind_depth size -> samples will +// use lowest indexes for most recent functions address in the call-stack +auto latest_eligible_pc = []() { + auto _arr = std::array, unwind_depth>{}; + for(auto& itr : _arr) + itr = std::make_unique(units::get_page_size() / + (sizeof(uintptr_t) + 1)); + return _arr; +}(); +} // namespace + +//--------------------------------------------------------------------------------------// + +bool +is_eligible_address(uintptr_t _v) +{ + return get_eligible_address_ranges().coarse_range.contains(_v); +} + +void +save_line_info(const settings::compose_filename_config& _cfg, int _verbose) +{ + auto _write = [_verbose](const std::string& ofname, const auto& _data) { + auto _ofs = std::ofstream{}; + if(tim::filepath::open(_ofs, ofname)) + { + if(_verbose >= 0) + operation::file_output_message{}( + ofname, std::string{ "causal_symbol_info" }); + save_line_info_impl(_ofs, _data); + save_maps_info_impl(_ofs); + } + else + { + throw ::omnitrace::exception("Error opening " + ofname); + } + }; + + _write(tim::settings::compose_output_filename( + join('-', config::get_causal_output_filename(), "binary"), "txt", _cfg), + get_cached_binary_info().first); + _write(tim::settings::compose_output_filename( + join('-', config::get_causal_output_filename(), "scoped"), "txt", _cfg), + get_cached_binary_info().second); +} + +void +set_current_selection(unwind_addr_t _stack) +{ + if(experiment::is_active()) return; + + size_t _n = 0; + for(auto itr : _stack) + { + auto& _pcs = latest_eligible_pc.at(_n); + if(_pcs && is_eligible_address(itr)) + { + 
_pcs->write(&itr); + // increment after valid found -> first valid pc for call-stack + ++_n; + } + } +} + +selected_entry +sample_selection(size_t _nitr, size_t _wait_ns) +{ + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + + size_t _n = 0; + + auto _select_address = [&](auto& _address_vec) { + // this isn't necessary bc of check before calling this lambda but + // kept because of size() - 1 in distribution range + if(OMNITRACE_UNLIKELY(_address_vec.empty())) + { + OMNITRACE_WARNING(0, "no addresses for sample selection...\n"); + return selected_entry{}; + } + + while(!_address_vec.empty()) + { + // randomly select an address + auto _dist = + std::uniform_int_distribution{ 0, _address_vec.size() - 1 }; + auto _idx = _dist(get_engine()); + + uintptr_t _addr = _address_vec.at(_idx); + uintptr_t _sym_addr = 0; + uintptr_t _lookup_addr = _addr; + auto _dl_info = unwind::dlinfo::construct(_addr); + + _address_vec.erase(_address_vec.begin() + _idx); + + if(get_causal_mode() == CausalMode::Function) + _sym_addr = (_dl_info.symbol) ? _dl_info.symbol.address() : _addr; + + // lookup the PC line info at either the address or the symbol address + auto linfo = get_line_info(_lookup_addr, false); + + // unlikely this will be empty but just in case + if(linfo.empty()) continue; + + // debugging for continuous integration + if(OMNITRACE_UNLIKELY(config::get_is_continuous_integration() || + config::get_debug())) + { + auto _location = + (_dl_info.location) + ? filepath::realpath(std::string{ _dl_info.location.name }, + nullptr, false) + : std::string{}; + for(const auto& itr : linfo) + { + if(OMNITRACE_UNLIKELY(config::get_debug())) + { + OMNITRACE_WARNING( + 0, "[%s][%s][%s][%s] %s [%s:%i][%s][%zu]\n", + as_hex(_lookup_addr).c_str(), as_hex(_addr).c_str(), + as_hex(_sym_addr).c_str(), + (_location.empty()) ? 
"" : _location.data(), + demangle(itr.func).c_str(), itr.file.c_str(), itr.line, + itr.address.as_string().c_str(), itr.address.size()); + } + } + } + + auto& _linfo_v = (config::get_causal_mode() == CausalMode::Function) + ? linfo.front() + : linfo.back(); + return selected_entry{ _addr, _sym_addr, _linfo_v }; + // return selected_entry{ address_range_t{ _addr }, + // address_range_t{ _sym_addr }, + // { _linfo_v.second } }; + } + return selected_entry{}; + }; + + while(_n++ < _nitr) + { + auto _addresses = std::deque{}; + for(auto& aitr : latest_eligible_pc) + { + if(OMNITRACE_UNLIKELY(!aitr)) + { + OMNITRACE_WARNING(0, "invalid ring buffer...\n"); + continue; + } + + auto _naddrs = aitr->count(); + if(_naddrs == 0) continue; + + for(size_t i = 0; i < _naddrs; ++i) + { + uintptr_t _addr = 0; + if(!aitr->is_empty() && aitr->read(&_addr) != nullptr) + { + if(_addr > 0) _addresses.emplace_back(_addr); + } + } + + if(!_addresses.empty()) + { + auto _selection = _select_address(_addresses); + if(_selection) return _selection; + } + } + + std::this_thread::yield(); + std::this_thread::sleep_for(std::chrono::nanoseconds{ _wait_ns }); + } + + return selected_entry{}; +} + +std::deque +get_line_info(uintptr_t _addr, bool _include_discarded) +{ + static auto _glob_filters = get_filters({ sf::BINARY_FILTER }); + static auto _scope_filters = get_filters(); + auto _data = std::deque{}; + auto _get_line_info = [&](const auto& _info, const auto& _filters) { + // search for exact matches first + for(const binary::binary_info& litr : _info) + { + auto _local_data = std::deque{}; + + for(const auto& ditr : litr.symbols) + { + auto _ipaddr = ditr.ipaddr(); + if(!_ipaddr.contains(_addr)) continue; + + if(config::get_causal_mode() == CausalMode::Function) + { + // check if the primary symbol satisfy the constraints + if(ditr(_filters)) _local_data.emplace_back(ditr); + + // the primary symbol may not satisfy the constraints but the inlined + // functions may + 
utility::combine(_local_data, ditr.get_inline_symbols(_filters)); + } + else if(config::get_causal_mode() == CausalMode::Line) + { + auto _debug_data = std::deque{}; + for(const auto& itr : ditr.get_debug_line_info(_filters)) + { + if(itr.ipaddr().contains(_addr)) _debug_data.emplace_back(itr); + } + utility::combine(_local_data, _debug_data); + } + else + { + throw exception( + join(" ", "Causal mode not supported:", + std::to_string(config::get_causal_mode()))); + } + } + + if(!_local_data.empty()) + { + // combine and only allow first match + utility::combine(_data, _local_data); + break; + } + } + }; + + if(_include_discarded) + _get_line_info(get_cached_binary_info().first, _glob_filters); + else + _get_line_info(get_cached_binary_info().second, _scope_filters); + + return _data; +} + +void +push_progress_point(std::string_view _name) +{ + if(config::get_causal_end_to_end()) return; + + ++num_progress_points; + + auto _hash = tim::add_hash_id(_name); + auto& _data = progress_bundles_t::instance(utility::get_thread_index()); + auto* _bundle = _data.construct(_hash); + _bundle->push(); + _bundle->start(); +} + +void +pop_progress_point(std::string_view _name) +{ + if(config::get_causal_end_to_end()) return; + + auto& _data = progress_bundles_t::instance(utility::get_thread_index()); + if(_data.empty()) return; + if(_name.empty()) + { + auto* itr = _data.back(); + itr->stop(); + itr->pop(); + _data.pop_back(); + return; + } + else + { + auto _hash = tim::add_hash_id(_name); + for(auto itr = _data.rbegin(); itr != _data.rend(); ++itr) + { + if((*itr)->get_hash() == _hash) + { + (*itr)->stop(); + (*itr)->pop(); + _data.destroy(itr); + return; + } + } + } +} + +void +mark_progress_point(std::string_view _name, bool _force) +{ + if(config::get_causal_end_to_end() && !_force) return; + + ++num_progress_points; + + auto _hash = tim::add_hash_id(_name); + auto& _data = progress_bundles_t::instance(utility::get_thread_index()); + auto* _bundle = _data.construct(_hash); + 
_bundle->push(); + _bundle->mark(); + _bundle->pop(); + _data.pop_back(); +} + +uint16_t +sample_virtual_speedup() +{ + if(speedup_dist.empty()) + return 0; + else if(speedup_dist.size() == 1) + return speedup_dist.front(); + else + { + struct virtual_speedup + {}; + auto _dist = + std::uniform_int_distribution{ size_t{ 0 }, speedup_dist.size() - 1 }; + return speedup_dist.at(_dist(get_engine())); + } +} + +void +start_experimenting() +{ + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + + auto _user_speedup_dist = config::get_causal_fixed_speedup(); + if(!_user_speedup_dist.empty()) + { + speedup_dist.clear(); + for(auto itr : _user_speedup_dist) + { + OMNITRACE_CONDITIONAL_ABORT_F(itr > 100, + "Virtual speedups must be in range [0, 100]. " + "Invalid virtual speedup: %lu\n", + itr); + speedup_dist.emplace_back(static_cast(itr)); + } + } + + compute_eligible_lines(); + + auto _cfg = settings::compose_filename_config{}; + _cfg.subdirectory = "causal/binary-info"; + _cfg.use_suffix = config::get_use_pid(); + save_line_info(_cfg, config::get_verbose()); + + if(get_state() < State::Finalized) + { + OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); + auto _promise = std::make_shared>(); + std::thread{ perform_experiment_impl, _promise }.detach(); + _promise->get_future().wait_for(std::chrono::seconds{ 2 }); + } +} + +void +finish_experimenting() +{ + if(perform_experiment_impl_completed) + { + perform_experiment_impl_completed->get_future().wait_for( + std::chrono::seconds{ 5 }); + perform_experiment_impl_completed.reset(); + } + sampling::post_process(); + experiment::save_experiments(); +} +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/data.hpp b/source/lib/omnitrace/library/causal/data.hpp new file mode 100644 index 0000000000..257195dc83 --- /dev/null +++ b/source/lib/omnitrace/library/causal/data.hpp @@ -0,0 +1,76 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. 
All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/binary/analysis.hpp" +#include "library/binary/fwd.hpp" +#include "library/causal/fwd.hpp" +#include "library/containers/c_array.hpp" +#include "library/containers/static_vector.hpp" +#include "library/defines.hpp" +#include "library/thread_data.hpp" +#include "library/utility.hpp" + +#include +#include +#include +#include + +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +void +save_line_info(const settings::compose_filename_config&, int _verbose); + +std::deque +get_line_info(uintptr_t _addr, bool include_discarded = true); + +bool is_eligible_address(uintptr_t); + +void set_current_selection(unwind_addr_t); + +selected_entry +sample_selection(size_t _nitr = 1000, size_t _wait_ns = 10000); + +void push_progress_point(std::string_view); + +void pop_progress_point(std::string_view); + +void +mark_progress_point(std::string_view, bool force = false); + +uint16_t +sample_virtual_speedup(); + +void +start_experimenting(); + +void +finish_experimenting(); +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/delay.cpp b/source/lib/omnitrace/library/causal/delay.cpp new file mode 100644 index 0000000000..d6ed4bab10 --- /dev/null +++ b/source/lib/omnitrace/library/causal/delay.cpp @@ -0,0 +1,193 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/causal/delay.hpp" +#include "library/causal/experiment.hpp" +#include "library/runtime.hpp" +#include "library/state.hpp" +#include "library/thread_data.hpp" +#include "library/thread_info.hpp" +#include "library/tracing.hpp" +#include "library/utility.hpp" + +#include +#include +#include +#include + +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace +{ +auto& +get_delay_data() +{ + using thread_data_t = thread_data, delay>; + static auto& _v = thread_data_t::construct( + construct_on_init{}, []() { return delay::get_global().load(); }); + return _v; +} + +int64_t +compute_sleep_for_overhead() +{ + using random_engine_t = std::mt19937_64; + auto _engine = random_engine_t{ std::random_device{}() }; + auto _dist = std::uniform_int_distribution{ 0, 5 }; + size_t _ntot = 250; + size_t _nwarm = 50; + auto _stats = tim::statistics{}; + for(size_t i = 0; i < _ntot; ++i) + { + auto _val = _dist(_engine); + int64_t _beg = tracing::now(); + std::this_thread::sleep_for(std::chrono::nanoseconds{ _val }); + int64_t _end = tracing::now(); + if(i < _nwarm) continue; + auto _diff = (_end - _beg); + OMNITRACE_CONDITIONAL_THROW( + _diff < _val, "Error! sleep_for(%zu) [nanoseconds] >= %zu", _val, _diff); + _stats += (_diff - _val); + } + + OMNITRACE_BASIC_VERBOSE(2, + "[causal] overhead of std::this_thread::sleep_for(...) 
" + "invocation = %6.3f usec +/- %e\n", + _stats.get_mean() / units::usec, + _stats.get_stddev() / units::usec); + + tim::manager::instance()->add_metadata([_stats](auto& ar) { + ar(tim::cereal::make_nvp("causal thread sleep overhead [nsec]", _stats)); + }); + + (void) get_delay_data(); + + return _stats.get_mean(); +} + +int64_t sleep_for_overhead = compute_sleep_for_overhead(); +} // namespace + +void +delay::process() +{ + if(!trait::runtime_enabled::get()) return; + if(get_state() >= ::omnitrace::State::Finalized) return; + + if(causal::experiment::is_active()) + { + if(get_global() < get_local()) + { + auto _diff = (get_local() - get_global()); + if(_diff > sleep_for_overhead) get_global() += _diff; + } + else if(get_global() > get_local()) + { + auto _beg = tracing::now(); + std::this_thread::sleep_for( + std::chrono::nanoseconds{ get_global() - get_local() }); + get_local() += (tracing::now() - _beg); + } + } + else + { + get_local() = get_global(); + } +} + +void +delay::credit() +{ + if(!trait::runtime_enabled::get()) return; + if(get_state() >= ::omnitrace::State::Finalized) return; + + auto _diff = get_global() - get_local(); + if(_diff > 0) + { + get_local() += _diff; + } +} + +void +delay::preblock() +{ + if(!trait::runtime_enabled::get()) return; + if(get_state() >= ::omnitrace::State::Finalized) return; + + auto _diff = get_global() - get_local(); + if(_diff > 0) + { + get_local() += _diff; + } +} + +void +delay::postblock(int64_t _preblock_global_delay_value) +{ + if(!trait::runtime_enabled::get()) return; + if(get_state() >= ::omnitrace::State::Finalized) return; + + get_local() += (get_global() - _preblock_global_delay_value); +} + +int64_t +delay::sync() +{ + auto _v = get_global().load(std::memory_order_seq_cst); + if(get_delay_data()) get_delay_data()->fill(_v); + return _v; +} + +std::atomic& +delay::get_global() +{ + static auto _v = std::atomic{ 0 }; + return _v; +} + +int64_t& +delay::get_local(int64_t _tid) +{ + auto& _data = 
get_delay_data();
+    // Runs once per thread: calls thread_data_t::construct for the calling thread,
+    // passing the global delay value read at that moment.
+    static thread_local auto _thr_init = []() {
+        using thread_data_t = thread_data, delay>;
+        thread_data_t::construct(construct_on_thread{ threading::get_id() },
+                                 get_global().load());
+        return true;
+    }();
+    // mark the initializer as used *before* returning (previously this statement was
+    // placed after the return and therefore unreachable)
+    (void) _thr_init;
+    return _data->at(_tid);
+}
+
+// Returns the current global delay minus the provided baseline value.
+uint64_t
+delay::compute_total_delay(uint64_t _baseline)
+{
+    return get_global().load() - _baseline;
+}
+}  // namespace causal
+}  // namespace omnitrace
diff --git a/source/lib/omnitrace/library/causal/delay.hpp b/source/lib/omnitrace/library/causal/delay.hpp
new file mode 100644
index 0000000000..24a509cde6
--- /dev/null
+++ b/source/lib/omnitrace/library/causal/delay.hpp
@@ -0,0 +1,62 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+ +#pragma once + +#include "library/common.hpp" +#include "library/components/fwd.hpp" +#include "library/defines.hpp" +#include "library/thread_data.hpp" + +#include +#include +#include + +#include +#include + +namespace omnitrace +{ +namespace causal +{ +struct delay +: tim::component::empty_base +, tim::concepts::component +{ + using value_type = void; + + TIMEMORY_DEFAULT_OBJECT(delay) + + static void process(); + static void credit(); + static void preblock(); + static void postblock(int64_t); + static int64_t sync(); + + static std::atomic& get_global(); + static int64_t& get_local(int64_t _tid = threading::get_id()); + + static int64_t get(int64_t _tid = threading::get_id()); + static uint64_t compute_total_delay(uint64_t); +}; +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/experiment.cpp b/source/lib/omnitrace/library/causal/experiment.cpp new file mode 100644 index 0000000000..c8ae8c2df8 --- /dev/null +++ b/source/lib/omnitrace/library/causal/experiment.cpp @@ -0,0 +1,671 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "library/causal/experiment.hpp"
+#include "common/defines.h"
+#include "library/causal/components/backtrace.hpp"
+#include "library/causal/components/progress_point.hpp"
+#include "library/causal/data.hpp"
+#include "library/causal/delay.hpp"
+#include "library/config.hpp"
+#include "library/debug.hpp"
+#include "library/state.hpp"
+#include "library/thread_data.hpp"
+#include "library/thread_info.hpp"
+#include "library/tracing.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace omnitrace
+{
+namespace causal
+{
+namespace
+{
+using backtrace_causal = omnitrace::causal::component::backtrace;
+namespace cereal       = ::tim::cereal;
+
+// file-local state shared by the static experiment accessors below
+auto    current_experiment_value  = experiment{};
+auto    current_selected_count    = std::atomic{ 0 };
+auto    current_experiment        = std::atomic{ nullptr };
+auto    experiment_history        = std::vector{};
+int64_t global_scaling            = 1;
+int64_t global_scaling_increments = 0;
+bool    use_exp_speedup_scaling =
+    get_env("OMNITRACE_CAUSAL_SCALE_EXPERIMENT_TIME_BY_SPEEDUP", false);
+}  // namespace
+
+// Two samples are equal when address, line, file, function and location all match.
+bool
+experiment::sample::operator==(const sample& _v) const
+{
+    return std::tie(address, info.line, info.file, info.func, location) ==
+           std::tie(_v.address, _v.info.line, _v.info.file, _v.info.func, _v.location);
+}
+
+// Ordering used when samples are stored in an ordered set (record::samples).
+// Every branch must be a "less than" comparison: the first branch previously
+// returned an equality test, which made `a < a` true for any sample with line info
+// and broke the strict weak ordering that ordered containers require.
+bool
+experiment::sample::operator<(const sample& _v) const
+{
+    if(info.line > 0 && _v.info.line > 0)
+    {
+        return std::tie(info.line, info.file) < std::tie(_v.info.line, _v.info.file);
+    }
+    else if((info.line + _v.info.line) > 0)
+    {
+        return std::tie(info.file, location, info.line) <
+               std::tie(_v.info.file, _v.location, _v.info.line);
+    }
+    return 
(location < _v.location); +} + +const auto& +experiment::sample::operator+=(const sample& _v) const +{ + if(*this == _v && this != &_v) count += _v.count; + return *this; +} + +template +void +experiment::sample::serialize(ArchiveT& ar, const unsigned) +{ + namespace cereal = ::tim::cereal; + ar(cereal::make_nvp("location", location), cereal::make_nvp("count", count), + cereal::make_nvp("info", info)); +} + +template +void +experiment::record::serialize(ArchiveT& ar, const unsigned) +{ + namespace cereal = ::tim::cereal; + ar(cereal::make_nvp("startup_time", startup), + cereal::make_nvp("experiments", experiments), + cereal::make_nvp("runtime", runtime)); + auto _samples = std::vector{}; + if constexpr(concepts::is_input_archive::value) + { + ar(cereal::make_nvp("samples", _samples)); + for(auto& itr : _samples) + samples.emplace(std::move(itr)); + } + else + { + ar(cereal::make_nvp("samples", samples)); + } +} + +template +void +experiment::serialize(ArchiveT& ar, const unsigned) +{ + namespace cereal = ::tim::cereal; + + ar(cereal::make_nvp("index", index), + cereal::make_nvp("virtual_speedup", virtual_speedup), + cereal::make_nvp("sampling_period", sampling_period), + cereal::make_nvp("start_time", start_time), cereal::make_nvp("end_time", end_time), + cereal::make_nvp("experiment_time", experiment_time), + cereal::make_nvp("batch_size", batch_size), cereal::make_nvp("duration", duration), + cereal::make_nvp("scaling_factor", scaling_factor), + cereal::make_nvp("selected", selected), + cereal::make_nvp("sample_delay", sample_delay), + cereal::make_nvp("delay_scaling", delay_scaling), + cereal::make_nvp("total_delay", total_delay), + cereal::make_nvp("global_delay", global_delay), + cereal::make_nvp("selection", selection)); + + if constexpr(concepts::is_input_archive::value) + { + auto _ppts = std::vector{}; + init_progress.clear(); + fini_progress.clear(); + ar(cereal::make_nvp("progress_points", _ppts)); + for(auto itr : _ppts) + 
fini_progress.emplace(itr.get_hash(), itr); + } + else + { + auto _ppts = std::vector{}; + { + auto ppts = fini_progress; + for(auto& pitr : ppts) + pitr.second.set_hash(pitr.first); + for(auto pitr : init_progress) + ppts[pitr.first] -= pitr.second; + _ppts.reserve(ppts.size()); + for(auto& pitr : ppts) + _ppts.emplace_back(pitr.second); + } + ar(cereal::make_nvp("progress_points", _ppts)); + } + + ar(cereal::make_nvp("period_stats", period_stats)); +} + +std::string +experiment::label() +{ + return "causal_experiment"; +} + +std::string +experiment::description() +{ + return "Records an experiment for causal profiling"; +} + +const std::atomic& +experiment::get_current_experiment() +{ + return current_experiment; +} + +bool +experiment::start() +{ + if(running && tracing::now() < start_time + experiment_time) return false; + + selection = sample_selection(); + if(!selection) return false; + + // sampling period in nanoseconds + sampling_period = backtrace_causal::get_period(units::nsec); + // adjust for the real sampling period + period_stats = causal::component::backtrace::get_period_stats(); + if(period_stats.get_count() > 10) sampling_period = period_stats.get_mean(); + + // experiment time is scaled up for longer speedups + index = experiment_history.size() + 1; + virtual_speedup = sample_virtual_speedup(); + delay_scaling = virtual_speedup / 100.0; + if(use_exp_speedup_scaling) scaling_factor *= (1.0 + delay_scaling); + + experiment_time = global_scaling * scaling_factor * sampling_period * batch_size; + sample_delay = sampling_period * delay_scaling; + total_delay = delay::sync(); + init_progress = component::progress_point::get_progress_points(); + start_time = tracing::now(); + + OMNITRACE_VERBOSE(0, "Starting causal experiment #%-3u: %s\n", index, + as_string().c_str()); + + current_experiment_value = *this; + current_selected_count.store(0); + current_experiment.store(this); + return true; +} + +bool +experiment::wait() const +{ + auto _now = 
tracing::now(); + auto _wait = experiment_time - (_now - start_time); + auto _end = _now + _wait; + auto _incr = std::min(_wait / 100, 1000000); + while(tracing::now() < _end && get_state() < State::Finalized) + { + std::this_thread::yield(); + std::this_thread::sleep_for(std::chrono::nanoseconds{ _incr }); + } + return (tracing::now() >= _end); +} + +bool +experiment::stop() +{ + auto _now = tracing::now(); + if(_now < start_time + experiment_time) return false; + + current_experiment.store(nullptr); + selected = current_selected_count.load(); + running = false; + end_time = _now; + experiment_time = (end_time - start_time); + global_delay = delay::compute_total_delay(0); + total_delay = (global_delay - total_delay); + duration = (experiment_time > total_delay) ? (experiment_time - total_delay) : 0; + fini_progress = component::progress_point::get_progress_points(); + period_stats = causal::component::backtrace::get_period_stats(); + + // sync data + delay::sync(); + + // for larger speedups, we increased the experiment time, so we want to artificially + // increase num by the same factor. E.g. 10 throughput points at speedup 50 should + // really look like 15 + double _scale_num = 1.0 + ((use_exp_speedup_scaling) ? delay_scaling : 0.0); + auto _prog_stats = tim::statistics{}; + for(auto fitr : fini_progress) + { + auto _pt = fitr.second - init_progress[fitr.first]; + int64_t _num = + std::max({ _pt.get_laps(), _pt.get_arrival(), _pt.get_departure() }); + if(_num > 0) _prog_stats += (_num * _scale_num); + } + + auto _mean = (_prog_stats.get_count() > 0) ? _prog_stats.get_mean() : 0; + auto _high = (_prog_stats.get_count() > 0) ? 
_prog_stats.get_max() : 0; + if(_high < 5) + { + global_scaling *= 2; + ++global_scaling_increments; // keep track of how many successive increments have + // been performed + } + else if(_mean > 10 && global_scaling > 1) + { + global_scaling /= 2; + global_scaling_increments = 0; + } + + if(OMNITRACE_UNLIKELY(global_scaling_increments >= 5)) + { + OMNITRACE_WARNING( + 0, + "Warning! causal experimentation hasn't seen at least 5 progress points " + "in the last %li experiments. Progress points are necessary for measuring " + "the effect of the virtual speed-up. Please visit " + "https://amdresearch.github.io/omnitrace/ for documentation on progress " + "points and how to add them\n", + global_scaling_increments); + } + + if(_high > 0) experiment_history.emplace_back(*this); + + std::this_thread::sleep_for(std::chrono::nanoseconds{ sampling_period * batch_size }); + return true; +} + +std::string +experiment::as_string() const +{ + std::stringstream _ss{}; + auto _dur = static_cast(experiment_time) / static_cast(units::sec); + _ss << std::boolalpha << "speed-up: " << std::setw(3) << virtual_speedup + << "%, period: " << std::setw(4) << std::fixed << std::setprecision(2) + << (sampling_period / static_cast(units::msec)) << " msec"; + if(!config::get_causal_end_to_end()) + _ss << ", duration: " << std::setw(5) << std::fixed << std::setprecision(3) + << _dur << " sec"; + _ss << " :: experiment: " << as_hex(selection.address) << " "; + //_ss << " [" << selection.info.ipaddr().as_string() << "]"; + if(selection.symbol_address > 0 && selection.address != selection.symbol_address) + _ss << "(symbol@" << as_hex(selection.symbol_address) << ") "; + if(!selection.symbol.file.empty() && selection.symbol.line > 0) + _ss << "[" << filepath::basename(selection.symbol.file) << ":" + << selection.symbol.line << "]"; + + auto _patch = [](std::string _v) { + auto _pos = std::string::npos; + using strpair_t = std::pair; + for(const auto& itr : + { strpair_t{ + "::basic_string, 
std::allocator > ", + "::string" }, + strpair_t{ "::__cxx11::", "::" } }) + { + while((_pos = _v.find(itr.first)) != std::string::npos) + _v = _v.replace(_pos, itr.first.length(), itr.second); + } + return _v; + }; + auto _func = _patch(demangle(selection.symbol.func)); + _ss << "['" << _func << "']"; + + return _ss.str(); +} + +// in nanoseconds +uint64_t +experiment::get_delay() +{ + if(!current_experiment.load()) return 0; + return current_experiment_value.sample_delay; +} + +double +experiment::get_delay_scaling() +{ + if(!current_experiment.load()) return 0; + return current_experiment_value.delay_scaling; +} + +uint32_t +experiment::get_index() +{ + if(!is_active()) return 0; + return current_experiment_value.index; +} + +bool +experiment::is_active() +{ + return (current_experiment.load(std::memory_order_relaxed) != nullptr); +} + +bool +experiment::is_selected(unwind_addr_t _stack) +{ + if(is_active()) + { + for(auto itr : _stack) + if(current_experiment_value.selection.contains(itr)) return true; + } + return false; +} + +void +experiment::add_selected() +{ + if(current_experiment.load() == nullptr) return; + ++current_selected_count; +} + +std::vector +experiment::get_experiments() +{ + return experiment_history; +} + +void +experiment::save_experiments() +{ + auto _cfg = settings::compose_filename_config{}; + _cfg.subdirectory = "causal"; + _cfg.use_suffix = config::get_use_pid(); + save_experiments(config::get_causal_output_filename(), _cfg); +} + +void // NOLINTNEXTLINE +experiment::save_experiments(std::string _fname_base, const filename_config_t& _cfg) +{ + const auto& _info0 = thread_info::get(0, InternalTID); + + // if(experiment_history.size() > 1) + // experiment_history.erase(experiment_history.begin()); + + auto current_record = record{}; + current_record.startup = _info0->lifetime.first; + + // update experiments + { + for(auto& itr : experiment_history) + { + if(itr.duration == 0 || itr.experiment_time == 0) continue; + 
current_record.experiments.emplace_back(std::move(itr));
+        }
+        experiment_history.clear();
+    }
+
+    // update runtime value
+    {
+        uint64_t _beg_runtime = std::numeric_limits::max();
+        uint64_t _end_runtime = std::numeric_limits::min();
+        for(auto& itr : current_record.experiments)
+        {
+            if(itr.duration == 0) continue;
+            if(itr.experiment_time == 0) continue;
+            _beg_runtime = std::min(_beg_runtime, itr.start_time);
+            _end_runtime = std::max(_end_runtime, itr.end_time);
+        }
+        // when no experiment contributed, the sentinels are untouched and the
+        // unsigned subtraction would wrap around; report zero runtime instead
+        current_record.runtime =
+            (_end_runtime > _beg_runtime) ? (_end_runtime - _beg_runtime) : 0;
+    }
+
+    // update sample data
+    {
+        // merge into an existing entry when the set already holds an equivalent
+        // sample, otherwise insert it
+        auto _add_sample = [&current_record](sample&& _v) {
+            auto fitr = current_record.samples.find(_v);
+            if(fitr != current_record.samples.end())
+                *fitr += _v;
+            else
+                current_record.samples.emplace(std::move(_v));
+        };
+
+        // accumulate the sample counts per address
+        auto _total_samples = std::map{};
+        for(const auto& itr : get_samples())
+        {
+            for(const auto& sitr : itr.second)
+            {
+                _total_samples[sitr.address] += sitr.count;
+            }
+        }
+
+        auto _binfo_cfg         = settings::compose_filename_config{};
+        _binfo_cfg.subdirectory = "causal/binary-info";
+        _binfo_cfg.use_suffix   = config::get_use_pid();
+        save_line_info(_binfo_cfg, config::get_verbose());
+
+        for(const auto& itr : _total_samples)
+        {
+            auto _addr  = itr.first;
+            auto _count = itr.second;
+            if(_count > 0)
+            {
+                auto _linfo = get_line_info(_addr, true);
+                for(const auto& iitr : _linfo)
+                {
+                    auto _name = (iitr.line > 0) ? join(":", iitr.file, iitr.line)
+                                                 : demangle(iitr.func);
+
+                    _name = join(" :: ", as_hex(_addr), _name);
+                    _add_sample(sample{ _count, _addr, _name, iitr });
+                }
+
+                if(_linfo.empty() && config::get_debug())
+                {
+                    _add_sample(
+                        sample{ _count, _addr, as_hex(_addr), sample::line_info{} });
+                }
+            }
+        }
+    }
+
+    bool _causal_output_reset =
+        config::get_setting_value("OMNITRACE_CAUSAL_FILE_RESET").second;
+
+    // if(current_record.experiments.empty()) return;
+
+    // JSON output: previously saved records (unless reset) plus the current record
+    {
+        auto _saved_experiments = (_causal_output_reset)
+                                      ? std::vector{}
+                                      : load_experiments(_fname_base, _cfg, false);
+        _saved_experiments.emplace_back(current_record);
+        std::stringstream oss{};
+        {
+            auto ar =
+                tim::policy::output_archive::get(oss);
+
+            ar->setNextName("omnitrace");
+            ar->startNode();
+            ar->setNextName("causal");
+            ar->startNode();
+            (*ar)(cereal::make_nvp("records", _saved_experiments));
+            ar->finishNode();
+            ar->finishNode();
+        }
+
+        auto _fname = tim::settings::compose_output_filename(_fname_base, "json", _cfg);
+        auto ofs    = std::ofstream{};
+        if(tim::filepath::open(ofs, _fname))
+        {
+            if(get_verbose() >= 0)
+                operation::file_output_message{}(
+                    _fname, std::string{ "causal_experiments" });
+            ofs << oss.str() << "\n";
+        }
+        else
+        {
+            OMNITRACE_THROW("Error opening causal experiments output file: %s",
+                            _fname.c_str());
+        }
+    }
+
+    auto _fname = tim::settings::compose_output_filename(_fname_base, "coz", _cfg);
+
+    // read in existing data
+    auto _existing = std::stringstream{};
+    if(!_causal_output_reset)
+    {
+        std::ifstream ifs{ _fname };
+        if(ifs)
+        {
+            // loop on the result of getline so the failed read at end-of-file does
+            // not append an extra blank line every time the file is rewritten
+            std::string _line;
+            while(std::getline(ifs, _line))
+                _existing << _line << "\n";
+        }
+    }
+
+    std::ofstream ofs{};
+    ofs.setf(std::ios::fixed);
+    if(tim::filepath::open(ofs, _fname))
+    {
+        if(get_verbose() >= 0)
+            operation::file_output_message{}(
+                _fname, std::string{ "causal_experiments" });
+
+        ofs << _existing.str();
+        ofs << "startup\ttime=" << current_record.startup << "\n";
+
+        for(auto& itr : current_record.experiments)
+        {
+            auto& _selection = itr.selection;
+            auto& _line_info = _selection.symbol;
+
+            std::string _name = (_selection.symbol_address > 0)
+                                    ? _line_info.func
+                                    : join(":", _line_info.file, _line_info.line);
+
+            OMNITRACE_CONDITIONAL_THROW(
+                _name.empty(),
+                "Error! 
causal experiment selection has no name: address=%s, file=%s, " + "line=%u, func=%s", + as_hex(_line_info.address).c_str(), _line_info.file.c_str(), + _line_info.line, _line_info.func.c_str()); + + ofs << "experiment\tselected=" << demangle(_name) + << "\tspeedup=" << std::setprecision(2) + << static_cast(itr.virtual_speedup / 100.0) + << "\tduration=" << itr.duration << "\tselected-samples=" << itr.selected + << "\n"; + + auto ppts = itr.fini_progress; + for(auto pitr : itr.init_progress) + ppts[pitr.first] -= pitr.second; + + for(auto pitr : ppts) + { + // if(pitr.second.get_laps() == 0) continue; + if(get_causal_end_to_end() && pitr.second.get_laps() > 1) continue; + if(pitr.second.is_throughput_point() && pitr.second.get_delta() != 0) + { + ofs << "throughput-point\tname=" + << tim::demangle(tim::get_hash_identifier(pitr.first)) + << "\tdelta=" << pitr.second.get_delta() << "\n"; + if(get_causal_end_to_end()) break; + } + if(pitr.second.is_latency_point()) + { + if(get_causal_end_to_end()) continue; + auto _delta = std::max(pitr.second.get_latency_delta(), 1); + ofs << "latency-point\tname=" + << tim::demangle(tim::get_hash_identifier(pitr.first)) + << "\tarrivals=" << pitr.second.get_arrival() + << "\tdepartures=" << pitr.second.get_departure() + << "\tdifference=" << _delta << "\n"; + } + } + } + + ofs << "runtime\ttime=" << current_record.runtime << "\n"; + + for(const auto& itr : current_record.samples) + { + ofs << "samples\tlocation=" << itr.location << "\tcount=" << itr.count; + if(config::get_debug()) ofs << "\taddress=" << as_hex(itr.address); + ofs << "\n"; + } + } + else + { + OMNITRACE_THROW("Error opening causal experiments output file: %s", + _fname.c_str()); + } +} + +std::vector +experiment::load_experiments(bool _throw_on_error) +{ + auto _cfg = settings::compose_filename_config{}; + _cfg.subdirectory = "causal"; + _cfg.use_suffix = config::get_use_pid(); + return load_experiments(config::get_causal_output_filename(), _cfg, _throw_on_error); +} 
+ +std::vector +experiment::load_experiments(std::string _fname, const filename_config_t& _cfg, + bool _throw_on_error) +{ + _fname = tim::settings::compose_input_filename(_fname, "json", _cfg); + + auto ifs = std::ifstream{}; + auto _data = std::vector{}; + if(tim::filepath::open(ifs, _fname)) + { + auto ar = tim::policy::input_archive::get(ifs); + + ar->setNextName("omnitrace"); + ar->startNode(); + ar->setNextName("causal"); + ar->startNode(); + (*ar)(cereal::make_nvp("records", _data)); + ar->finishNode(); + ar->finishNode(); + } + else + { + if(_throw_on_error) + { + OMNITRACE_THROW("Error opening causal experiments input file: %s", + _fname.c_str()); + } + } + + return _data; +} +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/experiment.hpp b/source/lib/omnitrace/library/causal/experiment.hpp new file mode 100644 index 0000000000..bf4ea77ab6 --- /dev/null +++ b/source/lib/omnitrace/library/causal/experiment.hpp @@ -0,0 +1,139 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/binary/dwarf_entry.hpp" +#include "library/binary/symbol.hpp" +#include "library/causal/components/backtrace.hpp" +#include "library/causal/components/progress_point.hpp" +#include "library/causal/data.hpp" +#include "library/causal/sample_data.hpp" +#include "library/causal/selected_entry.hpp" +#include "library/containers/c_array.hpp" +#include "library/defines.hpp" +#include "library/utility.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +using hash_value_t = ::tim::hash_value_t; + +struct experiment +{ + using progress_points_t = + std::unordered_map; + using experiments_t = std::vector; + using filename_config_t = settings::compose_filename_config; + using sample_dataset_t = std::set; + using period_stats_t = tim::statistics; + + struct sample + { + using line_info = binary::symbol; + + mutable uint64_t count = 0; + uintptr_t address = 0; + std::string location = {}; + line_info info = {}; + + bool operator==(const sample&) const; + bool operator<(const sample&) const; + const auto& operator+=(const sample&) const; + + template + void serialize(ArchiveT& ar, const unsigned); + }; + + struct record + { + int64_t startup = 0; + uint64_t runtime = 0; + std::vector experiments = {}; + std::set samples = {}; + + template + void serialize(ArchiveT& ar, const unsigned); + }; + + static std::string label(); + static std::string description(); + static const std::atomic& get_current_experiment(); + + TIMEMORY_DEFAULT_OBJECT(experiment) + + bool start(); + bool wait() const; // returns false if interrupted + bool stop(); + std::string as_string() const; + + template + void 
serialize(ArchiveT& ar, const unsigned version); + + // in nanoseconds + static uint64_t get_delay(); + static double get_delay_scaling(); + static uint32_t get_index(); + static bool is_active(); + static bool is_selected(unwind_addr_t); + static void add_selected(); + static experiments_t get_experiments(); + + static void save_experiments(); + static void save_experiments(std::string, const filename_config_t&); + static std::vector load_experiments(bool _throw_on_err = true); + static std::vector load_experiments(std::string, const filename_config_t&, + bool = true); + + bool running = false; + uint16_t virtual_speedup = 0; /// 0-100 in multiples of 5 + uint32_t index = 0; /// experiment number + uint64_t sampling_period = 0; /// period b/t samples [nsec] + uint64_t start_time = 0; /// start of experiment [nsec] + uint64_t end_time = 0; /// end of experiment [nsec] + uint64_t experiment_time = 0; /// how long the experiment ran [nsec] + uint64_t duration = 0; /// runtime - delays [nsec] + uint64_t batch_size = 10; /// batch factor for experiment/cooloff + uint64_t scaling_factor = 50; /// scaling factor for experiment time + uint64_t sample_delay = 0; /// how long to delay [nsec] + uint64_t total_delay = 0; /// total delays [nsec] + uint64_t selected = 0; /// num times selected line sampled + uint64_t global_delay = 0; + double delay_scaling = 0.0; /// virtual_speedup / 100. 
+ selected_entry selection = {}; /// which line was selected + progress_points_t init_progress = {}; /// progress points at start + progress_points_t fini_progress = {}; /// progress points at end + period_stats_t period_stats = {}; /// stats for sampling period +}; +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/fwd.hpp b/source/lib/omnitrace/library/causal/fwd.hpp new file mode 100644 index 0000000000..22a8880bef --- /dev/null +++ b/source/lib/omnitrace/library/causal/fwd.hpp @@ -0,0 +1,52 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/binary/fwd.hpp" +#include "library/containers/static_vector.hpp" +#include "library/defines.hpp" + +#include +#include +#include + +#include +#include +#include +#include + +namespace omnitrace +{ +namespace unwind = ::tim::unwind; + +namespace causal +{ +static constexpr size_t unwind_depth = 8; +static constexpr size_t unwind_offset = 0; +using unwind_stack_t = unwind::stack; +using unwind_addr_t = container::static_vector; +using hash_value_t = tim::hash_value_t; + +struct selected_entry; +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/perf.cpp b/source/lib/omnitrace/library/causal/perf.cpp new file mode 100644 index 0000000000..662e59bbe3 --- /dev/null +++ b/source/lib/omnitrace/library/causal/perf.cpp @@ -0,0 +1,555 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "library/causal/perf.hpp" +#include "library/timemory.hpp" +#include "library/utility.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace perf +{ +namespace +{ +struct SizeParams +{ + const size_t num_pages = 2; + const size_t page = units::get_page_size(); + const size_t data = num_pages * page; + const size_t mmap = data + page; +}; +const SizeParams sizes = {}; +} // namespace + +long +perf_event_open(struct perf_event_attr* hw_event, pid_t _pid, int _cpu, int group_fd, + unsigned long flags) +{ + return syscall(__NR_perf_event_open, hw_event, _pid, _cpu, group_fd, flags); +} + +/// Move constructor +perf_event::perf_event(perf_event&& rhs) noexcept +{ + // Release resources if the current perf_event is initialized and not equal to this + // one + if(m_fd != -1 && m_fd != rhs.m_fd) + { + ::close(m_fd); + TIMEMORY_INFO << "Closed perf event fd " << m_fd; + } + + if(m_mapping != nullptr && m_mapping != rhs.m_mapping) munmap(m_mapping, sizes.mmap); + + // take rhs perf event's file descriptor and replace it with -1 + m_fd = rhs.m_fd; + rhs.m_fd = -1; + + // take rhs perf_event's mapping and replace it with nullptr + m_mapping = rhs.m_mapping; + rhs.m_mapping = nullptr; + + // Copy over the sample type and read format + m_sample_type = rhs.m_sample_type; + m_read_format = rhs.m_read_format; +} + +/// Close the perf_event file descriptor and unmap the ring buffer +perf_event::~perf_event() { close(); } + +/// Move assignment +perf_event& +perf_event::operator=(perf_event&& rhs) noexcept +{ + if(&rhs == this) 
return *this;
+
+    // Release resources if the current perf_event is initialized and not equal to this
+    // one
+    if(m_fd != -1 && m_fd != rhs.m_fd) ::close(m_fd);
+    if(m_mapping != nullptr && m_mapping != rhs.m_mapping) munmap(m_mapping, sizes.mmap);
+
+    // take rhs perf event's file descriptor and replace it with -1
+    m_fd     = rhs.m_fd;
+    rhs.m_fd = -1;
+
+    // take rhs perf_event's mapping and replace it with nullptr
+    m_mapping     = rhs.m_mapping;
+    rhs.m_mapping = nullptr;
+
+    // Copy over the sample type and read format
+    m_sample_type = rhs.m_sample_type;
+    m_read_format = rhs.m_read_format;
+
+    return *this;
+}
+
+// Open a perf_event file and map it (if sampling is enabled).
+// Returns false when the perf event cannot be opened or the ring buffer cannot be
+// mapped; on an open failure the current perf_event_paranoid value is reported.
+bool
+perf_event::open(struct perf_event_attr& _pe, pid_t _pid, int _cpu)
+{
+    m_sample_type = _pe.sample_type;
+    m_read_format = _pe.read_format;
+
+    // Set some mandatory fields
+    _pe.size     = sizeof(struct perf_event_attr);
+    _pe.disabled = 1;
+
+    // Open the file
+    m_fd = perf_event_open(&_pe, _pid, _cpu, -1, 0);
+    if(m_fd == -1)
+    {
+        std::string path = "/proc/sys/kernel/perf_event_paranoid";
+
+        FILE* file = fopen(path.c_str(), "r");
+        OMNITRACE_PREFER(file != nullptr)
+            << "Failed to open " << path << ": " << strerror(errno);
+
+        if(file == nullptr) return false;
+
+        // Read up to two characters, leaving room for the terminator. fread returns
+        // the number of items read (never -1), so read byte-sized items and treat
+        // zero as failure. Requesting one 3-byte item reports failure for a 2-byte
+        // file and can leave the buffer uninitialized.
+        char   value_str[3] = {};
+        size_t res          = fread(value_str, 1, sizeof(value_str) - 1, file);
+        int    read_errno   = errno;
+        // close the handle on every path (it was previously leaked)
+        fclose(file);
+
+        TIMEMORY_REQUIRE(res > 0)
+            << "Failed to read from " << path << ": " << strerror(read_errno);
+
+        if(res == 0) return false;
+
+        value_str[res] = '\0';
+        int value      = atoi(value_str);
+
+        TIMEMORY_WARNING << "Failed to open perf event. "
+                         << "Consider tweaking " << path << " to 2 or less "
+                         << "(current value is " << value << "), "
+                         << "or run omnitrace as a privileged user (with CAP_SYS_ADMIN).";
+        return false;
+    }
+
+    // If sampling, map the perf event file
+    if(_pe.sample_type != 0 && _pe.sample_period != 0)
+    {
+        void* ring_buffer =
+            mmap(nullptr, sizes.mmap, PROT_READ | PROT_WRITE, MAP_SHARED, m_fd, 0);
+
+        OMNITRACE_PREFER(ring_buffer != MAP_FAILED)
+            << "Mapping perf_event ring buffer failed. "
+            << "Make sure the current user has permission "
+               "to invoke the perf tool, and that "
+            << "the program being profiled does not use "
+               "an excessive number of threads (>1000).\n";
+
+        if(ring_buffer == MAP_FAILED) return false;
+
+        m_mapping = reinterpret_cast(ring_buffer);
+    }
+
+    return true;
+}
+
+// Convenience overload: builds a software task-clock sampling configuration from a
+// sampling frequency and batch size, then forwards to the overload above.
+bool
+perf_event::open(double _freq, uint32_t _batch_size, pid_t _pid, int _cpu)
+{
+    uint64_t               _period = (1.0 / _freq) * units::sec;
+    struct perf_event_attr _pe;
+
+    memset(&_pe, 0, sizeof(_pe));
+    _pe.type          = PERF_TYPE_SOFTWARE;
+    _pe.config        = PERF_COUNT_SW_TASK_CLOCK;
+    _pe.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN;
+    _pe.sample_period = _period;
+    _pe.wakeup_events = _batch_size;  // This is ignored on linux 3.13 (why?) 
+ _pe.exclude_idle = 1; + _pe.exclude_kernel = 1; + _pe.precise_ip = 0; + _pe.disabled = 1; + + return open(_pe, _pid, _cpu); +} + +/// Read event count +uint64_t +perf_event::get_count() const +{ + uint64_t count; + TIMEMORY_REQUIRE(read(m_fd, &count, sizeof(uint64_t)) == sizeof(uint64_t)) + << "Failed to read event count from perf_event file"; + return count; +} + +/// Start counting events +void +perf_event::start() const +{ + if(m_fd != -1) + { + TIMEMORY_REQUIRE(ioctl(m_fd, PERF_EVENT_IOC_ENABLE, 0) != -1) + << "Failed to start perf event: " << strerror(errno); + } +} + +/// Stop counting events +void +perf_event::stop() const +{ + if(m_fd != -1) + { + TIMEMORY_REQUIRE(ioctl(m_fd, PERF_EVENT_IOC_DISABLE, 0) != -1) + << "Failed to stop perf event: " << strerror(errno) << " (" << m_fd << ")"; + } +} + +void +perf_event::close() +{ + if(m_fd != -1) + { + ::close(m_fd); + m_fd = -1; + } + + if(m_mapping != nullptr) + { + munmap(m_mapping, sizes.mmap); + m_mapping = nullptr; + } +} + +void +perf_event::set_ready_signal(int sig) const +{ + // Set the perf_event file to async + TIMEMORY_REQUIRE(fcntl(m_fd, F_SETFL, fcntl(m_fd, F_GETFL, 0) | O_ASYNC) != -1) + << "failed to set perf_event file to async mode"; + + // Set the notification signal for the perf file + TIMEMORY_REQUIRE(fcntl(m_fd, F_SETSIG, sig) != -1) + << "failed to set perf_event file signal"; + + // Set the current thread as the owner of the file (to target signal delivery) + TIMEMORY_REQUIRE(fcntl(m_fd, F_SETOWN, gettid()) != -1) + << "failed to set the owner of the perf_event file"; +} + +void +perf_event::iterator::next() +{ + struct perf_event_header _hdr; + + // Copy out the record header + perf_event::copy_from_ring_buffer(m_mapping, m_index, &_hdr, + sizeof(struct perf_event_header)); + + // Advance to the next record + m_index += _hdr.size; +} + +perf_event::iterator::iterator(perf_event& _source, struct perf_event_mmap_page* _mapping) +: m_source{ _source } +, m_mapping{ _mapping } +{ + 
if(_mapping != nullptr) + { + m_index = _mapping->data_tail; + m_head = _mapping->data_head; + } + else + { + m_index = 0; + m_head = 0; + } +} + +perf_event::iterator::~iterator() +{ + if(m_mapping != nullptr) + { + m_mapping->data_tail = m_index; + } +} + +perf_event::iterator& +perf_event::iterator::operator++() +{ + next(); + return *this; +} + +bool +perf_event::iterator::operator!=(const iterator& other) const +{ + return has_data() != other.has_data(); +} + +perf_event::record +perf_event::iterator::get() +{ + // Copy out the record header + perf_event::copy_from_ring_buffer(m_mapping, m_index, _buf, + sizeof(struct perf_event_header)); + + // Get a pointer to the header + struct perf_event_header* header = reinterpret_cast(_buf); + + // Copy out the entire record + perf_event::copy_from_ring_buffer(m_mapping, m_index, _buf, header->size); + + return perf_event::record(m_source, header); +} + +bool +perf_event::iterator::has_data() const +{ + // If there is no ring buffer, there is no data + if(m_mapping == nullptr) + { + return false; + } + + // If there isn't enough data in the ring buffer to hold a header, there is no data + if(m_index + sizeof(struct perf_event_header) >= m_head) + { + return false; + } + + struct perf_event_header _hdr; + perf_event::copy_from_ring_buffer(m_mapping, m_index, &_hdr, + sizeof(struct perf_event_header)); + + // If the first record is larger than the available data, nothing can be read + if(m_index + _hdr.size > m_head) + { + return false; + } + + return true; +} + +void +perf_event::copy_from_ring_buffer(struct perf_event_mmap_page* _mapping, ptrdiff_t _index, + void* _dest, size_t _nbytes) +{ + uintptr_t _base = reinterpret_cast(_mapping) + sizes.page; + size_t _beg_idx = _index % sizes.data; + size_t _end_idx = _beg_idx + _nbytes; + + if(_end_idx <= sizes.data) + { + memcpy(_dest, reinterpret_cast(_base + _beg_idx), _nbytes); + } + else + { + size_t _chunk_size2 = _end_idx - sizes.data; + size_t _chunk_size1 = _nbytes - 
_chunk_size2; + + void* _dest2 = + reinterpret_cast(reinterpret_cast(_dest) + _chunk_size1); + + memcpy(_dest, reinterpret_cast(_base + _beg_idx), _chunk_size1); + memcpy(_dest2, reinterpret_cast(_base), _chunk_size2); + } +} + +uint64_t +perf_event::record::get_ip() const +{ + TIMEMORY_ASSERT(is_sample() && m_source.is_sampling(sample::ip)) + << "Record does not have an ip field"; + return *locate_field(); +} + +uint64_t +perf_event::record::get_pid() const +{ + TIMEMORY_ASSERT(is_sample() && m_source.is_sampling(sample::pid_tid)) + << "Record does not have a `pid` field"; + return locate_field()[0]; +} + +uint64_t +perf_event::record::get_tid() const +{ + TIMEMORY_ASSERT(is_sample() && m_source.is_sampling(sample::pid_tid)) + << "Record does not have a `tid` field"; + return locate_field()[1]; +} + +uint64_t +perf_event::record::get_time() const +{ + TIMEMORY_ASSERT(is_sample() && m_source.is_sampling(sample::time)) + << "Record does not have a 'time' field"; + return *locate_field(); +} + +uint32_t +perf_event::record::get_cpu() const +{ + TIMEMORY_ASSERT(is_sample() && m_source.is_sampling(sample::cpu)) + << "Record does not have a 'cpu' field"; + return *locate_field(); +} + +container::c_array +perf_event::record::get_callchain() const +{ + TIMEMORY_ASSERT(is_sample() && m_source.is_sampling(sample::callchain)) + << "Record does not have a callchain field"; + + uint64_t* _base = locate_field(); + uint64_t _size = *_base; + // Advance the callchain array pointer past the size + _base++; + return container::wrap_c_array(_base, _size); +} + +template +Tp +perf_event::record::locate_field() const +{ + uintptr_t p = + reinterpret_cast(m_header) + sizeof(struct perf_event_header); + + // Walk through the fields in the sample structure. Once the requested field is + // reached, return. 
Skip past any unrequested fields that are included in the sample + // type + + // ip + if constexpr(SampleT == sample::ip) return reinterpret_cast(p); + if(m_source.is_sampling(sample::ip)) p += sizeof(uint64_t); + + // pid, tid + if constexpr(SampleT == sample::pid_tid) return reinterpret_cast(p); + if(m_source.is_sampling(sample::pid_tid)) p += sizeof(uint32_t) + sizeof(uint32_t); + + // time + if constexpr(SampleT == sample::time) return reinterpret_cast(p); + if(m_source.is_sampling(sample::time)) p += sizeof(uint64_t); + + // addr + if constexpr(SampleT == sample::addr) return reinterpret_cast(p); + if(m_source.is_sampling(sample::addr)) p += sizeof(uint64_t); + + // id + if constexpr(SampleT == sample::id) return reinterpret_cast(p); + if(m_source.is_sampling(sample::id)) p += sizeof(uint64_t); + + // stream_id + if constexpr(SampleT == sample::stream_id) return reinterpret_cast(p); + if(m_source.is_sampling(sample::stream_id)) p += sizeof(uint64_t); + + // cpu + if constexpr(SampleT == sample::cpu) return reinterpret_cast(p); + if(m_source.is_sampling(sample::cpu)) p += sizeof(uint32_t) + sizeof(uint32_t); + + // period + if constexpr(SampleT == sample::period) return reinterpret_cast(p); + if(m_source.is_sampling(sample::period)) p += sizeof(uint64_t); + + // value + if constexpr(SampleT == sample::read) return reinterpret_cast(p); + if(m_source.is_sampling(sample::read)) + { + uint64_t read_format = m_source.get_read_format(); + if(read_format & PERF_FORMAT_GROUP) + { + // Get the number of values in the read format structure + uint64_t nr = *reinterpret_cast(p); + // The default size of each entry is a u64 + size_t sz = sizeof(uint64_t); + // If requested, the id will be included with each value + if(read_format & PERF_FORMAT_ID) sz += sizeof(uint64_t); + // Skip over the entry count, and each entry + p += sizeof(uint64_t) + nr * sz; + } + else + { + // Skip over the value + p += sizeof(uint64_t); + // Skip over the id, if included + if(read_format & 
PERF_FORMAT_ID) p += sizeof(uint64_t); + } + + // Skip over the time_enabled field + if(read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) p += sizeof(uint64_t); + // Skip over the time_running field + if(read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) p += sizeof(uint64_t); + } + + // callchain + if constexpr(SampleT == sample::callchain) return reinterpret_cast(p); + if(m_source.is_sampling(sample::callchain)) + { + uint64_t nr = *reinterpret_cast(p); + p += sizeof(uint64_t) + nr * sizeof(uint64_t); + } + + // raw + if constexpr(SampleT == sample::raw) return reinterpret_cast(p); + if(m_source.is_sampling(sample::raw)) + { + uint32_t raw_size = *reinterpret_cast(p); + p += sizeof(uint32_t) + raw_size; + } + + // branch_stack + if constexpr(SampleT == sample::branch_stack) return reinterpret_cast(p); + if(m_source.is_sampling(sample::branch_stack)) + TIMEMORY_FATAL << "Branch stack sampling is not supported"; + + // regs + if constexpr(SampleT == sample::regs) return reinterpret_cast(p); + if(m_source.is_sampling(sample::regs)) + TIMEMORY_FATAL << "Register sampling is not supported"; + + // stack + if constexpr(SampleT == sample::stack) return reinterpret_cast(p); + if(m_source.is_sampling(sample::stack)) + TIMEMORY_FATAL << "Stack sampling is not supported"; + + // end + if constexpr(SampleT == sample::last) return reinterpret_cast(p); + + TIMEMORY_FATAL << "Unsupported sample field requested!"; +} +} // namespace perf +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/perf.hpp b/source/lib/omnitrace/library/causal/perf.hpp new file mode 100644 index 0000000000..17c96aacf9 --- /dev/null +++ b/source/lib/omnitrace/library/causal/perf.hpp @@ -0,0 +1,256 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/containers/c_array.hpp" +#include "library/defines.hpp" + +#include +#include +#include +#include +#include + +#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 30 +# include +# define gettid() syscall(SYS_gettid) +#endif + +// Workaround for missing hw_breakpoint.h include file: +// This include file just defines constants used to configure watchpoint registers. +// This will be constant across x86 systems. 
+enum +{ + HW_BREAKPOINT_X = 4 +}; + +namespace omnitrace +{ +namespace causal +{ +namespace perf +{ +/// An enum class with all the available sampling data +enum class sample : uint64_t +{ + ip = PERF_SAMPLE_IP, + pid_tid = PERF_SAMPLE_TID, + time = PERF_SAMPLE_TIME, + addr = PERF_SAMPLE_ADDR, + id = PERF_SAMPLE_ID, + stream_id = PERF_SAMPLE_STREAM_ID, + cpu = PERF_SAMPLE_CPU, + period = PERF_SAMPLE_PERIOD, + +#if defined(PREF_SAMPLE_READ) + read = PERF_SAMPLE_READ, +#else + read = 0, +#endif + + callchain = PERF_SAMPLE_CALLCHAIN, + raw = PERF_SAMPLE_RAW, + +#if defined(PERF_SAMPLE_BRANCH_STACK) + branch_stack = PERF_SAMPLE_BRANCH_STACK, +#else + branch_stack = 0, +#endif + +#if defined(PERF_SAMPLE_REGS_USER) + regs = PERF_SAMPLE_REGS_USER, +#else + regs = 0, +#endif + +#if defined(PERF_SAMPLE_STACK_USER) + stack = PERF_SAMPLE_STACK_USER, +#else + stack = 0, +#endif + + last = PERF_SAMPLE_MAX +}; + +struct perf_event +{ + enum class record_type; + struct record; + struct sample_record; + + /// Default constructor + perf_event() = default; + /// Move constructor + perf_event(perf_event&& other) noexcept; + + /// Close the perf event file and unmap the ring buffer + ~perf_event(); + + /// Move assignment is supported + perf_event& operator=(perf_event&& other) noexcept; + + perf_event(const perf_event&) = delete; + perf_event& operator=(const perf_event&) = delete; + + /// Open a perf_event file using the given options structure + bool open(struct perf_event_attr& pe, pid_t pid = 0, int cpu = -1); + bool open(double, uint32_t, pid_t pid = 0, int cpu = -1); + + /// Read event count + uint64_t get_count() const; + + /// Start counting events and collecting samples + void start() const; + + /// Stop counting events + void stop() const; + + /// Close the perf_event file and unmap the ring buffer + void close(); + + /// Configure the perf_event file to deliver a signal when samples are ready to be + /// processed + void set_ready_signal(int sig) const; + + /// Check if 
this perf_event was configured to collect a type of sample data + inline bool is_sampling(sample s) const + { + return (m_sample_type & static_cast(s)) != 0u; + } + + /// Get the configuration for this perf_event's read format + inline uint64_t get_read_format() const { return m_read_format; } + + /// An enum to distinguish types of records in the mmapped ring buffer + enum class record_type + { + mmap = PERF_RECORD_MMAP, + lost = PERF_RECORD_LOST, + comm = PERF_RECORD_COMM, + exit = PERF_RECORD_EXIT, + throttle = PERF_RECORD_THROTTLE, + unthrottle = PERF_RECORD_UNTHROTTLE, + fork = PERF_RECORD_FORK, + read = PERF_RECORD_READ, + sample = PERF_RECORD_SAMPLE, + +#if defined(PERF_RECORD_MMAP2) + mmap2 = PERF_RECORD_MMAP2 +#else + mmap2 = 0 +#endif + }; + + class iterator; + + /// A generic record type + struct record + { + friend class perf_event::iterator; + + record_type get_type() const { return static_cast(m_header->type); } + + inline bool is_mmap() const { return get_type() == record_type::mmap; } + inline bool is_lost() const { return get_type() == record_type::lost; } + inline bool is_comm() const { return get_type() == record_type::comm; } + inline bool is_exit() const { return get_type() == record_type::exit; } + inline bool is_throttle() const { return get_type() == record_type::throttle; } + inline bool is_unthrottle() const + { + return get_type() == record_type::unthrottle; + } + inline bool is_fork() const { return get_type() == record_type::fork; } + inline bool is_read() const { return get_type() == record_type::read; } + inline bool is_sample() const { return get_type() == record_type::sample; } + inline bool is_mmap2() const { return get_type() == record_type::mmap2; } + + uint64_t get_ip() const; + uint64_t get_pid() const; + uint64_t get_tid() const; + uint64_t get_time() const; + uint32_t get_cpu() const; + container::c_array get_callchain() const; + + private: + record(const perf_event& source, struct perf_event_header* header) + : 
m_source(source) + , m_header(header) + {} + + template + Tp locate_field() const; + + const perf_event& m_source; + struct perf_event_header* m_header; + }; + + class iterator + { + public: + iterator(perf_event& source, struct perf_event_mmap_page* mapping); + ~iterator(); + + void next(); + record get(); + bool has_data() const; + + iterator& operator++(); + record operator*() { return get(); } + bool operator!=(const iterator& other) const; + + private: + perf_event& m_source; + size_t m_index = 0; + size_t m_head = 0; + struct perf_event_mmap_page* m_mapping = nullptr; + + // Buffer to hold the current record. Just a hack until records play nice with the + // ring buffer + uint8_t _buf[4096]; + }; + + /// Get an iterator to the beginning of the memory mapped ring buffer + iterator begin() { return iterator(*this, m_mapping); } + + // Get an iterator to the end of the memory mapped ring buffer + iterator end() { return iterator(*this, nullptr); } + +private: + // Copy data out of the mmap ring buffer + static void copy_from_ring_buffer(struct perf_event_mmap_page* mapping, + ptrdiff_t index, void* dest, size_t bytes); + + /// File descriptor for the perf event + long m_fd = -1; + + /// Memory mapped perf event region + struct perf_event_mmap_page* m_mapping = nullptr; + + /// The sample type from this perf_event's configuration + uint64_t m_sample_type = 0; + /// The read format from this perf event's configuration + uint64_t m_read_format = 0; +}; +} // namespace perf +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/sample_data.cpp b/source/lib/omnitrace/library/causal/sample_data.cpp new file mode 100644 index 0000000000..cbd310634f --- /dev/null +++ b/source/lib/omnitrace/library/causal/sample_data.cpp @@ -0,0 +1,71 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/causal/sample_data.hpp" + +#include +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace +{ +auto samples = std::map>{}; +} + +std::set +get_samples(uint32_t _index) +{ + return samples[_index]; +} + +std::map> +get_samples() +{ + return samples; +} + +void +add_sample(uint32_t _index, uintptr_t _v) +{ + auto& _samples = samples[_index]; + auto _value = sample_data{ _v }; + _value.count = 1; + auto itr = _samples.find(_value); + if(itr == _samples.end()) + _samples.emplace(_value); + else + itr->count += 1; +} + +void +add_samples(uint32_t _index, const std::vector& _v) +{ + for(const auto& itr : _v) + add_sample(_index, itr); +} +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/sample_data.hpp b/source/lib/omnitrace/library/causal/sample_data.hpp new file mode 100644 index 0000000000..3d38e859cf --- /dev/null +++ b/source/lib/omnitrace/library/causal/sample_data.hpp @@ -0,0 +1,64 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/defines.hpp" +#include "library/timemory.hpp" + +#include +#include + +namespace omnitrace +{ +namespace causal +{ +struct sample_data +{ + uintptr_t address = 0x0; + mutable uint64_t count = 0; + + bool operator==(sample_data _v) const { return (address == _v.address); } + bool operator!=(sample_data _v) const { return !(*this == _v); } + bool operator<(sample_data _v) const { return (address < _v.address); } + bool operator>(sample_data _v) const { return (address > _v.address); } + bool operator<=(sample_data _v) const { return (address <= _v.address); } + bool operator>=(sample_data _v) const { return (address >= _v.address); } + + template + void serialize(ArchiveT& ar, const unsigned) + { + ar(cereal::make_nvp("address", address), cereal::make_nvp("count", count)); + } +}; + +std::map> +get_samples(); + +void +add_samples(uint32_t, const std::vector&); + +std::set get_samples(uint32_t); + +void add_sample(uint32_t, uintptr_t); +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/sampling.cpp b/source/lib/omnitrace/library/causal/sampling.cpp new file mode 100644 index 0000000000..f43ad2f63a --- /dev/null +++ b/source/lib/omnitrace/library/causal/sampling.cpp @@ -0,0 +1,375 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/causal/sampling.hpp" +#include "library/causal/components/backtrace.hpp" +#include "library/causal/data.hpp" +#include "library/common.hpp" +#include "library/concepts.hpp" +#include "library/config.hpp" +#include "library/debug.hpp" +#include "library/ptl.hpp" +#include "library/runtime.hpp" +#include "library/sampling.hpp" +#include "library/state.hpp" +#include "library/thread_data.hpp" +#include "library/thread_info.hpp" +#include "library/utility.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace sampling +{ +using ::tim::sampling::dynamic; +using ::tim::sampling::timer; + +using causal_bundle_t = tim::lightweight_tuple; +using causal_sampler_t = tim::sampling::sampler; +} // namespace sampling +} // namespace causal +} // namespace omnitrace + +OMNITRACE_DEFINE_CONCRETE_TRAIT(prevent_reentry, causal::sampling::causal_sampler_t, + std::true_type) + +OMNITRACE_DEFINE_CONCRETE_TRAIT(provide_backtrace, causal::sampling::causal_sampler_t, + std::false_type) + +OMNITRACE_DEFINE_CONCRETE_TRAIT(buffer_size, causal::sampling::causal_sampler_t, + TIMEMORY_ESC(std::integral_constant)) + +namespace omnitrace +{ +namespace causal +{ +namespace sampling +{ +namespace +{ +using causal_sampler_allocator_t = typename causal_sampler_t::allocator_t; +using causal_sampler_bundle_t = typename causal_sampler_t::bundle_type; +using causal_sampler_buffer_t = tim::data_storage::ring_buffer; + +struct causal_sampling +{}; + +std::set +configure(bool _setup, int64_t _tid = threading::get_id()); + +std::shared_ptr& +get_causal_sampler_allocator(bool _construct) +{ + static auto _v = std::shared_ptr{}; + if(!_v && _construct) _v = std::make_shared(); + return _v; +} + +auto& +get_causal_sampler_signals() +{ + using thread_data_t = thread_data>, causal_sampling>; + static auto& _v = 
thread_data_t::instance(construct_on_init{}); + return _v; +} + +auto& +get_causal_sampler_running() +{ + using thread_data_t = thread_data, causal_sampling>; + static auto& _v = thread_data_t::instance(construct_on_init{}); + return _v; +} + +auto& +get_causal_samplers() +{ + using thread_data_t = + thread_data>, causal_sampling>; + static auto& _v = thread_data_t::instance(construct_on_init{}); + return _v; +} + +std::set& +get_causal_sampler_signals(int64_t _tid) +{ + auto& _data = get_causal_sampler_signals(); + if(static_cast(_tid) >= _data->size()) + _data->resize(_tid + 1, std::set{}); + return _data->at(_tid); +} + +bool& +get_causal_sampler_running(int64_t _tid) +{ + auto& _data = get_causal_sampler_running(); + if(static_cast(_tid) >= _data->size()) _data->resize(_tid + 1, false); + return _data->at(_tid); +} + +auto& +get_causal_sampler(int64_t _tid) +{ + auto& _data = get_causal_samplers(); + if(static_cast(_tid) >= _data->size()) _data->resize(_tid + 1); + return _data->at(_tid); +} + +void +causal_offload_buffer(int64_t, causal_sampler_buffer_t&& _buf) +{ + auto _data = std::move(_buf); + auto _processed = std::map>{}; + while(!_data.is_empty()) + { + auto _bundle = causal_sampler_bundle_t{}; + _data.read(&_bundle); + auto* _bt_causal = _bundle.get(); + if(_bt_causal) + { + for(auto&& itr : _bt_causal->get_stack()) + { + if(itr > 0) _processed[_bt_causal->get_index()].emplace_back(itr); + } + } + } + _data.destroy(); + + if(!_processed.empty()) + { + tasking::general::get_task_group().exec([_processed]() { + static std::mutex _mutex; + auto _lk = std::scoped_lock{ _mutex }; + for(const auto& itr : _processed) + add_samples(itr.first, itr.second); + }); + } +} + +std::set +configure(bool _setup, int64_t _tid) +{ + const auto& _info = thread_info::get(_tid, SequentTID); + auto& _causal = get_causal_sampler(_tid); + auto& _running = get_causal_sampler_running(_tid); + auto& _signal_types = get_causal_sampler_signals(_tid); + + 
OMNITRACE_CONDITIONAL_THROW(get_use_sampling(), + "Internal error! configuring causal profiling not " + "permitted when sampling is enabled"); + + OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); + + if(_setup && _signal_types.empty()) _signal_types = get_sampling_signals(_tid); + + if(_setup && !_causal && !_running && !_signal_types.empty()) + { + auto _verbose = std::min(get_verbose() - 2, 2); + if(get_debug_sampling()) _verbose = 2; + + // if this thread has an offset ID, that means it was created internally + // and is probably here bc it called a function which was instrumented. + // thus we should not start a sampler for it + if(_tid > 0 && _info && _info->is_offset) return std::set{}; + // if the thread state is disabled or completed, return + if(_info && _info->index_data->sequent_value == _tid && + get_thread_state() == ThreadState::Disabled) + return std::set{}; + + (void) get_debug_sampling(); // make sure query in sampler does not allocate + assert(_tid == threading::get_id()); + + auto _causal_alloc = get_causal_sampler_allocator(true); + _causal = std::make_unique(_causal_alloc, "omnitrace", _tid, + _verbose); + + TIMEMORY_REQUIRE(_causal) << "nullptr to causal profiling instance"; + + _causal->set_flags(SA_RESTART); + _causal->set_verbose(_verbose); + _causal->set_offload(&causal_offload_buffer); + + _causal->configure(timer{ get_realtime_signal(), CLOCK_REALTIME, SIGEV_THREAD_ID, + 1000.0, 1.0e-6, _tid, threading::get_sys_tid() }); + + _causal->configure(timer{ get_cputime_signal(), CLOCK_THREAD_CPUTIME_ID, + SIGEV_THREAD_ID, 1000.0, 1.0e-6, _tid, + threading::get_sys_tid() }); + + _running = true; + if(_tid == 0) causal::component::backtrace::start(); + _causal->start(); + } + else if(!_setup && _causal && _running) + { + OMNITRACE_DEBUG("Destroying causal sampler for thread %lu...\n", _tid); + _running = false; + + if(_tid == threading::get_id() && !_signal_types.empty()) + block_signals(_signal_types); + + if(_tid == 0) + { + block_samples(); 
+ + // this propagates to all threads + _causal->ignore(_signal_types); + + for(int64_t i = 1; i < OMNITRACE_MAX_THREADS; ++i) + { + if(get_causal_sampler(i)) get_causal_sampler(i)->reset(); + } + } + + _causal->stop(); + + OMNITRACE_DEBUG("Causal sampler destroyed for thread %lu\n", _tid); + } + + return _signal_types; +} + +void +post_process_causal(int64_t _tid, const std::vector& _data); +} // namespace + +std::set +get_signal_types(int64_t _tid) +{ + return (get_causal_sampler_signals()) ? get_causal_sampler_signals(_tid) + : std::set{}; +} + +std::set +setup() +{ + if(!get_use_causal()) return std::set{}; + return configure(true); +} + +std::set +shutdown() +{ + auto _v = configure(false); + return _v; +} + +void +block_samples() +{ + trait::runtime_enabled::set(false); +} + +void +unblock_samples() +{ + trait::runtime_enabled::set(true); +} + +void +block_signals(std::set _signals) +{ + if(_signals.empty()) _signals = get_signal_types(threading::get_id()); + if(_signals.empty()) return; + + ::omnitrace::sampling::block_signals(_signals); +} + +void +unblock_signals(std::set _signals) +{ + if(_signals.empty()) _signals = get_signal_types(threading::get_id()); + if(_signals.empty()) return; + + ::omnitrace::sampling::unblock_signals(_signals); +} + +void +post_process() +{ + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + + OMNITRACE_VERBOSE(2 || get_debug_sampling(), + "Stopping causal sampling components...\n"); + + for(size_t i = 0; i < max_supported_threads; ++i) + { + auto& _causal = get_causal_sampler(i); + if(_causal) _causal->stop(); + } + + configure(false, 0); + + for(size_t i = 0; i < max_supported_threads; ++i) + { + auto& _causal = get_causal_sampler(i); + auto _causal_data = + (_causal) ? 
_causal->get_data() : std::vector{}; + + if(!_causal_data.empty()) post_process_causal(i, _causal_data); + } + + for(size_t i = 0; i < max_supported_threads; ++i) + { + get_causal_sampler(i).reset(); + } + + if(get_causal_sampler_allocator(false)) + { + get_causal_sampler_allocator(false).reset(); + } +} + +namespace +{ +void +post_process_causal(int64_t, const std::vector& _data) +{ + for(const auto& itr : _data) + { + const auto* _bt_causal = itr.get(); + for(auto&& ditr : _bt_causal->get_stack()) + { + if(ditr > 0) add_sample(_bt_causal->get_index(), ditr); + } + } +} +} // namespace +} // namespace sampling +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/sampling.hpp b/source/lib/omnitrace/library/causal/sampling.hpp new file mode 100644 index 0000000000..7562444229 --- /dev/null +++ b/source/lib/omnitrace/library/causal/sampling.hpp @@ -0,0 +1,62 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/concepts.hpp" +#include "library/defines.hpp" + +#include +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +namespace sampling +{ +std::set +get_signal_types(int64_t _tid); + +void +block_samples(); + +void +unblock_samples(); + +void block_signals(std::set = {}); + +void unblock_signals(std::set = {}); + +std::set +setup(); + +std::set +shutdown(); + +void +post_process(); +} // namespace sampling +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/selected_entry.cpp b/source/lib/omnitrace/library/causal/selected_entry.cpp new file mode 100644 index 0000000000..6c0c9944f2 --- /dev/null +++ b/source/lib/omnitrace/library/causal/selected_entry.cpp @@ -0,0 +1,52 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "library/causal/selected_entry.hpp" +#include "library/common.hpp" +#include "library/timemory.hpp" + +namespace omnitrace +{ +namespace causal +{ +template +void +selected_entry::serialize(ArchiveT& ar, const unsigned int) +{ + using ::tim::cereal::make_nvp; + ar(make_nvp("address", address), make_nvp("symbol_address", symbol_address), + make_nvp("info", symbol)); +} + +template void +selected_entry::serialize(cereal::JSONInputArchive&, + const unsigned int); + +template void +selected_entry::serialize( + cereal::MinimalJSONOutputArchive&, const unsigned int); + +template void +selected_entry::serialize( + cereal::PrettyJSONOutputArchive&, const unsigned int); +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/causal/selected_entry.hpp b/source/lib/omnitrace/library/causal/selected_entry.hpp new file mode 100644 index 0000000000..848af86c18 --- /dev/null +++ b/source/lib/omnitrace/library/causal/selected_entry.hpp @@ -0,0 +1,70 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/binary/dwarf_entry.hpp" +#include "library/binary/fwd.hpp" +#include "library/binary/symbol.hpp" +#include "library/causal/fwd.hpp" +#include "library/debug.hpp" +#include "library/defines.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace causal +{ +struct selected_entry +{ + TIMEMORY_DEFAULT_OBJECT(selected_entry) + + uintptr_t address = 0x0; + uintptr_t symbol_address = 0x0; + binary::symbol symbol = {}; + + template + void serialize(ArchiveT&, const unsigned int); + + bool contains(uintptr_t) const; + explicit operator bool() const { return (address > 0 && symbol.address); } +}; + +inline bool +selected_entry::contains(uintptr_t _v) const +{ + return (_v == address || (symbol_address > 0 && _v == symbol_address) || + symbol.ipaddr().contains(_v)); +} +} // namespace causal +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/common.hpp b/source/lib/omnitrace/library/common.hpp index 16405eacfb..a8df0996f2 100644 --- a/source/lib/omnitrace/library/common.hpp +++ b/source/lib/omnitrace/library/common.hpp @@ -113,12 +113,19 @@ namespace threading = ::tim::threading; // NOLINT namespace scope = ::tim::scope; // NOLINT namespace policy = ::tim::policy; // NOLINT namespace trait = ::tim::trait; // NOLINT +namespace cereal = ::tim::cereal; // NOLINT using ::tim::auto_lock_t; // NOLINT using ::tim::demangle; // NOLINT using ::tim::get_env; 
// NOLINT +using ::tim::set_env; // NOLINT using ::tim::try_demangle; // NOLINT using ::tim::type_mutex; // NOLINT + +struct construct_on_thread +{ + int64_t index = threading::get_id(); +}; } // namespace omnitrace // same sort of functionality as python's " ".join([...]) diff --git a/source/lib/omnitrace/library/components/backtrace.cpp b/source/lib/omnitrace/library/components/backtrace.cpp index dfbec0b628..b2fe1cbbb8 100644 --- a/source/lib/omnitrace/library/components/backtrace.cpp +++ b/source/lib/omnitrace/library/components/backtrace.cpp @@ -118,6 +118,7 @@ backtrace::filter_and_patch(const std::vector& _data) bool _keep_internal = get_sampling_keep_internal(); const auto _npos = std::string::npos; if(_keep_internal) return 1; + if(_lbl.find("omnitrace_main") != _npos) return 0; if(_lbl.find("omnitrace::common::") != _npos) return 0; if(_lbl.find("omnitrace::") != _npos) return 0; if(_lbl.find("tim::") != _npos) return 0; @@ -184,7 +185,7 @@ backtrace::sample(int) { using namespace tim::backtrace; constexpr bool with_signal_frame = false; - constexpr size_t ignore_depth = 4; + constexpr size_t ignore_depth = 3; // ignore depth based on: // 1. this frame // 2. tim::sampling::sampler<...>::sample(...) 
[always inline] diff --git a/source/lib/omnitrace/library/components/backtrace.hpp b/source/lib/omnitrace/library/components/backtrace.hpp index 03522a7bc3..63a91f4487 100644 --- a/source/lib/omnitrace/library/components/backtrace.hpp +++ b/source/lib/omnitrace/library/components/backtrace.hpp @@ -79,6 +79,7 @@ struct backtrace bool empty() const; size_t size() const; std::vector get() const; + data_t get_data() const { return m_data; } private: data_t m_data = {}; diff --git a/source/lib/omnitrace/library/components/backtrace_metrics.cpp b/source/lib/omnitrace/library/components/backtrace_metrics.cpp index 9eca1133ca..c18bba6f6a 100644 --- a/source/lib/omnitrace/library/components/backtrace_metrics.cpp +++ b/source/lib/omnitrace/library/components/backtrace_metrics.cpp @@ -96,8 +96,7 @@ struct perfetto_rusage unique_ptr_t>& get_papi_labels(int64_t _tid) { - static auto& _v = - papi_label_instances::instances(papi_label_instances::construct_on_init{}); + static auto& _v = papi_label_instances::instances(construct_on_init{}); return _v.at(_tid); } @@ -119,8 +118,7 @@ get_backtrace_metrics_init(int64_t _tid) unique_ptr_t& get_sampler_running(int64_t _tid) { - static auto& _v = sampler_running_instances::instances( - sampler_running_instances::construct_on_init{}, false); + static auto& _v = sampler_running_instances::instances(construct_on_init{}, false); return _v.at(_tid); } } // namespace diff --git a/source/lib/omnitrace/library/components/category_region.hpp b/source/lib/omnitrace/library/components/category_region.hpp index 7b85e26ff2..ebc1e18296 100644 --- a/source/lib/omnitrace/library/components/category_region.hpp +++ b/source/lib/omnitrace/library/components/category_region.hpp @@ -22,10 +22,12 @@ #pragma once +#include "library/causal/data.hpp" #include "library/config.hpp" #include "library/critical_trace.hpp" #include "library/defines.hpp" #include "library/runtime.hpp" +#include "library/state.hpp" #include "library/timemory.hpp" #include 
"library/tracing.hpp" #include "library/tracing/annotation.hpp" @@ -41,6 +43,9 @@ namespace tim { namespace quirk { +struct causal : concepts::quirk_type +{}; + struct perfetto : concepts::quirk_type {}; @@ -53,6 +58,34 @@ namespace omnitrace { namespace component { +using tim::is_one_of; +using tim::type_list; + +// these categories increment push/pop counts, which are used for sanity checks since +// they should ALWAYS be popped if they were pushed +using tracing_count_categories_t = + type_list; + +// these categories are added to the critical trace +using critical_trace_categories_t = type_list; + +// convert these categories to throughput points +using causal_throughput_categories_t = + type_list; + +// define this outside of category region functions so that the +// static thread_local is global instead of per-template instantiation +inline ThreadState +get_thread_status() +{ + static thread_local auto _thread_init_once = std::once_flag{}; + std::call_once(_thread_init_once, tracing::thread_init); + + return get_thread_state(); +} + // timemory component which calls omnitrace functions // (used in gotcha wrappers) template @@ -70,6 +103,9 @@ struct category_region : comp::base, void> template static void stop(std::string_view name, Args&&...); + template + static void mark(std::string_view name, Args&&...); + template static void audit(const gotcha_data_t&, audit::incoming, Args&&...); @@ -96,7 +132,7 @@ category_region::start(std::string_view name, Args&&... args) // unconditionally return if thread is disabled or finalized if(get_thread_state() == ThreadState::Disabled) return; - if(get_state() == State::Finalized) return; + if(get_state() >= State::Finalized) return; OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); @@ -105,31 +141,36 @@ category_region::start(std::string_view name, Args&&... args) // tooling one time and as it exits set it to active and return true. 
if(get_state() != State::Active && !omnitrace_init_tooling_hidden()) return; - tracing::thread_init(); - - // thread initialization may have disabled the thread - if(get_thread_state() == ThreadState::Disabled) return; - - tracing::thread_init_sampling(); + if(get_thread_status() == ThreadState::Disabled) return; constexpr bool _ct_use_timemory = - (sizeof...(OptsT) == 0 || - tim::is_one_of>::value); + (sizeof...(OptsT) == 0 || is_one_of>::value); constexpr bool _ct_use_perfetto = - (sizeof...(OptsT) == 0 || - tim::is_one_of>::value); + (sizeof...(OptsT) == 0 || is_one_of>::value); - OMNITRACE_CONDITIONAL_PRINT(tracing::debug_push, - "[%s][PID=%i][state=%s] omnitrace_push_region(%s)\n", - category_name, process::get_id(), - std::to_string(get_state()).c_str(), name.data()); + constexpr bool _ct_use_causal = + (sizeof...(OptsT) == 0 || is_one_of>::value); - if constexpr(tim::is_one_of>::value) + OMNITRACE_CONDITIONAL_PRINT( + tracing::debug_push, + "[%s][PID=%i][state=%s][thread_state=%s] omnitrace_push_region(%s)\n", + category_name, process::get_id(), std::to_string(get_state()).c_str(), + std::to_string(get_thread_state()).c_str(), name.data()); + + if constexpr(is_one_of::value) { ++tracing::push_count(); } + if constexpr(_ct_use_causal) + { + if constexpr(!is_one_of::value) + { + if(get_use_causal()) causal::push_progress_point(name); + } + } + if constexpr(_ct_use_perfetto) { if(get_use_perfetto()) @@ -146,7 +187,7 @@ category_region::start(std::string_view name, Args&&... args) } } - if constexpr(tim::is_one_of>::value) + if constexpr(is_one_of::value) { using Device = critical_trace::Device; using Phase = critical_trace::Phase; @@ -178,22 +219,24 @@ category_region::stop(std::string_view name, Args&&... 
args) OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); constexpr bool _ct_use_timemory = - (sizeof...(OptsT) == 0 || - tim::is_one_of>::value); + (sizeof...(OptsT) == 0 || is_one_of>::value); constexpr bool _ct_use_perfetto = - (sizeof...(OptsT) == 0 || - tim::is_one_of>::value); + (sizeof...(OptsT) == 0 || is_one_of>::value); - OMNITRACE_CONDITIONAL_PRINT(tracing::debug_pop, - "[%s][PID=%i][state=%s] omnitrace_pop_region(%s)\n", - category_name, process::get_id(), - std::to_string(get_state()).c_str(), name.data()); + constexpr bool _ct_use_causal = + (sizeof...(OptsT) == 0 || is_one_of>::value); + + OMNITRACE_CONDITIONAL_PRINT( + tracing::debug_pop, + "[%s][PID=%i][state=%s][thread_state=%s] omnitrace_pop_region(%s)\n", + category_name, process::get_id(), std::to_string(get_state()).c_str(), + std::to_string(get_thread_state()).c_str(), name.data()); // only execute when active if(get_state() == State::Active) { - if constexpr(tim::is_one_of>::value) + if constexpr(is_one_of::value) { ++tracing::pop_count(); } @@ -216,7 +259,19 @@ category_region::stop(std::string_view name, Args&&... args) } } - if constexpr(tim::is_one_of>::value) + if constexpr(_ct_use_causal) + { + if constexpr(is_one_of::value) + { + if(get_use_causal()) causal::mark_progress_point(name); + } + else + { + if(get_use_causal()) causal::pop_progress_point(name); + } + } + + if constexpr(is_one_of::value) { using Device = critical_trace::Device; using Phase = critical_trace::Phase; @@ -249,6 +304,41 @@ category_region::stop(std::string_view name, Args&&... args) } } +template +template +void +category_region::mark(std::string_view name, Args&&...) 
+{ + constexpr bool _ct_use_causal = + (sizeof...(OptsT) == 0 || is_one_of>::value); + + if constexpr(!_ct_use_causal) return; + + // skip if category is disabled + if(!trait::runtime_enabled::get()) return; + + // the expectation here is that if the state is not active then the call + // to omnitrace_init_tooling_hidden will activate all the appropriate + // tooling one time and as it exits set it to active and return true. + if(get_state() != State::Active && !omnitrace_init_tooling_hidden()) return; + + // unconditionally return if thread is disabled or finalized + if(get_thread_state() >= ThreadState::Completed) return; + + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + + if(get_use_causal()) + { + OMNITRACE_CONDITIONAL_PRINT( + tracing::debug_mark, + "[%s][PID=%i][state=%s][thread_state=%s] omnitrace_progress(%s)\n", + category_name, process::get_id(), std::to_string(get_state()).c_str(), + std::to_string(get_thread_state()).c_str(), name.data()); + + causal::mark_progress_point(name); + } +} + template template void @@ -347,6 +437,13 @@ struct local_category_region : comp::base, void return impl_type::template stop(m_prefix, std::forward(args)...); } + template + auto mark(Args&&... args) + { + if(m_prefix.empty()) return; + return impl_type::template mark(m_prefix, std::forward(args)...); + } + template auto audit(Args&&... 
args) -> decltype(impl_type::template audit(std::declval(), diff --git a/source/lib/omnitrace/library/components/comm_data.cpp b/source/lib/omnitrace/library/components/comm_data.cpp index 5d39bca611..2a160fea48 100644 --- a/source/lib/omnitrace/library/components/comm_data.cpp +++ b/source/lib/omnitrace/library/components/comm_data.cpp @@ -120,9 +120,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou auto _name = std::string_view{ _data.tool_id }; tracker_t _a{ _name }; add(_a, count * _size); - tracker_t _b{ JOIN('_', _name, "dst", dst) }; + tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) }; add(_b, count * _size); - add(JOIN('_', _name, "dst", dst, "tag", tag), count * _size); + add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size); } // MPI_Recv @@ -139,9 +139,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count, auto _name = std::string_view{ _data.tool_id }; tracker_t _a{ _name }; add(_a, count * _size); - tracker_t _b{ JOIN('_', _name, "dst", dst) }; + tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) }; add(_b, count * _size); - add(JOIN('_', _name, "dst", dst, "tag", tag), count * _size); + add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size); } // MPI_Isend @@ -158,9 +158,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou auto _name = std::string_view{ _data.tool_id }; tracker_t _a{ _name }; add(_a, count * _size); - tracker_t _b{ JOIN('_', _name, "dst", dst) }; + tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) }; add(_b, count * _size); - add(JOIN('_', _name, "dst", dst, "tag", tag), count * _size); + add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size); } // MPI_Irecv @@ -177,9 +177,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count, auto _name = std::string_view{ _data.tool_id }; tracker_t _a{ _name }; add(_a, count * 
_size); - tracker_t _b{ JOIN('_', _name, "dst", dst) }; + tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) }; add(_b, count * _size); - add(JOIN('_', _name, "dst", dst, "tag", tag), count * _size); + add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size); } // MPI_Bcast @@ -196,7 +196,7 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count, auto _name = std::string_view{ _data.tool_id }; tracker_t _t{ _name }; add(_t, count * _size); - add(JOIN('_', _name, "root", root), count * _size); + add(JOIN('/', _name, JOIN('=', "root", root)), count * _size); } // MPI_Allreduce @@ -232,20 +232,22 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen tracker_t _t{ _name }; add(_t, sendcount * _send_size + recvcount * _recv_size); { - tracker_t _b{ JOIN('_', _name, "send") }; + tracker_t _b{ JOIN('/', _name, "send") }; add(_b, sendcount * _send_size); - tracker_t _c{ JOIN('_', _name, "send", dst) }; + tracker_t _c{ JOIN('/', _name, JOIN('=', "send", dst)) }; add(_b, sendcount * _send_size); - add(JOIN('_', _name, "send", "tag", sendtag), sendcount * _send_size); - add(JOIN('_', _name, "send", dst, "tag", sendtag), sendcount * _send_size); + add(JOIN('/', _name, "send", JOIN('=', "tag", sendtag)), sendcount * _send_size); + add(JOIN('/', _name, JOIN('=', "send", dst), JOIN('=', "tag", sendtag)), + sendcount * _send_size); } { - tracker_t _b{ JOIN('_', _name, "recv") }; + tracker_t _b{ JOIN('/', _name, "recv") }; add(_b, recvcount * _recv_size); - tracker_t _c{ JOIN('_', _name, "recv", src) }; + tracker_t _c{ JOIN('/', _name, JOIN('=', "recv", src)) }; add(_b, recvcount * _recv_size); - add(JOIN('_', _name, "recv", "tag", recvtag), recvcount * _recv_size); - add(JOIN('_', _name, "recv", src, "tag", recvtag), recvcount * _recv_size); + add(JOIN('/', _name, "recv", JOIN('=', "tag", recvtag)), recvcount * _recv_size); + add(JOIN('/', _name, JOIN('=', "recv", src), JOIN('=', "tag", recvtag)), + 
recvcount * _recv_size); } } @@ -267,10 +269,10 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen auto _name = std::string_view{ _data.tool_id }; tracker_t _t{ _name }; add(_t, sendcount * _send_size + recvcount * _recv_size); - tracker_t _r(JOIN('_', _name, "root", root)); + tracker_t _r(JOIN('/', _name, JOIN('=', "root", root))); add(_r, sendcount * _send_size + recvcount * _recv_size); - add(JOIN('_', _name, "root", root, "send"), sendcount * _send_size); - add(JOIN('_', _name, "root", root, "recv"), recvcount * _recv_size); + add(JOIN('/', _name, JOIN('=', "root", root), "send"), sendcount * _send_size); + add(JOIN('/', _name, JOIN('=', "root", root), "recv"), recvcount * _recv_size); } // MPI_Alltoall @@ -290,8 +292,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen auto _name = std::string_view{ _data.tool_id }; tracker_t _t{ _name }; add(_t, sendcount * _send_size + recvcount * _recv_size); - add(JOIN('_', _name, "send"), sendcount * _send_size); - add(JOIN('_', _name, "recv"), recvcount * _recv_size); + add(JOIN('/', _name, "send"), sendcount * _send_size); + add(JOIN('/', _name, "recv"), recvcount * _recv_size); } #endif @@ -311,7 +313,7 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, const v auto _name = std::string_view{ _data.tool_id }; tracker_t _t{ _name }; add(_t, count * _size); - add(JOIN('_', _name, "root", root), count * _size); + add(JOIN('/', _name, JOIN('=', "root", root)), count * _size); } // ncclSend @@ -350,7 +352,7 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, size_t tracker_t _t{ _name }; add(_t, count * _size); - add(JOIN('_', _name, _label, peer), count * _size); + add(JOIN('/', _name, JOIN('=', _label, peer)), count * _size); } // ncclBroadcast @@ -367,7 +369,7 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, const v auto _name = std::string_view{ _data.tool_id }; tracker_t _t{ _name 
}; add(_t, count * _size); - add(JOIN('_', _data.tool_id, "root", root), count * _size); + add(JOIN('/', _data.tool_id, JOIN('=', "root", root)), count * _size); } // ncclAllReduce diff --git a/source/lib/omnitrace/library/components/exit_gotcha.cpp b/source/lib/omnitrace/library/components/exit_gotcha.cpp index 39e3bc4a3f..6447d648e2 100644 --- a/source/lib/omnitrace/library/components/exit_gotcha.cpp +++ b/source/lib/omnitrace/library/components/exit_gotcha.cpp @@ -29,6 +29,7 @@ #include "library/timemory.hpp" #include +#include #include #include @@ -45,6 +46,7 @@ exit_gotcha::configure() exit_gotcha_t::configure<0, void>("abort"); exit_gotcha_t::configure<1, void, int>("exit"); exit_gotcha_t::configure<2, void, int>("quick_exit"); + exit_gotcha_t::configure<3, void, int>("_Exit"); }; } @@ -56,6 +58,8 @@ template void invoke_exit_gotcha(const exit_gotcha::gotcha_data& _data, FuncT _func, Args... _args) { + threading::clear_callbacks(); + if(config::settings_are_configured()) { OMNITRACE_VERBOSE(0, "%s called %s(%s)...\n", get_exe_name().c_str(), diff --git a/source/lib/omnitrace/library/components/exit_gotcha.hpp b/source/lib/omnitrace/library/components/exit_gotcha.hpp index 32489308f7..b9d7df1ff1 100644 --- a/source/lib/omnitrace/library/components/exit_gotcha.hpp +++ b/source/lib/omnitrace/library/components/exit_gotcha.hpp @@ -38,7 +38,7 @@ namespace component { struct exit_gotcha : tim::component::base { - static constexpr size_t gotcha_capacity = 3; + static constexpr size_t gotcha_capacity = 4; using gotcha_data = tim::component::gotcha_data; using exit_func_t = void (*)(int); @@ -56,7 +56,7 @@ struct exit_gotcha : tim::component::base static inline void start() {} static inline void stop() {} - // exit + // exit / _Exit / quick_exit void operator()(const gotcha_data&, exit_func_t, int) const; // abort void operator()(const gotcha_data&, abort_func_t) const; diff --git a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp 
b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp index 79898a3398..fde9ddb898 100644 --- a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp @@ -21,6 +21,7 @@ // SOFTWARE. #include "library/components/pthread_create_gotcha.hpp" +#include "library/causal/delay.hpp" #include "library/components/category_region.hpp" #include "library/components/roctracer.hpp" #include "library/config.hpp" @@ -133,14 +134,10 @@ stop_bundle(bundle_t& _bundle, int64_t _tid, Args&&... _args) //--------------------------------------------------------------------------------------// pthread_create_gotcha::wrapper::wrapper(routine_t _routine, void* _arg, - bool _enable_sampling, bool _offset, - int64_t _parent, promise_t _p) -: m_enable_sampling{ _enable_sampling } -, m_offset{ _offset } -, m_parent_tid{ _parent } -, m_routine{ _routine } + wrapper_config _config) +: m_routine{ _routine } , m_arg{ _arg } -, m_promise{ std::move(_p) } +, m_config{ std::move(_config) } {} void* @@ -150,26 +147,34 @@ pthread_create_gotcha::wrapper::operator()() const if(is_shutdown && *is_shutdown) { - if(m_promise) m_promise->set_value(); + if(m_config.promise) m_config.promise->set_value(); // execute the original function return m_routine(m_arg); } push_thread_state(ThreadState::Internal); - int64_t _tid = -1; - void* _ret = nullptr; - auto _is_sampling = false; - auto _bundle = std::shared_ptr{}; - auto _signals = std::set{}; - auto _coverage = (get_mode() == Mode::Coverage); - // const auto& _parent_info = thread_info::get(m_parent_tid, InternalTID); - auto _dtor = [&]() { + int64_t _tid = -1; + void* _ret = nullptr; + auto _is_sampling = false; + auto _bundle = std::shared_ptr{}; + auto _signals = std::set{}; + auto _coverage = (get_mode() == Mode::Coverage); + const auto& _parent_info = thread_info::get(m_config.parent_tid, InternalTID); + auto _dtor = [&]() { 
set_thread_state(ThreadState::Internal); if(_is_sampling) { - sampling::block_signals(_signals); - sampling::shutdown(); + if(m_config.enable_causal) + { + causal::sampling::block_signals(_signals); + causal::sampling::shutdown(); + } + else if(m_config.enable_sampling) + { + sampling::block_signals(_signals); + sampling::shutdown(); + } } if(_tid >= 0) @@ -190,8 +195,8 @@ pthread_create_gotcha::wrapper::operator()() const auto _active = (get_state() == ::omnitrace::State::Active && bundles != nullptr && bundles_mutex != nullptr); - const auto& _info = thread_info::init(m_offset); - if(_active && !_coverage && !m_offset) + const auto& _info = thread_info::init(m_config.offset); + if(_active && !_coverage && !m_config.offset) { _tid = _info->index_data->sequent_value; threading::set_thread_name(TIMEMORY_JOIN(" ", "Thread", _tid).c_str()); @@ -210,8 +215,19 @@ pthread_create_gotcha::wrapper::operator()() const .first->second; } if(_bundle) start_bundle(*_bundle); - get_cpu_cid_stack(_tid, m_parent_tid); - if(m_enable_sampling) + get_cpu_cid_stack(_tid, m_config.parent_tid); + if(m_config.enable_causal) + { + // children inherit the parent delay data + if(_parent_info && _parent_info->index_data) + causal::delay::get_local(_tid) = + causal::delay::get_local(_parent_info->index_data->sequent_value); + _is_sampling = true; + OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); + _signals = causal::sampling::setup(); + causal::sampling::unblock_signals(); + } + else if(m_config.enable_sampling) { _is_sampling = true; OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); @@ -221,7 +237,7 @@ pthread_create_gotcha::wrapper::operator()() const } // notify the wrapper that all internal work is completed - if(m_promise) m_promise->set_value(); + if(m_config.promise) m_config.promise->set_value(); // Internal -> Enabled pop_thread_state(); @@ -347,11 +363,14 @@ pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, 
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); auto _coverage = (_mode == Mode::Coverage); - auto _use_sampling = get_use_sampling(); + auto _use_sampling = config::get_use_sampling(); + auto _use_causal = config::get_use_causal(); auto _offset = (!_enabled || !_active || _info->is_offset); auto _use_bundle = (_active && !_coverage && !_offset); auto _enable_sampling = (_use_sampling && _sample_child && _active && !_coverage && !_offset); + auto _enable_causal = + (_use_causal && _sample_child && _active && !_coverage && !_offset); static bool debug_threading_get_id = get_env(TIMEMORY_SETTINGS_PREFIX "DEBUG_THREADING_GET_ID", false); @@ -360,12 +379,13 @@ pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, OMNITRACE_VERBOSE( _verbose, "Creating new thread :: global_state=%s, thread_state=%s, mode=%s, active=%s, " - "coverage=%s, use_sampling=%s, sample_children=%s, tid=%li, use_bundle=%s, " - "enable_sampling=%s, thread_info=(%s)...\n", + "coverage=%s, use_causal=%s, use_sampling=%s, sample_children=%s, tid=%li, " + "use_bundle=%s, enable_causal=%s, enable_sampling=%s, thread_info=(%s)...\n", std::to_string(_glob_state).c_str(), std::to_string(_thr_state).c_str(), std::to_string(_mode).c_str(), std::to_string(_active).c_str(), - std::to_string(_coverage).c_str(), std::to_string(_use_sampling).c_str(), - std::to_string(_sample_child).c_str(), _tid, std::to_string(_use_bundle).c_str(), + std::to_string(_coverage).c_str(), std::to_string(_use_causal).c_str(), + std::to_string(_use_sampling).c_str(), std::to_string(_sample_child).c_str(), + _tid, std::to_string(_use_bundle).c_str(), std::to_string(_enable_causal).c_str(), std::to_string(_enable_sampling).c_str(), JOIN("", *_info).c_str()); if(debug_threading_get_id) @@ -391,9 +411,11 @@ pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, } set_thread_state(ThreadState::Disabled); - auto _blocked = get_sampling_signals(); - auto _promise = (_active) ? 
std::make_shared>() : promise_t{}; - auto* _wrap = new wrapper(func, arg, _enable_sampling, _offset, _tid, _promise); + auto _blocked = get_sampling_signals(); + auto _promise = (_active) ? std::make_shared>() : promise_t{}; + auto _config = + wrapper_config{ _enable_causal, _enable_sampling, _offset, _tid, _promise }; + auto* _wrap = new wrapper{ func, arg, _config }; set_thread_state(ThreadState::Internal); // block the signals in entire process @@ -409,6 +431,9 @@ pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, start_bundle(*_bundle, audit::incoming{}, thread, attr, func, arg); } + // threads must process their delays before creating a new thread + causal::delay::process(); + // create the thread auto _ret = (*m_wrappee)(thread, attr, &wrapper::wrap, static_cast(_wrap)); diff --git a/source/lib/omnitrace/library/components/pthread_create_gotcha.hpp b/source/lib/omnitrace/library/components/pthread_create_gotcha.hpp index 05703cde59..e327d213ce 100644 --- a/source/lib/omnitrace/library/components/pthread_create_gotcha.hpp +++ b/source/lib/omnitrace/library/components/pthread_create_gotcha.hpp @@ -42,20 +42,26 @@ struct pthread_create_gotcha : tim::component::base using wrappee_t = int (*)(pthread_t*, const pthread_attr_t*, routine_t, void*); using promise_t = std::shared_ptr>; + struct wrapper_config + { + bool enable_causal = false; + bool enable_sampling = false; + bool offset = false; + int64_t parent_tid = 0; + promise_t promise = {}; + }; + struct wrapper { - wrapper(routine_t _routine, void* _arg, bool, bool, int64_t, promise_t); + wrapper(routine_t _routine, void* _arg, wrapper_config _cfg); void* operator()() const; static void* wrap(void* _arg); private: - bool m_enable_sampling = false; - bool m_offset = false; - int64_t m_parent_tid = 0; - routine_t m_routine = nullptr; - void* m_arg = nullptr; - promise_t m_promise = {}; + routine_t m_routine = nullptr; + void* m_arg = nullptr; + wrapper_config m_config = {}; }; 
TIMEMORY_DEFAULT_OBJECT(pthread_create_gotcha) diff --git a/source/lib/omnitrace/library/components/pthread_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_gotcha.cpp index 6dab1b9b62..60f64f8178 100644 --- a/source/lib/omnitrace/library/components/pthread_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_gotcha.cpp @@ -23,7 +23,6 @@ #include "library/components/pthread_gotcha.hpp" #include "library/components/pthread_create_gotcha.hpp" #include "library/components/pthread_mutex_gotcha.hpp" -#include "library/components/roctracer.hpp" #include "library/config.hpp" #include "library/debug.hpp" #include "library/runtime.hpp" diff --git a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp index 55ebc4a8bf..556c8ae341 100644 --- a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp @@ -156,7 +156,7 @@ pthread_mutex_gotcha::configure() "pthread_spin_unlock" }); } - if(config::get_trace_thread_join()) + if(config::get_trace_thread_join() && !get_use_causal()) { pthread_mutex_gotcha_t::configure( comp::gotcha_config<12, int, pthread_t, void**>{ "pthread_join" }); @@ -208,10 +208,9 @@ pthread_mutex_gotcha::operator()(uintptr_t&& _id, int (*_callee)(Args...), uint32_t _depth = 0; int64_t _ts = 0; - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - if(_id < std::numeric_limits::max() && get_use_critical_trace()) { + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); std::tie(_cid, _parent_cid, _depth) = create_cpu_cid_entry(); _ts = comp::wall_clock::record(); } @@ -222,11 +221,11 @@ pthread_mutex_gotcha::operator()(uintptr_t&& _id, int (*_callee)(Args...), if(_id < std::numeric_limits::max() && get_use_critical_trace()) { + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); add_critical_trace( threading::get_id(), _cid, 0, _parent_cid, _ts, comp::wall_clock::record(), 0, 
_id, get_hashes().at(m_data->index), _depth); } - tim::consume_parameters(_id, _cid, _parent_cid, _depth, _ts); return _ret; } diff --git a/source/lib/omnitrace/library/components/rocprofiler.cpp b/source/lib/omnitrace/library/components/rocprofiler.cpp index cf3d542359..46ca870597 100644 --- a/source/lib/omnitrace/library/components/rocprofiler.cpp +++ b/source/lib/omnitrace/library/components/rocprofiler.cpp @@ -61,7 +61,7 @@ unique_ptr_t& rocm_data(int64_t _tid) { using thread_data_t = thread_data; - static auto& _v = thread_data_t::instances(thread_data_t::construct_on_init{}); + static auto& _v = thread_data_t::instances(construct_on_init{}); return _v.at(_tid); } diff --git a/source/lib/omnitrace/library/concepts.hpp b/source/lib/omnitrace/library/concepts.hpp index 5b0dbf48bd..c9926367a0 100644 --- a/source/lib/omnitrace/library/concepts.hpp +++ b/source/lib/omnitrace/library/concepts.hpp @@ -25,20 +25,33 @@ #include "library/defines.hpp" #include +#include #include #include +#include namespace omnitrace { namespace concepts = ::tim::concepts; // NOLINT +static constexpr size_t max_supported_threads = OMNITRACE_MAX_THREADS; + template struct thread_deleter; // unique ptr type for omnitrace template using unique_ptr_t = std::unique_ptr>; + +using construct_on_init = std::true_type; + +using tim::identity; // NOLINT +using tim::identity_t; // NOLINT + +template +struct use_placement_new_when_generating_unique_ptr : std::false_type +{}; } // namespace omnitrace namespace tim diff --git a/source/lib/omnitrace/library/config.cpp b/source/lib/omnitrace/library/config.cpp index c9b1563186..e9ea9695f7 100644 --- a/source/lib/omnitrace/library/config.cpp +++ b/source/lib/omnitrace/library/config.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -314,6 +315,10 @@ configure_settings(bool _init) !_config->get("OMNITRACE_USE_PERFETTO"), "backend", "timemory"); + OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_CAUSAL", + "Enable causal 
profiling analysis", false, "backend", + "causal", "analysis"); + OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_ROCTRACER", "Enable ROCm API and kernel tracing", true, "backend", "roctracer", "rocm"); @@ -366,9 +371,19 @@ configure_settings(bool _init) false, "rocm", "rccl", "backend"); OMNITRACE_CONFIG_CL_SETTING( - bool, "OMNITRACE_KOKKOS_KERNEL_LOGGER", "Enables kernel logging", false, + bool, "OMNITRACE_KOKKOSP_KERNEL_LOGGER", "Enables kernel logging", false, "--omnitrace-kokkos-kernel-logger", "kokkos", "debugging", "advanced"); + OMNITRACE_CONFIG_SETTING(int64_t, "OMNITRACE_KOKKOSP_NAME_LENGTH_MAX", + "Set this to a value > 0 to help avoid unnamed Kokkos Tools " + "callbacks. Generally, unnamed callbacks are the demangled " + "name of the function, which is very long", + 0, "kokkos", "debugging", "advanced"); + + OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_KOKKOSP_PREFIX", + "Set to [kokkos] to maintain old naming convention", "", + "kokkos", "debugging", "advanced"); + OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_OMPT", "Enable support for OpenMP-Tools", false, "openmp", "ompt", "backend"); @@ -682,6 +697,101 @@ configure_settings(bool _init) std::string, "OMNITRACE_TMPDIR", "Base directory for temporary files", get_env("TMPDIR", "/tmp"), "io", "data", "advanced"); + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_CAUSAL_MODE", + "Perform causal experiments at the function-scope or line-scope. 
Ideally, use " + "function first to locate function with highest impact and then switch to line " + "mode + OMNITRACE_CAUSAL_FUNCTION_SCOPE set to the function being targeted.", + std::string{ "function" }, "causal", "analysis", "advanced"); + + OMNITRACE_CONFIG_SETTING( + double, "OMNITRACE_CAUSAL_DELAY", + "Length of time to wait (in seconds) before starting the first causal experiment", + 0.0, "causal", "analysis"); + + OMNITRACE_CONFIG_SETTING( + double, "OMNITRACE_CAUSAL_DURATION", + "Length of time to perform causal experimentation (in seconds) after the first " + "experiment has started. After this amount of time has elapsed, no more causal " + "experiments will be performed and the application will continue without any " + "overhead from causal profiling. Any value <= 0 means until the application " + "completes", + 0.0, "causal", "analysis"); + + OMNITRACE_CONFIG_SETTING( + bool, "OMNITRACE_CAUSAL_END_TO_END", + "Perform causal experiment over the length of the entire application", false, + "causal", "analysis", "advanced"); + + OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_CAUSAL_FILE", + "Name of causal output filename (w/o extension)", + std::string{ "experiments" }, "causal", "analysis", + "advanced", "io"); + + OMNITRACE_CONFIG_SETTING( + bool, "OMNITRACE_CAUSAL_FILE_RESET", + "Overwrite any existing causal output file instead of appending to it", false, + "causal", "analysis", "advanced", "io"); + + OMNITRACE_CONFIG_SETTING( + uint64_t, "OMNITRACE_CAUSAL_RANDOM_SEED", + "Seed for random number generator which selects speedups and experiments -- " + "please note that the lines selected for experimentation are not reproducible " + "but the speedup selection is. 
If set to zero, std::random_device{}() will be " + "used.", + 0, "causal", "analysis"); + + OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_CAUSAL_FIXED_SPEEDUP", + "List of virtual speedups between 0 and 100 (inclusive) to " + "sample from for causal profiling", + std::string{}, "causal", "analysis", "advanced"); + + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_CAUSAL_BINARY_SCOPE", + "Limits causal experiments to the binaries matching the provided list of regular " + "expressions (separated by tab, semi-colon, and/or quotes (single or double))", + std::string{ "%MAIN%" }, "causal", "analysis"); + + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_CAUSAL_SOURCE_SCOPE", + "Limits causal experiments to the source files or source file + lineno pair " + "(i.e. or :) matching the provided list of regular " + "expressions (separated by tab, semi-colon, and/or quotes (single or double))", + std::string{}, "causal", "analysis"); + + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_CAUSAL_FUNCTION_SCOPE", + "List of regex entries for causal profiling (separated by tab, " + "semi-colon, and/or quotes (single or double))", + std::string{}, "causal", "analysis"); + + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_CAUSAL_BINARY_EXCLUDE", + "Excludes binaries matching the list of provided regexes from causal experiments " + "(separated by tab, semi-colon, and/or quotes (single or double))", + std::string{}, "causal", "analysis"); + + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_CAUSAL_SOURCE_EXCLUDE", + "Excludes source files or source file + lineno pair (i.e. 
or " + ":) matching the list of provided regexes from causal experiments " + "(separated by tab, semi-colon, and/or quotes (single or double))", + std::string{}, "causal", "analysis"); + + OMNITRACE_CONFIG_SETTING( + std::string, "OMNITRACE_CAUSAL_FUNCTION_EXCLUDE", + "Excludes functions matching the list of provided regexes from causal " + "experiments (separated by tab, semi-colon, and/or quotes (single or double))", + std::string{}, "causal", "analysis"); + + OMNITRACE_CONFIG_SETTING( + bool, "OMNITRACE_CAUSAL_FUNCTION_EXCLUDE_DEFAULTS", + "This controls adding a series of function exclude regexes to avoid " + "experimenting on STL implementation functions, etc. which are, " + "generally, not helpful. Details: excludes demangled function names " + "starting with '_' or containing '::_M'.", + true, "causal", "analysis", "advanced"); + // set the defaults _config->get_flamegraph_output() = false; _config->get_ctest_notes() = false; @@ -819,8 +929,9 @@ configure_settings(bool _init) auto _cmd = tim::read_command_line(process::get_id()); auto _cmd_env = tim::get_env("OMNITRACE_COMMAND_LINE", ""); if(!_cmd_env.empty()) _cmd = tim::delimit(_cmd_env, " "); - auto _exe = (_cmd.empty()) ? "exe" : _cmd.front(); - auto _pos = _exe.find_last_of('/'); + auto _exe = (_cmd.empty()) ? 
"exe" : _cmd.front(); + get_exe_realpath() = filepath::realpath(_exe, nullptr, false); + auto _pos = _exe.find_last_of('/'); if(_pos < _exe.length() - 1) _exe = _exe.substr(_pos + 1); get_exe_name() = _exe; _config->set_tag(_exe); @@ -948,12 +1059,15 @@ configure_mode_settings() } }; + auto _use_causal = get_setting_value("OMNITRACE_USE_CAUSAL"); + if(_use_causal.first && _use_causal.second) set_env("OMNITRACE_MODE", "causal", 1); + if(get_mode() == Mode::Coverage) { set_default_setting_value("OMNITRACE_USE_CODE_COVERAGE", true); _set("OMNITRACE_USE_PERFETTO", false); _set("OMNITRACE_USE_TIMEMORY", false); - //_set("OMNITRACE_USE_CAUSAL", false); + _set("OMNITRACE_USE_CAUSAL", false); _set("OMNITRACE_USE_ROCM_SMI", false); _set("OMNITRACE_USE_ROCTRACER", false); _set("OMNITRACE_USE_ROCPROFILER", false); @@ -964,6 +1078,15 @@ configure_mode_settings() _set("OMNITRACE_USE_PROCESS_SAMPLING", false); _set("OMNITRACE_CRITICAL_TRACE", false); } + else if(get_mode() == Mode::Causal) + { + _set("OMNITRACE_USE_CAUSAL", true); + _set("OMNITRACE_USE_PERFETTO", false); + _set("OMNITRACE_USE_TIMEMORY", false); + _set("OMNITRACE_CRITICAL_TRACE", false); + _set("OMNITRACE_USE_SAMPLING", false); + _set("OMNITRACE_USE_PROCESS_SAMPLING", false); + } else if(get_mode() == Mode::Sampling) { set_default_setting_value("OMNITRACE_USE_SAMPLING", true); @@ -973,8 +1096,10 @@ configure_mode_settings() if(gpu::device_count() == 0) { +#if OMNITRACE_HIP_VERSION > 0 OMNITRACE_BASIC_VERBOSE(1, "No HIP devices were found: disabling roctracer, " "rocprofiler, and rocm_smi...\n"); +#endif _set("OMNITRACE_USE_ROCPROFILER", false); _set("OMNITRACE_USE_ROCTRACER", false); _set("OMNITRACE_USE_ROCM_SMI", false); @@ -1010,7 +1135,7 @@ configure_mode_settings() { _set("OMNITRACE_USE_PERFETTO", false); _set("OMNITRACE_USE_TIMEMORY", false); - //_set("OMNITRACE_USE_CAUSAL", false); + _set("OMNITRACE_USE_CAUSAL", false); _set("OMNITRACE_USE_ROCM_SMI", false); _set("OMNITRACE_USE_ROCTRACER", false); 
_set("OMNITRACE_USE_ROCPROFILER", false); @@ -1117,6 +1242,7 @@ configure_disabled_settings() _handle_use_option("OMNITRACE_USE_SAMPLING", "sampling"); _handle_use_option("OMNITRACE_USE_PROCESS_SAMPLING", "process_sampling"); + _handle_use_option("OMNITRACE_USE_CAUSAL", "causal"); _handle_use_option("OMNITRACE_USE_KOKKOSP", "kokkos"); _handle_use_option("OMNITRACE_USE_PERFETTO", "perfetto"); _handle_use_option("OMNITRACE_USE_TIMEMORY", "timemory"); @@ -1436,6 +1562,18 @@ get_exe_name() return _v; } +std::string& +get_exe_realpath() +{ + static std::string _v = []() { + auto _cmd_line = tim::read_command_line(process::get_id()); + if(!_cmd_line.empty()) + return filepath::realpath(_cmd_line.front(), nullptr, false); + return std::string{}; + }(); + return _v; +} + std::string get_config_file() { @@ -1449,15 +1587,18 @@ get_mode() if(!settings_are_configured()) { auto _mode = tim::get_env_choice( - "OMNITRACE_MODE", "trace", { "trace", "sampling", "coverage" }); + "OMNITRACE_MODE", "trace", { "trace", "sampling", "causal", "coverage" }); if(_mode == "sampling") return Mode::Sampling; + else if(_mode == "causal") + return Mode::Causal; else if(_mode == "coverage") return Mode::Coverage; return Mode::Trace; } static auto _m = std::unordered_map{ { "trace", Mode::Trace }, + { "causal", Mode::Causal }, { "sampling", Mode::Sampling }, { "coverage", Mode::Coverage } }; static auto _v = get_config()->find("OMNITRACE_MODE"); @@ -1564,6 +1705,13 @@ get_use_timemory() return static_cast&>(*_v->second).get(); } +bool& +get_use_causal() +{ + static auto _v = get_config()->find("OMNITRACE_USE_CAUSAL"); + return static_cast&>(*_v->second).get(); +} + bool get_use_roctracer() { @@ -1671,7 +1819,7 @@ get_use_kokkosp() bool get_use_kokkosp_kernel_logger() { - static auto _v = get_config()->find("OMNITRACE_KOKKOS_KERNEL_LOGGER"); + static auto _v = get_config()->find("OMNITRACE_KOKKOSP_KERNEL_LOGGER"); return static_cast&>(*_v->second).get(); } @@ -2214,5 +2362,152 @@ 
get_tmp_file(std::string _basename, std::string _ext) _existing_files.emplace(_fname, std::move(_v)); return _existing_files.at(_fname); } + +CausalMode +get_causal_mode() +{ + if(!settings_are_configured()) + { + auto _mode = tim::get_env_choice("OMNITRACE_CAUSAL_MODE", "function", + { "line", "function" }); + if(_mode == "line") return CausalMode::Line; + return CausalMode::Function; + } + static auto _causal_mode = []() { + auto _m = std::unordered_map{ + { "line", CausalMode::Line }, + { "func", CausalMode::Function }, + { "function", CausalMode::Function } + }; + auto _v = get_config()->find("OMNITRACE_CAUSAL_MODE"); + try + { + return _m.at(static_cast&>(*_v->second).get()); + } catch(std::runtime_error& _e) + { + auto _mode = static_cast&>(*_v->second).get(); + std::stringstream _ss{}; + for(const auto& itr : _v->second->get_choices()) + _ss << ", " << itr; + auto _msg = (_ss.str().length() > 2) ? _ss.str().substr(2) : std::string{}; + OMNITRACE_THROW("[%s] invalid causal mode %s. 
Choices: %s\n", __FUNCTION__, + _mode.c_str(), _msg.c_str()); + } + return CausalMode::Function; + }(); + return _causal_mode; +} + +bool +get_causal_end_to_end() +{ + static auto _v = get_config()->find("OMNITRACE_CAUSAL_END_TO_END"); + return static_cast&>(*_v->second).get(); +} + +std::vector +get_causal_fixed_speedup() +{ + static auto _v = get_config()->find("OMNITRACE_CAUSAL_FIXED_SPEEDUP"); + return parse_numeric_range>( + static_cast&>(*_v->second).get(), + "causal fixed speedup", 5); +} + +std::string +get_causal_output_filename() +{ + static auto _v = get_config()->find("OMNITRACE_CAUSAL_FILE"); + auto _fname = static_cast&>(*_v->second).get(); + for(auto&& itr : std::initializer_list{ ".txt", ".json", ".xml" }) + { + auto _pos = _fname.find(itr); + // if extension is found at end of string, remove + if(_pos != std::string::npos && (_pos + itr.length()) == _fname.length()) + _fname = _fname.substr(0, _fname.length() - itr.length()); + } + return _fname; +} + +namespace +{ +std::vector +format_causal_scopes(std::vector _value, const std::string& _tag) +{ + const auto _config = get_config(); + const auto _main_re = std::regex{ "(^|[^a-zA-Z])(MAIN|%MAIN%)($|[^a-zA-Z])" }; + const auto _space_re = std::regex{ "^([ ]*)(.*)([ ]*)$" }; + for(auto& itr : _value) + { + // replace any output/input keys, e.g. 
%argv0% + itr = settings::format(itr, _tag); + // replace MAIN or %MAIN% with (|) + if(std::regex_search(itr, _main_re)) + { + itr = std::regex_replace( + itr, _main_re, + join("", "$1", "(", get_exe_name(), "|", get_exe_realpath(), ")", "$3")); + } + // trim leading and trailing spaces since we didn't delimit spaces + if(std::regex_search(itr, _space_re)) + itr = std::regex_replace(itr, _space_re, "$2"); + } + return _value; +} +} // namespace + +std::vector +get_causal_binary_scope() +{ + auto&& _config = get_config(); + static auto _v = _config->find("OMNITRACE_CAUSAL_BINARY_SCOPE"); + return format_causal_scopes( + tim::delimit(static_cast&>(*_v->second).get(), + "\t\"';"), + _config->get_tag()); +} + +std::vector +get_causal_source_scope() +{ + static auto _v = get_config()->find("OMNITRACE_CAUSAL_SOURCE_SCOPE"); + return tim::delimit(static_cast&>(*_v->second).get(), + "\t\"';"); +} + +std::vector +get_causal_function_scope() +{ + static auto _v = get_config()->find("OMNITRACE_CAUSAL_FUNCTION_SCOPE"); + return tim::delimit(static_cast&>(*_v->second).get(), + "\t\"';"); +} + +std::vector +get_causal_binary_exclude() +{ + auto&& _config = get_config(); + static auto _v = _config->find("OMNITRACE_CAUSAL_BINARY_EXCLUDE"); + return format_causal_scopes( + tim::delimit(static_cast&>(*_v->second).get(), + "\t\"';"), + _config->get_tag()); +} + +std::vector +get_causal_source_exclude() +{ + static auto _v = get_config()->find("OMNITRACE_CAUSAL_SOURCE_EXCLUDE"); + return tim::delimit(static_cast&>(*_v->second).get(), + "\t\"';"); +} + +std::vector +get_causal_function_exclude() +{ + static auto _v = get_config()->find("OMNITRACE_CAUSAL_FUNCTION_EXCLUDE"); + return tim::delimit(static_cast&>(*_v->second).get(), + "\t\"';"); +} } // namespace config } // namespace omnitrace diff --git a/source/lib/omnitrace/library/config.hpp b/source/lib/omnitrace/library/config.hpp index 5c4c86b8dc..e58b49cbe4 100644 --- a/source/lib/omnitrace/library/config.hpp +++ 
b/source/lib/omnitrace/library/config.hpp @@ -79,6 +79,9 @@ print_settings(bool include_env = true); std::string& get_exe_name(); +std::string& +get_exe_realpath(); + template bool set_setting_value(const std::string& _name, Tp&& _v) @@ -166,6 +169,9 @@ get_use_perfetto() OMNITRACE_HOT; bool& get_use_timemory() OMNITRACE_HOT; +bool& +get_use_causal() OMNITRACE_HOT; + bool get_use_roctracer() OMNITRACE_HOT; @@ -365,7 +371,7 @@ struct tmp_file void close(); void remove(); - operator bool() const { return stream.is_open() && stream.good(); } + explicit operator bool() const { return stream.is_open() && stream.good(); } std::string filename = {}; std::fstream stream = {}; @@ -373,5 +379,35 @@ struct tmp_file std::shared_ptr get_tmp_file(std::string _basename, std::string _ext = "dat"); + +CausalMode +get_causal_mode(); + +bool +get_causal_end_to_end(); + +std::vector +get_causal_fixed_speedup(); + +std::string +get_causal_output_filename(); + +std::vector +get_causal_binary_scope(); + +std::vector +get_causal_source_scope(); + +std::vector +get_causal_function_scope(); + +std::vector +get_causal_binary_exclude(); + +std::vector +get_causal_source_exclude(); + +std::vector +get_causal_function_exclude(); } // namespace config } // namespace omnitrace diff --git a/source/lib/omnitrace/library/containers/CMakeLists.txt b/source/lib/omnitrace/library/containers/CMakeLists.txt new file mode 100644 index 0000000000..8624008df3 --- /dev/null +++ b/source/lib/omnitrace/library/containers/CMakeLists.txt @@ -0,0 +1,8 @@ +# +set(containers_sources) + +set(containers_headers ${CMAKE_CURRENT_LIST_DIR}/stable_vector.hpp + ${CMAKE_CURRENT_LIST_DIR}/static_vector.hpp) + +target_sources(omnitrace-object-library PRIVATE ${containers_sources} + ${containers_headers}) diff --git a/source/lib/omnitrace/library/containers/c_array.hpp b/source/lib/omnitrace/library/containers/c_array.hpp new file mode 100644 index 0000000000..4dc9aaaec9 --- /dev/null +++ 
b/source/lib/omnitrace/library/containers/c_array.hpp @@ -0,0 +1,132 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/exception.hpp" + +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace container +{ +template +struct c_array +{ + // Construct an array wrapper from a base pointer and array size + c_array(Tp* _base, size_t _size) + : m_base{ _base } + , m_size{ _size } + {} + + ~c_array() = default; + c_array(const c_array&) = default; + c_array& operator=(const c_array&) = default; + c_array& operator=(c_array&&) noexcept = default; + + // Get the size of the wrapped array + size_t size() const { return m_size; } + + // Access an element by index + Tp& operator[](size_t i) { return m_base[i]; } + + // Access an element by index + const Tp& operator[](size_t i) const { return m_base[i]; } + + // Access an element by index with bounds check + Tp& at(size_t i) + { + if(i < m_size) return m_base[i]; + throw ::omnitrace::exception( + std::string{ typeid(*this).name() } + std::to_string(i) + " exceeds size " + + std::to_string(m_size)); + } + + // Access an element by index with bounds check + const Tp& at(size_t i) const + { + if(i < m_size) return m_base[i]; + throw ::omnitrace::exception( + std::string{ typeid(*this).name() } + std::to_string(i) + " exceeds size " + + std::to_string(m_size)); + } + + // Get a slice of this array, from a start index (inclusive) to end index (exclusive) + c_array slice(size_t start, size_t end) + { + return c_array(&m_base[start], end - start); + } + + operator Tp*() const { return m_base; } + + // Iterator class for convenient range-based for loop support + template + struct iterator + { + // Start the iterator at a given pointer + iterator(Tp* p) + : m_ptr{ p } + {} + + // Advance to the next element + void operator++() { ++m_ptr; } + void operator++(int) { m_ptr++; } + + // Get the current element + Up& operator*() const { return *m_ptr; } + + // Compare iterators + bool operator==(const iterator& rhs) const { return m_ptr == rhs.m_ptr; } + bool operator!=(const 
iterator& rhs) const { return m_ptr != rhs.m_ptr; } + + private: + Tp* m_ptr = nullptr; + }; + + // Get an iterator positioned at the beginning of the wrapped array + iterator begin() { return iterator{ m_base }; } + iterator begin() const { return iterator{ m_base }; } + + // Get an iterator positioned at the end of the wrapped array + iterator end() { return iterator{ &m_base[m_size] }; } + iterator end() const { return iterator{ &m_base[m_size] }; } + +private: + Tp* m_base = nullptr; + size_t m_size = 0; +}; + +// Function for automatic template argument deduction +template +c_array +wrap_c_array(Tp* base, size_t size) +{ + return c_array(base, size); +} +} // namespace container +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/containers/operators.hpp b/source/lib/omnitrace/library/containers/operators.hpp new file mode 100644 index 0000000000..15e8007c47 --- /dev/null +++ b/source/lib/omnitrace/library/containers/operators.hpp @@ -0,0 +1,240 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/defines.hpp" + +#include +#include + +#define OMNITRACE_IMPORT_TEMPLATE2(template_name) +#define OMNITRACE_IMPORT_TEMPLATE1(template_name) + +// Import a 2-type-argument operator template into boost (if necessary) and +// provide a specialization of 'is_chained_base<>' for it. +#define OMNITRACE_OPERATOR_TEMPLATE2(template_name2) \ + OMNITRACE_IMPORT_TEMPLATE2(template_name2) \ + template \ + struct is_chained_base<::omnitrace::container::template_name2> \ + { \ + using value = ::omnitrace::container::true_t; \ + }; + +// Import a 1-type-argument operator template into boost (if necessary) and +// provide a specialization of 'is_chained_base<>' for it. +#define OMNITRACE_OPERATOR_TEMPLATE1(template_name1) \ + OMNITRACE_IMPORT_TEMPLATE1(template_name1) \ + template \ + struct is_chained_base<::omnitrace::container::template_name1> \ + { \ + using value = ::omnitrace::container::true_t; \ + }; + +#define OMNITRACE_OPERATOR_TEMPLATE(template_name) \ + template , \ + typename O = typename is_chained_base::value> \ + struct template_name; \ + \ + template \ + struct template_name : template_name##2 < T \ + , U \ + , B > \ + {}; \ + \ + template \ + struct template_name, true_t> : template_name##1 < T \ + , U > \ + {}; \ + \ + template \ + struct template_name : template_name##1 < T \ + , B > \ + {}; \ + \ + template \ + struct is_chained_base> \ + { \ + using value = ::omnitrace::container::true_t; \ + }; \ + \ + OMNITRACE_OPERATOR_TEMPLATE2(template_name##2) \ + OMNITRACE_OPERATOR_TEMPLATE1(template_name##1) + +#define OMNITRACE_BINARY_OPERATOR_COMMUTATIVE(NAME, OP) \ + template > \ + struct NAME##2 \ + : B{ friend T operator OP(T lhs, const U& rhs){ return lhs 
OP## = rhs; \ + } \ + friend T operator OP(const U& lhs, T rhs) { return rhs OP## = lhs; } \ + } \ + ; \ + \ + template > \ + struct NAME##1 \ + : B{ friend T operator OP(T lhs, const T& rhs){ return lhs OP## = rhs; \ + } \ + } \ + ; + +#define OMNITRACE_BINARY_OPERATOR_NON_COMMUTATIVE(NAME, OP) \ + template > \ + struct NAME##2 \ + : B{ friend T operator OP(T lhs, const U& rhs){ return lhs OP## = rhs; \ + } \ + } \ + ; + +namespace omnitrace +{ +namespace container +{ +struct true_t +{}; + +struct false_t +{}; + +template +class empty_base +{}; + +template +struct is_chained_base +{ + using value = true_t; +}; + +OMNITRACE_BINARY_OPERATOR_COMMUTATIVE(addable, +) +OMNITRACE_BINARY_OPERATOR_NON_COMMUTATIVE(subtractable, -) + +OMNITRACE_OPERATOR_TEMPLATE(addable) + +template > +struct incrementable : B +{ + friend T operator++(T& x, int) + { + incrementable_type nrv(x); + ++x; + return nrv; + } + +private: // The use of this typedef works around a Borland bug + typedef T incrementable_type; +}; + +template > +struct decrementable : B +{ + friend T operator--(T& x, int) + { + decrementable_type nrv(x); + --x; + return nrv; + } + +private: // The use of this typedef works around a Borland bug + typedef T decrementable_type; +}; + +template > +struct dereferenceable : B +{ + P operator->() const { return ::std::addressof(*static_cast(*this)); } +}; + +template > +struct indexable : B +{ + R operator[](I n) const { return *(static_cast(*this) + n); } +}; + +template > +struct equality_comparable1 : B +{ + friend bool operator!=(const T& x, const T& y) { return !static_cast(x == y); } +}; + +template > +struct input_iteratable +: equality_comparable1>> +{}; + +template > +struct output_iteratable : incrementable +{}; + +template > +struct forward_iteratable : input_iteratable +{}; + +template > +struct bidirectional_iteratable : forward_iteratable> +{}; + +// template > +// struct subtractable2; + +template > +struct additive2 : addable2> +{}; + +template > +struct 
less_than_comparable1 : B +{ + friend bool operator>(const T& x, const T& y) { return y < x; } + friend bool operator<=(const T& x, const T& y) { return !static_cast(y < x); } + friend bool operator>=(const T& x, const T& y) { return !static_cast(x < y); } +}; + +// To avoid repeated derivation from equality_comparable, +// which is an indirect base typename of bidirectional_iterable, +// random_access_iteratable must not be derived from totally_ordered1 +// but from less_than_comparable1 only. (Helmut Zeisel, 02-Dec-2001) +template > +struct random_access_iteratable +: bidirectional_iteratable< + T, P, less_than_comparable1>>> +{}; + +template +struct iterator_helper +{ + using iterator_category = CategoryT; + using value_type = Tp; + using difference_type = DistanceT; + using pointer = PointerT; + using reference = ReferenceT; +}; + +template +struct random_access_iterator_helper +: random_access_iteratable> +{ + friend D requires_difference_operator(const T& x, const T& y) { return x - y; } +}; // random_access_iterator_helper +} // namespace container +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/containers/stable_vector.hpp b/source/lib/omnitrace/library/containers/stable_vector.hpp new file mode 100644 index 0000000000..7830489413 --- /dev/null +++ b/source/lib/omnitrace/library/containers/stable_vector.hpp @@ -0,0 +1,391 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/containers/operators.hpp" +#include "library/containers/static_vector.hpp" +#include "library/defines.hpp" + +#include +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace container +{ +template +class stable_vector +{ +public: + using value_type = Tp; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + + static constexpr const size_t chunk_size = ChunkSizeV; + +private: + template + struct is_pow2 + { + static constexpr bool value = (N & (N - 1)) == 0; + }; + + static_assert(ChunkSizeV > 0, "ChunkSize needs to be greater than zero"); + static_assert(is_pow2::value, "ChunkSize needs to be a power of 2"); + + using this_type = stable_vector; + using const_this_type = const stable_vector; + + template + struct iterator_base + { + iterator_base(ContainerT* c = nullptr, size_type i = 0) + : m_container(c) + , m_index(i) + {} + + iterator_base& operator+=(size_type i) + { + m_index += i; + return *this; + } + iterator_base& operator-=(size_type i) + { + m_index -= i; + return *this; + } + iterator_base& operator++() + { + ++m_index; + return *this; + } + iterator_base& operator--() + { + --m_index; + return *this; + } + + difference_type operator-(const iterator_base& it) + { + assert(m_container == it.m_container); + return m_index - it.m_index; + } + + bool operator<(const iterator_base& it) const + { + assert(m_container == it.m_container); + return m_index < it.m_index; + } + bool operator==(const iterator_base& it) const + { + return m_container == it.m_container && m_index == it.m_index; + } + + protected: + ContainerT* m_container; + size_type m_index; + }; + +public: + struct const_iterator; + + struct iterator + : public iterator_base + //, std::iterator + , public random_access_iterator_helper + { + using 
iterator_base::iterator_base; + friend struct const_iterator; + + reference operator*() { return (*this->m_container)[this->m_index]; } + }; + + struct const_iterator + : public iterator_base + //, std::iterator + , public random_access_iterator_helper + { + using iterator_base::iterator_base; + + const_iterator(const iterator& it) + : iterator_base(it.m_container, it.m_index) + {} + + const_reference operator*() const { return (*this->m_container)[this->m_index]; } + + bool operator==(const const_iterator& it) const + { + return iterator_base::operator==(it); + } + + friend bool operator==(const iterator& l, const const_iterator& r) + { + return r == l; + } + }; + + stable_vector() = default; + explicit stable_vector(size_type count, const Tp& value); + explicit stable_vector(size_type count); + + template ::iterator_category, + std::input_iterator_tag>::value>> + stable_vector(InputItrT first, InputItrT last); + + stable_vector(std::initializer_list); + + stable_vector(const stable_vector& other); + stable_vector(stable_vector&& other) noexcept; + + stable_vector& operator=(stable_vector v); + + iterator begin() noexcept { return { this, 0 }; } + const_iterator begin() const noexcept { return { this, 0 }; } + const_iterator cbegin() const noexcept { return begin(); } + + iterator end() noexcept { return { this, size() }; } + const_iterator end() const noexcept { return { this, size() }; } + const_iterator cend() const noexcept { return end(); } + + size_type size() const noexcept + { + return empty() ? 
0 : (m_chunks.size() - 1) * ChunkSizeV + m_chunks.back()->size(); + } + size_type max_size() const noexcept { return std::numeric_limits::max(); } + size_type capacity() const noexcept { return m_chunks.size() * ChunkSizeV; } + + bool empty() const noexcept { return m_chunks.size() == 0; } + + void reserve(size_type new_capacity); + void shrink_to_fit() noexcept {} + + bool operator==(const this_type& c) const + { + return size() == c.size() && std::equal(cbegin(), cend(), c.cbegin()); + } + bool operator!=(const this_type& c) const { return !operator==(c); } + + void swap(this_type& v) { std::swap(m_chunks, v.m_chunks); } + + friend void swap(this_type& l, this_type& r) { l.swap(r); } + + reference front() { return m_chunks.front()->front(); } + const_reference front() const { return front(); } + + reference back() { return m_chunks.back()->back(); } + const_reference back() const { return back(); } + + void push_back(const Tp& t); + void push_back(Tp&& t); + + template + void emplace_back(Args&&... 
args); + + reference operator[](size_type i); + + const_reference operator[](size_type i) const; + + reference at(size_type i); + + const_reference at(size_type i) const; + +private: + using chunk_type = container::static_vector; + using storage_type = std::vector>; + + void add_chunk(); + chunk_type& last_chunk(); + + storage_type m_chunks; +}; + +template +stable_vector::stable_vector(size_type count, const Tp& value) +{ + for(size_type i = 0; i < count; ++i) + { + push_back(value); + } +} + +template +stable_vector::stable_vector(size_type count) +{ + for(size_type i = 0; i < count; ++i) + { + emplace_back(); + } +} + +template +template +stable_vector::stable_vector(InputItrT first, InputItrT last) +{ + for(; first != last; ++first) + { + push_back(*first); + } +} + +template +stable_vector::stable_vector(const stable_vector& other) +{ + for(const auto& chunk : other.m_chunks) + { + m_chunks.emplace_back(std::make_unique(*chunk)); + } +} + +template +stable_vector::stable_vector(stable_vector&& other) noexcept +: m_chunks(std::move(other.m_chunks)) +{} + +template +stable_vector::stable_vector(std::initializer_list ilist) +{ + for(const auto& t : ilist) + { + push_back(t); + } +} + +template +stable_vector& +stable_vector::operator=(stable_vector v) +{ + swap(v); + return *this; +} + +template +void +stable_vector::add_chunk() +{ + m_chunks.emplace_back(std::make_unique()); +} + +template +typename stable_vector::chunk_type& +stable_vector::last_chunk() +{ + if(OMNITRACE_UNLIKELY(m_chunks.empty() || m_chunks.back()->size() == ChunkSizeV)) + { + add_chunk(); + } + + return *m_chunks.back(); +} + +template +void +stable_vector::reserve(size_type new_capacity) +{ + const size_t initial_capacity = capacity(); + for(difference_type i = new_capacity - initial_capacity; i > 0; i -= ChunkSizeV) + { + add_chunk(); + } +} + +template +void +stable_vector::push_back(const Tp& t) +{ + last_chunk().push_back(t); +} + +template +void +stable_vector::push_back(Tp&& t) +{ + 
last_chunk().push_back(std::move(t)); +} + +template +template +void +stable_vector::emplace_back(Args&&... args) +{ + last_chunk().emplace_back(std::forward(args)...); +} + +template +typename stable_vector::reference +stable_vector::operator[](size_type i) +{ + return (*m_chunks[i / ChunkSizeV])[i % ChunkSizeV]; +} + +template +typename stable_vector::const_reference +stable_vector::operator[](size_type i) const +{ + return const_cast(*this)[i]; +} + +template +typename stable_vector::reference +stable_vector::at(size_type i) +{ + if(OMNITRACE_UNLIKELY(i >= size())) + { + throw ::omnitrace::exception( + "stable_vector::at(" + std::to_string(i) + "). size is " + + std::to_string(size())); + } + + return operator[](i); +} + +template +typename stable_vector::const_reference +stable_vector::at(size_type i) const +{ + return const_cast(*this).at(i); +} + +template +auto +resize(stable_vector& _v, size_t _n, Args&&... args) +{ + if(_n > _v.capacity()) _v.reserve(_n); + + while(_v.size() < _n) + _v.emplace_back(std::forward(args)...); + + return _v.size(); +} +} // namespace container +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/containers/static_vector.hpp b/source/lib/omnitrace/library/containers/static_vector.hpp new file mode 100644 index 0000000000..3d293c9405 --- /dev/null +++ b/source/lib/omnitrace/library/containers/static_vector.hpp @@ -0,0 +1,194 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "library/common.hpp" +#include "library/debug.hpp" +#include "library/exception.hpp" + +#include + +#include +#include +#include + +namespace omnitrace +{ +namespace container +{ +template +struct static_vector +{ + using count_type = std::conditional_t, size_t>; + using this_type = static_vector; + using value_type = Tp; + + static_vector() = default; + static_vector(const static_vector&) = default; + static_vector(static_vector&&) noexcept = default; + static_vector& operator=(const static_vector&) = default; + static_vector& operator=(static_vector&&) noexcept = default; + + static_vector(size_t _n, Tp _v = {}); + + static_vector& operator=(std::initializer_list&& _v); + static_vector& operator=(std::pair, size_t>&&); + + template + value_type& emplace_back(Args&&... 
_v); + + template + decltype(auto) push_back(Up&& _v) + { + return emplace_back(Tp{ std::forward(_v) }); + } + + void pop_back() { --m_size; } + + void clear(); + void reserve(size_t) noexcept {} + void shrink_to_fit() noexcept {} + auto capacity() noexcept { return N; } + + size_t size() const { return m_size; } + bool empty() const { return (size() == 0); } + + auto begin() { return m_data.begin(); } + auto begin() const { return m_data.begin(); } + auto cbegin() const { return m_data.cbegin(); } + + auto end() { return m_data.begin() + size(); } + auto end() const { return m_data.begin() + size(); } + auto cend() const { return m_data.cbegin() + size(); } + + decltype(auto) operator[](size_t _idx) { return m_data[_idx]; } + decltype(auto) operator[](size_t _idx) const { return m_data[_idx]; } + + decltype(auto) at(size_t _idx) { return m_data.at(_idx); } + decltype(auto) at(size_t _idx) const { return m_data.at(_idx); } + + decltype(auto) front() { return m_data.front(); } + decltype(auto) front() const { return m_data.front(); } + decltype(auto) back() { return *(m_data.begin() + size() - 1); } + decltype(auto) back() const { return *(m_data.begin() + size() - 1); } + + void swap(this_type& _v); + + friend void swap(this_type& _lhs, this_type& _rhs) { _lhs.swap(_rhs); } + +private: + count_type m_size = count_type{ 0 }; + std::array m_data = {}; +}; + +template +static_vector::static_vector(size_t _n, Tp _v) +{ + m_size.store(_n); + m_data.fill(_v); +} + +template +static_vector& +static_vector::operator=(std::initializer_list&& _v) +{ + if(OMNITRACE_UNLIKELY(_v.size() > N)) + { + throw exception( + std::string{ "static_vector::operator=(initializer_list) size > " } + + std::to_string(N)); + } + + clear(); + for(auto&& itr : _v) + m_data[m_size++] = itr; + return *this; +} + +template +static_vector& +static_vector::operator=(std::pair, size_t>&& _v) +{ + if constexpr(AtomicSizeV) m_size.store(0); + + m_data = std::move(_v.first); + + if constexpr(AtomicSizeV) 
+ m_size.store(_v.second); + else + m_size = _v.second; + + return *this; +} + +template +void +static_vector::clear() +{ + if constexpr(AtomicSizeV) + m_size.store(0); + else + m_size = 0; +} + +template +void +static_vector::swap(this_type& _v) +{ + if constexpr(AtomicSizeV) + { + auto _t_size = m_size; + auto _v_size = _v.m_size; + std::swap(m_data, _v.m_data); + m_size.store(_v_size); + _v.m_size.store(_t_size); + } + else + { + std::swap(m_size, _v.m_size); + std::swap(m_data, _v.m_data); + } +} + +template +template +Tp& +static_vector::emplace_back(Args&&... _v) +{ + auto _idx = m_size++; + if(_idx >= N) + { + throw exception( + std::string{ "static_vector::emplace_back - reached capacity " } + + std::to_string(N)); + } + + if constexpr(std::is_assignable(_v))...>::value) + m_data[_idx] = { std::forward(_v)... }; + else + m_data[_idx] = Tp{ std::forward(_v)... }; + return m_data[_idx]; +} + +} // namespace container +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/coverage.cpp b/source/lib/omnitrace/library/coverage.cpp index e3257446ce..8d47b0ac10 100644 --- a/source/lib/omnitrace/library/coverage.cpp +++ b/source/lib/omnitrace/library/coverage.cpp @@ -87,8 +87,7 @@ get_coverage_data() auto& get_coverage_count(int64_t _tid = tim::threading::get_id()) { - static auto& _v = - coverage_thread_data::instances(coverage_thread_data::construct_on_init{}); + static auto& _v = coverage_thread_data::instances(construct_on_init{}); return _v.at(_tid); } } // namespace diff --git a/source/lib/omnitrace/library/cpu_freq.cpp b/source/lib/omnitrace/library/cpu_freq.cpp index 0f322413e9..40b79030df 100644 --- a/source/lib/omnitrace/library/cpu_freq.cpp +++ b/source/lib/omnitrace/library/cpu_freq.cpp @@ -57,7 +57,7 @@ namespace { using cpu_data_tuple_t = std::tuple; -std::deque cpu_data = {}; +std::deque data = {}; template void init_perfetto_counter_tracks(type_list) @@ -97,7 +97,7 @@ sample() auto _freqs = component::cpu_freq{}.sample(); // user and 
kernel mode times are in microseconds - cpu_data.emplace_back( + data.emplace_back( _ts, tim::get_page_rss(), tim::get_virt_mem(), _rcache.get_peak_rss(), _rcache.get_num_priority_context_switch() + _rcache.get_num_voluntary_context_switch(), @@ -161,7 +161,7 @@ post_process() { OMNITRACE_VERBOSE(1, "Post-processing %zu cpu frequency and memory usage entries...\n", - cpu_data.size()); + data.size()); auto _process_frequencies = [](size_t _idx, size_t _offset) { using freq_track = perfetto_counter_track; @@ -177,7 +177,7 @@ post_process() freq_track::emplace(_idx, addendum("Frequency"), "MHz"); } - for(auto& itr : cpu_data) + for(auto& itr : data) { uint64_t _ts = std::get<0>(itr); double _freq = std::get<8>(itr).at(_offset); @@ -203,7 +203,7 @@ post_process() OMNITRACE_CI_THROW(!_thread_info, "Missing thread info for thread 0"); if(!_thread_info) return; - for(auto& itr : cpu_data) + for(auto& itr : data) { uint64_t _ts = std::get<0>(itr); if(!_thread_info->is_valid_time(_ts)) continue; diff --git a/source/lib/omnitrace/library/debug.cpp b/source/lib/omnitrace/library/debug.cpp index 0219971158..d90bcfcd2d 100644 --- a/source/lib/omnitrace/library/debug.cpp +++ b/source/lib/omnitrace/library/debug.cpp @@ -21,12 +21,17 @@ // SOFTWARE. 
#include "library/debug.hpp" +#include "library/binary/address_range.hpp" #include "library/runtime.hpp" #include "library/state.hpp" #include #include +#include +#include +#include + namespace omnitrace { namespace debug @@ -45,6 +50,9 @@ get_source_location_history() static thread_local auto _v = source_location_history{}; return _v; } + +auto _protect_lock = std::atomic{ false }; +auto _protect_unlock = std::atomic{ false }; } // namespace void @@ -58,19 +66,23 @@ set_source_location(source_location&& _v) lock::lock() : m_lk{ tim::type_mutex(), std::defer_lock } { - if(!m_lk.owns_lock()) + if(!m_lk.owns_lock() && !_protect_lock) { + _protect_lock.store(true); push_thread_state(ThreadState::Internal); m_lk.lock(); + _protect_lock.store(false); } } lock::~lock() { - if(m_lk.owns_lock()) + if(m_lk.owns_lock() && !_protect_unlock) { + _protect_unlock.store(true); m_lk.unlock(); pop_thread_state(); + _protect_unlock.store(false); } } @@ -85,4 +97,29 @@ get_file() return _v; } } // namespace debug + +template +std::string +as_hex(Tp _v, size_t _width) +{ + std::stringstream _ss; + _ss.fill('0'); + _ss << "0x" << std::hex << std::setw(_width) << _v; + return _ss.str(); +} + +template <> +std::string +as_hex(address_range_t _v, size_t _width) +{ + return (_v.is_range()) ? 
JOIN('-', as_hex(_v.low, _width), as_hex(_v.high, _width)) + : as_hex(_v.low, _width); +} + +template std::string as_hex(int32_t, size_t); +template std::string as_hex(uint32_t, size_t); +template std::string as_hex(int64_t, size_t); +template std::string as_hex(uint64_t, size_t); +template std::string +as_hex(void*, size_t); } // namespace omnitrace diff --git a/source/lib/omnitrace/library/debug.hpp b/source/lib/omnitrace/library/debug.hpp index 7f8e757623..bb63d477d3 100644 --- a/source/lib/omnitrace/library/debug.hpp +++ b/source/lib/omnitrace/library/debug.hpp @@ -23,6 +23,7 @@ #pragma once #include "library/defines.hpp" +#include "library/exception.hpp" #include #include @@ -126,6 +127,27 @@ get_chars(T&& _c, std::index_sequence) } } // namespace } // namespace debug + +namespace binary +{ +struct address_range; +} + +using address_range_t = binary::address_range; + +template +std::string +as_hex(Tp, size_t _wdith = 16); + +template <> +std::string as_hex(address_range_t, size_t); + +extern template std::string as_hex(int32_t, size_t); +extern template std::string as_hex(uint32_t, size_t); +extern template std::string as_hex(int64_t, size_t); +extern template std::string as_hex(uint64_t, size_t); +extern template std::string +as_hex(void*, size_t); } // namespace omnitrace #if !defined(OMNITRACE_DEBUG_BUFFER_LEN) @@ -213,7 +235,8 @@ get_chars(T&& _c, std::index_sequence) ::omnitrace::debug::flush(); \ ::omnitrace::debug::lock _debug_lk{}; \ OMNITRACE_FPRINTF_STDERR_COLOR(info); \ - fprintf(::omnitrace::debug::get_file(), "[omnitrace]%s", \ + fprintf(::omnitrace::debug::get_file(), "[omnitrace][%i]%s", \ + OMNITRACE_DEBUG_PROCESS_IDENTIFIER, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? 
"" : " "); \ fprintf(::omnitrace::debug::get_file(), __VA_ARGS__); \ ::omnitrace::debug::flush(); \ @@ -241,7 +264,8 @@ get_chars(T&& _c, std::index_sequence) ::omnitrace::debug::flush(); \ ::omnitrace::debug::lock _debug_lk{}; \ OMNITRACE_FPRINTF_STDERR_COLOR(info); \ - fprintf(::omnitrace::debug::get_file(), "[omnitrace][%s]%s", OMNITRACE_FUNCTION, \ + fprintf(::omnitrace::debug::get_file(), "[omnitrace][%i][%s]%s", \ + OMNITRACE_DEBUG_PROCESS_IDENTIFIER, OMNITRACE_FUNCTION, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ fprintf(::omnitrace::debug::get_file(), __VA_ARGS__); \ ::omnitrace::debug::flush(); \ @@ -270,7 +294,8 @@ get_chars(T&& _c, std::index_sequence) ::omnitrace::debug::flush(); \ ::omnitrace::debug::lock _debug_lk{}; \ OMNITRACE_FPRINTF_STDERR_COLOR(warning); \ - fprintf(::omnitrace::debug::get_file(), "[omnitrace]%s", \ + fprintf(::omnitrace::debug::get_file(), "[omnitrace][%i]%s", \ + OMNITRACE_DEBUG_PROCESS_IDENTIFIER, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ fprintf(::omnitrace::debug::get_file(), __VA_ARGS__); \ ::omnitrace::debug::flush(); \ @@ -298,7 +323,8 @@ get_chars(T&& _c, std::index_sequence) ::omnitrace::debug::flush(); \ ::omnitrace::debug::lock _debug_lk{}; \ OMNITRACE_FPRINTF_STDERR_COLOR(warning); \ - fprintf(::omnitrace::debug::get_file(), "[omnitrace][%s]%s", OMNITRACE_FUNCTION, \ + fprintf(::omnitrace::debug::get_file(), "[omnitrace][%i][%s]%s", \ + OMNITRACE_DEBUG_PROCESS_IDENTIFIER, OMNITRACE_FUNCTION, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ fprintf(::omnitrace::debug::get_file(), __VA_ARGS__); \ ::omnitrace::debug::flush(); \ @@ -306,7 +332,7 @@ get_chars(T&& _c, std::index_sequence) //--------------------------------------------------------------------------------------// -#define OMNITRACE_CONDITIONAL_THROW(COND, ...) \ +#define OMNITRACE_CONDITIONAL_THROW_E(COND, TYPE, ...) 
\ if(COND) \ { \ char _msg_buffer[OMNITRACE_DEBUG_BUFFER_LEN]; \ @@ -316,30 +342,44 @@ get_chars(T&& _c, std::index_sequence) ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ auto len = strlen(_msg_buffer); \ snprintf(_msg_buffer + len, OMNITRACE_DEBUG_BUFFER_LEN - len, __VA_ARGS__); \ - throw std::runtime_error( \ + throw ::omnitrace::exception( \ ::tim::log::string(::tim::log::color::fatal(), _msg_buffer)); \ } -#define OMNITRACE_CONDITIONAL_BASIC_THROW(COND, ...) \ +#define OMNITRACE_CONDITIONAL_BASIC_THROW_E(COND, TYPE, ...) \ if(COND) \ { \ char _msg_buffer[OMNITRACE_DEBUG_BUFFER_LEN]; \ - snprintf(_msg_buffer, OMNITRACE_DEBUG_BUFFER_LEN, "[omnitrace][%s]%s", \ - OMNITRACE_FUNCTION, \ + snprintf(_msg_buffer, OMNITRACE_DEBUG_BUFFER_LEN, "[omnitrace][%i][%s]%s", \ + OMNITRACE_DEBUG_PROCESS_IDENTIFIER, OMNITRACE_FUNCTION, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ auto len = strlen(_msg_buffer); \ snprintf(_msg_buffer + len, OMNITRACE_DEBUG_BUFFER_LEN - len, __VA_ARGS__); \ - throw std::runtime_error( \ + throw ::omnitrace::exception( \ ::tim::log::string(::tim::log::color::fatal(), _msg_buffer)); \ } +#define OMNITRACE_CI_THROW_E(COND, TYPE, ...) \ + OMNITRACE_CONDITIONAL_THROW_E( \ + ::omnitrace::get_is_continuous_integration() && (COND), TYPE, __VA_ARGS__) + +#define OMNITRACE_CI_BASIC_THROW_E(COND, TYPE, ...) \ + OMNITRACE_CONDITIONAL_BASIC_THROW_E( \ + ::omnitrace::get_is_continuous_integration() && (COND), TYPE, __VA_ARGS__) + +//--------------------------------------------------------------------------------------// + +#define OMNITRACE_CONDITIONAL_THROW(COND, ...) \ + OMNITRACE_CONDITIONAL_THROW_E((COND), std::runtime_error, __VA_ARGS__) + +#define OMNITRACE_CONDITIONAL_BASIC_THROW(COND, ...) \ + OMNITRACE_CONDITIONAL_BASIC_THROW_E((COND), std::runtime_error, __VA_ARGS__) + #define OMNITRACE_CI_THROW(COND, ...) 
\ - OMNITRACE_CONDITIONAL_THROW(::omnitrace::get_is_continuous_integration() && (COND), \ - __VA_ARGS__) + OMNITRACE_CI_THROW_E((COND), std::runtime_error, __VA_ARGS__) #define OMNITRACE_CI_BASIC_THROW(COND, ...) \ - OMNITRACE_CONDITIONAL_BASIC_THROW( \ - ::omnitrace::get_is_continuous_integration() && (COND), __VA_ARGS__) + OMNITRACE_CI_BASIC_THROW_E((COND), std::runtime_error, __VA_ARGS__) //--------------------------------------------------------------------------------------// @@ -364,7 +404,8 @@ get_chars(T&& _c, std::index_sequence) { \ ::omnitrace::debug::flush(); \ OMNITRACE_FPRINTF_STDERR_COLOR(fatal); \ - fprintf(::omnitrace::debug::get_file(), "[omnitrace]%s", \ + fprintf(::omnitrace::debug::get_file(), "[omnitrace][%i]%s", \ + OMNITRACE_DEBUG_PROCESS_IDENTIFIER, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ fprintf(::omnitrace::debug::get_file(), __VA_ARGS__); \ ::omnitrace::debug::flush(); \ @@ -396,7 +437,8 @@ get_chars(T&& _c, std::index_sequence) { \ ::omnitrace::debug::flush(); \ OMNITRACE_FPRINTF_STDERR_COLOR(fatal); \ - fprintf(::omnitrace::debug::get_file(), "[omnitrace][%s]%s", OMNITRACE_FUNCTION, \ + fprintf(::omnitrace::debug::get_file(), "[omnitrace][%i][%s]%s", \ + OMNITRACE_DEBUG_PROCESS_IDENTIFIER, OMNITRACE_FUNCTION, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ fprintf(::omnitrace::debug::get_file(), __VA_ARGS__); \ ::omnitrace::debug::flush(); \ @@ -537,6 +579,26 @@ get_chars(T&& _c, std::index_sequence) #define OMNITRACE_WARNING_IF_F(COND, ...) \ OMNITRACE_CONDITIONAL_WARN_F((COND), __VA_ARGS__) +#define OMNITRACE_WARNING_OR_CI_THROW(LEVEL, ...) \ + { \ + if(::omnitrace::get_is_continuous_integration()) \ + { \ + OMNITRACE_CI_THROW(true, __VA_ARGS__); \ + } \ + else \ + { \ + OMNITRACE_CONDITIONAL_WARN(::omnitrace::get_debug() || \ + (::omnitrace::get_verbose() >= LEVEL), \ + __VA_ARGS__) \ + } \ + } + +#define OMNITRACE_REQUIRE(...) 
TIMEMORY_REQUIRE(__VA_ARGS__) +#define OMNITRACE_PREFER(COND) \ + (COND) ? ::tim::log::base() \ + : (::omnitrace::get_is_continuous_integration()) ? TIMEMORY_FATAL \ + : TIMEMORY_WARNING + //--------------------------------------------------------------------------------------// // // Basic print macros (basic means it will not provide PID/RANK or TID) and will not diff --git a/source/lib/omnitrace/library/exception.cpp b/source/lib/omnitrace/library/exception.cpp new file mode 100644 index 0000000000..2e2d330af9 --- /dev/null +++ b/source/lib/omnitrace/library/exception.cpp @@ -0,0 +1,128 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/exception.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace omnitrace +{ +namespace +{ +template +void +consume_args(Args&&...) +{} + +template +auto +get_backtrace(Args... _arg) +{ + auto _bt = std::stringstream{}; + if constexpr(sizeof...(Args) > 0) + { + ((_bt << _arg), ...) << "\n"; + } + tim::unwind::detailed_backtrace<2>(_bt, true); + return strdup(_bt.str().c_str()); + consume_args(_arg...); +} +} // namespace + +template +exception::exception(const std::string& _msg) +: Tp{ _msg } +, m_what{ get_backtrace(_msg) } +{} + +template +exception::exception(const char* _msg) +: Tp{ _msg } +, m_what{ get_backtrace(_msg) } +{} + +template +exception::~exception() +{ + free(m_what); +} + +template +exception::exception(const exception& _rhs) +: Tp{ _rhs } +, m_what{ strdup(_rhs.m_what) } +{} + +template +exception& +exception::operator=(const exception& _rhs) +{ + if(this != &_rhs) + { + Tp::operator=(_rhs); + m_what = strdup(_rhs.m_what); + } + return *this; +} + +template +const char* +exception::what() const noexcept +{ + return (m_what) ? 
m_what : Tp::what(); +} + +template class exception; +template class exception; +template class exception; +template class exception; +template class exception; +template class exception; +template class exception; +template class exception; +template class exception; +// template class exception; +// template class exception; +// template class exception; +// template class exception; +// template class exception; +// template class exception; +// template class exception; +// template class exception; +// template class exception; +// template class exception; +// template class exception; +// template class exception; +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/exception.hpp b/source/lib/omnitrace/library/exception.hpp new file mode 100644 index 0000000000..fd098ef603 --- /dev/null +++ b/source/lib/omnitrace/library/exception.hpp @@ -0,0 +1,53 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include +#include +#include + +namespace omnitrace +{ +template +class exception : public Tp +{ +public: + explicit exception(const std::string& _msg); + explicit exception(const char* _msg); + + ~exception() override; + + exception(exception&&) noexcept = default; + exception& operator=(exception&&) noexcept = default; + + exception(const exception&); + exception& operator=(const exception&); + + const char* what() const noexcept override; + +private: + char* m_what = nullptr; +}; +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/kokkosp.cpp b/source/lib/omnitrace/library/kokkosp.cpp index 7b25b4f546..2ec85fb8ad 100644 --- a/source/lib/omnitrace/library/kokkosp.cpp +++ b/source/lib/omnitrace/library/kokkosp.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,64 @@ namespace { bool _standalone_initialized = false; std::vector _initialize_arguments = {}; +size_t _name_len_limit = 0; +std::string _kp_prefix = {}; + +template +void +set_invalid_id(Tp* _v) +{ + constexpr bool is32 = std::is_same::value; + constexpr bool is64 = std::is_same::value; + static_assert(is32 || is64, "only support uint32_t or uint64_t"); + + *_v = std::numeric_limits::max(); +} + +template +bool +is_invalid_id(Tp _v) +{ + constexpr bool is32 = std::is_same::value; + constexpr bool is64 = std::is_same::value; + static_assert(is32 || is64, "only support uint32_t or uint64_t"); + + return (_v == std::numeric_limits::max()); +} + +template +auto +strlength(Tp&& _v) +{ + using type = ::tim::concepts::unqualified_type_t; + if constexpr(std::is_same::value || + std::is_same::value) + return _v.length(); + else + return strnlen(_v, 
std::max(_name_len_limit, 1)); +} + +template +bool +violates_name_rules(Arg&& _arg, Args&&... _args) +{ + // for causal profiling we only consider callbacks which are explicitly named + if(omnitrace::config::get_use_causal() && + (std::string_view{ _arg }.find("Kokkos::") == 0 || + std::string_view{ _arg }.find("Space::") != std::string_view::npos)) + return true; + + size_t _len = + (strlength(std::forward(_arg)) + ... + strlength(std::forward(_args))); + + // ignore labels without names + if(_len == 0) + return true; + else if(_name_len_limit == 0) + return false; + + return (_len >= _name_len_limit); +} } // namespace //--------------------------------------------------------------------------------------// @@ -205,6 +264,13 @@ extern "C" fprintf(stderr, "%sDone\n%s", tim::log::color::info(), tim::log::color::end()); } + + _name_len_limit = omnitrace::config::get_setting_value( + "OMNITRACE_KOKKOSP_NAME_LENGTH_MAX") + .second; + _kp_prefix = + omnitrace::config::get_setting_value("OMNITRACE_KOKKOSP_PREFIX") + .second; } void kokkosp_finalize_library() @@ -229,12 +295,13 @@ extern "C" void kokkosp_begin_parallel_for(const char* name, uint32_t devid, uint64_t* kernid) { + if(violates_name_rules(name)) return set_invalid_id(kernid); + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - auto pname = - (devid > std::numeric_limits::max()) // junk device number - ? TIMEMORY_JOIN(" ", "[kokkos]", name) - : TIMEMORY_JOIN(" ", TIMEMORY_JOIN("", "[kokkos][dev", devid, ']'), name); - *kernid = kokkosp::get_unique_id(); + auto pname = (devid > std::numeric_limits::max()) // junk device number + ? 
JOIN(" ", _kp_prefix, name, "[for]") + : JOIN(" ", _kp_prefix, name, JOIN("", "[for][dev", devid, ']')); + *kernid = kokkosp::get_unique_id(); kokkosp::logger_t{}.mark(1, __FUNCTION__, name, *kernid); kokkosp::create_profiler(pname, *kernid); kokkosp::start_profiler(*kernid); @@ -242,6 +309,8 @@ extern "C" void kokkosp_end_parallel_for(uint64_t kernid) { + if(is_invalid_id(kernid)) return; + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(-1, __FUNCTION__, kernid); kokkosp::stop_profiler(kernid); @@ -252,11 +321,13 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name, uint32_t devid, uint64_t* kernid) { + if(violates_name_rules(name)) return set_invalid_id(kernid); + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); auto pname = (devid > std::numeric_limits::max()) // junk device number - ? TIMEMORY_JOIN(" ", "[kokkos]", name) - : TIMEMORY_JOIN(" ", TIMEMORY_JOIN("", "[kokkos][dev", devid, ']'), name); + ? JOIN(" ", _kp_prefix, name, "[reduce]") + : JOIN(" ", _kp_prefix, name, JOIN("", "[reduce][dev", devid, ']')); *kernid = kokkosp::get_unique_id(); kokkosp::logger_t{}.mark(1, __FUNCTION__, name, *kernid); kokkosp::create_profiler(pname, *kernid); @@ -265,6 +336,8 @@ extern "C" void kokkosp_end_parallel_reduce(uint64_t kernid) { + if(is_invalid_id(kernid)) return; + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(-1, __FUNCTION__, kernid); kokkosp::stop_profiler(kernid); @@ -275,11 +348,13 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, uint32_t devid, uint64_t* kernid) { + if(violates_name_rules(name)) return set_invalid_id(kernid); + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); auto pname = (devid > std::numeric_limits::max()) // junk device number - ? TIMEMORY_JOIN(" ", "[kokkos]", name) - : TIMEMORY_JOIN(" ", TIMEMORY_JOIN("", "[kokkos][dev", devid, ']'), name); + ? 
JOIN(" ", _kp_prefix, name, "[scan]") + : JOIN(" ", _kp_prefix, name, JOIN("", "[scan][dev", devid, ']')); *kernid = kokkosp::get_unique_id(); kokkosp::logger_t{}.mark(1, __FUNCTION__, name, *kernid); kokkosp::create_profiler(pname, *kernid); @@ -288,6 +363,8 @@ extern "C" void kokkosp_end_parallel_scan(uint64_t kernid) { + if(is_invalid_id(kernid)) return; + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(-1, __FUNCTION__, kernid); kokkosp::stop_profiler(kernid); @@ -298,11 +375,13 @@ extern "C" void kokkosp_begin_fence(const char* name, uint32_t devid, uint64_t* kernid) { + if(violates_name_rules(name)) return set_invalid_id(kernid); + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); auto pname = (devid > std::numeric_limits::max()) // junk device number - ? TIMEMORY_JOIN(" ", "[kokkos]", name) - : TIMEMORY_JOIN(" ", TIMEMORY_JOIN("", "[kokkos][dev", devid, ']'), name); + ? JOIN(" ", _kp_prefix, name, "[fence]") + : JOIN(" ", _kp_prefix, name, JOIN("", "[fence][dev", devid, ']')); *kernid = kokkosp::get_unique_id(); kokkosp::logger_t{}.mark(1, __FUNCTION__, name, *kernid); kokkosp::create_profiler(pname, *kernid); @@ -311,6 +390,8 @@ extern "C" void kokkosp_end_fence(uint64_t kernid) { + if(is_invalid_id(kernid)) return; + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(-1, __FUNCTION__, kernid); kokkosp::stop_profiler(kernid); @@ -323,9 +404,9 @@ extern "C" { OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(1, __FUNCTION__, name); - kokkosp::get_profiler_stack().push_back( - kokkosp::profiler_t(name)); - kokkosp::get_profiler_stack().back().start(); + kokkosp::get_profiler_stack() + .emplace_back(kokkosp::profiler_t(name)) + .start(); } void kokkosp_pop_profile_region() @@ -343,8 +424,8 @@ extern "C" { OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); *secid = kokkosp::get_unique_id(); - auto pname = TIMEMORY_JOIN(" ", "[kokkos]", name); - 
kokkosp::create_profiler(pname, *secid); + auto pname = std::string{ name }; + kokkosp::create_profiler(name, *secid); } void kokkosp_destroy_profile_section(uint32_t secid) @@ -366,7 +447,7 @@ extern "C" { OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(-1, __FUNCTION__, secid); - kokkosp::start_profiler(secid); + kokkosp::stop_profiler(secid); } //----------------------------------------------------------------------------------// @@ -374,23 +455,31 @@ extern "C" void kokkosp_allocate_data(const SpaceHandle space, const char* label, const void* const ptr, const uint64_t size) { + if(violates_name_rules(label)) return; + if(omnitrace::config::get_use_causal()) return; + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(0, __FUNCTION__, space.name, label, - TIMEMORY_JOIN("", '[', ptr, ']'), size); - kokkosp::profiler_alloc_t<>{ TIMEMORY_JOIN(" ", "[kokkos][allocate]", space.name, - label) } - .store(std::plus{}, size); + JOIN("", '[', ptr, ']'), size); + auto pname = + JOIN(" ", _kp_prefix, label, JOIN("", '[', space.name, "][allocate]")); + kokkosp::profiler_alloc_t<>{ pname }.store(std::plus{}, size); + kokkosp::profiler_t{ pname }.mark(); } void kokkosp_deallocate_data(const SpaceHandle space, const char* label, const void* const ptr, const uint64_t size) { + if(violates_name_rules(label)) return; + if(omnitrace::config::get_use_causal()) return; + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(0, __FUNCTION__, space.name, label, - TIMEMORY_JOIN("", '[', ptr, ']'), size); - kokkosp::profiler_alloc_t<>{ TIMEMORY_JOIN(" ", "[kokkos][deallocate]", - space.name, label) } - .store(std::plus{}, size); + JOIN("", '[', ptr, ']'), size); + auto pname = + JOIN(" ", _kp_prefix, label, JOIN("", '[', space.name, "][deallocate]")); + kokkosp::profiler_alloc_t<>{ pname }.store(std::plus{}, size); + kokkosp::profiler_t{ pname }.mark(); } 
//----------------------------------------------------------------------------------// @@ -399,14 +488,16 @@ extern "C" const void* dst_ptr, SpaceHandle src_handle, const char* src_name, const void* src_ptr, uint64_t size) { + if(violates_name_rules(dst_name, src_name)) return; + if(omnitrace::config::get_use_causal()) return; + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(1, __FUNCTION__, dst_handle.name, dst_name, - TIMEMORY_JOIN("", '[', dst_ptr, ']'), src_handle.name, - src_name, TIMEMORY_JOIN("", '[', src_ptr, ']'), size); + JOIN("", '[', dst_ptr, ']'), src_handle.name, src_name, + JOIN("", '[', src_ptr, ']'), size); - auto name = TIMEMORY_JOIN(" ", "[kokkos][deep_copy]", - TIMEMORY_JOIN('=', dst_handle.name, dst_name), - TIMEMORY_JOIN('=', src_handle.name, src_name)); + auto name = JOIN(" ", _kp_prefix, JOIN('=', dst_handle.name, dst_name), "<-", + JOIN('=', src_handle.name, src_name), "[deep_copy]"); auto& _data = kokkosp::get_profiler_stack(); _data.emplace_back(name); @@ -418,6 +509,7 @@ extern "C" void kokkosp_end_deep_copy() { + if(omnitrace::config::get_use_causal()) return; OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(-1, __FUNCTION__); auto& _data = kokkosp::get_profiler_stack(); @@ -432,33 +524,51 @@ extern "C" void kokkosp_profile_event(const char* name) { OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - kokkosp::profiler_t{}.mark(name); + auto _name = tim::get_hash_identifier_fast(tim::add_hash_id(name)); + kokkosp::profiler_t{ _name }.mark(); } //----------------------------------------------------------------------------------// void kokkosp_dual_view_sync(const char* label, const void* const, bool is_device) { + if(violates_name_rules(label)) return; + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); if(omnitrace::config::get_use_perfetto()) { auto _name = tim::get_hash_identifier_fast( - tim::add_hash_id(TIMEMORY_JOIN(" ", "[kokkos][dual_view_sync]", label))); + 
tim::add_hash_id(JOIN(" ", _kp_prefix, label, "[dual_view_sync]"))); TRACE_EVENT_INSTANT("user", ::perfetto::StaticString{ _name.data() }, "target", (is_device) ? "device" : "host"); } + else if(omnitrace::config::get_use_causal()) + { + auto _name = tim::get_hash_identifier_fast(tim::add_hash_id(JOIN( + "", label, " [dual_view_sync][", (is_device) ? "device" : "host", "]"))); + kokkosp::profiler_t{ _name }.mark(); + } } void kokkosp_dual_view_modify(const char* label, const void* const, bool is_device) { + if(violates_name_rules(label)) return; + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); if(omnitrace::config::get_use_perfetto()) { - auto _name = tim::get_hash_identifier_fast(tim::add_hash_id( - TIMEMORY_JOIN(" ", "[kokkos][dual_view_modify]", label))); + auto _name = tim::get_hash_identifier_fast( + tim::add_hash_id(JOIN(" ", _kp_prefix, label, "[dual_view_modify]"))); TRACE_EVENT_INSTANT("user", ::perfetto::StaticString{ _name.data() }, "target", (is_device) ? "device" : "host"); } + else if(omnitrace::config::get_use_causal()) + { + auto _name = tim::get_hash_identifier_fast( + tim::add_hash_id(JOIN(" ", _kp_prefix, label, "[dual_view_modify][", + (is_device) ? "device" : "host", "]"))); + kokkosp::profiler_t{ _name }.mark(); + } } //----------------------------------------------------------------------------------// diff --git a/source/lib/omnitrace/library/locking.cpp b/source/lib/omnitrace/library/locking.cpp new file mode 100644 index 0000000000..f8d36b4269 --- /dev/null +++ b/source/lib/omnitrace/library/locking.cpp @@ -0,0 +1,102 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "locking.hpp" + +namespace omnitrace +{ +namespace locking +{ +void +atomic_mutex::lock() +{ + while(!try_lock()) + {} +} + +void +atomic_mutex::unlock() +{ + if((m_value.load() & 1) == 1) ++m_value; +} + +bool +atomic_mutex::try_lock() +{ + auto _targ = m_value.load(std::memory_order_relaxed); + // acquire on success pairs with the seq_cst increment in unlock(); a relaxed + // CAS would not order the critical section after the lock is taken + if((_targ & 1) != 0) return false; + return m_value.compare_exchange_strong(_targ, _targ + 1, + std::memory_order_acquire, + std::memory_order_relaxed); +} + +atomic_lock::atomic_lock(atomic_mutex& _v) +: m_mutex{ _v } +{ + lock(); +} + +atomic_lock::atomic_lock(atomic_mutex& _v, std::defer_lock_t) +: m_mutex{ _v } +{} + +atomic_lock::~atomic_lock() { unlock(); } + +bool +atomic_lock::owns_lock() const +{ + return m_owns; +} + +void +atomic_lock::lock() +{ + if(!owns_lock()) + { + m_mutex.lock(); + m_owns = true; + } +} + +void +atomic_lock::unlock() +{ + if(owns_lock()) + { + m_mutex.unlock(); + m_owns = false; + } +} + +bool +atomic_lock::try_lock() +{ + if(!owns_lock()) + { + m_owns = m_mutex.try_lock(); + } + return m_owns; +} +} // namespace locking +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/locking.hpp b/source/lib/omnitrace/library/locking.hpp new file mode 100644 index 0000000000..c63e2ac279 --- /dev/null +++ b/source/lib/omnitrace/library/locking.hpp @@ -0,0 +1,78 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +namespace omnitrace +{ +namespace locking +{ +/// simple mutex which spins on an atomic while trying to lock. 
+/// Provided for internal use for when there is low contention +/// but we want to avoid using pthread mutexes since those +/// are wrapped by library +struct atomic_mutex +{ + atomic_mutex() = default; + ~atomic_mutex() = default; + + atomic_mutex(const atomic_mutex&) = delete; + atomic_mutex(atomic_mutex&&) noexcept = delete; + + atomic_mutex& operator=(const atomic_mutex&) = delete; + atomic_mutex& operator=(atomic_mutex&&) noexcept = delete; + + void lock(); + void unlock(); + bool try_lock(); + +private: + std::atomic m_value = {}; +}; + +struct atomic_lock +{ + atomic_lock(atomic_mutex&); + atomic_lock(atomic_mutex&, std::defer_lock_t); + ~atomic_lock(); + + atomic_lock(const atomic_lock&) = delete; + atomic_lock(atomic_lock&&) noexcept = delete; + + atomic_lock& operator=(const atomic_lock&) = delete; + atomic_lock& operator=(atomic_lock&&) noexcept = delete; + + bool owns_lock() const; + + void lock(); + void unlock(); + bool try_lock(); + +private: + bool m_owns = false; + atomic_mutex& m_mutex; +}; +} // namespace locking +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/ptl.cpp b/source/lib/omnitrace/library/ptl.cpp index 0785526ee7..afd782a55a 100644 --- a/source/lib/omnitrace/library/ptl.cpp +++ b/source/lib/omnitrace/library/ptl.cpp @@ -26,6 +26,7 @@ #include "library/defines.hpp" #include "library/runtime.hpp" #include "library/sampling.hpp" +#include "library/state.hpp" #include "library/thread_data.hpp" #include "library/thread_info.hpp" @@ -71,6 +72,7 @@ auto _thread_pool_cfg = []() { thread_info::init(true); threading::set_thread_name( JOIN('.', "ptl", PTL::Threading::GetThreadId()).c_str()); + set_thread_state(ThreadState::Disabled); sampling::block_signals(); }; _v.finalizer = []() {}; @@ -95,6 +97,19 @@ get_thread_pool() } } // namespace +namespace general +{ +namespace +{ +auto& +get_thread_pool_state() +{ + static auto _v = State::PreInit; + return _v; +} +} // namespace +} // namespace general + namespace roctracer { 
namespace @@ -144,7 +159,7 @@ join() if(critical_trace::get_thread_pool_state() == State::Active) { - OMNITRACE_DEBUG_F("waiting for all critical tasks to complete...\n"); + OMNITRACE_DEBUG_F("waiting for all critical trace tasks to complete...\n"); for(size_t i = 0; i < max_supported_threads; ++i) critical_trace::get_task_group(i).join(); } @@ -152,6 +167,13 @@ join() { OMNITRACE_DEBUG_F("critical-trace thread-pool is not active...\n"); } + + if(general::get_thread_pool_state() == State::Active) + { + OMNITRACE_DEBUG_F("waiting for all general tasks to complete...\n"); + for(size_t i = 0; i < max_supported_threads; ++i) + general::get_task_group(i).join(); + } } void @@ -189,6 +211,18 @@ shutdown() OMNITRACE_DEBUG_F("critical-trace thread-pool is not active...\n"); } + if(general::get_thread_pool_state() == State::Active) + { + OMNITRACE_DEBUG_F("Waiting on completion of general tasks...\n"); + for(size_t i = 0; i < max_supported_threads; ++i) + { + general::get_task_group(i).join(); + general::get_task_group(i).clear(); + general::get_task_group(i).set_pool(nullptr); + } + general::get_thread_pool_state() = State::Finalized; + } + if(get_thread_pool_state() == State::Active) { OMNITRACE_DEBUG_F("Destroying the omnitrace thread pool...\n"); @@ -207,6 +241,17 @@ initialize_threadpool(size_t _v) return get_thread_pool().initialize_threadpool(_v); } +PTL::TaskGroup& +general::get_task_group(int64_t _tid) +{ + struct local + {}; + using thread_data_t = thread_data, local>; + static auto& _v = + thread_data_t::instances(construct_on_init{}, &tasking::get_thread_pool()); + return *_v.at(_tid); +} + PTL::TaskGroup& roctracer::get_task_group(int64_t _tid) { diff --git a/source/lib/omnitrace/library/ptl.hpp b/source/lib/omnitrace/library/ptl.hpp index 47f35b663b..8c87f740a8 100644 --- a/source/lib/omnitrace/library/ptl.hpp +++ b/source/lib/omnitrace/library/ptl.hpp @@ -44,6 +44,18 @@ shutdown(); size_t initialize_threadpool(size_t); 
+//--------------------------------------------------------------------------------------// +// +// general +// +//--------------------------------------------------------------------------------------// + +namespace general +{ +PTL::TaskGroup& +get_task_group(int64_t _tid = utility::get_thread_index()); +} + //--------------------------------------------------------------------------------------// // // roctracer diff --git a/source/lib/omnitrace/library/rocm/hsa_rsrc_factory.hpp b/source/lib/omnitrace/library/rocm/hsa_rsrc_factory.hpp index 5c2a54270f..238e6663ae 100644 --- a/source/lib/omnitrace/library/rocm/hsa_rsrc_factory.hpp +++ b/source/lib/omnitrace/library/rocm/hsa_rsrc_factory.hpp @@ -22,6 +22,8 @@ #pragma once +#include "library/exception.hpp" + #define AMD_INTERNAL_BUILD 1 #include @@ -57,7 +59,7 @@ char _buffer[HSA_MESSAGE_LENGTH]; \ snprintf(_buffer, HSA_MESSAGE_LENGTH - 1, "%s: %s", msg, \ emsg ? emsg : ""); \ - throw std::runtime_error(_buffer); \ + throw ::omnitrace::exception(_buffer); \ } \ } while(0) @@ -71,7 +73,7 @@ char _buffer[HSA_MESSAGE_LENGTH]; \ snprintf(_buffer, HSA_MESSAGE_LENGTH - 1, "%s: %s", msg, \ emsg ? 
emsg : ""); \ - throw std::runtime_error(_buffer); \ + throw ::omnitrace::exception(_buffer); \ } \ } while(0) diff --git a/source/lib/omnitrace/library/rocm_smi.cpp b/source/lib/omnitrace/library/rocm_smi.cpp index cc10cd49c3..f27dd2c7ff 100644 --- a/source/lib/omnitrace/library/rocm_smi.cpp +++ b/source/lib/omnitrace/library/rocm_smi.cpp @@ -450,10 +450,10 @@ device_count() std::call_once(_once, _rsmi_init_once); OMNITRACE_ROCM_SMI_CALL(rsmi_num_monitor_devices(&_num_devices)); - } catch(const std::exception& _e) + } catch(std::exception& _e) { - OMNITRACE_BASIC_PRINT("Exception thrown getting the rocm-smi devices: %s\n", - _e.what()); + OMNITRACE_BASIC_VERBOSE(1, "Exception thrown getting the rocm-smi devices: %s\n", + _e.what()); } return _num_devices; } diff --git a/source/lib/omnitrace/library/roctracer.cpp b/source/lib/omnitrace/library/roctracer.cpp index 5e5763048b..43230fdd61 100644 --- a/source/lib/omnitrace/library/roctracer.cpp +++ b/source/lib/omnitrace/library/roctracer.cpp @@ -101,7 +101,7 @@ get_roctracer_hip_data(int64_t _tid = threading::get_id()) { using data_t = std::unordered_map; using thread_data_t = thread_data; - static auto& _v = thread_data_t::instances(thread_data_t::construct_on_init{}); + static auto& _v = thread_data_t::instances(construct_on_init{}); return _v.at(_tid); } @@ -142,7 +142,7 @@ get_roctracer_cid_data(int64_t _tid = threading::get_id()) { using thread_data_t = thread_data, category::roctracer>; - static auto& _v = thread_data_t::instances(thread_data_t::construct_on_init{}); + static auto& _v = thread_data_t::instances(construct_on_init{}); return *_v.at(_tid); } @@ -151,7 +151,7 @@ get_hip_activity_callbacks(int64_t _tid = threading::get_id()) { using thread_data_t = thread_data>, category::roctracer>; - static auto& _v = thread_data_t::instances(thread_data_t::construct_on_init{}); + static auto& _v = thread_data_t::instances(construct_on_init{}); return _v.at(_tid); } diff --git 
a/source/lib/omnitrace/library/runtime.cpp b/source/lib/omnitrace/library/runtime.cpp index d420eaa3de..5a61e92154 100644 --- a/source/lib/omnitrace/library/runtime.cpp +++ b/source/lib/omnitrace/library/runtime.cpp @@ -60,6 +60,13 @@ get_sampling_on_child_threads_history(int64_t _idx = utility::get_thread_index() { static auto _v = utility::get_filled_array( []() { return utility::get_reserved_vector(32); }); + + if(_idx >= OMNITRACE_MAX_THREADS) + { + static thread_local auto _tl_v = utility::get_reserved_vector(32); + return _tl_v; + } + return _v.at(_idx); } @@ -96,17 +103,19 @@ get_cputime_signal() std::set get_sampling_signals(int64_t) { - auto _sigreal = get_realtime_signal(); - auto _sigprof = get_cputime_signal(); + auto _v = std::set{}; + if(config::get_use_causal()) + { + _v.emplace(get_cputime_signal()); + _v.emplace(get_realtime_signal()); + } + else + { + if(config::get_use_sampling_cputime()) _v.emplace(get_cputime_signal()); + if(config::get_use_sampling_realtime()) _v.emplace(get_realtime_signal()); + } - if(config::get_use_sampling_realtime() && config::get_use_sampling_cputime()) - return std::set{ _sigreal, _sigprof }; - else if(config::get_use_sampling_realtime()) - return std::set{ _sigreal }; - else if(config::get_use_sampling_cputime()) - return std::set{ _sigprof }; - - return std::set{}; + return _v; } std::atomic& @@ -124,8 +133,8 @@ get_cpu_cid_stack(int64_t _tid, int64_t _parent) using init_data_t = thread_data; using thread_data_t = thread_data, omnitrace_cpu_cid_stack>; - static auto& _v = thread_data_t::instances(thread_data_t::construct_on_init{}); - static auto& _b = init_data_t::instances(init_data_t::construct_on_init{}, false); + static auto& _v = thread_data_t::instances(construct_on_init{}); + static auto& _b = init_data_t::instances(construct_on_init{}, false); auto& _v_tid = _v.at(_tid); if(_b.at(_tid) && !(*_b.at(_tid))) @@ -147,8 +156,8 @@ get_cpu_cid_parents(int64_t _tid) struct omnitrace_cpu_cid_stack {}; using 
thread_data_t = thread_data; - static auto& _v = thread_data_t::instances(thread_data_t::construct_on_init{}, - cpu_cid_parent_map_t{}); + static auto& _v = + thread_data_t::instances(construct_on_init{}, cpu_cid_parent_map_t{}); return _v.at(_tid); } diff --git a/source/lib/omnitrace/library/runtime.hpp b/source/lib/omnitrace/library/runtime.hpp index 295f708b23..6bfe6f3502 100644 --- a/source/lib/omnitrace/library/runtime.hpp +++ b/source/lib/omnitrace/library/runtime.hpp @@ -23,6 +23,7 @@ #pragma once #include "api.hpp" +#include "library/causal/components/causal_gotcha.hpp" #include "library/common.hpp" #include "library/components/exit_gotcha.hpp" #include "library/components/fork_gotcha.hpp" @@ -51,7 +52,8 @@ using preinit_bundle_t = tim::lightweight_tuple; // started during init phase -using init_bundle_t = tim::lightweight_tuple; +using init_bundle_t = tim::lightweight_tuple; // bundle of components around omnitrace_init and omnitrace_finalize using main_bundle_t = diff --git a/source/lib/omnitrace/library/sampling.cpp b/source/lib/omnitrace/library/sampling.cpp index 0e8059f993..aa81055645 100644 --- a/source/lib/omnitrace/library/sampling.cpp +++ b/source/lib/omnitrace/library/sampling.cpp @@ -28,6 +28,7 @@ #include "library/components/fwd.hpp" #include "library/config.hpp" #include "library/debug.hpp" +#include "library/locking.hpp" #include "library/ptl.hpp" #include "library/runtime.hpp" #include "library/state.hpp" @@ -58,12 +59,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -76,20 +79,36 @@ #include #include +namespace tim +{ +namespace math +{ +template +TIMEMORY_INLINE Tp +plus(Tp&& _lhs, const Up& _rhs) +{ + Tp _v = _lhs; + plus(_v, _rhs); + return _v; +} +} // namespace math +} // namespace tim namespace omnitrace { namespace sampling { +using ::tim::sampling::dynamic; +using tim::sampling::timer; + using hw_counters = typename 
component::backtrace_metrics::hw_counters; using signal_type_instances = thread_data, category::sampling>; using sampler_running_instances = thread_data; using bundle_t = - tim::lightweight_tuple; -using sampler_t = tim::sampling::sampler; + tim::lightweight_tuple; +using sampler_t = tim::sampling::sampler; using sampler_instances = thread_data; using sampler_init_instances = thread_data; - -using tim::sampling::timer; } // namespace sampling } // namespace omnitrace @@ -225,8 +244,7 @@ get_sampler_init(int64_t _tid = threading::get_id()) unique_ptr_t& get_sampler_running(int64_t _tid) { - static auto& _v = sampler_running_instances::instances( - sampler_running_instances::construct_on_init{}, false); + static auto& _v = sampler_running_instances::instances(construct_on_init{}, false); return _v.at(_tid); } @@ -356,10 +374,10 @@ get_offload_file() return _v; } -std::mutex& +locking::atomic_mutex& get_offload_mutex() { - static auto _v = std::mutex{}; + static auto _v = locking::atomic_mutex{}; return _v; } @@ -369,12 +387,25 @@ using sampler_buffer_t = tim::data_storage::ring_buffer; void offload_buffer(int64_t _seq, sampler_buffer_t&& _buf) { - auto _lk = std::unique_lock{ get_offload_mutex() }; + OMNITRACE_REQUIRE(get_use_tmp_files()) + << "Error! sampling allocator tries to offload buffer of samples but " + "omnitrace was configured to not use temporary files\n"; + + // use homemade atomic_mutex/atomic_lock since contention will be low + // and using pthread_lock might trigger our wrappers + auto _lk = locking::atomic_lock{ get_offload_mutex() }; auto& _file = get_offload_file(); - if(!_file) return; + + OMNITRACE_REQUIRE(_file) + << "Error! sampling allocator tried to offload buffer of samples but the " + "offload file does not exist\n"; OMNITRACE_VERBOSE_F(3, "Saving sampling buffer for thread %li...\n", _seq); auto& _fs = _file->stream; + + OMNITRACE_REQUIRE(_fs.good()) + << "Error! 
temporary file for offloading buffer is in an invalid state\n"; + _fs.write(reinterpret_cast(&_seq), sizeof(_seq)); auto _data = std::move(_buf); _data.save(_fs); @@ -388,14 +419,27 @@ load_offload_buffer() auto _data = std::map>{}; if(!get_use_tmp_files()) return _data; - auto _lk = std::unique_lock{ get_offload_mutex() }; + // use homemade atomic_mutex/atomic_lock since contention will be low + // and using pthread_lock might trigger our wrappers + auto _lk = locking::atomic_lock{ get_offload_mutex() }; auto& _file = get_offload_file(); - if(!_file) return _data; + if(!_file) + { + OMNITRACE_WARNING_F( + 0, "[sampling] returning no data because the offload file no longer exists"); + return _data; + } auto& _fs = _file->stream; _fs.close(); _file->open(std::ios::binary | std::ios::in); + + if(!_fs) + { + OMNITRACE_WARNING_F(0, "[sampling] %s failed to open", _file->filename.c_str()); + } + while(!_fs.eof()) { int64_t _seq = 0; @@ -421,6 +465,10 @@ configure(bool _setup, int64_t _tid) bool _is_running = (!_running) ? false : *_running; auto& _signal_types = sampling::get_signal_types(_tid); + OMNITRACE_CONDITIONAL_THROW(get_use_causal(), + "Internal error! 
configuring sampling not permitted when " + "causal profiling is enabled"); + OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); auto&& _cpu_tids = get_sampling_cpu_tids(); @@ -520,7 +568,7 @@ configure(bool _setup, int64_t _tid) if(_timer) { OMNITRACE_VERBOSE( - 1, + 2, "[SIG%i] Sampler for thread %lu will be triggered %.1fx per " "second of %s-time (every %.3e milliseconds)...\n", itr, _tid, _timer->get_frequency(units::sec), _type, @@ -669,19 +717,24 @@ unblock_signals(std::set _signals) void post_process() { + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + + size_t _total_data = 0; + size_t _total_threads = 0; + auto _external_samples = std::atomic{ 0 }; + auto _internal_samples = std::atomic{ 0 }; + + OMNITRACE_VERBOSE(2 || get_debug_sampling(), "Stopping sampling components...\n"); omnitrace::component::backtrace::stop(); - OMNITRACE_VERBOSE(2 || get_debug_sampling(), "Stopping backtrace metrics...\n"); - - for(size_t i = 0; i < max_supported_threads; ++i) - backtrace_metrics::configure(false, i); - - size_t _total_data = 0; - size_t _total_threads = 0; - for(size_t i = 0; i < max_supported_threads; ++i) { + backtrace_metrics::configure(false, i); auto& _sampler = get_sampler(i); - if(_sampler) _sampler->set_offload(nullptr); + if(_sampler) + { + _sampler->stop(); + _sampler->set_offload(nullptr); + } } auto _loaded_data = load_offload_buffer(); @@ -715,7 +768,6 @@ post_process() OMNITRACE_VERBOSE(3 || get_debug_sampling(), "Getting sampler data for thread %lu...\n", i); - _sampler->stop(); auto _raw_data = _sampler->get_data(); for(auto litr : _loaded_data[i]) { @@ -742,45 +794,58 @@ post_process() std::vector _data{}; for(auto& itr : _raw_data) { - _data.reserve(_data.size() + itr.size()); auto* _bt = itr.get(); auto* _ts = itr.get(); - if(!_bt || !_ts) continue; - if(_bt->empty()) continue; - if(!_thread_info->is_valid_time(_ts->get_timestamp())) continue; - _data.emplace_back(&itr); + if(_thread_info && _bt && !_bt->empty() && _ts && + 
_thread_info->is_valid_time(_ts->get_timestamp())) + { + _data.emplace_back(&itr); + } } - if(_data.empty()) + _total_data += _data.size(); + _total_threads += (!_data.empty()) ? 1 : 0; + + if(!_data.empty()) + { + OMNITRACE_VERBOSE(2 || get_debug_sampling(), + "Sampler data for thread %lu has %zu valid entries...\n", i, + _data.size()); + + if(get_use_perfetto()) post_process_perfetto(i, _init, _data); + if(get_use_timemory()) post_process_timemory(i, _init, _data); + } + else { OMNITRACE_VERBOSE(2 || get_debug_sampling(), "Sampler data for thread %lu has zero valid entries out of " "%zu... (skipped)\n", i, _raw_data.size()); - continue; } - - OMNITRACE_VERBOSE(2 || get_debug_sampling(), - "Sampler data for thread %lu has %zu valid entries...\n", i, - _raw_data.size()); - - _total_data += _raw_data.size(); - _total_threads += 1; - - if(get_use_perfetto()) post_process_perfetto(i, _init, _data); - if(get_use_timemory()) post_process_timemory(i, _init, _data); } - OMNITRACE_VERBOSE(3 || get_debug_sampling(), "Destroying samplers...\n"); + OMNITRACE_VERBOSE(3 || get_debug_sampling(), + "Destroying samplers and allocators...\n"); for(size_t i = 0; i < max_supported_threads; ++i) - { get_sampler(i).reset(); + + for(auto& itr : get_sampler_allocators()) + { + if(itr) itr.reset(); + } + + if(get_offload_file()) + { + get_offload_file()->remove(); + get_offload_file().reset(); } OMNITRACE_VERBOSE(1 || get_debug_sampling(), - "Collected %zu samples from %zu threads...\n", _total_data, - _total_threads); + "Collected %zu samples from %zu threads... 
%zu samples out of %zu " + "were taken while within instrumented routines\n", + _total_data, _total_threads, _internal_samples.load(), + (_internal_samples + _external_samples)); } namespace diff --git a/source/lib/omnitrace/library/state.cpp b/source/lib/omnitrace/library/state.cpp index 8ef5c93316..ed4ed1fee8 100644 --- a/source/lib/omnitrace/library/state.cpp +++ b/source/lib/omnitrace/library/state.cpp @@ -51,6 +51,12 @@ get_thread_state_history(int64_t _idx = utility::get_thread_index()) static auto _v = utility::get_filled_array( []() { return utility::get_reserved_vector(32); }); + if(_idx >= OMNITRACE_MAX_THREADS) + { + static thread_local auto _tl_v = utility::get_reserved_vector(32); + return _tl_v; + } + return _v.at(_idx); } } // namespace @@ -147,8 +153,20 @@ to_string(omnitrace::Mode _v) { case omnitrace::Mode::Trace: return "Trace"; case omnitrace::Mode::Sampling: return "Sampling"; + case omnitrace::Mode::Causal: return "Causal"; case omnitrace::Mode::Coverage: return "Coverage"; } return {}; } + +std::string +to_string(omnitrace::CausalMode _v) +{ + switch(_v) + { + case omnitrace::CausalMode::Line: return "Line"; + case omnitrace::CausalMode::Function: return "Function"; + } + return {}; +} } // namespace std diff --git a/source/lib/omnitrace/library/state.hpp b/source/lib/omnitrace/library/state.hpp index 7eb86c3409..11f8475001 100644 --- a/source/lib/omnitrace/library/state.hpp +++ b/source/lib/omnitrace/library/state.hpp @@ -24,6 +24,7 @@ #include "library/defines.hpp" +#include #include namespace omnitrace @@ -51,9 +52,16 @@ enum class Mode : unsigned short { Trace = 0, Sampling, + Causal, Coverage }; +enum class CausalMode : unsigned short +{ + Line = 0, + Function +}; + // // Runtime configuration data // @@ -99,4 +107,7 @@ to_string(omnitrace::ThreadState _v); std::string to_string(omnitrace::Mode _v); + +std::string +to_string(omnitrace::CausalMode _v); } // namespace std diff --git a/source/lib/omnitrace/library/thread_data.hpp 
b/source/lib/omnitrace/library/thread_data.hpp index 275bd9e809..9ee81dfec7 100644 --- a/source/lib/omnitrace/library/thread_data.hpp +++ b/source/lib/omnitrace/library/thread_data.hpp @@ -26,10 +26,14 @@ #include "library/common.hpp" #include "library/concepts.hpp" #include "library/config.hpp" +#include "library/containers/stable_vector.hpp" #include "library/defines.hpp" #include "library/state.hpp" +#include "library/thread_deleter.hpp" #include "library/timemory.hpp" +#include "library/utility.hpp" +#include #include #include @@ -38,11 +42,10 @@ #include #include #include +#include namespace omnitrace { -ThreadState set_thread_state(ThreadState); - // bundle of components used in instrumentation using instrumentation_bundle_t = tim::component_bundle; -template -struct thread_deleter; +using grow_functor_t = int64_t (*)(int64_t); -// unique ptr type for omnitrace -template -using unique_ptr_t = std::unique_ptr>; - -static constexpr size_t max_supported_threads = OMNITRACE_MAX_THREADS; - -template <> -struct thread_deleter +inline auto& +grow_functors() { - void operator()() const; -}; - -extern template struct thread_deleter; + static auto _v = container::stable_vector{}; + return _v; +} template -struct thread_deleter +struct base_thread_data { - void operator()(Tp* ptr) const + base_thread_data() { - thread_deleter{}(); - delete ptr; + auto _func = [](int64_t _sz) -> int64_t { + auto& _v = Tp::instance(); + if(_v && _v->capacity() < static_cast(_sz + 1)) + { + _v->reserve(_v->capacity() + 1); + _v->resize(_v->capacity()); + } + return (_v) ? _v->capacity() : 0; + }; + grow_functors().emplace_back(std::move(_func)); } }; -template -struct generate -{ - using type = Tp; - - template - auto operator()(Args&&... _args) const - { - if constexpr(concepts::is_unique_pointer::value) - { - using value_type = typename type::element_type; - return type{ new value_type{ invoke(std::forward(_args), 0)... } }; - } - else - { - return type{ invoke(std::forward(_args), 0)... 
}; - } - } - -private: - template - static auto invoke(Up&& _v, int, - std::enable_if_t::value, int> = 0) - -> decltype(std::forward(_v)()) - { - return std::forward(_v)(); - } - - template - static auto&& invoke(Up&& _v, long) - { - return std::forward(_v); - } -}; - -using construct_on_init = std::true_type; - -struct construct_on_thread -{ - int64_t index = threading::get_id(); -}; - template struct thread_data { - using value_type = unique_ptr_t; - using instance_array_t = std::array; - using construct_on_init = std::true_type; + using value_type = unique_ptr_t; + using instance_array_t = std::array; template static void construct(construct_on_thread&&, Args&&...); @@ -165,7 +125,8 @@ thread_data::construct(construct_on_thread&& _t, Args&&... // construct outside of lambda to prevent data-race static auto& _instances = instances(); if(!_instances.at(_t.index)) - _instances.at(_t.index) = generate{}(std::forward(_args)...); + _instances.at(_t.index) = + utility::generate{}(std::forward(_args)...); } template @@ -200,12 +161,23 @@ thread_data::instances(construct_on_init, Args&&... _args) static auto& _v = [&]() -> instance_array_t& { auto& _internal = instances(); for(size_t i = 0; i < MaxThreads; ++i) - _internal.at(i) = generate{}(std::forward(_args)...); + _internal.at(i) = + utility::generate{}(std::forward(_args)...); return _internal; }(); return _v; } +template +struct use_placement_new_when_generating_unique_ptr< + thread_data, Tag, MaxThreads>> : std::true_type +{}; + +template +struct use_placement_new_when_generating_unique_ptr< + thread_data, Tag, MaxThreads>> : std::true_type +{}; + //--------------------------------------------------------------------------------------// // // thread_data with std::optional @@ -214,53 +186,152 @@ thread_data::instances(construct_on_init, Args&&... 
_args) template struct thread_data, Tag, MaxThreads> +: base_thread_data, Tag, MaxThreads>> { - using value_type = std::optional; - using instance_array_t = std::array; + using this_type = thread_data, Tag, MaxThreads>; + using value_type = std::optional; + using array_type = container::stable_vector; + using functor_type = std::function; + + thread_data() = default; + ~thread_data() = default; + + explicit thread_data(functor_type&& _init) + : m_init{ std::move(_init) } + {} + + thread_data(const thread_data&) = default; + thread_data(thread_data&&) noexcept = default; + + thread_data& operator=(const thread_data&) = default; + thread_data& operator=(thread_data&&) noexcept = default; + + static unique_ptr_t& instance(); template - static void construct(construct_on_thread&&, Args&&...); - static value_type& instance(); - static instance_array_t& instances(); + static unique_ptr_t& instance(construct_on_init, Args&&...); + template static value_type& instance(construct_on_thread&&, Args&&...); + template - static instance_array_t& instances(construct_on_init, Args&&...); + static unique_ptr_t& construct(construct_on_init, Args&&...); - static constexpr size_t size() { return MaxThreads; } + template + static value_type& construct(construct_on_thread&&, Args&&...); - decltype(auto) begin() { return instances().begin(); } - decltype(auto) end() { return instances().end(); } + size_t size() { return m_data.size(); } - decltype(auto) begin() const { return instances().begin(); } - decltype(auto) end() const { return instances().end(); } + decltype(auto) data() { return m_data; } + decltype(auto) data() const { return m_data; } + + decltype(auto) begin() { return m_data.begin(); } + decltype(auto) end() { return m_data.end(); } + + decltype(auto) begin() const { return m_data.begin(); } + decltype(auto) end() const { return m_data.end(); } + + decltype(auto) at(size_t _idx) { return m_data.at(_idx); } + decltype(auto) at(size_t _idx) const { return m_data.at(_idx); } + 
+ decltype(auto) operator[](size_t _idx) { return m_data[_idx]; } + decltype(auto) operator[](size_t _idx) const { return m_data[_idx]; } + + decltype(auto) reserve(size_t _n) { return m_data.reserve(_n); } + decltype(auto) capacity() const { return m_data.capacity(); } + decltype(auto) empty() const { return m_data.empty(); } + + void resize(size_t _n) { container::resize(m_data, _n, m_init()); } + + template + void resize(size_t _n, Up&& _v) + { + static_assert(std::is_assignable::value, + "value is not assignable to optional"); + container::resize(m_data, _n, std::forward(_v)); + } + +private: + array_type m_data = {}; + functor_type m_init = []() { return value_type{}; }; }; +template +unique_ptr_t, Tag, MaxThreads>>& +thread_data, Tag, MaxThreads>::instance() +{ + static auto _v = unique_ptr_t{}; + return _v; +} + template template -void +unique_ptr_t, Tag, MaxThreads>>& +thread_data, Tag, MaxThreads>::instance(construct_on_init, + Args&&... _args) +{ + static auto& _v = [&]() -> unique_ptr_t& { + auto& _ref = instance(); + if(!_ref) + _ref = utility::generate>{}( + std::forward(_args)...); + if(_ref->size() < MaxThreads) _ref->resize(MaxThreads); + return _ref; + }(); + return _v; +} + +template +template +unique_ptr_t, Tag, MaxThreads>>& +thread_data, Tag, MaxThreads>::construct(construct_on_init, + Args&&... _args) +{ + // construct outside of lambda to prevent data-race + static auto& _ref = instance(construct_on_init{}); + static auto _v = [&]() { + if(_ref) + { + for(auto& itr : *_ref) + itr = utility::generate{}(std::forward(_args)...); + } + return (_ref != nullptr); + }(); + return _ref; + (void) _v; +} + +template +template +std::optional& thread_data, Tag, MaxThreads>::construct(construct_on_thread&& _t, Args&&... 
_args) { // construct outside of lambda to prevent data-race - static auto& _instances = instances(); - if(!_instances.at(_t.index)) - _instances.at(_t.index) = generate{}(std::forward(_args)...); -} + static auto& _instance = instance(construct_on_init{}); + static auto _constructed = container::stable_vector{}; + static auto _grow = []() { + container::resize(_constructed, MaxThreads, false); + grow_functors().emplace_back([](int64_t _n) -> int64_t { + if(static_cast(_n) >= _constructed.size()) + { + _constructed.reserve(_constructed.capacity() + 1); + container::resize(_constructed, _constructed.capacity(), false); + } + return _constructed.size(); + }); + return true; + }(); -template -std::optional& -thread_data, Tag, MaxThreads>::instance() -{ - return instances().at(threading::get_id()); -} + if(!_constructed.at(_t.index)) + _constructed.at(_t.index) = + (_instance->at(_t.index) = + utility::generate{}(std::forward(_args)...), + true); -template -typename thread_data, Tag, MaxThreads>::instance_array_t& -thread_data, Tag, MaxThreads>::instances() -{ - static auto _v = instance_array_t{}; - return _v; + return _instance->at(_t.index); + + (void) _grow; } template @@ -270,22 +341,7 @@ thread_data, Tag, MaxThreads>::instance(construct_on_thread&& Args&&... _args) { construct(construct_on_thread{ _t }, std::forward(_args)...); - return instances().at(_t.index); -} - -template -template -typename thread_data, Tag, MaxThreads>::instance_array_t& -thread_data, Tag, MaxThreads>::instances(construct_on_init, - Args&&... 
_args) -{ - static auto& _v = [&]() -> instance_array_t& { - auto& _internal = instances(); - for(size_t i = 0; i < MaxThreads; ++i) - _internal.at(i) = generate{}(std::forward(_args)...); - return _internal; - }(); - return _v; + return instance()->at(_t.index); } //--------------------------------------------------------------------------------------// @@ -294,70 +350,150 @@ thread_data, Tag, MaxThreads>::instances(construct_on_init, // //--------------------------------------------------------------------------------------// -using tim::identity; -using tim::identity_t; - template struct thread_data, Tag, MaxThreads> +: base_thread_data, Tag, MaxThreads>> { - using value_type = Tp; - using instance_array_t = std::array; + using this_type = thread_data, Tag, MaxThreads>; + using value_type = Tp; + using array_type = container::stable_vector; + using functor_type = std::function; + + thread_data() = default; + ~thread_data() = default; + + explicit thread_data(functor_type&& _init) + : m_init{ std::move(_init) } + {} + + thread_data(const thread_data&) = default; + thread_data(thread_data&&) noexcept = default; + + thread_data& operator=(const thread_data&) = default; + thread_data& operator=(thread_data&&) noexcept = default; + + static unique_ptr_t& instance(); template - static void construct(construct_on_thread&&, Args&&...); - static value_type& instance(); - static instance_array_t& instances(); + static unique_ptr_t& instance(construct_on_init, Args&&...); + template static value_type& instance(construct_on_thread&&, Args&&...); - template - static instance_array_t& instances(construct_on_init, Args&&...); template - static void construct(Args&&... 
args) + static unique_ptr_t& construct(construct_on_init, Args&&...); + + template + static value_type& construct(construct_on_thread&&, Args&&...); + + size_t size() { return m_data.size(); } + + decltype(auto) data() { return m_data; } + decltype(auto) data() const { return m_data; } + + decltype(auto) begin() { return m_data.begin(); } + decltype(auto) end() { return m_data.end(); } + + decltype(auto) begin() const { return m_data.begin(); } + decltype(auto) end() const { return m_data.end(); } + + decltype(auto) at(size_t _idx) { return m_data.at(_idx); } + decltype(auto) at(size_t _idx) const { return m_data.at(_idx); } + + decltype(auto) operator[](size_t _idx) { return m_data[_idx]; } + decltype(auto) operator[](size_t _idx) const { return m_data[_idx]; } + + decltype(auto) reserve(size_t _n) { return m_data.reserve(_n); } + decltype(auto) capacity() const { return m_data.capacity(); } + decltype(auto) empty() const { return m_data.empty(); } + + void resize(size_t _n) { container::resize(m_data, _n, m_init()); } + void resize(size_t _n, value_type&& _v) { container::resize(m_data, _n, _v); } + + void fill(value_type _v) { - construct(construct_on_thread{}, std::forward(args)...); + for(auto& itr : m_data) + itr = _v; } - template - static value_type& instance(Args&&... 
args) - { - return instance(construct_on_thread{}, std::forward(args)...); - } - - static constexpr size_t size() { return MaxThreads; } - - decltype(auto) begin() { return instances().begin(); } - decltype(auto) end() { return instances().end(); } - - decltype(auto) begin() const { return instances().begin(); } - decltype(auto) end() const { return instances().end(); } +private: + array_type m_data = {}; + functor_type m_init = []() { return value_type{}; }; }; +template +unique_ptr_t, Tag, MaxThreads>>& +thread_data, Tag, MaxThreads>::instance() +{ + static auto _v = unique_ptr_t{}; + return _v; +} + template template -void +unique_ptr_t, Tag, MaxThreads>>& +thread_data, Tag, MaxThreads>::instance(construct_on_init, Args&&... _args) +{ + static auto& _v = [&]() -> unique_ptr_t& { + auto& _ref = instance(); + if(!_ref) + _ref = utility::generate>{}( + std::forward(_args)...); + if(_ref->size() < MaxThreads) _ref->resize(MaxThreads); + return _ref; + }(); + return _v; +} + +template +template +unique_ptr_t, Tag, MaxThreads>>& +thread_data, Tag, MaxThreads>::construct(construct_on_init, Args&&... _args) +{ + // construct outside of lambda to prevent data-race + static auto& _ref = instance(construct_on_init{}); + static auto _v = [&]() { + if(_ref) + { + for(auto& itr : *_ref) + itr = utility::generate{}(std::forward(_args)...); + } + return (_ref != nullptr); + }(); + return _ref; + (void) _v; +} + +template +template +Tp& thread_data, Tag, MaxThreads>::construct(construct_on_thread&& _t, Args&&... 
_args) { // construct outside of lambda to prevent data-race - static auto& _instances = instances(); - if(!_instances.at(_t.index)) - _instances.at(_t.index) = generate{}(std::forward(_args)...); -} + static auto& _instance = instance(construct_on_init{}); + static auto _constructed = container::stable_vector{}; + static auto _grow = []() { + container::resize(_constructed, MaxThreads, false); + grow_functors().emplace_back([](int64_t _n) -> int64_t { + if(static_cast(_n) >= _constructed.size()) + { + _constructed.reserve(_constructed.capacity() + 1); + container::resize(_constructed, _constructed.capacity(), false); + } + return _constructed.size(); + }); + return true; + }(); -template -Tp& -thread_data, Tag, MaxThreads>::instance() -{ - return instances().at(threading::get_id()); -} + if(!_constructed.at(_t.index)) + _constructed.at(_t.index) = + (_instance->at(_t.index) = + utility::generate{}(std::forward(_args)...), + true); -template -typename thread_data, Tag, MaxThreads>::instance_array_t& -thread_data, Tag, MaxThreads>::instances() -{ - static auto _v = instance_array_t{}; - return _v; + return _instance->at(_t.index); + (void) _grow; } template @@ -367,21 +503,7 @@ thread_data, Tag, MaxThreads>::instance(construct_on_thread&& _t, Args&&... _args) { construct(construct_on_thread{ _t }, std::forward(_args)...); - return instances().at(_t.index); -} - -template -template -typename thread_data, Tag, MaxThreads>::instance_array_t& -thread_data, Tag, MaxThreads>::instances(construct_on_init, Args&&... _args) -{ - static auto& _v = [&]() -> instance_array_t& { - auto& _internal = instances(); - for(size_t i = 0; i < MaxThreads; ++i) - _internal.at(i) = generate{}(std::forward(_args)...); - return _internal; - }(); - return _v; + return instance()->at(_t.index); } //--------------------------------------------------------------------------------------// @@ -390,13 +512,106 @@ thread_data, Tag, MaxThreads>::instances(construct_on_init, Args&&. 
// vector so using vector and // timemory's ring_buffer_allocator to create contiguous memory-page aligned instances of // the bundle -struct instrumentation_bundles +template +struct component_bundle_cache { - using instance_array_t = std::array; + using bundle_type = tim::component_bundle; + using this_type = component_bundle_cache; + using allocator_type = tim::data::ring_buffer_allocator; + using instance_type = + std::array, max_supported_threads>; - bundle_allocator_t allocator{}; - std::vector bundles{}; + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + using reverse_iterator = typename std::vector::reverse_iterator; - static instance_array_t& instances(); + allocator_type allocator = {}; + std::vector bundles = {}; + + bool empty() const { return bundles.empty(); } + + auto& front() { return bundles.front(); } + auto& front() const { return bundles.front(); } + + auto& back() { return bundles.back(); } + auto& back() const { return bundles.back(); } + + auto begin() { return bundles.begin(); } + auto end() { return bundles.end(); } + + auto rbegin() { return bundles.rbegin(); } + auto rend() { return bundles.rend(); } + + auto begin() const { return bundles.begin(); } + auto end() const { return bundles.end(); } + + auto size() const { return bundles.size(); } + + auto& at(size_t _idx) { return bundles.at(_idx); } + const auto& at(size_t _idx) const { return bundles.at(_idx); } + + static auto& instances() + { + static auto _v = instance_type{}; + return _v; + } + + static auto& instance(int64_t _tid) { return instances().at(_tid); } + + template + bundle_type* construct(Args&&... 
args) + { + bundle_type* _v = allocator.allocate(1); + allocator.construct(_v, std::forward(args)...); + return bundles.emplace_back(_v); + } + + void destroy(bundle_type* _v, size_t _idx) + { + allocator.destroy(_v); + allocator.deallocate(_v, 1); + bundles.erase(bundles.begin() + _idx); + } + + void pop_back() + { + bundle_type* _v = bundles.back(); + allocator.destroy(_v); + allocator.deallocate(_v, 1); + bundles.pop_back(); + } + + template + void destroy(IterT _v) + { + iterator itr = begin(); + if constexpr(std::is_same::value) + { + if(_v == rend()) return; + std::advance(itr, std::distance(rbegin(), _v)); + } + else + { + if(_v == end()) return; + itr = _v; + } + allocator.destroy(*itr); + allocator.deallocate(*itr, 1); + bundles.erase(itr); + } }; + +template +struct component_bundle_cache> +: component_bundle_cache +{ + using base_type = component_bundle_cache; + + using base_type::allocator; + using base_type::bundles; + using base_type::instances; +}; + +using instrumentation_bundles = component_bundle_cache; +extern template struct component_bundle_cache; } // namespace omnitrace diff --git a/source/lib/omnitrace/library/thread_data.cpp b/source/lib/omnitrace/library/thread_deleter.cpp similarity index 90% rename from source/lib/omnitrace/library/thread_data.cpp rename to source/lib/omnitrace/library/thread_deleter.cpp index f2d322b721..1fbc6eebea 100644 --- a/source/lib/omnitrace/library/thread_data.cpp +++ b/source/lib/omnitrace/library/thread_deleter.cpp @@ -20,7 +20,7 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-#include "library/thread_data.hpp" +#include "library/thread_deleter.hpp" #include "library/components/pthread_create_gotcha.hpp" #include "library/thread_info.hpp" #include "library/utility.hpp" @@ -30,12 +30,7 @@ namespace omnitrace { -instrumentation_bundles::instance_array_t& -instrumentation_bundles::instances() -{ - static auto _v = instance_array_t{}; - return _v; -} +template struct component_bundle_cache; void thread_deleter::operator()() const diff --git a/source/lib/omnitrace/library/thread_deleter.hpp b/source/lib/omnitrace/library/thread_deleter.hpp new file mode 100644 index 0000000000..00ccb09636 --- /dev/null +++ b/source/lib/omnitrace/library/thread_deleter.hpp @@ -0,0 +1,52 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "library/concepts.hpp" +#include "library/defines.hpp" + +namespace omnitrace +{ +template <> +struct thread_deleter +{ + void operator()() const; +}; + +extern template struct thread_deleter; + +template +struct thread_deleter +{ + void operator()(Tp* ptr) const + { + constexpr bool delete_pointer = + (use_placement_new_when_generating_unique_ptr::value == false); + + thread_deleter{}(); + if constexpr(delete_pointer) delete ptr; + + (void) ptr; + } +}; +} // namespace omnitrace diff --git a/source/lib/omnitrace/library/thread_info.cpp b/source/lib/omnitrace/library/thread_info.cpp index 675d846e98..ee3280df55 100644 --- a/source/lib/omnitrace/library/thread_info.cpp +++ b/source/lib/omnitrace/library/thread_info.cpp @@ -21,6 +21,9 @@ // SOFTWARE. #include "library/thread_info.hpp" +#include "library/causal/delay.hpp" +#include "library/common.hpp" +#include "library/concepts.hpp" #include "library/config.hpp" #include "library/debug.hpp" #include "library/runtime.hpp" @@ -30,20 +33,39 @@ #include #include +#include namespace omnitrace { namespace { -using thread_index_data_t = - thread_data, project::omnitrace>; -using thread_info_data_t = thread_data, project::omnitrace>; +auto& +get_info_data() +{ + using thread_data_t = thread_data, project::omnitrace>; + static auto& _v = thread_data_t::instance(construct_on_init{}); + return _v; +} + +auto& +get_index_data() +{ + using thread_data_t = + thread_data, project::omnitrace>; + static auto& _v = thread_data_t::instance(construct_on_init{}); + return _v; +} + +auto& +get_info_data(int64_t _tid) +{ + return get_info_data()->at(_tid); +} auto& get_index_data(int64_t _tid) { - static auto& _v = thread_index_data_t::instances(); - return _v.at(_tid); + return get_index_data()->at(_tid); } auto @@ -76,52 +98,104 @@ init_index_data(int64_t _tid, bool _offset = false) const auto unknown_thread = std::optional{}; } // namespace +int64_t +grow_data(int64_t _tid) +{ + struct data_growth + {}; + + 
static int64_t _max_threads = max_supported_threads; + if(_tid >= _max_threads) + { + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + auto_lock_t _lk{ type_mutex() }; + + // check again after locking + if(_tid >= _max_threads) + { + TIMEMORY_PRINTF_WARNING( + stderr, "[%li] Growing thread data from %li to %li...\n", _tid, + _max_threads, _max_threads + max_supported_threads); + fflush(stderr); + + // auto _expected = _max_threads + max_supported_threads; + for(auto itr : grow_functors()) + { + if(itr) + { + int64_t _new_capacity = (*itr)(_tid + 1); + TIMEMORY_PRINTF_WARNING(stderr, + "[%li] Grew thread data from %li to %li...\n", + _tid, _max_threads, _new_capacity); + } + } + _max_threads += max_supported_threads; + } + } + + return _max_threads; +} + const std::optional& thread_info::init(bool _offset) { static thread_local bool _once = false; - auto& _instances = thread_info_data_t::instances(); + auto& _info_data = get_info_data(); auto _tid = utility::get_thread_index(); + if(!_info_data) + { + static auto _dummy = std::optional{}; + return (_dummy.reset(), _dummy); // always reset for safety + } + if(!_once && (_once = true)) { + grow_data(_tid); threading::offset_this_id(_offset); - auto& _info = _instances.at(_tid); + auto& _info = _info_data->at(_tid); _info = thread_info{}; _info->is_offset = threading::offset_this_id(); _info->index_data = init_index_data(_tid, _info->is_offset); + _info->causal_count = &causal::delay::get_local(); _info->lifetime.first = tim::get_clock_real_now(); if(_info->is_offset) set_thread_state(ThreadState::Disabled); } - return _instances.at(_tid); + return _info_data->at(_tid); } const std::optional& thread_info::get() { - return thread_info_data_t::instances().at(utility::get_thread_index()); + return get_info_data(utility::get_thread_index()); } const std::optional& thread_info::get(int64_t _tid, ThreadIdType _type) { if(_type == ThreadIdType::InternalTID) - return thread_info_data_t::instances().at(_tid); + return 
get_info_data(_tid); else if(_type == ThreadIdType::SystemTID) { - const auto& _v = thread_info_data_t::instances(); - for(const auto& itr : _v) + const auto& _v = get_info_data(); + if(_v) { - if(itr && itr->index_data->system_value == _tid) return itr; + for(const auto& itr : *_v) + { + if(itr && itr->index_data->system_value == _tid) return itr; + } } } else if(_type == ThreadIdType::SequentTID) { - const auto& _v = thread_info_data_t::instances(); - for(const auto& itr : _v) + const auto& _v = get_info_data(); + if(_v) { - if(itr && itr->index_data->sequent_value == _tid) return itr; + for(const auto& itr : *_v) + { + if(itr && itr->index_data->sequent_value == _tid) return itr; + } } } @@ -132,7 +206,7 @@ thread_info::get(int64_t _tid, ThreadIdType _type) void thread_info::set_start(uint64_t _ts, bool _force) { - auto& _v = thread_info_data_t::instances().at(utility::get_thread_index()); + auto& _v = get_info_data(utility::get_thread_index()); if(!_v) init(); if(_force || (_ts > 0 && (_v->lifetime.first == 0 || _ts < _v->lifetime.first))) _v->lifetime.first = _ts; @@ -142,7 +216,7 @@ void thread_info::set_stop(uint64_t _ts) { auto _tid = utility::get_thread_index(); - auto& _v = thread_info_data_t::instances().at(_tid); + auto& _v = get_info_data(_tid); if(_v) { _v->lifetime.second = _ts; @@ -150,7 +224,7 @@ thread_info::set_stop(uint64_t _ts) // less than or equal to the main thread end lifetime if(_tid == 0) { - for(auto& itr : thread_info_data_t::instances()) + for(auto& itr : *get_info_data()) { if(itr && itr->index_data && itr->index_data->internal_value != _tid) { @@ -210,6 +284,7 @@ thread_info::as_string() const if(index_data) _ss << ", index_data=(" << index_data->internal_value << ", " << index_data->system_value << ", " << index_data->sequent_value << ")"; + if(causal_count) _ss << ", causal count=" << *causal_count; _ss << ", lifetime=(" << lifetime.first << ":" << lifetime.second << ")"; return _ss.str(); } diff --git 
a/source/lib/omnitrace/library/thread_info.hpp b/source/lib/omnitrace/library/thread_info.hpp index b55c44af61..89c2163a7c 100644 --- a/source/lib/omnitrace/library/thread_info.hpp +++ b/source/lib/omnitrace/library/thread_info.hpp @@ -66,6 +66,8 @@ struct thread_index_data int64_t sequent_value = tim::threading::get_id(); }; +int64_t grow_data(int64_t); + struct thread_info { using index_data_t = std::optional; @@ -95,9 +97,10 @@ struct thread_info static const std::optional& get(); static const std::optional& get(int64_t _tid, ThreadIdType _type); - bool is_offset = false; - index_data_t index_data = {}; - lifetime_data_t lifetime = { 0, 0 }; + bool is_offset = false; + const int64_t* causal_count = nullptr; + index_data_t index_data = {}; + lifetime_data_t lifetime = { 0, 0 }; friend std::ostream& operator<<(std::ostream& _os, const thread_info& _v) { diff --git a/source/lib/omnitrace/library/timemory.hpp b/source/lib/omnitrace/library/timemory.hpp index f274df060a..b1af4030fb 100644 --- a/source/lib/omnitrace/library/timemory.hpp +++ b/source/lib/omnitrace/library/timemory.hpp @@ -55,4 +55,5 @@ namespace units = ::tim::units; // NOLINT using settings = ::tim::settings; // NOLINT using ::tim::get_env; // NOLINT +using ::tim::set_env; // NOLINT } // namespace omnitrace diff --git a/source/lib/omnitrace/library/tracing.cpp b/source/lib/omnitrace/library/tracing.cpp index 514fe091db..365da846d3 100644 --- a/source/lib/omnitrace/library/tracing.cpp +++ b/source/lib/omnitrace/library/tracing.cpp @@ -29,6 +29,11 @@ namespace omnitrace { namespace tracing { +bool debug_push = tim::get_env("OMNITRACE_DEBUG_PUSH", false) || get_debug_env(); +bool debug_pop = tim::get_env("OMNITRACE_DEBUG_POP", false) || get_debug_env(); +bool debug_mark = tim::get_env("OMNITRACE_DEBUG_MARK", false) || get_debug_env(); +bool debug_user = tim::get_env("OMNITRACE_DEBUG_USER_REGIONS", false) || get_debug_env(); + perfetto::TraceConfig& get_perfetto_config() { @@ -50,13 +55,6 @@ 
get_perfetto_track_uuids() return _v; } -std::vector>& -get_finalization_functions() -{ - static auto _v = std::vector>{}; - return _v; -} - tim::hash_map_ptr_t& get_timemory_hash_ids(int64_t _tid) { @@ -72,6 +70,13 @@ get_timemory_hash_aliases(int64_t _tid) return _v.at(_tid); } +std::vector>& +get_finalization_functions() +{ + static auto _v = std::vector>{}; + return _v; +} + void record_thread_start_time() { @@ -80,5 +85,68 @@ record_thread_start_time() thread_info::set_start(comp::wall_clock::record(), get_mode() != Mode::Sampling); }); } + +void +thread_init() +{ + if(get_thread_state() == ThreadState::Disabled) return; + + static thread_local auto _thread_dtor = scope::destructor{ []() { + if(get_state() != State::Finalized) + { + if(get_use_causal()) + causal::sampling::shutdown(); + else if(get_use_sampling()) + sampling::shutdown(); + auto& _thr_bundle = thread_data::instance(); + if(_thr_bundle && _thr_bundle->get() && + _thr_bundle->get()->get_is_running()) + _thr_bundle->stop(); + } + } }; + + if(get_thread_state() == ThreadState::Disabled) return; + + static thread_local auto _thread_setup = []() { + if(threading::get_id() > 0) + threading::set_thread_name(JOIN(" ", "Thread", threading::get_id()).c_str()); + thread_data::construct(JOIN('/', "omnitrace/process", + process::get_id(), "thread", + threading::get_id()), + quirk::config{}); + get_interval_data()->reserve(512); + // save the hash maps + get_timemory_hash_ids() = tim::get_hash_ids(); + get_timemory_hash_aliases() = tim::get_hash_aliases(); + record_thread_start_time(); + return true; + }(); + + if(get_thread_state() == ThreadState::Disabled) return; + + static thread_local auto _sample_setup = []() { + auto _idx = utility::get_thread_index(); + // the main thread will initialize sampling when it initializes the tooling + if(_idx > 0) + { + auto _use_causal = get_use_causal(); + auto _use_sampling = get_use_sampling(); + if(_use_causal || _use_sampling) + { + 
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); + if(_use_causal) + causal::sampling::setup(); + else if(_use_sampling) + sampling::setup(); + } + return (_use_causal || _use_sampling); + } + return false; + }(); + + (void) _thread_dtor; + (void) _thread_setup; + (void) _sample_setup; +} } // namespace tracing } // namespace omnitrace diff --git a/source/lib/omnitrace/library/tracing.hpp b/source/lib/omnitrace/library/tracing.hpp index 4e822a2080..0cd61932e1 100644 --- a/source/lib/omnitrace/library/tracing.hpp +++ b/source/lib/omnitrace/library/tracing.hpp @@ -22,6 +22,8 @@ #pragma once +#include "common/defines.h" +#include "library/causal/sampling.hpp" #include "library/common.hpp" #include "library/concepts.hpp" #include "library/config.hpp" @@ -30,6 +32,8 @@ #include "library/perfetto.hpp" #include "library/runtime.hpp" #include "library/sampling.hpp" +#include "library/state.hpp" +#include "library/thread_data.hpp" #include "library/timemory.hpp" #include "library/tracing/annotation.hpp" #include "library/utility.hpp" @@ -55,12 +59,42 @@ namespace tracing using interval_data_instances = thread_data>; using hash_value_t = tim::hash_value_t; +// +// declarations +// +extern OMNITRACE_HIDDEN_API bool debug_push; +extern OMNITRACE_HIDDEN_API bool debug_pop; +extern OMNITRACE_HIDDEN_API bool debug_user; +extern OMNITRACE_HIDDEN_API bool debug_mark; + +std::unordered_map& +get_perfetto_track_uuids(); + perfetto::TraceConfig& get_perfetto_config(); std::unique_ptr& get_perfetto_session(); +tim::hash_map_ptr_t& +get_timemory_hash_ids(int64_t _tid = threading::get_id()); + +tim::hash_alias_ptr_t& +get_timemory_hash_aliases(int64_t _tid = threading::get_id()); + +std::vector>& +get_finalization_functions(); + +void +record_thread_start_time(); + +void +thread_init(); + +// +// definitions +// + template auto get_perfetto_category_uuid(Args&&... _args) @@ -70,9 +104,6 @@ get_perfetto_category_uuid(Args&&... 
_args) std::forward(_args)...); } -std::unordered_map& -get_perfetto_track_uuids(); - template auto @@ -109,15 +140,6 @@ get_perfetto_track(CategoryT, FuncT&& _desc_generator, Args&&... _args) return TrackT(_uuid); } -std::vector>& -get_finalization_functions(); - -tim::hash_map_ptr_t& -get_timemory_hash_ids(int64_t _tid = threading::get_id()); - -tim::hash_alias_ptr_t& -get_timemory_hash_aliases(int64_t _tid = threading::get_id()); - template OMNITRACE_INLINE auto now() @@ -125,32 +147,17 @@ now() return ::tim::get_clock_real_now(); } -void -record_thread_start_time(); - -namespace -{ -bool debug_push = // NOLINT - tim::get_env("OMNITRACE_DEBUG_PUSH", false) || get_debug_env(); -bool debug_pop = // NOLINT - tim::get_env("OMNITRACE_DEBUG_POP", false) || get_debug_env(); -bool debug_user = // NOLINT - tim::get_env("OMNITRACE_DEBUG_USER_REGIONS", false) || get_debug_env(); -} // namespace - inline auto& get_interval_data(int64_t _tid = threading::get_id()) { - static auto& _v = - interval_data_instances::instances(interval_data_instances::construct_on_init{}); + static auto& _v = interval_data_instances::instances(construct_on_init{}); return _v.at(_tid); } inline auto& get_instrumentation_bundles(int64_t _tid = threading::get_id()) { - static thread_local auto& _v = instrumentation_bundles::instances().at(_tid); - return _v; + return instrumentation_bundles::instance(_tid); } inline auto& @@ -167,54 +174,6 @@ pop_count() return _v; } -inline void -thread_init() -{ - static thread_local auto _dtor = scope::destructor{ []() { - if(get_state() != State::Finalized) - { - if(get_use_sampling()) sampling::shutdown(); - auto& _thr_bundle = thread_data::instance(); - if(_thr_bundle && _thr_bundle->get() && - _thr_bundle->get()->get_is_running()) - _thr_bundle->stop(); - } - } }; - static thread_local auto _thread_setup = []() { - if(threading::get_id() > 0) - threading::set_thread_name(JOIN(" ", "Thread", threading::get_id()).c_str()); - thread_data::construct(JOIN('/', 
"omnitrace/process", - process::get_id(), "thread", - threading::get_id()), - quirk::config{}); - get_interval_data()->reserve(512); - // save the hash maps - get_timemory_hash_ids() = tim::get_hash_ids(); - get_timemory_hash_aliases() = tim::get_hash_aliases(); - record_thread_start_time(); - return true; - }(); - (void) _thread_setup; - (void) _dtor; -} - -inline void -thread_init_sampling() -{ - static thread_local auto _v = []() { - auto _idx = utility::get_thread_index(); - // the main thread will initialize sampling when it initializes the tooling - if(_idx > 0) - { - auto _use_sampling = get_use_sampling(); - if(_use_sampling) sampling::setup(); - return _use_sampling; - } - return false; - }(); - (void) _v; -} - template inline void push_timemory(CategoryT, const char* name, Args&&... args) @@ -224,11 +183,8 @@ push_timemory(CategoryT, const char* name, Args&&... args) auto& _data = tracing::get_instrumentation_bundles(); // this generates a hash for the raw string array - auto _hash = tim::add_hash_id(tim::string_view_t{ name }); - auto* _bundle = _data.allocator.allocate(1); - _data.bundles.emplace_back(_bundle); - _data.allocator.construct(_bundle, _hash); - _bundle->start(std::forward(args)...); + auto _hash = tim::add_hash_id(tim::string_view_t{ name }); + _data.construct(_hash)->start(std::forward(args)...); } template @@ -367,6 +323,5 @@ pop_perfetto_track(CategoryT, const char*, perfetto::Track _track, uint64_t _ts, TRACE_EVENT_END(trait::name::value, _track, _ts, std::forward(args)...); } - } // namespace tracing } // namespace omnitrace diff --git a/source/lib/omnitrace/library/tracing/annotation.hpp b/source/lib/omnitrace/library/tracing/annotation.hpp index f02111874d..9bdcccdd63 100644 --- a/source/lib/omnitrace/library/tracing/annotation.hpp +++ b/source/lib/omnitrace/library/tracing/annotation.hpp @@ -195,7 +195,8 @@ add_perfetto_annotation(perfetto_event_context_t& ctx, } else { - throw std::runtime_error("invalid annotation value type"); + 
throw ::omnitrace::exception( + "invalid annotation value type"); } } } diff --git a/source/lib/omnitrace/library/utility.hpp b/source/lib/omnitrace/library/utility.hpp index c8156712af..4a372dcb97 100644 --- a/source/lib/omnitrace/library/utility.hpp +++ b/source/lib/omnitrace/library/utility.hpp @@ -22,10 +22,17 @@ #pragma once +#include "library/concepts.hpp" + +#include +#include + +#include #include #include #include #include +#include #include namespace omnitrace @@ -87,5 +94,137 @@ template using make_index_sequence_range = typename offset_index_sequence, StartN>::type; + +template +struct generate +{ + using type = Tp; + + template + auto operator()(Args&&... _args) const + { + if constexpr(concepts::is_unique_pointer::value) + { + using value_type = typename type::element_type; + + if constexpr(use_placement_new_when_generating_unique_ptr::value) + { + // create a thread-local buffer for placement-new + static thread_local auto _buffer = std::array{}; + if constexpr(std::is_constructible::value) + { + return type{ new(_buffer.data()) + value_type{ std::forward(_args)... } }; + } + else + { + return type{ new(_buffer.data()) + value_type{ invoke(std::forward(_args))... } }; + } + } + else + { + if constexpr(std::is_constructible::value) + { + return type{ new value_type{ std::forward(_args)... } }; + } + else + { + return type{ new value_type{ invoke(std::forward(_args))... } }; + } + } + } + else + { + if constexpr(std::is_constructible::value) + { + return type{ std::forward(_args)... }; + } + else + { + return type{ invoke(std::forward(_args))... }; + } + } + } + +private: + template + static auto invoke(Up&& _v, int, + std::enable_if_t::value, int> = 0) + -> decltype(std::forward(_v)()) + { + return std::forward(_v)(); + } + + template + static auto&& invoke(Up&& _v, long) + { + return std::forward(_v); + } + + template + static decltype(auto) invoke(Up&& _v) + { + return invoke(std::forward(_v), 0); + } +}; + +template