From 78a06e7a429bd0926aa2cba0bbc736b13d188066 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Tue, 18 Oct 2022 21:45:56 -0500 Subject: [PATCH] Signal handler backtraces provide line info (#178) * Signal handler backtraces provide line info - print backtrace after SIGINT during finalization * Workflow run-name + jammy rocm CI * fix jammy matrix indentation * disable building dyninst in jammy * Update jammy for rocm * jammy rocm_agent_enumerator * Fix rocm install for jammy * jammy bash * jammy workflow typo * revert some changes * stack-usage + omnitrace-rt symlink + ncclSocketAccept + indiv sigs - symlink omnitrace-rt in build tree - exclude ncclSocketAccept - timemory submodule update accepting individual signal handlers --- .github/workflows/containers.yml | 3 +- .github/workflows/cpack.yml | 3 +- .github/workflows/formatting.yml | 1 + .github/workflows/opensuse.yml | 1 + .github/workflows/ubuntu-bionic.yml | 1 + .github/workflows/ubuntu-focal.yml | 1 + .github/workflows/ubuntu-jammy.yml | 39 ++++++++++++++++++++++++-- README.md | 2 ++ cmake/BuildSettings.cmake | 6 ++-- cmake/MacroUtilities.cmake | 2 +- external/timemory | 2 +- source/bin/omnitrace/CMakeLists.txt | 3 +- source/bin/omnitrace/details.cpp | 2 +- source/bin/omnitrace/log.cpp | 15 +++++++++- source/bin/omnitrace/log.hpp | 3 +- source/bin/omnitrace/omnitrace.cpp | 19 +++++++++---- source/lib/omnitrace/library.cpp | 5 ++++ source/lib/omnitrace/library/debug.hpp | 1 + 18 files changed, 92 insertions(+), 17 deletions(-) diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml index c20157c303..4593c9dbd8 100644 --- a/.github/workflows/containers.yml +++ b/.github/workflows/containers.yml @@ -1,4 +1,5 @@ -name: ci-containers +name: Continuous Integration Containers +run-name: ci-containers # nightly build on: diff --git a/.github/workflows/cpack.yml b/.github/workflows/cpack.yml index b9712968db..35c882c304 100644 --- a/.github/workflows/cpack.yml +++ b/.github/workflows/cpack.yml @@ -1,4 +1,5 @@ -name: cpack +name: Installer Packaging (CPack) +run-name: cpack on: push: diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml index c3581d22da..fcb642defd 100644 --- a/.github/workflows/formatting.yml +++ b/.github/workflows/formatting.yml @@ -1,5 +1,6 @@ name: Formatting +run-name: formatting on: push: diff --git a/.github/workflows/opensuse.yml b/.github/workflows/opensuse.yml index 3bfd1895b9..d3ea4c8fda 100644 --- a/.github/workflows/opensuse.yml +++ b/.github/workflows/opensuse.yml @@ -1,4 +1,5 @@ name: OpenSUSE 15 (GCC, Python) +run-name: opensuse-15 on: push: diff --git a/.github/workflows/ubuntu-bionic.yml b/.github/workflows/ubuntu-bionic.yml index 895be4c149..3fdb2d722c 100644 --- a/.github/workflows/ubuntu-bionic.yml +++ b/.github/workflows/ubuntu-bionic.yml @@ -1,4 +1,5 @@ name: Ubuntu 18.04 (GCC, MPICH, Python) +run-name: ubuntu-bionic on: push: diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml index 3354c4682a..d47265d8e6 100644 --- a/.github/workflows/ubuntu-focal.yml +++ b/.github/workflows/ubuntu-focal.yml @@ -1,4 +1,5 @@ name: Ubuntu 20.04 (GCC, Python, ROCm, MPICH, OpenMPI) +run-name: ubuntu-focal on: push: diff --git a/.github/workflows/ubuntu-jammy.yml b/.github/workflows/ubuntu-jammy.yml index 870ff89bda..80cc7225fe 100644 --- a/.github/workflows/ubuntu-jammy.yml +++ b/.github/workflows/ubuntu-jammy.yml @@ -1,4 +1,5 @@ -name: Ubuntu 22.04 (GCC, Python) +name: Ubuntu 22.04 (GCC, Python, ROCm) +run-name: ubuntu-jammy on: push: @@ -35,10 +36,29 @@ jobs: mpi-headers: ['ON', 'OFF'] static-libgcc: ['OFF'] static-libstdcxx: ['OFF'] + build-dyninst: ['OFF'] + rocm-version: ['0.0'] + include: + - compiler: 'g++' + hip: 'ON' + mpi: 'OFF' + ompt: 'OFF' + papi: 'OFF' + python: 'ON' + lto: 'OFF' + strip: 'OFF' + hidden: 'ON' + build-type: 'Release' + mpi-headers: 'OFF' + static-libgcc: 'OFF' + static-libstdcxx: 'OFF' + build-dyninst: 'OFF' + rocm-version: '5.3' env: OMPI_ALLOW_RUN_AS_ROOT: 1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1 + OMNITRACE_CI: 'ON' steps: - uses: actions/checkout@v3 @@ -55,6 +75,20 @@ jobs: python3 -m pip install 'cmake==3.21.4' && for i in 6 7 8 9 10; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done + - name: Install ROCm Packages + timeout-minutes: 10 + if: ${{ matrix.rocm-version != '0.0' }} + shell: bash + run: | + wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - + echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${{ matrix.rocm-version }}/ jammy main" | tee /etc/apt/sources.list.d/rocm.list + apt-get update + apt-get install -y {rocm-hip-sdk,roctracer-dev,rocprofiler-dev,rocm-smi-lib,rocminfo}${{ matrix.rocm-version }}.0 + echo "/opt/rocm/bin" >> $GITHUB_PATH + echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=/opt/rocm/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV + /opt/rocm/bin/hipcc -O3 -c ./examples/transpose/transpose.cpp -o /tmp/transpose.o + - name: Test Environment Modules timeout-minutes: 15 shell: bash @@ -85,13 +119,14 @@ jobs: -DOMNITRACE_USE_PAPI=${{ matrix.papi }} -DOMNITRACE_USE_PYTHON=${{ matrix.python }} -DOMNITRACE_USE_MPI_HEADERS=${{ matrix.mpi-headers }} - -DOMNITRACE_STRIP_LIBRARIES=${{ matrix.strip }} -DOMNITRACE_BUILD_LTO=${{ matrix.lto }} + -DOMNITRACE_BUILD_DYNINST=${{ matrix.build-dyninst }} -DOMNITRACE_BUILD_HIDDEN_VISIBILITY=${{ matrix.hidden }} -DOMNITRACE_BUILD_STATIC_LIBGCC=${{ matrix.static-libgcc }} -DOMNITRACE_BUILD_STATIC_LIBSTDCXX=${{ matrix.static-libstdcxx }} -DOMNITRACE_PYTHON_PREFIX=/opt/conda/envs -DOMNITRACE_PYTHON_ENVS="py3.6;py3.7;py3.8;py3.9;py3.10" + -DOMNITRACE_STRIP_LIBRARIES=${{ matrix.strip }} -DUSE_CLANG_OMP=OFF - name: Build diff --git a/README.md b/README.md index bccc55b62a..308fe47696 100755 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ [![Ubuntu 18.04 with GCC and MPICH](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-bionic.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-bionic.yml) [![Ubuntu 20.04 with GCC, ROCm, and MPI](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-focal.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-focal.yml) +[![Ubuntu 22.04 (GCC, Python, ROCm)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-jammy.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-jammy.yml) [![OpenSUSE 15.x with GCC](https://github.com/AMDResearch/omnitrace/actions/workflows/opensuse.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/opensuse.yml) +[![Installer Packaging (CPack)](https://github.com/AMDResearch/omnitrace/actions/workflows/cpack.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/cpack.yml) > ***[Omnitrace](https://github.com/AMDResearch/omnitrace) is an AMD open source research project and is not supported as part of the ROCm software stack.*** diff --git a/cmake/BuildSettings.cmake b/cmake/BuildSettings.cmake index 286f50d2b7..0895828dad 100644 --- a/cmake/BuildSettings.cmake +++ b/cmake/BuildSettings.cmake @@ -220,8 +220,10 @@ endif() # developer build flags # if(OMNITRACE_BUILD_DEVELOPER) - add_target_flag_if_avail(omnitrace-compile-options "-Werror" "-Wdouble-promotion" - "-Wshadow" "-Wextra" "-Wpedantic" "/showIncludes") + add_target_flag_if_avail( + omnitrace-compile-options "-Werror" "-Wdouble-promotion" "-Wshadow" "-Wextra" + "-Wpedantic" "-Wstack-usage=524288" # 512 KB + "/showIncludes") endif() # ----------------------------------------------------------------------------------------# diff --git a/cmake/MacroUtilities.cmake b/cmake/MacroUtilities.cmake index 5e10a9bc91..30a4310b54 100644 --- a/cmake/MacroUtilities.cmake +++ b/cmake/MacroUtilities.cmake @@ -782,7 +782,7 @@ function(OMNITRACE_BUILDTREE_TPL _TPL_TARGET _NEW_NAME _BUILD_TREE_DIR) # build tree symbolic links add_custom_target( ${_NEW_NAME}-build-tree-library${_TAIL} ALL - ${CMAKE_COMMAND} -E copy_if_different $ + ${CMAKE_COMMAND} -E create_symlink $ ${_TPL_PREFIX}${_NEW_NAME}${_TPL_SUFFIX}.${_TPL_VERSION} COMMAND ${CMAKE_COMMAND} -E create_symlink diff --git a/external/timemory b/external/timemory index c8a52973e5..e6305b0455 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit c8a52973e5a1d68290d83ea362afa62c1168985b +Subproject commit e6305b045561f90ffccfb0e8ade66a9cd3c19044 diff --git a/source/bin/omnitrace/CMakeLists.txt b/source/bin/omnitrace/CMakeLists.txt index 1f4460022b..38b7e63213 100644 --- a/source/bin/omnitrace/CMakeLists.txt +++ b/source/bin/omnitrace/CMakeLists.txt @@ -30,7 +30,8 @@ target_link_libraries( omnitrace::omnitrace-compile-definitions omnitrace::omnitrace-sanitizer timemory::timemory-headers - timemory::timemory-extensions) + timemory::timemory-extensions + timemory::timemory-core) set_target_properties( omnitrace-exe diff --git a/source/bin/omnitrace/details.cpp b/source/bin/omnitrace/details.cpp index 0c4bd0c0f8..3e6664c3f9 100644 --- a/source/bin/omnitrace/details.cpp +++ b/source/bin/omnitrace/details.cpp @@ -61,7 +61,7 @@ get_whole_function_names() "ncclCommUserRank", "ncclReduce", "ncclBcast", "ncclBroadcast", "ncclAllReduce", "ncclReduceScatter", "ncclAllGather", "ncclGroupStart", "ncclGroupEnd", "ncclSend", "ncclRecv", "ncclGather", "ncclScatter", "ncclAllToAll", - "ncclAllToAllv" + "ncclAllToAllv", "ncclSocketAccept" }; } diff --git a/source/bin/omnitrace/log.cpp b/source/bin/omnitrace/log.cpp index 2444d46a2b..6ddbdddeb9 100644 --- a/source/bin/omnitrace/log.cpp +++ b/source/bin/omnitrace/log.cpp @@ -91,7 +91,8 @@ log_entry::add_log_entry(log_entry&& _v) void print_log_entries(std::ostream& _os, int64_t _count, - std::function _condition, const char* _color, + const std::function& _condition, + const std::function& _prelude, const char* _color, bool _color_entries) { size_t i0 = (_count < 0) ? 0 : std::max(log_entries.size() - _count, 0); @@ -107,6 +108,18 @@ print_log_entries(std::ostream& _os, int64_t _count, const char* _end = (strlen(_color) > 0 || _color_entries) ? tim::log::color::end() : ""; + if(_prelude) + { + for(size_t i = i0; i < log_entries.size(); ++i) + { + if(!_condition || _condition(log_entries.at(i))) + { + _prelude(); + break; + } + } + } + // the requested number of log entries for(size_t i = i0; i < log_entries.size(); ++i) { diff --git a/source/bin/omnitrace/log.hpp b/source/bin/omnitrace/log.hpp index e3a36ad391..0cfd53e1a6 100644 --- a/source/bin/omnitrace/log.hpp +++ b/source/bin/omnitrace/log.hpp @@ -39,7 +39,8 @@ struct log_entry; void print_log_entries(std::ostream& = std::cerr, int64_t _count = 10, - std::function _cond = {}, + const std::function& _cond = {}, + const std::function& _prelude = {}, const char* _color = tim::log::color::warning(), bool _color_entries = true); diff --git a/source/bin/omnitrace/omnitrace.cpp b/source/bin/omnitrace/omnitrace.cpp index 2c301a8e9d..292be99c7b 100644 --- a/source/bin/omnitrace/omnitrace.cpp +++ b/source/bin/omnitrace/omnitrace.cpp @@ -212,16 +212,24 @@ main(int argc, char** argv) sys_signal::SegFault, sys_signal::FileSize, sys_signal::CPUtime }) signal_settings::enable(itr); - auto _exit_action = [](int nsig) { + static bool _protect = false; + auto _exit_action = [](int nsig) { + if(_protect) return; + _protect = true; TIMEMORY_PRINTF_FATAL( stderr, "omnitrace exited with signal %i :: %s\n", nsig, signal_settings::str(static_cast(nsig)).c_str()); + // print the last log entries print_log_entries(std::cerr, num_log_entries); - std::cerr << "\n[omnitrace][exe] Potentially important log entries\n\n"; - - print_log_entries(std::cerr, -1, [](const auto& _v) { return _v.forced(); }); + // print any forced entries + print_log_entries( + std::cerr, -1, [](const auto& _v) { return _v.forced(); }, + []() { + tim::log::stream(std::cerr, tim::log::color::info()) + << "\n[omnitrace][exe] Potentially important log entries:\n\n"; + }); TIMEMORY_PRINTF_FATAL(stderr, "\n"); TIMEMORY_PRINTF_FATAL( @@ -235,6 +243,7 @@ main(int argc, char** argv) log_ofs.reset(); kill(process::get_id(), nsig); + _protect = false; }; signal_settings::set_exit_action(_exit_action); @@ -1064,7 +1073,7 @@ main(int argc, char** argv) if(!tim::filepath::open(*log_ofs, logfile)) throw std::runtime_error(JOIN(" ", "Error opening log output file", logfile)); verbprintf_bare(0, "Done\n%s", ::tim::log::color::end()); - print_log_entries(*log_ofs, -1, {}, "", false); + print_log_entries(*log_ofs, -1, {}, {}, "", false); } //----------------------------------------------------------------------------------// diff --git a/source/lib/omnitrace/library.cpp b/source/lib/omnitrace/library.cpp index d1b5c89ca9..aad71bf0a5 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -51,6 +51,8 @@ #include "library/timemory.hpp" #include "library/tracing.hpp" +#include +#include #include #include #include @@ -630,6 +632,9 @@ omnitrace_finalize_hidden(void) OMNITRACE_VERBOSE_F(1, "omnitrace_push_trace :: called %zux\n", _push_count); OMNITRACE_VERBOSE_F(1, "omnitrace_pop_trace :: called %zux\n", _pop_count); + tim::signals::enable_signal_detection({ tim::signals::sys_signal::Interrupt }, + [](int) {}); + OMNITRACE_DEBUG_F("Copying over all timemory hash information to main thread...\n"); // copy these over so that all hashes are known auto& _hzero = tracing::get_timemory_hash_ids(0); diff --git a/source/lib/omnitrace/library/debug.hpp b/source/lib/omnitrace/library/debug.hpp index 92c02715d9..7f8e757623 100644 --- a/source/lib/omnitrace/library/debug.hpp +++ b/source/lib/omnitrace/library/debug.hpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include