Signal handler backtraces provide line info (#178)
* Signal handler backtraces provide line info
  - print backtrace after SIGINT during finalization
* Workflow run-name + jammy rocm CI
* fix jammy matrix indentation
* disable building dyninst in jammy
* Update jammy for rocm
* jammy rocm_agent_enumerator
* Fix rocm install for jammy
* jammy bash
* jammy workflow typo
* revert some changes
* stack-usage + omnitrace-rt symlink + ncclSocketAccept + indiv sigs
  - symlink omnitrace-rt in build tree
  - exclude ncclSocketAccept
  - timemory submodule update accepting individual signal handlers
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
ede6007f9b
Коммит
78a06e7a42
@@ -1,4 +1,5 @@
|
||||
name: ci-containers
|
||||
name: Continuous Integration Containers
|
||||
run-name: ci-containers
|
||||
|
||||
# nightly build
|
||||
on:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
name: cpack
|
||||
name: Installer Packaging (CPack)
|
||||
run-name: cpack
|
||||
|
||||
on:
|
||||
push:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
name: Formatting
|
||||
run-name: formatting
|
||||
|
||||
on:
|
||||
push:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
name: OpenSUSE 15 (GCC, Python)
|
||||
run-name: opensuse-15
|
||||
|
||||
on:
|
||||
push:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
name: Ubuntu 18.04 (GCC, MPICH, Python)
|
||||
run-name: ubuntu-bionic
|
||||
|
||||
on:
|
||||
push:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
name: Ubuntu 20.04 (GCC, Python, ROCm, MPICH, OpenMPI)
|
||||
run-name: ubuntu-focal
|
||||
|
||||
on:
|
||||
push:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
name: Ubuntu 22.04 (GCC, Python)
|
||||
name: Ubuntu 22.04 (GCC, Python, ROCm)
|
||||
run-name: ubuntu-jammy
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -35,10 +36,29 @@ jobs:
|
||||
mpi-headers: ['ON', 'OFF']
|
||||
static-libgcc: ['OFF']
|
||||
static-libstdcxx: ['OFF']
|
||||
build-dyninst: ['OFF']
|
||||
rocm-version: ['0.0']
|
||||
include:
|
||||
- compiler: 'g++'
|
||||
hip: 'ON'
|
||||
mpi: 'OFF'
|
||||
ompt: 'OFF'
|
||||
papi: 'OFF'
|
||||
python: 'ON'
|
||||
lto: 'OFF'
|
||||
strip: 'OFF'
|
||||
hidden: 'ON'
|
||||
build-type: 'Release'
|
||||
mpi-headers: 'OFF'
|
||||
static-libgcc: 'OFF'
|
||||
static-libstdcxx: 'OFF'
|
||||
build-dyninst: 'OFF'
|
||||
rocm-version: '5.3'
|
||||
|
||||
env:
|
||||
OMPI_ALLOW_RUN_AS_ROOT: 1
|
||||
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
|
||||
OMNITRACE_CI: 'ON'
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
@@ -55,6 +75,20 @@ jobs:
|
||||
python3 -m pip install 'cmake==3.21.4' &&
|
||||
for i in 6 7 8 9 10; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done
|
||||
|
||||
- name: Install ROCm Packages
|
||||
timeout-minutes: 10
|
||||
if: ${{ matrix.rocm-version != '0.0' }}
|
||||
shell: bash
|
||||
run: |
|
||||
wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
|
||||
echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${{ matrix.rocm-version }}/ jammy main" | tee /etc/apt/sources.list.d/rocm.list
|
||||
apt-get update
|
||||
apt-get install -y {rocm-hip-sdk,roctracer-dev,rocprofiler-dev,rocm-smi-lib,rocminfo}${{ matrix.rocm-version }}.0
|
||||
echo "/opt/rocm/bin" >> $GITHUB_PATH
|
||||
echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV
|
||||
echo "LD_LIBRARY_PATH=/opt/rocm/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
|
||||
/opt/rocm/bin/hipcc -O3 -c ./examples/transpose/transpose.cpp -o /tmp/transpose.o
|
||||
|
||||
- name: Test Environment Modules
|
||||
timeout-minutes: 15
|
||||
shell: bash
|
||||
@@ -85,13 +119,14 @@ jobs:
|
||||
-DOMNITRACE_USE_PAPI=${{ matrix.papi }}
|
||||
-DOMNITRACE_USE_PYTHON=${{ matrix.python }}
|
||||
-DOMNITRACE_USE_MPI_HEADERS=${{ matrix.mpi-headers }}
|
||||
-DOMNITRACE_STRIP_LIBRARIES=${{ matrix.strip }}
|
||||
-DOMNITRACE_BUILD_LTO=${{ matrix.lto }}
|
||||
-DOMNITRACE_BUILD_DYNINST=${{ matrix.build-dyninst }}
|
||||
-DOMNITRACE_BUILD_HIDDEN_VISIBILITY=${{ matrix.hidden }}
|
||||
-DOMNITRACE_BUILD_STATIC_LIBGCC=${{ matrix.static-libgcc }}
|
||||
-DOMNITRACE_BUILD_STATIC_LIBSTDCXX=${{ matrix.static-libstdcxx }}
|
||||
-DOMNITRACE_PYTHON_PREFIX=/opt/conda/envs
|
||||
-DOMNITRACE_PYTHON_ENVS="py3.6;py3.7;py3.8;py3.9;py3.10"
|
||||
-DOMNITRACE_STRIP_LIBRARIES=${{ matrix.strip }}
|
||||
-DUSE_CLANG_OMP=OFF
|
||||
|
||||
- name: Build
|
||||
|
||||
@@ -2,7 +2,9 @@
|
||||
|
||||
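[![Ubuntu 18.04 (GCC, MPICH, Python)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-bionic.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-bionic.yml)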
|
||||
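[![Ubuntu 20.04 (GCC, Python, ROCm, MPICH, OpenMPI)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-focal.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-focal.yml)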
|
||||
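[![Ubuntu 22.04 (GCC, Python, ROCm)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-jammy.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-jammy.yml)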
|
||||
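[![OpenSUSE 15 (GCC, Python)](https://github.com/AMDResearch/omnitrace/actions/workflows/opensuse.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/opensuse.yml)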
|
||||
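[![Installer Packaging (CPack)](https://github.com/AMDResearch/omnitrace/actions/workflows/cpack.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/cpack.yml)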
|
||||
|
||||
> ***[Omnitrace](https://github.com/AMDResearch/omnitrace) is an AMD open source research project and is not supported as part of the ROCm software stack.***
|
||||
|
||||
|
||||
@@ -220,8 +220,10 @@ endif()
|
||||
# developer build flags
|
||||
#
|
||||
if(OMNITRACE_BUILD_DEVELOPER)
|
||||
add_target_flag_if_avail(omnitrace-compile-options "-Werror" "-Wdouble-promotion"
|
||||
"-Wshadow" "-Wextra" "-Wpedantic" "/showIncludes")
|
||||
add_target_flag_if_avail(
|
||||
omnitrace-compile-options "-Werror" "-Wdouble-promotion" "-Wshadow" "-Wextra"
|
||||
"-Wpedantic" "-Wstack-usage=524288" # 512 KB
|
||||
"/showIncludes")
|
||||
endif()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
@@ -782,7 +782,7 @@ function(OMNITRACE_BUILDTREE_TPL _TPL_TARGET _NEW_NAME _BUILD_TREE_DIR)
|
||||
# build tree symbolic links
|
||||
add_custom_target(
|
||||
${_NEW_NAME}-build-tree-library${_TAIL} ALL
|
||||
${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:${_TPL_TARGET}>
|
||||
${CMAKE_COMMAND} -E create_symlink $<TARGET_FILE:${_TPL_TARGET}>
|
||||
${_TPL_PREFIX}${_NEW_NAME}${_TPL_SUFFIX}.${_TPL_VERSION}
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E create_symlink
|
||||
|
||||
поставляемый
+1
-1
Submodule external/timemory updated: c8a52973e5...e6305b0455
@@ -30,7 +30,8 @@ target_link_libraries(
|
||||
omnitrace::omnitrace-compile-definitions
|
||||
omnitrace::omnitrace-sanitizer
|
||||
timemory::timemory-headers
|
||||
timemory::timemory-extensions)
|
||||
timemory::timemory-extensions
|
||||
timemory::timemory-core)
|
||||
|
||||
set_target_properties(
|
||||
omnitrace-exe
|
||||
|
||||
@@ -61,7 +61,7 @@ get_whole_function_names()
|
||||
"ncclCommUserRank", "ncclReduce", "ncclBcast", "ncclBroadcast", "ncclAllReduce",
|
||||
"ncclReduceScatter", "ncclAllGather", "ncclGroupStart", "ncclGroupEnd",
|
||||
"ncclSend", "ncclRecv", "ncclGather", "ncclScatter", "ncclAllToAll",
|
||||
"ncclAllToAllv"
|
||||
"ncclAllToAllv", "ncclSocketAccept"
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -91,7 +91,8 @@ log_entry::add_log_entry(log_entry&& _v)
|
||||
|
||||
void
|
||||
print_log_entries(std::ostream& _os, int64_t _count,
|
||||
std::function<bool(const log_entry&)> _condition, const char* _color,
|
||||
const std::function<bool(const log_entry&)>& _condition,
|
||||
const std::function<void()>& _prelude, const char* _color,
|
||||
bool _color_entries)
|
||||
{
|
||||
size_t i0 = (_count < 0) ? 0 : std::max<int64_t>(log_entries.size() - _count, 0);
|
||||
@@ -107,6 +108,18 @@ print_log_entries(std::ostream& _os, int64_t _count,
|
||||
const char* _end =
|
||||
(strlen(_color) > 0 || _color_entries) ? tim::log::color::end() : "";
|
||||
|
||||
if(_prelude)
|
||||
{
|
||||
for(size_t i = i0; i < log_entries.size(); ++i)
|
||||
{
|
||||
if(!_condition || _condition(log_entries.at(i)))
|
||||
{
|
||||
_prelude();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// the requested number of log entries
|
||||
for(size_t i = i0; i < log_entries.size(); ++i)
|
||||
{
|
||||
|
||||
@@ -39,7 +39,8 @@ struct log_entry;
|
||||
|
||||
void
|
||||
print_log_entries(std::ostream& = std::cerr, int64_t _count = 10,
|
||||
std::function<bool(const log_entry&)> _cond = {},
|
||||
const std::function<bool(const log_entry&)>& _cond = {},
|
||||
const std::function<void()>& _prelude = {},
|
||||
const char* _color = tim::log::color::warning(),
|
||||
bool _color_entries = true);
|
||||
|
||||
|
||||
@@ -212,16 +212,24 @@ main(int argc, char** argv)
|
||||
sys_signal::SegFault, sys_signal::FileSize, sys_signal::CPUtime })
|
||||
signal_settings::enable(itr);
|
||||
|
||||
auto _exit_action = [](int nsig) {
|
||||
static bool _protect = false;
|
||||
auto _exit_action = [](int nsig) {
|
||||
if(_protect) return;
|
||||
_protect = true;
|
||||
TIMEMORY_PRINTF_FATAL(
|
||||
stderr, "omnitrace exited with signal %i :: %s\n", nsig,
|
||||
signal_settings::str(static_cast<sys_signal>(nsig)).c_str());
|
||||
|
||||
// print the last log entries
|
||||
print_log_entries(std::cerr, num_log_entries);
|
||||
|
||||
std::cerr << "\n[omnitrace][exe] Potentially important log entries\n\n";
|
||||
|
||||
print_log_entries(std::cerr, -1, [](const auto& _v) { return _v.forced(); });
|
||||
// print any forced entries
|
||||
print_log_entries(
|
||||
std::cerr, -1, [](const auto& _v) { return _v.forced(); },
|
||||
[]() {
|
||||
tim::log::stream(std::cerr, tim::log::color::info())
|
||||
<< "\n[omnitrace][exe] Potentially important log entries:\n\n";
|
||||
});
|
||||
|
||||
TIMEMORY_PRINTF_FATAL(stderr, "\n");
|
||||
TIMEMORY_PRINTF_FATAL(
|
||||
@@ -235,6 +243,7 @@ main(int argc, char** argv)
|
||||
log_ofs.reset();
|
||||
|
||||
kill(process::get_id(), nsig);
|
||||
_protect = false;
|
||||
};
|
||||
|
||||
signal_settings::set_exit_action(_exit_action);
|
||||
@@ -1064,7 +1073,7 @@ main(int argc, char** argv)
|
||||
if(!tim::filepath::open(*log_ofs, logfile))
|
||||
throw std::runtime_error(JOIN(" ", "Error opening log output file", logfile));
|
||||
verbprintf_bare(0, "Done\n%s", ::tim::log::color::end());
|
||||
print_log_entries(*log_ofs, -1, {}, "", false);
|
||||
print_log_entries(*log_ofs, -1, {}, {}, "", false);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
|
||||
@@ -51,6 +51,8 @@
|
||||
#include "library/timemory.hpp"
|
||||
#include "library/tracing.hpp"
|
||||
|
||||
#include <timemory/signals/signal_handlers.hpp>
|
||||
#include <timemory/signals/types.hpp>
|
||||
#include <timemory/hash/types.hpp>
|
||||
#include <timemory/manager/manager.hpp>
|
||||
#include <timemory/operations/types/file_output_message.hpp>
|
||||
@@ -630,6 +632,9 @@ omnitrace_finalize_hidden(void)
|
||||
OMNITRACE_VERBOSE_F(1, "omnitrace_push_trace :: called %zux\n", _push_count);
|
||||
OMNITRACE_VERBOSE_F(1, "omnitrace_pop_trace :: called %zux\n", _pop_count);
|
||||
|
||||
tim::signals::enable_signal_detection({ tim::signals::sys_signal::Interrupt },
|
||||
[](int) {});
|
||||
|
||||
OMNITRACE_DEBUG_F("Copying over all timemory hash information to main thread...\n");
|
||||
// copy these over so that all hashes are known
|
||||
auto& _hzero = tracing::get_timemory_hash_ids(0);
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <timemory/backends/threading.hpp>
|
||||
#include <timemory/log/logger.hpp>
|
||||
#include <timemory/mpl/concepts.hpp>
|
||||
#include <timemory/signals/signal_handlers.hpp>
|
||||
#include <timemory/utility/backtrace.hpp>
|
||||
#include <timemory/utility/locking.hpp>
|
||||
#include <timemory/utility/utility.hpp>
|
||||
|
||||
Ссылка в новой задаче
Block a user