add a signal handler and backtrace

Tweak the signal handler and force non-release build
Increase ulimit locked memory value
Update the signal handler to use bfd symbol resolution.
Include configure logic to find bfd functions.
Optionally add C++ function name demangling.
Этот коммит содержится в:
Edgar
2022-04-04 18:51:07 -04:00
родитель fd2f1b3b88
Коммит 2bf6d254b6
5 изменённых файлов: 340 добавлений и 0 удалений
+2
Просмотреть файл
@@ -21,6 +21,8 @@ def runTestCommand (platform, project, gfilter)
def command = """#!/usr/bin/env bash
set -x
cd ${project.paths.project_build_prefix}/build/release/test
${sudo} ulimit -l unlimited
ulimit -a
${sudo} NCCL_DEBUG=INFO HSA_FORCE_FINE_GRAIN_PCIE=1 ./UnitTests --gtest_filter=${gfilter} --gtest_output=xml --gtest_color=yes
"""
+41
Просмотреть файл
@@ -1,5 +1,7 @@
# Copyright (c) 2019-2021 Advanced Micro Devices, Inc. All rights reserved.
cmake_minimum_required(VERSION 2.8.12)
# Configure-time probe helpers used by the bfd detection logic below.
include(CheckIncludeFiles)
include(CheckSymbolExists)
# check_cxx_source_compiles() is called for the bfd_section_size() arity probe;
# it is only defined once this module has been included.
include(CheckCXXSourceCompiles)
if(BUILD_TESTS)
@@ -28,6 +30,35 @@ if(BUILD_TESTS)
include_directories(${GTEST_INCLUDE_DIRS} ./common)
enable_language(C)
# Probe for libbfd so the unit tests can print symbolized backtraces.
check_include_files(bfd.h HAVE_BFD)
if(HAVE_BFD)
  # Detect which spelling of the section accessors this bfd.h provides;
  # BfdBacktrace.hpp selects between them with the resulting HAVE_* macros.
  check_symbol_exists(bfd_get_section_flags "bfd.h" HAVE_DECL_BFD_GET_SECTION_FLAGS)
  check_symbol_exists(bfd_get_section_vma "bfd.h" HAVE_DECL_BFD_GET_SECTION_VMA)
  check_cxx_source_compiles(
    "#include <bfd.h>
    int main (int argc, char **argv) {
    bfd_size_type size;
    bfd abfd;
    asection sec;
    size = bfd_section_size(&abfd, &sec);
    return (int)(size);
    }"
    HAVE_TWO_ARG_BFD_SECTION_SIZE)

  # Demangling needs BOTH the header and the library: enabling the macro with
  # only demangle.h present would leave cplus_demangle() unresolved at link time.
  # The find_library() arguments match the ones used where UnitTests is linked,
  # so both places share the same HAVE_IBERTY cache entry.
  find_path(DEMANGLE_HEADER demangle.h PATHS /usr/include PATH_SUFFIXES libiberty)
  find_library(HAVE_IBERTY iberty PATHS /usr/lib64 /usr/lib/
               PATH_SUFFIXES x86_64-linux-gnu)
  if(NOT DEMANGLE_HEADER)
    message("Could not find demangle.h ${DEMANGLE_HEADER}")
  elseif(NOT HAVE_IBERTY)
    message("Found demangle.h in ${DEMANGLE_HEADER} but libiberty was not found; C++ demangling disabled")
  else()
    message("Found demangle.h in ${DEMANGLE_HEADER}")
    set(HAVE_CPLUS_DEMANGLE 1)
    # Written into config.h as HAVE_DECL_BASENAME 1.
    set(HAVE_DECL_BASENAME "1")
    include_directories(${DEMANGLE_HEADER})
  endif()
endif()

# NOTE(review): this generates config.h inside the source tree; consider moving
# the output to the binary dir together with a matching include directory.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/common/config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/common/config.h)
# Collect testing framework source files
set (COMMON_SOURCE_FILES
common/main.cpp
@@ -97,6 +128,16 @@ if(BUILD_TESTS)
target_link_libraries(UnitTests PRIVATE ${GTEST_BOTH_LIBRARIES})
target_link_libraries(UnitTests PRIVATE hip::host hip::device hsa-runtime64::hsa-runtime64)
# Link the libraries needed by the bfd-based backtrace support.
if(HAVE_BFD)
  target_link_libraries(UnitTests PRIVATE bfd ${CMAKE_DL_LIBS} z)
  find_library(HAVE_IBERTY iberty PATHS /usr/lib64 /usr/lib/
               PATH_SUFFIXES x86_64-linux-gnu)
  if(HAVE_IBERTY)
    message("iberty found @ ${HAVE_IBERTY} ")
    # Link the library that was actually located: a bare "iberty" would be
    # resolved again by the linker and fails when the file is not on its
    # default search path.
    target_link_libraries(UnitTests PRIVATE ${HAVE_IBERTY} ${CMAKE_DL_LIBS} z)
  endif()
endif()
# UnitTests using static library of rccl requires passing rccl
# through -l and -L instead of command line input.
if(BUILD_STATIC)
+236
Просмотреть файл
@@ -0,0 +1,236 @@
#ifndef __BFD_BACKTRACE__
#define __BFD_BACKTRACE__
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
* Modification Copyright (C) Advanced Micro Devices, Inc, 2022. ALL RIGHTS RESERVED
*
* This code is based on the UCX library's mechanism to extract the call stack
* using the BFD library (ucx/src/ucs/debug/debug.c).
*/
#include <dirent.h>
#include <link.h>
#include <dlfcn.h>
#include <execinfo.h>
#include <bfd.h>
#include <stdio.h>
#ifdef HAVE_CPLUS_DEMANGLE
#include <demangle.h>
#endif
/* Request/result record for locating the loaded object that contains an address. */
struct dl_address_search {
/* in: absolute address to look up */
unsigned long address;
/* out: name of the object containing the address (NULL when none matched) */
const char *filename;
/* out: load base (dlpi_addr) of that object */
unsigned long base;
};
/* One object file opened through BFD for symbol lookup. */
struct backtrace_file {
/* address lookup result identifying the file and its load base */
struct dl_address_search dl;
/* BFD handle returned by bfd_openr() */
bfd *abfd;
/* symbol table read with bfd_read_minisymbols(); released with free() */
asymbol **syms;
};
/* One resolved backtrace entry. */
struct backtrace_line {
/* address relative to the object's load base */
unsigned long address;
/* heap-allocated source file name */
char *file;
/* heap-allocated function name */
char *function;
/* source line number */
unsigned lineno;
};
/* Maximum number of backtrace entries that are collected. */
#define BACKTRACE_MAX 64
/* Fixed-capacity collection of resolved backtrace entries. */
struct backtrace {
/* storage for the entries; only the first 'size' are filled in */
struct backtrace_line lines[BACKTRACE_MAX];
/* number of valid entries in 'lines' */
int size;
/* NOTE(review): not referenced in the visible code; presumably an iteration cursor - confirm */
int position;
};
typedef struct backtrace backtrace_h;
/* State shared with find_address_in_section() while walking a file's sections. */
struct backtrace_search {
/* number of entries written to 'lines' so far */
int count;
/* file being searched; also carries the address to resolve */
struct backtrace_file *file;
int backoff; /* search the line where the function call
took place, instead of return address */
/* output array receiving the resolved entries */
struct backtrace_line *lines;
/* capacity of 'lines' */
int max_lines;
};
/*
 * Return the path of the running executable as reported by /proc/self/exe.
 * The string lives in a static buffer that is overwritten on every call;
 * it is empty when the link cannot be read.
 */
static const char *get_exe()
{
    static char path_buf[1024];
    int len = readlink("/proc/self/exe", path_buf, sizeof(path_buf) - 1);

    /* readlink() does not NUL-terminate; terminate at 0 on failure. */
    path_buf[(len < 0) ? 0 : len] = '\0';
    return path_buf;
}
/*
 * dl_iterate_phdr() callback: when the searched address falls inside one of
 * this object's PT_LOAD segments, record the object's name and load base in
 * the dl_address_search passed through 'data'.
 * Always returns 0 so that iteration continues over all loaded objects.
 */
static int dl_match_address(struct dl_phdr_info *info, size_t size, void *data)
{
    struct dl_address_search *search = (struct dl_address_search *) data;
    const ElfW(Addr) load_base = info->dlpi_addr;
    long idx;

    for (idx = 0; idx < (long) info->dlpi_phnum; ++idx) {
        const ElfW(Phdr) *seg = &info->dlpi_phdr[idx];
        ElfW(Addr) seg_start;

        if (seg->p_type != PT_LOAD) {
            continue;
        }

        seg_start = seg->p_vaddr + load_base;
        if (search->address < seg_start ||
            search->address >= seg_start + seg->p_memsz) {
            continue;
        }

        search->filename = info->dlpi_name;
        search->base     = info->dlpi_addr;
    }
    return 0;
}
/*
 * Find the loaded object containing dl->address and fill in dl->filename and
 * dl->base. An empty object name is replaced by the executable path from
 * get_exe(). Returns 1 when an object was found, 0 otherwise.
 */
static int dl_lookup_address(struct dl_address_search *dl)
{
    dl->base     = 0;
    dl->filename = NULL;
    dl_iterate_phdr(dl_match_address, dl);

    if (dl->filename == NULL) {
        return 0;
    }

    if (dl->filename[0] == '\0') {
        dl->filename = get_exe();
    }
    return 1;
}
static int load_file(struct backtrace_file *file)
{
long symcount;
unsigned int size;
char **matching;
file->syms = NULL;
file->abfd = bfd_openr(file->dl.filename, NULL);
if (!file->abfd) {
goto err;
}
if (bfd_check_format(file->abfd, bfd_archive)) {
goto err_close;
}
if (!bfd_check_format_matches(file->abfd, bfd_object, &matching)) {
goto err_close;
}
if ((bfd_get_file_flags(file->abfd) & HAS_SYMS) == 0) {
goto err_close;
}
symcount = bfd_read_minisymbols(file->abfd, 0, (void**)&file->syms, &size);
if (symcount == 0) {
free(file->syms);
symcount = bfd_read_minisymbols(file->abfd, 1, (void**)&file->syms, &size);
}
if (symcount < 0) {
goto err_close;
}
return 1;
err_close:
bfd_close(file->abfd);
err:
return 0;
}
static void unload_file(struct backtrace_file *file)
{
free(file->syms);
bfd_close(file->abfd);
}
/*
 * bfd_map_over_sections() callback. If the address being searched lies in
 * 'section', resolve it to file/function/line and append the result (plus any
 * inlined frames) to search->lines, up to search->max_lines entries.
 *
 * abfd    - BFD handle of the file being searched
 * section - section currently visited
 * data    - struct backtrace_search describing the request and the output
 *
 * The stored file and function strings are heap-allocated; the owner of
 * search->lines is responsible for freeing them.
 */
static void find_address_in_section(bfd *abfd, asection *section, void *data)
{
    struct backtrace_search *search = (struct backtrace_search *)data;
    bfd_size_type size;
    bfd_vma vma;
    flagword flags;
    unsigned long address;
    /* Initialised because BFD is not guaranteed to write them on failure. */
    const char *filename = NULL;
    const char *function = NULL;
    unsigned lineno = 0;
    int found;

    /* Nothing to do if a previous section already matched or there is no room. */
    if ((search->count > 0) || (search->max_lines <= 0)) {
        return;
    }

#ifdef HAVE_DECL_BFD_GET_SECTION_FLAGS
    flags = bfd_get_section_flags(abfd, section);
#else
    flags = bfd_section_flags(section);
#endif
    if ((flags & SEC_ALLOC) == 0) {
        return;
    }

    address = search->file->dl.address - search->file->dl.base;

#ifdef HAVE_DECL_BFD_GET_SECTION_VMA
    vma = bfd_get_section_vma(abfd, section);
#else
    vma = bfd_section_vma(section);
#endif
    if (address < vma) {
        return;
    }

#ifdef HAVE_TWO_ARG_BFD_SECTION_SIZE
    size = bfd_section_size(abfd, section);
#else
    size = bfd_section_size(section);
#endif
    if (address >= vma + size) {
        return;
    }

    /* Search in address-1 to get the calling line instead of return address */
    found = bfd_find_nearest_line(abfd, section, search->file->syms,
                                  address - vma - search->backoff,
                                  &filename, &function, &lineno);
    do {
        struct backtrace_line *line = &search->lines[search->count];

        line->address = address;
        line->file    = strdup((found && filename) ? filename : "UNKNOWN_FILE");

        if (found && function) {
            line->function = NULL;
#ifdef HAVE_CPLUS_DEMANGLE
            /* Returns NULL when the name is not a mangled C++ name. */
            line->function = cplus_demangle(function, 0);
#endif
            if (line->function == NULL) {
                line->function = strdup(function);
            }
        } else {
            line->function = strdup("UNKNOWN_FUNCTION");
        }

        line->lineno = found ? lineno : 0;

        if (search->count == 0) {
            /* To get the inliner info, search at the original address */
            bfd_find_nearest_line(abfd, section, search->file->syms, address - vma,
                                  &filename, &function, &lineno);
        }

        ++search->count;
        found = bfd_find_inliner_info(abfd, &filename, &function, &lineno);
    } while (found && (search->count < search->max_lines));
}
static int get_line_info(struct backtrace_file *file, int backoff,
struct backtrace_line *lines, int max)
{
struct backtrace_search search;
search.file = file;
search.backoff = backoff;
search.count = 0;
search.lines = lines;
search.max_lines = max;
bfd_map_over_sections(file->abfd, find_address_in_section, &search);
return search.count;
}
#endif
+55
Просмотреть файл
@@ -5,7 +5,15 @@
************************************************************************/
#include "TestBedChild.hpp"
#include "config.h"
#ifdef HAVE_BFD
#include "BfdBacktrace.hpp"
#endif
#include <thread>
#include <signal.h>
#include <execinfo.h>
#define CHILD_NCCL_CALL(cmd, msg) \
{ \
@@ -21,6 +29,47 @@
// Reads sizeof(val) bytes from childReadFd into val; makes the enclosing
// function return TEST_FAIL when fewer bytes are read or the read fails.
#define PIPE_READ(val) \
if (read(childReadFd, &val, sizeof(val)) != sizeof(val)) return TEST_FAIL;
// Fatal-signal handler: prints the signal number and a backtrace of the
// current thread, then terminates the process with exit(-1).
//
// With HAVE_BFD the frames are resolved to file/function/line through the
// helpers in BfdBacktrace.hpp; otherwise backtrace_symbols() output is printed.
//
// NOTE(review): printf/malloc-based code is not async-signal-safe; this is a
// best-effort diagnostic for a process that is about to die anyway. Strings
// allocated for the backtrace are intentionally not freed before exit.
void sig_handler(int signum){
  printf("\n [%d] Inside handler function signal is %d\n", getpid(), signum);

#ifdef HAVE_BFD
  void *addresses[BACKTRACE_MAX];
  int num_addresses = backtrace(addresses, BACKTRACE_MAX);
  struct backtrace_file file;
  backtrace_h bckt;

  bckt.size     = 0;
  bckt.position = 0;

  // Stop as soon as the output buffer is full; further lookups could not
  // store anything.
  for (int i = 0; i < num_addresses && bckt.size < BACKTRACE_MAX; ++i) {
    file.dl.address = (unsigned long)addresses[i];
    if (dl_lookup_address(&file.dl) && load_file(&file)) {
      bckt.size += get_line_info(&file, 1,
                                 bckt.lines + bckt.size,
                                 BACKTRACE_MAX - bckt.size);
      unload_file(&file);
    }
  }

  // Only the first bckt.size entries were filled in; the rest of the array is
  // uninitialized and must not be read.
  for (int i = 0; i < bckt.size; i++) {
    const char *file_name = bckt.lines[i].file ? bckt.lines[i].file : "UNKNOWN_FILE";
    const char *func_name = bckt.lines[i].function ? bckt.lines[i].function : "UNKNOWN_FUNCTION";
    printf("%p %s : %s line %u\n", (void*)bckt.lines[i].address, file_name, func_name, bckt.lines[i].lineno);
  }
#else
#define BT_BUF_SIZE 1024
  void *buffer[BT_BUF_SIZE];
  char **strings;

  int nptrs = backtrace(buffer, BT_BUF_SIZE);
  strings = backtrace_symbols(buffer, nptrs);
  // backtrace_symbols() returns NULL when it cannot allocate the result.
  if (strings != NULL) {
    for (int j = 0; j < nptrs; j++)
      printf("%s\n", strings[j]);
    free (strings);
  }
#endif

  exit (-1);
}
namespace RcclUnitTesting
{
TestBedChild::TestBedChild(int const childId, bool const verbose, int const printValues)
@@ -28,6 +77,11 @@ namespace RcclUnitTesting
this->childId = childId;
this->verbose = verbose;
this->printValues = printValues;
signal(SIGILL, sig_handler);
signal(SIGBUS, sig_handler);
signal(SIGFPE, sig_handler);
signal(SIGSEGV, sig_handler);
}
int TestBedChild::InitPipes()
@@ -51,6 +105,7 @@ namespace RcclUnitTesting
}
this->parentReadFd = pipefd[0];
this->childWriteFd = pipefd[1];
return TEST_SUCCESS;
}
+6
Просмотреть файл
@@ -0,0 +1,6 @@
/* Template processed by CMake's configure_file() to produce config.h. */
/* Defined when bfd.h was found. */
#cmakedefine HAVE_BFD
/* Defined when bfd.h provides bfd_get_section_flags. */
#cmakedefine HAVE_DECL_BFD_GET_SECTION_FLAGS
/* Defined when bfd.h provides bfd_get_section_vma. */
#cmakedefine HAVE_DECL_BFD_GET_SECTION_VMA
/* Defined when bfd_section_size() takes two arguments (abfd, section). */
#cmakedefine HAVE_TWO_ARG_BFD_SECTION_SIZE
/* Defined when demangle.h is available and cplus_demangle() may be used. */
#cmakedefine HAVE_CPLUS_DEMANGLE
/* Set together with HAVE_CPLUS_DEMANGLE by the build configuration. */
#cmakedefine HAVE_DECL_BASENAME @HAVE_DECL_BASENAME@