add a signal handler and backtrace
Tweak the signal handler and force a non-release build. Increase the ulimit locked-memory value. Update the signal handler to use BFD symbol resolution. Include configure logic to find the BFD functions. Optionally add C++ function name demangling.
Этот коммит содержится в:
@@ -21,6 +21,8 @@ def runTestCommand (platform, project, gfilter)
|
||||
def command = """#!/usr/bin/env bash
|
||||
set -x
|
||||
cd ${project.paths.project_build_prefix}/build/release/test
|
||||
${sudo} ulimit -l unlimited
|
||||
ulimit -a
|
||||
${sudo} NCCL_DEBUG=INFO HSA_FORCE_FINE_GRAIN_PCIE=1 ./UnitTests --gtest_filter=${gfilter} --gtest_output=xml --gtest_color=yes
|
||||
"""
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# Copyright (c) 2019-2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
cmake_minimum_required(VERSION 2.8.12)
|
||||
INCLUDE(CheckIncludeFiles)
|
||||
INCLUDE(CheckSymbolExists)
|
||||
|
||||
if(BUILD_TESTS)
|
||||
|
||||
@@ -28,6 +30,35 @@ if(BUILD_TESTS)
|
||||
|
||||
include_directories(${GTEST_INCLUDE_DIRS} ./common)
|
||||
|
||||
# ---------------------------------------------------------------------------
# Optional BFD-based backtrace support for the unit tests.
#
# Probes for bfd.h and for the API variants that differ between binutils
# releases, then looks for libiberty's demangle.h. The results are written
# into common/config.h through common/config.h.in.
# ---------------------------------------------------------------------------
enable_language(C)

# CHECK_CXX_SOURCE_COMPILES is defined by this module; include it explicitly
# instead of relying on some other module having pulled it in.
include(CheckCXXSourceCompiles)

CHECK_INCLUDE_FILES(bfd.h HAVE_BFD)
if (HAVE_BFD)
  # Older binutils expose bfd_get_section_flags()/bfd_get_section_vma();
  # the sources fall back to bfd_section_flags()/bfd_section_vma() otherwise.
  CHECK_SYMBOL_EXISTS(bfd_get_section_flags "bfd.h" HAVE_DECL_BFD_GET_SECTION_FLAGS)
  CHECK_SYMBOL_EXISTS(bfd_get_section_vma "bfd.h" HAVE_DECL_BFD_GET_SECTION_VMA)

  # bfd_section_size() takes (abfd, section) in some releases and only
  # (section) in others; detect the two-argument form by compiling a probe.
  CHECK_CXX_SOURCE_COMPILES(
    "#include <bfd.h>

     int main (int argc, char **argv) {
       bfd_size_type size;
       bfd abfd;
       asection sec;
       size = bfd_section_size(&abfd, &sec);
       return (int)(size);
     }"
    HAVE_TWO_ARG_BFD_SECTION_SIZE)

  # demangle.h (libiberty) enables C++ symbol demangling in the backtrace.
  find_path(DEMANGLE_HEADER demangle.h PATHS /usr/include PATH_SUFFIXES libiberty)
  if(NOT DEMANGLE_HEADER)
    message("Could not find demangle.h ${DEMANGLE_HEADER}")
  else()
    message("Found demangle.h in ${DEMANGLE_HEADER}")
    set (HAVE_CPLUS_DEMANGLE 1)
    set (HAVE_DECL_BASENAME "1")
    INCLUDE_DIRECTORIES(${DEMANGLE_HEADER})
  endif()
endif()

# NOTE(review): config.h is generated into the source tree, next to
# config.h.in; consider generating into the binary directory instead.
CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/common/config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/common/config.h)
|
||||
|
||||
# Collect testing framework source files
|
||||
set (COMMON_SOURCE_FILES
|
||||
common/main.cpp
|
||||
@@ -97,6 +128,16 @@ if(BUILD_TESTS)
|
||||
target_link_libraries(UnitTests PRIVATE ${GTEST_BOTH_LIBRARIES})
|
||||
target_link_libraries(UnitTests PRIVATE hip::host hip::device hsa-runtime64::hsa-runtime64)
|
||||
|
||||
# Link the libraries needed by the BFD-based backtrace, when bfd.h was found.
if(HAVE_BFD)
  target_link_libraries(UnitTests PRIVATE bfd dl z)

  # libiberty is optional; it is linked only when it can be located.
  find_library(HAVE_IBERTY
               iberty
               PATHS /usr/lib64 /usr/lib/
               PATH_SUFFIXES x86_64-linux-gnu)
  if(HAVE_IBERTY)
    message("iberty found @ ${HAVE_IBERTY} ")
    target_link_libraries(UnitTests PRIVATE iberty dl z)
  endif()
endif()
|
||||
|
||||
# UnitTests using static library of rccl requires passing rccl
|
||||
# through -l and -L instead of command line input.
|
||||
if(BUILD_STATIC)
|
||||
|
||||
@@ -0,0 +1,236 @@
|
||||
#ifndef __BFD_BACKTRACE__
|
||||
#define __BFD_BACKTRACE__
|
||||
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
|
||||
* Modification Copyright (C) Advanced Micro Devices, Inc, 2022. ALL RIGHTS RESERVED
|
||||
*
|
||||
* This code is based on the UCX library's mechanism to extract the call stack
|
||||
* using the BFD library (ucx/src/ucs/debug/debug.c).
|
||||
*/
|
||||
|
||||
|
||||
#include <dirent.h>
#include <dlfcn.h>
#include <execinfo.h>
#include <link.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <bfd.h>
|
||||
|
||||
#ifdef HAVE_CPLUS_DEMANGLE
|
||||
#include <demangle.h>
|
||||
#endif
|
||||
|
||||
/*
 * Request/result record for mapping a runtime address to the loaded object
 * that contains it (filled in by the dl_iterate_phdr() callback).
 */
struct dl_address_search {
    unsigned long  address;   /* in:  address to look up */
    const char    *filename;  /* out: name of the object whose PT_LOAD segment covers it */
    unsigned long  base;      /* out: load base address of that object */
};
|
||||
|
||||
struct backtrace_file {
|
||||
struct dl_address_search dl;
|
||||
bfd *abfd;
|
||||
asymbol **syms;
|
||||
};
|
||||
|
||||
/* One resolved frame (or inlined frame) of a backtrace. */
struct backtrace_line {
    unsigned long  address;   /* address the line was resolved for */
    char          *file;      /* heap-allocated source file name */
    char          *function;  /* heap-allocated function name */
    unsigned int   lineno;    /* source line number */
};
|
||||
|
||||
#define BACKTRACE_MAX 64
|
||||
|
||||
struct backtrace {
|
||||
struct backtrace_line lines[BACKTRACE_MAX];
|
||||
int size;
|
||||
int position;
|
||||
};
|
||||
typedef struct backtrace backtrace_h;
|
||||
|
||||
/* State shared with the per-section callback while resolving one address. */
struct backtrace_search {
    int                     count;      /* number of lines written so far */
    struct backtrace_file  *file;       /* object file being searched */
    int                     backoff;    /* search the line where the function call
                                           took place, instead of return address */
    struct backtrace_line  *lines;      /* output array */
    int                     max_lines;  /* capacity of the output array */
};
|
||||
|
||||
/*
 * Return the path of the running executable, read from /proc/self/exe.
 *
 * The result lives in a static buffer that is overwritten by every call.
 * An empty string is returned when the link cannot be read.
 */
static const char *get_exe()
{
    static char exe[1024];
    int len = readlink("/proc/self/exe", exe, sizeof(exe) - 1);

    /* readlink() does not NUL-terminate: terminate at the returned length,
       or at offset 0 when the call failed. */
    exe[(len < 0) ? 0 : len] = '\0';
    return exe;
}
|
||||
|
||||
static int dl_match_address(struct dl_phdr_info *info, size_t size, void *data)
|
||||
{
|
||||
struct dl_address_search *dl = (struct dl_address_search *) data;
|
||||
const ElfW(Phdr) *phdr;
|
||||
ElfW(Addr) load_base = info->dlpi_addr;
|
||||
long n;
|
||||
|
||||
phdr = info->dlpi_phdr;
|
||||
for (n = info->dlpi_phnum; --n >= 0; phdr++) {
|
||||
if (phdr->p_type == PT_LOAD) {
|
||||
ElfW(Addr) vbaseaddr = phdr->p_vaddr + load_base;
|
||||
if (dl->address >= vbaseaddr && dl->address < vbaseaddr + phdr->p_memsz) {
|
||||
dl->filename = info->dlpi_name;
|
||||
dl->base = info->dlpi_addr;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dl_lookup_address(struct dl_address_search *dl)
|
||||
{
|
||||
dl->filename = NULL;
|
||||
dl->base = 0;
|
||||
|
||||
dl_iterate_phdr(dl_match_address, dl);
|
||||
if (dl->filename == NULL) {
|
||||
return 0;
|
||||
}
|
||||
if (strlen(dl->filename) == 0) {
|
||||
dl->filename = get_exe();
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int load_file(struct backtrace_file *file)
|
||||
{
|
||||
long symcount;
|
||||
unsigned int size;
|
||||
char **matching;
|
||||
|
||||
file->syms = NULL;
|
||||
file->abfd = bfd_openr(file->dl.filename, NULL);
|
||||
if (!file->abfd) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (bfd_check_format(file->abfd, bfd_archive)) {
|
||||
goto err_close;
|
||||
}
|
||||
|
||||
if (!bfd_check_format_matches(file->abfd, bfd_object, &matching)) {
|
||||
goto err_close;
|
||||
}
|
||||
if ((bfd_get_file_flags(file->abfd) & HAS_SYMS) == 0) {
|
||||
goto err_close;
|
||||
}
|
||||
|
||||
symcount = bfd_read_minisymbols(file->abfd, 0, (void**)&file->syms, &size);
|
||||
if (symcount == 0) {
|
||||
free(file->syms);
|
||||
symcount = bfd_read_minisymbols(file->abfd, 1, (void**)&file->syms, &size);
|
||||
}
|
||||
if (symcount < 0) {
|
||||
goto err_close;
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
err_close:
|
||||
bfd_close(file->abfd);
|
||||
err:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void unload_file(struct backtrace_file *file)
|
||||
{
|
||||
free(file->syms);
|
||||
bfd_close(file->abfd);
|
||||
}
|
||||
|
||||
/*
 * Return a heap-allocated, printable name for `function`.
 *
 * NULL becomes "UNKNOWN_FUNCTION". When demangling support is compiled in,
 * the demangled name is used; names that cannot be demangled (for example
 * plain C symbols, for which cplus_demangle() returns NULL) are copied as is.
 */
static char *backtrace_function_name(const char *function)
{
    char *name = NULL;

    if (function == NULL) {
        return strdup("UNKNOWN_FUNCTION");
    }

#ifdef HAVE_CPLUS_DEMANGLE
    name = cplus_demangle(function, 0);
#endif
    return (name != NULL) ? name : strdup(function);
}

/*
 * bfd_map_over_sections() callback: if the searched address lies inside
 * `section`, resolve it to file/function/line and append the result (plus
 * any inlined frames) to the output array of the backtrace_search in `data`.
 *
 * Does nothing when a previous section already produced results, when the
 * output array has no room, or when the section is not allocated (SEC_ALLOC).
 */
static void find_address_in_section(bfd *abfd, asection *section, void *data)
{
    struct backtrace_search *search = (struct backtrace_search *)data;
    struct backtrace_line *entry;
    bfd_size_type size;
    bfd_vma vma;
    unsigned int flags;
    unsigned long address;
    const char *filename = NULL;
    const char *function = NULL;
    unsigned lineno = 0;
    int found;

    if ((search->count > 0) || (search->max_lines <= 0)) {
        return;
    }

#ifdef HAVE_DECL_BFD_GET_SECTION_FLAGS
    flags = bfd_get_section_flags(abfd, section);
#else
    flags = bfd_section_flags(section);
#endif
    if ((flags & SEC_ALLOC) == 0) {
        return;
    }

    /* Translate the runtime address into an address relative to the object. */
    address = search->file->dl.address - search->file->dl.base;

#ifdef HAVE_DECL_BFD_GET_SECTION_VMA
    vma = bfd_get_section_vma(abfd, section);
#else
    vma = bfd_section_vma(section);
#endif
    if (address < vma) {
        return;
    }

#ifdef HAVE_TWO_ARG_BFD_SECTION_SIZE
    size = bfd_section_size(abfd, section);
#else
    size = bfd_section_size(section);
#endif
    if (address >= vma + size) {
        return;
    }

    /* Search in address-1 to get the calling line instead of return address */
    found = bfd_find_nearest_line(abfd, section, search->file->syms,
                                  address - vma - search->backoff,
                                  &filename, &function, &lineno);
    if (!found) {
        /* Nothing was resolved: record the frame with placeholders rather
           than reading outputs the failed lookup may not have written. */
        filename = NULL;
        function = NULL;
        lineno   = 0;
    }

    do {
        entry           = &search->lines[search->count];
        entry->address  = address;
        entry->file     = strdup(filename ? filename : "UNKNOWN_FILE");
        entry->function = backtrace_function_name(function);
        entry->lineno   = lineno;

        if (search->count == 0) {
            /* To get the inliner info, search at the original address */
            bfd_find_nearest_line(abfd, section, search->file->syms, address - vma,
                                  &filename, &function, &lineno);
        }

        ++search->count;
        found = bfd_find_inliner_info(abfd, &filename, &function, &lineno);
    } while (found && (search->count < search->max_lines));
}
|
||||
|
||||
|
||||
static int get_line_info(struct backtrace_file *file, int backoff,
|
||||
struct backtrace_line *lines, int max)
|
||||
{
|
||||
struct backtrace_search search;
|
||||
|
||||
search.file = file;
|
||||
search.backoff = backoff;
|
||||
search.count = 0;
|
||||
search.lines = lines;
|
||||
search.max_lines = max;
|
||||
bfd_map_over_sections(file->abfd, find_address_in_section, &search);
|
||||
return search.count;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -5,7 +5,15 @@
|
||||
************************************************************************/
|
||||
|
||||
#include "TestBedChild.hpp"
|
||||
#include "config.h"
|
||||
|
||||
#ifdef HAVE_BFD
|
||||
#include "BfdBacktrace.hpp"
|
||||
#endif
|
||||
|
||||
#include <thread>
|
||||
#include <signal.h>
|
||||
#include <execinfo.h>
|
||||
|
||||
#define CHILD_NCCL_CALL(cmd, msg) \
|
||||
{ \
|
||||
@@ -21,6 +29,47 @@
|
||||
#define PIPE_READ(val) \
|
||||
if (read(childReadFd, &val, sizeof(val)) != sizeof(val)) return TEST_FAIL;
|
||||
|
||||
|
||||
/*
 * Fatal-signal handler: print the signal number and a backtrace of the
 * current thread to stdout, then terminate the process with exit(-1).
 *
 * With HAVE_BFD each frame is resolved to file/function/line through BFD;
 * otherwise the raw strings from backtrace_symbols() are printed.
 *
 * NOTE: printf(), malloc() and the BFD calls used here are not
 * async-signal-safe. This is best-effort diagnostics for a process that is
 * about to die anyway.
 */
void sig_handler(int signum){
  printf("\n [%d] Inside handler function signal is %d\n", getpid(), signum);

#ifdef HAVE_BFD
  void *addresses[BACKTRACE_MAX];
  int num_addresses = backtrace(addresses, BACKTRACE_MAX);
  struct backtrace_file file;
  backtrace_h bckt;
  bckt.size = 0;

  /* Stop resolving once the output array is full. */
  for (int i = 0; i < num_addresses && bckt.size < BACKTRACE_MAX; ++i) {
    file.dl.address = (unsigned long)addresses[i];
    if (dl_lookup_address(&file.dl) && load_file(&file)) {
      bckt.size += get_line_info(&file, 1,
                                 bckt.lines + bckt.size,
                                 BACKTRACE_MAX - bckt.size);
      unload_file(&file);
    }
  }

  /* Only the first bckt.size entries were filled in; the rest of the array
     is uninitialized and must not be read. */
  for (int i = 0; i < bckt.size; i++) {
    const char *file_name = bckt.lines[i].file ? bckt.lines[i].file : "UNKNOWN_FILE";
    const char *func_name = bckt.lines[i].function ? bckt.lines[i].function : "UNKNOWN_FUNCTION";
    printf("%p %s : %s line %u\n", (void*)bckt.lines[i].address, file_name, func_name, bckt.lines[i].lineno);
  }
#else
#define BT_BUF_SIZE 1024
  void *buffer[BT_BUF_SIZE];
  char **strings;

  int nptrs = backtrace(buffer, BT_BUF_SIZE);
  strings = backtrace_symbols(buffer, nptrs);
  if (strings != NULL) {
    for (int j = 0; j < nptrs; j++) {
      printf("%s\n", strings[j]);
    }
    free (strings);
  } else {
    /* backtrace_symbols() could not allocate: fall back to the variant that
       writes straight to a file descriptor. */
    fflush(stdout);
    backtrace_symbols_fd(buffer, nptrs, fileno(stdout));
  }
#endif

  exit (-1);
}
|
||||
|
||||
namespace RcclUnitTesting
|
||||
{
|
||||
TestBedChild::TestBedChild(int const childId, bool const verbose, int const printValues)
|
||||
@@ -28,6 +77,11 @@ namespace RcclUnitTesting
|
||||
this->childId = childId;
|
||||
this->verbose = verbose;
|
||||
this->printValues = printValues;
|
||||
|
||||
signal(SIGILL, sig_handler);
|
||||
signal(SIGBUS, sig_handler);
|
||||
signal(SIGFPE, sig_handler);
|
||||
signal(SIGSEGV, sig_handler);
|
||||
}
|
||||
|
||||
int TestBedChild::InitPipes()
|
||||
@@ -51,6 +105,7 @@ namespace RcclUnitTesting
|
||||
}
|
||||
this->parentReadFd = pipefd[0];
|
||||
this->childWriteFd = pipefd[1];
|
||||
|
||||
return TEST_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
 * Template processed by CMake's configure_file() to produce config.h.
 * Each #cmakedefine becomes a #define when the corresponding CMake variable
 * is set, and a commented-out #undef otherwise.
 */
#ifndef RCCL_TEST_CONFIG_H
#define RCCL_TEST_CONFIG_H

/* bfd.h is available */
#cmakedefine HAVE_BFD
/* bfd.h provides bfd_get_section_flags() */
#cmakedefine HAVE_DECL_BFD_GET_SECTION_FLAGS
/* bfd.h provides bfd_get_section_vma() */
#cmakedefine HAVE_DECL_BFD_GET_SECTION_VMA
/* bfd_section_size() takes (abfd, section) rather than (section) */
#cmakedefine HAVE_TWO_ARG_BFD_SECTION_SIZE
/* demangle.h was found; C++ names in backtraces are demangled */
#cmakedefine HAVE_CPLUS_DEMANGLE
/* set together with HAVE_CPLUS_DEMANGLE when demangle.h was found */
#cmakedefine HAVE_DECL_BASENAME @HAVE_DECL_BASENAME@

#endif /* RCCL_TEST_CONFIG_H */
|
||||
Ссылка в новой задаче
Block a user