3c223c105a
Add network communication through another GPU connected with NVLink (PXN). Add aggregation of messages coming from different local GPUs through PXN and going to the same destination. Add new v5 plugin API with grouped receives and tags. Add compat for v4 plugins. Add naming of NCCL threads to help debugging. Fix NVLink detection and avoid data corruption when some NVLinks are down. Add support for Relaxed Ordering for IB. Add profiling and timing infrastructure.
186 lines
6.3 KiB
C++
186 lines
6.3 KiB
C++
/*************************************************************************
|
|
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* See LICENSE.txt for license information
|
|
************************************************************************/
|
|
|
|
#include "core.h"
|
|
#include "nccl_net.h"
|
|
#include <stdlib.h>
|
|
#include <stdarg.h>
|
|
|
|
int ncclDebugLevel = -1;
|
|
thread_local int ncclDebugNoWarn = 0;
|
|
uint64_t ncclDebugMask = NCCL_INIT; // Default debug sub-system mask is INIT
|
|
FILE *ncclDebugFile = stdout;
|
|
pthread_mutex_t ncclDebugLock = PTHREAD_MUTEX_INITIALIZER;
|
|
|
|
void ncclDebugInit() {
|
|
pthread_mutex_lock(&ncclDebugLock);
|
|
if (ncclDebugLevel != -1) { pthread_mutex_unlock(&ncclDebugLock); return; }
|
|
const char* nccl_debug = getenv("NCCL_DEBUG");
|
|
if (nccl_debug == NULL) {
|
|
ncclDebugLevel = NCCL_LOG_NONE;
|
|
} else if (strcasecmp(nccl_debug, "VERSION") == 0) {
|
|
ncclDebugLevel = NCCL_LOG_VERSION;
|
|
} else if (strcasecmp(nccl_debug, "WARN") == 0) {
|
|
ncclDebugLevel = NCCL_LOG_WARN;
|
|
} else if (strcasecmp(nccl_debug, "INFO") == 0) {
|
|
ncclDebugLevel = NCCL_LOG_INFO;
|
|
} else if (strcasecmp(nccl_debug, "ABORT") == 0) {
|
|
ncclDebugLevel = NCCL_LOG_ABORT;
|
|
} else if (strcasecmp(nccl_debug, "TRACE") == 0) {
|
|
ncclDebugLevel = NCCL_LOG_TRACE;
|
|
}
|
|
|
|
/* Parse the NCCL_DEBUG_SUBSYS env var
|
|
* This can be a comma separated list such as INIT,COLL
|
|
* or ^INIT,COLL etc
|
|
*/
|
|
char* ncclDebugSubsysEnv = getenv("NCCL_DEBUG_SUBSYS");
|
|
if (ncclDebugSubsysEnv != NULL) {
|
|
int invert = 0;
|
|
if (ncclDebugSubsysEnv[0] == '^') { invert = 1; ncclDebugSubsysEnv++; }
|
|
ncclDebugMask = invert ? ~0ULL : 0ULL;
|
|
char *ncclDebugSubsys = strdup(ncclDebugSubsysEnv);
|
|
char *subsys = strtok(ncclDebugSubsys, ",");
|
|
while (subsys != NULL) {
|
|
uint64_t mask = 0;
|
|
if (strcasecmp(subsys, "INIT") == 0) {
|
|
mask = NCCL_INIT;
|
|
} else if (strcasecmp(subsys, "COLL") == 0) {
|
|
mask = NCCL_COLL;
|
|
} else if (strcasecmp(subsys, "P2P") == 0) {
|
|
mask = NCCL_P2P;
|
|
} else if (strcasecmp(subsys, "SHM") == 0) {
|
|
mask = NCCL_SHM;
|
|
} else if (strcasecmp(subsys, "NET") == 0) {
|
|
mask = NCCL_NET;
|
|
} else if (strcasecmp(subsys, "GRAPH") == 0) {
|
|
mask = NCCL_GRAPH;
|
|
} else if (strcasecmp(subsys, "TUNING") == 0) {
|
|
mask = NCCL_TUNING;
|
|
} else if (strcasecmp(subsys, "ENV") == 0) {
|
|
mask = NCCL_ENV;
|
|
} else if (strcasecmp(subsys, "ALLOC") == 0) {
|
|
mask = NCCL_ALLOC;
|
|
} else if (strcasecmp(subsys, "ALL") == 0) {
|
|
mask = NCCL_ALL;
|
|
}
|
|
if (mask) {
|
|
if (invert) ncclDebugMask &= ~mask; else ncclDebugMask |= mask;
|
|
}
|
|
subsys = strtok(NULL, ",");
|
|
}
|
|
free(ncclDebugSubsys);
|
|
}
|
|
|
|
/* Parse and expand the NCCL_DEBUG_FILE path and
|
|
* then create the debug file. But don't bother unless the
|
|
* NCCL_DEBUG level is > VERSION
|
|
*/
|
|
const char* ncclDebugFileEnv = getenv("NCCL_DEBUG_FILE");
|
|
if (ncclDebugLevel > NCCL_LOG_VERSION && ncclDebugFileEnv != NULL) {
|
|
int c = 0;
|
|
char debugFn[PATH_MAX+1] = "";
|
|
char *dfn = debugFn;
|
|
while (ncclDebugFileEnv[c] != '\0' && c < PATH_MAX) {
|
|
if (ncclDebugFileEnv[c++] != '%') {
|
|
*dfn++ = ncclDebugFileEnv[c-1];
|
|
continue;
|
|
}
|
|
switch (ncclDebugFileEnv[c++]) {
|
|
case '%': // Double %
|
|
*dfn++ = '%';
|
|
break;
|
|
case 'h': // %h = hostname
|
|
char hostname[1024];
|
|
getHostName(hostname, 1024, '.');
|
|
dfn += snprintf(dfn, PATH_MAX, "%s", hostname);
|
|
break;
|
|
case 'p': // %p = pid
|
|
dfn += snprintf(dfn, PATH_MAX, "%d", getpid());
|
|
break;
|
|
default: // Echo everything we don't understand
|
|
*dfn++ = '%';
|
|
*dfn++ = ncclDebugFileEnv[c-1];
|
|
break;
|
|
}
|
|
}
|
|
*dfn = '\0';
|
|
if (debugFn[0] != '\0') {
|
|
FILE *file = fopen(debugFn, "w");
|
|
if (file != NULL) {
|
|
ncclDebugFile = file;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef ENABLE_TRACE
|
|
ncclEpoch = std::chrono::high_resolution_clock::now();
|
|
#endif
|
|
pthread_mutex_unlock(&ncclDebugLock);
|
|
}
|
|
|
|
/* Common logging function used by the INFO, WARN and TRACE macros
|
|
* Also exported to the dynamically loadable Net transport modules so
|
|
* they can share the debugging mechanisms and output files
|
|
*/
|
|
void ncclDebugLog(ncclDebugLogLevel level, unsigned long flags, const char *filefunc, int line, const char *fmt, ...) {
|
|
if (ncclDebugLevel == -1) ncclDebugInit();
|
|
if (ncclDebugNoWarn != 0 && level == NCCL_LOG_WARN) { level = NCCL_LOG_INFO; flags = ncclDebugNoWarn; }
|
|
if (ncclDebugLevel < level || ((flags & ncclDebugMask) == 0)) return;
|
|
|
|
// Gather the rank information. This can take > 1us so we want to make sure
|
|
// we only do it when needed.
|
|
char hostname[1024];
|
|
getHostName(hostname, 1024, '.');
|
|
int cudaDev;
|
|
cudaGetDevice(&cudaDev);
|
|
int pid = getpid();
|
|
int tid = syscall(SYS_gettid);
|
|
|
|
char buffer[1024];
|
|
size_t len = 0;
|
|
pthread_mutex_lock(&ncclDebugLock);
|
|
if (level == NCCL_LOG_WARN)
|
|
len = snprintf(buffer, sizeof(buffer),
|
|
"\n%s:%d:%d [%d] %s:%d NCCL WARN ", hostname, pid, tid, cudaDev, filefunc, line);
|
|
else if (level == NCCL_LOG_INFO)
|
|
len = snprintf(buffer, sizeof(buffer),
|
|
"%s:%d:%d [%d] NCCL INFO ", hostname, pid, tid, cudaDev);
|
|
#ifdef ENABLE_TRACE
|
|
else if (level == NCCL_LOG_TRACE) {
|
|
auto delta = std::chrono::high_resolution_clock::now() - ncclEpoch;
|
|
double timestamp = std::chrono::duration_cast<std::chrono::duration<double>>(delta).count()*1000;
|
|
len = snprintf(buffer, sizeof(buffer),
|
|
"%s:%d:%d [%d] %f %s:%d NCCL TRACE ", hostname, pid, tid, cudaDev, timestamp, filefunc, line);
|
|
}
|
|
#endif
|
|
if (len) {
|
|
va_list vargs;
|
|
va_start(vargs, fmt);
|
|
(void) vsnprintf(buffer+len, sizeof(buffer)-len, fmt, vargs);
|
|
va_end(vargs);
|
|
fprintf(ncclDebugFile,"%s\n", buffer);
|
|
fflush(ncclDebugFile);
|
|
}
|
|
pthread_mutex_unlock(&ncclDebugLock);
|
|
}
|
|
|
|
NCCL_PARAM(SetThreadName, "SET_THREAD_NAME", 0);
|
|
|
|
void ncclSetThreadName(pthread_t thread, const char *fmt, ...) {
|
|
// pthread_setname_np is nonstandard GNU extension
|
|
// needs the following feature test macro
|
|
#ifdef _GNU_SOURCE
|
|
if (ncclParamSetThreadName() != 1) return;
|
|
char threadName[NCCL_THREAD_NAMELEN];
|
|
va_list vargs;
|
|
va_start(vargs, fmt);
|
|
vsnprintf(threadName, NCCL_THREAD_NAMELEN, fmt, vargs);
|
|
va_end(vargs);
|
|
pthread_setname_np(thread, threadName);
|
|
#endif
|
|
}
|