Files
rocm-systems/src/misc/param.cc
T
Sylvain Jeaugey f3d5166783 2.16.5-1
Add support for 400Gbit NDR network adapters (CX7)
Handle EINTR in socket poll() function
Add NCCL_PROGRESS_APPENDOP_FREQ to control op append overhead
Resource cleanup fixes
Fix double free in case of init failure
Fix crash in ncclCommAbort
Revert AMD speed commit
2023-02-02 12:52:47 -08:00

82 строки
2.3 KiB
C++

/*************************************************************************
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#include "param.h"
#include "debug.h"
#include <algorithm>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <pthread.h>
#include <pwd.h>
const char* userHomeDir() {
struct passwd *pwUser = getpwuid(getuid());
return pwUser == NULL ? NULL : pwUser->pw_dir;
}
void setEnvFile(const char* fileName) {
FILE * file = fopen(fileName, "r");
if (file == NULL) return;
char *line = NULL;
char envVar[1024];
char envValue[1024];
size_t n = 0;
ssize_t read;
while ((read = getline(&line, &n, file)) != -1) {
if (line[read-1] == '\n') line[read-1] = '\0';
int s=0; // Env Var Size
while (line[s] != '\0' && line[s] != '=') s++;
if (line[s] == '\0') continue;
strncpy(envVar, line, std::min(1023,s));
envVar[s] = '\0';
s++;
strncpy(envValue, line+s, 1023);
envValue[1023]='\0';
setenv(envVar, envValue, 0);
//printf("%s : %s->%s\n", fileName, envVar, envValue);
}
if (line) free(line);
fclose(file);
}
void initEnv() {
char confFilePath[1024];
const char * userDir = userHomeDir();
if (userDir) {
sprintf(confFilePath, "%s/.nccl.conf", userDir);
setEnvFile(confFilePath);
}
sprintf(confFilePath, "/etc/nccl.conf");
setEnvFile(confFilePath);
}
void ncclLoadParam(char const* env, int64_t deftVal, int64_t uninitialized, int64_t* cache) {
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_lock(&mutex);
if (__atomic_load_n(cache, __ATOMIC_RELAXED) == uninitialized) {
char* str = getenv(env);
int64_t value = deftVal;
if (str && strlen(str) > 0) {
errno = 0;
value = strtoll(str, nullptr, 0);
if (errno) {
value = deftVal;
INFO(NCCL_ALL,"Invalid value %s for %s, using default %lld.", str, env, (long long)deftVal);
} else {
INFO(NCCL_ENV,"%s set by environment to %lld.", env, (long long)value);
}
}
__atomic_store_n(cache, value, __ATOMIC_RELAXED);
}
pthread_mutex_unlock(&mutex);
}