f40ce73e89
Added detection of IBM/Power NVLink bridge device.
Add NUMA support to PCI distance calculations.
Added NCCL_IGNORE_CPU_AFFINITY env var.
Fix memory leaks; GithubIssue#180
Compiler warning fix; GithubIssue#178
Replace non-standard variable length arrays. GithubIssue#171
Fix Tree+Shared Memory crash. GithubPR#185
Fix LL cleanup hang during long running DL jobs.
Fix NCCL_RINGS environment variable handling.
Added extra checks to catch repeated calls to ncclCommDestroy(). GithubIssue#191
Improve bootstrap socket connection reliability at scale.
Fix hostname hashing issue. GithubIssue#187
Code cleanup to rename all non device files from *.cu to *.cc
46 líneas
1020 B
C
46 líneas
1020 B
C
/*************************************************************************
 * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/
|
|
|
|
#ifndef NCCL_TOPO_H_
|
|
#define NCCL_TOPO_H_
|
|
|
|
#include "nccl.h"
|
|
#include <limits.h>
|
|
#include <stdlib.h>
|
|
#include <ctype.h>
|
|
#include <stdio.h>
|
|
|
|
// Declaration only; the definition lives in another translation unit.
// Takes a CUDA device index and an out-parameter `path`, and reports status
// as an ncclResult_t.
// NOTE(review): presumably stores a filesystem path for the device in *path;
// who allocates and frees that string is not visible here — confirm against
// the implementation.
ncclResult_t getCudaPath(int cudaDev, char** path);
|
|
|
|
/*
 * Read the NUMA node id stored under a directory.
 *
 * Builds the file name "<path>/numa_node", opens it for reading and parses a
 * single decimal integer from it.
 *
 * path: NUL-terminated directory name (presumably a sysfs device directory —
 *       confirm against callers).
 *
 * Returns the integer read from the file, or -1 when path is NULL, when the
 * combined file name does not fit in PATH_MAX bytes, when the file cannot be
 * opened, or when no integer can be parsed from it.
 */
static int getNumaId(char *path) {
  if (path == NULL) return -1;

  char npath[PATH_MAX];
  int len = snprintf(npath, sizeof(npath), "%s/numa_node", path);
  // snprintf always NUL-terminates; a result >= the buffer size means the
  // name was truncated, and opening a truncated name could hit the wrong file.
  if (len < 0 || (size_t)len >= sizeof(npath)) return -1;

  FILE *file = fopen(npath, "r");
  if (file == NULL) return -1;

  int numaId = -1;
  // fscanf reports the number of converted items: anything other than 1
  // (EOF, or 0 on a matching failure) means no valid id was read.
  if (fscanf(file, "%d", &numaId) != 1) numaId = -1;
  fclose(file);

  return numaId;
}
|
|
|
|
/*
 * Distance classes between two paths, numbered consecutively from 0.
 * PATH_ARRAY_SIZE is the count of real classes and is used to size tables
 * indexed by this enum.
 * NOTE(review): the names look like the nvidia-smi topology legend
 * (PIX/PXB/PHB/NODE/SYS), ordered from closest to farthest — confirm.
 */
enum ncclPathDist {
  PATH_PIX,        /* 0 */
  PATH_PXB,        /* 1 */
  PATH_PHB,        /* 2 */
  PATH_NODE,       /* 3 */
  PATH_SYS,        /* 4 */
  PATH_ARRAY_SIZE  /* 5: number of classes above, not a class itself */
};
|
|
|
|
// Table of PATH_ARRAY_SIZE strings, defined in another translation unit.
// NOTE(review): presumably the printable name of each ncclPathDist value,
// indexed by that value — confirm against the definition.
extern const char* pathDists[PATH_ARRAY_SIZE];
|
|
|
|
// Declaration only; the definition lives in another translation unit.
// Takes two path strings and returns an int.
// NOTE(review): presumably the result is one of the ncclPathDist values
// describing how far apart the two devices are — confirm against the
// implementation.
int pciDistance(char* path1, char* path2);
|
|
|
|
#endif
|