Merge remote-tracking branch 'nccl/master' into develop
Этот коммит содержится в:
@@ -1,6 +1,6 @@
|
||||
##### version
|
||||
NCCL_MAJOR := 2
|
||||
NCCL_MINOR := 7
|
||||
NCCL_PATCH := 3
|
||||
NCCL_PATCH := 6
|
||||
NCCL_SUFFIX :=
|
||||
PKG_REVISION := 1
|
||||
|
||||
@@ -947,8 +947,8 @@ done:
|
||||
int dupChannels = std::min(graph->nChannels*2, graph->maxChannels);
|
||||
memcpy(graph->intra+graph->nChannels*ngpus, graph->intra, (dupChannels-graph->nChannels)*ngpus*sizeof(int));
|
||||
memcpy(graph->inter+graph->nChannels*2,graph->inter, (dupChannels-graph->nChannels)*2*sizeof(int));
|
||||
graph->speedIntra /= 2;
|
||||
graph->speedInter /= 2;
|
||||
graph->speedIntra /= DIVUP(dupChannels, graph->nChannels);
|
||||
graph->speedInter /= DIVUP(dupChannels, graph->nChannels);
|
||||
graph->nChannels = dupChannels;
|
||||
}
|
||||
return ncclSuccess;
|
||||
|
||||
@@ -108,7 +108,6 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom
|
||||
for (int p=0; p<NCCL_NUM_PROTOCOLS; p++) {
|
||||
float speed = comm->nNodes <= 2 || a == NCCL_ALGO_COLLNET ? graphs[a]->speedIntra : graphs[a]->speedInter;
|
||||
float busBw = graphs[a]->nChannels * speed;
|
||||
if (compCap80) busBw *= 0.92;
|
||||
|
||||
// Various model refinements
|
||||
if (a == NCCL_ALGO_RING && p == NCCL_PROTO_LL) busBw *= 1.0/5.0;
|
||||
|
||||
@@ -687,9 +687,14 @@ ncclResult_t ncclTopoGetXmlFromGpu(struct ncclXmlNode* pciNode, nvmlDevice_t nvm
|
||||
if (index == -1) {
|
||||
const char* busId;
|
||||
NCCLCHECK(xmlGetAttr(sub, "target", &busId));
|
||||
char* path;
|
||||
NCCLCHECK(getPciPath(busId, &path));
|
||||
NCCLCHECK(ncclTopoSetAttrFromSys(sub, path, "class", "tclass"));
|
||||
if (strcmp(busId, "fffffff:ffff:ff") == 0) {
|
||||
// Remote NVLink device is not visible inside this VM. Assume NVSwitch.
|
||||
NCCLCHECK(xmlSetAttr(sub, "tclass", "0x068000"));
|
||||
} else {
|
||||
char* path;
|
||||
NCCLCHECK(getPciPath(busId, &path));
|
||||
NCCLCHECK(ncclTopoSetAttrFromSys(sub, path, "class", "tclass"));
|
||||
}
|
||||
}
|
||||
}
|
||||
*gpuNodeRet = gpuNode;
|
||||
|
||||
+4
-1
@@ -146,7 +146,10 @@ void* ncclAsyncThreadPreconnect(void* args_) {
|
||||
|
||||
NCCL_API(ncclResult_t, ncclGroupEnd);
|
||||
ncclResult_t ncclGroupEnd() {
|
||||
if (ncclGroupMode == 0) return ncclInvalidUsage;
|
||||
if (ncclGroupMode == 0) {
|
||||
WARN("ncclGroupEnd: not in a group call.");
|
||||
return ncclInvalidUsage;
|
||||
}
|
||||
ncclGroupMode--;
|
||||
if (ncclGroupMode > 0) return ncclSuccess;
|
||||
int savedDev;
|
||||
|
||||
Ссылка в новой задаче
Block a user