2019-11-19 14:57:39 -08:00
|
|
|
/*************************************************************************
|
2020-01-16 16:02:42 -08:00
|
|
|
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
|
2020-01-15 17:54:27 -07:00
|
|
|
* Modifications Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
|
2019-11-19 14:57:39 -08:00
|
|
|
*
|
|
|
|
|
* See LICENSE.txt for license information
|
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
|
|
#include "comm.h"
|
|
|
|
|
#include "graph.h"
|
|
|
|
|
#include "trees.h"
|
|
|
|
|
#include "rings.h"
|
2020-07-21 14:10:36 -07:00
|
|
|
#include "topo.h"
|
2019-11-19 14:57:39 -08:00
|
|
|
|
|
|
|
|
/******************************************************************/
|
|
|
|
|
/********************* Internode connection ***********************/
|
|
|
|
|
/******************************************************************/
|
|
|
|
|
|
|
|
|
|
/* Initialise the intra-node ring and tree links of every channel of `comm`
 * and publish this rank's view of them in `topoRanks`.
 *
 * For each of the comm->nChannels channels:
 *   - ring.prev/ring.next, tree.up and every tree.down[] entry start at -1;
 *   - this rank is looked up in the channel's slice of ringGraph->intra and
 *     treeGraph->intra (slice = localRanks entries starting at c*localRanks);
 *   - neighbours inside the slice become prev/next (ring) and up/down[0]
 *     (tree); the ends of the slice keep -1;
 *   - topoRanks receives ringRecv/ringSend (first/last entry of the ring
 *     slice), treeToParent/treeToChild0/treeToChild1 (entries of the tree
 *     slice selected by treeGraph->pattern) and ringPrev/ringNext.
 *
 * The first nChannels channel structs are then copied to the next nChannels
 * slots, and finally the collTree links of the first comm->collNetnChannels
 * channels are set from collNetGraph->intra, treating the slice as circular.
 *
 * Always returns ncclSuccess.
 */
ncclResult_t ncclTopoPreset(struct ncclComm* comm,
    struct ncclTopoGraph* treeGraph, struct ncclTopoGraph* ringGraph, struct ncclTopoGraph* collNetGraph,
    struct ncclTopoRanks* topoRanks) {
  const int myRank = comm->rank;
  const int nLocal = comm->localRanks;

  for (int ch = 0; ch < comm->nChannels; ch++) {
    struct ncclChannel* chan = comm->channels+ch;

    // Start from a fully disconnected channel.
    chan->ring.prev = -1;
    chan->ring.next = -1;
    chan->tree.up = -1;
    for (int k = 0; k < NCCL_MAX_TREE_ARITY; k++) chan->tree.down[k] = -1;

    const int* ringOrder = ringGraph->intra+ch*nLocal;
    const int* treeOrder = treeGraph->intra+ch*nLocal;

    // Ring: find ourselves in the intra-node order and link to our neighbours.
    for (int pos = 0; pos < nLocal; pos++) {
      if (ringOrder[pos] != myRank) continue;
      topoRanks->ringRecv[ch] = ringOrder[0];
      topoRanks->ringSend[ch] = ringOrder[nLocal-1];
      if (pos == 0) {
        chan->ring.prev = -1;
      } else {
        chan->ring.prev = ringOrder[pos-1];
      }
      if (pos == nLocal-1) {
        chan->ring.next = -1;
      } else {
        chan->ring.next = ringOrder[pos+1];
      }
    }

    // Tree: same lookup, the slice is a chain going from parent to child.
    for (int pos = 0; pos < nLocal; pos++) {
      if (treeOrder[pos] != myRank) continue;
      const int parentSlot = 0;
      const int child0Slot = (treeGraph->pattern == NCCL_TOPO_PATTERN_TREE) ? 0 : 1;
      const int child1Slot = (treeGraph->pattern == NCCL_TOPO_PATTERN_SPLIT_TREE) ? 1 : 0;

      topoRanks->treeToParent[ch] = treeOrder[parentSlot];
      topoRanks->treeToChild0[ch] = treeOrder[child0Slot];
      topoRanks->treeToChild1[ch] = treeOrder[child1Slot];
      if (pos == 0) {
        chan->tree.up = -1;
      } else {
        chan->tree.up = treeOrder[pos-1];
      }
      if (pos == nLocal-1) {
        chan->tree.down[0] = -1;
      } else {
        chan->tree.down[0] = treeOrder[pos+1];
      }
    }

    topoRanks->ringPrev[ch] = chan->ring.prev;
    topoRanks->ringNext[ch] = chan->ring.next;
  }

  // Duplicate channels rings/trees
  struct ncclChannel* firstHalf = comm->channels;
  struct ncclChannel* secondHalf = firstHalf+comm->nChannels;
  memcpy(secondHalf, firstHalf, comm->nChannels*sizeof(struct ncclChannel));

  // Setup collnet tree
  for (int ch = 0; ch < comm->collNetnChannels; ch++) {
    struct ncclChannel* chan = comm->channels+ch;
    chan->collTree.up = -1;
    for (int k = 0; k < NCCL_MAX_TREE_ARITY; k++) chan->collTree.down[k] = -1;

    const int* collNetOrder = collNetGraph->intra+ch*nLocal;

    for (int pos = 0; pos < nLocal; pos++) {
      if (collNetOrder[pos] != myRank) continue;
      // Neighbours wrap around the slice here, unlike the ring/tree setup above.
      const int before = (pos-1+nLocal)%nLocal;
      const int after = (pos+1)%nLocal;
      chan->collTree.up = collNetOrder[before];
      chan->collTree.down[0] = collNetOrder[after];
    }
  }
  return ncclSuccess;
}
|
|
|
|
|
|
|
|
|
|
/* Close the rings across nodes.
 *
 * ringRecv/ringSend/ringPrev/ringNext are indexed [channel*nRanks + rank].
 * For every channel and every node n, the rank stored in recv[firstRanks[n]]
 * gets as predecessor the rank stored in send[] for node n-1 (wrapping), and
 * the rank stored in send[firstRanks[n]] gets as successor the rank stored in
 * recv[] for node n+1 (wrapping). The result is written into ringPrev/ringNext
 * and, when the affected rank is comm->rank, into the ring of both channel c
 * and channel c+nChannels.
 *
 * Always returns ncclSuccess.
 */
static ncclResult_t connectRings(struct ncclComm* comm, int* ringRecv, int* ringSend, int* ringPrev, int* ringNext, int* firstRanks) {
  const int numChannels = comm->nChannels;
  const int numNodes = comm->nNodes;

  for (int ch = 0; ch < numChannels; ch++) {
    int* recvOf = ringRecv+ch*comm->nRanks;
    int* sendOf = ringSend+ch*comm->nRanks;
    int* prevOf = ringPrev+ch*comm->nRanks;
    int* nextOf = ringNext+ch*comm->nRanks;
    struct ncclChannel* primary = comm->channels+ch;
    struct ncclChannel* mirror = primary+numChannels;

    for (int node = 0; node < numNodes; node++) {
      const int nodeBefore = (node-1+numNodes)%numNodes;
      const int nodeAfter = (node+1)%numNodes;

      // Incoming link: this node's receiver listens to the previous node's sender.
      const int receiver = recvOf[firstRanks[node]];
      const int upstreamSender = sendOf[firstRanks[nodeBefore]];
      prevOf[receiver] = upstreamSender;
      if (receiver == comm->rank) {
        primary->ring.prev = upstreamSender;
        mirror->ring.prev = upstreamSender;
      }

      // Outgoing link: this node's sender feeds the next node's receiver.
      const int sender = sendOf[firstRanks[node]];
      const int downstreamReceiver = recvOf[firstRanks[nodeAfter]];
      nextOf[sender] = downstreamReceiver;
      if (sender == comm->rank) {
        primary->ring.next = downstreamReceiver;
        mirror->ring.next = downstreamReceiver;
      }
    }
    TRACE(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", ch, primary->ring.prev, comm->rank, primary->ring.next);
    TRACE(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", ch+numChannels, mirror->ring.prev, comm->rank, mirror->ring.next);
  }
  return ncclSuccess;
}
|
|
|
|
|
|
|
|
|
|
/* Fill indexes[n] with ranks[firstRanks[n]] for each n in [0, nNodes).
 * Always returns ncclSuccess. */
static ncclResult_t getIndexes(int* ranks, int* indexes, int nNodes, int* firstRanks) {
  int n = 0;
  while (n < nNodes) {
    const int firstRank = firstRanks[n];
    indexes[n] = ranks[firstRank];
    n++;
  }
  return ncclSuccess;
}
|
|
|
|
|
|
2020-09-04 14:35:05 -07:00
|
|
|
/* Set tree->up to indexes[u], unless u is -1 in which case the tree is left
 * untouched. Always returns ncclSuccess. */
static ncclResult_t setTreeUp(struct ncclTree* tree, int* indexes, int u) {
  if (u != -1) {
    tree->up = indexes[u];
  }
  return ncclSuccess;
}
|
|
|
|
|
|
2020-09-04 14:35:05 -07:00
|
|
|
/* Append indexes[d] to the first free (negative) entry of tree->down[].
 *
 * A d of -1 means "no child" and is a no-op. Returns ncclInternalError, after
 * logging a warning, when all NCCL_MAX_TREE_ARITY entries are already in use;
 * ncclSuccess otherwise.
 */
static ncclResult_t setTreeDown(struct ncclTree* tree, int* indexes, int d) {
  if (d == -1) return ncclSuccess;

  // Locate the first unused child slot.
  int slot;
  for (slot = 0; slot < NCCL_MAX_TREE_ARITY; slot++) {
    if (tree->down[slot] < 0) break;
  }

  if (slot == NCCL_MAX_TREE_ARITY) {
    WARN("Internal error : tree already has %d children (%d %d %d)", slot, tree->down[0], tree->down[1], tree->down[2]);
    return ncclInternalError;
  }

  tree->down[slot] = indexes[d];
  return ncclSuccess;
}
|
|
|
|
|
|
2020-09-04 14:35:05 -07:00
|
|
|
/* Wire the inter-node tree links of every channel, using caller-owned scratch
 * arrays (ranksToParent/ranksToChild0/ranksToChild1, nNodes ints each).
 *
 * Split out of connectTrees() so that any early NCCLCHECK return in here
 * cannot leak the scratch arrays: the caller frees them on every path.
 */
static ncclResult_t connectTreesWithBuffers(struct ncclComm* comm, int* treeToParent, int* treeToChild0, int* treeToChild1, int* firstRanks,
    int* ranksToParent, int* ranksToChild0, int* ranksToChild1) {
  const int nChannels = comm->nChannels, nNodes = comm->nNodes, node = comm->node;

  // Compute tree depth. Not an exact value but a good approximation in most
  // cases
  int depth = comm->nRanks/nNodes - 1 + log2i(nNodes);

  // Up / down0 / down1 node indexes (and child type) of this node in the two
  // trees returned by ncclGetDtree; -1 entries are skipped by setTreeUp/Down.
  int t0u, t0d0, t0d1, t0ChildType, t1u, t1d0, t1d1, t1ChildType;
  NCCLCHECK(ncclGetDtree(nNodes, node, &t0u, &t0d0, &t0d1, &t0ChildType, &t1u, &t1d0, &t1d1, &t1ChildType));

  for (int c=0; c<nChannels; c++) {
    struct ncclChannel* channel0 = comm->channels+c;
    struct ncclChannel* channel1 = channel0+nChannels;

    // Per-node rank playing each role on this channel.
    NCCLCHECK(getIndexes(treeToParent+c*comm->nRanks, ranksToParent, nNodes, firstRanks));
    NCCLCHECK(getIndexes(treeToChild0+c*comm->nRanks, ranksToChild0, nNodes, firstRanks));
    NCCLCHECK(getIndexes(treeToChild1+c*comm->nRanks, ranksToChild1, nNodes, firstRanks));

    if (comm->rank == ranksToParent[node]) {
      // The child type selects which of the parent node's ranks we attach to.
      NCCLCHECK(setTreeUp(&channel0->tree, t0ChildType == 0 ? ranksToChild0 : ranksToChild1, t0u));
      NCCLCHECK(setTreeUp(&channel1->tree, t1ChildType == 0 ? ranksToChild0 : ranksToChild1, t1u));
    }
    if (comm->rank == ranksToChild0[node]) {
      NCCLCHECK(setTreeDown(&channel0->tree, ranksToParent, t0d0));
      NCCLCHECK(setTreeDown(&channel1->tree, ranksToParent, t1d0));
    }
    if (comm->rank == ranksToChild1[node]) {
      NCCLCHECK(setTreeDown(&channel0->tree, ranksToParent, t0d1));
      NCCLCHECK(setTreeDown(&channel1->tree, ranksToParent, t1d1));
    }
    if (comm->rank == ranksToParent[node] ||
        comm->rank == ranksToChild0[node] ||
        comm->rank == ranksToChild1[node]) {
      INFO(NCCL_GRAPH, "Tree %d : %d -> %d -> %d/%d/%d", c, channel0->tree.up, comm->rank, channel0->tree.down[0], channel0->tree.down[1], channel0->tree.down[2]);
      INFO(NCCL_GRAPH, "Tree %d : %d -> %d -> %d/%d/%d", c+nChannels, channel1->tree.up, comm->rank, channel1->tree.down[0], channel1->tree.down[1], channel1->tree.down[2]);
    }
    channel0->tree.depth = channel1->tree.depth = depth;
  }
  return ncclSuccess;
}

/* Connect the intra-node trees of all nodes into inter-node trees.
 *
 * treeToParent/treeToChild0/treeToChild1 are indexed [channel*nRanks + rank].
 * For each channel c, both channel c and channel c+nChannels get their
 * tree.up / tree.down[] extended and their tree.depth set. treePatterns is
 * accepted for interface compatibility and is not read.
 *
 * Returns ncclSuccess, or the first error reported by ncclCalloc,
 * ncclGetDtree, getIndexes, setTreeUp or setTreeDown. The temporary per-node
 * arrays are released on every path, including error returns.
 */
static ncclResult_t connectTrees(struct ncclComm* comm, int* treeToParent, int* treeToChild0, int* treeToChild1, int* firstRanks, int* treePatterns) {
  const int nNodes = comm->nNodes;
  // Initialised to NULL so the unconditional free() below is safe even when
  // an allocation fails part-way through.
  int* ranksToParent = NULL;
  int* ranksToChild0 = NULL;
  int* ranksToChild1 = NULL;

  // Not named `res`: NCCLCHECK-style macros commonly declare a local of that name.
  ncclResult_t ret = ncclCalloc(&ranksToParent, nNodes);
  if (ret == ncclSuccess) ret = ncclCalloc(&ranksToChild0, nNodes);
  if (ret == ncclSuccess) ret = ncclCalloc(&ranksToChild1, nNodes);
  if (ret == ncclSuccess) {
    ret = connectTreesWithBuffers(comm, treeToParent, treeToChild0, treeToChild1, firstRanks,
        ranksToParent, ranksToChild0, ranksToChild1);
  }

  free(ranksToParent);
  free(ranksToChild0);
  free(ranksToChild1);
  return ret;
}
|
|
|
|
|
|
2020-01-16 16:02:42 -08:00
|
|
|
/* Finalise the collTree of the first comm->collNetnChannels channels for `rank`.
 *
 * The channels are handled as two halves of collNetnChannels/2 channels each.
 * In the first half the "master" position inside a channel's slice of
 * collNetGraph->intra is 0 for NCCL_TOPO_PATTERN_TREE and 1 otherwise; in the
 * second half it is always 0. Both halves index the slice with the
 * half-relative channel number. For each channel:
 *   - if `rank` sits at the master position, collTree.up becomes nRanks;
 *   - if `rank` sits at the position just before the master (circularly),
 *     collTree.down[0] becomes -1;
 *   - collTree.depth is set to nRanks/nNodes.
 *
 * Always returns ncclSuccess.
 */
ncclResult_t ncclTopoConnectCollNet(struct ncclComm* comm, struct ncclTopoGraph* collNetGraph, int rank) {
  const int nranks = comm->nRanks;
  const int depth = nranks/comm->nNodes;
  const int halfChannels = comm->collNetnChannels/2;

  // Master position per half: send side depends on topo pattern, recv side is always 0.
  int masterIndex[2];
  masterIndex[0] = (collNetGraph->pattern == NCCL_TOPO_PATTERN_TREE) ? 0 : 1;
  masterIndex[1] = 0;

  for (int half = 0; half < 2; half++) {
    const int headIndex = masterIndex[half];
    const int tailIndex = (headIndex+comm->localRanks-1)%comm->localRanks;

    for (int c = 0; c < halfChannels; c++) {
      const int channelId = half*halfChannels+c;
      struct ncclChannel* channel = comm->channels+channelId;
      const int* intra = collNetGraph->intra+c*comm->localRanks;

      // Set root of collTree to id nranks
      if (intra[headIndex] == rank) { // is master
        channel->collTree.up = nranks;
      }
      if (intra[tailIndex] == rank) { // is bottom of intra-node chain
        channel->collTree.down[0] = -1;
      }
      channel->collTree.depth = depth;
      INFO(NCCL_GRAPH, "CollNet Channel %d rank %d up %d down %d", channelId, rank, channel->collTree.up, channel->collTree.down[0]);
    }
  }
  return ncclSuccess;
}
|
|
|
|
|
|
2019-11-19 14:57:39 -08:00
|
|
|
// Legacy naming
// The default of -2 is the value ncclMinNchannels()/ncclMaxNchannels() compare
// against to detect that a parameter was left unset.
// NOTE(review): NCCL_PARAM is defined outside this file; presumably each line
// generates the ncclParam<Name>() accessor used below - confirm.
|
|
|
|
|
NCCL_PARAM(MinNrings, "MIN_NRINGS", -2);
|
|
|
|
|
NCCL_PARAM(MaxNrings, "MAX_NRINGS", -2);
|
|
|
|
|
// New naming
// When both the legacy and the new parameter are set, the new one wins: it is
// read last in ncclMinNchannels()/ncclMaxNchannels().
|
|
|
|
|
NCCL_PARAM(MinNchannels, "MIN_NCHANNELS", -2);
|
|
|
|
|
NCCL_PARAM(MaxNchannels, "MAX_NCHANNELS", -2);
|
|
|
|
|
|
|
|
|
|
int ncclMinNchannels() {
|
2019-11-26 16:33:13 -08:00
|
|
|
int minNchannels = 2;
|
2019-11-19 14:57:39 -08:00
|
|
|
if (ncclParamMinNrings() != -2) minNchannels = ncclParamMinNrings();
|
|
|
|
|
if (ncclParamMinNchannels() != -2) minNchannels = ncclParamMinNchannels();
|
|
|
|
|
if (minNchannels > MAXCHANNELS) {
|
2021-02-09 15:34:08 -08:00
|
|
|
WARN("User asked for a minimum of %d channels, limiting to %d", minNchannels, MAXCHANNELS);
|
2019-11-19 14:57:39 -08:00
|
|
|
minNchannels = MAXCHANNELS;
|
|
|
|
|
}
|
|
|
|
|
if (minNchannels < 0) minNchannels = 0;
|
|
|
|
|
return minNchannels;
|
|
|
|
|
}
|
|
|
|
|
int ncclMaxNchannels() {
|
|
|
|
|
int maxNchannels = MAXCHANNELS;
|
|
|
|
|
if (ncclParamMaxNrings() != -2) maxNchannels = ncclParamMaxNrings();
|
|
|
|
|
if (ncclParamMaxNchannels() != -2) maxNchannels = ncclParamMaxNchannels();
|
|
|
|
|
if (maxNchannels > MAXCHANNELS) maxNchannels = MAXCHANNELS;
|
|
|
|
|
if (maxNchannels < 1) {
|
2021-02-09 15:34:08 -08:00
|
|
|
WARN("User asked for a maximum of %d channels, setting it to 1", maxNchannels);
|
2019-11-19 14:57:39 -08:00
|
|
|
maxNchannels = 1;
|
|
|
|
|
}
|
|
|
|
|
return maxNchannels;
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-05 17:51:56 -07:00
|
|
|
/* Scratch arrays used by ncclTopoPostset. Each one holds nRanks*MAXCHANNELS
 * ints, indexed [channel*nRanks + rank]. */
struct ncclTopoPostsetBuffers {
  int* ringRecv;
  int* ringSend;
  int* ringPrev;
  int* ringNext;
  int* treeToParent;
  int* treeToChild0;
  int* treeToChild1;
};

/* Body of ncclTopoPostset(), operating on caller-owned scratch arrays.
 *
 * Split out so that an early NCCLCHECK return (connectRings, connectTrees,
 * ncclBuildRings) cannot leak the scratch arrays: the caller frees them on
 * every path.
 */
static ncclResult_t ncclTopoPostsetWithBuffers(struct ncclComm* comm, int* firstRanks, int* treePatterns, struct ncclTopoRanks** allTopoRanks, int* rings, int nc,
    struct ncclTopoPostsetBuffers* bufs) {
  int nranks = comm->nRanks;
  int nChannels = comm->nChannels;

  // Gather data from all ranks
  for (int i=0; i<nranks; i++) {
    for (int c=0; c<nChannels; c++) {
      bufs->ringRecv[c*nranks+i] = allTopoRanks[i]->ringRecv[c];
      bufs->ringSend[c*nranks+i] = allTopoRanks[i]->ringSend[c];
      bufs->ringPrev[c*nranks+i] = allTopoRanks[i]->ringPrev[c];
      bufs->ringNext[c*nranks+i] = allTopoRanks[i]->ringNext[c];
      bufs->treeToParent[c*nranks+i] = allTopoRanks[i]->treeToParent[c];
      bufs->treeToChild0[c*nranks+i] = allTopoRanks[i]->treeToChild0[c];
      bufs->treeToChild1[c*nranks+i] = allTopoRanks[i]->treeToChild1[c];
    }
  }

  // Connect rings and trees. This should also duplicate the channels.
  NCCLCHECK(connectRings(comm, bufs->ringRecv, bufs->ringSend, bufs->ringPrev, bufs->ringNext, firstRanks));
  NCCLCHECK(connectTrees(comm, bufs->treeToParent, bufs->treeToChild0, bufs->treeToChild1, firstRanks, treePatterns));

  // Duplicate ringPrev/ringNext for ncclBuildRing
  memcpy(bufs->ringPrev+nChannels*nranks, bufs->ringPrev, nChannels*nranks*sizeof(int));
  memcpy(bufs->ringNext+nChannels*nranks, bufs->ringNext, nChannels*nranks*sizeof(int));

  // Duplication should be complete now
  nChannels = comm->nChannels = std::min(MAXCHANNELS,nChannels*2);

  // Honor NCCL_MIN_NRINGS/NCCL_MAX_NRINGS.
  // We permit combining max, then min, to only use the first channels, then duplicate them.
  // The limits are evaluated once: they do not change while we duplicate, and
  // re-evaluating them per iteration could repeat their warnings.
  const int maxNchannels = ncclMaxNchannels();
  nChannels = comm->nChannels = std::min(maxNchannels, nChannels);
  const int wantedNchannels = std::min(maxNchannels, std::max(nc, ncclMinNchannels()));
  int c;
  for (c=nChannels; c<wantedNchannels; c++) {
    // Channel c is a copy of channel c-nChannels (nChannels is fixed during the loop).
    memcpy(bufs->ringPrev+c*nranks, bufs->ringPrev+(c-nChannels)*nranks, nranks*sizeof(int));
    memcpy(bufs->ringNext+c*nranks, bufs->ringNext+(c-nChannels)*nranks, nranks*sizeof(int));
    memcpy(comm->channels+c, comm->channels+c-nChannels, sizeof(struct ncclChannel));
  }
  nChannels = comm->nChannels = c;

  // Create rings array and check all is fine
  NCCLCHECK(ncclBuildRings(nChannels, rings, comm->rank, comm->nRanks, bufs->ringPrev, bufs->ringNext));

  return ncclSuccess;
}

/* Combine the per-rank topology info of all ranks into the final channels.
 *
 * Gathers allTopoRanks[rank] for every rank and channel, connects rings and
 * trees across nodes, doubles the channel count (capped at MAXCHANNELS),
 * applies the ncclMaxNchannels() limit, then keeps duplicating channels until
 * min(ncclMaxNchannels(), max(nc, ncclMinNchannels())) is reached. Finally
 * comm->nChannels is updated and `rings` is filled by ncclBuildRings.
 *
 * Returns ncclSuccess, or the first error reported by ncclCalloc,
 * connectRings, connectTrees or ncclBuildRings. All temporary arrays are
 * released on every path, including error returns.
 */
ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePatterns, struct ncclTopoRanks** allTopoRanks, int* rings, int nc) {
  int nranks = comm->nRanks;
  // All NULL so the unconditional free() calls below are safe even when an
  // allocation fails part-way through.
  struct ncclTopoPostsetBuffers bufs = { NULL, NULL, NULL, NULL, NULL, NULL, NULL };

  // Not named `res`: NCCLCHECK-style macros commonly declare a local of that name.
  ncclResult_t ret = ncclCalloc(&bufs.ringRecv, nranks*MAXCHANNELS);
  if (ret == ncclSuccess) ret = ncclCalloc(&bufs.ringSend, nranks*MAXCHANNELS);
  if (ret == ncclSuccess) ret = ncclCalloc(&bufs.ringPrev, nranks*MAXCHANNELS);
  if (ret == ncclSuccess) ret = ncclCalloc(&bufs.ringNext, nranks*MAXCHANNELS);
  if (ret == ncclSuccess) ret = ncclCalloc(&bufs.treeToParent, nranks*MAXCHANNELS);
  if (ret == ncclSuccess) ret = ncclCalloc(&bufs.treeToChild0, nranks*MAXCHANNELS);
  if (ret == ncclSuccess) ret = ncclCalloc(&bufs.treeToChild1, nranks*MAXCHANNELS);
  if (ret == ncclSuccess) {
    ret = ncclTopoPostsetWithBuffers(comm, firstRanks, treePatterns, allTopoRanks, rings, nc, &bufs);
  }

  free(bufs.ringRecv);
  free(bufs.ringSend);
  free(bufs.ringPrev);
  free(bufs.ringNext);
  free(bufs.treeToParent);
  free(bufs.treeToChild0);
  free(bufs.treeToChild1);

  return ret;
}
|