Change default channels duplication for chordal ring (#233)
This commit is contained in:
@@ -110,8 +110,8 @@ static ncclResult_t connectRings(struct ncclComm* comm, int* ringRecv, int* ring
|
||||
channel1->ring.next = nextRecvRank;
|
||||
}
|
||||
}
|
||||
INFO(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c, channel0->ring.prev, comm->rank, channel0->ring.next);
|
||||
INFO(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c+nChannels, channel1->ring.prev, comm->rank, channel1->ring.next);
|
||||
TRACE(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c, channel0->ring.prev, comm->rank, channel0->ring.next);
|
||||
TRACE(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c+nChannels, channel1->ring.prev, comm->rank, channel1->ring.next);
|
||||
}
|
||||
return ncclSuccess;
|
||||
}
|
||||
@@ -288,6 +288,10 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, struct nccl
|
||||
memcpy(ringPrev+nChannels*nranks, ringPrev, nChannels*nranks*sizeof(int));
|
||||
memcpy(ringNext+nChannels*nranks, ringNext, nChannels*nranks*sizeof(int));
|
||||
|
||||
char *str = NULL;
|
||||
NCCLCHECK(parseChordalRing(comm->topo, &str));
|
||||
int end = std::min((int)ncclMaxNchannels(), (str ? nChannels*3 : ncclMinNchannels()));
|
||||
|
||||
// Duplication should be complete now
|
||||
nChannels = comm->nChannels = std::min(MAXCHANNELS,nChannels*2);
|
||||
|
||||
@@ -295,7 +299,7 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, struct nccl
|
||||
// We permit combining max, then min, to only use the first channels, then duplicate them.
|
||||
nChannels = comm->nChannels = std::min((int)ncclMaxNchannels(), nChannels);
|
||||
int c;
|
||||
for (c=nChannels; c<ncclMinNchannels(); c++) {
|
||||
for (c=nChannels; c<end; c++) {
|
||||
memcpy(ringPrev+c*nranks, ringPrev+(c-nChannels)*nranks, nranks*sizeof(int));
|
||||
memcpy(ringNext+c*nranks, ringNext+(c-nChannels)*nranks, nranks*sizeof(int));
|
||||
memcpy(comm->channels+c, comm->channels+c-nChannels, sizeof(struct ncclChannel));
|
||||
|
||||
@@ -714,7 +714,7 @@ end:
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
static void parseChordalRing(struct ncclTopoSystem* system, char **str) {
|
||||
ncclResult_t parseChordalRing(struct ncclTopoSystem* system, char **str) {
|
||||
static const char *ringBase = "0 6 7 4 5 3 2 1|0 5 6 3 7 1 4 2|0 4 6 2 7 5 1 3|0 1 2 3 5 4 7 6|0 2 4 1 7 3 6 5|0 3 1 5 7 2 6 4";
|
||||
static char ringRemap[256];
|
||||
int id[8], dist[8];
|
||||
@@ -723,7 +723,7 @@ static void parseChordalRing(struct ncclTopoSystem* system, char **str) {
|
||||
int ngpus = system->nodes[GPU].count;
|
||||
// single node CR8G only
|
||||
if (ngpus != 8 || system->nodes[NET].count != 0)
|
||||
return;
|
||||
return ncclSuccess;
|
||||
// validate chordal ring and calculate distance
|
||||
for (i=0; i<ngpus; i++) {
|
||||
struct ncclTopoNode* node = system->nodes[GPU].nodes+i;
|
||||
@@ -741,7 +741,7 @@ static void parseChordalRing(struct ncclTopoSystem* system, char **str) {
|
||||
count ++;
|
||||
}
|
||||
if(count != ngpus-2 || sum < 0 || sum > ngpus-1) {
|
||||
return;
|
||||
return ncclSuccess;
|
||||
}
|
||||
dist[i] = sum;
|
||||
}
|
||||
@@ -766,7 +766,7 @@ static void parseChordalRing(struct ncclTopoSystem* system, char **str) {
|
||||
ringRemap[i] = 0;
|
||||
*str = ringRemap;
|
||||
INFO(NCCL_GRAPH, "Use chordal ring: %s", ringRemap);
|
||||
return;
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
|
||||
@@ -799,7 +799,7 @@ ncclResult_t ncclTopoCompute(ncclTopoSystem* system, struct ncclTopoGraph* graph
|
||||
if (graph->nChannels > 0) return ncclSuccess;
|
||||
}
|
||||
|
||||
if (!str) parseChordalRing(system, &str);
|
||||
if (!str) NCCLCHECK(parseChordalRing(system, &str));
|
||||
if (str) {
|
||||
NCCLCHECK(parseGraph(str, &graph->nChannels, ngpus, graph->intra));
|
||||
for (int i=0; i<graph->nChannels*ngpus; i++) {
|
||||
|
||||
@@ -101,4 +101,6 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom
|
||||
#include "info.h"
|
||||
ncclResult_t ncclTopoGetAlgoTime(struct ncclInfo* info, int algorithm, int protocol, float* time);
|
||||
|
||||
ncclResult_t parseChordalRing(struct ncclTopoSystem* system, char **str);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -870,6 +870,7 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, ncclUniqueId* comm
|
||||
snprintf(line+strlen(line), 1023-strlen(line), " [%d] %d/%d/%d->%d->%d|%d->%d->%d/%d/%d",
|
||||
c, treeUp->down[0], treeUp->down[1], treeUp->down[2], rank, treeUp->up,
|
||||
treeDn->up, rank, treeDn->down[0], treeDn->down[1], treeDn->down[2]);
|
||||
INFO(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c, comm->channels[c].ring.prev, comm->rank, comm->channels[c].ring.next);
|
||||
}
|
||||
line[1023] = '\0';
|
||||
INFO(NCCL_INIT, "Trees%s", line);
|
||||
|
||||
@@ -448,6 +448,7 @@ ncclResult_t initTransportsRank_3(struct ncclComm* comm, struct allGather3Data_t
|
||||
snprintf(line+strlen(line), 1023-strlen(line), " [%d] %d/%d/%d->%d->%d|%d->%d->%d/%d/%d",
|
||||
c, treeUp->down[0], treeUp->down[1], treeUp->down[2], rank, treeUp->up,
|
||||
treeDn->up, rank, treeDn->down[0], treeDn->down[1], treeDn->down[2]);
|
||||
INFO(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c, comm->channels[c].ring.prev, comm->rank, comm->channels[c].ring.next);
|
||||
}
|
||||
line[1023] = '\0';
|
||||
INFO(NCCL_INIT, "Trees%s", line);
|
||||
|
||||
Reference in a new issue
Block a user