Merge pull request #173 from wenkaidu/cr8
Fix incorrect CR8 detection
[ROCm/rccl commit: 8fb18e0ae7]
Этот коммит содержится в:
@@ -95,8 +95,8 @@ static ncclResult_t connectRings(struct ncclComm* comm, int* ringRecv, int* ring
|
||||
channel1->ring.next = nextRecvRank;
|
||||
}
|
||||
}
|
||||
- TRACE(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c, channel0->ring.prev, comm->rank, channel0->ring.next);
|
||||
- TRACE(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c+nChannels, channel1->ring.prev, comm->rank, channel1->ring.next);
|
||||
+ INFO(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c, channel0->ring.prev, comm->rank, channel0->ring.next);
|
||||
+ INFO(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c+nChannels, channel1->ring.prev, comm->rank, channel1->ring.next);
|
||||
}
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
@@ -451,7 +451,7 @@ static void parseChordalRing(struct ncclTopoSystem* system, char **str) {
|
||||
|
||||
int ngpus = system->nodes[GPU].count;
|
||||
// single node CR8G only
|
||||
- if (ngpus != 8 && system->nodes[NET].count != 0)
|
||||
+ if (ngpus != 8 || system->nodes[NET].count != 0)
|
||||
return;
|
||||
// validate chordal ring and calculate distance
|
||||
for (i=0; i<ngpus; i++) {
|
||||
|
||||
Ссылка в новой задаче
Block a user