From 93d448e2fe72317c68ac24466e0a5ea579a2b557 Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Fri, 21 Feb 2020 09:51:24 -0800 Subject: [PATCH] Fix incorrect CR8 detection Also change level of ring graph print to help debugging [ROCm/rccl commit: f54dc581135c4bff2b835f1dee998842b028d999] --- projects/rccl/src/graph/connect.cc | 4 ++-- projects/rccl/src/graph/search.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/rccl/src/graph/connect.cc b/projects/rccl/src/graph/connect.cc index bf0bcb8b87..0ed929f6f4 100644 --- a/projects/rccl/src/graph/connect.cc +++ b/projects/rccl/src/graph/connect.cc @@ -95,8 +95,8 @@ static ncclResult_t connectRings(struct ncclComm* comm, int* ringRecv, int* ring channel1->ring.next = nextRecvRank; } } - TRACE(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c, channel0->ring.prev, comm->rank, channel0->ring.next); - TRACE(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c+nChannels, channel1->ring.prev, comm->rank, channel1->ring.next); + INFO(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c, channel0->ring.prev, comm->rank, channel0->ring.next); + INFO(NCCL_GRAPH, "Ring %d : %d -> %d -> %d", c+nChannels, channel1->ring.prev, comm->rank, channel1->ring.next); } return ncclSuccess; } diff --git a/projects/rccl/src/graph/search.cc b/projects/rccl/src/graph/search.cc index 8b74122ac9..03e8ecf35f 100644 --- a/projects/rccl/src/graph/search.cc +++ b/projects/rccl/src/graph/search.cc @@ -451,7 +451,7 @@ static void parseChordalRing(struct ncclTopoSystem* system, char **str) { int ngpus = system->nodes[GPU].count; // single node CR8G only - if (ngpus != 8 && system->nodes[NET].count != 0) + if (ngpus != 8 || system->nodes[NET].count != 0) return; // validate chordal ring and calculate distance for (i=0; i