From 7b7f781658c5f5954dfc7f55c6f8bf9f05106f30 Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Sat, 25 Apr 2020 01:01:13 +0000 Subject: [PATCH] Fix incorrect next device ID in PCI ordered search [ROCm/rccl commit: edb49ed2d55d16a89b2332d8d0ec83958acbb4f7] --- projects/rccl/src/graph/search.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/rccl/src/graph/search.cc b/projects/rccl/src/graph/search.cc index a0de32fdd4..ba4ac472ab 100644 --- a/projects/rccl/src/graph/search.cc +++ b/projects/rccl/src/graph/search.cc @@ -345,7 +345,7 @@ ncclResult_t ncclTopoSearchRecGpu(struct ncclTopoSystem* system, struct ncclTopo int next[NCCL_TOPO_MAX_NODES]; int count; if (forcedOrder == FORCED_ORDER_PCI) { // Try the PCI order - next[0] = (busIdToCudaDev(gpu->id)+1)%system->nodes[GPU].count; + next[0] = step+1; count = 1; } else if (forcedOrder == FORCED_ORDER_REPLAY) { // Try last channel order NCCLCHECK(ncclTopoReplayGetGpu(system, graph, step, next));