From 05df0f8cea8b357e2f670e44a2d6c8338f0d7784 Mon Sep 17 00:00:00 2001 From: Nusrat Islam Date: Mon, 24 Jun 2024 16:42:38 -0500 Subject: [PATCH] graph: fix minNchannels for multi-node On multi-node runs, RCCL was not correctly setting the minNchannels value: it was computed as std::min(64, maxChannels), which discarded the value returned by ncclMinNchannels(). This PR fixes the bug by capping that value at 64 instead, i.e. std::min(64, minNchannels). --- src/graph/connect.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graph/connect.cc b/src/graph/connect.cc index 5e1f7e4c89..871750e0f7 100644 --- a/src/graph/connect.cc +++ b/src/graph/connect.cc @@ -674,7 +674,7 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePa int minNchannels = ncclMinNchannels(); if (comm->nNodes > 1) { - minNchannels = std::min(64, maxChannels); + minNchannels = std::min(64, minNchannels); } if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled())) {