From 62cbb3bcddaf7942ebd45387510d2ab4804a8add Mon Sep 17 00:00:00 2001
From: Nusrat Islam
Date: Fri, 31 May 2024 15:50:30 -0500
Subject: [PATCH] set MIN_NCHANNEL limit to 64 for multi-node

[ROCm/rccl commit: 9746d8ca3fcb6327e44604a0a5febc675bf7cf78]
---
 projects/rccl/src/graph/connect.cc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/projects/rccl/src/graph/connect.cc b/projects/rccl/src/graph/connect.cc
index 55b3a6491e..f00a34fd8c 100644
--- a/projects/rccl/src/graph/connect.cc
+++ b/projects/rccl/src/graph/connect.cc
@@ -668,7 +668,12 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePa
     nChannels = comm->nChannels = copyChannels(comm, nChannels, 2*nChannels, ringPrev, ringNext);
   }
 
-  int minNchannels = ncclMinNchannels();
+  int minNchannels = 64;
+  if (comm->nNodes == 1) {
+    minNchannels = ncclMinNchannels();
+  } else {
+    minNchannels = std::min(64,ncclMinNchannels());
+  }
 
   if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled())) {
     int mscclNumChannelsRequired = 0;