From b8116456885af2c4152aa2f45ce7125604fd344e Mon Sep 17 00:00:00 2001 From: Dingming Wu Date: Tue, 11 Nov 2025 07:46:51 -0800 Subject: [PATCH] Adjust nChannels on gfx950 based on ranks and nodes for better bandwidth (#2027) --- src/init.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/init.cc b/src/init.cc index eb936ac9bd..5ef4c40481 100644 --- a/src/init.cc +++ b/src/init.cc @@ -1453,8 +1453,16 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p allGather3Data[rank].nc = 4; } } + // For single-node communicators that do not use the full xgmi links per gpu, i.e., nranks < 8, + // inflate the nChannels a bit to achieve higher bandwidth. if (IsArchMatch(comm->topo->nodes[GPU].nodes[idx].gpu.gcn, "gfx950")) { - allGather3Data[rank].nc = 4; + if (nranks == 2 && nNodes == 1){ + allGather3Data[rank].nc = 16; + } else if (nranks == 4 && nNodes == 1){ + allGather3Data[rank].nc = 8; + } else { + allGather3Data[rank].nc = 4; + } } allGather3Data[rank].pivotA2AEnabled = comm->topo->pivotA2AEnabled && rcclParamPivotAlltoallEnable();