Adjust nChannels on gfx950 based on ranks and nodes for better bandwidth (#2027)

This commit is contained in:
Dingming Wu
2025-11-11 07:46:51 -08:00
committad av GitHub
förälder 1678bb9ae7
incheckning b811645688
+9 -1
Visa fil
@@ -1453,8 +1453,16 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p
allGather3Data[rank].nc = 4;
}
}
// For single-node communicators that do not use the full xGMI links per GPU (i.e., nranks < 8),
// inflate nChannels a bit to achieve higher bandwidth.
if (IsArchMatch(comm->topo->nodes[GPU].nodes[idx].gpu.gcn, "gfx950")) {
allGather3Data[rank].nc = 4;
if (nranks == 2 && nNodes == 1){
allGather3Data[rank].nc = 16;
} else if (nranks == 4 && nNodes == 1){
allGather3Data[rank].nc = 8;
} else {
allGather3Data[rank].nc = 4;
}
}
allGather3Data[rank].pivotA2AEnabled = comm->topo->pivotA2AEnabled && rcclParamPivotAlltoallEnable();