Increase default number of channels for MI300A in multi-node scenario. (#1366)

This commit changes the default number of channels for MI300A from 8 up to 24.
This helps bring up multi-node performance to the expected level.
Этот коммит содержится в:
Arm Patinyasakdikul
2024-10-11 11:37:48 -05:00
коммит произвёл GitHub
родитель b55b6be0cb
Коммит 133ea201cf
+15 -6
Просмотреть файл
@@ -1485,12 +1485,21 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p
if (ringGraph.nChannels > MAXCHANNELS/2)
allGather3Data[rank].nc = 1;
if (IsArchMatch(comm->topo->nodes[GPU].nodes[idx].gpu.gcn, "gfx94")) {
if (nranks == 2)
// NCCL_MIN_NCHANNELS=32
allGather3Data[rank].nc = 16;
else if (nranks == 4)
// NCCL_MIN_NCHANNELS=24
allGather3Data[rank].nc = 4;
// Multi-node MI300A
int managed = 0;
CUDACHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeDirectManagedMemAccessFromHost, 0));
if (managed && nNodes > 1) {
// This forces the minimum channels to 24
allGather3Data[rank].nc = 6;
} else {
// MI300X
if (nranks == 2)
// NCCL_MIN_NCHANNELS=32
allGather3Data[rank].nc = 16;
else if (nranks == 4)
// NCCL_MIN_NCHANNELS=24
allGather3Data[rank].nc = 4;
}
}
allGather3Data[rank].pivotA2AEnabled = comm->topo->pivotA2AEnabled && rcclParamPivotAlltoallEnable();