Increase default number of channels for MI300A in multi-node scenario. (#1366)
This commit changes the default number of channels on MI300A from 8 up to 24. This helps bring multi-node performance up to the expected level.
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
b55b6be0cb
Коммит
133ea201cf
+15
-6
@@ -1485,12 +1485,21 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p
|
||||
if (ringGraph.nChannels > MAXCHANNELS/2)
|
||||
allGather3Data[rank].nc = 1;
|
||||
if (IsArchMatch(comm->topo->nodes[GPU].nodes[idx].gpu.gcn, "gfx94")) {
|
||||
if (nranks == 2)
|
||||
// NCCL_MIN_NCHANNELS=32
|
||||
allGather3Data[rank].nc = 16;
|
||||
else if (nranks == 4)
|
||||
// NCCL_MIN_NCHANNELS=24
|
||||
allGather3Data[rank].nc = 4;
|
||||
// Multi-node MI300A
|
||||
int managed = 0;
|
||||
CUDACHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeDirectManagedMemAccessFromHost, 0));
|
||||
if (managed && nNodes > 1) {
|
||||
// This forces the minimum channels to 24
|
||||
allGather3Data[rank].nc = 6;
|
||||
} else {
|
||||
// MI300X
|
||||
if (nranks == 2)
|
||||
// NCCL_MIN_NCHANNELS=32
|
||||
allGather3Data[rank].nc = 16;
|
||||
else if (nranks == 4)
|
||||
// NCCL_MIN_NCHANNELS=24
|
||||
allGather3Data[rank].nc = 4;
|
||||
}
|
||||
}
|
||||
|
||||
allGather3Data[rank].pivotA2AEnabled = comm->topo->pivotA2AEnabled && rcclParamPivotAlltoallEnable();
|
||||
|
||||
Ссылка в новой задаче
Block a user