Changing channel stride for MI300X multinode (#1196)
* Shuffling MI300X multi-node channels
* Updating tree channel logic
[ROCm/rccl commit: 0948eecbba]
This commit is contained in:
@@ -281,7 +281,10 @@ static ncclResult_t setTreeDown(struct ncclTree* tree, int* indexes, int d) {
|
||||
}
|
||||
|
||||
static ncclResult_t connectTrees(struct ncclComm* comm, int* treeToParent, int* treeToChild0, int* treeToChild1, int* treePatterns) {
|
||||
const int nChannels = (comm->nChannels > MAXCHANNELS/2) ? comm->nChannels/2 : comm->nChannels, nNodes = comm->nNodes, node = comm->node;
|
||||
|
||||
const int channelLimit = IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx94") ? MAXCHANNELS/2 : 16;
|
||||
const int nChannels = (comm->nChannels > channelLimit) ? comm->nChannels / 2 : comm->nChannels;
|
||||
const int nNodes = comm->nNodes, node = comm->node;
|
||||
|
||||
// Compute tree depth. Not an exact value but a good approximation in most
|
||||
// cases
|
||||
@@ -290,7 +293,7 @@ static ncclResult_t connectTrees(struct ncclComm* comm, int* treeToParent, int*
|
||||
int t0u, t0d0, t0d1, t0ChildType, t1u, t1d0, t1d1, t1ChildType;
|
||||
int* ttp, *ttc0, *ttc1;
|
||||
NCCLCHECK(ncclGetDtree(nNodes, node, &t0u, &t0d0, &t0d1, &t0ChildType, &t1u, &t1d0, &t1d1, &t1ChildType));
|
||||
if (comm->nChannels <= IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx94") ? (MAXCHANNELS/2) : (MAXCHANNELS/4)) {
|
||||
if (nChannels == comm->nChannels) {
|
||||
for (int c=0; c<nChannels; c++) {
|
||||
struct ncclChannel* channel0 = comm->channels+c;
|
||||
struct ncclChannel* channel1 = channel0+nChannels;
|
||||
|
||||
@@ -603,14 +603,70 @@ static struct rcclRomeModel rome_model_81 = {
|
||||
"N5 5 4 7 1 3 2 6 0 N0|"
|
||||
"N6 6 3 1 4 0 7 5 2 N2|"
|
||||
"N7 7 2 0 6 4 1 5 3 N3|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N2 2 5 0 3 6 1 7 4 N4|"
|
||||
"N3 3 7 0 4 2 1 6 5 N5|"
|
||||
"N4 4 6 2 7 3 0 5 1 N1|"
|
||||
"N5 5 4 7 1 3 2 6 0 N0|"
|
||||
"N6 6 3 1 4 0 7 5 2 N2|"
|
||||
"N7 7 2 0 6 4 1 5 3 N3",
|
||||
"N7 7 2 0 6 4 1 5 3 N3|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
|
||||
"N2 2 5 0 3 6 1 7 4 N4|"
|
||||
"N3 3 7 0 4 2 1 6 5 N5|"
|
||||
"N4 4 6 2 7 3 0 5 1 N1|"
|
||||
"N5 5 4 7 1 3 2 6 0 N0|"
|
||||
"N6 6 3 1 4 0 7 5 2 N2|"
|
||||
"N7 7 2 0 6 4 1 5 3 N3|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
|
||||
"N3 3 7 0 4 2 1 6 5 N5|"
|
||||
"N4 4 6 2 7 3 0 5 1 N1|"
|
||||
"N5 5 4 7 1 3 2 6 0 N0|"
|
||||
"N6 6 3 1 4 0 7 5 2 N2|"
|
||||
"N7 7 2 0 6 4 1 5 3 N3|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N2 2 5 0 3 6 1 7 4 N4|"
|
||||
|
||||
"N4 4 6 2 7 3 0 5 1 N1|"
|
||||
"N5 5 4 7 1 3 2 6 0 N0|"
|
||||
"N6 6 3 1 4 0 7 5 2 N2|"
|
||||
"N7 7 2 0 6 4 1 5 3 N3|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N2 2 5 0 3 6 1 7 4 N4|"
|
||||
"N3 3 7 0 4 2 1 6 5 N5|"
|
||||
|
||||
"N5 5 4 7 1 3 2 6 0 N0|"
|
||||
"N6 6 3 1 4 0 7 5 2 N2|"
|
||||
"N7 7 2 0 6 4 1 5 3 N3|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N2 2 5 0 3 6 1 7 4 N4|"
|
||||
"N3 3 7 0 4 2 1 6 5 N5|"
|
||||
"N4 4 6 2 7 3 0 5 1 N1|"
|
||||
|
||||
"N6 6 3 1 4 0 7 5 2 N2|"
|
||||
"N7 7 2 0 6 4 1 5 3 N3|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N2 2 5 0 3 6 1 7 4 N4|"
|
||||
"N3 3 7 0 4 2 1 6 5 N5|"
|
||||
"N4 4 6 2 7 3 0 5 1 N1|"
|
||||
"N5 5 4 7 1 3 2 6 0 N0|"
|
||||
|
||||
"N7 7 2 0 6 4 1 5 3 N3|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N2 2 5 0 3 6 1 7 4 N4|"
|
||||
"N3 3 7 0 4 2 1 6 5 N5|"
|
||||
"N4 4 6 2 7 3 0 5 1 N1|"
|
||||
"N5 5 4 7 1 3 2 6 0 N0|"
|
||||
"N6 6 3 1 4 0 7 5 2 N2|",
|
||||
|
||||
.ringTail2 = "N7 7 4 1 3 2 0 6 5 N5|"
|
||||
"N6 6 3 0 7 5 1 4 2 N2|"
|
||||
"N4 4 6 2 1 7 0 5 3 N3|"
|
||||
@@ -619,14 +675,71 @@ static struct rcclRomeModel rome_model_81 = {
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N2 2 5 0 3 6 4 7 1 N1|"
|
||||
"N3 3 7 2 6 1 5 4 0 N0|"
|
||||
"N7 7 4 1 3 2 0 6 5 N5|"
|
||||
|
||||
"N6 6 3 0 7 5 1 4 2 N2|"
|
||||
"N4 4 6 2 1 7 0 5 3 N3|"
|
||||
"N5 5 2 7 3 1 6 0 4 N4|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N2 2 5 0 3 6 4 7 1 N1|"
|
||||
"N3 3 7 2 6 1 5 4 0 N0",
|
||||
"N3 3 7 2 6 1 5 4 0 N0|"
|
||||
"N7 7 4 1 3 2 0 6 5 N5|"
|
||||
|
||||
"N4 4 6 2 1 7 0 5 3 N3|"
|
||||
"N5 5 2 7 3 1 6 0 4 N4|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N2 2 5 0 3 6 4 7 1 N1|"
|
||||
"N3 3 7 2 6 1 5 4 0 N0|"
|
||||
"N7 7 4 1 3 2 0 6 5 N5|"
|
||||
"N6 6 3 0 7 5 1 4 2 N2|"
|
||||
|
||||
"N5 5 2 7 3 1 6 0 4 N4|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N2 2 5 0 3 6 4 7 1 N1|"
|
||||
"N3 3 7 2 6 1 5 4 0 N0|"
|
||||
"N7 7 4 1 3 2 0 6 5 N5|"
|
||||
"N6 6 3 0 7 5 1 4 2 N2|"
|
||||
"N4 4 6 2 1 7 0 5 3 N3|"
|
||||
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N2 2 5 0 3 6 4 7 1 N1|"
|
||||
"N3 3 7 2 6 1 5 4 0 N0|"
|
||||
"N7 7 4 1 3 2 0 6 5 N5|"
|
||||
"N6 6 3 0 7 5 1 4 2 N2|"
|
||||
"N4 4 6 2 1 7 0 5 3 N3|"
|
||||
"N5 5 2 7 3 1 6 0 4 N4|"
|
||||
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N2 2 5 0 3 6 4 7 1 N1|"
|
||||
"N3 3 7 2 6 1 5 4 0 N0|"
|
||||
"N7 7 4 1 3 2 0 6 5 N5|"
|
||||
"N6 6 3 0 7 5 1 4 2 N2|"
|
||||
"N4 4 6 2 1 7 0 5 3 N3|"
|
||||
"N5 5 2 7 3 1 6 0 4 N4|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
|
||||
"N2 2 5 0 3 6 4 7 1 N1|"
|
||||
"N3 3 7 2 6 1 5 4 0 N0|"
|
||||
"N7 7 4 1 3 2 0 6 5 N5|"
|
||||
"N6 6 3 0 7 5 1 4 2 N2|"
|
||||
"N4 4 6 2 1 7 0 5 3 N3|"
|
||||
"N5 5 2 7 3 1 6 0 4 N4|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
|
||||
"N3 3 7 2 6 1 5 4 0 N0|"
|
||||
"N7 7 4 1 3 2 0 6 5 N5|"
|
||||
"N6 6 3 0 7 5 1 4 2 N2|"
|
||||
"N4 4 6 2 1 7 0 5 3 N3|"
|
||||
"N5 5 2 7 3 1 6 0 4 N4|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N2 2 5 0 3 6 4 7 1 N1|",
|
||||
|
||||
|
||||
.ringTail1 = "N5 5 4 2 7 1 6 3 0 N0|"
|
||||
"N2 2 5 0 3 7 4 6 1 N1|"
|
||||
"N3 3 6 4 0 5 1 7 2 N2|"
|
||||
@@ -635,14 +748,70 @@ static struct rcclRomeModel rome_model_81 = {
|
||||
"N7 7 3 2 6 0 4 1 5 N5|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N5 5 4 2 7 1 6 3 0 N0|"
|
||||
|
||||
"N2 2 5 0 3 7 4 6 1 N1|"
|
||||
"N3 3 6 4 0 5 1 7 2 N2|"
|
||||
"N4 4 7 0 6 5 2 1 3 N3|"
|
||||
"N6 6 2 0 7 5 3 1 4 N4|"
|
||||
"N7 7 3 2 6 0 4 1 5 N5|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7",
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N5 5 4 2 7 1 6 3 0 N0|"
|
||||
|
||||
"N3 3 6 4 0 5 1 7 2 N2|"
|
||||
"N4 4 7 0 6 5 2 1 3 N3|"
|
||||
"N6 6 2 0 7 5 3 1 4 N4|"
|
||||
"N7 7 3 2 6 0 4 1 5 N5|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N5 5 4 2 7 1 6 3 0 N0|"
|
||||
"N2 2 5 0 3 7 4 6 1 N1|"
|
||||
|
||||
"N4 4 7 0 6 5 2 1 3 N3|"
|
||||
"N6 6 2 0 7 5 3 1 4 N4|"
|
||||
"N7 7 3 2 6 0 4 1 5 N5|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N5 5 4 2 7 1 6 3 0 N0|"
|
||||
"N2 2 5 0 3 7 4 6 1 N1|"
|
||||
"N3 3 6 4 0 5 1 7 2 N2|"
|
||||
|
||||
"N6 6 2 0 7 5 3 1 4 N4|"
|
||||
"N7 7 3 2 6 0 4 1 5 N5|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N5 5 4 2 7 1 6 3 0 N0|"
|
||||
"N2 2 5 0 3 7 4 6 1 N1|"
|
||||
"N3 3 6 4 0 5 1 7 2 N2|"
|
||||
"N4 4 7 0 6 5 2 1 3 N3|"
|
||||
|
||||
"N7 7 3 2 6 0 4 1 5 N5|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N5 5 4 2 7 1 6 3 0 N0|"
|
||||
"N2 2 5 0 3 7 4 6 1 N1|"
|
||||
"N3 3 6 4 0 5 1 7 2 N2|"
|
||||
"N4 4 7 0 6 5 2 1 3 N3|"
|
||||
"N6 6 2 0 7 5 3 1 4 N4|"
|
||||
|
||||
"N1 1 0 2 4 3 5 7 6 N6|"
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N5 5 4 2 7 1 6 3 0 N0|"
|
||||
"N2 2 5 0 3 7 4 6 1 N1|"
|
||||
"N3 3 6 4 0 5 1 7 2 N2|"
|
||||
"N4 4 7 0 6 5 2 1 3 N3|"
|
||||
"N6 6 2 0 7 5 3 1 4 N4|"
|
||||
"N7 7 3 2 6 0 4 1 5 N5|"
|
||||
|
||||
"N0 0 1 2 3 4 5 6 7 N7|"
|
||||
"N5 5 4 2 7 1 6 3 0 N0|"
|
||||
"N2 2 5 0 3 7 4 6 1 N1|"
|
||||
"N3 3 6 4 0 5 1 7 2 N2|"
|
||||
"N4 4 7 0 6 5 2 1 3 N3|"
|
||||
"N6 6 2 0 7 5 3 1 4 N4|"
|
||||
"N7 7 3 2 6 0 4 1 5 N5|"
|
||||
"N1 1 0 2 4 3 5 7 6 N6|",
|
||||
|
||||
.options = "noCpuCheck=1,tuning=5",
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user