diff --git a/projects/rccl/src/graph/connect.cc b/projects/rccl/src/graph/connect.cc index f432495c0b..a87fc92749 100644 --- a/projects/rccl/src/graph/connect.cc +++ b/projects/rccl/src/graph/connect.cc @@ -281,7 +281,10 @@ static ncclResult_t setTreeDown(struct ncclTree* tree, int* indexes, int d) { } static ncclResult_t connectTrees(struct ncclComm* comm, int* treeToParent, int* treeToChild0, int* treeToChild1, int* treePatterns) { - const int nChannels = (comm->nChannels > MAXCHANNELS/2) ? comm->nChannels/2 : comm->nChannels, nNodes = comm->nNodes, node = comm->node; + + const int channelLimit = IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx94") ? MAXCHANNELS/2 : 16; + const int nChannels = (comm->nChannels > channelLimit) ? comm->nChannels / 2 : comm->nChannels; + const int nNodes = comm->nNodes, node = comm->node; // Compute tree depth. Not an exact value but a good approximation in most // cases @@ -290,7 +293,7 @@ static ncclResult_t connectTrees(struct ncclComm* comm, int* treeToParent, int* int t0u, t0d0, t0d1, t0ChildType, t1u, t1d0, t1d1, t1ChildType; int* ttp, *ttc0, *ttc1; NCCLCHECK(ncclGetDtree(nNodes, node, &t0u, &t0d0, &t0d1, &t0ChildType, &t1u, &t1d0, &t1d1, &t1ChildType)); - if (comm->nChannels <= IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx94") ? (MAXCHANNELS/2) : (MAXCHANNELS/4)) { + if (nChannels == comm->nChannels) { for (int c=0; cchannels+c; struct ncclChannel* channel1 = channel0+nChannels; diff --git a/projects/rccl/src/graph/rome_models.cc b/projects/rccl/src/graph/rome_models.cc index 347e8873b1..d5aeea38a8 100644 --- a/projects/rccl/src/graph/rome_models.cc +++ b/projects/rccl/src/graph/rome_models.cc @@ -603,14 +603,70 @@ static struct rcclRomeModel rome_model_81 = { "N5 5 4 7 1 3 2 6 0 N0|" "N6 6 3 1 4 0 7 5 2 N2|" "N7 7 2 0 6 4 1 5 3 N3|" - "N0 0 1 2 3 4 5 6 7 N7|" + "N1 1 0 2 4 3 5 7 6 N6|" "N2 2 5 0 3 6 1 7 4 N4|" "N3 3 7 0 4 2 1 6 5 N5|" "N4 4 6 2 7 3 0 5 1 N1|" "N5 5 4 7 1 3 2 6 0 N0|" "N6 6 3 1 4 0 7 5 2 N2|" - "N7 7 2 0 6 4 1 5 3 N3", + "N7 7 2 0 6 4 1 5 3 N3|" + "N0 0 1 2 3 4 5 6 7 N7|" + + "N2 2 5 0 3 6 1 7 4 N4|" + "N3 3 7 0 4 2 1 6 5 N5|" + "N4 4 6 2 7 3 0 5 1 N1|" + "N5 5 4 7 1 3 2 6 0 N0|" + "N6 6 3 1 4 0 7 5 2 N2|" + "N7 7 2 0 6 4 1 5 3 N3|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N1 1 0 2 4 3 5 7 6 N6|" + + "N3 3 7 0 4 2 1 6 5 N5|" + "N4 4 6 2 7 3 0 5 1 N1|" + "N5 5 4 7 1 3 2 6 0 N0|" + "N6 6 3 1 4 0 7 5 2 N2|" + "N7 7 2 0 6 4 1 5 3 N3|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N2 2 5 0 3 6 1 7 4 N4|" + + "N4 4 6 2 7 3 0 5 1 N1|" + "N5 5 4 7 1 3 2 6 0 N0|" + "N6 6 3 1 4 0 7 5 2 N2|" + "N7 7 2 0 6 4 1 5 3 N3|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N2 2 5 0 3 6 1 7 4 N4|" + "N3 3 7 0 4 2 1 6 5 N5|" + + "N5 5 4 7 1 3 2 6 0 N0|" + "N6 6 3 1 4 0 7 5 2 N2|" + "N7 7 2 0 6 4 1 5 3 N3|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N2 2 5 0 3 6 1 7 4 N4|" + "N3 3 7 0 4 2 1 6 5 N5|" + "N4 4 6 2 7 3 0 5 1 N1|" + + "N6 6 3 1 4 0 7 5 2 N2|" + "N7 7 2 0 6 4 1 5 3 N3|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N2 2 5 0 3 6 1 7 4 N4|" + "N3 3 7 0 4 2 1 6 5 N5|" + "N4 4 6 2 7 3 0 5 1 N1|" + "N5 5 4 7 1 3 2 6 0 N0|" + + "N7 7 2 0 6 4 1 5 3 N3|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N2 2 5 0 3 6 1 7 4 N4|" + "N3 3 7 0 4 2 1 6 5 N5|" + "N4 4 6 2 7 3 0 5 1 N1|" + "N5 5 4 7 1 3 2 6 0 N0|" + "N6 6 3 1 4 0 7 5 2 N2|", + .ringTail2 = "N7 7 4 1 3 2 0 6 5 N5|" "N6 6 3 0 7 5 1 4 2 N2|" "N4 4 6 2 1 7 0 5 3 N3|" @@ -619,14 +675,71 @@ static struct rcclRomeModel rome_model_81 = { "N0 0 1 2 3 4 5 6 7 N7|" "N2 2 5 0 3 6 4 7 1 N1|" "N3 3 7 2 6 1 5 4 0 N0|" - "N7 7 4 1 3 2 0 6 5 N5|" + "N6 6 3 0 7 5 1 4 2 N2|" "N4 4 6 2 1 7 0 5 3 N3|" "N5 5 2 7 3 1 6 0 4 N4|" "N1 1 0 2 4 3 5 7 6 N6|" "N0 0 1 2 3 4 5 6 7 N7|" "N2 2 5 0 3 6 4 7 1 N1|" - "N3 3 7 2 6 1 5 4 0 N0", + "N3 3 7 2 6 1 5 4 0 N0|" + "N7 7 4 1 3 2 0 6 5 N5|" + + "N4 4 6 2 1 7 0 5 3 N3|" + "N5 5 2 7 3 1 6 0 4 N4|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N2 2 5 0 3 6 4 7 1 N1|" + "N3 3 7 2 6 1 5 4 0 N0|" + "N7 7 4 1 3 2 0 6 5 N5|" + "N6 6 3 0 7 5 1 4 2 N2|" + + "N5 5 2 7 3 1 6 0 4 N4|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N2 2 5 0 3 6 4 7 1 N1|" + "N3 3 7 2 6 1 5 4 0 N0|" + "N7 7 4 1 3 2 0 6 5 N5|" + "N6 6 3 0 7 5 1 4 2 N2|" + "N4 4 6 2 1 7 0 5 3 N3|" + + "N1 1 0 2 4 3 5 7 6 N6|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N2 2 5 0 3 6 4 7 1 N1|" + "N3 3 7 2 6 1 5 4 0 N0|" + "N7 7 4 1 3 2 0 6 5 N5|" + "N6 6 3 0 7 5 1 4 2 N2|" + "N4 4 6 2 1 7 0 5 3 N3|" + "N5 5 2 7 3 1 6 0 4 N4|" + + "N0 0 1 2 3 4 5 6 7 N7|" + "N2 2 5 0 3 6 4 7 1 N1|" + "N3 3 7 2 6 1 5 4 0 N0|" + "N7 7 4 1 3 2 0 6 5 N5|" + "N6 6 3 0 7 5 1 4 2 N2|" + "N4 4 6 2 1 7 0 5 3 N3|" + "N5 5 2 7 3 1 6 0 4 N4|" + "N1 1 0 2 4 3 5 7 6 N6|" + + "N2 2 5 0 3 6 4 7 1 N1|" + "N3 3 7 2 6 1 5 4 0 N0|" + "N7 7 4 1 3 2 0 6 5 N5|" + "N6 6 3 0 7 5 1 4 2 N2|" + "N4 4 6 2 1 7 0 5 3 N3|" + "N5 5 2 7 3 1 6 0 4 N4|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N0 0 1 2 3 4 5 6 7 N7|" + + "N3 3 7 2 6 1 5 4 0 N0|" + "N7 7 4 1 3 2 0 6 5 N5|" + "N6 6 3 0 7 5 1 4 2 N2|" + "N4 4 6 2 1 7 0 5 3 N3|" + "N5 5 2 7 3 1 6 0 4 N4|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N2 2 5 0 3 6 4 7 1 N1|", + + .ringTail1 = "N5 5 4 2 7 1 6 3 0 N0|" "N2 2 5 0 3 7 4 6 1 N1|" "N3 3 6 4 0 5 1 7 2 N2|" @@ -635,14 +748,70 @@ static struct rcclRomeModel rome_model_81 = { "N7 7 3 2 6 0 4 1 5 N5|" "N1 1 0 2 4 3 5 7 6 N6|" "N0 0 1 2 3 4 5 6 7 N7|" - "N5 5 4 2 7 1 6 3 0 N0|" + "N2 2 5 0 3 7 4 6 1 N1|" "N3 3 6 4 0 5 1 7 2 N2|" "N4 4 7 0 6 5 2 1 3 N3|" "N6 6 2 0 7 5 3 1 4 N4|" "N7 7 3 2 6 0 4 1 5 N5|" "N1 1 0 2 4 3 5 7 6 N6|" - "N0 0 1 2 3 4 5 6 7 N7", + "N0 0 1 2 3 4 5 6 7 N7|" + "N5 5 4 2 7 1 6 3 0 N0|" + + "N3 3 6 4 0 5 1 7 2 N2|" + "N4 4 7 0 6 5 2 1 3 N3|" + "N6 6 2 0 7 5 3 1 4 N4|" + "N7 7 3 2 6 0 4 1 5 N5|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N5 5 4 2 7 1 6 3 0 N0|" + "N2 2 5 0 3 7 4 6 1 N1|" + + "N4 4 7 0 6 5 2 1 3 N3|" + "N6 6 2 0 7 5 3 1 4 N4|" + "N7 7 3 2 6 0 4 1 5 N5|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N5 5 4 2 7 1 6 3 0 N0|" + "N2 2 5 0 3 7 4 6 1 N1|" + "N3 3 6 4 0 5 1 7 2 N2|" + + "N6 6 2 0 7 5 3 1 4 N4|" + "N7 7 3 2 6 0 4 1 5 N5|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N5 5 4 2 7 1 6 3 0 N0|" + "N2 2 5 0 3 7 4 6 1 N1|" + "N3 3 6 4 0 5 1 7 2 N2|" + "N4 4 7 0 6 5 2 1 3 N3|" + + "N7 7 3 2 6 0 4 1 5 N5|" + "N1 1 0 2 4 3 5 7 6 N6|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N5 5 4 2 7 1 6 3 0 N0|" + "N2 2 5 0 3 7 4 6 1 N1|" + "N3 3 6 4 0 5 1 7 2 N2|" + "N4 4 7 0 6 5 2 1 3 N3|" + "N6 6 2 0 7 5 3 1 4 N4|" + + "N1 1 0 2 4 3 5 7 6 N6|" + "N0 0 1 2 3 4 5 6 7 N7|" + "N5 5 4 2 7 1 6 3 0 N0|" + "N2 2 5 0 3 7 4 6 1 N1|" + "N3 3 6 4 0 5 1 7 2 N2|" + "N4 4 7 0 6 5 2 1 3 N3|" + "N6 6 2 0 7 5 3 1 4 N4|" + "N7 7 3 2 6 0 4 1 5 N5|" + + "N0 0 1 2 3 4 5 6 7 N7|" + "N5 5 4 2 7 1 6 3 0 N0|" + "N2 2 5 0 3 7 4 6 1 N1|" + "N3 3 6 4 0 5 1 7 2 N2|" + "N4 4 7 0 6 5 2 1 3 N3|" + "N6 6 2 0 7 5 3 1 4 N4|" + "N7 7 3 2 6 0 4 1 5 N5|" + "N1 1 0 2 4 3 5 7 6 N6|", + .options = "noCpuCheck=1,tuning=5", };