From 03bb6bcb54874724c009a976cb29bb3207a63e22 Mon Sep 17 00:00:00 2001 From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Date: Wed, 26 Aug 2020 11:40:11 -0700 Subject: [PATCH] Increase minimal channels for gfx908 (#259) [ROCm/rccl commit: c5cbece6d03ceb0e20fdd9e6c382f04957368fbe] --- projects/rccl/src/graph/connect.cc | 3 +- projects/rccl/src/include/graph.h | 2 +- projects/rccl/src/init.cc | 10 ++- projects/rccl/tools/scripts/topo_val.sh | 2 +- projects/rccl/tools/topo_expl/include/utils.h | 1 + .../tools/topo_expl/models/topo_4p3l_ia.xml | 85 +++++++++++++++++++ projects/rccl/tools/topo_expl/topo_expl.cpp | 2 + projects/rccl/tools/topo_expl/utils.cpp | 9 +- 8 files changed, 107 insertions(+), 7 deletions(-) create mode 100644 projects/rccl/tools/topo_expl/models/topo_4p3l_ia.xml diff --git a/projects/rccl/src/graph/connect.cc b/projects/rccl/src/graph/connect.cc index b62e7741a6..622f8f4dab 100644 --- a/projects/rccl/src/graph/connect.cc +++ b/projects/rccl/src/graph/connect.cc @@ -255,7 +255,7 @@ int ncclMaxNchannels() { return maxNchannels; } -ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, struct ncclTopoRanks** allTopoRanks, int* rings) { +ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, struct ncclTopoRanks** allTopoRanks, int* rings, int gcn) { // Gather data from all ranks int *ringRecv, *ringSend, *ringPrev, *ringNext, *treeUpRecv, *treeUpSend, *treeDnRecv,*treeDnSend; int nranks = comm->nRanks; @@ -290,6 +290,7 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, struct nccl memcpy(ringNext+nChannels*nranks, ringNext, nChannels*nranks*sizeof(int)); int nc = nChannels*2; + if (gcn == 908) nc = std::max(nc, 4); if (comm->topo->nodes[NET].count == 0 && comm->topo->type == RCCL_TOPO_CR8G) nc = nChannels*3; if (comm->topo->nodes[NET].count && comm->topo->type == RCCL_TOPO_4P2H_ROME) nc = 4*comm->topo->nodes[NET].count; int end = std::min((int)ncclMaxNchannels(), std::max(nc, ncclMinNchannels())); diff --git a/projects/rccl/src/include/graph.h b/projects/rccl/src/include/graph.h index b326c71b63..4af2c529c1 100644 --- a/projects/rccl/src/include/graph.h +++ b/projects/rccl/src/include/graph.h @@ -95,7 +95,7 @@ ncclResult_t ncclTopoPreset(struct ncclComm* comm, struct ncclTopoRanks* topoRanks); ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, - struct ncclTopoRanks** allTopoRanks, int* rings); + struct ncclTopoRanks** allTopoRanks, int* rings, int gcn); ncclResult_t ncclTopoConnectCollNet(struct ncclComm* comm, struct ncclTopoGraph* collNetGraph, int rank); diff --git a/projects/rccl/src/init.cc b/projects/rccl/src/init.cc index 256906c288..63abfcdfb1 100644 --- a/projects/rccl/src/init.cc +++ b/projects/rccl/src/init.cc @@ -759,6 +759,7 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, ncclUniqueId* comm int cudaCompCap; int fullCudaCompCap; int nChannels; + int gcn; struct ncclGraphInfo tree; struct ncclGraphInfo ring; struct ncclGraphInfo collNet; @@ -766,7 +767,10 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, ncclUniqueId* comm } *allGather3Data; NCCLCHECK(ncclCalloc(&allGather3Data, nranks)); - allGather3Data[rank].cudaCompCap = ncclCudaCompCap(); + int idx; + NCCLCHECK(ncclTopoIdToIndex(comm->topo, GPU, myInfo->busId, &idx)); + allGather3Data[rank].cudaCompCap = comm->topo->nodes[GPU].nodes[idx].gpu.cudaCompCap; + allGather3Data[rank].gcn = comm->topo->nodes[GPU].nodes[idx].gpu.gcn; allGather3Data[rank].nChannels = comm->nChannels = treeGraph.nChannels = ringGraph.nChannels = std::min(treeGraph.nChannels, ringGraph.nChannels); allGather3Data[rank].tree.sameChannels = treeGraph.sameChannels; @@ -813,8 +817,10 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, ncclUniqueId* comm int nChannelsOrig = comm->nChannels; struct ncclTopoRanks** allTopoRanks; NCCLCHECK(ncclCalloc(&allTopoRanks, comm->nRanks)); + int gcn = allGather3Data[0].gcn; for (int i=0; inChannels = std::min(allGather3Data[i].nChannels, comm->nChannels); treeGraph.sameChannels = std::min(allGather3Data[i].tree.sameChannels, treeGraph.sameChannels); @@ -840,7 +846,7 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, ncclUniqueId* comm int *rings; NCCLCHECK(ncclCalloc(&rings, nranks*MAXCHANNELS)); - NCCLCHECK(ncclTopoPostset(comm, nodesFirstRank, allTopoRanks, rings)); + NCCLCHECK(ncclTopoPostset(comm, nodesFirstRank, allTopoRanks, rings, gcn)); if (comm->nNodes > 1 && ncclParamCollNetEnable() == 1 && collNetSupport() && collNetGraph.nChannels) { diff --git a/projects/rccl/tools/scripts/topo_val.sh b/projects/rccl/tools/scripts/topo_val.sh index e0f2b3c155..b937eed34c 100755 --- a/projects/rccl/tools/scripts/topo_val.sh +++ b/projects/rccl/tools/scripts/topo_val.sh @@ -21,7 +21,7 @@ DIR="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -for i in {0..33} +for i in {0..35} do $DIR/../topo_expl/topo_expl -m $i > "topo_m$i.log" $DIR/../TopoVisual/topo_visual.sh -i "topo_m$i.log" diff --git a/projects/rccl/tools/topo_expl/include/utils.h b/projects/rccl/tools/topo_expl/include/utils.h index 9337e18aee..1f9eebdd1d 100644 --- a/projects/rccl/tools/topo_expl/include/utils.h +++ b/projects/rccl/tools/topo_expl/include/utils.h @@ -25,6 +25,7 @@ struct allGather3Data_t{ int cudaCompCap; int fullCudaCompCap; int nChannels; + int gcn; struct ncclGraphInfo tree; struct ncclGraphInfo ring; struct ncclGraphInfo collNet; diff --git a/projects/rccl/tools/topo_expl/models/topo_4p3l_ia.xml b/projects/rccl/tools/topo_expl/models/topo_4p3l_ia.xml new file mode 100644 index 0000000000..01b20c2dc7 --- /dev/null +++ b/projects/rccl/tools/topo_expl/models/topo_4p3l_ia.xml @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/projects/rccl/tools/topo_expl/topo_expl.cpp b/projects/rccl/tools/topo_expl/topo_expl.cpp index 2ab829b643..411bfa1903 100644 --- a/projects/rccl/tools/topo_expl/topo_expl.cpp +++ b/projects/rccl/tools/topo_expl/topo_expl.cpp @@ -103,6 +103,8 @@ NodeModelDesc model_descs[] = { {4, "topo_8p_ts1_n4.xml", "4 nodes 8 VEGA20 TS1 NPS=4"}, {1, "topo_8p_ts1_n4_1.xml", "single node 8 VEGA20 TS1 NPS=4 Alt. Model"}, {4, "topo_8p_ts1_n4_1.xml", "4 nodes 8 VEGA20 TS1 NPS=4 Alt. Model"}, + {1, "topo_4p3l_ia.xml", "single node 8 gfx908"}, + {4, "topo_4p3l_ia.xml", "4 nodes 8 gfx908"}, }; int main(int argc,char* argv[]) diff --git a/projects/rccl/tools/topo_expl/utils.cpp b/projects/rccl/tools/topo_expl/utils.cpp index 6324828134..960119ca24 100644 --- a/projects/rccl/tools/topo_expl/utils.cpp +++ b/projects/rccl/tools/topo_expl/utils.cpp @@ -216,7 +216,10 @@ ncclResult_t initTransportsRank_1(struct ncclComm* comm, struct allGather1Data_t } // AllGather3 - begin - allGather3Data[rank].cudaCompCap = ncclCudaCompCap(); + int idx; + NCCLCHECK(ncclTopoIdToIndex(comm->topo, GPU, myInfo->busId, &idx)); + allGather3Data[rank].cudaCompCap = comm->topo->nodes[GPU].nodes[idx].gpu.cudaCompCap; + allGather3Data[rank].gcn = comm->topo->nodes[GPU].nodes[idx].gpu.gcn; allGather3Data[rank].nChannels = comm->nChannels = std::min(treeGraph.nChannels, ringGraph.nChannels); allGather3Data[rank].tree.sameChannels = treeGraph.sameChannels; allGather3Data[rank].tree.speedIntra = treeGraph.speedIntra; @@ -397,8 +400,10 @@ ncclResult_t initTransportsRank_3(struct ncclComm* comm, struct allGather3Data_t int nChannelsOrig = comm->nChannels; struct ncclTopoRanks** allTopoRanks; NCCLCHECK(ncclCalloc(&allTopoRanks, comm->nRanks)); + int gcn = allGather3Data[0].gcn; for (int i=0; inChannels = std::min(allGather3Data[i].nChannels, comm->nChannels); treeGraph.sameChannels = std::min(allGather3Data[i].tree.sameChannels, treeGraph.sameChannels); @@ -424,7 +429,7 @@ ncclResult_t initTransportsRank_3(struct ncclComm* comm, struct allGather3Data_t int *rings; NCCLCHECK(ncclCalloc(&rings, nranks*MAXCHANNELS)); - NCCLCHECK(ncclTopoPostset(comm, nodesFirstRank, allTopoRanks, rings)); + NCCLCHECK(ncclTopoPostset(comm, nodesFirstRank, allTopoRanks, rings, gcn)); if (comm->nNodes > 1 && ncclParamCollNetEnable() == 1 && collNetSupport()) {