From b30b8becea61a3df84705d628d789839cddd3ba8 Mon Sep 17 00:00:00 2001 From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Date: Tue, 17 May 2022 08:23:59 -0700 Subject: [PATCH] Refine and add new Rome models (#548) [ROCm/rccl commit: 283dc86a73b52b3814d2bccb30297819daf06e1e] --- .../rccl/src/collectives/device/sendrecv.h | 4 +- projects/rccl/src/graph/paths.cc | 2 +- projects/rccl/src/graph/rome_models.cc | 2 +- projects/rccl/src/graph/topo.cc | 2 +- projects/rccl/src/graph/topo.h | 12 +- projects/rccl/tools/scripts/topo_val.sh | 2 +- .../tools/topo_expl/models/topo_8p1h_4.xml | 146 ++++++++++++++++++ projects/rccl/tools/topo_expl/topo_expl.cpp | 2 + 8 files changed, 158 insertions(+), 14 deletions(-) create mode 100644 projects/rccl/tools/topo_expl/models/topo_8p1h_4.xml diff --git a/projects/rccl/src/collectives/device/sendrecv.h b/projects/rccl/src/collectives/device/sendrecv.h index 15be552009..28eef3ea59 100644 --- a/projects/rccl/src/collectives/device/sendrecv.h +++ b/projects/rccl/src/collectives/device/sendrecv.h @@ -22,7 +22,7 @@ struct RunWork { ssize_t const count = args->count; int const chunkSize = args->chunkSize/sizeof(T); int const peer = args->peer; - Primitives, 1, Proto, 1> prims + Primitives, 0, Proto, 1> prims (tid, nthreads, nullptr, &peer, args->buff, nullptr, /*redOpArg(ignored)=*/0, group); ssize_t offset = 0; do { @@ -39,7 +39,7 @@ struct RunWork { ssize_t const count = args->count; int const chunkSize = args->chunkSize/sizeof(T); int const peer = args->peer; - Primitives, 1, Proto, 1> prims + Primitives, 0, Proto, 1> prims (tid, nthreads, &peer, nullptr, nullptr, args->buff, /*redOpArg(ignored)=*/0, group); ssize_t offset = 0; do { diff --git a/projects/rccl/src/graph/paths.cc b/projects/rccl/src/graph/paths.cc index d2494755f3..a21f11327c 100644 --- a/projects/rccl/src/graph/paths.cc +++ b/projects/rccl/src/graph/paths.cc @@ -699,7 +699,7 @@ static ncclResult_t ncclTopoGetNchannels(struct ncclTopoSystem* system, int g /* // Local rank path = system->nodes[GPU].nodes[peer].paths[GPU]+g; if (path->type == PATH_NVL) { - float nvlWidth = ncclTopoNVLinkSpeed(system->nodes[GPU].nodes[g].gpu.cudaCompCap); + float nvlWidth = ncclTopoXGMISpeed(system->nodes[GPU].nodes[g].gpu.cudaCompCap); *nChannels = 2*std::max(1, (int)(path->width / nvlWidth)); } else { *nChannels = 2; diff --git a/projects/rccl/src/graph/rome_models.cc b/projects/rccl/src/graph/rome_models.cc index 135c8f4ee2..e1c98f6a0e 100644 --- a/projects/rccl/src/graph/rome_models.cc +++ b/projects/rccl/src/graph/rome_models.cc @@ -903,7 +903,7 @@ static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRo } if (!link->remNode) continue; if (link->type != LINK_NVL) continue; - romeTopo->connMatrix[i*romeTopo->nGpus+n] = link->width/VEGA_XGMI_WIDTH; + romeTopo->connMatrix[i*romeTopo->nGpus+n] = link->width/ncclTopoXGMISpeed(node->gpu.gcn); count ++; } if (romeTopo->nLinks < count) romeTopo->nLinks = count; diff --git a/projects/rccl/src/graph/topo.cc b/projects/rccl/src/graph/topo.cc index 7f13451227..945e64f552 100644 --- a/projects/rccl/src/graph/topo.cc +++ b/projects/rccl/src/graph/topo.cc @@ -543,7 +543,7 @@ ncclResult_t ncclTopoAddXGMI(struct ncclXmlNode* node, struct ncclTopoSystem* sy } } if (remote) { - int nvlSpeed = VEGA_XGMI_WIDTH; + float nvlSpeed = ncclTopoXGMISpeed(gpu->gpu.gcn); NCCLCHECK(ncclTopoConnectNodes(gpu, remote, LINK_NVL, count*nvlSpeed)); if (remote->type != GPU) { NCCLCHECK(ncclTopoConnectNodes(remote, gpu, LINK_NVL, count*nvlSpeed)); diff --git a/projects/rccl/src/graph/topo.h b/projects/rccl/src/graph/topo.h index 56a468129d..38e8c7cfe1 100644 --- a/projects/rccl/src/graph/topo.h +++ b/projects/rccl/src/graph/topo.h @@ -25,6 +25,7 @@ #define ARM_WIDTH 6.0 #define NET_WIDTH 12.0 // 100Gbit #define VEGA_XGMI_WIDTH 24.0 +#define MI200_XGMI_WIDTH 36.0 // Intel CPU convert GPU P2P traffic into 64B PCI TLPs, so GPU // to GPU traffic consumes more PCI bandwidth. @@ -183,13 +184,8 @@ static ncclResult_t ncclTopoRankToIndex(struct ncclTopoSystem* system, int rank, return ncclInternalError; } -// Returns NVLink speed in GB/s -static float ncclTopoNVLinkSpeed(int cudaCompCap) { - return - cudaCompCap == 86 ? SM86_NVLINK_WIDTH : - cudaCompCap >= 80 ? SM80_NVLINK_WIDTH : - cudaCompCap >= 70 ? SM70_NVLINK_WIDTH : - cudaCompCap >= 60 ? SM60_NVLINK_WIDTH : - SM80_NVLINK_WIDTH; +// Returns XGMI speed in GB/s +static float ncclTopoXGMISpeed(int gcn) { + return gcn == 910 ? MI200_XGMI_WIDTH : VEGA_XGMI_WIDTH; } #endif diff --git a/projects/rccl/tools/scripts/topo_val.sh b/projects/rccl/tools/scripts/topo_val.sh index 6bcce1284c..f71879eefe 100755 --- a/projects/rccl/tools/scripts/topo_val.sh +++ b/projects/rccl/tools/scripts/topo_val.sh @@ -21,7 +21,7 @@ DIR="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -for i in {0..74} +for i in {0..76} do if [[ $i -eq 50 ]] || [[ $i -eq 51 ]] then diff --git a/projects/rccl/tools/topo_expl/models/topo_8p1h_4.xml b/projects/rccl/tools/topo_expl/models/topo_8p1h_4.xml new file mode 100644 index 0000000000..e2b734eda9 --- /dev/null +++ b/projects/rccl/tools/topo_expl/models/topo_8p1h_4.xml @@ -0,0 +1,146 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/projects/rccl/tools/topo_expl/topo_expl.cpp b/projects/rccl/tools/topo_expl/topo_expl.cpp index a44abacaf9..d903ea3f8f 100644 --- a/projects/rccl/tools/topo_expl/topo_expl.cpp +++ b/projects/rccl/tools/topo_expl/topo_expl.cpp @@ -145,6 +145,8 @@ NodeModelDesc model_descs[] = { {4, "topo_8p1h_1.xml", "4 nodes 8P1H Alt."}, {1, "topo_8p1h_2.xml", "single node 8P1H Alt."}, {4, "topo_8p1h_3.xml", "4 nodes 8P1H Alt."}, + {1, "topo_8p1h_4.xml", "Single node 8P1H Alt."}, + {2, "topo_8p1h_4.xml", "2 nodes 8P1H Alt."}, }; int main(int argc,char* argv[])