From abcfbf1231f7aa8a0022a991e278ce445d33aa6a Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Mon, 10 Feb 2020 16:01:36 -0800 Subject: [PATCH] Generate 8G6L chordal ring from reference --- src/graph/search.cc | 51 +++++++++++++++++++++++++++++++++++++++++++++ src/graph/topo.h | 10 ++++----- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/src/graph/search.cc b/src/graph/search.cc index a244ef8b1f..8b74122ac9 100644 --- a/src/graph/search.cc +++ b/src/graph/search.cc @@ -443,6 +443,56 @@ end: return ncclSuccess; } +static void parseChordalRing(struct ncclTopoSystem* system, char **str) { + static const char *ringBase = "0 6 7 4 5 3 2 1|0 5 6 3 7 1 4 2|0 4 6 2 7 5 1 3|0 1 2 3 5 4 7 6|0 2 4 1 7 3 6 5|0 3 1 5 7 2 6 4"; + static char ringRemap[256]; + int id[8], dist[8]; + int i; + + int ngpus = system->nodes[GPU].count; + // single node CR8G only + if (ngpus != 8 && system->nodes[NET].count != 0) + return; + // validate chordal ring and calculate distance + for (i=0; inodes[GPU].nodes+i; + if (node->paths[GPU] == NULL) continue; + int sum = ngpus*(ngpus-1)/2 - node->rank; + int count = 0; + for (int n = 0; npaths[GPU][n].type != LINK_NVL) continue; + sum -= system->nodes[GPU].nodes[n].rank; + count ++; + } + if(count != ngpus-2 || sum < 0 || sum > ngpus-1) { + return; + } + dist[i] = sum; + } + // remap GPU ids + for (i = 0; i= '0' && ringBase[i] <= '9') + ringRemap[i] = id[ringBase[i]-'0']+'0'; + else + ringRemap[i] = ringBase[i]; + } + ringRemap[i] = 0; + *str = ringRemap; + INFO(NCCL_GRAPH, "Use chordal ring: %s", ringRemap); + return; +} + ncclResult_t ncclTopoCompute(ncclTopoSystem* system, struct ncclTopoGraph* graph) { int ngpus = system->nodes[GPU].count; int crossNic = (system->nodes[NET].count > 1) && graph->crossNic ? 1 : 0; @@ -454,6 +504,7 @@ ncclResult_t ncclTopoCompute(ncclTopoSystem* system, struct ncclTopoGraph* graph graph->sameChannels = 1; char* str = getenv("NCCL_GRAPH"); + if (!str) parseChordalRing(system, &str); if (str) { NCCLCHECK(parseGraph(str, &graph->nChannels, ngpus, graph->intra)); for (int i=0; inChannels*ngpus; i++) { diff --git a/src/graph/topo.h b/src/graph/topo.h index 0410572838..d45125fc42 100644 --- a/src/graph/topo.h +++ b/src/graph/topo.h @@ -17,13 +17,13 @@ #define PCI_WIDTH 12 // PCI Gen3 x16 #define QPI_WIDTH 8 #define SKL_QPI_WIDTH 12 -#define SKL_PCI_WIDTH 14 -#define SKL_CPUPCI_WIDTH 10 +#define SKL_PCI_WIDTH 12 +#define SKL_CPUPCI_WIDTH 12 #define P9_WIDTH 32 #define NET_WIDTH 12 // 100Gbit -#define ROME_QPI_WIDTH 12 -#define ROME_PCI_WIDTH 22 -#define ROME_CPUPCI_WIDTH 16 +#define ROME_QPI_WIDTH 24 +#define ROME_PCI_WIDTH 24 +#define ROME_CPUPCI_WIDTH 24 // Intel CPU convert GPU P2P traffic into 64B PCI TLPs, to GPU // to GPU traffic consumed more PCI bandwidth.