From ec8d89b1ddd8a3b257c8bbee57b8706e9ff797ee Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Mon, 22 Feb 2021 16:46:45 -0500 Subject: [PATCH] Match NBIO only when GPUs and NICs are directly connected to CPU --- src/graph/paths.cc | 15 +++++++++++++-- src/graph/rome_models.h | 2 +- src/graph/search.cc | 42 +++++++++++++++++++++++++---------------- 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/src/graph/paths.cc b/src/graph/paths.cc index 13a6434dd4..8475bf1ef0 100644 --- a/src/graph/paths.cc +++ b/src/graph/paths.cc @@ -342,8 +342,19 @@ ncclResult_t ncclTopoCheckGdr(struct ncclTopoSystem* system, int64_t busId, int else { int arch, vendor, model; NCCLCHECK(ncclTopoCpuType(system, &arch, &vendor, &model)); - if((system->nodes[GPU].nodes[g].id & 0xf0000) == (system->nodes[NET].nodes[n].net.busId & 0xf0000)) - netGdrLevel = PATH_PHB; + if (arch == NCCL_TOPO_CPU_ARCH_X86 && vendor == NCCL_TOPO_CPU_VENDOR_AMD && model == NCCL_TOPO_CPU_TYPE_ROME) { + int i, d1 = -1, d2 = -1; + for (i = 0; i < system->nodes[CPU].count; i++) + if (system->nodes[GPU].nodes[g].paths[CPU][i].count == 2) break; + if (i < system->nodes[CPU].count) d1 = system->nodes[CPU].nodes[i].id; + for (i = 0; i < system->nodes[CPU].count; i++) + if (system->nodes[NET].nodes[n].paths[CPU][i].count == 2) break; + if (i < system->nodes[CPU].count) d2 = system->nodes[CPU].nodes[i].id; + if (d1 != -1 && d2 != -1 && d1 == d2 && + (system->nodes[GPU].nodes[g].id & 0xf0000) == (system->nodes[NET].nodes[n].net.busId & 0xf0000)) { + netGdrLevel = PATH_PHB; + } + } } int distance = gpu->paths[NET][n].type; diff --git a/src/graph/rome_models.h b/src/graph/rome_models.h index 21b089c0b4..556e01977e 100755 --- a/src/graph/rome_models.h +++ b/src/graph/rome_models.h @@ -254,7 +254,7 @@ static struct rcclRomeModel rome_model_49 = { .nicNuma = { 0, 1, 2, 3, }, .connMatrix = { 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 
0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, }, .pattern = "21212121", - .ringBase = "N0 0 1 2 3 4 5 6 7 N3|N3 7 6 5 4 3 2 1 0 N0", + .ringBase = "N0 0 1 2 3 4 5 6 7 N3|N3 7 6 5 4 3 2 1 0 N0|N1 2 3 0 1 6 7 4 5 N2|N2 5 4 7 6 1 0 3 2 N1", }; static struct rcclRomeModel romeTopoModels[] = { diff --git a/src/graph/search.cc b/src/graph/search.cc index 65eb5040f3..cb34d515b6 100644 --- a/src/graph/search.cc +++ b/src/graph/search.cc @@ -991,7 +991,7 @@ static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRo return ncclSuccess; } -static bool permuteGpuIds(int *g, int n, int last, struct rcclRomeModel* ref, struct rcclRomeModel* topo, int* time) { +static bool permuteGpuIds(int *g, int n, int last, struct rcclRomeModel* ref, struct rcclRomeModel* topo, int* time, bool nbio) { (*time) ++; if (n == last) { int i, j; @@ -1009,22 +1009,24 @@ static bool permuteGpuIds(int *g, int n, int last, struct rcclRomeModel* ref, st } if (i < ref->nGpus) return false; // match NBIO - for (i = 0; i < ref->nGpus; i++) { - for (j = 0; j < ref->nGpus; j++) { - if (i == j) continue; - bool nbio_ref = (ref->gpuIds[i]&0xf0000) == (ref->gpuIds[j]&0xf0000); - bool nbio_topo = (topo->gpuIds[g[i]]&0xf0000) == (topo->gpuIds[g[j]]&0xf0000); - if (nbio_ref != nbio_topo) break; - if (nbio_ref && ((ref->gpuIds[i]-ref->gpuIds[j])*(topo->gpuIds[g[i]]-topo->gpuIds[g[j]]) < 0)) break; + if (nbio) { + for (i = 0; i < ref->nGpus; i++) { + for (j = 0; j < ref->nGpus; j++) { + if (i == j) continue; + bool nbio_ref = (ref->gpuIds[i]&0xf0000) == (ref->gpuIds[j]&0xf0000); + bool nbio_topo = (topo->gpuIds[g[i]]&0xf0000) == (topo->gpuIds[g[j]]&0xf0000); + if (nbio_ref != nbio_topo) break; + if (nbio_ref && ((ref->gpuIds[i]-ref->gpuIds[j])*(topo->gpuIds[g[i]]-topo->gpuIds[g[j]]) < 0)) break; + } + if (j < ref->nGpus) break; } - if (j < ref->nGpus) break; + if (i < ref->nGpus) return false; } - if (i < ref->nGpus) return false; return true; } else { for (int i = n; i <= last; i++) { std::swap(g[n], 
g[i]); - if (permuteGpuIds(g, n+1, last, ref, topo, time)) return true; + if (permuteGpuIds(g, n+1, last, ref, topo, time, nbio)) return true; std::swap(g[n], g[i]); } } @@ -1057,12 +1059,23 @@ static ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopo int time = 0; struct timeval tvs, tve; gettimeofday(&tvs, NULL); + + // check if GPUs are directly connected to CPU + bool match_nbio = true; + for (i = 0; i < romeTopo.nGpus; i++) { + int cpu, gpu; + NCCLCHECK(ncclTopoIdToIndex(system, CPU, romeTopo.gpuNuma[i], &cpu)); + NCCLCHECK(ncclTopoIdToIndex(system, GPU, romeTopo.gpuIds[i], &gpu)); + if (system->nodes[GPU].nodes[gpu].paths[CPU][cpu].count > 2) break; + } + if (i < romeTopo.nGpus) match_nbio = false; + for (i = 0; i < sizeof(romeTopoModels)/sizeof(romeTopoModels[0]); i++) { if (romeTopo.nCpus != romeTopoModels[i].nCpus || romeTopo.nGpus != romeTopoModels[i].nGpus || romeTopo.nNics != romeTopoModels[i].nNics || romeTopo.nLinks != romeTopoModels[i].nLinks) continue; if (strcmp(romeTopoModels[i].pattern, pattern)) continue; for (int j = 0; j < ngpus; j++) g[j] = (j+2)%ngpus; - if (permuteGpuIds(g, 0, ngpus-1, romeTopoModels+i, &romeTopo, &time)) break; + if (permuteGpuIds(g, 0, ngpus-1, romeTopoModels+i, &romeTopo, &time, match_nbio)) break; } gettimeofday(&tve, NULL); float t = (tve.tv_sec - tvs.tv_sec)*1E3 + (tve.tv_usec - tvs.tv_usec)/1E3; @@ -1072,11 +1085,8 @@ static ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopo } char line[1024]; -#ifdef ENABLE_TRACE - sprintf(line, "Found matching Rome model index %d in %.2fms (%d iter) with GPU mapping: ", i, t, time); -#else + //sprintf(line, "Found matching Rome model index %d in %.2fms (%d iter) with GPU mapping: ", i, t, time); sprintf(line, "Found matching Rome model index %d with GPU mapping: ", i); -#endif int offset = strlen(line); for (int k = 0; k < ngpus; k++) { sprintf(line+offset, "%d ", g[k]);