Add XGMI sys type and clean up detection code (#597)

Este commit está contenido en:
Wenkai Du
2022-08-12 09:52:29 -07:00
cometido por GitHub
padre f6b9686482
commit f5c0b243a8
Se han modificado 3 ficheros con 60 adiciones y 62 borrados
+23 -27
Ver fichero
@@ -653,34 +653,30 @@ ncclResult_t ncclTopoTrimSystem(struct ncclTopoSystem* system, struct ncclComm*
NCCLCHECK(ncclTopoCpuType(system, &arch, &vendor, &model));
if (arch == NCCL_TOPO_CPU_ARCH_X86 && vendor == NCCL_TOPO_CPU_VENDOR_AMD
&& model == NCCL_TOPO_CPU_TYPE_ROME) {
int gdr, ret = 1;
int net;
for (int g = 0; g < system->nodes[GPU].count; g++) {
NCCLCHECK(ncclTopoGetLocalNet(system, system->nodes[GPU].nodes[g].gpu.rank[0], &net));
NCCLCHECK(ncclTopoCheckGdr(system, system->nodes[GPU].nodes[g].id, net, 1, &gdr));
if (!gdr) {
ret = 0;
break;
int gdr = 1;
bool allXgmi = true;
// detect if all GPUs are connected by XGMI
for (int i = 0; i < system->nodes[GPU].count && allXgmi; i++) {
int cudaDev1 = system->nodes[GPU].nodes[i].gpu.dev;
for (int j = 0; j < system->nodes[GPU].count && allXgmi; j++) {
if (i == j) continue;
int cudaDev2 = system->nodes[GPU].nodes[j].gpu.dev;
bool isXGMI;
NCCLCHECK(ncclTopoGetLinkType(comm->topo, cudaDev1, cudaDev2, &isXGMI));
allXgmi &= isXGMI;
}
}
if (ret) {
bool allXgmi = true;
// don't trim NICs unless all GPUs are connected by XGMI
for (int i = 0; i < system->nodes[GPU].count && allXgmi; i++) {
int cudaDev1 = system->nodes[GPU].nodes[i].gpu.dev;
for (int j = 0; j < system->nodes[GPU].count && allXgmi; j++) {
if (i == j) continue;
int cudaDev2 = system->nodes[GPU].nodes[j].gpu.dev;
bool isXGMI;
NCCLCHECK(ncclTopoGetLinkType(comm->topo, cudaDev1, cudaDev2, &isXGMI));
allXgmi &= isXGMI;
}
}
if (!allXgmi) {
remove = 0;
system->type |= RCCL_TOPO_GDR_ALL;
INFO(NCCL_GRAPH, "GDR is available on all GPUs");
}
if (allXgmi) system->type |= RCCL_TOPO_XGMI_ALL;
for (int g = 0; g < system->nodes[GPU].count; g++) {
int net;
NCCLCHECK(ncclTopoGetLocalNet(system, system->nodes[GPU].nodes[g].gpu.rank[0], &net));
NCCLCHECK(ncclTopoCheckGdr(system, system->nodes[GPU].nodes[g].id, net, 1, &gdr));
if (!gdr) break;
}
if (gdr && !allXgmi) {
remove = 0;
system->type |= RCCL_TOPO_GDR_ALL;
INFO(NCCL_GRAPH, "GDR is available on all GPUs");
}
}
@@ -755,7 +751,7 @@ ncclResult_t ncclTopoComputeP2pChannels(struct ncclComm* comm) {
}
}
if (comm->topo->nodes[GPU].count == comm->topo->nRanks && (comm->topo->type & RCCL_TOPO_4P2H_ROME) && !(comm->topo->type & RCCL_TOPO_GDR_ALL) && (comm->topo->nodes[GPU].nodes[0].gpu.gcn != 910)) {
if (comm->topo->nodes[GPU].count == comm->topo->nRanks && (comm->topo->type & RCCL_TOPO_4P2H_ROME) && !(comm->topo->type & RCCL_TOPO_GDR_ALL) && !(comm->topo->type & RCCL_TOPO_XGMI_ALL)) {
// Adjust P2P channels on Rome
comm->p2pnChannelsPerPeer = 2;
comm->p2pnChannels = 2;
+36 -35
Ver fichero
@@ -1171,7 +1171,6 @@ ncclResult_t ncclTopoGetIntraNetDev(struct ncclTopoSystem* system, int rank, str
ncclResult_t ncclTopoGetLinkType(struct ncclTopoSystem* system, int cudaDev1, int cudaDev2, bool* isXGMI, int maxInter, int nInter, int *inter) {
int interGpus[MAX_XGMI_INTER_GPUS+1];
int ngpus = system->nodes[GPU].count;
*isXGMI = false;
// check for direct XGMI connection
for (int i=0; i<ngpus; i++) {
if (system->nodes[GPU].nodes[i].gpu.dev == cudaDev1) {
@@ -1188,42 +1187,44 @@ ncclResult_t ncclTopoGetLinkType(struct ncclTopoSystem* system, int cudaDev1, in
}
}
}
if (maxInter == 0) return ncclSuccess;
// check if there are intermediate GPUs that are connected to both
bool res1, res2, res3;
int j;
for (j=0; j<nInter; j++) {
bool res1;
ncclTopoGetLinkType(system, inter[j], inter[j+1], &res1, 0);
if (!res1) break;
}
if (j<nInter) return ncclSuccess;
if (nInter > 0 && inter != nullptr) {
ncclTopoGetLinkType(system, inter[nInter], cudaDev2, &res2, 0);
if (res2) {
*isXGMI = true;
return ncclSuccess;
// try intermediate GPUs
if (maxInter) {
// check if there are intermediate GPUs that are connected to both
bool res1, res2, res3;
int j;
for (j=0; j<nInter; j++) {
ncclTopoGetLinkType(system, inter[j], inter[j+1], &res1, 0);
if (!res1) break;
}
memcpy(interGpus+1, inter+1, sizeof(int)*nInter);
}
interGpus[0] = cudaDev1;
// add one more intermediate GPU recursively until reaching max depth
nInter++;
if (nInter+2 > ngpus || nInter > MAX_XGMI_INTER_GPUS || nInter > maxInter) return ncclSuccess;
for (int i=0; i<ngpus; i++) {
int dev = system->nodes[GPU].nodes[i].gpu.dev;
// skip duplicated GPU
if (dev == cudaDev2) continue;
for (j=0; j<nInter; j++)
if (dev == interGpus[j]) break;
if (j<nInter) continue;
// check connectivity with intermediate GPUs
interGpus[nInter] = dev;
ncclTopoGetLinkType(system, cudaDev1, cudaDev2, &res3, maxInter, nInter, interGpus);
if (res3) {
*isXGMI = true;
return ncclSuccess;
if (j<nInter) return ncclSuccess;
if (nInter > 0 && inter != nullptr) {
ncclTopoGetLinkType(system, inter[nInter], cudaDev2, &res2, 0);
if (res2) {
*isXGMI = true;
return ncclSuccess;
}
memcpy(interGpus+1, inter+1, sizeof(int)*nInter);
}
interGpus[0] = cudaDev1;
// add one more intermediate GPU recursively until reaching max depth
nInter++;
if (nInter+2 > ngpus || nInter > MAX_XGMI_INTER_GPUS || nInter > maxInter) return ncclSuccess;
for (int i=0; i<ngpus; i++) {
int dev = system->nodes[GPU].nodes[i].gpu.dev;
// skip duplicated GPU
if (dev == cudaDev2) continue;
for (j=0; j<nInter; j++)
if (dev == interGpus[j]) break;
if (j<nInter) continue;
// check connectivity with intermediate GPUs
interGpus[nInter] = dev;
ncclTopoGetLinkType(system, cudaDev1, cudaDev2, &res3, maxInter, nInter, interGpus);
if (res3) {
*isXGMI = true;
return ncclSuccess;
}
}
}
*isXGMI = false;
return ncclSuccess;
}
+1
Ver fichero
@@ -104,6 +104,7 @@ struct ncclTopoLinkList {
#define RCCL_TOPO_GDR_ALL 4
#define RCCL_TOPO_16P1H 8
#define RCCL_TOPO_FORCE_INTRA 16
#define RCCL_TOPO_XGMI_ALL 32
#define RCCL_TOPO_MAX_RANKS_PER_GPU 8
struct ncclTopoNode {