Add Topologies for 16-GPU gfx942 SuperNode (#1417)

* Add Topologies for 16-GPU gfx942 SuperNode

- Add GigaIO topologies to tools/topo_expl for dev and testing
- Add GigaIO Columba 16 GPU romeModel and adjust topology
  matching algorithm in rome_models for 16 GPU system
- Fix bug which failed to match Rome Model when using subsets
  of system resources (i.e. ROCR_VISIBLE_DEVICES is set)
- Fixes for topo_expl

* Fix bug w/ 1H16P
Этот коммит содержится в:
Benjamin Kitor
2024-12-03 13:12:03 -08:00
коммит произвёл GitHub
родитель 28594b26b3
Коммит a05329bd0d
8 изменённых файлов: 781 добавлений и 71 удалений
+174 -66
Просмотреть файл
@@ -1,5 +1,6 @@
/*
Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2024 GigaIO Networks, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -1122,6 +1123,8 @@ RCCL_PARAM(ModelReversalDisable, "MODEL_REVERSAL_DISABLE", 0);
/* Parse user defined rings. Format is like :
* "0 1|1 0|0 1 2 3|3 2 1 0|N0 0 2 3 1 N1|1 3 2 0|0 1 2 3 4 5 6 7|N2 7 6 5 4 3 2 1 0 N1"
* GPUs are numbered from 0 to ngpus, representing GPU-index for the current communicator
* gpu_map translates between GPU-index and rocm-smi index
* Network interfaces can be optionally specified by N prefix.
* Rings with a non-matching number of gpus are ignored so we can provide
* rings for multiple cases.
@@ -1187,7 +1190,7 @@ ncclResult_t parseGraph(const char* str, struct ncclTopoSystem* system, struct n
// Ignore if gpus are duplicate
for (int i=0; i<r; i++)
if (gpus[i] == g) goto newchannel;
// remap if needed
// find rocm-smi id if needed
if (gpu_map) g = gpu_map[g];
// Translate gpu numbers into ranks
int j = 0;
@@ -1487,7 +1490,8 @@ ncclResult_t parseChordalRing(struct ncclTopoSystem* system, struct ncclTopoGrap
}
static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRomeModel* romeTopo, char *pattern) {
static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRomeModel* romeTopo,
char *pattern, int *devids) {
pattern[0] = 0; // pattern will be NULL for invalid topology
romeTopo->nGpus = system->nodes[GPU].count;
romeTopo->nCpus = system->nodes[CPU].count;
@@ -1568,13 +1572,9 @@ static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRo
int count = 0;
for (n = 0; n < romeTopo->nGpus; n++) {
romeTopo->connMatrix[i*romeTopo->nGpus+n] = 0;
struct ncclTopoLink* link;
for (link = node->links; link->remNode; link++) {
if (link->remNode->gpu.dev == n) break;
}
if (!link->remNode) continue;
if (link->type != LINK_NVL) continue;
romeTopo->connMatrix[i*romeTopo->nGpus+n] = link->bw/ncclTopoXGMISpeed(node->gpu.gcn);
struct ncclTopoLinkList *path = node->paths[GPU] + gpu_scores[n].g;
if (path->type != LINK_NVL) continue;
romeTopo->connMatrix[i*romeTopo->nGpus+n] = path->bw/ncclTopoXGMISpeed(node->gpu.gcn);
count ++;
}
if (romeTopo->nLinks < count) romeTopo->nLinks = count;
@@ -1608,6 +1608,10 @@ static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRo
}
pattern[romeTopo->nCpus*2] = 0;
if (devids)
for (int i = 0; i<romeTopo->nGpus; i++)
devids[i] = gpu_scores[i].dev;
// compute gdr level matrix
for (int i = 0; i < romeTopo->nNics; i++) {
int n = net_scores[i].n;
@@ -1758,7 +1762,6 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
constexpr int NUMA_PERMUTE_COUNT = 24;
constexpr int TOTAL_PERMUTE_COUNT = (NUMA_PERMUTE_COUNT*NUMA_PERMUTE_COUNT);
static char ringRemap[256];
int i;
const int ngpus = system->nodes[GPU].count;
@@ -1768,7 +1771,8 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
// number of GPUs and NICs on each numa node is used as first screening pattern
struct rcclRomeModel romeTopo;
char pattern[256];
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern));
int devids[NUMA_CPUS * NUMA_GPUS];
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern, devids));
// only match for system with 8 GPUs and 2 CPUs
if (ngpus != 8 || ncpus != NUMA_CPUS) return ncclSuccess;
@@ -1782,7 +1786,7 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
int *all_gpu_permutations = (int *)malloc(TOTAL_PERMUTE_COUNT*NUMA_CPUS*NUMA_GPUS*sizeof(int));
struct timeval tvs, tve;
gettimeofday(&tvs, NULL);
std::vector<int> r(ngpus), g(ngpus);
std::vector<int> r(ngpus), g(ngpus), rdm(ngpus);
for (i = 0; i < sizeof(romeTopoModels)/sizeof(romeTopoModels[0]); i++) {
if (romeTopo.nCpus != romeTopoModels[i].nCpus || romeTopo.nGpus != romeTopoModels[i].nGpus ||
romeTopo.nNics != romeTopoModels[i].nNics || romeTopo.nLinks != romeTopoModels[i].nLinks) continue;
@@ -1854,13 +1858,14 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
//printf("No solution in %.2fms\n", t);
return ncclSuccess;
}
for (int m = 0; m<ngpus; m++) rdm[m] = devids[g8[m]];
char line[1024];
//sprintf(line, "Found matching Rome model index %d in %.2fms with GPU mapping: ", i, t);
sprintf(line, "Found matching Rome model index %d with GPU mapping: ", i);
int offset = strlen(line);
for (int k = 0; k < ngpus; k++) {
sprintf(line+offset, "%d ", g8[k]);
sprintf(line+offset, "%d ", rdm[k]);
offset = strlen(line);
}
if (nnets > 1) {
@@ -1881,35 +1886,35 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
// Attempt to use rail-optimized rings if they exist
if (system->nHosts % 2 == 0) {
// For even number of nodes, alternate forward/reverse on ringBase
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g8, nnets > 1 ? n : NULL, system->hostIdx % 2));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm.data(), nnets > 1 ? n : NULL, system->hostIdx % 2));
}
else {
// For odd number of nodes, check first to see if ringTail1 and ringTail2 are defined
if (system->nHosts == 1 || romeTopoModels[i].ringTail1 == nullptr || romeTopoModels[i].ringTail2 == nullptr) {
if (system->nHosts > 1)
INFO(NCCL_GRAPH, "[WARN] Dropping back due to lack of support for odd-number of nodes for model index %d\n", i);
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g8, nnets > 1 ? n : NULL, system->hostIdx % 2));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm.data(), nnets > 1 ? n : NULL, system->hostIdx % 2));
}
else
{
if (system->hostIdx == (system->nHosts - 1)) {
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail1, system, graph, g8, nnets > 1 ? n : NULL, 0));
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail1, system, graph, rdm.data(), nnets > 1 ? n : NULL, 0));
} else if (system->hostIdx == (system->nHosts - 2)) {
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail2, system, graph, g8, nnets > 1 ? n : NULL, 0));
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail2, system, graph, rdm.data(), nnets > 1 ? n : NULL, 0));
} else {
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g8, nnets > 1 ? n : NULL, system->hostIdx % 2));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm.data(), nnets > 1 ? n : NULL, system->hostIdx % 2));
}
}
}
break;
case NCCL_TOPO_PATTERN_BALANCED_TREE:
if (romeTopoModels[i].treeBase != nullptr) {
NCCLCHECK(parseGraphLight(romeTopoModels[i].treeBase, system, graph, g8));
NCCLCHECK(parseGraphLight(romeTopoModels[i].treeBase, system, graph, rdm.data()));
if (graph->nChannels) return ncclSuccess;
}
// Fall back to tree from ringBase
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g8, nnets > 1 ? n : NULL, 0));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm.data(), nnets > 1 ? n : NULL, 0));
// Override GDR distance if requested
if (checkOption(romeTopoModels[i].options, "netOverride")) {
for (int i = 0; i < system->nodes[NET].count; i++) {
@@ -1936,7 +1941,6 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
}
ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph* graph, const char *ringBase) {
static char ringRemap[64];
int i;
int ngpus = system->nodes[GPU].count;
@@ -1956,7 +1960,8 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
// number of GPUs and NICs on each numa node is used as first screening pattern
struct rcclRomeModel romeTopo;
char pattern[256];
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern));
int devids[NCCL_TOPO_MAX_NODES];
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern, devids));
// recognize system as Rome 4P2H even if no matching model
if (ngpus > 4 && romeTopo.nLinks) system->type |= RCCL_TOPO_4P2H_ROME;
@@ -1964,7 +1969,7 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
// detect multiple NICs per GPU
int nnetspergpu = (nnets%ngpus == 0) ? nnets/ngpus : 0;
int g[NCCL_TOPO_MAX_NODES], n[NCCL_TOPO_MAX_NODES];
int g[NCCL_TOPO_MAX_NODES], n[NCCL_TOPO_MAX_NODES], rdm[NCCL_TOPO_MAX_NODES];
int time = 0;
struct timeval tvs, tve;
gettimeofday(&tvs, NULL);
@@ -2038,13 +2043,14 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
//printf("No solution in %.2fms (%d iter)\n", t, time);
return ncclSuccess;
}
for (int m = 0; m<ngpus; m++) rdm[m] = devids[g[m]];
char line[1024];
//sprintf(line, "Found matching Rome model index %d in %.2fms (%d iter) with GPU mapping: ", i, t, time);
sprintf(line, "Found matching Rome model index %d with GPU mapping: ", i);
int offset = strlen(line);
for (int k = 0; k < ngpus; k++) {
sprintf(line+offset, "%d ", g[k]);
sprintf(line+offset, "%d ", rdm[k]);
offset = strlen(line);
}
if (nnets > 1) {
@@ -2064,23 +2070,23 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
// Attempt to use rail-optimized rings if they exist
if (system->nHosts % 2 == 0) {
// For even number of nodes, alternate forward/reverse on ringBase
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm, nnets > 1 ? n : NULL, system->hostIdx % 2));
}
else {
// For odd number of nodes, check first to see if ringTail1 and ringTail2 are defined
if (system->nHosts == 1 || romeTopoModels[i].ringTail1 == nullptr || romeTopoModels[i].ringTail2 == nullptr) {
if (system->nHosts > 1)
INFO(NCCL_GRAPH, "[WARN] Dropping back due to lack of support for odd-number of nodes for model index %d\n", i);
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm, nnets > 1 ? n : NULL, system->hostIdx % 2));
}
else
{
if (system->hostIdx == (system->nHosts - 1)) {
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail1, system, graph, g, nnets > 1 ? n : NULL, 0));
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail1, system, graph, rdm, nnets > 1 ? n : NULL, 0));
} else if (system->hostIdx == (system->nHosts - 2)) {
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail2, system, graph, g, nnets > 1 ? n : NULL, 0));
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail2, system, graph, rdm, nnets > 1 ? n : NULL, 0));
} else {
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm, nnets > 1 ? n : NULL, system->hostIdx % 2));
}
}
}
@@ -2092,7 +2098,7 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
}
// Fall back to tree from ringBase
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, 0));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm, nnets > 1 ? n : NULL, 0));
// Override GDR distance if requested
if (checkOption(romeTopoModels[i].options, "netOverride")) {
for (int i = 0; i < system->nodes[NET].count; i++) {
@@ -2121,7 +2127,6 @@ ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
constexpr int NUMA_PERMUTE_COUNT = 24;
constexpr int TOTAL_PERMUTE_COUNT = (NUMA_PERMUTE_COUNT*NUMA_PERMUTE_COUNT*NUMA_PERMUTE_COUNT*NUMA_PERMUTE_COUNT);
static char ringRemap[256];
int i;
const int ngpus = system->nodes[GPU].count;
@@ -2137,13 +2142,14 @@ ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
// number of GPUs and NICs on each numa node is used as first screening pattern
struct rcclRomeModel romeTopo;
char pattern[256];
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern));
int devids[NUMA_CPUS*NUMA_GPUS];
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern, devids));
// only match for system with 16 GPUs
if (ngpus != 16 || ncpus != NUMA_CPUS) return ncclSuccess;
int gcnt = 0;
int *g16, n[NCCL_TOPO_MAX_NODES];
int *g16, n[NCCL_TOPO_MAX_NODES], rdm[NUMA_GPUS*NUMA_CPUS];
int *all_gpu_permutations = (int *)malloc(TOTAL_PERMUTE_COUNT*NUMA_CPUS*NUMA_GPUS*sizeof(int));
struct timeval tvs, tve;
gettimeofday(&tvs, NULL);
@@ -2223,13 +2229,14 @@ ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
//printf("No solution in %.2fms\n", t);
return ncclSuccess;
}
for (int m = 0; m<ngpus; m++) rdm[m] = devids[g16[m]];
char line[1024];
//sprintf(line, "Found matching Rome model index %d in %.2fms with GPU mapping: ", i, t);
sprintf(line, "Found matching Rome model index %d with GPU mapping: ", i);
int offset = strlen(line);
for (int k = 0; k < ngpus; k++) {
sprintf(line+offset, "%d ", g16[k]);
sprintf(line+offset, "%d ", rdm[k]);
offset = strlen(line);
}
if (nnets > 1) {
@@ -2244,21 +2251,56 @@ ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
system->type |= RCCL_TOPO_16P1H;
parseOptions(system, romeTopoModels[i].options);
// create 16P1H based on reference and remapped ids
NCCLCHECK(parseGraph(romeTopoModels[i].ringBase, system, graph, g16, nnets > 1 ? n : NULL, false));
if (romeTopoModels[i].treeBase != nullptr) NCCLCHECK(parseGraphLight(romeTopoModels[i].treeBase, system, graph, g16));
// create 16P1H based on reference and remapped ids
NCCLCHECK(parseGraph(romeTopoModels[i].ringBase, system, graph, rdm, nnets > 1 ? n : NULL, false));
if (romeTopoModels[i].treeBase != nullptr) NCCLCHECK(parseGraphLight(romeTopoModels[i].treeBase, system, graph, rdm));
// clean up
free(all_gpu_permutations);
return ncclSuccess;
}
/*
 * Sort the GPUs of a parsed topology into "hives" (groups of directly linked GPUs).
 *
 * A hive holds romeTopo->nLinks + 1 GPUs and the number of hives is
 * nGpus / (nLinks + 1). GPU i joins the first non-empty hive whose FIRST member
 * has a non-zero connMatrix entry with i; otherwise it opens the first empty hive.
 *
 * g_hives   [out] hive membership, hive h occupies slots
 *                 [h*gpus_per_hive, (h+1)*gpus_per_hive); must hold at least
 *                 nhives*gpus_per_hive entries.
 * ng_hives  [in/out] per-hive member count. It is read before being written, so
 *                 the caller must zero it before the call.
 * romeTopo  [in]  parsed system model providing nGpus, nLinks and connMatrix.
 *
 * Returns ncclSuccess when every hive ends up with exactly gpus_per_hive members,
 * ncclInternalError when a GPU cannot be placed or the hive sizes do not match.
 */
ncclResult_t find_gpu_hives(int *g_hives, int *ng_hives, struct rcclRomeModel *romeTopo) {
  const int ngpus = romeTopo->nGpus;
  const int gpus_per_hive = romeTopo->nLinks + 1;
  const int nhives = ngpus / gpus_per_hive;

  for (int gpu = 0; gpu < ngpus; gpu++) {
    int j, h;
    // Try to join an existing hive whose first member is linked to this GPU
    for (j = 0; j < nhives; j++) {
      if (ng_hives[j] == 0) continue;
      if (!romeTopo->connMatrix[gpu*ngpus+g_hives[j*gpus_per_hive]]) continue;
      // A full hive cannot take another member: writing here would spill into
      // the next hive's slots (or past the end of g_hives for the last hive).
      // The final size check would reject this layout anyway, so fail early.
      if (ng_hives[j] >= gpus_per_hive)
        return ncclInternalError;
      g_hives[j*gpus_per_hive+ng_hives[j]] = gpu;
      ng_hives[j]++;
      break;
    }
    if (j < nhives) continue;

    // No linked hive found: this GPU becomes the first member of an empty hive
    for (h = 0; h < nhives; h++) {
      if (ng_hives[h] == 0) {
        g_hives[h*gpus_per_hive] = gpu;
        ng_hives[h]++;
        break;
      }
    }
    // More groups than hives: topology does not match
    if (h >= nhives)
      return ncclInternalError;
  }

  // Every hive must be completely populated
  for (int h = 0; h < nhives; h++) {
    if (ng_hives[h] != gpus_per_hive)
      return ncclInternalError;
  }
  return ncclSuccess;
}
ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* graph) {
constexpr int NUM_HIVES = 4;
constexpr int HIVE_GPUS = 4;
static char ringRemap[256];
const int ngpus = system->nodes[GPU].count;
const int nnets = system->nodes[NET].count;
@@ -2271,13 +2313,15 @@ ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* grap
// number of GPUs and NICs on each numa node is used as first screening pattern
struct rcclRomeModel romeTopo;
char pattern[256];
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern));
int devids[HIVE_GPUS*NUM_HIVES];
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern, devids));
// only match for system with 16 GPUs
if (ngpus != NUM_HIVES*HIVE_GPUS || nnets != NUM_HIVES*HIVE_GPUS) return ncclSuccess;
std::vector<int> g_hives(ngpus), n_hives(nnets);
int ng_hives[NUM_HIVES];
int rdm[NUM_HIVES*HIVE_GPUS];
// try to sort GPUs into hives
for (int i = 0; i < NUM_HIVES; i++)
@@ -2286,36 +2330,17 @@ ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* grap
n_hives[i] = -1;
for (int i = 0; i < ngpus; i++)
g_hives[i] = -1;
for (int i = 0; i < ngpus; i++) {
int j, h;
for (j = 0; j < NUM_HIVES; j++) {
if (ng_hives[j]) {
if (romeTopo.connMatrix[i*ngpus+g_hives[j*HIVE_GPUS]]) {
g_hives[j*HIVE_GPUS+ng_hives[j]] = i;
ng_hives[j]++;
break;
}
}
}
if (j >= NUM_HIVES) {
for (h = 0; h < NUM_HIVES; h++) {
if (ng_hives[h] == 0) {
g_hives[h*HIVE_GPUS] = i;
ng_hives[h]++;
break;
}
}
if (h >= NUM_HIVES)
return ncclSuccess;
}
}
if (find_gpu_hives(g_hives.data(), ng_hives, &romeTopo))
return ncclSuccess;
for (int i = 0; i < NUM_HIVES; i++)
if (ng_hives[i] != 4) return ncclSuccess;
// remap NET ids
for (int i = 0; i < nnets; i++) {
int j;
for (j = 0; j < ngpus; j++) {
if(romeTopo.gdrLevel[i*nnets+g_hives[j]] == 3) {
if(romeTopo.gdrLevel[i*nnets+g_hives[j]] == PATH_PIX) {
n_hives[j] = i;
break;
}
@@ -2327,11 +2352,14 @@ ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* grap
if (n_hives[i] == -1) return ncclSuccess;
for (int i = 0; i < ngpus; i++)
if (g_hives[i] == -1) return ncclSuccess;
for (int m = 0; m<ngpus; m++) rdm[m] = devids[g_hives[m]];
char line[1024];
sprintf(line, "Found matching Rome model 4P4H with GPU mapping: ");
int offset = strlen(line);
for (int k = 0; k < ngpus; k++) {
sprintf(line+offset, "%d ", g_hives[k]);
sprintf(line+offset, "%d ", rdm[k]);
offset = strlen(line);
}
if (nnets > 1) {
@@ -2347,6 +2375,86 @@ ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* grap
system->type |= RCCL_TOPO_4P2H_ROME;
parseOptions(system, rome_model_68.options);
// create 4P4H based on reference and remapped ids
NCCLCHECK(parseGraph(rome_model_68.ringBase, system, graph, g_hives.data(), n_hives.data(), false));
NCCLCHECK(parseGraph(rome_model_68.ringBase, system, graph, rdm, n_hives.data(), false));
return ncclSuccess;
}
/*
 * Reference model for the GigaIO Columba 16-GPU system: 16 GPUs, 2 CPUs,
 * no NICs, 7 links per GPU.
 *
 * connMatrix is a 16x16 table (row = GPU, column = peer). GPUs 0-7 are all
 * linked to each other, GPUs 8-15 are all linked to each other, and there are
 * no entries between the two groups.
 *
 * ringBase holds 8 rings separated by '|', each listing all 16 GPU indices.
 * In the code visible here only ringBase is consumed (by parseGIOTopos).
 */
static struct rcclRomeModel gio16gColumbaModel = {
.nGpus = 16, .nCpus = 2, .nNics = 0, .nLinks = 7,
.gpuIds = { 0x24000, 0x2a000, 0x37000, 0x3d000, 0x4a000, 0x50000, 0x5d000, 0x63000, 0xaf000, 0xb5000, 0xc2000, 0xc8000, 0xd5000, 0xdb000, 0xe8000, 0xee000, },
.nicIds = { },
.gpuNuma = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, },
.nicNuma = { },
.connMatrix = { 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, },
.gdrLevel = { },
.pattern = "8080",
// One ring per '|' separated segment; indices refer to the model's GPU order
.ringBase = "0 1 2 3 4 5 7 6 12 13 14 15 8 9 11 10"
"| 2 7 3 5 1 6 0 4 14 9 15 10 13 11 12 8"
"| 1 3 0 7 4 6 2 5 13 15 11 8 10 14 12 9"
"| 3 6 5 0 2 4 1 7 15 12 10 9 13 8 14 11"
"| 10 11 9 8 15 14 13 12 6 7 5 4 3 2 1 0"
"| 8 12 11 13 10 15 9 14 4 0 6 1 5 3 7 2"
"| 9 12 14 10 8 11 15 13 5 2 6 4 7 0 3 1"
"| 11 14 8 13 9 10 12 15 7 1 4 2 0 5 6 3",
.options = "",
};
/*
 * Try to match the system against the GigaIO 16-GPU model and, on a match,
 * build the graph from gio16gColumbaModel.ringBase.
 *
 * A system is a candidate when it has exactly 16 GPUs, at most 2 CPUs, no NICs,
 * and parseRomeSystem reports nLinks + 1 == 8 GPUs per hive. The GPUs are then
 * sorted into 2 hives and translated to device ids before calling parseGraph.
 *
 * Always returns ncclSuccess: a system that does not match is simply left
 * untouched so the caller can continue with other strategies. Failures of
 * find_gpu_hives / parseGraph are routed to the exit label and are likewise
 * not propagated (callers check graph->nChannels to see whether a graph was
 * produced).
 */
ncclResult_t parseGIOTopos(struct ncclTopoSystem* system, struct ncclTopoGraph* graph) {
  ncclResult_t r;
  constexpr int NGPUS = 16;
  constexpr int NHIVES = 2;
  constexpr int GPUS_PER_HIVE = NGPUS/NHIVES;

  const int ngpus = system->nodes[GPU].count;
  const int ncpus = system->nodes[CPU].count;
  const int nnets = system->nodes[NET].count;

  // devids, g_hives and rdm hold exactly NGPUS entries and ng_hives exactly
  // NHIVES, so anything other than a 16-GPU system must be rejected here;
  // accepting ngpus > NGPUS would overflow those arrays below.
  if (ngpus != NGPUS || ncpus > 2 || nnets != 0)
    return ncclSuccess;

  struct rcclRomeModel romeTopo;
  char pattern[256];
  int devids[NGPUS];
  NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern, devids));

  // Each hive must be a fully linked group of GPUS_PER_HIVE GPUs
  if (romeTopo.nLinks + 1 != GPUS_PER_HIVE)
    return ncclSuccess;

  int g_hives[NGPUS], ng_hives[NHIVES], rdm[NGPUS];
  memset(ng_hives, 0, sizeof(ng_hives));
  memset(g_hives, -1, sizeof(g_hives));   // all bytes 0xff => every entry is -1

  NCCLCHECKGOTO(find_gpu_hives(g_hives, ng_hives, &romeTopo), r, exit);

  // Translate hive-ordered GPU indices into the device ids reported by parseRomeSystem
  for (int m = 0; m < ngpus; m++) rdm[m] = devids[g_hives[m]];

  // NOTE: declared without initializers on purpose, the goto above jumps over them
  char line[1024];
  int offset;
  offset = snprintf(line, sizeof(line), "Found GigaIO rome topo, gpu_mapping: ");
  // Log the mapping that is actually handed to parseGraph
  for (int k = 0; k < ngpus && offset < (int)sizeof(line); k++)
    offset += snprintf(line+offset, sizeof(line)-offset, "%d ", rdm[k]);
  INFO(NCCL_GRAPH,"%s", line);

  system->type |= RCCL_TOPO_4P2H_ROME;
  NCCLCHECKGOTO(parseGraph(gio16gColumbaModel.ringBase, system, graph, rdm, NULL, false), r, exit);

exit:
  return ncclSuccess;
}
+2
Просмотреть файл
@@ -1,5 +1,6 @@
/*
Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2024 GigaIO Networks, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -29,5 +30,6 @@ ncclResult_t parseChordalRing(struct ncclTopoSystem* system, struct ncclTopoGrap
ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);
ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);
ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* graph, const char *ringBase);
ncclResult_t parseGIOTopos(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);
#endif
+4
Просмотреть файл
@@ -1,6 +1,7 @@
/*************************************************************************
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
* Modifications Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved.
* Modifications Copyright (c) 2019-2022 GigaIO Networks, Inc. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
@@ -983,6 +984,9 @@ ncclResult_t ncclTopoCompute(ncclTopoSystem* system, struct ncclTopoGraph* graph
if (graph->nChannels) return ncclSuccess;
// try to match 4H4P
NCCLCHECK(parse4H4P(system, graph));
if (graph->nChannels) return ncclSuccess;
NCCLCHECK(parseGIOTopos(system, graph));
}
if (graph->nChannels) return ncclSuccess;
+2 -2
Просмотреть файл
@@ -265,7 +265,7 @@ ncclResult_t collNetCanConnect(int* ret, struct ncclTopoSystem* topo, struct ncc
ncclResult_t collNetSendSetup(struct ncclComm* comm, struct ncclTopoGraph* graph, struct ncclPeerInfo* myInfo, struct ncclPeerInfo* peerInfo, struct ncclConnect* connectInfo, struct ncclConnector* send, int channelId, int connIndex) {
int netDev, useGdr = 0, proxy;
NCCLCHECK(ncclTopoGetNetDev(comm, myInfo->rank, graph, channelId, peerInfo->rank, &netDev, &proxy));
NCCLCHECK(ncclTopoGetNetDev(comm, myInfo->rank, graph, channelId, peerInfo->rank, NULL, &netDev, &proxy));
NCCLCHECK(ncclTopoCheckGdr(comm->topo, myInfo->busId, netDev, 1, &useGdr));
INFO(NCCL_INIT|NCCL_NET,"Coll %02d : %d [send] via COLLNET/%s/%d%s", channelId, myInfo->rank, "SHARP", netDev, useGdr ? "/GDRDMA" : "");
@@ -275,7 +275,7 @@ ncclResult_t collNetSendSetup(struct ncclComm* comm, struct ncclTopoGraph* graph
ncclResult_t collNetRecvSetup(struct ncclComm* comm, struct ncclTopoGraph* graph, struct ncclPeerInfo* myInfo, struct ncclPeerInfo* peerInfo, struct ncclConnect* connectInfo, struct ncclConnector* recv, int channelId, int connIndex) {
int netDev, useGdr = 0, proxy;
NCCLCHECK(ncclTopoGetNetDev(comm, myInfo->rank, graph, channelId, peerInfo->rank, &netDev, &proxy));
NCCLCHECK(ncclTopoGetNetDev(comm, myInfo->rank, graph, channelId, peerInfo->rank, NULL, &netDev, &proxy));
NCCLCHECK(ncclTopoCheckGdr(comm->topo, myInfo->busId, netDev, 0, &useGdr));
INFO(NCCL_INIT|NCCL_NET,"Coll %02d : %d [receive] via COLLNET/%s/%d%s", channelId, myInfo->rank, "SHARP", netDev, useGdr ? "/GDRDMA" : "");
+273
Просмотреть файл
@@ -0,0 +1,273 @@
<system version="2">
<cpu host_hash="0x58e137603ea7ac35" numaid="1" affinity="ffffffff,ffffff00,00000000,0000ffff,ffffffff,ff000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="17">
<pci busid="0000:32:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:34:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:36:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x1ce5" subsystem_device="0xff83" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:38:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:3a:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:3c:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:3e:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:40:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:42:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="0" sm="304" gcn="gfx942" arch="38911" rank="0" gdr="0">
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:43:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:45:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x15d9" subsystem_device="0x1d2a" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:47:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:49:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="1" sm="304" gcn="gfx942" arch="38911" rank="1" gdr="0">
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:4a:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:4c:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="2" sm="304" gcn="gfx942" arch="38911" rank="2" gdr="0">
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:4e:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:50:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="3" sm="304" gcn="gfx942" arch="38911" rank="3" gdr="0">
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
<pci busid="0000:53:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:55:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:57:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:59:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:5b:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:5d:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="4" sm="304" gcn="gfx942" arch="38911" rank="4" gdr="0">
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:5e:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:60:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x15d9" subsystem_device="0x1d2a" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:62:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:64:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="5" sm="304" gcn="gfx942" arch="38911" rank="5" gdr="0">
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:65:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:67:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="6" sm="304" gcn="gfx942" arch="38911" rank="6" gdr="0">
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:69:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:6b:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="7" sm="304" gcn="gfx942" arch="38911" rank="7" gdr="0">
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
<pci busid="0000:6e:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:70:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:72:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:74:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:76:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:78:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="8" sm="304" gcn="gfx942" arch="38911" rank="8" gdr="0">
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:79:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:7b:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x15d9" subsystem_device="0x1d2a" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:7d:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:7f:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="9" sm="304" gcn="gfx942" arch="38911" rank="9" gdr="0">
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:80:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:82:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="10" sm="304" gcn="gfx942" arch="38911" rank="10" gdr="0">
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:84:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:86:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="11" sm="304" gcn="gfx942" arch="38911" rank="11" gdr="0">
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
<pci busid="0000:89:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:8b:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:8d:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:8f:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:91:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:93:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="12" sm="304" gcn="gfx942" arch="38911" rank="12" gdr="0">
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:94:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:96:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x15d9" subsystem_device="0x1d2a" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:98:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:9a:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="13" sm="304" gcn="gfx942" arch="38911" rank="13" gdr="0">
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:9b:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:9d:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="14" sm="304" gcn="gfx942" arch="38911" rank="14" gdr="0">
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:9f:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:a1:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="15" sm="304" gcn="gfx942" arch="38911" rank="15" gdr="0">
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</cpu>
<cpu host_hash="0x58e137603ea7ac35" numaid="0" affinity="00000000,000000ff,ffffffff,ffff0000,00000000,00ffffff,ffffffff" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="17">
<pci busid="0000:01:00.0" class="0x020000" vendor="0x8086" device="0x1521" subsystem_vendor="0x8086" subsystem_device="0x0002" link_speed="5.0 GT/s PCIe" link_width="4">
<nic>
<net name="enp1s0f0" dev="0" speed="1000" port="0" latency="0.000000" guid="0x0" maxconn="65536" gdr="0"/>
</nic>
</pci>
</cpu>
</system>
+313
Просмотреть файл
@@ -0,0 +1,313 @@
<system version="2">
<cpu numaid="1" affinity="ffffffff,ffffff00,00000000,0000ffff,ffffffff,ff000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="17">
<pci busid="0000:45:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:47:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:49:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x1ce5" subsystem_device="0xff83" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:4b:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x1ce5" subsystem_device="0xff83" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:4d:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:4f:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:51:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:53:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:55:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:57:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="0" sm="94" gcn="gfx942" arch="38911" rank="0" gdr="0">
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:58:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:5b:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:5d:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="1" sm="94" gcn="gfx942" arch="38911" rank="1" gdr="0">
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
<pci busid="0000:60:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:62:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:64:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:66:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:68:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:6a:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="2" sm="94" gcn="gfx942" arch="38911" rank="2" gdr="0">
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:6b:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:6e:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:70:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="3" sm="94" gcn="gfx942" arch="38911" rank="3" gdr="0">
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
<pci busid="0000:75:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x1ce5" subsystem_device="0xff83" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:77:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:79:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:7b:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:7d:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:7f:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:81:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="4" sm="94" gcn="gfx942" arch="38911" rank="4" gdr="0">
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:82:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:85:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:87:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="5" sm="94" gcn="gfx942" arch="38911" rank="5" gdr="0">
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
<pci busid="0000:8a:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:8c:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:8e:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:90:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:92:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:94:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="6" sm="94" gcn="gfx942" arch="38911" rank="6" gdr="0">
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:95:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:98:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:9a:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="7" sm="94" gcn="gfx942" arch="38911" rank="7" gdr="0">
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
<pci busid="0000:9f:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x11f8" subsystem_device="0x4000" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:a1:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:a3:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:a5:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:a7:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:a9:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:ab:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="8" sm="94" gcn="gfx942" arch="38911" rank="8" gdr="0">
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:ac:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:af:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:b1:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="9" sm="94" gcn="gfx942" arch="38911" rank="9" gdr="0">
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
<pci busid="0000:b4:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:b6:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:b8:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:ba:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:bc:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:be:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="10" sm="94" gcn="gfx942" arch="38911" rank="10" gdr="0">
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:bf:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:c2:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:c4:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="11" sm="94" gcn="gfx942" arch="38911" rank="11" gdr="0">
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
<pci busid="0000:c9:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x11f8" subsystem_device="0x4000" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:cb:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:cd:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:cf:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:d1:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:d3:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:d5:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="12" sm="94" gcn="gfx942" arch="38911" rank="12" gdr="0">
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:d6:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:d9:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:db:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="13" sm="94" gcn="gfx942" arch="38911" rank="13" gdr="0">
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
<pci busid="0000:de:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:e0:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:e2:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:e4:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:e6:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:e8:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="14" sm="94" gcn="gfx942" arch="38911" rank="14" gdr="0">
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:e9:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
<pci busid="0000:ec:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
<pci busid="0000:ee:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
<gpu dev="15" sm="94" gcn="gfx942" arch="38911" rank="15" gdr="0">
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
</gpu>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</pci>
</cpu>
<!-- CPU entry with numaid 0: its only PCI child (busid 0000:01:00.0) carries a single nic/net element, "enp1s0f0" (dev 0, speed 1000, gdr 0). No gpu elements are listed under this entry. -->
<cpu numaid="0" affinity="00000000,000000ff,ffffffff,ffff0000,00000000,00ffffff,ffffffff" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="17">
<pci busid="0000:01:00.0" class="0x020000" vendor="0x8086" device="0x1521" subsystem_vendor="0x8086" subsystem_device="0x0002" link_speed="5.0 GT/s PCIe" link_width="4">
<nic>
<net name="enp1s0f0" dev="0" speed="1000" port="0" latency="0.000000" guid="0x0" maxconn="65536" gdr="0"/>
</nic>
</pci>
</cpu>
</system>
+2
Просмотреть файл
@@ -161,6 +161,8 @@ NodeModelDesc model_descs[] = {
{5, "topo_8p_940.xml", "5 nodes gfx940 8P"},
{2, "topo_8p_942.xml", "2 nodes gfx942 8P"},
{2, "topo_8p_942_1.xml", "2 nodes gfx942 8P Alt."},
{1, "topo_16p_gio-1s-1rp-cascade.xml", "GigaIO 16P gfx942"},
{1, "topo_16p_gio-3s-1rp-split-flat.xml", "GigaIO 16P gfx942 1rp-split"},
};
NCCL_PARAM(MaxCTAs, "MAX_CTAS", MAXCHANNELS);
+11 -3
Просмотреть файл
@@ -224,9 +224,9 @@ void ncclDebugLog(ncclDebugLogLevel level, unsigned long flags, const char *file
// Loads the topology XML file named by xmlTopoFile into a temporary ncclXml
// object, converts it into a topology system stored through *system, then
// frees the temporary XML object and returns ncclSuccess.
//
// NOTE(review): this listing looks like a diff view whose +/- markers were
// lost. The ncclCalloc/xmlAlloc pair and the two ncclTopoGetSystemFromXml
// calls below are presumably old/new versions of the same statement rather
// than code that runs back to back; confirm against the real file.
ncclResult_t ncclTopoGetSystem(const char* xmlTopoFile, struct ncclTopoSystem** system) {
struct ncclXml* xml;
// Presumably the pre-change allocation (single zeroed ncclXml); TODO confirm.
NCCLCHECK(ncclCalloc(&xml, 1));
// Presumably the post-change allocation, sized by NCCL_GRAPH_XML_MAX_NODES; TODO confirm.
NCCLCHECK(xmlAlloc(&xml, NCCL_GRAPH_XML_MAX_NODES));
// Parse the file into xml; the meaning of the trailing 0 is not visible here.
NCCLCHECK(ncclTopoGetXmlFromFile(xmlTopoFile, xml, 0));
// Presumably the pre-change two-argument call; TODO confirm.
NCCLCHECK(ncclTopoGetSystemFromXml(xml, system));
// Presumably the post-change call with an extra trailing 0 argument; TODO confirm.
NCCLCHECK(ncclTopoGetSystemFromXml(xml, system, 0));
// NOTE(review): if NCCLCHECK returns early on failure, xml is not freed on
// those paths; verify against the macro definition.
free(xml);
return ncclSuccess;
}
@@ -1092,7 +1092,7 @@ ncclResult_t initTransportsRank_3(struct ncclComm* comm, struct allGatherInfo *a
NCCLCHECKGOTO(ncclCalloc(&rings, nranks*MAXCHANNELS), ret, fail);
NCCLCHECKGOTO(ncclTopoPostset(comm, nodesFirstRank, nodesTreePatterns, allTopoRanks, rings, graphs, nc), ret, fail);
NCCLCHECKGOTO(ncclTopoPostset(comm, nodesFirstRank, nodesTreePatterns, allTopoRanks, rings, graphs, NULL, nc), ret, fail);
if (comm->topo->treeDefined) NCCLCHECK(ncclTreeBasePostset(comm, &treeGraph));
// AllGather3 - end
@@ -1358,3 +1358,11 @@ bool mscclForceEnabled() {
// No-op implementation: neither comm nor numChannelsRequired is read or
// written, and the call always reports ncclSuccess.
ncclResult_t mscclSchedulerInit(ncclComm_t comm, int* numChannelsRequired) {
  (void)comm;
  (void)numChannelsRequired;
  const ncclResult_t status = ncclSuccess;
  return status;
}
// Placeholder host hash: every call returns the same fixed value, 0xdeadbeef.
uint64_t getHostHash(void) {
  const uint64_t fixedHash = 0xdeadbeef;
  return fixedHash;
}
// No-op implementation: none of the arguments is read and allData is left
// untouched; the call always reports ncclSuccess.
ncclResult_t bootstrapIntraNodeAllGather(void* commState, int *ranks, int rank, int nranks, void* allData, int size) {
  (void)commState;
  (void)ranks;
  (void)rank;
  (void)nranks;
  (void)allData;
  (void)size;
  const ncclResult_t status = ncclSuccess;
  return status;
}