Add Topologies for 16-GPU gfx942 SuperNode (#1417)
* Add Topologies for 16-GPU gfx942 SuperNode - Add GigaIO topologies to tools/topo_expl for dev and testing - Add GigaIO Columba 16 GPU romeModel and adjust topology matching algorithm in rome_models for 16 GPU system - Fix bug which failed to match Rome Model when using subsets of system resources (i.e. ROCR_VISIBLE_DEVICES is set) - Fixes for topo_expl * Fix bug w/ 1H16P
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
28594b26b3
Коммит
a05329bd0d
+174
-66
@@ -1,5 +1,6 @@
|
||||
/*
|
||||
Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2024 GigaIO Networks, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -1122,6 +1123,8 @@ RCCL_PARAM(ModelReversalDisable, "MODEL_REVERSAL_DISABLE", 0);
|
||||
|
||||
/* Parse user defined rings. Format is like :
|
||||
* "0 1|1 0|0 1 2 3|3 2 1 0|N0 0 2 3 1 N1|1 3 2 0|0 1 2 3 4 5 6 7|N2 7 6 5 4 3 2 1 0 N1"
|
||||
* GPUs are numberd from 0 to ngpus, represening GPU-index for the current communicator
|
||||
* gpu_map translates between GPU-index and rocm-smi index
|
||||
* Network interfaces can be optionally specified by N prefix.
|
||||
* Rings with a non-matching number of gpus are ignored so we can provide
|
||||
* rings for multiple cases.
|
||||
@@ -1187,7 +1190,7 @@ ncclResult_t parseGraph(const char* str, struct ncclTopoSystem* system, struct n
|
||||
// Ignore if gpus are duplicate
|
||||
for (int i=0; i<r; i++)
|
||||
if (gpus[i] == g) goto newchannel;
|
||||
// remap if needed
|
||||
// find rocm-smi id if needed
|
||||
if (gpu_map) g = gpu_map[g];
|
||||
// Translate gpu numbers into ranks
|
||||
int j = 0;
|
||||
@@ -1487,7 +1490,8 @@ ncclResult_t parseChordalRing(struct ncclTopoSystem* system, struct ncclTopoGrap
|
||||
}
|
||||
|
||||
|
||||
static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRomeModel* romeTopo, char *pattern) {
|
||||
static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRomeModel* romeTopo,
|
||||
char *pattern, int *devids) {
|
||||
pattern[0] = 0; // pattern will be NULL for invalid topology
|
||||
romeTopo->nGpus = system->nodes[GPU].count;
|
||||
romeTopo->nCpus = system->nodes[CPU].count;
|
||||
@@ -1568,13 +1572,9 @@ static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRo
|
||||
int count = 0;
|
||||
for (n = 0; n < romeTopo->nGpus; n++) {
|
||||
romeTopo->connMatrix[i*romeTopo->nGpus+n] = 0;
|
||||
struct ncclTopoLink* link;
|
||||
for (link = node->links; link->remNode; link++) {
|
||||
if (link->remNode->gpu.dev == n) break;
|
||||
}
|
||||
if (!link->remNode) continue;
|
||||
if (link->type != LINK_NVL) continue;
|
||||
romeTopo->connMatrix[i*romeTopo->nGpus+n] = link->bw/ncclTopoXGMISpeed(node->gpu.gcn);
|
||||
struct ncclTopoLinkList *path = node->paths[GPU] + gpu_scores[n].g;
|
||||
if (path->type != LINK_NVL) continue;
|
||||
romeTopo->connMatrix[i*romeTopo->nGpus+n] = path->bw/ncclTopoXGMISpeed(node->gpu.gcn);
|
||||
count ++;
|
||||
}
|
||||
if (romeTopo->nLinks < count) romeTopo->nLinks = count;
|
||||
@@ -1608,6 +1608,10 @@ static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRo
|
||||
}
|
||||
pattern[romeTopo->nCpus*2] = 0;
|
||||
|
||||
if (devids)
|
||||
for (int i = 0; i<romeTopo->nGpus; i++)
|
||||
devids[i] = gpu_scores[i].dev;
|
||||
|
||||
// compute gdr level matrix
|
||||
for (int i = 0; i < romeTopo->nNics; i++) {
|
||||
int n = net_scores[i].n;
|
||||
@@ -1758,7 +1762,6 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
|
||||
constexpr int NUMA_PERMUTE_COUNT = 24;
|
||||
constexpr int TOTAL_PERMUTE_COUNT = (NUMA_PERMUTE_COUNT*NUMA_PERMUTE_COUNT);
|
||||
|
||||
static char ringRemap[256];
|
||||
int i;
|
||||
|
||||
const int ngpus = system->nodes[GPU].count;
|
||||
@@ -1768,7 +1771,8 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
|
||||
// number of GPUs and NICs on each numa node is used as first screening pattern
|
||||
struct rcclRomeModel romeTopo;
|
||||
char pattern[256];
|
||||
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern));
|
||||
int devids[NUMA_CPUS * NUMA_GPUS];
|
||||
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern, devids));
|
||||
|
||||
// only match for system with 8 GPUs and 2 CPUs
|
||||
if (ngpus != 8 || ncpus != NUMA_CPUS) return ncclSuccess;
|
||||
@@ -1782,7 +1786,7 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
|
||||
int *all_gpu_permutations = (int *)malloc(TOTAL_PERMUTE_COUNT*NUMA_CPUS*NUMA_GPUS*sizeof(int));
|
||||
struct timeval tvs, tve;
|
||||
gettimeofday(&tvs, NULL);
|
||||
std::vector<int> r(ngpus), g(ngpus);
|
||||
std::vector<int> r(ngpus), g(ngpus), rdm(ngpus);
|
||||
for (i = 0; i < sizeof(romeTopoModels)/sizeof(romeTopoModels[0]); i++) {
|
||||
if (romeTopo.nCpus != romeTopoModels[i].nCpus || romeTopo.nGpus != romeTopoModels[i].nGpus ||
|
||||
romeTopo.nNics != romeTopoModels[i].nNics || romeTopo.nLinks != romeTopoModels[i].nLinks) continue;
|
||||
@@ -1854,13 +1858,14 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
|
||||
//printf("No solution in %.2fms\n", t);
|
||||
return ncclSuccess;
|
||||
}
|
||||
for (int m = 0; m<ngpus; m++) rdm[m] = devids[g8[m]];
|
||||
|
||||
char line[1024];
|
||||
//sprintf(line, "Found matching Rome model index %d in %.2fms with GPU mapping: ", i, t);
|
||||
sprintf(line, "Found matching Rome model index %d with GPU mapping: ", i);
|
||||
int offset = strlen(line);
|
||||
for (int k = 0; k < ngpus; k++) {
|
||||
sprintf(line+offset, "%d ", g8[k]);
|
||||
sprintf(line+offset, "%d ", rdm[k]);
|
||||
offset = strlen(line);
|
||||
}
|
||||
if (nnets > 1) {
|
||||
@@ -1881,35 +1886,35 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
|
||||
// Attempt to use rail-optimized rings if they exist
|
||||
if (system->nHosts % 2 == 0) {
|
||||
// For even number of nodes, alternate forward/reverse on ringBase
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g8, nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm.data(), nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
}
|
||||
else {
|
||||
// For odd number of nodes, check first to see if ringTail1 and ringTail2 are defined
|
||||
if (system->nHosts == 1 || romeTopoModels[i].ringTail1 == nullptr || romeTopoModels[i].ringTail2 == nullptr) {
|
||||
if (system->nHosts > 1)
|
||||
INFO(NCCL_GRAPH, "[WARN] Dropping back due to lack of support for odd-number of nodes for model index %d\n", i);
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g8, nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm.data(), nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (system->hostIdx == (system->nHosts - 1)) {
|
||||
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail1, system, graph, g8, nnets > 1 ? n : NULL, 0));
|
||||
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail1, system, graph, rdm.data(), nnets > 1 ? n : NULL, 0));
|
||||
} else if (system->hostIdx == (system->nHosts - 2)) {
|
||||
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail2, system, graph, g8, nnets > 1 ? n : NULL, 0));
|
||||
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail2, system, graph, rdm.data(), nnets > 1 ? n : NULL, 0));
|
||||
} else {
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g8, nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm.data(), nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case NCCL_TOPO_PATTERN_BALANCED_TREE:
|
||||
if (romeTopoModels[i].treeBase != nullptr) {
|
||||
NCCLCHECK(parseGraphLight(romeTopoModels[i].treeBase, system, graph, g8));
|
||||
NCCLCHECK(parseGraphLight(romeTopoModels[i].treeBase, system, graph, rdm.data()));
|
||||
if (graph->nChannels) return ncclSuccess;
|
||||
}
|
||||
|
||||
// Fall back to tree from ringBase
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g8, nnets > 1 ? n : NULL, 0));
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm.data(), nnets > 1 ? n : NULL, 0));
|
||||
// Override GDR distance if requested
|
||||
if (checkOption(romeTopoModels[i].options, "netOverride")) {
|
||||
for (int i = 0; i < system->nodes[NET].count; i++) {
|
||||
@@ -1936,7 +1941,6 @@ ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
|
||||
}
|
||||
|
||||
ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph* graph, const char *ringBase) {
|
||||
static char ringRemap[64];
|
||||
int i;
|
||||
|
||||
int ngpus = system->nodes[GPU].count;
|
||||
@@ -1956,7 +1960,8 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
|
||||
// number of GPUs and NICs on each numa node is used as first screening pattern
|
||||
struct rcclRomeModel romeTopo;
|
||||
char pattern[256];
|
||||
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern));
|
||||
int devids[NCCL_TOPO_MAX_NODES];
|
||||
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern, devids));
|
||||
|
||||
// recognize system as Rome 4P2H even if no matching model
|
||||
if (ngpus > 4 && romeTopo.nLinks) system->type |= RCCL_TOPO_4P2H_ROME;
|
||||
@@ -1964,7 +1969,7 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
|
||||
// detect multiple NICs per GPU
|
||||
int nnetspergpu = (nnets%ngpus == 0) ? nnets/ngpus : 0;
|
||||
|
||||
int g[NCCL_TOPO_MAX_NODES], n[NCCL_TOPO_MAX_NODES];
|
||||
int g[NCCL_TOPO_MAX_NODES], n[NCCL_TOPO_MAX_NODES], rdm[NCCL_TOPO_MAX_NODES];
|
||||
int time = 0;
|
||||
struct timeval tvs, tve;
|
||||
gettimeofday(&tvs, NULL);
|
||||
@@ -2038,13 +2043,14 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
|
||||
//printf("No solution in %.2fms (%d iter)\n", t, time);
|
||||
return ncclSuccess;
|
||||
}
|
||||
for (int m = 0; m<ngpus; m++) rdm[m] = devids[g[m]];
|
||||
|
||||
char line[1024];
|
||||
//sprintf(line, "Found matching Rome model index %d in %.2fms (%d iter) with GPU mapping: ", i, t, time);
|
||||
sprintf(line, "Found matching Rome model index %d with GPU mapping: ", i);
|
||||
int offset = strlen(line);
|
||||
for (int k = 0; k < ngpus; k++) {
|
||||
sprintf(line+offset, "%d ", g[k]);
|
||||
sprintf(line+offset, "%d ", rdm[k]);
|
||||
offset = strlen(line);
|
||||
}
|
||||
if (nnets > 1) {
|
||||
@@ -2064,23 +2070,23 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
|
||||
// Attempt to use rail-optimized rings if they exist
|
||||
if (system->nHosts % 2 == 0) {
|
||||
// For even number of nodes, alternate forward/reverse on ringBase
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm, nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
}
|
||||
else {
|
||||
// For odd number of nodes, check first to see if ringTail1 and ringTail2 are defined
|
||||
if (system->nHosts == 1 || romeTopoModels[i].ringTail1 == nullptr || romeTopoModels[i].ringTail2 == nullptr) {
|
||||
if (system->nHosts > 1)
|
||||
INFO(NCCL_GRAPH, "[WARN] Dropping back due to lack of support for odd-number of nodes for model index %d\n", i);
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm, nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (system->hostIdx == (system->nHosts - 1)) {
|
||||
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail1, system, graph, g, nnets > 1 ? n : NULL, 0));
|
||||
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail1, system, graph, rdm, nnets > 1 ? n : NULL, 0));
|
||||
} else if (system->hostIdx == (system->nHosts - 2)) {
|
||||
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail2, system, graph, g, nnets > 1 ? n : NULL, 0));
|
||||
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail2, system, graph, rdm, nnets > 1 ? n : NULL, 0));
|
||||
} else {
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm, nnets > 1 ? n : NULL, system->hostIdx % 2));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2092,7 +2098,7 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
|
||||
}
|
||||
|
||||
// Fall back to tree from ringBase
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, 0));
|
||||
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, rdm, nnets > 1 ? n : NULL, 0));
|
||||
// Override GDR distance if requested
|
||||
if (checkOption(romeTopoModels[i].options, "netOverride")) {
|
||||
for (int i = 0; i < system->nodes[NET].count; i++) {
|
||||
@@ -2121,7 +2127,6 @@ ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
|
||||
constexpr int NUMA_PERMUTE_COUNT = 24;
|
||||
constexpr int TOTAL_PERMUTE_COUNT = (NUMA_PERMUTE_COUNT*NUMA_PERMUTE_COUNT*NUMA_PERMUTE_COUNT*NUMA_PERMUTE_COUNT);
|
||||
|
||||
static char ringRemap[256];
|
||||
int i;
|
||||
|
||||
const int ngpus = system->nodes[GPU].count;
|
||||
@@ -2137,13 +2142,14 @@ ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
|
||||
// number of GPUs and NICs on each numa node is used as first screening pattern
|
||||
struct rcclRomeModel romeTopo;
|
||||
char pattern[256];
|
||||
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern));
|
||||
int devids[NUMA_CPUS*NUMA_GPUS];
|
||||
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern, devids));
|
||||
|
||||
// only match for system with 16 GPUs
|
||||
if (ngpus != 16 || ncpus != NUMA_CPUS) return ncclSuccess;
|
||||
|
||||
int gcnt = 0;
|
||||
int *g16, n[NCCL_TOPO_MAX_NODES];
|
||||
int *g16, n[NCCL_TOPO_MAX_NODES], rdm[NUMA_GPUS*NUMA_CPUS];
|
||||
int *all_gpu_permutations = (int *)malloc(TOTAL_PERMUTE_COUNT*NUMA_CPUS*NUMA_GPUS*sizeof(int));
|
||||
struct timeval tvs, tve;
|
||||
gettimeofday(&tvs, NULL);
|
||||
@@ -2223,13 +2229,14 @@ ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
|
||||
//printf("No solution in %.2fms\n", t);
|
||||
return ncclSuccess;
|
||||
}
|
||||
for (int m = 0; m<ngpus; m++) rdm[m] = devids[g16[m]];
|
||||
|
||||
char line[1024];
|
||||
//sprintf(line, "Found matching Rome model index %d in %.2fms with GPU mapping: ", i, t);
|
||||
sprintf(line, "Found matching Rome model index %d with GPU mapping: ", i);
|
||||
int offset = strlen(line);
|
||||
for (int k = 0; k < ngpus; k++) {
|
||||
sprintf(line+offset, "%d ", g16[k]);
|
||||
sprintf(line+offset, "%d ", rdm[k]);
|
||||
offset = strlen(line);
|
||||
}
|
||||
if (nnets > 1) {
|
||||
@@ -2244,21 +2251,56 @@ ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra
|
||||
system->type |= RCCL_TOPO_16P1H;
|
||||
parseOptions(system, romeTopoModels[i].options);
|
||||
|
||||
// create 16P1H based on reference and remapped ids
|
||||
NCCLCHECK(parseGraph(romeTopoModels[i].ringBase, system, graph, g16, nnets > 1 ? n : NULL, false));
|
||||
|
||||
if (romeTopoModels[i].treeBase != nullptr) NCCLCHECK(parseGraphLight(romeTopoModels[i].treeBase, system, graph, g16));
|
||||
// create 16P1H based on reference and remapped ids
|
||||
NCCLCHECK(parseGraph(romeTopoModels[i].ringBase, system, graph, rdm, nnets > 1 ? n : NULL, false));
|
||||
|
||||
if (romeTopoModels[i].treeBase != nullptr) NCCLCHECK(parseGraphLight(romeTopoModels[i].treeBase, system, graph, rdm));
|
||||
// clean up
|
||||
free(all_gpu_permutations);
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t find_gpu_hives(int *g_hives, int *ng_hives, struct rcclRomeModel *romeTopo) {
|
||||
const int ngpus = romeTopo->nGpus;
|
||||
const int gpus_per_hive = romeTopo->nLinks + 1;
|
||||
const int nhives = ngpus / gpus_per_hive;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < ngpus; i++) {
|
||||
int j, h;
|
||||
for (j = 0; j < nhives; j++) {
|
||||
if (ng_hives[j]) {
|
||||
if (romeTopo->connMatrix[i*ngpus+g_hives[j*gpus_per_hive]]) {
|
||||
g_hives[j*gpus_per_hive+ng_hives[j]] = i;
|
||||
ng_hives[j] ++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (j >= nhives) {
|
||||
for (h = 0; h < nhives; h++) {
|
||||
if (ng_hives[h] == 0) {
|
||||
g_hives[h*gpus_per_hive] = i;
|
||||
ng_hives[h]++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (h >= nhives)
|
||||
return ncclInternalError;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < nhives; i++) {
|
||||
if (ng_hives[i] != gpus_per_hive)
|
||||
return ncclInternalError;
|
||||
}
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* graph) {
|
||||
constexpr int NUM_HIVES = 4;
|
||||
constexpr int HIVE_GPUS = 4;
|
||||
|
||||
static char ringRemap[256];
|
||||
|
||||
const int ngpus = system->nodes[GPU].count;
|
||||
const int nnets = system->nodes[NET].count;
|
||||
|
||||
@@ -2271,13 +2313,15 @@ ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* grap
|
||||
// number of GPUs and NICs on each numa node is used as first screening pattern
|
||||
struct rcclRomeModel romeTopo;
|
||||
char pattern[256];
|
||||
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern));
|
||||
int devids[HIVE_GPUS*NUM_HIVES];
|
||||
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern, devids));
|
||||
|
||||
// only match for system with 16 GPUs
|
||||
if (ngpus != NUM_HIVES*HIVE_GPUS || nnets != NUM_HIVES*HIVE_GPUS) return ncclSuccess;
|
||||
|
||||
std::vector<int> g_hives(ngpus), n_hives(nnets);
|
||||
int ng_hives[NUM_HIVES];
|
||||
int rdm[NUM_HIVES*HIVE_GPUS];
|
||||
|
||||
// try to sort GPUs into hives
|
||||
for (int i = 0; i < NUM_HIVES; i++)
|
||||
@@ -2286,36 +2330,17 @@ ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* grap
|
||||
n_hives[i] = -1;
|
||||
for (int i = 0; i < ngpus; i++)
|
||||
g_hives[i] = -1;
|
||||
for (int i = 0; i < ngpus; i++) {
|
||||
int j, h;
|
||||
for (j = 0; j < NUM_HIVES; j++) {
|
||||
if (ng_hives[j]) {
|
||||
if (romeTopo.connMatrix[i*ngpus+g_hives[j*HIVE_GPUS]]) {
|
||||
g_hives[j*HIVE_GPUS+ng_hives[j]] = i;
|
||||
ng_hives[j]++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (j >= NUM_HIVES) {
|
||||
for (h = 0; h < NUM_HIVES; h++) {
|
||||
if (ng_hives[h] == 0) {
|
||||
g_hives[h*HIVE_GPUS] = i;
|
||||
ng_hives[h]++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (h >= NUM_HIVES)
|
||||
return ncclSuccess;
|
||||
}
|
||||
}
|
||||
|
||||
if (find_gpu_hives(g_hives.data(), ng_hives, &romeTopo))
|
||||
return ncclSuccess;
|
||||
|
||||
for (int i = 0; i < NUM_HIVES; i++)
|
||||
if (ng_hives[i] != 4) return ncclSuccess;
|
||||
// remap NET ids
|
||||
for (int i = 0; i < nnets; i++) {
|
||||
int j;
|
||||
for (j = 0; j < ngpus; j++) {
|
||||
if(romeTopo.gdrLevel[i*nnets+g_hives[j]] == 3) {
|
||||
if(romeTopo.gdrLevel[i*nnets+g_hives[j]] == PATH_PIX) {
|
||||
n_hives[j] = i;
|
||||
break;
|
||||
}
|
||||
@@ -2327,11 +2352,14 @@ ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* grap
|
||||
if (n_hives[i] == -1) return ncclSuccess;
|
||||
for (int i = 0; i < ngpus; i++)
|
||||
if (g_hives[i] == -1) return ncclSuccess;
|
||||
|
||||
for (int m = 0; m<ngpus; m++) rdm[m] = devids[g_hives[m]];
|
||||
|
||||
char line[1024];
|
||||
sprintf(line, "Found matching Rome model 4P4H with GPU mapping: ");
|
||||
int offset = strlen(line);
|
||||
for (int k = 0; k < ngpus; k++) {
|
||||
sprintf(line+offset, "%d ", g_hives[k]);
|
||||
sprintf(line+offset, "%d ", rdm[k]);
|
||||
offset = strlen(line);
|
||||
}
|
||||
if (nnets > 1) {
|
||||
@@ -2347,6 +2375,86 @@ ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* grap
|
||||
system->type |= RCCL_TOPO_4P2H_ROME;
|
||||
parseOptions(system, rome_model_68.options);
|
||||
// create 4P4H based on reference and remapped ids
|
||||
NCCLCHECK(parseGraph(rome_model_68.ringBase, system, graph, g_hives.data(), n_hives.data(), false));
|
||||
NCCLCHECK(parseGraph(rome_model_68.ringBase, system, graph, rdm, n_hives.data(), false));
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
static struct rcclRomeModel gio16gColumbaModel = {
|
||||
.nGpus = 16, .nCpus = 2, .nNics = 0, .nLinks = 7,
|
||||
.gpuIds = { 0x24000, 0x2a000, 0x37000, 0x3d000, 0x4a000, 0x50000, 0x5d000, 0x63000, 0xaf000, 0xb5000, 0xc2000, 0xc8000, 0xd5000, 0xdb000, 0xe8000, 0xee000, },
|
||||
.nicIds = { },
|
||||
.gpuNuma = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, },
|
||||
.nicNuma = { },
|
||||
.connMatrix = { 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, },
|
||||
.gdrLevel = { },
|
||||
.pattern = "8080",
|
||||
.ringBase = "0 1 2 3 4 5 7 6 12 13 14 15 8 9 11 10"
|
||||
"| 2 7 3 5 1 6 0 4 14 9 15 10 13 11 12 8"
|
||||
"| 1 3 0 7 4 6 2 5 13 15 11 8 10 14 12 9"
|
||||
"| 3 6 5 0 2 4 1 7 15 12 10 9 13 8 14 11"
|
||||
"| 10 11 9 8 15 14 13 12 6 7 5 4 3 2 1 0"
|
||||
"| 8 12 11 13 10 15 9 14 4 0 6 1 5 3 7 2"
|
||||
"| 9 12 14 10 8 11 15 13 5 2 6 4 7 0 3 1"
|
||||
"| 11 14 8 13 9 10 12 15 7 1 4 2 0 5 6 3",
|
||||
.options = "",
|
||||
};
|
||||
|
||||
ncclResult_t parseGIOTopos(struct ncclTopoSystem* system, struct ncclTopoGraph* graph) {
|
||||
ncclResult_t r;
|
||||
constexpr int NGPUS = 16;
|
||||
constexpr int NHIVES = 2;
|
||||
constexpr int GPUS_PER_HIVE = NGPUS/NHIVES;
|
||||
const int ngpus = system->nodes[GPU].count;
|
||||
const int ncpus = system->nodes[CPU].count;
|
||||
const int nnets = system->nodes[NET].count;
|
||||
|
||||
if (ngpus < NGPUS || ncpus > 2 || nnets != 0)
|
||||
return ncclSuccess;
|
||||
|
||||
struct rcclRomeModel romeTopo;
|
||||
char pattern[256];
|
||||
int devids[NGPUS];
|
||||
NCCLCHECK(parseRomeSystem(system, &romeTopo, pattern, devids));
|
||||
|
||||
if (romeTopo.nLinks + 1 != GPUS_PER_HIVE)
|
||||
return ncclSuccess;
|
||||
|
||||
int g_hives[NGPUS], ng_hives[NHIVES], rdm[NGPUS];
|
||||
memset(ng_hives, 0, sizeof(ng_hives));
|
||||
memset(g_hives, -1, sizeof(g_hives));
|
||||
NCCLCHECKGOTO(find_gpu_hives(g_hives, ng_hives, &romeTopo), r, exit);
|
||||
|
||||
for (int m = 0; m<ngpus; m++) rdm[m] = devids[g_hives[m]];
|
||||
|
||||
char line[1024];
|
||||
int offset;
|
||||
sprintf(line, "Found GigaIO rome topo, gpu_mapping: ");
|
||||
offset = strlen(line);
|
||||
for (int k = 0; k < ngpus; k++) {
|
||||
sprintf(line+offset, "%d ", g_hives[k]);
|
||||
offset = strlen(line);
|
||||
}
|
||||
INFO(NCCL_GRAPH,"%s", line);
|
||||
|
||||
system->type |= RCCL_TOPO_4P2H_ROME;
|
||||
|
||||
NCCLCHECKGOTO(parseGraph(gio16gColumbaModel.ringBase, system, graph, rdm, NULL, false), r, exit);
|
||||
|
||||
exit:
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/*
|
||||
Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2024 GigaIO Networks, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -29,5 +30,6 @@ ncclResult_t parseChordalRing(struct ncclTopoSystem* system, struct ncclTopoGrap
|
||||
ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);
|
||||
ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);
|
||||
ncclResult_t parseA2a8P(struct ncclTopoSystem* system, struct ncclTopoGraph* graph, const char *ringBase);
|
||||
ncclResult_t parseGIOTopos(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
* Modifications Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Modifications Copyright (c) 2019-2022 GigaIO Networks, Inc. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
@@ -983,6 +984,9 @@ ncclResult_t ncclTopoCompute(ncclTopoSystem* system, struct ncclTopoGraph* graph
|
||||
if (graph->nChannels) return ncclSuccess;
|
||||
// try to match 4H4P
|
||||
NCCLCHECK(parse4H4P(system, graph));
|
||||
if (graph->nChannels) return ncclSuccess;
|
||||
|
||||
NCCLCHECK(parseGIOTopos(system, graph));
|
||||
}
|
||||
if (graph->nChannels) return ncclSuccess;
|
||||
|
||||
|
||||
@@ -265,7 +265,7 @@ ncclResult_t collNetCanConnect(int* ret, struct ncclTopoSystem* topo, struct ncc
|
||||
ncclResult_t collNetSendSetup(struct ncclComm* comm, struct ncclTopoGraph* graph, struct ncclPeerInfo* myInfo, struct ncclPeerInfo* peerInfo, struct ncclConnect* connectInfo, struct ncclConnector* send, int channelId, int connIndex) {
|
||||
int netDev, useGdr = 0, proxy;
|
||||
|
||||
NCCLCHECK(ncclTopoGetNetDev(comm, myInfo->rank, graph, channelId, peerInfo->rank, &netDev, &proxy));
|
||||
NCCLCHECK(ncclTopoGetNetDev(comm, myInfo->rank, graph, channelId, peerInfo->rank, NULL, &netDev, &proxy));
|
||||
NCCLCHECK(ncclTopoCheckGdr(comm->topo, myInfo->busId, netDev, 1, &useGdr));
|
||||
|
||||
INFO(NCCL_INIT|NCCL_NET,"Coll %02d : %d [send] via COLLNET/%s/%d%s", channelId, myInfo->rank, "SHARP", netDev, useGdr ? "/GDRDMA" : "");
|
||||
@@ -275,7 +275,7 @@ ncclResult_t collNetSendSetup(struct ncclComm* comm, struct ncclTopoGraph* graph
|
||||
ncclResult_t collNetRecvSetup(struct ncclComm* comm, struct ncclTopoGraph* graph, struct ncclPeerInfo* myInfo, struct ncclPeerInfo* peerInfo, struct ncclConnect* connectInfo, struct ncclConnector* recv, int channelId, int connIndex) {
|
||||
int netDev, useGdr = 0, proxy;
|
||||
|
||||
NCCLCHECK(ncclTopoGetNetDev(comm, myInfo->rank, graph, channelId, peerInfo->rank, &netDev, &proxy));
|
||||
NCCLCHECK(ncclTopoGetNetDev(comm, myInfo->rank, graph, channelId, peerInfo->rank, NULL, &netDev, &proxy));
|
||||
NCCLCHECK(ncclTopoCheckGdr(comm->topo, myInfo->busId, netDev, 0, &useGdr));
|
||||
|
||||
INFO(NCCL_INIT|NCCL_NET,"Coll %02d : %d [receive] via COLLNET/%s/%d%s", channelId, myInfo->rank, "SHARP", netDev, useGdr ? "/GDRDMA" : "");
|
||||
|
||||
@@ -0,0 +1,273 @@
|
||||
<system version="2">
|
||||
<cpu host_hash="0x58e137603ea7ac35" numaid="1" affinity="ffffffff,ffffff00,00000000,0000ffff,ffffffff,ff000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="17">
|
||||
<pci busid="0000:32:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:34:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:36:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x1ce5" subsystem_device="0xff83" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:38:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:3a:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:3c:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:3e:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:40:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:42:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="0" sm="304" gcn="gfx942" arch="38911" rank="0" gdr="0">
|
||||
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:43:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:45:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x15d9" subsystem_device="0x1d2a" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:47:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:49:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="1" sm="304" gcn="gfx942" arch="38911" rank="1" gdr="0">
|
||||
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:4a:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:4c:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="2" sm="304" gcn="gfx942" arch="38911" rank="2" gdr="0">
|
||||
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:4e:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:50:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="3" sm="304" gcn="gfx942" arch="38911" rank="3" gdr="0">
|
||||
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:53:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:55:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:57:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:59:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:5b:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:5d:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="4" sm="304" gcn="gfx942" arch="38911" rank="4" gdr="0">
|
||||
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:5e:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:60:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x15d9" subsystem_device="0x1d2a" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:62:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:64:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="5" sm="304" gcn="gfx942" arch="38911" rank="5" gdr="0">
|
||||
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:65:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:67:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="6" sm="304" gcn="gfx942" arch="38911" rank="6" gdr="0">
|
||||
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6b:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:69:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:6b:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="7" sm="304" gcn="gfx942" arch="38911" rank="7" gdr="0">
|
||||
<xgmi target="0000:42:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:49:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:4c:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:50:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:64:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:67:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:6e:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:70:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:72:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:74:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:76:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:78:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="8" sm="304" gcn="gfx942" arch="38911" rank="8" gdr="0">
|
||||
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:79:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:7b:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x15d9" subsystem_device="0x1d2a" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:7d:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:7f:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="9" sm="304" gcn="gfx942" arch="38911" rank="9" gdr="0">
|
||||
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:80:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:82:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="10" sm="304" gcn="gfx942" arch="38911" rank="10" gdr="0">
|
||||
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:84:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:86:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="11" sm="304" gcn="gfx942" arch="38911" rank="11" gdr="0">
|
||||
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:89:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:8b:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:8d:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:8f:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:91:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:93:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="12" sm="304" gcn="gfx942" arch="38911" rank="12" gdr="0">
|
||||
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:94:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:96:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x15d9" subsystem_device="0x1d2a" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:98:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:9a:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="13" sm="304" gcn="gfx942" arch="38911" rank="13" gdr="0">
|
||||
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:9b:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:9d:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="14" sm="304" gcn="gfx942" arch="38911" rank="14" gdr="0">
|
||||
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:a1:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:9f:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:a1:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="15" sm="304" gcn="gfx942" arch="38911" rank="15" gdr="0">
|
||||
<xgmi target="0000:78:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:7f:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:82:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:86:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:93:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9d:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</cpu>
|
||||
<cpu host_hash="0x58e137603ea7ac35" numaid="0" affinity="00000000,000000ff,ffffffff,ffff0000,00000000,00ffffff,ffffffff" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="17">
|
||||
<pci busid="0000:01:00.0" class="0x020000" vendor="0x8086" device="0x1521" subsystem_vendor="0x8086" subsystem_device="0x0002" link_speed="5.0 GT/s PCIe" link_width="4">
|
||||
<nic>
|
||||
<net name="enp1s0f0" dev="0" speed="1000" port="0" latency="0.000000" guid="0x0" maxconn="65536" gdr="0"/>
|
||||
</nic>
|
||||
</pci>
|
||||
</cpu>
|
||||
</system>
|
||||
@@ -0,0 +1,313 @@
|
||||
<system version="2">
|
||||
<cpu numaid="1" affinity="ffffffff,ffffff00,00000000,0000ffff,ffffffff,ff000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="17">
|
||||
<pci busid="0000:45:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:47:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:49:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x1ce5" subsystem_device="0xff83" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:4b:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x1ce5" subsystem_device="0xff83" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:4d:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:4f:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:51:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:53:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:55:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:57:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="0" sm="94" gcn="gfx942" arch="38911" rank="0" gdr="0">
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:58:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:5b:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:5d:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="1" sm="94" gcn="gfx942" arch="38911" rank="1" gdr="0">
|
||||
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:60:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:62:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:64:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:66:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:68:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:6a:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="2" sm="94" gcn="gfx942" arch="38911" rank="2" gdr="0">
|
||||
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:6b:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:6e:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:70:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="3" sm="94" gcn="gfx942" arch="38911" rank="3" gdr="0">
|
||||
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:75:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x1ce5" subsystem_device="0xff83" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:77:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:79:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:7b:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:7d:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:7f:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:81:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="4" sm="94" gcn="gfx942" arch="38911" rank="4" gdr="0">
|
||||
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:82:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:85:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:87:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="5" sm="94" gcn="gfx942" arch="38911" rank="5" gdr="0">
|
||||
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:8a:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:8c:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:8e:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:90:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:92:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:94:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="6" sm="94" gcn="gfx942" arch="38911" rank="6" gdr="0">
|
||||
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:95:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:98:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:9a:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="7" sm="94" gcn="gfx942" arch="38911" rank="7" gdr="0">
|
||||
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:9f:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x11f8" subsystem_device="0x4000" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:a1:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:a3:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:a5:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:a7:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:a9:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:ab:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="8" sm="94" gcn="gfx942" arch="38911" rank="8" gdr="0">
|
||||
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:ac:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:af:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:b1:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="9" sm="94" gcn="gfx942" arch="38911" rank="9" gdr="0">
|
||||
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:b4:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:b6:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:b8:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:ba:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:bc:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:be:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="10" sm="94" gcn="gfx942" arch="38911" rank="10" gdr="0">
|
||||
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:c4:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:bf:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:c2:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:c4:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="11" sm="94" gcn="gfx942" arch="38911" rank="11" gdr="0">
|
||||
<xgmi target="0000:57:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:5d:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:6a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:70:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ab:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:b1:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:be:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:c9:00.0" class="0x060400" vendor="0x11f8" device="0x4000" subsystem_vendor="0x11f8" subsystem_device="0x4000" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:cb:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:cd:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:cf:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:d1:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:d3:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:d5:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="12" sm="94" gcn="gfx942" arch="38911" rank="12" gdr="0">
|
||||
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:d6:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:d9:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:db:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="13" sm="94" gcn="gfx942" arch="38911" rank="13" gdr="0">
|
||||
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:de:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1ce5" subsystem_device="0xff82" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:e0:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa032" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:e2:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x0104" link_speed="16.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:e4:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:e6:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:e8:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="14" sm="94" gcn="gfx942" arch="38911" rank="14" gdr="0">
|
||||
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:ee:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
<pci busid="0000:e9:00.0" class="0x060400" vendor="0x1000" device="0xc030" subsystem_vendor="0x1000" subsystem_device="0x1003" link_speed="5.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:ec:00.0" class="0x060400" vendor="0x1022" device="0x1500" subsystem_vendor="0x1022" subsystem_device="0x1500" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<pci busid="0000:ee:00.0" class="0x120000" vendor="0x1002" device="0x74a1" subsystem_vendor="0x1002" subsystem_device="0x74a1" link_speed="32.0 GT/s PCIe" link_width="16">
|
||||
<gpu dev="15" sm="94" gcn="gfx942" arch="38911" rank="15" gdr="0">
|
||||
<xgmi target="0000:81:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:87:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:94:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:9a:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:d5:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:db:00.0" count="1" tclass="0x120000"/>
|
||||
<xgmi target="0000:e8:00.0" count="1" tclass="0x120000"/>
|
||||
</gpu>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</pci>
|
||||
</cpu>
|
||||
<cpu numaid="0" affinity="00000000,000000ff,ffffffff,ffff0000,00000000,00ffffff,ffffffff" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="17">
|
||||
<pci busid="0000:01:00.0" class="0x020000" vendor="0x8086" device="0x1521" subsystem_vendor="0x8086" subsystem_device="0x0002" link_speed="5.0 GT/s PCIe" link_width="4">
|
||||
<nic>
|
||||
<net name="enp1s0f0" dev="0" speed="1000" port="0" latency="0.000000" guid="0x0" maxconn="65536" gdr="0"/>
|
||||
</nic>
|
||||
</pci>
|
||||
</cpu>
|
||||
</system>
|
||||
@@ -161,6 +161,8 @@ NodeModelDesc model_descs[] = {
|
||||
{5, "topo_8p_940.xml", "5 nodes gfx940 8P"},
|
||||
{2, "topo_8p_942.xml", "2 nodes gfx942 8P"},
|
||||
{2, "topo_8p_942_1.xml", "2 nodes gfx942 8P Alt."},
|
||||
{1, "topo_16p_gio-1s-1rp-cascade.xml", "GigaIO 16P gfx942"},
|
||||
{1, "topo_16p_gio-3s-1rp-split-flat.xml", "GigaIO 16P gfx942 1rp-split"},
|
||||
};
|
||||
|
||||
NCCL_PARAM(MaxCTAs, "MAX_CTAS", MAXCHANNELS);
|
||||
|
||||
@@ -224,9 +224,9 @@ void ncclDebugLog(ncclDebugLogLevel level, unsigned long flags, const char *file
|
||||
|
||||
ncclResult_t ncclTopoGetSystem(const char* xmlTopoFile, struct ncclTopoSystem** system) {
|
||||
struct ncclXml* xml;
|
||||
NCCLCHECK(ncclCalloc(&xml, 1));
|
||||
NCCLCHECK(xmlAlloc(&xml, NCCL_GRAPH_XML_MAX_NODES));
|
||||
NCCLCHECK(ncclTopoGetXmlFromFile(xmlTopoFile, xml, 0));
|
||||
NCCLCHECK(ncclTopoGetSystemFromXml(xml, system));
|
||||
NCCLCHECK(ncclTopoGetSystemFromXml(xml, system, 0));
|
||||
free(xml);
|
||||
return ncclSuccess;
|
||||
}
|
||||
@@ -1092,7 +1092,7 @@ ncclResult_t initTransportsRank_3(struct ncclComm* comm, struct allGatherInfo *a
|
||||
|
||||
NCCLCHECKGOTO(ncclCalloc(&rings, nranks*MAXCHANNELS), ret, fail);
|
||||
|
||||
NCCLCHECKGOTO(ncclTopoPostset(comm, nodesFirstRank, nodesTreePatterns, allTopoRanks, rings, graphs, nc), ret, fail);
|
||||
NCCLCHECKGOTO(ncclTopoPostset(comm, nodesFirstRank, nodesTreePatterns, allTopoRanks, rings, graphs, NULL, nc), ret, fail);
|
||||
if (comm->topo->treeDefined) NCCLCHECK(ncclTreeBasePostset(comm, &treeGraph));
|
||||
|
||||
// AllGather3 - end
|
||||
@@ -1358,3 +1358,11 @@ bool mscclForceEnabled() {
|
||||
ncclResult_t mscclSchedulerInit(ncclComm_t comm, int* numChannelsRequired) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
uint64_t getHostHash(void) {
|
||||
return 0xdeadbeef;
|
||||
}
|
||||
|
||||
ncclResult_t bootstrapIntraNodeAllGather(void* commState, int *ranks, int rank, int nranks, void* allData, int size) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user