Add new Rome model (#1304)

* Add another rome model and override

* Fix bug

* Fix typo

* Add ring

* Update ring

* Fix model matching

* Clean up

* Clean up

* Reverse rings for NCCL_RINGS input

* Only reverse NCCL_RINGS for ring graph

* Fix mapping issue when using NCCL_RINGS

* Add NCCL_RINGS_REMAP to handle inconsistent net names
This commit is contained in:
Wenkai Du
2024-08-23 08:45:43 +08:00
کامیت شده توسط GitHub
والد db840f024e
کامیت 532b70afb6
7فایلهای تغییر یافته به همراه46 افزوده شده و 10 حذف شده
+36 -6
مشاهده پرونده
@@ -841,6 +841,18 @@ static struct rcclRomeModel rome_model_85 = {
.options = "tuning=2",
};
/*
 * Rome topology model 87: describes a system with 8 GPUs, 2 CPUs, 4 NICs and
 * 7 links (see the count fields on the first line). The model is listed in
 * romeTopoModels[] so it can be considered during topology matching.
 */
static struct rcclRomeModel rome_model_87 = {
.nGpus = 8, .nCpus = 2, .nNics = 4, .nLinks = 7,
// One identifier per GPU (nGpus entries) and per NIC (nNics entries).
// NOTE(review): presumably encoded PCI bus IDs -- confirm against the matching code.
.gpuIds = { 0xa000, 0x80000, 0xa4000, 0xc8000, 0x10b000, 0x181000, 0x1a5000, 0x1c9000, },
.nicIds = { 0xc9000, 0x1a2000, 0x108000, 0x81000, },
// NUMA node index for each GPU / NIC, in the same order as gpuIds / nicIds:
// GPUs 0-3 on node 0, GPUs 4-7 on node 1; NICs 0 and 3 on node 0, NICs 1 and 2 on node 1.
.gpuNuma = { 0, 0, 0, 0, 1, 1, 1, 1, },
.nicNuma = { 0, 1, 1, 0, },
// 64 entries (nGpus * nGpus). Read as an 8x8 row-major matrix, every
// off-diagonal entry is 1 and every diagonal entry is 0.
.connMatrix = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, },
// 32 entries (nNics * nGpus) of PATH_* levels.
// NOTE(review): the row/column order (per NIC vs. per GPU) is not visible here -- confirm.
.gdrLevel = { PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, },
// NOTE(review): meaning of the pattern digits is not visible in this block -- confirm.
.pattern = "4242",
// '|'-separated ring descriptions. Each ring lists a permutation of the
// indices 0-7 (presumably GPU indices) bracketed by N<k> tokens (presumably
// NIC indices) -- confirm against parseGraph().
.ringBase = "N3 0 1 2 3 4 5 6 7 N1|N3 1 0 2 4 3 5 7 6 N1|N0 2 5 0 3 6 1 7 4 N2|N0 3 7 0 4 2 1 6 5 N2|N2 4 6 2 7 3 0 5 1 N3|N2 5 4 7 1 3 2 6 0 N3|N1 6 3 1 4 0 7 5 2 N0|N1 7 2 0 6 4 1 5 3 N0|N3 1 0 2 4 3 5 7 6 N1|N0 2 5 0 3 6 1 7 4 N2|N0 3 7 0 4 2 1 6 5 N2|N2 4 6 2 7 3 0 5 1 N3|N2 5 4 7 1 3 2 6 0 N3|N1 6 3 1 4 0 7 5 2 N0|N1 7 2 0 6 4 1 5 3 N0|N3 0 1 2 3 4 5 6 7 N1|N0 2 5 0 3 6 1 7 4 N2|N0 3 7 0 4 2 1 6 5 N2|N2 4 6 2 7 3 0 5 1 N3|N2 5 4 7 1 3 2 6 0 N3|N1 6 3 1 4 0 7 5 2 N0|N1 7 2 0 6 4 1 5 3 N0|N3 0 1 2 3 4 5 6 7 N1|N3 1 0 2 4 3 5 7 6 N1|N0 3 7 0 4 2 1 6 5 N2|N2 4 6 2 7 3 0 5 1 N3|N2 5 4 7 1 3 2 6 0 N3|N1 6 3 1 4 0 7 5 2 N0|N1 7 2 0 6 4 1 5 3 N0|N3 0 1 2 3 4 5 6 7 N1|N3 1 0 2 4 3 5 7 6 N1|N0 2 5 0 3 6 1 7 4 N2|N2 4 6 2 7 3 0 5 1 N3|N2 5 4 7 1 3 2 6 0 N3|N1 6 3 1 4 0 7 5 2 N0|N1 7 2 0 6 4 1 5 3 N0|N3 0 1 2 3 4 5 6 7 N1|N3 1 0 2 4 3 5 7 6 N1|N0 2 5 0 3 6 1 7 4 N2|N0 3 7 0 4 2 1 6 5 N2|N2 5 4 7 1 3 2 6 0 N3|N1 6 3 1 4 0 7 5 2 N0|N1 7 2 0 6 4 1 5 3 N0|N3 0 1 2 3 4 5 6 7 N1|N3 1 0 2 4 3 5 7 6 N1|N0 2 5 0 3 6 1 7 4 N2|N0 3 7 0 4 2 1 6 5 N2|N2 4 6 2 7 3 0 5 1 N3|N1 6 3 1 4 0 7 5 2 N0|N1 7 2 0 6 4 1 5 3 N0|N3 0 1 2 3 4 5 6 7 N1|N3 1 0 2 4 3 5 7 6 N1|N0 2 5 0 3 6 1 7 4 N2|N0 3 7 0 4 2 1 6 5 N2|N2 4 6 2 7 3 0 5 1 N3|N2 5 4 7 1 3 2 6 0 N3|N1 7 2 0 6 4 1 5 3 N0|N3 0 1 2 3 4 5 6 7 N1|N3 1 0 2 4 3 5 7 6 N1|N0 2 5 0 3 6 1 7 4 N2|N0 3 7 0 4 2 1 6 5 N2|N2 4 6 2 7 3 0 5 1 N3|N2 5 4 7 1 3 2 6 0 N3|N1 6 3 1 4 0 7 5 2 N0",
// Comma-separated key=value flags. "netOverride" is tested with checkOption()
// in parseRome4P2H to enable the GDR distance override; the consumer of
// "noCpuCheck" is not visible here.
.options = "noCpuCheck=1,netOverride=1",
};
static struct rcclRomeModel romeTopoModels[] = {
rome_model_22, /* 0 */
@@ -886,6 +898,7 @@ static struct rcclRomeModel romeTopoModels[] = {
rome_model_81, /* 40 */
rome_model_84, /* 41 */
rome_model_85, /* 42 */
rome_model_87, /* 43 */
};
/* Parse user defined rings. Format is like :
@@ -1306,7 +1319,7 @@ static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRo
struct ncclNetId net_scores[NCCL_TOPO_MAX_NODES];
for (int i = 0; i < romeTopo->nNics; i ++) {
net_scores[i].n = i;
net_scores[i].id = system->nodes[NET].nodes[i].id;
net_scores[i].id = system->nodes[NET].nodes[i].net.dev;
}
qsort(net_scores, romeTopo->nNics, sizeof(struct ncclNetId), cmpNets);
@@ -1494,7 +1507,7 @@ static bool permuteNetIds(int *n, int *g, int s, int last, struct rcclRomeModel*
}
ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph* graph) {
ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph* graph, const char *ringBase) {
static char ringRemap[64];
int i;
@@ -1623,14 +1636,14 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
// Attempt to use rail-optimized rings if they exist
if (system->nHosts % 2 == 0) {
// For even number of nodes, alternate forward/reverse on ringBase
NCCLCHECK(parseGraph(romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
}
else {
// For odd number of nodes, check first to see if ringTail1 and ringTail2 are defined
if (system->nHosts == 1 || romeTopoModels[i].ringTail1 == nullptr || romeTopoModels[i].ringTail2 == nullptr) {
if (system->nHosts > 1)
INFO(NCCL_GRAPH, "[WARN] Dropping back due to lack of support for odd-number of nodes for model index %d\n", i);
NCCLCHECK(parseGraph(romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
}
else
{
@@ -1639,7 +1652,7 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
} else if (system->hostIdx == (system->nHosts - 2)) {
NCCLCHECK(parseGraph(romeTopoModels[i].ringTail2, system, graph, g, nnets > 1 ? n : NULL, 0));
} else {
NCCLCHECK(parseGraph(romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, system->hostIdx % 2));
}
}
}
@@ -1651,7 +1664,24 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
}
// Fall back to tree from ringBase
NCCLCHECK(parseGraph(romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, 0));
NCCLCHECK(parseGraph(ringBase != nullptr ? ringBase : romeTopoModels[i].ringBase, system, graph, g, nnets > 1 ? n : NULL, 0));
// Override GDR distance if requested
if (checkOption(romeTopoModels[i].options, "netOverride")) {
for (int i = 0; i < system->nodes[NET].count; i++) {
for (int j = 0; j < system->nodes[GPU].count; j++) {
if (system->nodes[GPU].nodes[j].paths[NET][i].type == PATH_PXB) {
int k;
for (k = 0; k < system->nodes[GPU].count; k++) {
if (k != j &&
system->nodes[GPU].nodes[k].gpu.dev/2 == system->nodes[GPU].nodes[j].gpu.dev/2)
break;
}
if (k < system->nodes[GPU].count)
system->nodes[GPU].nodes[k].paths[NET][i].type = PATH_PXB;
}
}
}
}
break;
}
return ncclSuccess;
+1 -1
مشاهده پرونده
@@ -24,7 +24,7 @@ THE SOFTWARE.
ncclResult_t parseGraph(const char* str, struct ncclTopoSystem* system, struct ncclTopoGraph* graph, int* gpu_map, int* net_map, int reverse);
ncclResult_t parseGraphLight(const char* str, struct ncclTopoSystem* system, struct ncclTopoGraph* graph, int* gpu_map);
ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);
ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph* graph2, const char *ringBase);
ncclResult_t parseChordalRing(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);
ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);
ncclResult_t parse4H4P(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);
+5 -2
مشاهده پرونده
@@ -947,7 +947,9 @@ ncclResult_t ncclTopoCompute(ncclTopoSystem* system, struct ncclTopoGraph* graph
NCCLCHECK(parseGraphLight(strTrees, system, graph, NULL));
system->treeDefined=true;
} else {
NCCLCHECK(parseGraph(str, system, graph, NULL, NULL, false));
// For the ring pattern, reverse the parsed rings on odd-indexed hosts (hostIdx % 2); other patterns are never reversed
NCCLCHECK(parseGraph(str, system, graph, NULL, NULL,
graph->pattern == NCCL_TOPO_PATTERN_RING ? system->hostIdx % 2 : 0));
int arch, vendor, model;
NCCLCHECK(ncclTopoCpuType(system, &arch, &vendor, &model));
if (graph->nChannels && arch == NCCL_TOPO_CPU_ARCH_X86 && vendor == NCCL_TOPO_CPU_VENDOR_AMD && model == NCCL_TOPO_CPU_TYPE_ROME) {
@@ -959,7 +961,8 @@ ncclResult_t ncclTopoCompute(ncclTopoSystem* system, struct ncclTopoGraph* graph
NCCLCHECK(parseChordalRing(system, graph));
if (graph->nChannels) return ncclSuccess;
// try to match Rome 4P2H
NCCLCHECK(parseRome4P2H(system, graph));
const char *remap_str = getenv("NCCL_RINGS_REMAP");
NCCLCHECK(parseRome4P2H(system, graph, remap_str));
if (graph->nChannels) return ncclSuccess;
// try to match 1H16P
+1
مشاهده پرونده
@@ -355,6 +355,7 @@ ncclResult_t ncclTopoAddNet(struct ncclXmlNode* xmlNet, struct ncclTopoSystem* s
NCCLCHECK(xmlGetAttrIntDefault(xmlNet, "gdr", &net->net.gdrSupport, 0));
NCCLCHECK(xmlGetAttrIntDefault(xmlNet, "maxconn", &net->net.maxChannels, MAXCHANNELS));
NCCLCHECK(xmlGetAttrIntDefault(xmlNet, "coll", &net->net.collSupport, 0));
NCCLCHECK(xmlGetAttrIntDefault(xmlNet, "dev", &net->net.dev, 0));
net->net.busId = busId;
ncclDebugNoWarn = 0;
+1
مشاهده پرونده
@@ -140,6 +140,7 @@ struct ncclTopoNode {
int collSupport;
int maxChannels;
int64_t busId;
int dev;
}net;
struct {
int arch;
+1 -1
مشاهده پرونده
@@ -21,7 +21,7 @@
DIR="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
for i in {0..82}
for i in {0..87}
do
if [[ $i -eq 50 ]] || [[ $i -eq 51 ]]
then
@@ -159,6 +159,7 @@ NodeModelDesc model_descs[] = {
{2, "topo_8p_940_16n.xml", "2 nodes gfx940 16 NICs"},
{2, "topo_8p1h_6.xml", "2 nodes 8P1H Alt."},
{5, "topo_8p_940.xml", "5 nodes gfx940 8P"},
{2, "topo_8p_942.xml", "2 nodes gfx942 8P"},
};
NCCL_PARAM(MaxCTAs, "MAX_CTAS", MAXCHANNELS);