@@ -591,7 +591,20 @@ static struct rcclRomeModel rome_model_82 = {
|
||||
.gdrLevel = { },
|
||||
.pattern = "4040",
|
||||
.ringBase = "0 1 2 3 4 5 6 7|0 1 2 3 4 5 7 6|0 2 4 1 3 6 5 7|0 2 4 6 1 7 3 5|0 3 1 5 2 7 4 6|0 3 5 1 6 2 7 4|0 4 1 7 3 6 2 5|7 6 5 4 3 2 1 0|6 7 5 4 3 2 1 0|7 5 6 3 1 4 2 0|5 3 7 1 6 4 2 0|6 4 7 2 5 1 3 0|4 7 2 6 1 5 3 0|5 2 6 3 7 1 4 0",
|
||||
.options = "noCpuCheck=1,mscclEnabled=1",
|
||||
.options = "noCpuCheck=1,mscclEnabled=1,disableNumaMatching=1",
|
||||
};
|
||||
|
||||
// Reference topology entry #83: 8 GPUs, 2 CPUs, 8 NICs, 7 links.
// The matching loop in parseRome4P2H compares .pattern against the detected
// pattern with strcmp (skipped when ignore_numa is set) and then tries to
// permute the detected GPU / NET ids onto this model.
static struct rcclRomeModel rome_model_83 = {
  .nGpus = 8, .nCpus = 2, .nNics = 8, .nLinks = 7,

  // One id per device (8 GPUs, 8 NICs).
  // NOTE(review): values look like PCI bus ids -- confirm the exact encoding.
  .gpuIds = { 0xc000, 0x22000, 0x38000, 0x5c000, 0x9f000, 0xaf000, 0xbf000, 0xdf000, },
  .nicIds = { 0x7000, 0x1d000, 0x33000, 0x57000, 0x9a000, 0xaa000, 0xba000, 0xda000, },

  // Per-device NUMA index: first four devices on 0, last four on 1.
  .gpuNuma = { 0, 0, 0, 0, 1, 1, 1, 1, },
  .nicNuma = { 0, 0, 0, 0, 1, 1, 1, 1, },

  // 64 entries, shown here as 8 rows of 8 (assumed row-major GPU x GPU --
  // TODO confirm against the consumer). Read that way, every off-diagonal
  // entry is 1 and the diagonal is 0, which agrees with nLinks = 7.
  .connMatrix = {
    0, 1, 1, 1, 1, 1, 1, 1,
    1, 0, 1, 1, 1, 1, 1, 1,
    1, 1, 0, 1, 1, 1, 1, 1,
    1, 1, 1, 0, 1, 1, 1, 1,
    1, 1, 1, 1, 0, 1, 1, 1,
    1, 1, 1, 1, 1, 0, 1, 1,
    1, 1, 1, 1, 1, 1, 0, 1,
    1, 1, 1, 1, 1, 1, 1, 0, },

  // 64 entries, shown here as 8 rows of 8 (presumably one row per NIC and one
  // column per GPU -- verify). Read that way, PATH_PHB appears exactly where
  // nicNuma[row] == gpuNuma[col] and PATH_SYS everywhere else.
  .gdrLevel = {
    PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
    PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
    PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
    PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
    PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB,
    PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB,
    PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB,
    PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, },

  // Signature string matched with strcmp in parseRome4P2H.
  // NOTE(review): digits presumably encode per-NUMA GPU/NIC counts -- confirm.
  .pattern = "4444",

  // Eight '|'-separated rings; each lists GPUs 0..7 exactly once and is
  // bracketed by N<k> tokens (presumably the NICs the ring enters/leaves
  // through -- verify against the ring parser).
  .ringBase = "N0 0 1 2 3 4 5 6 7 N7|N1 1 0 2 4 3 5 7 6 N6|N2 2 5 0 3 6 1 7 4 N4|N3 3 7 0 4 2 1 6 5 N5|N4 4 6 2 7 3 0 5 1 N1|N5 5 4 7 1 3 2 6 0 N0|N6 6 3 1 4 0 7 5 2 N2|N7 7 2 0 6 4 1 5 3 N3",

  // Comma-separated key=value switches; their handling is outside this view.
  .options = "noCpuCheck=1,disableNumaMatching=1",
};
|
||||
|
||||
static struct rcclRomeModel romeTopoModels[] = {
|
||||
@@ -636,7 +649,8 @@ static struct rcclRomeModel romeTopoModels[] = {
|
||||
rome_model_79,
|
||||
rome_model_80,
|
||||
rome_model_81,
|
||||
rome_model_82
|
||||
rome_model_82,
|
||||
rome_model_83,
|
||||
};
|
||||
|
||||
/* Parse user defined rings. Format is like :
|
||||
@@ -1281,7 +1295,7 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
|
||||
if (!ignore_numa && strcmp(romeTopoModels[i].pattern, pattern)) continue;
|
||||
// permute GPU IDs
|
||||
for (int j = 0; j < ngpus; j++) g[j] = (j+2)%ngpus;
|
||||
if (!permuteGpuIds(g, 0, ngpus-1, romeTopoModels+i, &romeTopo, &time, match_nbio, ignore_numa)) continue;
|
||||
if (!permuteGpuIds(g, 0, ngpus-1, romeTopoModels+i, &romeTopo, &time, ignore_cpu ? false : match_nbio, ignore_numa)) continue;
|
||||
if (nnets > 1) {
|
||||
// permute NET IDs
|
||||
for (int j = 0; j < nnets; j++) n[j] = (j+2)%nnets;
|
||||
|
||||
@@ -376,6 +376,10 @@ ncclResult_t ncclTopoAddGpu(struct ncclXmlNode* xmlGpu, struct ncclTopoSystem* s
|
||||
gpu->gpu.gcn = "gfx908";
|
||||
} else if (strcmp(gpu->gpu.gcn, "910") == 0) {
|
||||
gpu->gpu.gcn = "gfx90a";
|
||||
} else if (strcmp(gpu->gpu.gcn, "940") == 0) {
|
||||
gpu->gpu.gcn = "gfx940";
|
||||
} else if (strcmp(gpu->gpu.gcn, "941") == 0) {
|
||||
gpu->gpu.gcn = "gfx941";
|
||||
}
|
||||
rcclHipDeviceArch_t arch;
|
||||
NCCLCHECK(xmlGetAttrInt(xmlGpu, "arch", &arch.value));
|
||||
|
||||
مرجع در شماره جدید
Block a user