From cd17cf6dce5d9796cd28caaa73ad5125ef258db4 Mon Sep 17 00:00:00 2001 From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Date: Mon, 21 Mar 2022 10:54:40 -0700 Subject: [PATCH] Update Rome model matching and add new models (#516) * Update Rome model matching and add new models * Add missing file * Models update --- src/graph/rome_models.cc | 159 ++++++++++----- src/graph/tuning.cc | 1 + tools/scripts/topo_val.sh | 2 +- tools/topo_expl/models/topo_16p1h_vm.xml | 48 ++--- tools/topo_expl/models/topo_4p4h.xml | 236 +++++++++++++++++++++++ tools/topo_expl/models/topo_8p1h_1.xml | 118 ++++++++++++ tools/topo_expl/models/topo_8p1h_n1.xml | 110 +++++++++++ tools/topo_expl/topo_expl.cpp | 5 + 8 files changed, 607 insertions(+), 72 deletions(-) create mode 100644 tools/topo_expl/models/topo_4p4h.xml create mode 100644 tools/topo_expl/models/topo_8p1h_1.xml create mode 100644 tools/topo_expl/models/topo_8p1h_n1.xml diff --git a/src/graph/rome_models.cc b/src/graph/rome_models.cc index a76b25e35c..690787ac90 100644 --- a/src/graph/rome_models.cc +++ b/src/graph/rome_models.cc @@ -52,7 +52,7 @@ static struct rcclRomeModel rome_model_22 = { .gpuNuma = { 1, 0, 1, 2, 3, 1, 2, 3, }, .nicNuma = { 2, }, .connMatrix = { 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, }, - .gdrLevel = { 6, 6, 6, 5, 6, 6, 5, 6, }, + .gdrLevel = { PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_SYS, PATH_SYS, PATH_PHB, PATH_SYS, }, .pattern = "10302120", .ringBase = "7 4 5 3 1 0 6 2|4 7 3 5 0 1 2 6", .options = "", @@ -65,7 +65,7 @@ static struct rcclRomeModel rome_model_25 = { .gpuNuma = { 0, 1, 1, 1, 2, 2, 2, 3, }, .nicNuma = { 0, 3, }, .connMatrix = { 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, }, - 
.gdrLevel = { 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, }, + .gdrLevel = { PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, }, .pattern = "11303011", .ringBase = "2 1 0 3 6 7 5 4|7 6 4 5 1 2 3 0", .options = "", @@ -78,7 +78,7 @@ static struct rcclRomeModel rome_model_27 = { .gpuNuma = { 0, 1, 1, 1, 2, 2, 2, 3, }, .nicNuma = { 0, 3, }, .connMatrix = { 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, }, - .gdrLevel = { 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, }, + .gdrLevel = { PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, }, .pattern = "11303011", .ringBase = "0 6 2 3 1 7 5 4|7 1 4 5 6 0 3 2", .options = "", @@ -91,7 +91,7 @@ static struct rcclRomeModel rome_model_29 = { .gpuNuma = { 0, 1, 1, 1, 2, 2, 3, 3, }, .nicNuma = { 2, }, .connMatrix = { 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, }, - .gdrLevel = { 6, 6, 6, 6, 5, 5, 6, 6, }, + .gdrLevel = { PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, }, .pattern = "10302120", .ringBase = "6 5 7 4 0 1 3 2|6 4 7 5 2 3 1 0", .options = "", @@ -104,7 +104,7 @@ static struct rcclRomeModel rome_model_31 = { .gpuNuma = { 1, 2, 2, 3, 4, 5, 5, 7, }, .nicNuma = { 0, 6, }, .connMatrix = { 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, }, - .gdrLevel = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, }, + .gdrLevel = { PATH_SYS, PATH_SYS, 
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, }, .pattern = "0110201010200110", .ringBase = "1 2 3 0 6 4 5 7|4 6 7 5 2 1 0 3", .options = "", @@ -117,7 +117,7 @@ static struct rcclRomeModel rome_model_33 = { .gpuNuma = { 1, 2, 2, 3, 4, 5, 5, 7, }, .nicNuma = { 0, 6, }, .connMatrix = { 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, }, - .gdrLevel = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, }, + .gdrLevel = { PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, }, .pattern = "0110201010200110", .ringBase = "1 4 5 7 0 3 2 6|4 1 7 5 6 2 3 0", .options = "", @@ -221,7 +221,7 @@ static struct rcclRomeModel rome_model_40 = { .gpuNuma = { 0, 1, 1, 1, 2, 2, 3, 3, }, .nicNuma = { 2, }, .connMatrix = { 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, }, - .gdrLevel = { 6, 6, 6, 6, 5, 5, 6, 6, }, + .gdrLevel = { PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, }, .pattern = "10302120", .ringBase = "6 7 1 4 0 5 3 2|7 6 4 1 0 2 3 5", .options = "", @@ -234,7 +234,7 @@ static struct rcclRomeModel rome_model_42 = { .gpuNuma = { 1, 2, 2, 3, 5, 5, 6, 7, }, .nicNuma = { 4, }, .connMatrix = { 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, }, - .gdrLevel = { 6, 6, 6, 6, 6, 6, 6, 6, }, + .gdrLevel = { PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, }, .pattern = "10201001201010", .ringBase = "7 
4 6 1 3 0 2 5|6 4 7 1 3 2 5 0", .options = "", @@ -247,7 +247,7 @@ static struct rcclRomeModel rome_model_44 = { .gpuNuma = { 0, 0, 1, 1, 2, 2, 3, 3, }, .nicNuma = { 2, }, .connMatrix = { 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, }, - .gdrLevel = { 6, 6, 6, 6, 5, 5, 6, 6, }, + .gdrLevel = { PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, }, .pattern = "20202120", .ringBase = "5 4 7 6 2 1 3 0|5 6 7 4 1 0 2 3", .options = "", @@ -273,7 +273,7 @@ static struct rcclRomeModel rome_model_46 = { .gpuNuma = { 1, 2, 2, 3, 5, 5, 6, 7, }, .nicNuma = { 4, }, .connMatrix = { 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, }, - .gdrLevel = { 6, 6, 6, 6, 6, 6, 6, 6, }, + .gdrLevel = { PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, }, .pattern = "10201001201010", .ringBase = "6 5 7 4 1 2 3 0|7 4 6 5 1 0 3 2", .options = "", @@ -299,7 +299,7 @@ static struct rcclRomeModel rome_model_49 = { .gpuNuma = { 0, 0, 1, 1, 2, 2, 3, 3, }, .nicNuma = { 0, 1, 2, 3, }, .connMatrix = { 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, }, - .gdrLevel = { 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, }, + .gdrLevel = { PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, }, .pattern = 
"21212121", .ringBase = "N0 0 1 2 3 4 5 6 7 N3|N3 7 6 5 4 3 2 1 0 N0|N1 2 3 0 1 6 7 4 5 N2|N2 5 4 7 6 1 0 3 2 N1", .options = "", @@ -325,7 +325,7 @@ static struct rcclRomeModel rome_model_53 = { .gpuNuma = { 1, 1, 3, 3, 5, 5, 7, 7, }, .nicNuma = { 1, 3, 5, 7, }, .connMatrix = { 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, }, - .gdrLevel = { 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, }, + .gdrLevel = { PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, }, .pattern = "21212121", .ringBase = "N0 0 1 2 3 4 5 6 7 N3|N3 7 6 5 4 3 2 1 0 N0|N1 2 3 0 1 6 7 4 5 N2|N2 5 4 7 6 1 0 3 2 N1", .options = "", @@ -363,7 +363,7 @@ static struct rcclRomeModel rome_model_56 = { .nicIds = { }, .gpuNuma = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, }, .nicNuma = { }, - .connMatrix = { 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 
0, 1, 0, 0, 1, 0, }, + .connMatrix = { 0, 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 4, 0, }, .gdrLevel = { }, .pattern = "40404040", .ringBase = "0 1 3 2 6 7 15 14 10 11 9 8 12 13 5 4|0 1 2 3 7 6 13 12 8 9 10 11 15 14 5 4|0 2 3 7 6 14 15 11 10 8 9 13 12 4 5 1|4 5 13 12 8 9 11 10 14 15 7 6 2 3 1 0|4 5 14 15 11 10 9 8 12 13 6 7 3 2 1 0|1 5 4 12 13 9 8 10 11 15 14 6 7 3 2 0", @@ -389,10 +389,10 @@ static struct rcclRomeModel rome_model_59 = { .nicIds = { 0x4b000, 0x5a000, 0xb000, 0x1a000, 0xcc000, 0xdb000, 0x8c000, 0x9b000, }, .gpuNuma = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, }, .nicNuma = { 0, 0, 1, 1, 2, 2, 3, 3, }, - .connMatrix = { 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 
0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, }, - .gdrLevel = { 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, }, + .connMatrix = { 0, 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 4, 0, }, + .gdrLevel = { PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, 
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, }, .pattern = "42424242", - .ringBase = "N4 9 8 12 13 5 4 0 1 3 2 6 7 15 14 10 11 N5|N5 10 11 9 8 12 13 5 4 0 1 3 2 6 7 15 14 N7|N7 15 14 10 11 9 8 12 13 5 4 0 1 3 2 6 7 N3|N3 6 7 15 14 10 11 9 8 12 13 5 4 0 1 3 2 N1|N1 3 2 6 7 15 14 10 11 9 8 12 13 5 4 0 1 N0|N1 2 3 1 0 4 5 13 12 8 9 11 10 14 15 7 6 N3|N3 7 6 2 3 1 0 4 5 13 12 8 9 11 10 14 15 N7|N5 11 10 14 15 7 6 2 3 1 0 4 5 13 12 8 9 N4|N7 14 15 7 6 2 3 1 0 4 5 13 12 8 9 11 10 N5|N0 1 0 4 5 13 12 8 9 11 10 14 15 7 6 2 3 N1|N0 0 1 2 3 7 6 13 12 8 9 10 11 15 14 5 4 N2|N2 5 4 0 1 2 3 7 6 13 12 8 9 10 11 15 14 N7|N4 8 9 10 11 15 14 5 4 0 1 2 3 7 6 13 12 N6|N6 13 12 8 9 10 11 15 14 5 4 0 1 2 3 7 6 N3|N3 7 6 13 12 8 9 10 11 15 14 5 4 0 1 2 3 N1|N1 3 2 1 0 4 5 14 15 11 10 9 8 12 13 6 7 N3|N3 6 7 3 2 1 0 4 5 14 15 11 10 9 8 12 13 N6|N2 4 5 14 15 11 10 9 8 12 13 6 7 3 2 1 0 N0|N7 14 15 11 10 9 8 12 13 6 7 3 2 1 0 4 5 N2|N6 12 13 6 7 3 2 1 0 4 5 14 15 11 10 9 8 N4|N0 1 0 2 3 7 6 14 15 11 10 8 9 13 12 4 5 N2|N2 4 5 1 0 2 3 7 6 14 15 11 10 8 9 13 12 N6|N4 8 9 13 12 4 5 1 0 2 3 7 6 14 15 11 10 N5|N6 13 12 4 5 1 0 2 3 7 6 14 15 11 10 8 9 N4|N5 11 10 8 9 13 12 4 5 1 0 2 3 7 6 14 15 N7|N1 2 3 7 6 14 15 11 10 8 9 13 12 4 5 1 0 N0|N5 10 11 15 14 6 7 3 2 0 1 5 4 12 13 9 8 N4|N2 5 4 12 13 9 8 10 11 15 14 6 7 3 2 0 1 N0|N0 0 1 5 4 12 13 9 8 10 11 15 14 6 7 3 2 N1|N7 15 14 6 7 3 2 0 1 5 4 12 13 9 8 10 11 N5|N4 9 8 
10 11 15 14 6 7 3 2 0 1 5 4 12 13 N6|N6 12 13 9 8 10 11 15 14 6 7 3 2 0 1 5 4 N2|", + .ringBase = "N4 9 8 12 13 5 4 0 1 3 2 6 7 15 14 10 11 N5|N1 3 2 6 7 15 14 10 11 9 8 12 13 5 4 0 1 N0|N3 7 6 2 3 1 0 4 5 13 12 8 9 11 10 14 15 N7|N7 15 14 10 11 9 8 12 13 5 4 0 1 3 2 6 7 N3|N5 11 10 14 15 7 6 2 3 1 0 4 5 13 12 8 9 N4|N0 1 0 4 5 13 12 8 9 11 10 14 15 7 6 2 3 N1|N3 6 7 3 2 1 0 4 5 14 15 11 10 9 8 12 13 N6|N7 14 15 11 10 9 8 12 13 6 7 3 2 1 0 4 5 N2|N2 5 4 0 1 2 3 7 6 13 12 8 9 10 11 15 14 N7|N6 13 12 8 9 10 11 15 14 5 4 0 1 2 3 7 6 N3|N4 8 9 13 12 4 5 1 0 2 3 7 6 14 15 11 10 N5|N5 10 11 15 14 6 7 3 2 0 1 5 4 12 13 9 8 N4|N6 12 13 9 8 10 11 15 14 6 7 3 2 0 1 5 4 N2|N2 4 5 1 0 2 3 7 6 14 15 11 10 8 9 13 12 N6|N1 2 3 7 6 14 15 11 10 8 9 13 12 4 5 1 0 N0|N0 0 1 5 4 12 13 9 8 10 11 15 14 6 7 3 2 N1|N5 10 11 9 8 12 13 5 4 0 1 3 2 6 7 15 14 N7|N3 6 7 15 14 10 11 9 8 12 13 5 4 0 1 3 2 N1|N1 2 3 1 0 4 5 13 12 8 9 11 10 14 15 7 6 N3|N7 14 15 7 6 2 3 1 0 4 5 13 12 8 9 11 10 N5|N0 0 1 2 3 7 6 13 12 8 9 10 11 15 14 5 4 N2|N4 8 9 10 11 15 14 5 4 0 1 2 3 7 6 13 12 N6|N3 7 6 13 12 8 9 10 11 15 14 5 4 0 1 2 3 N1|N1 3 2 1 0 4 5 14 15 11 10 9 8 12 13 6 7 N3|N6 12 13 6 7 3 2 1 0 4 5 14 15 11 10 9 8 N4|N2 4 5 14 15 11 10 9 8 12 13 6 7 3 2 1 0 N0|N0 1 0 2 3 7 6 14 15 11 10 8 9 13 12 4 5 N2|N6 13 12 4 5 1 0 2 3 7 6 14 15 11 10 8 9 N4|N5 11 10 8 9 13 12 4 5 1 0 2 3 7 6 14 15 N7|N2 5 4 12 13 9 8 10 11 15 14 6 7 3 2 0 1 N0|N7 15 14 6 7 3 2 0 1 5 4 12 13 9 8 10 11 N5|N4 9 8 10 11 15 14 6 7 3 2 0 1 5 4 12 13 N6", .options = "", }; @@ -416,7 +416,7 @@ static struct rcclRomeModel rome_model_63 = { .gpuNuma = { 3, 3, 1, 1, 0, 0, 2, 2, }, .nicNuma = { 3, 1, 0, 2, }, .connMatrix = { 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, }, - .gdrLevel = { 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, }, + .gdrLevel 
= { PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, }, .pattern = "21212121", .ringBase = "N0 0 1 5 4 6 7 3 2 N1|N1 2 3 7 6 4 5 1 0 N0|N3 7 6 0 1 3 2 4 5 N2|N2 5 4 2 3 1 0 6 7 N3|N0 0 1 5 4 6 7 3 2 N1|N1 2 3 7 6 4 5 1 0 N0|N3 7 6 0 1 3 2 4 5 N2|N2 5 4 2 3 1 0 6 7 N3", .options = "", @@ -428,11 +428,11 @@ static struct rcclRomeModel rome_model_65 = { .nicIds = { 0x4b000, 0x5a000, 0xb000, 0x1a000, 0xcc000, 0xdb000, 0x8c000, 0x9b000, }, .gpuNuma = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, }, .nicNuma = { 0, 0, 1, 1, 2, 2, 3, 3, }, - .connMatrix = { 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, }, - .gdrLevel = { 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 
5, }, + .connMatrix = { 0, 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 4, 0, }, + .gdrLevel = { PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, 
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, }, .pattern = "42424242", - .ringBase = "N4 9 8 12 13 5 4 0 1 3 2 6 7 15 14 10 11 N5|N5 10 11 9 8 12 13 5 4 0 1 3 2 6 7 15 14 N7|N7 15 14 10 11 9 8 12 13 5 4 0 1 3 2 6 7 N3|N3 6 7 15 14 10 11 9 8 12 13 5 4 0 1 3 2 N1|N1 3 2 6 7 15 14 10 11 9 8 12 13 5 4 0 1 N0|N1 2 3 1 0 4 5 13 12 8 9 11 10 14 15 7 6 N3|N3 7 6 2 3 1 0 4 5 13 12 8 9 11 10 14 15 N7|N5 11 10 14 15 7 6 2 3 1 0 4 5 13 12 8 9 N4|N7 14 15 7 6 2 3 1 0 4 5 13 12 8 9 11 10 N5|N0 1 0 4 5 13 12 8 9 11 10 14 15 7 6 2 3 N1|N0 0 1 2 3 7 6 13 12 8 9 10 11 15 14 5 4 N2|N2 5 4 0 1 2 3 7 6 13 12 8 9 10 11 15 14 N7|N4 8 9 10 11 15 14 5 4 0 1 2 3 7 6 13 12 N6|N6 13 12 8 9 10 11 15 14 5 4 0 1 2 3 7 6 N3|N3 7 6 13 12 8 9 10 11 15 14 5 4 0 1 2 3 N1|N1 3 2 1 0 4 5 14 15 11 10 9 8 12 13 6 7 N3|N3 6 7 3 2 1 0 4 5 14 15 11 10 9 8 12 13 N6|N2 4 5 14 15 11 10 9 8 12 13 6 7 3 2 1 0 N0|N7 14 15 11 10 9 8 12 13 6 7 3 2 1 0 4 5 N2|N6 12 13 6 7 3 2 1 0 4 5 14 15 11 10 9 8 N4|N0 1 0 2 3 7 6 14 15 11 10 8 9 13 12 4 5 N2|N2 4 5 1 0 2 3 7 6 14 15 11 10 8 9 13 12 N6|N4 8 9 13 12 4 5 1 0 2 3 7 6 14 15 11 10 N5|N6 13 12 4 5 1 0 2 3 7 6 14 15 11 10 8 9 N4|N5 11 10 8 9 13 12 4 5 1 0 2 3 7 6 14 15 N7|N1 2 3 7 6 14 15 11 10 8 9 13 12 4 5 1 0 N0|N5 10 11 15 14 6 7 3 2 0 1 5 4 12 13 9 8 N4|N2 5 4 12 13 9 8 10 11 15 14 6 7 3 2 0 1 N0|N0 0 1 5 4 12 13 9 8 10 11 15 14 6 7 3 2 N1|N7 15 14 6 7 3 2 0 1 5 4 12 13 9 8 10 11 N5|N4 9 8 10 11 15 14 6 7 3 2 0 1 5 4 12 13 N6|N6 12 13 9 8 10 11 15 14 6 7 3 2 0 1 5 4 N2|", - .options = "netGdrLevel=5", + .ringBase = "N4 9 8 12 13 5 4 0 1 3 2 6 7 15 14 10 11 N5|N1 3 2 6 7 15 14 10 11 9 8 12 13 5 4 0 1 N0|N3 7 6 2 3 1 0 4 5 13 12 8 9 11 10 14 15 N7|N7 15 14 10 11 9 8 12 13 5 4 0 1 3 2 6 7 N3|N5 11 10 14 15 7 6 2 3 1 0 4 5 13 12 8 9 N4|N0 1 0 4 5 13 12 8 9 11 10 14 15 7 6 2 3 N1|N3 6 7 3 2 1 0 4 5 14 15 11 10 9 8 12 13 N6|N7 14 15 11 10 9 8 12 13 6 7 3 2 1 0 4 5 N2|N2 5 4 0 1 2 3 7 6 13 12 8 9 10 11 15 14 N7|N6 13 
12 8 9 10 11 15 14 5 4 0 1 2 3 7 6 N3|N4 8 9 13 12 4 5 1 0 2 3 7 6 14 15 11 10 N5|N5 10 11 15 14 6 7 3 2 0 1 5 4 12 13 9 8 N4|N6 12 13 9 8 10 11 15 14 6 7 3 2 0 1 5 4 N2|N2 4 5 1 0 2 3 7 6 14 15 11 10 8 9 13 12 N6|N1 2 3 7 6 14 15 11 10 8 9 13 12 4 5 1 0 N0|N0 0 1 5 4 12 13 9 8 10 11 15 14 6 7 3 2 N1|N5 10 11 9 8 12 13 5 4 0 1 3 2 6 7 15 14 N7|N3 6 7 15 14 10 11 9 8 12 13 5 4 0 1 3 2 N1|N1 2 3 1 0 4 5 13 12 8 9 11 10 14 15 7 6 N3|N7 14 15 7 6 2 3 1 0 4 5 13 12 8 9 11 10 N5|N0 0 1 2 3 7 6 13 12 8 9 10 11 15 14 5 4 N2|N4 8 9 10 11 15 14 5 4 0 1 2 3 7 6 13 12 N6|N3 7 6 13 12 8 9 10 11 15 14 5 4 0 1 2 3 N1|N1 3 2 1 0 4 5 14 15 11 10 9 8 12 13 6 7 N3|N6 12 13 6 7 3 2 1 0 4 5 14 15 11 10 9 8 N4|N2 4 5 14 15 11 10 9 8 12 13 6 7 3 2 1 0 N0|N0 1 0 2 3 7 6 14 15 11 10 8 9 13 12 4 5 N2|N6 13 12 4 5 1 0 2 3 7 6 14 15 11 10 8 9 N4|N5 11 10 8 9 13 12 4 5 1 0 2 3 7 6 14 15 N7|N2 5 4 12 13 9 8 10 11 15 14 6 7 3 2 0 1 N0|N7 15 14 6 7 3 2 0 1 5 4 12 13 9 8 10 11 N5|N4 9 8 10 11 15 14 6 7 3 2 0 1 5 4 12 13 N6", + .options = "netGdrLevel=PHB", }; static struct rcclRomeModel rome_model_66 = { @@ -441,11 +441,11 @@ static struct rcclRomeModel rome_model_66 = { .nicIds = { }, .gpuNuma = { 1, 1, 1, 1, 3, 3, 3, 3, }, .nicNuma = { }, - .connMatrix = { 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, }, + .connMatrix = { 0, 4, 0, 0, 2, 0, 1, 0, 4, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 4, 1, 0, 2, 0, 0, 1, 4, 0, 0, 1, 0, 0, 2, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 1, 4, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 4, 0, 1, 0, 0, 0, 1, 4, 0, }, .gdrLevel = { }, .pattern = "4040", .ringBase = "0 6 7 5 4 2 3 1|1 3 2 4 5 7 6 0|0 1 7 6 2 3 5 4|4 5 3 2 6 7 1 0", - .options = "", + .options = "disableNumaMatching=1", }; static struct rcclRomeModel rome_model_67 = { @@ -454,11 +454,11 @@ static struct rcclRomeModel rome_model_67 = { .nicIds = { 0x1d000, 0x1e000, 0xa1000, 0xa2000, }, 
.gpuNuma = { 1, 1, 1, 1, 3, 3, 3, 3, }, .nicNuma = { 1, 1, 3, 3, }, - .connMatrix = { 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, }, - .gdrLevel = { 4, 4, 4, 4, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 4, 4, 4, 4, }, + .connMatrix = { 0, 4, 0, 0, 2, 0, 1, 0, 4, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 4, 1, 0, 2, 0, 0, 1, 4, 0, 0, 1, 0, 0, 2, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 1, 4, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 4, 0, 1, 0, 0, 0, 1, 4, 0, }, + .gdrLevel = { PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, }, .pattern = "4242", .ringBase = "N3 7 6 0 1 3 2 4 5 N2|N2 5 4 2 3 1 0 6 7 N3|N1 2 3 5 4 0 1 7 6 N3|N2 4 5 3 2 6 7 1 0 N0|N1 3 2 4 5 7 6 0 1 N0|N0 1 0 6 7 5 4 2 3 N1|N0 0 1 7 6 2 3 5 4 N2|N3 6 7 1 0 4 5 3 2 N1", - .options = "", + .options = "disableNumaMatching=1", }; static struct rcclRomeModel rome_model_68 = { @@ -468,10 +468,36 @@ static struct rcclRomeModel rome_model_68 = { .gpuNuma = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, .nicNuma = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, .connMatrix = { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, }, - .gdrLevel = { 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 3, }, + .gdrLevel = { PATH_PIX, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, 
PATH_PHB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PIX, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PXB, PATH_PIX, }, .pattern = "@@", .ringBase = "N0 0 1 2 3 N3 N4 4 5 6 7 N7 N8 8 9 10 11 N11 N12 12 13 14 15 N15|N15 15 14 13 12 N12 N11 11 10 9 8 N8 N7 7 6 5 4 N4 N3 3 2 1 0 N0|N1 1 3 0 2 N2 N5 5 7 4 6 N6 N9 9 11 8 10 N10 N13 13 15 12 14 N14|N14 14 12 15 13 N13 N10 10 8 11 9 N9 N6 6 4 7 5 N5 N2 2 0 3 1 N1|N0 0 1 2 3 N3 N4 4 5 6 7 N7 N8 8 9 10 11 N11 N12 12 13 14 15 N15|N15 15 14 13 12 N12 N11 11 10 9 
8 N8 N7 7 6 5 4 N4 N3 3 2 1 0 N0|N1 1 3 0 2 N2 N5 5 7 4 6 N6 N9 9 11 8 10 N10 N13 13 15 12 14 N14|N14 14 12 15 13 N13 N10 10 8 11 9 N9 N6 6 4 7 5 N5 N2 2 0 3 1 N1", - .options = "netGdrLevel=3", + .options = "netGdrLevel=PIX", +}; + +static struct rcclRomeModel rome_model_71 = { + .nGpus = 8, .nCpus = 2, .nNics = 0, .nLinks = 3, + .gpuIds = { 0x32000, 0x35000, 0x11000, 0x14000, 0xae000, 0xb3000, 0x8e000, 0x93000, }, + .nicIds = { }, + .gpuNuma = { 0, 0, 0, 0, 1, 1, 1, 1, }, + .nicNuma = { }, + .connMatrix = { 0, 4, 1, 0, 0, 0, 2, 0, 4, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 4, 2, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 4, 1, 0, 0, 1, 0, 0, 4, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 1, 0, 1, 4, 0, }, + .gdrLevel = { }, + .pattern = "4040", + .ringBase = "0 1 3 2 4 5 7 6|6 7 5 4 2 3 1 0|0 1 5 4 2 3 7 6|6 7 3 2 4 5 1 0", + .options = "disableNumaMatching=1", +}; + +static struct rcclRomeModel rome_model_72 = { + .nGpus = 8, .nCpus = 2, .nNics = 4, .nLinks = 3, + .gpuIds = { 0x32000, 0x35000, 0x11000, 0x14000, 0xae000, 0xb3000, 0x8e000, 0x93000, }, + .nicIds = { 0x1d000, 0x1e000, 0xa0000, 0xa1000, }, + .gpuNuma = { 0, 0, 0, 0, 1, 1, 1, 1, }, + .nicNuma = { 0, 0, 1, 1, }, + .connMatrix = { 0, 4, 1, 0, 0, 0, 2, 0, 4, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 4, 2, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 4, 1, 0, 0, 1, 0, 0, 4, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 1, 0, 1, 4, 0, }, + .gdrLevel = { PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PHB, }, + .pattern = "4242", + .ringBase = "N0 0 1 3 2 4 5 7 6 N3|N1 2 3 1 0 6 7 5 4 N2|N3 7 6 0 1 5 4 2 3 N1|N0 1 0 6 7 3 2 4 5 N2|N2 4 5 7 6 0 1 3 2 N1|N3 6 7 5 4 2 3 1 0 N0|N2 5 4 2 3 7 6 0 1 N0|N1 3 2 4 5 1 0 6 7 N3", + .options = "disableNumaMatching=1", }; 
static struct rcclRomeModel romeTopoModels[] = { @@ -508,6 +534,8 @@ static struct rcclRomeModel romeTopoModels[] = { rome_model_66, rome_model_67, rome_model_68, + rome_model_71, + rome_model_72, }; /* Parse user defined rings. Format is like : @@ -626,6 +654,7 @@ end: } #define MAX_OPT_TOKENS 10 +extern const char* topoPathTypeStr[]; static void parseOptions(struct ncclTopoSystem* system, const char *options) { if (strcmp(options, "")) { @@ -639,7 +668,17 @@ static void parseOptions(struct ncclTopoSystem* system, const char *options) { tokens[numTokens++] = strtok(NULL, "=, "); for (int i = 0; i < numTokens/2; i++) { if (strcmp(tokens[i*2], "netGdrLevel") == 0) { - system->netGdrLevel = atol(tokens[i*2+1]); + int j; + for (j = 0; j <= PATH_SYS; j++) { + if (strcmp(tokens[i*2+1], topoPathTypeStr[j]) == 0) + break; + } + if (j <= PATH_SYS) + system->netGdrLevel = j; + else { + system->netGdrLevel = -2; + WARN("invalid netGdrLevel: %s", tokens[i*2+1]); + } } else if (strcmp(tokens[i*2], "pivotA2AEnabled") == 0) { system->pivotA2AEnabled = (bool)atol(tokens[i*2+1]); } else if (strcmp(tokens[i*2], "pivotA2ANumBiRings") == 0) { @@ -650,6 +689,33 @@ static void parseOptions(struct ncclTopoSystem* system, const char *options) { } } +/* Report whether a model's option string sets "disableNumaMatching" to a non-zero value. */ +static bool disableNumaMatching(const char *options) { + bool disable = false; + if (strcmp(options, "")) { + /* strtok() writes into its argument, so tokenize a heap copy. */ + char *str_temp = (char *)malloc(strlen(options) + 1); + if (str_temp == NULL) return false; + strcpy(str_temp, options); + char* tokens[MAX_OPT_TOKENS]; + int numTokens = 0; + tokens[numTokens] = strtok(str_temp, "=, "); + numTokens++; + while (tokens[numTokens-1] != NULL && numTokens < MAX_OPT_TOKENS) + tokens[numTokens++] = strtok(NULL, "=, "); + for (int i = 0; i < numTokens/2; i++) { + /* A trailing key without a value leaves a NULL in the value slot. */ + if (tokens[i*2+1] != NULL && strcmp(tokens[i*2], "disableNumaMatching") == 0) { + disable = (bool)atol(tokens[i*2+1]); + break; + } + } + /* Free the copy on every path; returning from inside the loop leaked it. */ + free(str_temp); + } + return disable; +} + ncclResult_t parseChordalRing(struct ncclTopoSystem* system, struct ncclTopoGraph* graph) { static const char *ringBase = "0 1 2 3 5 4 7 6|0 2 4 1 7 3 6 5|0 3 1 5 7 2 6
4|0 6 7 4 5 3 2 1|0 5 6 3 7 1 4 2|0 4 6 2 7 5 1 3"; int id[8], dist[8]; @@ -812,7 +871,7 @@ static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRo } if (!link->remNode) continue; if (link->type != LINK_NVL) continue; - romeTopo->connMatrix[i*romeTopo->nGpus+n] = 1; + romeTopo->connMatrix[i*romeTopo->nGpus+n] = link->width/VEGA_XGMI_WIDTH; count ++; } if (romeTopo->nLinks < count) romeTopo->nLinks = count; @@ -883,25 +942,27 @@ static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRo fprintf(file, "},\n"); fprintf(file, " .gdrLevel = { "); for (int i = 0; i < romeTopo->nNics; i ++) - for (int n = 0; n < romeTopo->nGpus; n++) fprintf(file, "%d, ", romeTopo->gdrLevel[i*romeTopo->nGpus+n]); + for (int n = 0; n < romeTopo->nGpus; n++) fprintf(file, "PATH_%s, ", topoPathTypeStr[romeTopo->gdrLevel[i*romeTopo->nGpus+n]]); fprintf(file, "},\n"); fprintf(file, " .pattern = \"%s\",\n", pattern); fprintf(file, " .ringBase = \"\",\n"); - fprintf(file, " .options = "",\n"); + fprintf(file, " .options = \"\",\n"); fprintf(file, "};\n"); fclose(file); } return ncclSuccess; } -static bool permuteGpuIds(int *g, int n, int last, struct rcclRomeModel* ref, struct rcclRomeModel* topo, int* time, bool nbio) { +static bool permuteGpuIds(int *g, int n, int last, struct rcclRomeModel* ref, struct rcclRomeModel* topo, int* time, bool nbio, bool ignore_numa) { (*time) ++; if (n == last) { int i, j; // match GPU numa - for (i = 0; i < ref->nGpus; i++) - if (ref->gpuNuma[i] != topo->gpuNuma[g[i]]) break; - if (i < ref->nGpus) return false; + if (!ignore_numa) { + for (i = 0; i < ref->nGpus; i++) + if (ref->gpuNuma[i] != topo->gpuNuma[g[i]]) break; + if (i < ref->nGpus) return false; + } // match XGMI connection for (i = 0; i < ref->nGpus; i++) { for (j = 0; j < ref->nGpus; j++) { @@ -929,22 +990,24 @@ static bool permuteGpuIds(int *g, int n, int last, struct rcclRomeModel* ref, st } else { for (int i = n; i <= last; i++) { std::swap(g[n], 
g[i]); - if (permuteGpuIds(g, n+1, last, ref, topo, time, nbio)) return true; + if (permuteGpuIds(g, n+1, last, ref, topo, time, nbio, ignore_numa)) return true; std::swap(g[n], g[i]); } } return false; } -static bool permuteNetIds(int *n, int *g, int s, int last, struct rcclRomeModel* ref, struct rcclRomeModel* topo, int* time) { +static bool permuteNetIds(int *n, int *g, int s, int last, struct rcclRomeModel* ref, struct rcclRomeModel* topo, int* time, bool ignore_numa) { (*time) ++; if (s == last) { int i, j; // match NET numa - for (i = 0; i < ref->nNics; i++) { - if (ref->nicNuma[i] != topo->nicNuma[n[i]]) break; + if (!ignore_numa) { + for (i = 0; i < ref->nNics; i++) { + if (ref->nicNuma[i] != topo->nicNuma[n[i]]) break; + } + if (i < ref->nNics) return false; } - if (i < ref->nNics) return false; // match gdr level for (i = 0; i < ref->nNics; i++) { for (j = 0; j < ref->nGpus; j++) { @@ -957,7 +1020,7 @@ static bool permuteNetIds(int *n, int *g, int s, int last, struct rcclRomeModel* } else { for (int i = s; i <= last; i++) { std::swap(n[s], n[i]); - if (permuteNetIds(n, g, s+1, last, ref, topo, time)) return true; + if (permuteNetIds(n, g, s+1, last, ref, topo, time, ignore_numa)) return true; std::swap(n[s], n[i]); } } @@ -1004,16 +1067,18 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph* if (i < romeTopo.nGpus) match_nbio = false; for (i = 0; i < sizeof(romeTopoModels)/sizeof(romeTopoModels[0]); i++) { - if (romeTopo.nCpus != romeTopoModels[i].nCpus || romeTopo.nGpus != romeTopoModels[i].nGpus || + bool ignore_numa = disableNumaMatching(romeTopoModels[i].options); + if (!ignore_numa && romeTopo.nCpus != romeTopoModels[i].nCpus) continue; + if (romeTopo.nGpus != romeTopoModels[i].nGpus || romeTopo.nNics != romeTopoModels[i].nNics || romeTopo.nLinks != romeTopoModels[i].nLinks) continue; - if (strcmp(romeTopoModels[i].pattern, pattern)) continue; + if (!ignore_numa && strcmp(romeTopoModels[i].pattern, pattern)) continue; // 
permute GPU IDs for (int j = 0; j < ngpus; j++) g[j] = (j+2)%ngpus; - if (!permuteGpuIds(g, 0, ngpus-1, romeTopoModels+i, &romeTopo, &time, match_nbio)) continue; + if (!permuteGpuIds(g, 0, ngpus-1, romeTopoModels+i, &romeTopo, &time, match_nbio, ignore_numa)) continue; if (nnets > 1) { // permute NET IDs for (int j = 0; j < nnets; j++) n[j] = (j+2)%nnets; - if (permuteNetIds(n, g, 0, nnets-1, romeTopoModels+i, &romeTopo, &time)) break; + if (permuteNetIds(n, g, 0, nnets-1, romeTopoModels+i, &romeTopo, &time, ignore_numa)) break; } else break; } gettimeofday(&tve, NULL); @@ -1143,7 +1208,7 @@ ncclResult_t parse1H16P(struct ncclTopoSystem* system, struct ncclTopoGraph* gra // permute NET IDs int time = 0; for (int m = 0; m < nnets; m++) n[m] = (m+2)%nnets; - if (permuteNetIds(n, g16, 0, nnets-1, romeTopoModels+i, &romeTopo, &time)) break; + if (permuteNetIds(n, g16, 0, nnets-1, romeTopoModels+i, &romeTopo, &time, false)) break; } else break; } if (p < TOTAL_PERMUTE_COUNT) break; diff --git a/src/graph/tuning.cc b/src/graph/tuning.cc index df51c6e71b..6b1f3f31bf 100644 --- a/src/graph/tuning.cc +++ b/src/graph/tuning.cc @@ -138,6 +138,7 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom if (a == NCCL_ALGO_TREE && p == NCCL_PROTO_SIMPLE) (nNodes == 2) ? busBw *= 0.33 : busBw *= 0.11; if (a == NCCL_ALGO_TREE && (p == NCCL_PROTO_LL || p == NCCL_PROTO_LL128)) busBw *= 0.04; if (gcn == 910 && a == NCCL_ALGO_TREE && p == NCCL_PROTO_SIMPLE && nNodes == 2 && nRanks == 32) busBw *= 3.61; + if (gcn == 910 && a == NCCL_ALGO_TREE && p == NCCL_PROTO_SIMPLE && nNodes == 2 && nRanks == 16) busBw *= 6.5; #else if (compCap80) busBw = std::min(busBw, 235.0f); if (a == NCCL_ALGO_RING && p == NCCL_PROTO_LL) { busBw = std::min(llMaxBw, busBw * ((nNodes > 1 || coll == ncclFuncAllReduce || coll == ncclFuncReduce) ? 
1.0/4.0 : 1.0/3.0)); } diff --git a/tools/scripts/topo_val.sh b/tools/scripts/topo_val.sh index 77e513fa8e..cd2ef834ec 100755 --- a/tools/scripts/topo_val.sh +++ b/tools/scripts/topo_val.sh @@ -21,7 +21,7 @@ DIR="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -for i in {0..67} +for i in {0..72} do if [[ $i -eq 50 ]] || [[ $i -eq 51 ]] then diff --git a/tools/topo_expl/models/topo_16p1h_vm.xml b/tools/topo_expl/models/topo_16p1h_vm.xml index 30f2c4444a..fef1ba5307 100644 --- a/tools/topo_expl/models/topo_16p1h_vm.xml +++ b/tools/topo_expl/models/topo_16p1h_vm.xml @@ -3,28 +3,28 @@ - - + + - + - + - + - + @@ -44,15 +44,15 @@ - - + + - + @@ -60,15 +60,15 @@ - + - - + + @@ -88,14 +88,14 @@ - + - + @@ -103,15 +103,15 @@ - - + + - + - + @@ -131,14 +131,14 @@ - + - - + + @@ -146,14 +146,14 @@ - + - - + + diff --git a/tools/topo_expl/models/topo_4p4h.xml b/tools/topo_expl/models/topo_4p4h.xml new file mode 100644 index 0000000000..bd20841115 --- /dev/null +++ b/tools/topo_expl/models/topo_4p4h.xml @@ -0,0 +1,236 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/topo_expl/models/topo_8p1h_1.xml b/tools/topo_expl/models/topo_8p1h_1.xml new file mode 100644 index 0000000000..f7aaba74dd --- /dev/null +++ b/tools/topo_expl/models/topo_8p1h_1.xml @@ -0,0 +1,118 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/tools/topo_expl/models/topo_8p1h_n1.xml b/tools/topo_expl/models/topo_8p1h_n1.xml new file mode 100644 index 0000000000..1d9b83a1e8 --- /dev/null +++ b/tools/topo_expl/models/topo_8p1h_n1.xml @@ -0,0 +1,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/topo_expl/topo_expl.cpp b/tools/topo_expl/topo_expl.cpp index e051cae38c..e54f413985 100644 --- a/tools/topo_expl/topo_expl.cpp +++ b/tools/topo_expl/topo_expl.cpp @@ -137,6 +137,11 @@ NodeModelDesc model_descs[] = { {4, "topo_16p1h_vm.xml", "4 nodes 16P1H VM"}, {1, "topo_8p1h.xml", "single node 8P1H"}, {4, "topo_8p1h.xml", "4 nodes 8P1H"}, + {1, "topo_4p4h.xml", "single node gfx908 4P4H"}, + {1, "topo_8p1h_n1.xml", "single node 8P1H"}, + {4, "topo_8p1h_n1.xml", "4 nodes 8P1H"}, + {1, "topo_8p1h_1.xml", "single node 8P1H Alt."}, + {4, "topo_8p1h_1.xml", "4 nodes 8P1H Alt."}, }; int main(int argc,char* argv[])