From df98a6957d2bc0de172d5aae603ef13ff49b2abc Mon Sep 17 00:00:00 2001
From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com>
Date: Wed, 28 Feb 2024 10:46:05 -0800
Subject: [PATCH] Add another Rome model (#1095)

---
Reviewer notes (commentary only; text between "---" and the first
"diff --git" line is not part of the commit):

  * src/graph/rome_models.cc: adds the static table rome_model_85
    (nGpus = 8, nCpus = 2, nNics = 4, nLinks = 3, pattern "4242",
    options "tuning=2") and appends it to romeTopoModels[] directly
    after rome_model_84.
      - gpuIds / gpuNuma carry 8 entries, nicIds / nicNuma carry 4.
        The first four GPUs and first two NICs are tagged NUMA 0, the
        remaining ones NUMA 1.
      - connMatrix carries 64 entries (nGpus * nGpus) and is symmetric
        when read as an 8x8 grid; gdrLevel carries 32 entries
        (nNics * nGpus). Presumably both are row-major -- TODO confirm
        against the rcclRomeModel declaration, which this patch does
        not show.
      - ringBase holds 8 '|'-separated entries, each beginning and
        ending with an N<k> token.
      - Two consecutive blank lines are added after the new initializer
        (cosmetic only).
  * tools/topo_expl/topo_expl.cpp: appends
    {2, "topo_8p1h_6.xml", "2 nodes 8P1H Alt."} to model_descs[].
    NOTE(review): topo_8p1h_6.xml is not among the files touched by this
    patch -- confirm it already exists in the tree.

 src/graph/rome_models.cc      | 15 +++++++++++++++
 tools/topo_expl/topo_expl.cpp |  1 +
 2 files changed, 16 insertions(+)

diff --git a/src/graph/rome_models.cc b/src/graph/rome_models.cc
index 141f210066..914d00a95e 100644
--- a/src/graph/rome_models.cc
+++ b/src/graph/rome_models.cc
@@ -594,6 +594,20 @@ static struct rcclRomeModel rome_model_84 = {
   .options = "noCpuCheck=1,tuning=5",
 };
 
+static struct rcclRomeModel rome_model_85 = {
+  .nGpus = 8, .nCpus = 2, .nNics = 4, .nLinks = 3,
+  .gpuIds = { 0x32000, 0x35000, 0x11000, 0x14000, 0xae000, 0xb3000, 0x8e000, 0x93000, },
+  .nicIds = { 0x2d000, 0x5000, 0xab000, 0x94000, },
+  .gpuNuma = { 0, 0, 0, 0, 1, 1, 1, 1, },
+  .nicNuma = { 0, 0, 1, 1, },
+  .connMatrix = { 0, 4, 1, 0, 0, 0, 2, 0, 4, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 4, 2, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 4, 1, 0, 0, 1, 0, 0, 4, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 1, 0, 1, 4, 0, },
+  .gdrLevel = { PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PXB, },
+  .pattern = "4242",
+  .ringBase = "N0 0 1 3 2 4 5 7 6 N3|N1 2 3 1 0 6 7 5 4 N2|N3 7 6 0 1 5 4 2 3 N1|N0 1 0 6 7 3 2 4 5 N2|N2 4 5 7 6 0 1 3 2 N1|N3 6 7 5 4 2 3 1 0 N0|N2 5 4 2 3 7 6 0 1 N0|N1 3 2 4 5 1 0 6 7 N3",
+  .options = "tuning=2",
+};
+
+
 static struct rcclRomeModel romeTopoModels[] = {
   rome_model_22,
   rome_model_25,
@@ -637,6 +651,7 @@ static struct rcclRomeModel romeTopoModels[] = {
   rome_model_80,
   rome_model_81,
   rome_model_84,
+  rome_model_85,
 };
 
 /* Parse user defined rings. Format is like :
diff --git a/tools/topo_expl/topo_expl.cpp b/tools/topo_expl/topo_expl.cpp
index 373ca59a52..f6963f7830 100644
--- a/tools/topo_expl/topo_expl.cpp
+++ b/tools/topo_expl/topo_expl.cpp
@@ -157,6 +157,7 @@ NodeModelDesc model_descs[] = {
   {1, "topo_8p_940vm.xml", "single node gfx940 VM"},
   {2, "topo_8p_940vm.xml", "2 nodes gfx940 VM"},
   {2, "topo_8p_940_16n.xml", "2 nodes gfx940 16 NICs"},
+  {2, "topo_8p1h_6.xml", "2 nodes 8P1H Alt."},
 };
 
 NCCL_PARAM(MaxCTAs, "MAX_CTAS", MAXCHANNELS);