Add more Rome models (#292)

This commit is contained in:
Wenkai Du
2020-10-30 21:26:04 -07:00
committed by GitHub
parent bfab1d3592
commit dfa3c41ede
6 changed files with 307 additions and 1 deletions
+33
View File
@@ -166,6 +166,36 @@ static struct rcclRomeModel rome_model_28 = {
.ringBase = "0 3 2 1 4 5 6 7|7 6 5 4 1 2 3 0|0 2 5 7 4 6 3 1|1 3 6 4 7 5 2 0",
};
static struct rcclRomeModel rome_model_40 = {
.nGpus = 8, .nCpus = 4, .nNics = 1, .nLinks = 3,
.gpuIds = { 0x43000, 0x23000, 0x26000, 0x3000, 0xc3000, 0xc6000, 0xa3000, 0x83000, },
.gpuNuma = { 0, 1, 1, 1, 2, 2, 3, 3, },
.nicNuma = { 2, },
.connMatrix = { 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, },
.pattern = "10302120",
.ringBase = "6 7 1 4 0 5 3 2|7 6 4 1 0 2 3 5",
};
static struct rcclRomeModel rome_model_42 = {
.nGpus = 8, .nCpus = 7, .nNics = 1, .nLinks = 3,
.gpuIds = { 0x43000, 0x23000, 0x26000, 0x3000, 0xc3000, 0xc6000, 0xa3000, 0x83000, },
.gpuNuma = { 1, 2, 2, 3, 5, 5, 6, 7, },
.nicNuma = { 4, },
.connMatrix = { 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, },
.pattern = "00102010012010",
.ringBase = "7 4 6 1 3 0 2 5|6 4 7 1 3 2 5 0",
};
static struct rcclRomeModel rome_model_44 = {
.nGpus = 8, .nCpus = 4, .nNics = 1, .nLinks = 3,
.gpuIds = { 0x63000, 0x43000, 0x27000, 0x3000, 0xe3000, 0xc3000, 0xa3000, 0x83000, },
.gpuNuma = { 0, 0, 1, 1, 2, 2, 3, 3, },
.nicNuma = { 2, },
.connMatrix = { 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, },
.pattern = "20202120",
.ringBase = "4 5 7 6 2 1 3 0|7 4 5 6 2 0 1 3",
};
static struct rcclRomeModel romeTopoModels[] = {
rome_model_22,
rome_model_25,
@@ -180,4 +210,7 @@ static struct rcclRomeModel romeTopoModels[] = {
rome_model_23,
rome_model_38,
rome_model_28,
rome_model_40,
rome_model_42,
rome_model_44,
};
+1 -1
View File
@@ -21,7 +21,7 @@
DIR="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
for i in {0..38}
for i in {0..44}
do
$DIR/../topo_expl/topo_expl -m $i > "topo_m$i.log"
$DIR/../TopoVisual/topo_visual.sh -i "topo_m$i.log"
+87
View File
@@ -0,0 +1,87 @@
<system version="2">
<cpu numaid="0" affinity="00000000,00000000,00000000,ffffffff" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:41:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:43:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="0" sm="90" gcn="908" arch="38911" rank="0" gdr="1">
<xgmi target="0000:26:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:03:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c6:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
<cpu numaid="1" affinity="00000000,00000000,ffffffff,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:21:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:23:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="1" sm="90" gcn="908" arch="38911" rank="1" gdr="1">
<xgmi target="0000:c3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:a3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:83:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:24:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:26:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="2" sm="90" gcn="908" arch="38911" rank="2" gdr="1">
<xgmi target="0000:43:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:03:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c6:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:01:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:03:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="3" sm="90" gcn="908" arch="38911" rank="3" gdr="1">
<xgmi target="0000:43:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:26:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c6:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
<cpu numaid="2" affinity="00000000,ffffffff,00000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:c1:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:c3:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="4" sm="90" gcn="908" arch="38911" rank="4" gdr="1">
<xgmi target="0000:23:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:a3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:83:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:c4:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:c6:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="5" sm="90" gcn="908" arch="38911" rank="5" gdr="1">
<xgmi target="0000:43:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:26:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:03:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:e1:00.0" class="0x020700" link_speed="16 GT/s" link_width="16">
<nic>
<net name="mlx5_0" dev="0" speed="200000" port="1" guid="0x70cd600003da341c" maxconn="262144" gdr="1"/>
</nic>
</pci>
</cpu>
<cpu numaid="3" affinity="ffffffff,00000000,00000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:a1:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:a3:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="6" sm="90" gcn="908" arch="38911" rank="6" gdr="1">
<xgmi target="0000:23:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:83:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:81:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:83:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="7" sm="90" gcn="908" arch="38911" rank="7" gdr="1">
<xgmi target="0000:23:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:a3:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
</system>
+87
View File
@@ -0,0 +1,87 @@
<system version="2">
<cpu numaid="0" affinity="00000000,00000000,00ffffff,00000000,00000000,00ffffff" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:61:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:63:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="0" sm="90" gcn="908" arch="38911" rank="0" gdr="1">
<xgmi target="0000:43:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:27:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:03:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:41:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:43:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="1" sm="90" gcn="908" arch="38911" rank="1" gdr="1">
<xgmi target="0000:63:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:27:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:03:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
<cpu numaid="1" affinity="00000000,0000ffff,ff000000,00000000,0000ffff,ff000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:25:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:27:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="2" sm="90" gcn="908" arch="38911" rank="2" gdr="1">
<xgmi target="0000:63:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:43:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:03:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:01:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:03:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="3" sm="90" gcn="908" arch="38911" rank="3" gdr="1">
<xgmi target="0000:63:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:43:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:27:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
<cpu numaid="2" affinity="000000ff,ffff0000,00000000,000000ff,ffff0000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:e1:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:e3:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="4" sm="90" gcn="908" arch="38911" rank="4" gdr="1">
<xgmi target="0000:c3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:a3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:83:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:c1:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:c3:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="5" sm="90" gcn="908" arch="38911" rank="5" gdr="1">
<xgmi target="0000:e3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:a3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:83:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:c4:00.0" class="0x020700" link_speed="16 GT/s" link_width="16">
<nic>
<net name="mlx5_0" dev="0" speed="200000" port="1" guid="0x22fd9f00039b0398" maxconn="262144" gdr="1"/>
</nic>
</pci>
</cpu>
<cpu numaid="3" affinity="ffffff00,00000000,00000000,ffffff00,00000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:a1:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:a3:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="6" sm="90" gcn="908" arch="38911" rank="6" gdr="1">
<xgmi target="0000:e3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:83:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:81:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:83:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="7" sm="90" gcn="908" arch="38911" rank="7" gdr="1">
<xgmi target="0000:e3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:a3:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
</system>
+93
View File
@@ -0,0 +1,93 @@
<system version="2">
<cpu numaid="1" affinity="00000000,00000000,00000000,ffff0000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:41:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:43:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="0" sm="90" gcn="908" arch="38911" rank="0" gdr="1">
<xgmi target="0000:26:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:03:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c6:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
<cpu numaid="2" affinity="00000000,00000000,0000ffff,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:21:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:23:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="1" sm="90" gcn="908" arch="38911" rank="1" gdr="1">
<xgmi target="0000:c3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:a3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:83:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:24:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:26:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="2" sm="90" gcn="908" arch="38911" rank="2" gdr="1">
<xgmi target="0000:43:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:03:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c6:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
<cpu numaid="3" affinity="00000000,00000000,ffff0000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:01:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:03:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="3" sm="90" gcn="908" arch="38911" rank="3" gdr="1">
<xgmi target="0000:43:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:26:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c6:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
<cpu numaid="5" affinity="00000000,ffff0000,00000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:c1:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:c3:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="4" sm="90" gcn="908" arch="38911" rank="4" gdr="1">
<xgmi target="0000:23:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:a3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:83:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:c4:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:c6:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="5" sm="90" gcn="908" arch="38911" rank="5" gdr="1">
<xgmi target="0000:43:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:26:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:03:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
<cpu numaid="6" affinity="0000ffff,00000000,00000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:a1:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:a3:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="6" sm="90" gcn="908" arch="38911" rank="6" gdr="1">
<xgmi target="0000:23:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:83:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
<cpu numaid="7" affinity="ffff0000,00000000,00000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:81:00.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0000:83:00.0" class="0x038000" link_speed="16 GT/s" link_width="16">
<gpu dev="7" sm="90" gcn="908" arch="38911" rank="7" gdr="1">
<xgmi target="0000:23:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:c3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:a3:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</cpu>
<cpu numaid="4" affinity="00000000,0000ffff,00000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="143" modelid="49">
<pci busid="0000:e1:00.0" class="0x020700" link_speed="16 GT/s" link_width="16">
<nic>
<net name="mlx5_0" dev="0" speed="200000" port="1" guid="0x70cd600003da341c" maxconn="262144" gdr="1"/>
</nic>
</pci>
</cpu>
</system>
+6
View File
@@ -108,6 +108,12 @@ NodeModelDesc model_descs[] = {
{4, "topo_8p_rome_n2_2.xml", "4 nodes 8 VEGA20 Rome NPS=2 Alt. Model 2 NET/IF"},
{4, "topo_8p_ts1_n4_2.xml", "4 nodes 8 VEGA20 TS1 NPS=4 3 NET/IF"},
{1, "topo_8p_rome_n4.xml", "single node 8 VEGA20 Rome NPS=4"},
{1, "topo_4p3l_n2.xml", "single node 8 gfx908 Rome"},
{4, "topo_4p3l_n2.xml", "4 nodes 8 gfx908 Rome"},
{1, "topo_4p3l_n4.xml", "single node 8 gfx908 Rome NPS=4"},
{4, "topo_4p3l_n4.xml", "4 nodes 8 gfx908 Rome NPS=4"},
{1, "topo_4p3l_n2_1.xml", "single node 8 gfx908 Rome"},
{4, "topo_4p3l_n2_1.xml", "4 nodes 8 gfx908 Rome"},
};
int main(int argc,char* argv[])