Refine and add new Rome models (#548)

[ROCm/rccl commit: 283dc86a73]
Этот коммит содержится в:
Wenkai Du
2022-05-17 08:23:59 -07:00
коммит произвёл GitHub
родитель b37180b7ed
Коммит b30b8becea
8 изменённых файлов: 158 добавлений и 14 удалений
+2 -2
Просмотреть файл
@@ -22,7 +22,7 @@ struct RunWork<ncclFuncSendRecv, T, RedOp, NCCL_ALGO_RING, NCCL_PROTO_SIMPLE> {
ssize_t const count = args->count;
int const chunkSize = args->chunkSize/sizeof(T);
int const peer = args->peer;
Primitives<T, RedOp, FanAsymmetric<0, 1>, 1, Proto, 1> prims
Primitives<T, RedOp, FanAsymmetric<0, 1>, 0, Proto, 1> prims
(tid, nthreads, nullptr, &peer, args->buff, nullptr, /*redOpArg(ignored)=*/0, group);
ssize_t offset = 0;
do {
@@ -39,7 +39,7 @@ struct RunWork<ncclFuncSendRecv, T, RedOp, NCCL_ALGO_RING, NCCL_PROTO_SIMPLE> {
ssize_t const count = args->count;
int const chunkSize = args->chunkSize/sizeof(T);
int const peer = args->peer;
Primitives<T, RedOp, FanAsymmetric<1, 0>, 1, Proto, 1> prims
Primitives<T, RedOp, FanAsymmetric<1, 0>, 0, Proto, 1> prims
(tid, nthreads, &peer, nullptr, nullptr, args->buff, /*redOpArg(ignored)=*/0, group);
ssize_t offset = 0;
do {
+1 -1
Просмотреть файл
@@ -699,7 +699,7 @@ static ncclResult_t ncclTopoGetNchannels(struct ncclTopoSystem* system, int g /*
// Local rank
path = system->nodes[GPU].nodes[peer].paths[GPU]+g;
if (path->type == PATH_NVL) {
float nvlWidth = ncclTopoNVLinkSpeed(system->nodes[GPU].nodes[g].gpu.cudaCompCap);
float nvlWidth = ncclTopoXGMISpeed(system->nodes[GPU].nodes[g].gpu.cudaCompCap);
*nChannels = 2*std::max(1, (int)(path->width / nvlWidth));
} else {
*nChannels = 2;
+1 -1
Просмотреть файл
@@ -903,7 +903,7 @@ static ncclResult_t parseRomeSystem(struct ncclTopoSystem* system, struct rcclRo
}
if (!link->remNode) continue;
if (link->type != LINK_NVL) continue;
romeTopo->connMatrix[i*romeTopo->nGpus+n] = link->width/VEGA_XGMI_WIDTH;
romeTopo->connMatrix[i*romeTopo->nGpus+n] = link->width/ncclTopoXGMISpeed(node->gpu.gcn);
count ++;
}
if (romeTopo->nLinks < count) romeTopo->nLinks = count;
+1 -1
Просмотреть файл
@@ -543,7 +543,7 @@ ncclResult_t ncclTopoAddXGMI(struct ncclXmlNode* node, struct ncclTopoSystem* sy
}
}
if (remote) {
int nvlSpeed = VEGA_XGMI_WIDTH;
float nvlSpeed = ncclTopoXGMISpeed(gpu->gpu.gcn);
NCCLCHECK(ncclTopoConnectNodes(gpu, remote, LINK_NVL, count*nvlSpeed));
if (remote->type != GPU) {
NCCLCHECK(ncclTopoConnectNodes(remote, gpu, LINK_NVL, count*nvlSpeed));
+4 -8
Просмотреть файл
@@ -25,6 +25,7 @@
#define ARM_WIDTH 6.0
#define NET_WIDTH 12.0 // 100Gbit
#define VEGA_XGMI_WIDTH 24.0 // XGMI speed (GB/s) ncclTopoXGMISpeed returns for any gcn other than 910
#define MI200_XGMI_WIDTH 36.0 // XGMI speed (GB/s) ncclTopoXGMISpeed returns for gcn 910
// Intel CPU convert GPU P2P traffic into 64B PCI TLPs, so GPU
// to GPU traffic consumes more PCI bandwidth.
@@ -183,13 +184,8 @@ static ncclResult_t ncclTopoRankToIndex(struct ncclTopoSystem* system, int rank,
return ncclInternalError;
}
// Returns NVLink speed in GB/s
static float ncclTopoNVLinkSpeed(int cudaCompCap) {
return
cudaCompCap == 86 ? SM86_NVLINK_WIDTH :
cudaCompCap >= 80 ? SM80_NVLINK_WIDTH :
cudaCompCap >= 70 ? SM70_NVLINK_WIDTH :
cudaCompCap >= 60 ? SM60_NVLINK_WIDTH :
SM80_NVLINK_WIDTH;
// Looks up the XGMI speed, in GB/s, for a GPU identified by its gcn value.
// gcn 910 maps to MI200_XGMI_WIDTH; every other value falls back to
// VEGA_XGMI_WIDTH.
static float ncclTopoXGMISpeed(int gcn) {
  if (gcn == 910) {
    return MI200_XGMI_WIDTH;
  }
  return VEGA_XGMI_WIDTH;
}
#endif
+1 -1
Просмотреть файл
@@ -21,7 +21,7 @@
DIR="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
for i in {0..74}
for i in {0..76}
do
if [[ $i -eq 50 ]] || [[ $i -eq 51 ]]
then
+146
Просмотреть файл
@@ -0,0 +1,146 @@
<system version="2">
<cpu numaid="0" affinity="00000000,00000000,ffffffff,ffffffff,00000000,00000000,ffffffff,ffffffff" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="1">
<pci busid="0000:22:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:2e:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:30:00.0" class="0x060400" vendor="0x1022" device="0x14c7" subsystem_vendor="0x1022" subsystem_device="0x14c7" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:32:00.0" class="0x038000" vendor="0x1002" device="0x740c" subsystem_vendor="0x1002" subsystem_device="0x0b0c" link_speed="16.0 GT/s PCIe" link_width="16">
<gpu dev="0" sm="90" gcn="910" arch="38911" rank="0" gdr="1">
<xgmi target="0000:35:00.0" count="4" tclass="0x038000"/>
<xgmi target="0000:11:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:8e:00.0" count="2" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:33:00.0" class="0x060400" vendor="0x1022" device="0x14c7" subsystem_vendor="0x1022" subsystem_device="0x14c7" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:35:00.0" class="0x038000" vendor="0x1002" device="0x740c" subsystem_vendor="0x1002" subsystem_device="0x0b0c" link_speed="16.0 GT/s PCIe" link_width="16">
<gpu dev="1" sm="90" gcn="910" arch="38911" rank="1" gdr="1">
<xgmi target="0000:32:00.0" count="4" tclass="0x038000"/>
<xgmi target="0000:14:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:b3:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:24:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:26:00.0" class="0x020000" vendor="0x15b3" device="0x101b" subsystem_vendor="0x15b3" subsystem_device="0x0007" link_speed="16.0 GT/s PCIe" link_width="16">
<nic>
<net name="mlx5_1" dev="1" speed="200000" port="1" latency="0.000000" guid="0x54102f0003fd7010" maxconn="131072" gdr="1"/>
</nic>
</pci>
</pci>
<pci busid="0000:2b:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:2d:00.0" class="0x020000" vendor="0x15b3" device="0x101b" subsystem_vendor="0x15b3" subsystem_device="0x0007" link_speed="16.0 GT/s PCIe" link_width="16">
<nic>
<net name="mlx5_2" dev="2" speed="200000" port="1" latency="0.000000" guid="0x44102f0003fd7010" maxconn="131072" gdr="1"/>
</nic>
</pci>
</pci>
</pci>
<pci busid="0000:01:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:0d:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:0f:00.0" class="0x060400" vendor="0x1022" device="0x14c7" subsystem_vendor="0x1022" subsystem_device="0x14c7" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:11:00.0" class="0x038000" vendor="0x1002" device="0x740c" subsystem_vendor="0x1002" subsystem_device="0x0b0c" link_speed="16.0 GT/s PCIe" link_width="16">
<gpu dev="2" sm="90" gcn="910" arch="38911" rank="2" gdr="1">
<xgmi target="0000:32:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:14:00.0" count="4" tclass="0x038000"/>
<xgmi target="0000:ae:00.0" count="2" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:12:00.0" class="0x060400" vendor="0x1022" device="0x14c7" subsystem_vendor="0x1022" subsystem_device="0x14c7" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:14:00.0" class="0x038000" vendor="0x1002" device="0x740c" subsystem_vendor="0x1002" subsystem_device="0x0b0c" link_speed="16.0 GT/s PCIe" link_width="16">
<gpu dev="3" sm="90" gcn="910" arch="38911" rank="3" gdr="1">
<xgmi target="0000:35:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:11:00.0" count="4" tclass="0x038000"/>
<xgmi target="0000:93:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
</pci>
<pci busid="0000:03:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:05:00.0" class="0x020000" vendor="0x15b3" device="0x101b" subsystem_vendor="0x15b3" subsystem_device="0x0007" link_speed="16.0 GT/s PCIe" link_width="16">
<nic>
<net name="mlx5_3" dev="3" speed="200000" port="1" latency="0.000000" guid="0xd8112f0003fd7010" maxconn="131072" gdr="1"/>
</nic>
</pci>
</pci>
<pci busid="0000:0a:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:0c:00.0" class="0x020000" vendor="0x15b3" device="0x101b" subsystem_vendor="0x15b3" subsystem_device="0x0007" link_speed="16.0 GT/s PCIe" link_width="16">
<nic>
<net name="mlx5_4" dev="4" speed="200000" port="1" latency="0.000000" guid="0x60102f0003fd7010" maxconn="131072" gdr="1"/>
</nic>
</pci>
</pci>
</pci>
</cpu>
<cpu numaid="1" affinity="7fffffff,ffffffff,00000000,00000000,ffffffff,ffffffff,00000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="1">
<pci busid="0000:a1:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:a9:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:ac:00.0" class="0x060400" vendor="0x1022" device="0x14c7" subsystem_vendor="0x1022" subsystem_device="0x14c7" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:ae:00.0" class="0x038000" vendor="0x1002" device="0x740c" subsystem_vendor="0x1002" subsystem_device="0x0b0c" link_speed="16.0 GT/s PCIe" link_width="16">
<gpu dev="4" sm="90" gcn="910" arch="38911" rank="4" gdr="1">
<xgmi target="0000:11:00.0" count="2" tclass="0x038000"/>
<xgmi target="0000:b3:00.0" count="4" tclass="0x038000"/>
<xgmi target="0000:8e:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:ab:00.0" class="0x020000" vendor="0x15b3" device="0x101b" subsystem_vendor="0x15b3" subsystem_device="0x0007" link_speed="16.0 GT/s PCIe" link_width="16">
<nic>
<net name="mlx5_6" dev="6" speed="200000" port="1" latency="0.000000" guid="0x68102f0003fd7010" maxconn="131072" gdr="1"/>
</nic>
</pci>
</pci>
<pci busid="0000:af:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:b1:00.0" class="0x060400" vendor="0x1022" device="0x14c7" subsystem_vendor="0x1022" subsystem_device="0x14c7" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:b3:00.0" class="0x038000" vendor="0x1002" device="0x740c" subsystem_vendor="0x1002" subsystem_device="0x0b0c" link_speed="16.0 GT/s PCIe" link_width="16">
<gpu dev="5" sm="90" gcn="910" arch="38911" rank="5" gdr="1">
<xgmi target="0000:35:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:ae:00.0" count="4" tclass="0x038000"/>
<xgmi target="0000:93:00.0" count="1" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:b4:00.0" class="0x020000" vendor="0x15b3" device="0x101b" subsystem_vendor="0x15b3" subsystem_device="0x0007" link_speed="16.0 GT/s PCIe" link_width="16">
<nic>
<net name="mlx5_7" dev="7" speed="200000" port="1" latency="0.000000" guid="0xe4112f0003fd7010" maxconn="131072" gdr="1"/>
</nic>
</pci>
</pci>
</pci>
<pci busid="0000:81:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:89:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:8c:00.0" class="0x060400" vendor="0x1022" device="0x14c7" subsystem_vendor="0x1022" subsystem_device="0x14c7" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:8e:00.0" class="0x038000" vendor="0x1002" device="0x740c" subsystem_vendor="0x1002" subsystem_device="0x0b0c" link_speed="16.0 GT/s PCIe" link_width="16">
<gpu dev="6" sm="90" gcn="910" arch="38911" rank="6" gdr="1">
<xgmi target="0000:32:00.0" count="2" tclass="0x038000"/>
<xgmi target="0000:ae:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:93:00.0" count="4" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:8b:00.0" class="0x020000" vendor="0x15b3" device="0x101b" subsystem_vendor="0x15b3" subsystem_device="0x0007" link_speed="16.0 GT/s PCIe" link_width="16">
<nic>
<net name="mlx5_8" dev="8" speed="200000" port="1" latency="0.000000" guid="0x6c112f0003fd7010" maxconn="131072" gdr="1"/>
</nic>
</pci>
</pci>
<pci busid="0000:8f:00.0" class="0x060400" vendor="0x1000" device="0xc010" subsystem_vendor="0x1000" subsystem_device="0xa096" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:91:00.0" class="0x060400" vendor="0x1022" device="0x14c7" subsystem_vendor="0x1022" subsystem_device="0x14c7" link_speed="16.0 GT/s PCIe" link_width="16">
<pci busid="0000:93:00.0" class="0x038000" vendor="0x1002" device="0x740c" subsystem_vendor="0x1002" subsystem_device="0x0b0c" link_speed="16.0 GT/s PCIe" link_width="16">
<gpu dev="7" sm="90" gcn="910" arch="38911" rank="7" gdr="1">
<xgmi target="0000:14:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:b3:00.0" count="1" tclass="0x038000"/>
<xgmi target="0000:8e:00.0" count="4" tclass="0x038000"/>
</gpu>
</pci>
</pci>
<pci busid="0000:94:00.0" class="0x020000" vendor="0x15b3" device="0x101b" subsystem_vendor="0x15b3" subsystem_device="0x0007" link_speed="16.0 GT/s PCIe" link_width="16">
<nic>
<net name="mlx5_9" dev="9" speed="200000" port="1" latency="0.000000" guid="0x44112f0003fd7010" maxconn="131072" gdr="1"/>
</nic>
</pci>
</pci>
</pci>
</cpu>
</system>
+2
Просмотреть файл
@@ -145,6 +145,8 @@ NodeModelDesc model_descs[] = {
{4, "topo_8p1h_1.xml", "4 nodes 8P1H Alt."},
{1, "topo_8p1h_2.xml", "single node 8P1H Alt."},
{4, "topo_8p1h_3.xml", "4 nodes 8P1H Alt."},
{1, "topo_8p1h_4.xml", "Single node 8P1H Alt."},
{2, "topo_8p1h_4.xml", "2 nodes 8P1H Alt."},
};
int main(int argc,char* argv[])