Update tuning table and fix topo_expl

[ROCm/rccl commit: 94ad7f6f51]
This commit is contained in:
Wenkai Du
2022-11-04 22:54:29 +00:00
parent 4c9c1d41ee
commit ffecb74b1e
3 changed files with 24 additions and 23 deletions
+1 -1
View File
@@ -721,7 +721,7 @@ newchannel:
} while (str[offset++] != 0);
end:
graph->nChannels = nChannels;
graph->bwIntra = graph->bwInter = system->maxBw;
graph->bwIntra = graph->bwInter = system->totalBw/nChannels;
if (graph->id == 1) {
for (int i=0; i<graph->nChannels; i++) {
int net;
+21 -20
View File
@@ -71,30 +71,30 @@ struct tuningModel {
static struct tuningModel tuning_model_0 {
.hwLat = {
/* NVLINK */
{ /* Tree (LL/LL128/Simple)*/ { 1.5, 1.5, 4.5 }, /* Ring (LL/LL128/Simple)*/ { 1.5, 1.5, 4.5 }, /* CollNetDirect (Simple)*/ { 0.0, 0.0, 4.5 }, /* CollNetChain (Simple)*/ { 0.0, 0.0, 4.5 } },
{ /* Tree (LL/LL128/Simple)*/ { 0.8, 1.4, 2.5 }, /* Ring (LL/LL128/Simple)*/ { 0.8, 2.2, 3.6 }, /* CollNetDirect (Simple)*/ { 0.0, 0.0, 0.8 }, /* CollNetChain (Simple)*/ { 0.0, 0.0, 1.4 } },
/* PCI */
{ /* Tree (LL/LL128/Simple)*/ { 2.2, 2.2, 5.7 }, /* Ring (LL/LL128/Simple)*/ { 2.2, 2.2, 5.7 }, /* CollNetDirect (Simple)*/ { 0.0, 0.0, 5.7 }, /* CollNetChain (Simple)*/ { 0.0, 0.0, 5.7 } },
/* NET */
{ /* Tree (LL/LL128/Simple)*/ { 28.3, 28.3, 45.4 }, /* Ring (LL/LL128/Simple)*/ { 2.0, 2.0, 24.1 }, /* CollNetDirect (Simple)*/ { 0.0, 0.0, 45.4 }, /* CollNetChain (Simple)*/ { 0.0, 0.0, 45.4 } },
{ /* Tree (LL/LL128/Simple)*/ { 11.8, 18.2, 20.8 }, /* Ring (LL/LL128/Simple)*/ { 9.5, 19.8, 15.1 }, /* CollNetDirect (Simple)*/ { 0.0, 0.0, 11.8 }, /* CollNetChain (Simple)*/ { 0.0, 0.0, 18.2 } },
},
.bwRatio = {
/* 2 nodes */
{ /* Tree (LL/LL128/Simple)*/ { 0.06, 1.00, 1.30 }, /* Ring (LL/LL128/Simple)*/ { 0.07, 1.00, 1.00 }, /* CollNetDirect (Simple)*/ { 0.00, 0.00, 1.00 }, /* CollNetChain (Simple)*/ { 0.00, 0.00, 1.00 } },
{ /* Tree (LL/LL128/Simple)*/ { 0.04, 0.22, 0.91 }, /* Ring (LL/LL128/Simple)*/ { 0.04, 0.34, 1.00 }, /* CollNetDirect (Simple)*/ { 0.00, 0.00, 1.00 }, /* CollNetChain (Simple)*/ { 0.00, 0.00, 1.00 } },
/* more than 2 nodes */
{ /* Tree (LL/LL128/Simple)*/ { 0.06, 1.00, 0.30 }, /* Ring (LL/LL128/Simple)*/ { 0.07, 1.00, 1.00 }, /* CollNetDirect (Simple)*/ { 0.00, 0.00, 1.00 }, /* CollNetChain (Simple)*/ { 0.00, 0.00, 1.00 } },
{ /* Tree (LL/LL128/Simple)*/ { 0.04, 0.22, 0.95 }, /* Ring (LL/LL128/Simple)*/ { 0.04, 0.34, 1.00 }, /* CollNetDirect (Simple)*/ { 0.00, 0.00, 1.00 }, /* CollNetChain (Simple)*/ { 0.00, 0.00, 1.00 } },
},
.treeCorrectionFactor = {
{ 0.3, 0.9, 0.8, 0.7, 0.6, 0.3, 0.1, 0.4, 0.8, 0.8, 0.5, 0.3, 0.4, 0.3, 0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, },
{ 0.3, 0.9, 0.8, 0.7, 0.6, 0.3, 0.1, 0.4, 0.8, 0.8, 0.5, 0.3, 0.4, 0.3, 0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, },
{ 0.4, 1.0, 1.0, 0.8, 1.0, 1.0, 0.3, 1.0, 0.9, 0.9, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.7, 0.4, 0.3, 0.3, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, },
{ 0.1, 0.2, 0.1, 0.1, 0.9, 0.3, 0.4, 0.1, 0.2, 0.4, 0.2, 0.1, 0.3, 0.3, 0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, },
{ 0.1, 0.3, 1.0, 0.1, 0.5, 1.0, 0.9, 1.0, 1.0, 1.0, 0.3, 0.1, 0.4, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, },
{ 0.2, 1.0, 0.1, 0.1, 0.7, 0.2, 0.4, 0.1, 0.1, 0.3, 0.4, 0.3, 0.6, 0.8, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.8, 0.8, 0.8, 0.8, 0.9, 0.9, 0.9, },
},
.ringCorrectionFactor = {
{ 0.2, 0.7, 0.7, 0.6, 0.6, 0.3, 0.2, 0.5, 1.0, 1.0, 0.8, 0.6, 0.8, 0.6, 0.3, 0.3, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, },
{ 0.2, 0.7, 0.7, 0.6, 0.6, 0.3, 0.2, 0.5, 1.0, 1.0, 0.8, 0.6, 0.8, 0.6, 0.3, 0.3, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, },
{ 1.0, 0.6, 0.9, 1.0, 0.7, 0.7, 1.0, 0.6, 0.8, 0.2, 0.1, 0.1, 0.1, 0.1, 0.2, 0.5, 0.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, },
{ 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.4, 0.2, 0.3, 0.5, 0.3, 0.1, 0.5, 0.5, 0.3, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, },
{ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.3, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.8, 0.7, 0.5, 0.4, 0.4, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, },
{ 1.0, 0.8, 0.2, 1.0, 1.0, 0.3, 1.0, 0.1, 0.1, 0.2, 0.2, 0.1, 0.5, 1.0, 0.8, 0.8, 1.0, 0.9, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, },
},
};
@@ -161,30 +161,30 @@ static struct tuningModel tuning_model_2 {
static struct tuningModel tuning_model_3 {
.hwLat = {
/* NVLINK */
{ /* Tree (LL/LL128/Simple)*/ { 1.5, 1.5, 4.5 }, /* Ring (LL/LL128/Simple)*/ { 1.5, 1.5, 4.5 }, /* CollNetDirect (Simple)*/ { 0.0, 0.0, 4.5 }, /* CollNetChain (Simple)*/ { 0.0, 0.0, 4.5 } },
{ /* Tree (LL/LL128/Simple)*/ { 0.8, 0.0, 2.5 }, /* Ring (LL/LL128/Simple)*/ { 0.8, 0.0, 3.6 }, /* CollNetDirect (Simple)*/ { 0.0, 0.0, 0.8 }, /* CollNetChain (Simple)*/ { 0.0, 0.0, 0.0 } },
/* PCI */
{ /* Tree (LL/LL128/Simple)*/ { 2.2, 2.2, 5.7 }, /* Ring (LL/LL128/Simple)*/ { 2.2, 2.2, 5.7 }, /* CollNetDirect (Simple)*/ { 0.0, 0.0, 5.7 }, /* CollNetChain (Simple)*/ { 0.0, 0.0, 5.7 } },
/* NET */
{ /* Tree (LL/LL128/Simple)*/ { 17.4, 17.4, 40.3 }, /* Ring (LL/LL128/Simple)*/ { 4.1, 4.1, 40.6 }, /* CollNetDirect (Simple)*/ { 0.0, 0.0, 40.3 }, /* CollNetChain (Simple)*/ { 0.0, 0.0, 40.3 } },
{ /* Tree (LL/LL128/Simple)*/ { 12.5, 0.0, 22.4 }, /* Ring (LL/LL128/Simple)*/ { 9.5, 0.0, 19.8 }, /* CollNetDirect (Simple)*/ { 0.0, 0.0, 12.5 }, /* CollNetChain (Simple)*/ { 0.0, 0.0, 0.0 } },
},
.bwRatio = {
/* 2 nodes */
{ /* Tree (LL/LL128/Simple)*/ { 0.08, 1.00, 0.95 }, /* Ring (LL/LL128/Simple)*/ { 0.08, 1.00, 1.00 }, /* CollNetDirect (Simple)*/ { 0.00, 0.00, 1.00 }, /* CollNetChain (Simple)*/ { 0.00, 0.00, 1.00 } },
{ /* Tree (LL/LL128/Simple)*/ { 0.20, 0.00, 1.75 }, /* Ring (LL/LL128/Simple)*/ { 0.20, 0.00, 1.00 }, /* CollNetDirect (Simple)*/ { 0.00, 0.00, 1.00 }, /* CollNetChain (Simple)*/ { 0.00, 0.00, 1.00 } },
/* more than 2 nodes */
{ /* Tree (LL/LL128/Simple)*/ { 0.08, 1.00, 0.41 }, /* Ring (LL/LL128/Simple)*/ { 0.08, 1.00, 1.00 }, /* CollNetDirect (Simple)*/ { 0.00, 0.00, 1.00 }, /* CollNetChain (Simple)*/ { 0.00, 0.00, 1.00 } },
{ /* Tree (LL/LL128/Simple)*/ { 0.20, 0.00, 0.96 }, /* Ring (LL/LL128/Simple)*/ { 0.20, 0.00, 1.00 }, /* CollNetDirect (Simple)*/ { 0.00, 0.00, 1.00 }, /* CollNetChain (Simple)*/ { 0.00, 0.00, 1.00 } },
},
.treeCorrectionFactor = {
{ 0.6, 1.0, 1.0, 1.0, 0.1, 0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.9, 0.9, 0.8, 0.6, 0.4, 0.5, 0.4, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, },
{ 0.6, 1.0, 1.0, 1.0, 0.1, 0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.9, 0.9, 0.8, 0.6, 0.4, 0.5, 0.4, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, },
{ 1.0, 0.1, 0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 0.7, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 0.6, 0.6, 0.6, 0.6, },
{ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0, 0.2, 1.0, 0.9, 1.0, 0.6, 0.4, 0.6, 0.4, 0.3, 0.3, 0.3, 0.3, 0.3, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, },
{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, },
{ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.1, 0.1, 0.1, 0.2, 1.0, 0.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.8, 0.7, 0.8, 0.9, 0.7, 0.7, },
},
.ringCorrectionFactor = {
{ 0.5, 0.2, 0.1, 0.1, 0.3, 0.3, 0.1, 0.3, 0.7, 0.8, 0.5, 0.4, 0.2, 0.1, 0.1, 0.1, 0.3, 0.5, 0.4, 0.4, 0.3, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, },
{ 0.5, 0.2, 0.1, 0.1, 0.3, 0.3, 0.1, 0.3, 0.7, 0.8, 0.5, 0.4, 0.2, 0.1, 0.1, 0.1, 0.3, 0.5, 0.4, 0.4, 0.3, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, },
{ 0.2, 0.3, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 1.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.6, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, },
{ 0.1, 0.1, 0.1, 0.1, 0.1, 0.3, 0.1, 0.2, 0.1, 0.4, 0.4, 0.2, 0.2, 0.3, 0.7, 0.5, 0.4, 0.3, 0.3, 0.3, 0.3, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, },
{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, },
{ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.5, 1.0, 0.1, 0.3, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.4, 0.7, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, },
},
};
@@ -304,6 +304,7 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom
int collnet = (a == NCCL_ALGO_COLLNET_DIRECT || a == NCCL_ALGO_COLLNET_CHAIN) ? 1 : 0;
float bw = nNodes <= 2 || collnet ? graphs[a]->bwIntra : graphs[a]->bwInter;
float busBw = comm->topo->baseBw != 0.0 ? comm->topo->baseBw : graphs[a]->nChannels * bw;
//INFO(NCCL_INIT, "algo %s proto %s busBw %f baseBw %f bw %f nChannels %d bwIntra %f bwInter %f", ncclAlgoStr[a], ncclProtoStr[p], busBw, comm->topo->baseBw, bw, graphs[a]->nChannels, graphs[a]->bwIntra, graphs[a]->bwInter);
// Various model refinements
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
+2 -2
View File
@@ -32,7 +32,7 @@
#include "rocm_smi/rocm_smi.h"
const char* ncclFuncStr[NCCL_NUM_FUNCTIONS+2] = { "Broadcast", "Reduce", "AllGather", "ReduceScatter", "AllReduce", "SendRecv", "AllToAllPivot" };
const char* ncclAlgoStr[NCCL_NUM_ALGORITHMS] = { "Tree", "Ring", "CollNet" };
const char* ncclAlgoStr[NCCL_NUM_ALGORITHMS] = { "Tree", "Ring", "CollNetDirect", "CollNetChain" };
const char* ncclProtoStr[NCCL_NUM_PROTOCOLS] = { "LL", "LL128", "Simple" };
extern NodeModel *node_model;
@@ -698,7 +698,7 @@ ncclResult_t initTransportsRank_1(struct ncclComm* comm, struct allGather3Data_t
comm->nChannels = (comm->topo->nodes[GPU].count != comm->topo->nRanks && comm->topo->nodes[NET].count)
? std::min(treeGraph.nChannels, ringGraph.nChannels) : ringGraph.nChannels;
NCCLCHECK(ncclTopoPreset(comm, &treeGraph, &ringGraph, &allGather3Data[rank].topoRanks));
NCCLCHECK(ncclTopoPreset(comm, &treeGraph, &ringGraph, &collNetGraph, &allGather3Data[rank].topoRanks));
return ncclSuccess;
}