topo_expl: fix build and add tuning support (#539)
This commit is contained in:
@@ -8,7 +8,7 @@ HIPCC = $(HIP_PATH)/bin/hipcc
|
||||
EXE = topo_expl
|
||||
CXXFLAGS = -g -O3 -Iinclude -I../../src -I../../src/include -I../../src/graph/ -I/opt/rocm/rocm_smi/include/ -DTOPO_EXPL -DENABLE_TRACE
|
||||
|
||||
files = $(EXE).cpp model.cpp utils.cpp ../../src/graph/topo.cc ../../src/graph/rings.cc ../../src/graph/paths.cc ../../src/graph/trees.cc \
|
||||
files = $(EXE).cpp model.cpp utils.cpp ../../src/graph/topo.cc ../../src/graph/rings.cc ../../src/graph/paths.cc ../../src/graph/trees.cc ../../src/misc/param.cc \
|
||||
../../src/graph/search.cc ../../src/graph/connect.cc ../../src/graph/tuning.cc ../../src/graph/xml.cc ../../src/misc/nvmlwrap_stub.cc ../../src/graph/rome_models.cc
|
||||
|
||||
all: $(EXE)
|
||||
|
||||
@@ -46,6 +46,7 @@ THE SOFTWARE.
|
||||
#include "model.h"
|
||||
#include "utils.h"
|
||||
#include "topo.h"
|
||||
#include "graph.h"
|
||||
|
||||
NodeModel *node_model;
|
||||
|
||||
@@ -236,6 +237,34 @@ int main(int argc,char* argv[])
|
||||
initTransportsRank_3(&comm[i], allGather3Data, treeGraph[i], ringGraph[i], collNetGraph[i]);
|
||||
}
|
||||
|
||||
for (uint64_t len = 8; len <= 4294967296L; len *= 2) {
|
||||
struct ncclInfo info;
|
||||
float minTime = 3600000000.0;
|
||||
info.comm = &comm[0];
|
||||
info.coll = ncclFuncAllReduce;
|
||||
info.nBytes = len;
|
||||
// Find algorithm / protocol.
|
||||
info.algorithm = -1;
|
||||
info.protocol = -1;
|
||||
int nAlgos = NCCL_NUM_ALGORITHMS;
|
||||
for (int a=0; a<nAlgos; a++) {
|
||||
for (int p=0; p<NCCL_NUM_PROTOCOLS; p++) {
|
||||
float time;
|
||||
NCCLCHECK(ncclTopoGetAlgoTime(&info, a, p, 1, &time));
|
||||
if (time >= 0 && time < minTime) {
|
||||
info.algorithm = a;
|
||||
info.protocol = p;
|
||||
minTime = time;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (info.algorithm == -1 || info.protocol == -1) {
|
||||
WARN("Error : no algorithm/protocol available");
|
||||
return ncclInternalError;
|
||||
}
|
||||
INFO(NCCL_TUNING, "%10ld %s %s time %f", info.nBytes, ncclAlgoStr[info.algorithm], ncclProtoStr[info.protocol], minTime);
|
||||
}
|
||||
|
||||
for (int i = 0; i < nranks; i++) {
|
||||
free(comm[i].connectSend);
|
||||
free(comm[i].connectRecv);
|
||||
|
||||
Reference in new issue
Block a user