diff --git a/tools/topo_expl/Makefile b/tools/topo_expl/Makefile index 0e8d708a59..8bd75f001f 100644 --- a/tools/topo_expl/Makefile +++ b/tools/topo_expl/Makefile @@ -5,6 +5,8 @@ HIP_PATH = ../../.. endif HIPCC = $(HIP_PATH)/bin/hipcc +.DEFAULT_GOAL := all + EXE = topo_expl CXXFLAGS = -g -ffunction-sections -fdata-sections -Wl,--gc-sections -fgpu-rdc -Iinclude -Ihipify_rccl/include -Ihipify_rccl/include/plugin -Ihipify_rccl/device/include -Ihipify_rccl/graph -I/opt/rocm/include/ -DTOPO_EXPL -DENABLE_TRACE -DENABLE_LL128 -DNVTX_NO_IMPL -DRCCL_EXPOSE_STATIC -lpthread @@ -12,7 +14,34 @@ files = $(EXE).cpp model.cpp utils.cpp hipify_rccl/graph/topo.cc hipify_rccl/gra hipify_rccl/graph/search.cc hipify_rccl/graph/connect.cc hipify_rccl/graph/tuning.cc hipify_rccl/graph/xml.cc ../../src/misc/nvmlwrap_stub.cc hipify_rccl/graph/rome_models.cc hipify_rccl/graph/archinfo.cc \ hipify_rccl/collectives.cc hipify_rccl/register.cc hipify_rccl/enqueue.cc ../../src/rccl_wrap.cc -all: hipify $(EXE) +FMT_DIR := third_party/fmt +FMT_INCLUDE := $(FMT_DIR)/include +FMT_HEADER := $(FMT_INCLUDE)/fmt/format.h +FMT_GIT := https://github.com/fmtlib/fmt.git + +# Probe for using a portable pipe (no <<< here-string) +HAVE_FMT := $(shell echo '#include ' | \ + $(HIPCC) -xc++ -std=c++17 -E - >/dev/null 2>&1 && echo yes || echo no) + +ifeq ($(HAVE_FMT),no) + CXXFLAGS += -I$(FMT_INCLUDE) + NEED_FMT := 1 +endif + +ifeq ($(NEED_FMT),1) +$(FMT_HEADER): + rm -rf third_party/fmt + @echo ">>> fmt not found; cloning $(FMT_GIT) ..." + @mkdir -p $(dir $(FMT_DIR)) + @git clone --depth=1 $(FMT_GIT) $(FMT_DIR) +else +$(FMT_HEADER): +endif +# --------------------------------------------------------------------------- + + + +all: $(FMT_HEADER) hipify $(EXE) $(EXE): $(files) $(HIPCC) $(CXXFLAGS) $^ -o $@ @@ -30,13 +59,14 @@ hipify: cp -a ../../src/misc/archinfo.cc hipify_rccl/graph/ hipify-perl -inplace -quiet-warnings hipify_rccl/include/*.h hipify-perl -inplace -quiet-warnings hipify_rccl/include/plugin/*.h + hipify-perl -inplace -quiet-warnings hipify_rccl/include/latency_profiler/*.h hipify-perl -inplace -quiet-warnings hipify_rccl/device/include/*.h - sed -i "s/template/template/g" "hipify_rccl/device/include/common.h" - sed -i "s/\\(struct RunWorkBatch]*\\)>*/\\1, COLL_UNROLL>/" "hipify_rccl/device/include/common.h" + sed -i "s/template/template/g" "hipify_rccl/device/include/common.h" + sed -i "s/\\(struct RunWorkBatch]*\\)>*/\\1, USE_ACC, COLL_UNROLL>/" "hipify_rccl/device/include/common.h" hipify-perl -inplace -quiet-warnings hipify_rccl/graph/* hipify-perl -inplace -quiet-warnings hipify_rccl/include/network/unpack/* hipify-perl -inplace -quiet-warnings hipify_rccl/*.cc clean: - rm -rf hipify_rccl + rm -rf hipify_rccl third_party rm -f *.o $(EXE) diff --git a/tools/topo_expl/README.md b/tools/topo_expl/README.md index cdc742d547..86ecd9c1ef 100644 --- a/tools/topo_expl/README.md +++ b/tools/topo_expl/README.md @@ -33,7 +33,7 @@ Run `./topo_expl` without arguments to see the list of available models. Each mo ## Example Usage: Print RCCL's algorithm/protocol selections -The tool is typically run with the `NCCL_DEBUG=INFO` environment variable, but for the convenience of just printing the algo/proto table, we use version `NCCL_DEBUG=version` in this example to avoid printing topo details. +The tool is typically run with the `NCCL_DEBUG=INFO` environment variable to show the topology information and print out the constructed rings/trees. However, for the convenience of just printing the algo/proto table, we use version `NCCL_DEBUG=version` in this example to avoid printing topo details. ```bash # List available models @@ -45,6 +45,9 @@ NCCL_DEBUG=version ./topo_expl -m 55 # Test a multi-node MI300 configuration with 8 nodes NCCL_DEBUG=version ./topo_expl -m 55 -n 8 +# Test a multi-node MI350 configuration with 2 nodes +NCCL_DEBUG=version ./topo_expl -m 59 -n 2 + # Test MI250 configuration (model 42) NCCL_DEBUG=version ./topo_expl -m 42 diff --git a/tools/topo_expl/include/device_table.h b/tools/topo_expl/include/device_table.h index d37b0d2215..0ceecb53e3 100644 --- a/tools/topo_expl/include/device_table.h +++ b/tools/topo_expl/include/device_table.h @@ -11,5 +11,8 @@ static struct rcclKernelItem rcclKernelTable[] = { }; template __forceinline__ __device__ void NCCL_CALL_FUNCTIONS(unsigned short funcIndex) noexcept { } +__forceinline__ __device__ void NCCL_CALL_FUNCTIONS_1(unsigned short funcIndex) noexcept { } +__forceinline__ __device__ void NCCL_CALL_FUNCTIONS_2(unsigned short funcIndex) noexcept { } +__forceinline__ __device__ void NCCL_CALL_FUNCTIONS_4(unsigned short funcIndex) noexcept { } #endif diff --git a/tools/topo_expl/models/topo_8p_950.xml b/tools/topo_expl/models/topo_8p_950.xml new file mode 100644 index 0000000000..899cc2eff0 --- /dev/null +++ b/tools/topo_expl/models/topo_8p_950.xml @@ -0,0 +1,167 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/topo_expl/topo_expl.cpp b/tools/topo_expl/topo_expl.cpp index e63387d59a..20222c4a23 100644 --- a/tools/topo_expl/topo_expl.cpp +++ b/tools/topo_expl/topo_expl.cpp @@ -136,6 +136,8 @@ NodeModelDesc model_descs[] = { {"topo_8p_942vm.xml", " 8gfx942 1H7XGMI 8NIC 2Intel B"}, {"topo_16p_gio-1s-1rp-cascade.xml", "16gfx942 2H7XGMI 1NIC 2AMD A"}, {"topo_16p_gio-3s-1rp-split-flat.xml", "16gfx942 2H7XGMI 1NIC 2AMD B"}, + // GFX 950 + {"topo_8p_950.xml", " 8gfx950 1H7XGMI 8NIC 2AMD A"}, }; NCCL_PARAM(MaxCTAs, "MAX_CTAS", MAXCHANNELS);