diff --git a/CMakeLists.txt b/CMakeLists.txt
index d950565e2f..296c01c28b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,42 @@
 # ########################################################################
 #Adding pthread flag for linking
 set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
+# check_mpi(mpi_compiler mpi_lib_a mpi_lib_so mpi_bin_dir mpi_base_lib_dir mpi_inc_dir)
+#
+# Probe one candidate MPI installation; if it is complete, enable MPI support.
+#   mpi_compiler     - MPI C++ compiler wrapper name, searched only in mpi_bin_dir
+#   mpi_lib_a        - file name of the static MPI library
+#   mpi_lib_so       - file name of the shared MPI library (listed first in the search)
+#   mpi_bin_dir      - directory expected to contain the compiler wrapper
+#   mpi_base_lib_dir - directory searched for the library, together with its
+#                      lib, lib64 and lib/x86_64-linux-gnu subdirectories
+#   mpi_inc_dir      - directory expected to contain mpi.h
+# On success MPI_MPICXX, MPI_H and MPI_LIB hold the located paths, MPI_SUPPORT is
+# defined, and the include directory and library are added at directory scope.
+# If the wrapper is found but mpi.h or the library is missing, all three variables
+# are reset to *-NOTFOUND so the caller can try the next candidate.
+# A macro is used so that these variables are set in the caller's scope.
+macro(check_mpi mpi_compiler mpi_lib_a mpi_lib_so mpi_bin_dir mpi_base_lib_dir mpi_inc_dir)
+    find_program(MPI_MPICXX ${mpi_compiler} PATHS ${mpi_bin_dir} NO_DEFAULT_PATH)
+    if (MPI_MPICXX)
+        message ("-- ${mpi_compiler} found @ ${MPI_MPICXX}")
+        find_file(MPI_H mpi.h PATHS ${mpi_inc_dir} NO_DEFAULT_PATH)
+        message ("-- mpi.h is in ${MPI_H}")
+        find_file(MPI_LIB NAMES ${mpi_lib_so} ${mpi_lib_a} PATHS ${mpi_base_lib_dir} PATH_SUFFIXES lib lib64 lib/x86_64-linux-gnu NO_DEFAULT_PATH)
+        message ("-- libmpi is ${MPI_LIB}")
+        if (NOT MPI_H OR NOT MPI_LIB)
+            set (MPI_MPICXX "MPI_MPICXX-NOTFOUND")
+            set (MPI_H "MPI_H-NOTFOUND")
+            set (MPI_LIB "MPI_LIB-NOTFOUND")
+        else()
+            add_definitions(-DMPI_SUPPORT)
+            include_directories(${mpi_inc_dir})
+            link_libraries(${MPI_LIB})
+        endif()
+    else()
+        message ("-- ${mpi_compiler} not found")
+    endif()
+endmacro()
 
 cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)
 
@@ -32,8 +68,8 @@ include(ROCMCheckTargetIds)
 include(ROCMClients)
 
 # Build variables
-option(USE_MPI "Build RCCL-tests with MPI support. Requires the MPI path to be set.")
-set(MPI_PATH "" CACHE PATH "Path to MPI installation")
+option(NO_MPI "Build RCCL-tests without MPI support.")
+set(MPI_PATH "" CACHE PATH "Use MPI in the specified directory.")
 ## Get default GPU targets using rocm_check_target_ids
 rocm_check_target_ids(
     DEFAULT_AMDGPU_TARGETS
@@ -41,13 +77,69 @@ rocm_check_target_ids(
 )
 set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for these tests to target.")
 
-# Find the MPI package if we're using MPI
-if (USE_MPI)
-    if(NOT MPI_PATH STREQUAL "")
-        set(MPI_HOME "${MPI_PATH}")
+if (NOT NO_MPI)
+    # Forget results cached by an earlier configure run; otherwise a cached
+    # MPI_MPICXX would skip every probe below and silently drop MPI support.
+    unset(MPI_MPICXX CACHE)
+    unset(MPI_H CACHE)
+    unset(MPI_LIB CACHE)
+
+    # CHECK for MPI Path first. User requested this directory explicitly
+    if (MPI_PATH)
+        set(mpi_spec_bin_dir "${MPI_PATH}/bin")
+        set(mpi_spec_inc_dir "${MPI_PATH}/include")
+        check_mpi(mpicxx libmpi.a libmpi.so ${mpi_spec_bin_dir} ${MPI_PATH} ${mpi_spec_inc_dir})
+        if (NOT MPI_MPICXX)
+            # Since the user explicitly requested this directory, abort if something went wrong.
+            message(FATAL_ERROR "Could not find MPI in ${MPI_PATH}")
+        endif()
     endif()
-    find_package(MPI REQUIRED MODULE)
-    add_definitions(-DOMPI_SKIP_MPICXX -DMPI_SUPPORT)
+
+    # Check for MPICH Ubuntu installation
+    if (NOT MPI_MPICXX)
+        check_mpi(mpicxx.mpich libmpich.a libmpich.so /usr/bin /usr /usr/include/x86_64-linux-gnu/mpich)
+    endif()
+
+    # Check for Open MPI Ubuntu installation
+    if (NOT MPI_MPICXX)
+        check_mpi(mpicxx.openmpi libmpi.a libmpi.so /usr/bin /usr/lib/x86_64-linux-gnu/openmpi /usr/lib/x86_64-linux-gnu/openmpi/include)
+    endif()
+
+    # Check for MPICH RHEL installation
+    if (NOT MPI_MPICXX)
+        check_mpi(mpicxx libmpich.a libmpich.so /usr/lib64/mpich/bin /usr/lib64/mpich /usr/include/mpich-x86_64)
+    endif()
+
+    # Check for Open MPI RHEL installation
+    if (NOT MPI_MPICXX)
+        check_mpi(mpicxx libmpi.a libmpi.so /usr/lib64/openmpi/bin /usr/lib64/openmpi /usr/include/openmpi-x86_64)
+    endif()
+
+    # Check for MPICH SLES installation
+    if (NOT MPI_MPICXX)
+        check_mpi(mpicxx libmpich.a libmpich.so /usr/lib64/mpi/gcc/mpich/bin /usr/lib64/mpi/gcc/mpich /usr/lib64/mpi/gcc/mpich/include)
+    endif()
+
+    # Check for Open MPI v4 SLES installation
+    if (NOT MPI_MPICXX)
+        check_mpi(mpicxx libmpi.a libmpi.so /usr/lib64/mpi/gcc/openmpi4/bin /usr/lib64/mpi/gcc/openmpi4 /usr/lib64/mpi/gcc/openmpi4/include)
+    endif()
+
+    # Check for Open MPI v3 SLES installation
+    if (NOT MPI_MPICXX)
+        check_mpi(mpicxx libmpi.a libmpi.so /usr/lib64/mpi/gcc/openmpi3/bin /usr/lib64/mpi/gcc/openmpi3 /usr/lib64/mpi/gcc/openmpi3/include)
+    endif()
+
+    # Check for Open MPI v2 SLES installation
+    if (NOT MPI_MPICXX)
+        check_mpi(mpicxx libmpi.a libmpi.so /usr/lib64/mpi/gcc/openmpi2/bin /usr/lib64/mpi/gcc/openmpi2 /usr/lib64/mpi/gcc/openmpi2/include)
+    endif()
+
+    if (NOT MPI_MPICXX)
+        message ("-- no MPI library found")
+    endif()
+else()
+    message ("-- MPI support explicitly disabled")
 endif()
 
 set(ROCM_USE_DEV_COMPONENT OFF) # This repo doesn't have a dev component
@@ -57,7 +149,7 @@ add_subdirectory(src)
 
 # Create ROCm standard packages
 rocm_create_package(
-    NAME rccl-separate-tests
+    NAME rccl-tests
     DESCRIPTION "Tests for the ROCm Communication Collectives Library"
     MAINTAINER "RCCL Maintainer "
 )
diff --git a/README.md b/README.md
index c2847232e6..74f15515b4 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,23 @@ RCCL tests rely on MPI to work on multiple processes, hence multiple nodes. If y
 $ make MPI=1 MPI_HOME=/path/to/mpi HIP_HOME=/path/to/hip RCCL_HOME=/path/to/rccl
 ```
 
+RCCL tests can also be built using cmake. A typical sequence will be:
+
+```shell
+$ mkdir build
+$ cd build
+$ CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_PREFIX_PATH=/path/to/rccl ..
+$ make
+```
+
+When using the cmake build procedure, please make sure that RCCL has also been built using cmake (i.e. not using the install.sh script), since cmake will check
+for cmake target and config files that are created during the RCCL build.
+
+Using the cmake method also has the advantage that the build is automatically checking for MPI installations, i.e. it is not necessary to explicitly request
+MPI builds. A user can request to use a particular MPI library by using the MPI_PATH variable (e.g. -DMPI_PATH=/path/to/mpi). MPI support can be explicitly disabled by adding the -DNO_MPI=1
+flag to the cmake command line.
+
+
 ## Usage
 
 RCCL tests can run on multiple processes, multiple threads, and multiple HIP devices per thread. The number of process is managed by MPI and is therefore not passed to the tests as argument. The total number of ranks (=HIP devices) will be equal to (number of processes)\*(number of threads)\*(number of GPUs per thread).