auto-detect and enable MPI

This commit is contained in:
Edgar Gabriel
2023-02-14 22:31:54 +00:00
orang tua d16d1fb16b
melakukan c96ff57ac7
2 mengubah file dengan 58 tambahan dan 8 penghapusan
+42 -8
Melihat File
@@ -3,6 +3,33 @@
# ########################################################################
# Append -pthread to CMAKE_CXX_FLAGS while keeping whatever flags were already
# set; the flag is added here because it is needed for linking.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# check_mpi(<mpi_compiler> <mpi_lib_a> <mpi_lib_so> [<extra_include_suffix>...])
#
# Probe for an MPI installation by looking for its C++ compiler wrapper and,
# if the wrapper is found, enable MPI support for the whole directory.
#
#   mpi_compiler  name of the compiler wrapper to search for (e.g. mpicxx)
#   mpi_lib_a     file name of the static MPI library (e.g. libmpi.a)
#   mpi_lib_so    file name of the shared MPI library (e.g. libmpi.so);
#                 it is listed first, so it is preferred over the static one
#   ARGN          optional extra PATH_SUFFIXES to try when looking for mpi.h
#
# Results:
#   MPI_MPICXX  cache entry holding the wrapper path; callers can test it to
#               learn whether this probe succeeded
#   MPI_H       cache entry holding the full path of mpi.h
#   MPI_LIB     cache entry holding the full path of the MPI library
#
# When the wrapper is found, the macro defines -DMPI_SUPPORT and adds the MPI
# include directory and library at directory scope. When the wrapper is found
# but mpi.h or the library cannot be located next to it, configuration stops
# with a fatal error instead of continuing with *-NOTFOUND values.
#
# This stays a macro (not a function) so that the helper variables it sets
# remain visible to the caller exactly as before.
macro(check_mpi mpi_compiler mpi_lib_a mpi_lib_so)
  find_program(MPI_MPICXX ${mpi_compiler})
  if (MPI_MPICXX)
    message ("-- ${mpi_compiler} found @ ${MPI_MPICXX}")

    # The installation prefix is taken to be two levels above the wrapper,
    # i.e. <prefix>/bin/<wrapper>  ->  <prefix>.
    if (${CMAKE_VERSION} VERSION_LESS "3.20.0")
      get_filename_component(mpi.tmpdir "${MPI_MPICXX}" DIRECTORY)
      get_filename_component(mpi_base_dir "${mpi.tmpdir}" DIRECTORY)
    else()
      cmake_path(GET MPI_MPICXX PARENT_PATH mpi.tmpdir)
      cmake_path(GET mpi.tmpdir PARENT_PATH mpi_base_dir)
    endif()

    # Locate mpi.h. The REQUIRED keyword of find_file() only exists in
    # CMake >= 3.18, so the result is checked explicitly to stay compatible
    # with the minimum CMake version this project declares.
    find_file(MPI_H mpi.h
      PATHS "${mpi_base_dir}"
      PATH_SUFFIXES include include/x86_64-linux-gnu ${ARGN})
    if (NOT MPI_H)
      message (FATAL_ERROR
        "${mpi_compiler} was found at ${MPI_MPICXX}, but mpi.h could not be "
        "located under ${mpi_base_dir}. Fix the MPI installation or "
        "configure with -DNO_MPI=1 to build without MPI support.")
    endif()

    if (${CMAKE_VERSION} VERSION_LESS "3.20.0")
      get_filename_component(mpi_inc_dir "${MPI_H}" DIRECTORY)
    else()
      cmake_path(GET MPI_H PARENT_PATH mpi_inc_dir)
    endif()
    message ("-- mpi.h is in ${mpi_inc_dir}")

    # Locate the MPI library file itself; the shared library name comes first.
    find_file(MPI_LIB
      NAMES ${mpi_lib_so} ${mpi_lib_a}
      PATHS "${mpi_base_dir}"
      PATH_SUFFIXES lib lib64 lib/x86_64-linux-gnu)
    if (NOT MPI_LIB)
      message (FATAL_ERROR
        "${mpi_compiler} was found at ${MPI_MPICXX}, but neither "
        "${mpi_lib_so} nor ${mpi_lib_a} could be located under "
        "${mpi_base_dir}. Fix the MPI installation or configure with "
        "-DNO_MPI=1 to build without MPI support.")
    endif()
    message ("-- libmpi is ${MPI_LIB}")

    # Only reached when both the header and the library were found.
    add_definitions(-DMPI_SUPPORT)
    include_directories("${mpi_inc_dir}")
    link_libraries("${MPI_LIB}")
  else()
    message ("-- ${mpi_compiler} not found")
  endif()
endmacro()
cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)
@@ -32,8 +59,7 @@ include(ROCMCheckTargetIds)
include(ROCMClients)
# Build variables
option(USE_MPI "Build RCCL-tests with MPI support. Requires the MPI path to be set.")
set(MPI_PATH "" CACHE PATH "Path to MPI installation")
option(NO_MPI "Build RCCL-tests without MPI support.")
## Get default GPU targets using rocm_check_target_ids
rocm_check_target_ids(
DEFAULT_AMDGPU_TARGETS
@@ -41,13 +67,21 @@ rocm_check_target_ids(
)
set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for these tests to target.")
# Find the MPI package if we're using MPI
if (USE_MPI)
if(NOT MPI_PATH STREQUAL "")
set(MPI_HOME "${MPI_PATH}")
if (NOT NO_MPI)
# Check for MPICH first
check_mpi(mpicxx.mpich libmpich.a libmpich.so include/x86_64-linux-gnu/mpich)
# Check for MPI in general. If we find mpicxx, we don't know whether its
# MPICH or another MPI implementation
if (NOT MPI_MPICXX)
check_mpi(mpicxx libmpi.a libmpi.so)
endif()
find_package(MPI REQUIRED MODULE)
add_definitions(-DOMPI_SKIP_MPICXX -DMPI_SUPPORT)
if (NOT MPI_MPICXX)
message ("-- no MPI library found")
endif()
else()
message ("-- MPI support explicitely disabled")
endif()
set(ROCM_USE_DEV_COMPONENT OFF) # This repo doesn't have a dev component
+16
Melihat File
@@ -18,6 +18,22 @@ RCCL tests rely on MPI to work on multiple processes, hence multiple nodes. If y
$ make MPI=1 MPI_HOME=/path/to/mpi HIP_HOME=/path/to/hip RCCL_HOME=/path/to/rccl
```
RCCL tests can also be built using cmake. A typical sequence will be:
```shell
$ mkdir build
$ cd build
$ CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_PREFIX_PATH=/path/to/rccl ..
$ make
```
When using the cmake build procedure, please make sure that RCCL has also been built using cmake (i.e. not using the install.sh script), since cmake will check
for cmake target and config files that are created during the RCCL build.
Using the cmake method also has the advantage that the build automatically checks for MPI installations, i.e. it is not necessary to explicitly request
MPI builds. A user can explicitly disable MPI builds by adding the -DNO_MPI=1 flag to the cmake command line.
## Usage
RCCL tests can run on multiple processes, multiple threads, and multiple HIP devices per thread. The number of processes is managed by MPI and is therefore not passed to the tests as an argument. The total number of ranks (=HIP devices) will be equal to (number of processes)\*(number of threads)\*(number of GPUs per thread).