diff --git a/CMakeLists.txt b/CMakeLists.txt index dae6c5c5d0..5ccdaaaa95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,7 @@ project(rccl CXX) set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") option(BUILD_TESTS "Build test programs" OFF) +option(INSTALL_DEPENDENCIES "Force install dependencies" OFF) include(cmake/Dependencies.cmake) diff --git a/README.md b/README.md index 9c245d80dc..2e8dcaee3f 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ The root of this repository has a helper script 'install.sh' to build and instal * `./install.sh` -- builds library including unit tests * `./install.sh -i` -- builds and installs the library to /opt/rocm/rccl; installation path can be changed with --prefix argument (see below.) +* `./install.sh -d` -- installs all necessary dependencies for RCCL. Should be re-invoked if the build folder is removed. * `./install.sh -h` -- shows help * `./install.sh -t` -- builds library including unit tests * `./install.sh -r` -- runs unit tests (must be already built) @@ -62,15 +63,19 @@ RCCL package install requires sudo/root access because it creates a directory ca ## Tests -There are unit tests implemented with the Googletest framework in RCCL, which are currently a work-in-progress. To invoke the unit tests, go to the rccl-install folder, then the test/ subfolder, and execute the appropriate unit test executable(s). Several notes for running the unit tests: +There are unit tests implemented with the Googletest framework in RCCL, which are currently a work-in-progress. The unit tests require Googletest 1.10 or higher to build and execute properly. +To invoke the unit tests, go to the build folder, then the test subfolder, and execute the appropriate unit test executable(s). -1. The LD_LIBRARY_PATH environment variable will need to be set to include /path/to/rccl-install/lib/ in order to run the unit tests. -2. 
The HSA_FORCE_FINE_GRAIN_PCIE environment variable will need to be set to 1 in order to run the unit tests. +Unit test names are now of the format: +[CollectiveCall]CorrectnessSweep/[CollectiveCall]CorrectnessTest.[Type of test]/[ncclRedOp_t]_[datatype]_[number of elements]_[number of devices]_[in place/out of place]_[environment variables] + +This allows filtering of unit tests being run by their parameter values by passing the --gtest_filter command line flag, for example: -An example call to the unit tests: ```shell -$ LD_LIBRARY_PATH=rccl-install/lib/ HSA_FORCE_FINE_GRAIN_PCIE=1 rccl-install/test/UnitTests +--gtest_filter="AllReduceCorrectnessSweep*float32*" ``` +will run only AllReduce correctness tests with float32 datatype. See "Running a Subset of the Tests" at https://chromium.googlesource.com/external/github.com/google/googletest/+/HEAD/googletest/docs/advanced.md for more information on how to form more advanced filters. + There are also other performance and error-checking tests for RCCL. These are maintained separately at https://github.com/ROCmSoftwarePlatform/rccl-tests. See the rccl-tests README for more information on how to build and run those tests. diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 824f233aa6..f135d16b67 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -31,39 +31,43 @@ # For downloading, building, and installing required dependencies include(cmake/DownloadProject.cmake) - -if(BUILD_TESTS) - find_package(GTest QUIET) - if(NOT GTest_FOUND) +find_package(GTest 1.10) - if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$") +if(NOT GTest_FOUND OR INSTALL_DEPENDENCIES) + if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$") # hip-clang cannot compile googlebenchmark for some reason set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++") - endif() + endif() # unset(GTEST_INCLUDE_DIR CACHE) # unset(GTEST_INCLUDE_DIRS CACHE) - message(STATUS "GTest not found. 
Downloading and building GTest.") - # Download, build and install googletest library - set(GTEST_ROOT ${CMAKE_CURRENT_BINARY_DIR}/gtest CACHE PATH "") - download_project(PROJ googletest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.10.0 - INSTALL_DIR ${GTEST_ROOT} - CMAKE_ARGS -DBUILD_GTEST=ON -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${COMPILER_OVERRIDE} - LOG_DOWNLOAD TRUE - LOG_CONFIGURE TRUE - LOG_BUILD TRUE - LOG_INSTALL TRUE - UPDATE_DISCONNECTED TRUE - ) - find_package(GTest REQUIRED CONFIG PATHS ${GTEST_ROOT}) - set(GTEST_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/gtest/include CACHE PATH "") - set(GTEST_BOTH_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/gtest/lib/libgtest.a;${CMAKE_CURRENT_BINARY_DIR}/gtest/lib/libgtest_main.a CACHE PATH "") + message(STATUS "GTest not found. Downloading and building GTest.") + # Download, build and install googletest library + set(GTEST_ROOT ${CMAKE_CURRENT_BINARY_DIR}/gtest CACHE PATH "") + download_project(PROJ googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.10.0 + INSTALL_DIR ${GTEST_ROOT} + CMAKE_ARGS -DBUILD_GTEST=ON -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${COMPILER_OVERRIDE} + LOG_DOWNLOAD TRUE + LOG_CONFIGURE TRUE + LOG_BUILD TRUE + LOG_INSTALL TRUE + UPDATE_DISCONNECTED TRUE + ) + find_package(GTest REQUIRED CONFIG PATHS ${GTEST_ROOT}) + set(GTEST_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/gtest/include CACHE PATH "") + if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/gtest/lib) + set(GTEST_BOTH_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/gtest/lib/libgtest.a;${CMAKE_CURRENT_BINARY_DIR}/gtest/lib/libgtest_main.a CACHE PATH "") + elseif(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/gtest/lib64) + set(GTEST_BOTH_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/gtest/lib64/libgtest.a;${CMAKE_CURRENT_BINARY_DIR}/gtest/lib64/libgtest_main.a CACHE PATH "") + else() + message(FATAL_ERROR "Cannot find gtest library installation path.") endif() endif() + # Find or download/install rocm-cmake project find_package(ROCM QUIET CONFIG 
PATHS /opt/rocm) if(NOT ROCM_FOUND) diff --git a/install.sh b/install.sh index 833fea48c0..8453ac53ed 100755 --- a/install.sh +++ b/install.sh @@ -10,6 +10,7 @@ function display_help() echo "./install [-h|--help] " echo " [-h|--help] prints this help message." echo " [-i|--install] install RCCL library (see --prefix argument below.)" + echo " [-d|--dependencies] install RCCL dependencies." echo " [-p|--package_build] Build RCCL package." echo " [-t|--tests_build] Build unit tests, but do not run." echo " [-r|--run_tests_quick] Run small subset of unit tests (must be built already.)" @@ -42,7 +43,7 @@ build_static=false # check if we have a modern version of getopt that can handle whitespace and long parameters getopt -T if [[ $? -eq 4 ]]; then - GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,install,dependencies,package_build,tests_build,run_tests_quick,static,run_tests_all,hcc,hip-clang,no_clean,prefix: --options hiptrs -- "$@") + GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,install,dependencies,package_build,tests_build,run_tests_quick,static,run_tests_all,hcc,hip-clang,no_clean,prefix: --options hidptrs -- "$@") else echo "Need a new version of getopt" exit 1 @@ -170,12 +171,9 @@ case "${OS_ID}" in esac if ($install_dependencies); then - if [[ -e /etc/redhat-release ]]; then - yum install chrpath libgomp - else - apt install chrpath libomp-dev - fi + cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON" fi + check_exit_code "$?" if ($build_tests) || (($run_tests) && [[ ! -f ./test/UnitTests ]]); then