[BUILD] Update install.sh for RCCL build (#1191)
Signed-off-by: nileshnegi <Nilesh.Negi@amd.com>
This commit is contained in:
zatwierdzone przez
GitHub
rodzic
1249a6c3fd
commit
5aaf7121d9
@@ -1,8 +1,17 @@
|
||||
# Copyright (c) 2019-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
# Modifications Copyright (c) Microsoft Corporation. Licensed under the MIT License.
|
||||
|
||||
# CMake version minimum requirements
|
||||
#==================================================================================================
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
|
||||
# CMake Toolchain file to define compilers and path to ROCm
|
||||
#==================================================================================================
|
||||
if (NOT CMAKE_TOOLCHAIN_FILE)
|
||||
set(CMAKE_TOOLCHAIN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/toolchain-linux.cmake")
|
||||
message(STATUS "CMAKE_TOOLCHAIN_FILE: ${CMAKE_TOOLCHAIN_FILE}")
|
||||
endif()
|
||||
|
||||
# RCCL project
|
||||
#==================================================================================================
|
||||
project(rccl CXX)
|
||||
|
||||
+18
-12
@@ -18,11 +18,18 @@ The collective operations are implemented using ring and tree algorithms and hav
|
||||
RCCL directly depends on HIP runtime plus the HIP-Clang compiler, which are part of the ROCm software stack.
|
||||
For ROCm installation instructions, see https://github.com/ROCm/ROCm.
|
||||
|
||||
The root of this repository has a helper script 'install.sh' to build and install RCCL on Ubuntu with a single command. It does not take a lot of options and hard-codes configuration that can be specified through invoking cmake directly, but it's a great way to get started quickly and can serve as an example of how to build/install.
|
||||
The root of this repository has a helper script `install.sh` to build and install RCCL with a single command. It hard-codes configurations that can be specified through invoking cmake directly, but it's a great way to get started quickly and can serve as an example of how to build/install RCCL.
|
||||
|
||||
### To build the library using the install script:
|
||||
|
||||
```shell
|
||||
./install.sh --help
|
||||
./install.sh
|
||||
```
|
||||
|
||||
For more info on build options/flags when using the install script, use `./install.sh --help`
|
||||
```shell
|
||||
./install.sh --help
|
||||
RCCL build & installation helper script
|
||||
Options:
|
||||
--address-sanitizer Build with address sanitizer enabled
|
||||
-d|--dependencies Install RCCL dependencies
|
||||
@@ -33,37 +40,38 @@ The root of this repository has a helper script 'install.sh' to build and instal
|
||||
-f|--fast Quick-build RCCL (local gpu arch only, no backtrace, and collective trace support)
|
||||
-h|--help Prints this help message
|
||||
-i|--install Install RCCL library (see --prefix argument below)
|
||||
-j|--jobs Specify how many parallel compilation jobs to run (nproc by default)
|
||||
-j|--jobs Specify how many parallel compilation jobs to run ($nproc by default)
|
||||
-l|--local_gpu_only Only compile for local GPU architecture
|
||||
--amdgpu_targets Only compile for specified GPU architecture(s). For multiple targets, separate by ';' (builds for all supported GPU architectures by default)
|
||||
--no_clean Don't delete files if they already exist
|
||||
--npkit-enable Compile with npkit enabled
|
||||
--roctx-enable Compile with roctx enabled (example usage: rocprof --roctx-trace ./rccl-program)
|
||||
-p|--package_build Build RCCL package
|
||||
--prefix Specify custom directory to install RCCL to (default: /opt/rocm)
|
||||
--prefix Specify custom directory to install RCCL to (default: `/opt/rocm`)
|
||||
--rm-legacy-include-dir Remove legacy include dir Packaging added for file/folder reorg backward compatibility
|
||||
--run_tests_all Run all rccl unit tests (must be built already)
|
||||
-r|--run_tests_quick Run small subset of rccl unit tests (must be built already)
|
||||
--static Build RCCL as a static library instead of shared library
|
||||
-t|--tests_build Build rccl unit tests, but do not run
|
||||
--time-trace Plot the build time of RCCL
|
||||
--time-trace Plot the build time of RCCL (requires `ninja-build` package installed on the system)
|
||||
--verbose Show compile commands
|
||||
```
|
||||
|
||||
## Manual build
|
||||
|
||||
### To build the library :
|
||||
### To build the library using CMake:
|
||||
|
||||
```shell
|
||||
$ git clone https://github.com/ROCm/rccl.git
|
||||
$ cd rccl
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_PREFIX_PATH=/opt/rocm/ ..
|
||||
$ cmake ..
|
||||
$ make -j 16 # Or some other suitable number of parallel jobs
|
||||
```
|
||||
You may substitute an installation path of your own choosing by passing CMAKE_INSTALL_PREFIX. For example:
|
||||
You may substitute an installation path of your own choosing by passing `CMAKE_INSTALL_PREFIX`. For example:
|
||||
```shell
|
||||
$ CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_PREFIX_PATH=/opt/rocm/ -DCMAKE_INSTALL_PREFIX=$PWD/rccl-install ..
|
||||
$ cmake -DCMAKE_INSTALL_PREFIX=$PWD/rccl-install ..
|
||||
```
|
||||
Note: ensure rocm-cmake is installed, `apt install rocm-cmake`.
|
||||
|
||||
@@ -123,11 +131,9 @@ Please refer to the [RCCL Documentation Site](https://rocm.docs.amd.com/projects
|
||||
|
||||
Run the steps below to build documentation locally.
|
||||
|
||||
```
|
||||
```shell
|
||||
cd docs
|
||||
|
||||
pip3 install -r sphinx/requirements.txt
|
||||
|
||||
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
|
||||
```
|
||||
|
||||
|
||||
+89
-56
@@ -16,12 +16,13 @@ build_package=false
|
||||
build_release=true
|
||||
build_static=false
|
||||
build_tests=false
|
||||
build_verbose=0
|
||||
build_verbose=false
|
||||
clean_build=true
|
||||
collective_trace=true
|
||||
enable_ninja=""
|
||||
install_dependencies=false
|
||||
install_library=false
|
||||
install_prefix="${ROCM_PATH}"
|
||||
msccl_kernel_enabled=true
|
||||
num_parallel_jobs=$(nproc)
|
||||
npkit_enabled=false
|
||||
@@ -53,13 +54,13 @@ function display_help()
|
||||
echo " --npkit-enable Compile with npkit enabled"
|
||||
echo " --roctx-enable Compile with roctx enabled (example usage: rocprof --roctx-trace ./rccl-program)"
|
||||
echo " -p|--package_build Build RCCL package"
|
||||
echo " --prefix Specify custom directory to install RCCL to (default: /opt/rocm)"
|
||||
echo " --prefix Specify custom directory to install RCCL to (default: \`/opt/rocm\`)"
|
||||
echo " --rm-legacy-include-dir Remove legacy include dir Packaging added for file/folder reorg backward compatibility"
|
||||
echo " --run_tests_all Run all rccl unit tests (must be built already)"
|
||||
echo " -r|--run_tests_quick Run small subset of rccl unit tests (must be built already)"
|
||||
echo " --static Build RCCL as a static library instead of shared library"
|
||||
echo " -t|--tests_build Build rccl unit tests, but do not run"
|
||||
echo " --time-trace Plot the build time of RCCL"
|
||||
echo " --time-trace Plot the build time of RCCL (requires \`ninja-build\` package installed on the system)"
|
||||
echo " --verbose Show compile commands"
|
||||
}
|
||||
|
||||
@@ -69,14 +70,14 @@ function display_help()
|
||||
|
||||
# check if we have a modern version of getopt that can handle whitespace and long parameters
|
||||
getopt -T
|
||||
if [[ $? -eq 4 ]]; then
|
||||
if [[ "$?" -eq 4 ]]; then
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable_backtrace,disable-colltrace,disable-msccl-kernel,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,verbose -- "$@")
|
||||
else
|
||||
echo "Need a new version of getopt"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $? -ne 0 ]]; then
|
||||
if [[ "$?" -ne 0 ]]; then
|
||||
echo "getopt invocation failed; could not parse the command line";
|
||||
exit 1
|
||||
fi
|
||||
@@ -101,14 +102,14 @@ while true; do
|
||||
--npkit-enable) npkit_enabled=true; shift ;;
|
||||
--roctx-enable) roctx_enabled=true; shift ;;
|
||||
-p | --package_build) build_package=true; shift ;;
|
||||
--prefix) install_prefix=${2}; shift 2 ;;
|
||||
--prefix) install_library=true; install_prefix=${2}; shift 2 ;;
|
||||
--rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
|
||||
-r | --run_tests_quick) run_tests=true; shift ;;
|
||||
--run_tests_all) run_tests=true; run_tests_all=true; shift ;;
|
||||
--static) build_static=true; shift ;;
|
||||
-t | --tests_build) build_tests=true; shift ;;
|
||||
--time-trace) time_trace=true; shift ;;
|
||||
--verbose) build_verbose=1; shift ;;
|
||||
--verbose) build_verbose=true; shift ;;
|
||||
--) shift ; break ;;
|
||||
*) echo "Unexpected command line parameter received; aborting";
|
||||
exit 1
|
||||
@@ -116,8 +117,6 @@ while true; do
|
||||
esac
|
||||
done
|
||||
|
||||
ROCM_BIN_PATH=$ROCM_PATH/bin
|
||||
|
||||
# /etc/*-release files describe the system
|
||||
if [[ -e "/etc/os-release" ]]; then
|
||||
source /etc/os-release
|
||||
@@ -129,22 +128,36 @@ else
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# CMake executable
|
||||
cmake_executable=cmake
|
||||
time_trace_ninja_msg="apt-get install ninja-build"
|
||||
case "${OS_ID}" in
|
||||
centos|rhel)
|
||||
cmake_executable=cmake3
|
||||
time_trace_ninja_msg="dnf install ninja-build"
|
||||
;;
|
||||
esac
|
||||
|
||||
# CMake build options; starts with toolchain info
|
||||
cmake_common_options="--toolchain=toolchain-linux.cmake"
|
||||
|
||||
# Abort the install script when the command that just ran has failed.
#   $1 - exit status to inspect (callers pass "$?")
# Exits the script with that same status when it is non-zero; returns otherwise.
check_exit_code( )
{
    local status="$1"
    if [[ "${status}" -ne 0 ]]; then
        exit "${status}"
    fi
}
|
||||
|
||||
if [[ "$build_release" == true ]]; then
|
||||
# set RCCL-UnitTests path
|
||||
if [[ "${build_release}" == true ]]; then
|
||||
unit_test_path="./build/release/test/rccl-UnitTests"
|
||||
else
|
||||
unit_test_path="./build/debug/test/rccl-UnitTests"
|
||||
fi
|
||||
|
||||
if ($run_tests) && [[ -f $unit_test_path ]]; then
|
||||
if [[ "$build_tests" == false ]]; then
|
||||
if [[ "${run_tests}" == true ]] && [[ -f "${unit_test_path}" ]]; then
|
||||
if [[ "${build_tests}" == false ]]; then
|
||||
clean_build=false
|
||||
fi
|
||||
fi
|
||||
@@ -153,7 +166,7 @@ fi
|
||||
# prep
|
||||
# #################################################
|
||||
# ensure a clean build environment
|
||||
if ($clean_build); then
|
||||
if [[ "${clean_build}" == true ]]; then
|
||||
if [[ "${build_release}" == true ]]; then
|
||||
rm -rf build/release
|
||||
else
|
||||
@@ -164,7 +177,8 @@ fi
|
||||
# Create and go to the build directory.
|
||||
mkdir -p build; cd build
|
||||
|
||||
if ($build_release); then
|
||||
# Create and go to build type directory
|
||||
if [[ "${build_release}" == true ]]; then
|
||||
mkdir -p release; cd release
|
||||
else
|
||||
mkdir -p debug; cd debug
|
||||
@@ -190,17 +204,15 @@ fi
|
||||
# Backward compatibility wrappers
|
||||
if [[ "${build_freorg_bkwdcomp}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=ON"
|
||||
else
|
||||
cmake_common_options="${cmake_common_options} -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF"
|
||||
fi
|
||||
|
||||
# Build local GPU arch only
|
||||
if [[ "$build_local_gpu_only" == true ]]; then
|
||||
if [[ "${build_local_gpu_only}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DBUILD_LOCAL_GPU_TARGET_ONLY=ON"
|
||||
fi
|
||||
|
||||
# Build for specified GPU target(s) only
|
||||
if [[ ! -z "$build_amdgpu_targets" ]]; then
|
||||
if [[ ! -z "${build_amdgpu_targets}" ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DAMDGPU_TARGETS=${build_amdgpu_targets}"
|
||||
fi
|
||||
|
||||
@@ -214,29 +226,29 @@ if [[ "${collective_trace}" == false ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DCOLLTRACE=OFF"
|
||||
fi
|
||||
|
||||
# Disable msccl kernel
|
||||
if [[ "${msccl_kernel_enabled}" == false ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DENABLE_MSCCL_KERNEL=OFF"
|
||||
fi
|
||||
|
||||
# Install dependencies
|
||||
if ($install_dependencies); then
|
||||
if [[ "${install_dependencies}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON"
|
||||
fi
|
||||
|
||||
# Install RCCL library
|
||||
if [[ "${install_library}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DCMAKE_INSTALL_PREFIX=${install_prefix}"
|
||||
fi
|
||||
|
||||
# Enable ROCTX
|
||||
if [[ "${roctx_enabled}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DROCTX=ON"
|
||||
fi
|
||||
|
||||
cmake_executable=cmake
|
||||
case "${OS_ID}" in
|
||||
centos|rhel)
|
||||
cmake_executable=cmake3
|
||||
;;
|
||||
esac
|
||||
|
||||
# Enable NPKit
|
||||
npkit_options=""
|
||||
if ($npkit_enabled); then
|
||||
if [[ "${npkit_enabled}" == true ]]; then
|
||||
npkit_options="-DENABLE_NPKIT \
|
||||
-DENABLE_NPKIT_EVENT_TIME_SYNC_GPU \
|
||||
-DENABLE_NPKIT_EVENT_TIME_SYNC_CPU \
|
||||
@@ -347,59 +359,80 @@ fi
|
||||
|
||||
check_exit_code "$?"
|
||||
|
||||
if ($time_trace); then
|
||||
# Enable ninja build for time tracing
|
||||
if [[ "${time_trace}" == true ]]; then
|
||||
if ! hash ninja &>/dev/null ; then
|
||||
echo "ninja could not be found"
|
||||
echo "Use \"${time_trace_ninja_msg}\" to install ninja"
|
||||
exit 1
|
||||
fi
|
||||
build_system="ninja"
|
||||
enable_ninja="-GNinja"
|
||||
else
|
||||
build_system="make"
|
||||
fi
|
||||
|
||||
if ($build_tests) || (($run_tests) && [[ ! -f ./test/rccl-UnitTests ]]); then
|
||||
CXX=$ROCM_BIN_PATH/hipcc $cmake_executable $cmake_common_options -DBUILD_TESTS=ON -DNPKIT_FLAGS="${npkit_options}" -DCMAKE_INSTALL_PREFIX=$ROCM_PATH -DROCM_PATH=$ROCM_PATH -DONLY_FUNCS="$ONLY_FUNCS" $enable_ninja ../../.
|
||||
# Add common CMake options
|
||||
cmake_common_options="${cmake_common_options} -DROCM_PATH=${ROCM_PATH} -DONLY_FUNCS=${ONLY_FUNCS} ${enable_ninja}"
|
||||
|
||||
# Build RCCL-UnitTests, if enabled
|
||||
if [[ "${build_tests}" == true ]] || ([[ "${run_tests}" == true ]] && [[ ! -x ./test/rccl-UnitTests ]]); then
|
||||
cmake_common_options="${cmake_common_options} -DBUILD_TESTS=ON"
|
||||
fi
|
||||
|
||||
# Initiate RCCL CMake
|
||||
# Passing NPKIT_FLAGS separately (not as part of ${cmake_common_options}) as
|
||||
# ${npkit_options} need to be passed "as-is" i.e. with `-D` to CMakeLists.txt
|
||||
${cmake_executable} ${cmake_common_options} -DNPKIT_FLAGS="${npkit_options}" ../../.
|
||||
check_exit_code "$?"
|
||||
|
||||
# Enable verbose output from the selected build tool.
# make takes VERBOSE=1 as a variable assignment, but ninja would treat it as a
# (non-existent) target name and abort; ninja's switch for echoing commands is -v.
if [[ "${build_verbose}" == true ]]; then
    if [[ "${build_system}" == "ninja" ]]; then
        build_system="${build_system} -v"
    else
        build_system="${build_system} VERBOSE=1"
    fi
fi
|
||||
|
||||
# Initiate RCCL build (and install)
|
||||
if [[ "${install_library}" == true ]]; then
|
||||
${build_system} -j ${num_parallel_jobs} install
|
||||
else
|
||||
CXX=$ROCM_BIN_PATH/hipcc $cmake_executable $cmake_common_options -DBUILD_TESTS=OFF -DNPKIT_FLAGS="${npkit_options}" -DCMAKE_INSTALL_PREFIX=$ROCM_PATH -DROCM_PATH=$ROCM_PATH -DONLY_FUNCS="$ONLY_FUNCS" $enable_ninja ../../.
|
||||
${build_system} -j ${num_parallel_jobs}
|
||||
fi
|
||||
check_exit_code "$?"
|
||||
|
||||
if ($install_library); then
|
||||
VERBOSE=${build_verbose} $build_system -j $num_parallel_jobs install
|
||||
else
|
||||
VERBOSE=${build_verbose} $build_system -j $num_parallel_jobs
|
||||
fi
|
||||
check_exit_code "$?"
|
||||
|
||||
if ($build_package); then
|
||||
# Initiate package build through the generated `package` target, if enabled.
# Use the build tool chosen for the main build: with --time-trace the tree is
# generated with -GNinja and has no Makefile, so a hard-coded `make package` fails.
if [[ "${build_package}" == true ]]; then
    ${build_system} package
    check_exit_code "$?"
fi
|
||||
|
||||
# Optionally, run tests if they're enabled.
|
||||
if ($run_tests); then
|
||||
if (test -f "./test/rccl-UnitTests"); then
|
||||
if ($run_tests_all); then
|
||||
# Optionally, run RCCL-UnitTests, if they're enabled.
|
||||
if [[ "${run_tests}" == true ]]; then
|
||||
if [[ -x "./test/rccl-UnitTests" ]]; then
|
||||
if [[ "${run_tests_all}" == true ]]; then
|
||||
./test/rccl-UnitTests
|
||||
else
|
||||
./test/rccl-UnitTests --gtest_filter="AllReduce.*"
|
||||
fi
|
||||
else
|
||||
echo "rccl unit tests have not been built yet; please re-run script with -t to build rccl unit tests."
|
||||
echo "RCCL-UnitTests have not been built yet; Please re-run script with \"-t\" to build RCCL-UnitTests."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if ($time_trace); then
|
||||
search_dir="../../"
|
||||
time_trace_dir=$(find "$search_dir" -type d -name "time-trace" -print -quit)
|
||||
# Generate time trace for RCCL build using tools/time-trace
|
||||
if [[ "${time_trace}" == true ]]; then
|
||||
search_dir="../../tools"
|
||||
time_trace_dir=$(find "${search_dir}" -type d -name "time-trace" -print -quit)
|
||||
|
||||
if [ "$time_trace_dir" ]; then
|
||||
time_trace_script="$time_trace_dir/rccl-TimeTrace.sh"
|
||||
if [ -x "$time_trace_script" ]; then
|
||||
if [[ -n "${time_trace_dir}" ]]; then
|
||||
time_trace_script="${time_trace_dir}/rccl-TimeTrace.sh"
|
||||
if [[ -x "${time_trace_script}" ]]; then
|
||||
echo "Generating RCCL-compile-timeline.html..."
|
||||
(cd "$time_trace_dir" && ./rccl-TimeTrace.sh)
|
||||
(cd "${time_trace_dir}" && ./rccl-TimeTrace.sh)
|
||||
else
|
||||
echo "Error: Unable to execute $time_trace_script. Make sure the file has the correct permissions."
|
||||
echo "Error: Unable to execute ${time_trace_script}. Make sure the file has the correct permissions."
|
||||
fi
|
||||
else
|
||||
echo "Error: time-trace folder not found in $search_dir."
|
||||
echo "Error: time-trace folder not found in ${search_dir}."
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
|
||||
# Toolchain file selecting the ROCm compilers for a Linux build.
#
# Inputs:
#   ENV{ROCM_PATH}    - when set, hipcc is looked up in its bin/ directory
#   ROCM_PATH (cache) - used when ENV{ROCM_PATH} is not set; defaults to /opt/rocm
#   ENV{CXX}, ENV{CC} - when set, used instead of hipcc for the respective language
#
# Outputs (cache entries, only written if not already present in the cache):
#   CMAKE_CXX_COMPILER, CMAKE_C_COMPILER

if (DEFINED ENV{ROCM_PATH})
  set(rocm_bin "$ENV{ROCM_PATH}/bin")
else()
  # Does not override a ROCM_PATH already given with -DROCM_PATH=...
  set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to the ROCm installation.")
  # Derive the bin directory from ROCM_PATH so a user-supplied value is honored;
  # fall back to the stock location if ROCM_PATH was passed as an empty string.
  if (NOT "${ROCM_PATH}" STREQUAL "")
    set(rocm_bin "${ROCM_PATH}/bin")
  else()
    set(rocm_bin "/opt/rocm/bin")
  endif()
endif()

# The compiler entries name a file, so their cache type is FILEPATH rather than PATH.
if (NOT DEFINED ENV{CXX})
  set(CMAKE_CXX_COMPILER "${rocm_bin}/hipcc" CACHE FILEPATH "Path to the C++ compiler")
else()
  set(CMAKE_CXX_COMPILER "$ENV{CXX}" CACHE FILEPATH "Path to the C++ compiler")
endif()

if (NOT DEFINED ENV{CC})
  set(CMAKE_C_COMPILER "${rocm_bin}/hipcc" CACHE FILEPATH "Path to the C compiler")
else()
  set(CMAKE_C_COMPILER "$ENV{CC}" CACHE FILEPATH "Path to the C compiler")
endif()
|
||||
Reference in New Issue
Block a user