From e3b9d785cc88a64bcbf9ebf941cce6d7fcb1f67d Mon Sep 17 00:00:00 2001 From: Nilesh M Negi Date: Wed, 7 May 2025 13:19:10 -0500 Subject: [PATCH] [BUILD] Add options to install script for compiler and GPU targets (#121) * [BUILD] Add options to install script for compiler and GPU targets * Fix GPU_TARGETS field and add option for custom ROCm path * Check for ROCM_PATH --------- Signed-off-by: nileshnegi [ROCm/rccl-tests commit: 41b383a0d490b367d48a33af996787c31bd546c8] --- projects/rccl-tests/install.sh | 96 +++++++++++++++++++++++--------- projects/rccl-tests/src/Makefile | 2 +- 2 files changed, 70 insertions(+), 28 deletions(-) diff --git a/projects/rccl-tests/install.sh b/projects/rccl-tests/install.sh index c56a6bfdde..fead2f1bac 100755 --- a/projects/rccl-tests/install.sh +++ b/projects/rccl-tests/install.sh @@ -10,8 +10,12 @@ function display_help() echo "./install [-h|--help] " echo " [-h|--help] Prints this help message." echo " [-m|--mpi] Build RCCL-tests with MPI support. (see --mpi_home below.)" - echo " [--rccl_home] Specify custom path for RCCL installation (default: /opt/rocm/rccl)" + echo " [-t|--test] Run unit-tests after building RCCL-Tests." + echo " [--rocm_home] Specify custom path for ROCm installation (default: /opt/rocm)" + echo " [--rccl_home] Specify custom path for RCCL installation (default: /opt/rocm)" echo " [--mpi_home] Specify path to your MPI installation." + echo " [--hip_compiler] Specify path to HIP compiler (default: /opt/rocm/bin/amdclang++)" + echo " [--gpu_targets] Specify GPU targets (default:gfx906,gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gxf1101,gfx1102,gfx1200,gfx1201)" } # ################################################# @@ -20,8 +24,12 @@ function display_help() run_tests=false build_release=true mpi_enabled=false -rccl_dir=/opt/rocm/rccl +rocm_dir=${ROCM_PATH} +rccl_dir=${rocm_dir} mpi_dir="" +hip_compiler=${rocm_dir}/bin/amdclang++ +gpu_targets="" + # ################################################# # Parameter parsing # ################################################# @@ -29,7 +37,7 @@ mpi_dir="" # check if we have a modern version of getopt that can handle whitespace and long parameters getopt -T if [[ $? -eq 4 ]]; then - GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,mpi,test,rccl_home:,mpi_home: --options hmt -- "$@") + GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,mpi,test,rocm_home:,rccl_home:,mpi_home:,hip_compiler:,gpu_targets: --options hmt -- "$@") else echo "Need a new version of getopt" exit 1 @@ -44,28 +52,35 @@ eval set -- "${GETOPT_PARSE}" while true; do case "${1}" in - -h|--help) - display_help - exit 0 - ;; - -m|--mpi) - mpi_enabled=true - shift ;; - -t|--test) - run_tests=true - shift ;; - --rccl_home) - rccl_dir=${2} - shift 2 ;; - --mpi_home) - mpi_dir=${2} - shift 2 ;; - --) shift ; break ;; - *) echo "Unexpected command line parameter received; aborting"; - exit 1 - ;; + -h|--help) + display_help + exit 0 ;; + -m|--mpi) + mpi_enabled=true + shift ;; + -t|--test) + run_tests=true + shift ;; + --rocm_home) + rocm_dir=${2} + shift 2 ;; + --rccl_home) + rccl_dir=${2} + shift 2 ;; + --mpi_home) + mpi_dir=${2} + shift 2 ;; + --hip_compiler) + hip_compiler=${2} + shift 2 ;; + --gpu_targets) + gpu_targets=${2} + shift 2 ;; + --) shift ; break ;; + *) echo "Unexpected command line parameter received; aborting"; + exit 1 ;; esac - done +done # throw error code after running a command in the install script check_exit_code( ) @@ -85,15 +100,42 @@ build_dir=./build # ensure a clean build environment rm -rf ${build_dir} +if [[ -n ${rocm_dir} ]]; then + echo "ROCM_PATH does not exist at ${rocm_dir}. Defaulting to /opt/rocm" + rocm_dir=/opt/rocm +fi + +if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then + echo "HIP Compiler does not exist at ${hip_compiler}. Please check the path." + echo "Defaulting to /opt/rocm/bin/amdclang++" + hip_compiler=${rocm_dir}/bin/amdclang++ + + if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then + echo "${hip_compiler} does not exist. Please be advised." + echo "Defaulting to /opt/rocm/bin/hipcc" + hip_compiler=${rocm_dir}/bin/hipcc + + if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then + echo "${hip_compiler} does not exist!. Please check your ROCm installation." + echo "Cannot proceed with building rccl-tests!" + exit 1 + fi + fi +fi + +if [[ -n ${gpu_targets} ]]; then + GPU_TARGETS="GPU_TARGETS=${gpu_targets}" +fi + if ($mpi_enabled); then if [[ ${mpi_dir} == "" ]]; then echo "MPI flag enabled but path to MPI installation not specified. See --mpi_home command line argument." exit 1 else - make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so MPI=1 MPI_HOME=${mpi_dir} -j$(nproc) + make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so MPI=1 MPI_HOME=${mpi_dir} HIPCC=${hip_compiler} ${GPU_TARGETS} -j$(nproc) fi else - make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so -j$(nproc) + make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so HIP_COMPILER=${hip_compiler} ${GPU_TARGETS} -j$(nproc) fi check_exit_code "$?" @@ -102,6 +144,6 @@ if ($run_tests); then if ($mpi_enabled); then cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib:${mpi_dir}/lib PATH=$PATH:${mpi_dir}/bin python3 -m pytest else - cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib python3 -m pytest + cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib python3 -m pytest -k "not MPI" fi fi diff --git a/projects/rccl-tests/src/Makefile b/projects/rccl-tests/src/Makefile index f23ab65e29..7809da6978 100644 --- a/projects/rccl-tests/src/Makefile +++ b/projects/rccl-tests/src/Makefile @@ -13,7 +13,7 @@ DEBUG ?= 0 NCCL_HOME ?= "" CUSTOM_RCCL_LIB ?= "" -HIPCC = $(ROCM_PATH)/bin/amdclang++ +HIPCC ?= $(ROCM_PATH)/bin/amdclang++ HIPCONFIG = $(ROCM_PATH)/bin/hipconfig CXX = $(HIPCC)