From 6405c76e6826663bbb67bd40aeee8c70aa5d3094 Mon Sep 17 00:00:00 2001
From: gilbertlee-amd <44450918+gilbertlee-amd@users.noreply.github.com>
Date: Wed, 29 Oct 2025 10:57:56 -0600
Subject: [PATCH] Fixing install script hip_compiler bug and improving logging on fallback (#156)

* Fixing install script hip_compiler bug and improving logging on fallback
---
 install.sh | 127 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 66 insertions(+), 61 deletions(-)

diff --git a/install.sh b/install.sh
index 98882c2eef..23a3c5df69 100755
--- a/install.sh
+++ b/install.sh
@@ -37,49 +37,49 @@ gpu_targets=""
 # check if we have a modern version of getopt that can handle whitespace and long parameters
 getopt -T
 if [[ $? -eq 4 ]]; then
-    GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,mpi,test,rocm_home:,rccl_home:,mpi_home:,hip_compiler:,gpu_targets: --options hmt -- "$@")
+  GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,mpi,test,rocm_home:,rccl_home:,mpi_home:,hip_compiler:,gpu_targets: --options hmt -- "$@")
 else
-    echo "Need a new version of getopt"
-    exit 1
+  echo "Need a new version of getopt"
+  exit 1
 fi
 
 if [[ $? -ne 0 ]]; then
-    echo "getopt invocation failed; could not parse the command line";
-    exit 1
+  echo "getopt invocation failed; could not parse the command line";
+  exit 1
 fi
 
 eval set -- "${GETOPT_PARSE}"
 
 while true; do
-    case "${1}" in
-        -h|--help)
-            display_help
-            exit 0 ;;
-        -m|--mpi)
-            mpi_enabled=true
-            shift ;;
-        -t|--test)
-            run_tests=true
-            shift ;;
-        --rocm_home)
-            rocm_dir=${2}
-            shift 2 ;;
-        --rccl_home)
-            rccl_dir=${2}
-            shift 2 ;;
-        --mpi_home)
-            mpi_dir=${2}
-            shift 2 ;;
-        --hip_compiler)
-            hip_compiler=${2}
-            shift 2 ;;
-        --gpu_targets)
-            gpu_targets=${2}
-            shift 2 ;;
-        --) shift ; break ;;
-        *)  echo "Unexpected command line parameter received; aborting";
-            exit 1 ;;
-    esac
+  case "${1}" in
+    -h|--help)
+      display_help
+      exit 0 ;;
+    -m|--mpi)
+      mpi_enabled=true
+      shift ;;
+    -t|--test)
+      run_tests=true
+      shift ;;
+    --rocm_home)
+      rocm_dir=${2}
+      shift 2 ;;
+    --rccl_home)
+      rccl_dir=${2}
+      shift 2 ;;
+    --mpi_home)
+      mpi_dir=${2}
+      shift 2 ;;
+    --hip_compiler)
+      hip_compiler=${2}
+      shift 2 ;;
+    --gpu_targets)
+      gpu_targets=${2}
+      shift 2 ;;
+    --) shift ; break ;;
+    *)  echo "Unexpected command line parameter received; aborting";
+      exit 1 ;;
+  esac
 done
 
 # throw error code after running a command in the install script
@@ -101,49 +101,54 @@ build_dir=./build
 rm -rf ${build_dir}
 
 if [[ -z ${rocm_dir} ]]; then
-    echo "ROCM_PATH does not exist at ${rocm_dir}. Defaulting to /opt/rocm"
-    rocm_dir=/opt/rocm
+  echo "[WARN] ROCM_PATH does not exist at ${rocm_dir}. Defaulting to /opt/rocm"
+  rocm_dir=/opt/rocm
 fi
 
 if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then
-    echo "HIP Compiler does not exist at ${hip_compiler}. Please check the path."
-    echo "Defaulting to /opt/rocm/bin/amdclang++"
-    hip_compiler=${rocm_dir}/bin/amdclang++
+  echo "[WARN] HIP Compiler does not exist at ${hip_compiler}. Please check the path."
+  echo "[WARN] - Falling back to ${rocm_dir}/bin/amdclang++"
+  hip_compiler=${rocm_dir}/bin/amdclang++
+
+  if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then
+    echo "[WARN] ${hip_compiler} does not exist. Please be advised."
+    echo "[WARN] - Falling back to ${rocm_dir}/bin/hipcc"
+    hip_compiler=${rocm_dir}/bin/hipcc
 
     if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then
-        echo "${hip_compiler} does not exist. Please be advised."
-        echo "Defaulting to /opt/rocm/bin/hipcc"
-        hip_compiler=${rocm_dir}/bin/hipcc
-
-        if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then
-            echo "${hip_compiler} does not exist!. Please check your ROCm installation."
-            echo "Cannot proceed with building rccl-tests!"
-            exit 1
-        fi
+      echo "[ERROR] ${hip_compiler} does not exist!. Please check your ROCm installation." >&2
+      echo "[ERROR] Cannot proceed with building rccl-tests!" >&2
+      exit 1
     fi
+  fi
 fi
+echo "[INFO] Compiling with ${hip_compiler}"
 
 if [[ -n ${gpu_targets} ]]; then
-    GPU_TARGETS="GPU_TARGETS=${gpu_targets}"
+  GPU_TARGETS="GPU_TARGETS=${gpu_targets}"
 fi
 
 if ($mpi_enabled); then
-    if [[ ${mpi_dir} == "" ]]; then
-        echo "MPI flag enabled but path to MPI installation not specified. See --mpi_home command line argument."
-        exit 1
-    else
-        make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so MPI=1 MPI_HOME=${mpi_dir} HIPCC=${hip_compiler} ${GPU_TARGETS} -j$(nproc)
-    fi
+  if [[ ${mpi_dir} == "" ]]; then
+    echo "[ERROR] MPI flag enabled but path to MPI installation not specified. See --mpi_home command line argument." >&2
+    exit 1
+  else
+    echo "[INFO] Compiling with MPI support (Using MPI from ${mpi_dir})"
+    echo
+    make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so MPI=1 MPI_HOME=${mpi_dir} HIPCC=${hip_compiler} ${GPU_TARGETS} -j$(nproc)
+  fi
 else
-    make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so HIP_COMPILER=${hip_compiler} ${GPU_TARGETS} -j$(nproc)
+  echo "[INFO] Compiling without MPI support (MPI support requires -m and --mpi_home)"
+  echo
+  make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so HIPCC=${hip_compiler} ${GPU_TARGETS} -j$(nproc)
 fi
 check_exit_code "$?"
 
 # Optionally, run tests if they're enabled.
 if ($run_tests); then
-    if ($mpi_enabled); then
-        cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib:${mpi_dir}/lib PATH=$PATH:${mpi_dir}/bin python3 -m pytest
-    else
-        cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib python3 -m pytest -k "not MPI"
-    fi
+  if ($mpi_enabled); then
+    cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib:${mpi_dir}/lib PATH=$PATH:${mpi_dir}/bin python3 -m pytest
+  else
+    cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib python3 -m pytest -k "not MPI"
+  fi
 fi