Fixing install script hip_compiler bug and improving logging on fallback (#156)

* Fixing install script hip_compiler bug and improving logging on fallback

[ROCm/rccl-tests commit: 6405c76e68]
This commit is contained in:
gilbertlee-amd
2025-10-29 10:57:56 -06:00
zatwierdzone przez GitHub
rodzic abf0605823
commit 555a5f1892
+66 -61
Wyświetl plik
@@ -37,49 +37,49 @@ gpu_targets=""
# Check that getopt is a modern version that can handle whitespace and long
# parameters. `getopt -T` is run only for its exit status; status 4 is the
# value this script accepts as "modern".
# NOTE(review): most statements in this section appear twice in a row. The
# text looks like a diff view with old and new lines interleaved and no +/-
# markers -- confirm against the real script before relying on it.
getopt -T
if [[ $? -eq 4 ]]; then
# Normalize the command line. Long options declared with a trailing ':' take
# a value; -h, -m and -t are the short forms of --help, --mpi and --test.
GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,mpi,test,rocm_home:,rccl_home:,mpi_home:,hip_compiler:,gpu_targets: --options hmt -- "$@")
GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,mpi,test,rocm_home:,rccl_home:,mpi_home:,hip_compiler:,gpu_targets: --options hmt -- "$@")
else
echo "Need a new version of getopt"
exit 1
echo "Need a new version of getopt"
exit 1
fi
# $? here is the status of the if-statement above, i.e. of the last command it
# ran: the GETOPT_PARSE assignment, which carries the exit status of the
# getopt command substitution. (The else-branch never reaches this point.)
if [[ $? -ne 0 ]]; then
echo "getopt invocation failed; could not parse the command line";
exit 1
echo "getopt invocation failed; could not parse the command line";
exit 1
fi
# Replace the positional parameters with getopt's normalized output, then
# consume options from the front until the "--" separator is reached.
eval set -- "${GETOPT_PARSE}"
while true; do
# NOTE(review): two identical case statements follow. This looks like the old
# and new version of the same block from a diff view (indentation-only
# change, presumably) -- confirm against the real script.
case "${1}" in
# Print usage (display_help is defined outside this section) and stop.
-h|--help)
display_help
exit 0 ;;
# Flags without a value: consume one argument.
-m|--mpi)
mpi_enabled=true
shift ;;
-t|--test)
run_tests=true
shift ;;
# Options with a value: the value is ${2}, so consume two arguments.
--rocm_home)
rocm_dir=${2}
shift 2 ;;
--rccl_home)
rccl_dir=${2}
shift 2 ;;
--mpi_home)
mpi_dir=${2}
shift 2 ;;
--hip_compiler)
hip_compiler=${2}
shift 2 ;;
--gpu_targets)
gpu_targets=${2}
shift 2 ;;
# End of options: drop the separator and leave the while loop.
--) shift ; break ;;
*) echo "Unexpected command line parameter received; aborting";
exit 1 ;;
esac
case "${1}" in
-h|--help)
display_help
exit 0 ;;
-m|--mpi)
mpi_enabled=true
shift ;;
-t|--test)
run_tests=true
shift ;;
--rocm_home)
rocm_dir=${2}
shift 2 ;;
--rccl_home)
rccl_dir=${2}
shift 2 ;;
--mpi_home)
mpi_dir=${2}
shift 2 ;;
--hip_compiler)
hip_compiler=${2}
shift 2 ;;
--gpu_targets)
gpu_targets=${2}
shift 2 ;;
--) shift ; break ;;
*) echo "Unexpected command line parameter received; aborting";
exit 1 ;;
esac
done
# throw error code after running a command in the install script
@@ -101,49 +101,54 @@ build_dir=./build
# Remove any existing build directory before building.
rm -rf ${build_dir}
# When rocm_dir is empty, fall back to /opt/rocm.
# Note that ${rocm_dir} is still empty when the message is printed, so the
# path shown after "does not exist at" is blank.
# NOTE(review): the echo/assignment pair appears twice; this looks like the
# old and new line from a diff view (the second adds the "[WARN]" prefix) --
# confirm against the real script.
if [[ -z ${rocm_dir} ]]; then
echo "ROCM_PATH does not exist at ${rocm_dir}. Defaulting to /opt/rocm"
rocm_dir=/opt/rocm
echo "[WARN] ROCM_PATH does not exist at ${rocm_dir}. Defaulting to /opt/rocm"
rocm_dir=/opt/rocm
fi
# Resolve the HIP compiler. Candidates are tried in order with `command -v`:
# the current ${hip_compiler}, then ${rocm_dir}/bin/amdclang++, then
# ${rocm_dir}/bin/hipcc. If none is found the script exits with status 1.
#
# About `2>&1 >/dev/null`: redirections apply left to right, so stderr is
# pointed at the original stdout first and only stdout is sent to /dev/null;
# anything `command -v` writes to stderr is therefore still shown.
#
# NOTE(review): this section reads like a diff view with old and new lines
# interleaved and no +/- markers (messages with and without the
# "[WARN]"/"[ERROR]" prefixes, hard-coded /opt/rocm vs ${rocm_dir} in the
# text, and four nested checks where the messages describe three candidates).
# Confirm against the real script which lines are current.
if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then
echo "HIP Compiler does not exist at ${hip_compiler}. Please check the path."
echo "Defaulting to /opt/rocm/bin/amdclang++"
hip_compiler=${rocm_dir}/bin/amdclang++
echo "[WARN] HIP Compiler does not exist at ${hip_compiler}. Please check the path."
echo "[WARN] - Falling back to ${rocm_dir}/bin/amdclang++"
hip_compiler=${rocm_dir}/bin/amdclang++
# First fallback (amdclang++) not found either: try hipcc.
if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then
echo "[WARN] ${hip_compiler} does not exist. Please be advised."
echo "[WARN] - Falling back to ${rocm_dir}/bin/hipcc"
hip_compiler=${rocm_dir}/bin/hipcc
if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then
echo "${hip_compiler} does not exist. Please be advised."
echo "Defaulting to /opt/rocm/bin/hipcc"
hip_compiler=${rocm_dir}/bin/hipcc
# No candidate left: report and abort.
if ! command -v ${hip_compiler} 2>&1 >/dev/null ; then
echo "${hip_compiler} does not exist!. Please check your ROCm installation."
echo "Cannot proceed with building rccl-tests!"
exit 1
fi
# These error messages go to stderr (>&2).
echo "[ERROR] ${hip_compiler} does not exist!. Please check your ROCm installation." >&2
echo "[ERROR] Cannot proceed with building rccl-tests!" >&2
exit 1
fi
fi
fi
# Report the compiler that was finally selected.
echo "[INFO] Compiling with ${hip_compiler}"
# When a GPU target list was supplied, build the "GPU_TARGETS=<list>" argument
# that is later appended to the make command line. GPU_TARGETS is not assigned
# here when gpu_targets is empty.
# NOTE(review): the assignment appears twice; presumably the old and new line
# of a diff view -- confirm against the real script.
if [[ -n ${gpu_targets} ]]; then
GPU_TARGETS="GPU_TARGETS=${gpu_targets}"
GPU_TARGETS="GPU_TARGETS=${gpu_targets}"
fi
# Build with make, with or without MPI support.
# `($mpi_enabled)` runs the variable's value as a command in a subshell, so
# the variable is expected to hold `true` or `false` (it is set to `true` by
# -m/--mpi; presumably it defaults to `false` -- confirm where it is
# initialized).
# NOTE(review): statements in this section appear in near-duplicate pairs;
# this looks like a diff view with old and new lines interleaved and no +/-
# markers -- confirm against the real script.
if ($mpi_enabled); then
# MPI builds require --mpi_home; abort if it was not given.
if [[ ${mpi_dir} == "" ]]; then
echo "MPI flag enabled but path to MPI installation not specified. See --mpi_home command line argument."
exit 1
else
make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so MPI=1 MPI_HOME=${mpi_dir} HIPCC=${hip_compiler} ${GPU_TARGETS} -j$(nproc)
fi
if [[ ${mpi_dir} == "" ]]; then
echo "[ERROR] MPI flag enabled but path to MPI installation not specified. See --mpi_home command line argument." >&2
exit 1
else
echo "[INFO] Compiling with MPI support (Using MPI from ${mpi_dir})"
echo
make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so MPI=1 MPI_HOME=${mpi_dir} HIPCC=${hip_compiler} ${GPU_TARGETS} -j$(nproc)
fi
else
# The two make lines below differ in the variable that carries the compiler:
# the first passes HIP_COMPILER=, the second HIPCC= (the MPI branch above
# uses HIPCC= in both of its make lines).
make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so HIP_COMPILER=${hip_compiler} ${GPU_TARGETS} -j$(nproc)
echo "[INFO] Compiling without MPI support (MPI support requires -m and --mpi_home)"
echo
make NCCL_HOME=${rccl_dir} CUSTOM_RCCL_LIB=${rccl_dir}/lib/librccl.so HIPCC=${hip_compiler} ${GPU_TARGETS} -j$(nproc)
fi
# Hand the status of the if-statement above (the last make that ran) to
# check_exit_code, which is defined outside this section.
check_exit_code "$?"
# Optionally, run tests if they're enabled.
# pytest is started from the test/ directory. For that one command,
# ${rccl_dir}/lib is appended to LD_LIBRARY_PATH; with MPI enabled,
# ${mpi_dir}/lib is appended as well and ${mpi_dir}/bin is appended to PATH.
# Without MPI, pytest is given -k "not MPI".
# `cd test` changes the working directory of the script itself.
# NOTE(review): the if/else appears twice; presumably the old and new version
# from a diff view. Run literally, the second `cd test` would execute from
# inside test/ -- confirm against the real script.
if ($run_tests); then
if ($mpi_enabled); then
cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib:${mpi_dir}/lib PATH=$PATH:${mpi_dir}/bin python3 -m pytest
else
cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib python3 -m pytest -k "not MPI"
fi
if ($mpi_enabled); then
cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib:${mpi_dir}/lib PATH=$PATH:${mpi_dir}/bin python3 -m pytest
else
cd test; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${rccl_dir}/lib python3 -m pytest -k "not MPI"
fi
fi