## Builds RCCL and RCCL-Tests (with MPI support) on top of a ROCm dev image.
## Toolchain pieces built from source: CMake 3.28.0, UCX 1.16.0, OpenMPI 4.1.6.

## base docker image
## NOTE(review): the default tag is the floating `latest`; pass
## `--build-arg ROCM_IMAGE_TAG=<release>` when a reproducible build is needed.
ARG ROCM_IMAGE_NAME=rocm/dev-ubuntu-22.04
ARG ROCM_IMAGE_TAG=latest
FROM "${ROCM_IMAGE_NAME}:${ROCM_IMAGE_TAG}"

## rccl repo
ARG RCCL_REPO=https://github.com/ROCm/rccl
ARG RCCL_BRANCH=develop

## rccl-tests repo
ARG RCCL_TESTS_REPO=https://github.com/ROCm/rccl-tests
ARG RCCL_TESTS_BRANCH=develop

## AMD GPU Targets
ARG GPU_TARGETS=gfx942

## parallel `make` jobs used for the UCX, OpenMPI and RCCL-Tests builds
ARG BUILD_JOBS=16

## creating scratch space
## WORKDIR creates the directory if it is missing, so no separate mkdir layer.
ENV WORKDIR=/workspace
WORKDIR ${WORKDIR}

## install dependencies
## update + install + list cleanup stay in one layer so the apt cache is
## never stale and never shipped in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        chrpath \
        curl \
        doxygen \
        gfortran \
        git \
        lcov \
        less \
        libbfd-dev \
        libboost-all-dev \
        liblzma-dev \
        libncursesw5-dev \
        libnuma-dev \
        libnuma1 \
        libomp-dev \
        libomp5 \
        libpthread-stubs0-dev \
        libssl-dev \
        libzstd-dev \
        lshw \
        make \
        ninja-build \
        pkg-config \
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-tk \
        python3-venv \
        python3-yaml \
        rocm-cmake \
        unzip \
        vim \
        wget \
        xz-utils \
        zip \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

## Install CMake 3.28.0 into /usr from the Kitware self-extracting installer
## (x86_64 only); the installer is deleted in the same layer.
RUN wget https://github.com/Kitware/CMake/releases/download/v3.28.0/cmake-3.28.0-linux-x86_64.sh \
    && chmod +x cmake-3.28.0-linux-x86_64.sh \
    && bash ./cmake-3.28.0-linux-x86_64.sh --prefix=/usr --exclude-subdir --skip-license \
    && rm cmake-3.28.0-linux-x86_64.sh

## Set ROCm path
ENV ROCM_PATH=/opt/rocm

## Install UCX
## Out-of-tree build configured against ROCm; sources and tarball are removed
## in the same layer.
ENV UCX_INSTALL_PREFIX=/opt/ucx
RUN wget https://github.com/openucx/ucx/releases/download/v1.16.0/ucx-1.16.0.tar.gz \
    && mkdir -p ucx \
    && tar -zxf ucx-1.16.0.tar.gz -C ucx --strip-components=1 \
    && cd ucx \
    && mkdir build \
    && cd build \
    && ../configure --prefix=${UCX_INSTALL_PREFIX} --with-rocm=${ROCM_PATH} \
    && make -j"${BUILD_JOBS}" install \
    && cd ../.. \
    && rm -rf ucx ucx-1.16.0.tar.gz

## Install OpenMPI
## Configured against the UCX build above, with OSHMEM and the Fortran
## bindings disabled; sources and tarball are removed in the same layer.
ENV MPI_INSTALL_PREFIX=/opt/ompi
RUN wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.6.tar.gz \
    && mkdir -p ompi4 \
    && tar -zxf openmpi-4.1.6.tar.gz -C ompi4 --strip-components=1 \
    && cd ompi4 \
    && mkdir build \
    && cd build \
    && ../configure --prefix=${MPI_INSTALL_PREFIX} --with-ucx=${UCX_INSTALL_PREFIX} --disable-oshmem --disable-mpi-fortran --enable-orterun-prefix-by-default \
    && make -j"${BUILD_JOBS}" install \
    && cd ../.. \
    && rm -rf ompi4 openmpi-4.1.6.tar.gz

## building RCCL
## The clone target is spelled out so that overriding RCCL_REPO with a fork of
## a different name still lands in ./rccl (mirrors the rccl-tests clone below).
ENV RCCL_INSTALL_PREFIX=${WORKDIR}/rccl/install
RUN git clone --recurse-submodules -b "${RCCL_BRANCH}" "${RCCL_REPO}" ./rccl \
    && cd ./rccl \
    && ./install.sh --amdgpu_targets="${GPU_TARGETS}" --prefix="${RCCL_INSTALL_PREFIX}"

## building RCCL-Tests
## Built with MPI enabled; CMake looks for RCCL and OpenMPI under the two
## install prefixes set above.
RUN git clone -b "${RCCL_TESTS_BRANCH}" "${RCCL_TESTS_REPO}" ./rccl-tests \
    && cd ./rccl-tests \
    && mkdir build \
    && cd build \
    && cmake -DCMAKE_BUILD_TYPE=Release -DUSE_MPI=ON -DCMAKE_PREFIX_PATH="${RCCL_INSTALL_PREFIX};${MPI_INSTALL_PREFIX}" -DGPU_TARGETS="${GPU_TARGETS}" .. \
    && make -j"${BUILD_JOBS}"

## set environment variables
ENV PATH="${MPI_INSTALL_PREFIX}/bin:${ROCM_PATH}/bin:${PATH}"
## NOTE(review): librccl is presumably installed under ${RCCL_INSTALL_PREFIX}/lib,
## so that directory is searched first; the bare prefix is kept for backward
## compatibility. The inherited LD_LIBRARY_PATH is appended only when it is
## non-empty, which avoids a trailing ':' (an empty entry means "current
## directory" to the dynamic loader).
ENV LD_LIBRARY_PATH="${RCCL_INSTALL_PREFIX}/lib:${RCCL_INSTALL_PREFIX}:${MPI_INSTALL_PREFIX}/lib:${ROCM_PATH}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
## Runtime defaults for UCX, OpenMPI (the container runs as root) and RCCL logging.
ENV UCX_WARN_UNUSED_ENV_VARS=n \
    OMPI_ALLOW_RUN_AS_ROOT=1 \
    OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \
    NCCL_DEBUG=VERSION