# Commit 948d2b6a68
# Signed-off-by: nileshnegi <Nilesh.Negi@amd.com>
# Dockerfile - 125 lines, 3.4 KiB
## Base image
# Both arguments are declared ahead of FROM, so they are visible to the FROM
# line only. Override them with --build-arg to select a different ROCm base
# image; pinning ROCM_IMAGE_TAG to a specific release instead of the default
# "latest" makes the build reproducible.
ARG ROCM_IMAGE_NAME="rocm/dev-ubuntu-22.04"
ARG ROCM_IMAGE_TAG="latest"
FROM "${ROCM_IMAGE_NAME}:${ROCM_IMAGE_TAG}"
## Build arguments (override any of them with --build-arg)

# RCCL: git repository to clone and the branch to check out
ARG RCCL_REPO="https://github.com/ROCm/rccl"
ARG RCCL_BRANCH="develop"

# RCCL-Tests: git repository to clone and the branch to check out
ARG RCCL_TESTS_REPO="https://github.com/ROCm/rccl-tests"
ARG RCCL_TESTS_BRANCH="develop"

# AMD GPU target passed to both the RCCL and the RCCL-Tests builds
ARG GPU_TARGETS="gfx942"
## Scratch space
# WORKDIR is kept as an environment variable because later steps derive
# paths from it (the RCCL install prefix is ${WORKDIR}/rccl/install).
# Written in key=value form; the space-separated ENV form is deprecated.
ENV WORKDIR=/workspace
# The WORKDIR instruction creates the directory when it does not exist, so a
# separate `RUN mkdir -p` layer is not needed.
WORKDIR ${WORKDIR}
## Install OS-level build and runtime dependencies
# update + install + cleanup happen in a single layer so the package index is
# never stale and the apt lists do not end up in the image.
# Packages are listed one per line in alphabetical order; build-essential used
# to be listed twice and now appears once.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        chrpath \
        curl \
        doxygen \
        gfortran \
        git \
        lcov \
        less \
        libbfd-dev \
        libboost-all-dev \
        liblzma-dev \
        libncursesw5-dev \
        libnuma-dev \
        libnuma1 \
        libomp-dev \
        libomp5 \
        libpthread-stubs0-dev \
        libssl-dev \
        libzstd-dev \
        lshw \
        make \
        ninja-build \
        pkg-config \
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-tk \
        python3-venv \
        python3-yaml \
        rocm-cmake \
        unzip \
        vim \
        wget \
        xz-utils \
        zip \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
## Install CMake from the upstream Kitware release
# CMAKE_VERSION selects the release; the default matches the version that was
# previously hard-coded. The self-extracting installer is run through bash
# (so it does not need the executable bit) and is deleted in the same layer.
# NOTE(review): the download is not checksum-verified - consider adding a
# sha256 check for the pinned version.
ARG CMAKE_VERSION=3.28.0
RUN wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.sh" \
    && bash "./cmake-${CMAKE_VERSION}-linux-x86_64.sh" --prefix=/usr --exclude-subdir --skip-license \
    && rm "./cmake-${CMAKE_VERSION}-linux-x86_64.sh"
## ROCm installation root
# Referenced by the UCX configure step (--with-rocm) and by the PATH and
# LD_LIBRARY_PATH settings at the end of this file.
ENV ROCM_PATH="/opt/rocm"
## Install UCX (built from the release tarball with ROCm support)
# UCX_INSTALL_PREFIX stays an ENV because the Open MPI configure step reads it.
ENV UCX_INSTALL_PREFIX=/opt/ucx
# UCX_VERSION selects the release tarball; the default matches the version
# that was previously hard-coded.
ARG UCX_VERSION=1.16.0
# Out-of-tree build in ucx/build; sources and tarball are removed in the same
# layer so they do not persist in the image. The job count follows the build
# host's CPU count instead of a fixed 16.
RUN wget "https://github.com/openucx/ucx/releases/download/v${UCX_VERSION}/ucx-${UCX_VERSION}.tar.gz" \
    && mkdir -p ucx \
    && tar -zxf "ucx-${UCX_VERSION}.tar.gz" -C ucx --strip-components=1 \
    && mkdir -p ucx/build \
    && cd ucx/build \
    && ../configure --prefix="${UCX_INSTALL_PREFIX}" --with-rocm="${ROCM_PATH}" \
    && make -j"$(nproc)" install \
    && cd ../.. \
    && rm -rf ucx "ucx-${UCX_VERSION}.tar.gz"
## Install Open MPI (built from the release tarball against the UCX above)
# MPI_INSTALL_PREFIX stays an ENV because the RCCL-Tests build and the final
# PATH / LD_LIBRARY_PATH settings read it.
ENV MPI_INSTALL_PREFIX=/opt/ompi
# OMPI_VERSION selects the release tarball; the default matches the version
# that was previously hard-coded.
ARG OMPI_VERSION=4.1.6
# The download directory is named after the release series (e.g. v4.1), which
# the shell derives by stripping the patch component: ${OMPI_VERSION%.*}.
# Sources and tarball are removed in the same layer; the job count follows the
# build host's CPU count instead of a fixed 16.
RUN wget "https://download.open-mpi.org/release/open-mpi/v${OMPI_VERSION%.*}/openmpi-${OMPI_VERSION}.tar.gz" \
    && mkdir -p ompi4 \
    && tar -zxf "openmpi-${OMPI_VERSION}.tar.gz" -C ompi4 --strip-components=1 \
    && mkdir -p ompi4/build \
    && cd ompi4/build \
    && ../configure \
        --prefix="${MPI_INSTALL_PREFIX}" \
        --with-ucx="${UCX_INSTALL_PREFIX}" \
        --disable-oshmem \
        --disable-mpi-fortran \
        --enable-orterun-prefix-by-default \
    && make -j"$(nproc)" install \
    && cd ../.. \
    && rm -rf ompi4 "openmpi-${OMPI_VERSION}.tar.gz"
## Build RCCL
# The install prefix lives inside the checkout directory created below.
ENV RCCL_INSTALL_PREFIX=${WORKDIR}/rccl/install
# The clone target is given explicitly as ./rccl (matching how rccl-tests is
# cloned). Without it git names the directory after the repository, so a
# RCCL_REPO override pointing at a differently named fork would break both the
# `cd ./rccl` step and RCCL_INSTALL_PREFIX.
# Expansions are quoted so unusual values cannot be word-split by the shell.
RUN git clone --recurse-submodules -b "${RCCL_BRANCH}" "${RCCL_REPO}" ./rccl \
    && cd ./rccl \
    && ./install.sh --amdgpu_targets="${GPU_TARGETS}" --prefix="${RCCL_INSTALL_PREFIX}"
## Build RCCL-Tests (MPI-enabled, Release configuration)
# CMAKE_PREFIX_PATH points CMake at the RCCL and Open MPI install prefixes set
# earlier in this file. The build tree is ./rccl-tests/build, as before.
# `cmake --build` drives whichever generator was configured, and the job count
# follows the build host's CPU count instead of a fixed 16. GPU_TARGETS is
# quoted so the shell cannot word-split it.
RUN git clone -b "${RCCL_TESTS_BRANCH}" "${RCCL_TESTS_REPO}" ./rccl-tests \
    && cd ./rccl-tests \
    && cmake -S . -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DUSE_MPI=ON \
        -DCMAKE_PREFIX_PATH="${RCCL_INSTALL_PREFIX};${MPI_INSTALL_PREFIX}" \
        -DGPU_TARGETS="${GPU_TARGETS}" \
    && cmake --build build --parallel "$(nproc)"
## Runtime environment
# Put the Open MPI and ROCm executables ahead of the inherited PATH.
ENV PATH="${MPI_INSTALL_PREFIX}/bin:${ROCM_PATH}/bin:${PATH}"
# Library search path for RCCL, Open MPI and ROCm. The inherited value is
# appended only when it is set: an unconditional ":${LD_LIBRARY_PATH}" leaves
# a trailing empty element when the base image defines none, and the dynamic
# loader treats an empty element as the current directory.
# NOTE(review): the RCCL entry is the install prefix itself, not its lib/
# subdirectory - confirm where install.sh places librccl.so.
ENV LD_LIBRARY_PATH="${RCCL_INSTALL_PREFIX}:${MPI_INSTALL_PREFIX}/lib:${ROCM_PATH}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# UCX_WARN_UNUSED_ENV_VARS=n  : silence UCX warnings about unused UCX_* variables
# OMPI_ALLOW_RUN_AS_ROOT*     : Open MPI requires both to launch as root
# NCCL_DEBUG=VERSION          : print the library version at startup
ENV UCX_WARN_UNUSED_ENV_VARS=n \
    OMPI_ALLOW_RUN_AS_ROOT=1 \
    OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \
    NCCL_DEBUG=VERSION