15c82d6da8
## Motivation Enable UCX communication tracing and communication metadata ## Technical Details Implement UCX API wrappers to trace transport-layer communication. This adds communication data tracking and exposes “UCX Comm Send/Recv” timelines, enabling detailed analysis of MPI, OpenSHMEM, and other UCX-based runtime communication patterns. - Implements function interception for UCX functions across multiple categories using gotcha component. - Extended comm_data component to track UCX send/recv operations - Added ucx_send and ucx_recv labels for Perfetto counter tracks. Integrated UCX data tracking with existing MPI/RCCL tracking infrastructure. - Added ROCPROFSYS_USE_UCX configuration option (enabled by default). - Created FindUCX.cmake module for UCX header detection. Falls back to internal UCX headers if system headers not found. - Updated all Dockerfiles to include UCX dependencies.
81 rivejä
3.1 KiB
Docker
81 rivejä
3.1 KiB
Docker
# Base image selection.
# DISTRO and VERSION are declared before FROM so they can parameterize it;
# override either with `--build-arg`, e.g. `--build-arg VERSION=24.04`.
ARG DISTRO=ubuntu
ARG VERSION=22.04

FROM ${DISTRO}:${VERSION}
|
|
|
|
# Baseline environment for every later build step and for containers started
# from this image: root's home directory, a UTF-8 locale, bash as the
# advertised shell, the rc file bash sources for non-interactive shells
# (BASH_ENV), and a non-interactive debconf frontend.
ENV HOME=/root \
    LANG=C.UTF-8 \
    SHELL=/bin/bash \
    BASH_ENV=/etc/bash.bashrc \
    DEBIAN_FRONTEND=noninteractive
|
|
|
|
# Run the build steps below from /tmp, and execute every shell-form RUN with
# bash instead of the default /bin/sh.
WORKDIR /tmp
SHELL ["/bin/bash", "-c"]
|
|
|
|
# Build-time arguments (override with `--build-arg NAME=value`).
#   EXTRA_PACKAGES            - additional packages requested by the builder
#   ELFUTILS_DOWNLOAD_VERSION - elfutils release version
#   BOOST_DOWNLOAD_VERSION    - Boost release version
#   NJOBS                     - job count for parallel work
ARG EXTRA_PACKAGES=""
ARG ELFUTILS_DOWNLOAD_VERSION="0.188"
ARG BOOST_DOWNLOAD_VERSION="1.79.0"
ARG NJOBS="8"
|
|
|
|
# Make software installed under /usr/local discoverable: executables (PATH),
# link-time libraries (LIBRARY_PATH), run-time libraries (LD_LIBRARY_PATH)
# and CMake packages (CMAKE_PREFIX_PATH).
ENV PATH="/usr/local/bin:${PATH}" \
    LIBRARY_PATH="/usr/local/lib:/usr/local/lib64" \
    LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64" \
    CMAKE_PREFIX_PATH="/usr/local"
|
|
|
|
# Install the system toolchain and development libraries, including the UCX
# headers and utilities (libucx-dev, ucx-utils).
#
# - Packages are listed one per line in alphabetical order so that additions
#   and removals produce minimal diffs.
# - ${EXTRA_PACKAGES} (build argument, empty by default) is deliberately left
#   unquoted so that a space-separated list expands to multiple packages.
# - The package archives and the apt index are removed in the same layer that
#   created them; a later `apt-get install` must run `apt-get update` first.
RUN apt-get update && \
    apt-get dist-upgrade -y && \
    apt-get install -y \
        autoconf \
        autotools-dev \
        bash-completion \
        bison \
        build-essential \
        bzip2 \
        chrpath \
        cmake \
        curl \
        environment-modules \
        flex \
        gettext \
        git-core \
        gnupg2 \
        gzip \
        iproute2 \
        libgmock-dev \
        libgtest-dev \
        libiberty-dev \
        libpapi-dev \
        libpfm4-dev \
        libsqlite3-dev \
        libtool \
        libucx-dev \
        locales \
        lsb-release \
        m4 \
        ninja-build \
        nlohmann-json3-dev \
        python3-pip \
        software-properties-common \
        texinfo \
        ucx-utils \
        unzip \
        vim \
        wget \
        zip \
        zlib1g-dev \
        ${EXTRA_PACKAGES} && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
|
|
|
|
# Install CMake 3.21 and the perfetto Python package into the system Python.
#
# `--break-system-packages` is only understood by newer pip releases, and is
# only needed where the system Python is marked as externally managed.
# Instead of keying on one specific distro release, ask pip itself whether it
# knows the option, so that any DISTRO/VERSION combination picks the right
# command line.
# The probe redirects grep's output rather than using `grep -q` so that pip
# is never cut off mid-write by a closed pipe.
RUN if python3 -m pip install --help 2>/dev/null | grep -- '--break-system-packages' > /dev/null; then \
        PIP_EXTRA_ARGS="--break-system-packages"; \
    else \
        PIP_EXTRA_ARGS=""; \
    fi && \
    python3 -m pip install --no-cache-dir ${PIP_EXTRA_ARGS} 'cmake==3.21' perfetto
|
|
|
|
# Fail the build early if the perfetto Python package is missing.
#
# `pip show` exits non-zero when the package is not installed, so the exit
# status is used directly rather than searching the output for "WARNING"
# (any unrelated pip warning would otherwise be reported as a failed install).
# The same `python3 -m pip` that performed the installation is queried, and no
# log file is left behind in the layer.
RUN if python3 -m pip show perfetto > /dev/null 2>&1; then \
        echo "perfetto successfully installed!"; \
    else \
        echo "ERROR: perfetto install failed."; \
        exit 1; \
    fi
|
|
|
|
# Minor versions of Python 3 to provision (3.6 ... 3.13 by default).
ARG PYTHON_VERSIONS="6 7 8 9 10 11 12 13"

# Install Miniforge into /opt/conda and create one conda environment per
# requested Python version (named py3.<minor>), each with pip and numpy from
# conda-forge plus numpy, perfetto and dataclasses from pip.
#
# - pip runs with --no-cache-dir so downloaded wheels are not kept in the layer.
# - The final `rm -rf *` (with dotglob enabled) empties /tmp, which removes the
#   downloaded installer as well.
# - NOTE(review): a `for` loop returns the status of its last iteration only,
#   so a failure for an earlier Python version does not abort this step.
#   Confirm whether that is intended before making the loops fail-fast.
RUN wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -O miniforge.sh && \
    bash miniforge.sh -b -p /opt/conda && \
    export PATH="/opt/conda/bin:${PATH}" && \
    conda config --set always_yes yes --set changeps1 no --set solver classic && \
    conda update -c conda-forge -n base conda && \
    for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c conda-forge python=3.${i} pip numpy; done && \
    for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install --no-cache-dir numpy perfetto dataclasses; done && \
    conda clean -a -y && \
    cd /tmp && \
    shopt -s dotglob extglob && \
    rm -rf *
|
|
|
|
# TheRock tarball (optional).
# When both GPU_TYPE and GPU_TARBALL are supplied, download the named tarball
# from the therock-nightly-tarball S3 bucket (anonymous access) and store it
# as /opt/rocm-<version>-<GPU_TYPE>.tar.gz. With either argument empty this
# step does nothing.
ARG GPU_TYPE=""
ARG GPU_TARBALL=""

# - <version> is the last X.Y.Z sequence found in GPU_TARBALL. `grep -o`
#   returns each complete match, so multi-digit components such as "10.2.3"
#   are kept intact (a greedy `.*` prefix would truncate them to "0.2.3").
# - The `;` after the pip self-upgrade is deliberate: a failure of that command
#   does not abort the step, and the following commands still run.
# - The tarball is written straight to /opt, so the download is the last
#   command and its exit status decides whether the step succeeds.
RUN if [ -n "$GPU_TYPE" ] && [ -n "$GPU_TARBALL" ]; then \
        ROCM_VERSION=$(echo "$GPU_TARBALL" | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | tail -n 1); \
        if [ -z "$ROCM_VERSION" ]; then \
            echo "Error: Could not extract version from GPU_TARBALL ('$GPU_TARBALL')." >&2; \
            exit 1; \
        fi; \
        pip install --upgrade pip; \
        pip install --no-cache-dir awscli --break-system-packages && \
        aws s3 cp "s3://therock-nightly-tarball/${GPU_TARBALL}" "/opt/rocm-${ROCM_VERSION}-${GPU_TYPE}.tar.gz" --no-sign-request; \
    fi
|
|
|
|
# Final image settings: force the C.UTF-8 locale for all categories, use
# /home as the working directory, and run any subsequent shell-form
# instructions through a bash login shell.
ENV LC_ALL=C.UTF-8
WORKDIR /home
SHELL ["/bin/bash", "--login", "-c"]
|