Files
Sajina PK 15c82d6da8 [rocprofiler-system]: Enable UCX Communication API tracing (#2306)
## Motivation

Enable UCX communication tracing and communication metadata tracking.

## Technical Details

Implement UCX API wrappers to trace transport-layer communication. This adds communication data tracking and exposes “UCX Comm Send/Recv” timelines, enabling detailed analysis of MPI, OpenSHMEM, and other UCX-based runtime communication patterns.

- Implemented function interception for UCX functions across multiple categories using the gotcha component.
- Extended the comm_data component to track UCX send/recv operations. Added ucx_send and ucx_recv labels for Perfetto counter tracks. Integrated UCX data tracking with the existing MPI/RCCL tracking infrastructure.
- Added the ROCPROFSYS_USE_UCX configuration option (enabled by default).
- Created a FindUCX.cmake module for UCX header detection. It falls back to the internal UCX headers if system headers are not found.
- Updated all Dockerfiles to include UCX dependencies.
2026-01-20 13:16:43 -05:00

67 خطوط
2.5 KiB
Docker

# Base image selection. DISTRO and VERSION are declared before FROM, so they
# are only available to the FROM line itself.
ARG DISTRO=opensuse/leap
ARG VERSION=15.5
FROM ${DISTRO}:${VERSION}

# Environment shared by every later build step and by containers started from
# the image. /usr/local/bin is placed ahead of the base image's PATH.
ENV HOME=/root \
    SHELL=/bin/bash \
    BASH_ENV=/etc/bash.bashrc \
    DEBIAN_FRONTEND=noninteractive \
    PATH=/usr/local/bin:${PATH}

# Scratch directory for the build steps; shell-form RUN commands use bash.
WORKDIR /tmp
SHELL ["/bin/bash", "-c"]

# Build arguments.
# NOTE(review): none of these four are referenced by the instructions shown
# in this file -- presumably kept for parity with sibling Dockerfiles; confirm.
ARG EXTRA_PACKAGES=""
ARG ELFUTILS_DOWNLOAD_VERSION="0.188"
ARG BOOST_DOWNLOAD_VERSION="1.79.0"
ARG NJOBS="8"
# Best-effort repository refresh and patch pass: every command is allowed to
# fail and the step as a whole always succeeds.
# NOTE(review): `patch` is run twice, presumably because the first pass may
# only update the package manager itself -- confirm.
RUN zypper --non-interactive -i --gpg-auto-import-keys refresh || true; \
    zypper --non-interactive -i patch || true; \
    zypper --non-interactive -i patch || true; \
    zypper --non-interactive -i --gpg-auto-import-keys refresh || true
# Update the distribution, then install the development toolchain and the
# libraries needed by the build (including the UCX development packages
# libucp-devel / libuct-devel). The zypper caches are cleaned in the same layer
# so they do not end up in the image.
# pip then installs a pinned CMake and perfetto; --no-cache-dir keeps pip's
# download cache out of the layer.
# NOTE(review): the pip-installed cmake==3.21 sits alongside the distro `cmake`
# package; presumably the pinned one is meant to win via PATH -- confirm.
RUN zypper --non-interactive update -y && \
    zypper --non-interactive dist-upgrade -y && \
    zypper --non-interactive install -y -t pattern devel_basis && \
    zypper --non-interactive install -y \
        chrpath \
        cmake \
        curl \
        dpkg-devel \
        gcc-c++ \
        gcc-fortran \
        git \
        gmock \
        gtest \
        iproute2 \
        libucp-devel \
        libuct-devel \
        ninja \
        nlohmann_json-devel \
        openmpi3-devel \
        papi-devel \
        python3-devel \
        python3-pip \
        rpm-build \
        sqlite3-devel \
        vim \
        wget && \
    zypper --non-interactive clean --all && \
    python3 -m pip install --no-cache-dir 'cmake==3.21' perfetto
# Python minor versions: for each <n>, a conda environment named py3.<n> with
# python=3.<n> is created under /opt/conda/envs.
ARG PYTHON_VERSIONS="6 7 8 9 10 11 12 13"

# Install Miniforge into /opt/conda, create one environment per requested
# Python version, and install numpy/perfetto/dataclasses into each with pip.
# --no-cache-dir keeps pip's download cache out of the layer (conda's own
# caches are removed by `conda clean`). Finally everything left in /tmp,
# including dotfiles (dotglob), is deleted -- this removes the installer.
# NOTE(review): a shell `for` loop returns the status of its last iteration
# only, so a failure for an earlier Python version does not abort the build.
# Left unchanged here; confirm whether that leniency is intended.
RUN wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -O miniforge.sh && \
    bash miniforge.sh -b -p /opt/conda && \
    export PATH="/opt/conda/bin:${PATH}" && \
    conda config --set always_yes yes --set changeps1 no --set solver classic && \
    conda update -c conda-forge -n base conda && \
    for i in ${PYTHON_VERSIONS}; do \
        conda create -n py3.${i} -c conda-forge python=3.${i} pip numpy; \
    done && \
    for i in ${PYTHON_VERSIONS}; do \
        /opt/conda/envs/py3.${i}/bin/python -m pip install --no-cache-dir numpy perfetto dataclasses; \
    done && \
    conda clean -a -y && \
    cd /tmp && \
    shopt -s dotglob extglob && \
    rm -rf *
# The Rock Tarball
# Optional step: when both GPU_TYPE and GPU_TARBALL are supplied, download
# GPU_TARBALL from the therock-nightly-tarball S3 bucket (unsigned request) and
# store it as /opt/rocm-<version>-<GPU_TYPE>.tar.gz. With either argument empty
# the step is a no-op.
#
# <version> is the X.Y.Z triple found in the tarball name. The pattern anchors
# the triple on a non-digit boundary (or start of string) so that a multi-digit
# major version is captured whole (e.g. "10.2.3" rather than "0.2.3").
# `set -e` makes any failing command (pip, aws) abort the build immediately.
ARG GPU_TYPE=""
ARG GPU_TARBALL=""
RUN if [ -n "$GPU_TYPE" ] && [ -n "$GPU_TARBALL" ]; then \
        set -e; \
        ROCM_VERSION=$(echo "$GPU_TARBALL" | sed -nE 's/(^|.*[^0-9])([0-9]+\.[0-9]+\.[0-9]+).*/\2/p'); \
        if [ -z "$ROCM_VERSION" ]; then \
            echo "Error: Could not extract version from GPU_TARBALL ('$GPU_TARBALL')." >&2; \
            exit 1; \
        fi; \
        python3 -m pip install --no-cache-dir -U awscli; \
        aws s3 cp "s3://therock-nightly-tarball/${GPU_TARBALL}" "/opt/rocm-${ROCM_VERSION}-${GPU_TYPE}.tar.gz" --no-sign-request; \
    fi
# Final image state: the working directory is /home, and shell-form
# instructions from here on are executed through a login bash.
WORKDIR /home
SHELL ["/bin/bash", "--login", "-c"]