Files
Sajina PK 15c82d6da8 [rocprofiler-system]: Enable UCX Communication API tracing (#2306)
## Motivation

Enable UCX communication tracing and communication metadata collection.

## Technical Details

Implement UCX API wrappers to trace transport-layer communication. This adds communication data tracking and exposes “UCX Comm Send/Recv” timelines, enabling detailed analysis of MPI, OpenSHMEM, and other UCX-based runtime communication patterns.

- Implements function interception for UCX functions across multiple categories using the gotcha component.
- Extended the comm_data component to track UCX send/recv operations. Added ucx_send and ucx_recv labels for Perfetto counter tracks, and integrated UCX data tracking with the existing MPI/RCCL tracking infrastructure.
- Added ROCPROFSYS_USE_UCX configuration option (enabled by default).
- Created FindUCX.cmake module for UCX header detection. Falls back to internal UCX headers if system headers are not found.
- Updated all Dockerfiles to include UCX dependencies.
2026-01-20 13:16:43 -05:00

62 строки
2.5 KiB
Docker

# Base image is parameterized so the same recipe can be pointed at another
# distribution/release with --build-arg DISTRO=... / VERSION=...
ARG DISTRO=rockylinux/rockylinux
ARG VERSION=8
FROM ${DISTRO}:${VERSION}

# Session defaults. Written in key=value form; the whitespace-separated
# `ENV key value` syntax is deprecated.
# NOTE(review): DEBIAN_FRONTEND is a Debian/apt setting and is presumably unused
# on this yum/dnf-based image; it is kept so the runtime environment is unchanged.
ENV HOME=/root \
    SHELL=/bin/bash \
    BASH_ENV=/etc/bash.bashrc \
    DEBIAN_FRONTEND=noninteractive

# Scratch directory for the build steps that follow.
WORKDIR /tmp
SHELL [ "/bin/bash", "-c" ]

# Put the OpenMPI tools and locally installed binaries ahead of the inherited PATH.
ENV PATH=/usr/lib64/openmpi/bin:/usr/local/bin:${PATH}
# Only prepend the inherited LIBRARY_PATH when it is non-empty. A bare
# "${LIBRARY_PATH}:" would otherwise leave a leading empty element, which the
# compiler/linker treats as "search the current directory".
ENV LIBRARY_PATH=${LIBRARY_PATH:+${LIBRARY_PATH}:}/opt/amdgpu/lib64
# Install the compiler toolchain plus build/test dependencies (including the UCX
# and OpenMPI development packages), then CMake 3.21 and the perfetto Python
# package through pip. epel-release is installed and CRB enabled first, before
# the main package list is installed.
# Packages are listed one per line in alphabetical order to keep diffs small.
# The yum metadata is cleaned in the same layer, and pip runs with
# --no-cache-dir so no download cache is baked into the image.
RUN yum groupinstall -y "Development Tools" && \
    yum install -y epel-release && crb enable && \
    yum install -y --allowerasing \
        chrpath \
        cmake \
        curl \
        dpkg-devel \
        gmock-devel \
        gtest-devel \
        iproute \
        json-devel \
        libdrm-devel \
        ninja-build \
        numactl-devel \
        openmpi-devel \
        papi-devel \
        python3-pip \
        sqlite-devel \
        texinfo \
        ucx-devel \
        wget \
        which \
        zlib-devel && \
    yum clean all && \
    python3 -m pip install --no-cache-dir 'cmake==3.21' && \
    python3 -m pip install --no-cache-dir 'perfetto'
# ROCm release selectors supplied at build time. With the default zeros for
# ROCM_MAJOR and ROCM_MINOR the ROCm installation step below is skipped.
ARG ROCM_VERSION=0.0
ARG ROCM_MAJOR=0
ARG ROCM_MINOR=0
ARG ROCM_PATCH=0
ARG ROCM_VERSION_URL=0.0
ARG ROCM_VERSN=0
# Optional ROCm installation:
#   1. read the OS release (e.g. "8.10") and its major number from /etc/os-release;
#   2. pick the repository that provides perl-File-BaseDir (powertools on EL8,
#      crb otherwise) and install that package;
#   3. install the amdgpu-install RPM matching the requested ROCm release and
#      the OS release, then rocm-dev;
#   4. drop the yum caches in the same layer.
RUN if ! { [ "${ROCM_MAJOR}" = "0" ] && [ "${ROCM_MINOR}" = "0" ]; }; then \
        el_release=$(. /etc/os-release; echo "${VERSION_ID}") && \
        el_major=${el_release%%.*} && \
        if [ "${el_major}" -eq 8 ]; then perl_repo=powertools; else perl_repo=crb; fi && \
        installer_rpm="amdgpu-install-${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1.el${el_major}.noarch.rpm" && \
        dnf -y --enablerepo=${perl_repo} install perl-File-BaseDir && \
        yum install -y "https://repo.radeon.com/amdgpu-install/${ROCM_VERSION_URL}/rhel/${el_release}/${installer_rpm}" && \
        yum install -y rocm-dev && \
        yum clean all; \
    fi
# Minor versions of Python 3 for which a conda environment (py3.<minor>) is created.
ARG PYTHON_VERSIONS="6 7 8 9 10 11 12 13"
# Install Miniforge into /opt/conda, create one environment per requested Python
# version, and install numpy, perfetto and dataclasses into each of them.
# The installer script is deleted in the same layer so it does not remain in the
# image, and pip runs with --no-cache-dir because `conda clean` does not remove
# pip's own download cache.
# NOTE(review): the installer is fetched from the "latest" release without a
# checksum, so rebuilds are not reproducible — consider pinning a release.
# NOTE(review): a `for` loop returns the status of its last iteration only, so a
# failure for an earlier Python version does not stop the build — confirm
# whether that best-effort behavior is intended.
RUN wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -O miniforge.sh && \
    bash miniforge.sh -b -p /opt/conda && \
    rm -f miniforge.sh && \
    export PATH="/opt/conda/bin:${PATH}" && \
    conda config --set always_yes yes --set changeps1 no --set solver classic && \
    conda update -c conda-forge -n base conda && \
    for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c conda-forge python=3.${i} pip; done && \
    for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install --no-cache-dir numpy perfetto dataclasses; done && \
    conda clean -a -y && \
    conda init
# Point /opt/rocm at the versioned ROCm directory when a ROCm version was requested.
# -n makes ln replace an existing /opt/rocm symlink instead of following it and
# creating the new link inside the directory it points to.
RUN if [ "${ROCM_VERSION}" != "0.0" ]; then ln -sfn /opt/rocm-${ROCM_VERSION}* /opt/rocm; fi
# Runtime configuration: containers start in /home with a UTF-8 C locale.
WORKDIR /home
# key=value form; the whitespace-separated ENV syntax is deprecated.
ENV LC_ALL=C.UTF-8
# Any later shell-form instruction runs under a login bash.
SHELL [ "/bin/bash", "--login", "-c" ]
# The entrypoint script comes from the build context and is invoked in exec form.
COPY ./entrypoint-rhel.sh /docker-entrypoint.sh
ENTRYPOINT [ "/docker-entrypoint.sh" ]