From 8b52d71cc7ea6dc450b31f88c0f0cc93dddeea3a Mon Sep 17 00:00:00 2001 From: Jason Bonnell <166553723+jbonnell-amd@users.noreply.github.com> Date: Mon, 22 Sep 2025 16:58:55 -0400 Subject: [PATCH] rocprofiler-systems - add gfx containers to ghcr (#883) * Initial skeleton code for rocprofiler-systems-continuous-integration.yml * Add python3-devel to opensuse and rhel ci images * Update rocprofiler-systems-containers.yml to include TheRock tarballs * Update pip install command for Dockerfile.ubuntu.ci * Fix pip install again for Dockerfile.ubuntu.ci * Remove skeleton workflow for CI * Add new ci-gfx containers for TheRock installs * Add set -e and pipefail to ci Dockerfiles to detect errors * Upgrade pip in Dockerfile.ubuntu.ci * revert pipefail set -e change * Replace build-docker-ci.sh script with Docker step for ci-base * Add support for gfx950, add containers-ci-gfx.yml * Add working-directory to matrix setup steps * Try changing containers-ci-gfx.yml * make more changes to containers-ci-gfx.yml * Remove build-docker-ci.sh script from gfx step, fix typo in Dockerfile * Remove gfx110X and gfx120X for now * Update ci-gfx docker workflow to use ghcr.io * Temporary change to test one image * Enable push to test out ghcr package * Add labels to debug oauth issue * add packages permissions to step * add rocprofiler-systems-ghcr.yml workflow * Remove cache from Docker push action step * Add prefix to tag * Add back gfx94X and gfx950 support, add back no push on PR * Remove gfx container creation from rocprofiler-systems-containers.yml * Add a gfx950 image for now * Revert change --- .../rocprofiler-systems-containers.yml | 77 ++++++----- .../workflows/rocprofiler-systems-ghcr.yml | 122 ++++++++++++++++++ .../docker/Dockerfile.opensuse.ci | 13 +- .../docker/Dockerfile.rhel.ci | 11 +- .../docker/Dockerfile.ubuntu.ci | 11 +- .../docker/build-docker-ci.sh | 27 +++- 6 files changed, 223 insertions(+), 38 deletions(-) create mode 100644 
.github/workflows/rocprofiler-systems-ghcr.yml diff --git a/.github/workflows/rocprofiler-systems-containers.yml b/.github/workflows/rocprofiler-systems-containers.yml index 1485ae2017..e9f9511995 100644 --- a/.github/workflows/rocprofiler-systems-containers.yml +++ b/.github/workflows/rocprofiler-systems-containers.yml @@ -34,12 +34,12 @@ jobs: steps: - uses: actions/checkout@v4 with: - sparse-checkout: projects/rocprofiler-systems + sparse-checkout: projects/rocprofiler-systems/docker - name: Output data for containers matrix + working-directory: projects/rocprofiler-systems/docker id: generate_matrix_ci run: | - pushd projects/rocprofiler-systems/docker MATRIX_CONTENT=$(cat containers-ci.yml | yq '.matrix' -I=0 -o=json) echo "matrix_data=$MATRIX_CONTENT" >> $GITHUB_OUTPUT @@ -55,7 +55,7 @@ jobs: steps: - uses: actions/checkout@v4 with: - sparse-checkout: projects/rocprofiler-systems + sparse-checkout: projects/rocprofiler-systems/docker submodules: recursive - name: Set up QEMU @@ -70,31 +70,39 @@ jobs: username: ${{ secrets.ROCPROF_SYS_DOCKER_LOGIN }} password: ${{ secrets.ROCPROF_SYS_DOCKER_TOKEN }} - - name: Build CI Container (PR - No Push) - if: github.event_name == 'pull_request' - timeout-minutes: 45 - uses: nick-fields/retry@v3 + - name: Set up Docker variables + id: setup_vars + run: | + if [ ${{ matrix.distro }} = "opensuse" ]; then + DISTRO_IMAGE="opensuse/leap" + elif [ ${{ matrix.distro }} = "rhel" ]; then + DISTRO_IMAGE="rockylinux/rockylinux" + else + DISTRO_IMAGE=${{ matrix.distro }} + fi + echo "distro_image=${DISTRO_IMAGE}" >> $GITHUB_OUTPUT + + if [ ${{ matrix.distro }} = "debian" ]; then + DOCKER_FILE=Dockerfile.ubuntu.ci + else + DOCKER_FILE=Dockerfile.${{ matrix.distro }}.ci + fi + echo "docker_file=${DOCKER_FILE}" >> $GITHUB_OUTPUT + + - name: Build CI Base Container (Does not Push on PR) + uses: docker/build-push-action@v6 with: - retry_wait_seconds: 60 - timeout_minutes: 45 - max_attempts: 3 - command: | - pushd 
projects/rocprofiler-systems/docker - ./build-docker-ci.sh --distro ${{ matrix.distro }} --versions ${{ matrix.version }} --user ${{ secrets.ROCPROF_SYS_DOCKER_LOGIN }} --jobs 2 --elfutils-version 0.188 --boost-version 1.79.0 - popd - - - name: Build CI Container (Push) - if: github.event_name != 'pull_request' - timeout-minutes: 45 - uses: nick-fields/retry@v3 - with: - retry_wait_seconds: 60 - timeout_minutes: 45 - max_attempts: 3 - command: | - pushd projects/rocprofiler-systems/docker - ./build-docker-ci.sh --distro ${{ matrix.distro }} --versions ${{ matrix.version }} --user ${{ secrets.ROCPROF_SYS_DOCKER_LOGIN }} --push --jobs 2 --elfutils-version 0.188 --boost-version 1.79.0 - popd + file: projects/rocprofiler-systems/docker/${{ steps.setup_vars.outputs.docker_file }} + platforms: linux/amd64 + push: ${{ github.event_name != 'pull_request' }} + build-args: | + DISTRO=${{ steps.setup_vars.outputs.distro_image }} + VERSION=${{ matrix.version }} + NJOBS=2 + ELFUTILS_DOWNLOAD_VERSION=0.188 + BOOST_DOWNLOAD_VERSION=1.79.0 + tags: | + ${{ secrets.ROCPROF_SYS_DOCKER_LOGIN }}/rocprofiler-systems:ci-base-${{ matrix.distro }}-${{ matrix.version }} prepare_matrix_release: if: github.repository == 'ROCm/rocm-systems' @@ -108,9 +116,9 @@ jobs: sparse-checkout: projects/rocprofiler-systems - name: Output data for containers matrix + working-directory: projects/rocprofiler-systems/docker id: generate_matrix_release run: | - pushd projects/rocprofiler-systems/docker MATRIX_CONTENT=$(cat containers.yml | yq '.matrix' -I=0 -o=json) echo "matrix_data=$MATRIX_CONTENT" >> $GITHUB_OUTPUT @@ -150,7 +158,11 @@ jobs: max_attempts: 3 command: | pushd projects/rocprofiler-systems/docker - ./build-docker.sh --distro ${{ matrix.os-distro }} --versions ${{ matrix.os-version }} --rocm-versions ${{ matrix.rocm-version }} --user ${{ secrets.ROCPROF_SYS_DOCKER_LOGIN }} + ./build-docker.sh \ + --distro ${{ matrix.os-distro }} \ + --versions ${{ matrix.os-version }} \ + --rocm-versions ${{ 
matrix.rocm-version }} \ + --user ${{ secrets.ROCPROF_SYS_DOCKER_LOGIN }} popd - name: Build Base Container (Push) @@ -163,5 +175,10 @@ jobs: max_attempts: 3 command: | pushd projects/rocprofiler-systems/docker - ./build-docker.sh --distro ${{ matrix.os-distro }} --versions ${{ matrix.os-version }} --rocm-versions ${{ matrix.rocm-version }} --user ${{ secrets.ROCPROF_SYS_DOCKER_LOGIN }} --push + ./build-docker.sh \ + --distro ${{ matrix.os-distro }} \ + --versions ${{ matrix.os-version }} \ + --rocm-versions ${{ matrix.rocm-version }} \ + --user ${{ secrets.ROCPROF_SYS_DOCKER_LOGIN }} \ + --push popd diff --git a/.github/workflows/rocprofiler-systems-ghcr.yml b/.github/workflows/rocprofiler-systems-ghcr.yml new file mode 100644 index 0000000000..e7b739e59d --- /dev/null +++ b/.github/workflows/rocprofiler-systems-ghcr.yml @@ -0,0 +1,122 @@ +name: Publish GHCR Packages for rocprofiler-systems CI Images + +on: + workflow_dispatch: + schedule: + - cron: 0 5 * * * + push: + branches: + - develop + paths: + - '.github/workflows/rocprofiler-systems-ghcr.yml' + - 'projects/rocprofiler-systems/docker/**' + pull_request: + paths: + - '.github/workflows/rocprofiler-systems-ghcr.yml' + - 'projects/rocprofiler-systems/docker/**' + +jobs: + prepare_matrix_ci: + if: github.repository == 'ROCm/rocm-systems' + runs-on: ubuntu-latest + outputs: + matrix_data: ${{ steps.generate_matrix_ci_base.outputs.matrix_data }} + + steps: + - uses: actions/checkout@v4 + with: + sparse-checkout: projects/rocprofiler-systems/docker + + - name: Output data for containers matrix + working-directory: projects/rocprofiler-systems/docker + id: generate_matrix_ci_base + run: | + MATRIX_CONTENT=$(cat containers-ci.yml | yq '.matrix' -I=0 -o=json) + echo "matrix_data=$MATRIX_CONTENT" >> $GITHUB_OUTPUT + + rocprofiler-systems-ci-gfx: + needs: prepare_matrix_ci + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + strategy: + fail-fast: false + matrix: + system: ${{ 
fromJSON(needs.prepare_matrix_ci.outputs.matrix_data) }} + gpu: [ 'gfx94X', 'gfx950' ] + + steps: + - name: Checkout Repository + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + sparse-checkout: projects/rocprofiler-systems + submodules: recursive + + - name: Login to GitHub Container Registry + uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker variables + id: setup_vars_gfx + run: | + if [ ${{ matrix.system.distro }} = "opensuse" ]; then + DISTRO_IMAGE="opensuse/leap" + elif [ ${{ matrix.system.distro }} = "rhel" ]; then + DISTRO_IMAGE="rockylinux/rockylinux" + else + DISTRO_IMAGE=${{ matrix.system.distro }} + fi + echo "distro_image=${DISTRO_IMAGE}" >> $GITHUB_OUTPUT + + if [ ${{ matrix.system.distro }} = "debian" ]; then + DOCKER_FILE=Dockerfile.ubuntu.ci + else + DOCKER_FILE=Dockerfile.${{ matrix.system.distro }}.ci + fi + echo "docker_file=${DOCKER_FILE}" >> $GITHUB_OUTPUT + + - name: Get the latest build of The Rock tarball + id: therock + run: | + sudo apt-get install -y python3-pip + python3 -m pip install -U pip + python3 -m pip install -U awscli + export PATH=~/.local/bin:$PATH + KEY=$(aws s3api list-objects-v2 \ + --bucket therock-nightly-tarball \ + --no-sign-request \ + --output json \ + --query "sort_by(Contents[?contains(Key, 'linux-${{ matrix.gpu }}')], &LastModified)[-1].Key") + KEY=${KEY//\"/} + if [ -z "$KEY" ] || [ "$KEY" = "null" ]; then echo "No ${{ matrix.gpu }} tarball found"; exit 1; fi # --output json prints the literal null when the query matches nothing, so an emptiness check alone never fires + echo "tarball=${KEY}" >> $GITHUB_OUTPUT + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.7.9 + with: + images: ghcr.io/ROCm/rocprofiler-${{ matrix.system.distro }}-${{ matrix.system.version }} + + - name: Build CI GFX Container (Does not Push on PR) + uses: 
docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 + with: + file: projects/rocprofiler-systems/docker/${{ steps.setup_vars_gfx.outputs.docker_file }} + platforms: linux/amd64 + push: ${{ github.event_name != 'pull_request' }} + build-args: | + DISTRO=${{ steps.setup_vars_gfx.outputs.distro_image }} + VERSION=${{ matrix.system.version }} + TYPE=${{ matrix.gpu }} + GPU_TYPE=${{ matrix.gpu }} + GPU_TARBALL=${{ steps.therock.outputs.tarball }} + NJOBS=2 + ELFUTILS_DOWNLOAD_VERSION=0.188 + BOOST_DOWNLOAD_VERSION=1.79.0 + tags: | + ghcr.io/rocm/rocprofiler-${{ matrix.system.distro }}-${{ matrix.system.version }}:systems-ci-${{ matrix.gpu }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci b/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci index 09ac6fd440..d976e2ce51 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci +++ b/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci @@ -28,8 +28,8 @@ RUN zypper --non-interactive update -y && \ zypper --non-interactive dist-upgrade -y && \ zypper --non-interactive install -y -t pattern devel_basis && \ zypper --non-interactive install -y binutils-gold chrpath cmake curl dpkg-devel \ - gcc-c++ git iproute2 libnuma-devel ninja openmpi3-devel papi-devel python3-pip \ - rpm-build sqlite3-devel vim wget && \ + gcc-c++ git iproute2 libnuma-devel ninja openmpi3-devel papi-devel python3-devel \ + python3-pip rpm-build sqlite3-devel vim wget && \ zypper --non-interactive clean --all && \ python3 -m pip install 'cmake==3.21' perfetto @@ -47,5 +47,14 @@ RUN wget https://github.com/conda-forge/miniforge/releases/latest/download/Minif shopt -s dotglob extglob && \ rm -rf * +# The Rock Tarball +ARG GPU_TYPE="" +ARG GPU_TARBALL="" +RUN if [ -n "$GPU_TYPE" ] && [ -n "$GPU_TARBALL" ]; then \ + python3 -m pip install -U awscli; \ + aws s3 cp "s3://therock-nightly-tarball/${GPU_TARBALL}" rocm-${GPU_TYPE}.tar.gz 
--no-sign-request; \ + mv rocm-${GPU_TYPE}.tar.gz /opt/rocm-${GPU_TYPE}.tar.gz; \ + fi + WORKDIR /home SHELL [ "/bin/bash", "--login", "-c" ] diff --git a/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci b/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci index e9db311aca..859e20f4b1 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci +++ b/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci @@ -20,7 +20,7 @@ ARG NJOBS="8" RUN yum groupinstall -y "Development Tools" && \ yum install -y epel-release && crb enable && \ yum install -y --allowerasing chrpath cmake curl dpkg-devel iproute ninja-build numactl-devel \ - openmpi-devel papi-devel python3-pip sqlite-devel texinfo wget which vim zlib-devel && \ + openmpi-devel papi-devel python3-devel python3-pip sqlite-devel texinfo wget which vim zlib-devel && \ yum clean all && \ python3 -m pip install 'cmake==3.21' perfetto @@ -38,5 +38,14 @@ RUN wget https://github.com/conda-forge/miniforge/releases/latest/download/Minif shopt -s dotglob extglob && \ rm -rf * +# The Rock Tarball +ARG GPU_TYPE="" +ARG GPU_TARBALL="" +RUN if [ -n "$GPU_TYPE" ] && [ -n "$GPU_TARBALL" ]; then \ + python3 -m pip install -U awscli; \ + aws s3 cp "s3://therock-nightly-tarball/${GPU_TARBALL}" rocm-${GPU_TYPE}.tar.gz --no-sign-request; \ + mv rocm-${GPU_TYPE}.tar.gz /opt/rocm-${GPU_TYPE}.tar.gz; \ + fi + WORKDIR /home SHELL [ "/bin/bash", "--login", "-c" ] diff --git a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci index 8ea3ca3512..8f95bd3544 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci +++ b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci @@ -1,4 +1,3 @@ - ARG DISTRO ARG VERSION FROM ${DISTRO}:${VERSION} @@ -17,7 +16,6 @@ ARG ELFUTILS_DOWNLOAD_VERSION="0.188" ARG BOOST_DOWNLOAD_VERSION="1.79.0" ARG NJOBS="8" - ENV PATH="/usr/local/bin:${PATH}" ENV LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:${LIBRARY_PATH}" ENV 
LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH}" @@ -53,6 +51,15 @@ RUN wget https://github.com/conda-forge/miniforge/releases/latest/download/Minif shopt -s dotglob extglob && \ rm -rf * +# The Rock Tarball +ARG GPU_TYPE="" +ARG GPU_TARBALL="" +RUN if [ -n "$GPU_TYPE" ] && [ -n "$GPU_TARBALL" ]; then \ + pip install --upgrade pip; \ + pip install awscli --break-system-packages; \ + aws s3 cp "s3://therock-nightly-tarball/${GPU_TARBALL}" rocm-${GPU_TYPE}.tar.gz --no-sign-request; \ + mv rocm-${GPU_TYPE}.tar.gz /opt/rocm-${GPU_TYPE}.tar.gz; \ + fi ENV LC_ALL=C.UTF-8 WORKDIR /home diff --git a/projects/rocprofiler-systems/docker/build-docker-ci.sh b/projects/rocprofiler-systems/docker/build-docker-ci.sh index d24c3ae5ee..e71745e41b 100755 --- a/projects/rocprofiler-systems/docker/build-docker-ci.sh +++ b/projects/rocprofiler-systems/docker/build-docker-ci.sh @@ -3,6 +3,7 @@ set -e : ${USER:=$(whoami)} +: ${TYPE:="base"} : ${DISTRO:=ubuntu} : ${VERSIONS:=24.04} : ${NJOBS=$(nproc)} @@ -11,6 +12,8 @@ set -e : ${PYTHON_VERSIONS:="6 7 8 9 10 11 12 13"} : ${PUSH:=0} : ${PULL:=--pull} +: ${GPU_TYPE:=""} +: ${GPU_TARBALL:=""} verbose-run() { @@ -45,6 +48,7 @@ usage() print_default_option elfutils-version "[0.183..0.188]" "ElfUtils version" "${ELFUTILS_VERSION}" print_default_option boost-version "[1.67.0..1.79.0]" "Boost version" "${BOOST_VERSION}" print_default_option user "[USERNAME]" "DockerHub username" "${USER}" + print_default_option type "[base|gfxXXX]" "Type of image to create" "${TYPE}" } send-error() @@ -69,6 +73,11 @@ do usage exit 0 ;; + "--type") + shift + TYPE=${1} + reset-last + ;; "--distro") shift DISTRO=${1} @@ -99,6 +108,16 @@ do BOOST_VERSION=${1} reset-last ;; + "--gpu-type") + shift + GPU_TYPE=${1} + reset-last + ;; + "--gpu-tarball") + shift + GPU_TARBALL=${1} + reset-last + ;; --user|-u) shift USER=${1} @@ -156,19 +175,21 @@ do verbose-run docker build . 
\ ${PULL} \ -f ${DOCKER_FILE} \ - --tag ${USER}/rocprofiler-systems:ci-base-${DISTRO}-${VERSION} \ + --tag ${USER}/rocprofiler-systems:ci-${TYPE}-${DISTRO}-${VERSION} \ --build-arg DISTRO=${DISTRO_IMAGE} \ --build-arg VERSION=${VERSION} \ --build-arg NJOBS=${NJOBS} \ --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" \ --build-arg ELFUTILS_DOWNLOAD_VERSION=${ELFUTILS_VERSION} \ - --build-arg BOOST_DOWNLOAD_VERSION=${BOOST_VERSION} + --build-arg BOOST_DOWNLOAD_VERSION=${BOOST_VERSION} \ + --build-arg GPU_TYPE=${GPU_TYPE} \ + --build-arg GPU_TARBALL=${GPU_TARBALL} done if [ "${PUSH}" -gt 0 ]; then for VERSION in ${VERSIONS} do - verbose-run docker push ${USER}/rocprofiler-systems:ci-base-${DISTRO}-${VERSION} + verbose-run docker push ${USER}/rocprofiler-systems:ci-${TYPE}-${DISTRO}-${VERSION} done fi