From 9d902863711b422931972e168c3cec3e2c67478e Mon Sep 17 00:00:00 2001 From: Jason Bonnell <166553723+jbonnell-amd@users.noreply.github.com> Date: Mon, 22 Sep 2025 16:47:16 -0400 Subject: [PATCH] rocprofiler-sdk CI workflow improvements (#956) Update rocprofiler-sdk and aqlprofile CI workflows to improve readability --- .../aqlprofile-continuous_integration.yml | 58 +++--- ...rocprofiler-sdk-build-ci-docker-images.yml | 34 +--- .../rocprofiler-sdk-code_coverage.yml | 72 ++++---- ...rocprofiler-sdk-continuous_integration.yml | 160 +++++++++-------- projects/rocprofiler-sdk/docker/Dockerfile.ci | 169 ++++++++---------- 5 files changed, 227 insertions(+), 266 deletions(-) diff --git a/.github/workflows/aqlprofile-continuous_integration.yml b/.github/workflows/aqlprofile-continuous_integration.yml index f46915d1f1..324c5c8e9b 100644 --- a/.github/workflows/aqlprofile-continuous_integration.yml +++ b/.github/workflows/aqlprofile-continuous_integration.yml @@ -27,6 +27,9 @@ concurrency: env: ROCM_PATH: "/opt/rocm" + ROCM_VERSION: "7.0.0" + PYTHON_VENV_PATH: "aqlprofile" + PYTHON_VENV_ACTIVATE: "aqlprofile/bin/activate" navi3_EXCLUDE_TESTS_REGEX: "" vega20_EXCLUDE_TESTS_REGEX: "" mi200_EXCLUDE_TESTS_REGEX: "" @@ -49,15 +52,14 @@ jobs: strategy: fail-fast: false matrix: - system: [ - {gpu: 'navi4', runner: 'rocprofiler-navi4-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx120X"}, - {gpu: 'navi3', runner: 'rocprofiler-navi3-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx110X"}, - {gpu: 'mi325', runner: 'linux-mi325-1gpu-ossci-rocm', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx94X"} - ] + system: + - { gpu: 'navi4', runner: 'rocprofiler-navi4-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', gpu-target: 'gfx120X' } + - { gpu: 'navi3', runner: 'rocprofiler-navi3-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', gpu-target: 'gfx110X' } + - { gpu: 'mi325', runner: 
'linux-mi325-1gpu-ossci-rocm', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', gpu-target: 'gfx94X' } runs-on: ${{ matrix.system.runner }} container: - image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-${{ matrix.system.therock-s3 }}-latest + image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-${{ matrix.system.gpu-target }}-latest credentials: username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }} password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }} @@ -79,8 +81,8 @@ jobs: shell: bash working-directory: /tmp run: | - tar -xf /opt/rocm-${{ matrix.system.therock-s3 }}.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0 - ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }} + tar -xf ${{ env.ROCM_PATH }}-${{ matrix.system.gpu-target }}.tar.gz -C ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} + ln -s ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} ${{ env.ROCM_PATH }} echo "ROCm installed to: ${{ env.ROCM_PATH }}" ln -s -f /usr/bin/git /usr/local/bin/git @@ -106,7 +108,7 @@ jobs: echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; } for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done - cat /opt/rocm/.info/version + cat ${{ env.ROCM_PATH }}/.info/version ls -la pwd @@ -114,8 +116,8 @@ jobs: timeout-minutes: 30 shell: bash run: - PATH=~/.local/bin:/opt/rocm/bin:${PATH} - LD_LIBRARY_PATH=$(pwd)/build:/opt/rocm/lib:$LD_LIBRARY_PATH + PATH=~/.local/bin:${{ env.ROCM_PATH }}/bin:${PATH} + LD_LIBRARY_PATH=$(pwd)/build:${{ env.ROCM_PATH }}/lib:$LD_LIBRARY_PATH ctest --output-on-failure -DCTEST_SOURCE_DIRECTORY="$(pwd)" -DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" -DCTEST_SITE="${{ matrix.system.runner }}" -DCTEST_BUILD_NAME=PR_${{ github.ref_name }}_${{ github.repository }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-core @@ -130,15 +132,14 @@ jobs: strategy: fail-fast: false matrix: - system: [ 
- {gpu: 'mi325', os: 'rhel-8.8', build-type: 'RelWithDebInfo', container: ''}, - {gpu: 'mi325', os: 'rhel-9.5', build-type: 'RelWithDebInfo', container: ''}, - {gpu: 'mi325', os: 'sles-15.6', build-type: 'RelWithDebInfo', container: ''} - ] + system: + - { gpu: 'mi325', runner: 'linux-mi325-1gpu-ossci-rocm', os: 'rhel-8.8', build-type: 'RelWithDebInfo', gpu-target: 'gfx94X' } + - { gpu: 'mi325', runner: 'linux-mi325-1gpu-ossci-rocm', os: 'rhel-9.5', build-type: 'RelWithDebInfo', gpu-target: 'gfx94X' } + - { gpu: 'mi325', runner: 'linux-mi325-1gpu-ossci-rocm', os: 'sles-15.6', build-type: 'RelWithDebInfo', gpu-target: 'gfx94X' } - runs-on: linux-mi325-1gpu-ossci-rocm + runs-on: ${{ matrix.system.runner }} container: - image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-gfx94X-latest + image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-${{ matrix.system.gpu-target }}-latest credentials: username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }} password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }} @@ -163,8 +164,8 @@ jobs: shell: bash working-directory: /tmp run: | - tar -xf /opt/rocm-gfx94X.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0 - ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }} + tar -xf ${{ env.ROCM_PATH }}-${{ matrix.system.gpu-target }}.tar.gz -C ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} + ln -s ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} ${{ env.ROCM_PATH }} echo "ROCm installed to: ${{ env.ROCM_PATH }}" @@ -173,8 +174,8 @@ jobs: shell: bash run: | git config --global --add safe.directory '*' - python3 -m venv rocprofiler-sdk - source rocprofiler-sdk/bin/activate + python3 -m venv ${{ env.PYTHON_VENV_PATH }} + source ${{ env.PYTHON_VENV_ACTIVATE }} export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH python3 -m pip install --upgrade pip python3 -m pip install cmake @@ -182,12 +183,12 @@ jobs: - name: List Files shell: bash run: | - source rocprofiler-sdk/bin/activate + source ${{ env.PYTHON_VENV_ACTIVATE }} echo "PATH: ${PATH}" echo 
"LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; } for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done - cat /opt/rocm/.info/version + cat ${{ env.ROCM_PATH }}/.info/version ls -la pwd @@ -195,11 +196,12 @@ jobs: timeout-minutes: 30 shell: bash run: - source rocprofiler-sdk/bin/activate; - PATH=~/.local/bin:/opt/rocm/bin:${PATH} - LD_LIBRARY_PATH=$(pwd)/build:/opt/rocm/lib:$LD_LIBRARY_PATH + source ${{ env.PYTHON_VENV_ACTIVATE }}; + PATH=~/.local/bin:${{ env.ROCM_PATH }}/bin:${PATH} + LD_LIBRARY_PATH=$(pwd)/build:${{ env.ROCM_PATH }}/lib:$LD_LIBRARY_PATH ctest --output-on-failure -DCTEST_SOURCE_DIRECTORY="$(pwd)" - -DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" -DCTEST_SITE="linux-mi325-1gpu-ossci-rocm" + -DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" + -DCTEST_SITE="${{ matrix.system.runner }}" -DCTEST_BUILD_NAME=PR_${{ github.ref_name }}_${{ github.repository }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-core -DCMAKE_CTEST_ARGUMENTS="" -DAQLPROFILE_BUILD_TESTS=ON diff --git a/.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml b/.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml index b6016ac22f..9f0771e1b8 100644 --- a/.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml +++ b/.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml @@ -8,9 +8,11 @@ on: branches: [ develop ] paths: - '.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml' + - 'projects/rocprofiler-sdk/docker/Dockerfile.ci' pull_request: paths: - '.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml' + - 'projects/rocprofiler-sdk/docker/Dockerfile.ci' concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -32,14 +34,14 @@ jobs: fail-fast: false matrix: os: [ 'ubuntu-22.04', 'rhel-8.8', 'rhel-9.5', 'sles-15.6' ] - gpu: [ 'gfx94X', 'gfx110X',
'gfx120X' ] + gpu: [ 'gfx94X', 'gfx950', 'gfx110X', 'gfx120X' ] steps: - name: Checkout (shallow) uses: actions/checkout@v4 with: sparse-checkout: | projects/rocprofiler-sdk/requirements.txt - projects/rocprofiler-sdk/Docker/Dockerfile.ci + projects/rocprofiler-sdk/docker/Dockerfile.ci - name: Get the latest therock build id: therock @@ -52,26 +54,10 @@ jobs: --bucket therock-nightly-tarball \ --no-sign-request \ --output json \ - --query "sort_by(Contents[?contains(Key, 'linux-gfx120X')], &LastModified)[-1].Key") + --query "sort_by(Contents[?contains(Key, 'linux-${{ matrix.gpu }}')], &LastModified)[-1].Key") KEY=${KEY//\"/} - test -n "$KEY" || { echo "No gfx120X tarball found"; exit 1; } - echo "gfx120X=${KEY}" >> $GITHUB_OUTPUT - KEY=$(aws s3api list-objects-v2 \ - --bucket therock-nightly-tarball \ - --no-sign-request \ - --output json \ - --query "sort_by(Contents[?contains(Key, 'linux-gfx94X')], &LastModified)[-1].Key") - KEY=${KEY//\"/} - test -n "$KEY" || { echo "No gfx94X tarball found"; exit 1; } - echo "gfx94X=${KEY}" >> $GITHUB_OUTPUT - KEY=$(aws s3api list-objects-v2 \ - --bucket therock-nightly-tarball \ - --no-sign-request \ - --output json \ - --query "sort_by(Contents[?contains(Key, 'linux-gfx110X')], &LastModified)[-1].Key") - KEY=${KEY//\"/} - test -n "$KEY" || { echo "No gfx110X tarball found"; exit 1; } - echo "gfx110X=${KEY}" >> $GITHUB_OUTPUT + test -n "$KEY" || { echo "No ${{ matrix.gpu }} tarball found"; exit 1; } + echo "tarball=${KEY}" >> $GITHUB_OUTPUT - name: Login to Docker Hub uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0 @@ -88,13 +74,11 @@ jobs: with: file: projects/rocprofiler-sdk/docker/Dockerfile.ci platforms: linux/amd64 - push: true + push: ${{ github.event_name != 'pull_request' }} build-args: | BASE_TAG=${{ matrix.os }} GPU_TYPE=${{ matrix.gpu }} - GFX94X_FILE_NAME=${{ steps.therock.outputs.gfx94X }} - GFX110X_FILE_NAME=${{ steps.therock.outputs.gfx110X }} - GFX120X_FILE_NAME=${{ 
steps.therock.outputs.gfx120X }} + GPU_TARBALL=${{ steps.therock.outputs.tarball }} tags: | docker.io/rocm/rocprofiler-private:${{ matrix.os }}-${{ matrix.gpu }}-latest cache-from: type=gha,scope=rocprofiler-ci-${{ matrix.os }}-${{ matrix.gpu }} diff --git a/.github/workflows/rocprofiler-sdk-code_coverage.yml b/.github/workflows/rocprofiler-sdk-code_coverage.yml index 5295af14c6..173832aa46 100644 --- a/.github/workflows/rocprofiler-sdk-code_coverage.yml +++ b/.github/workflows/rocprofiler-sdk-code_coverage.yml @@ -32,6 +32,7 @@ concurrency: env: # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake ROCM_PATH: "/opt/rocm" + ROCM_VERSION: "7.0.0" GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1201" PATH: "/usr/bin:$PATH" ## No tests should be excluded here except for extreme emergencies; tests should only be disabled in CMake @@ -58,17 +59,16 @@ env: jobs: code-coverage: - name: Code Coverage • ${{ matrix.runner }} • ${{ matrix.os }} + name: Code Coverage • ${{ matrix.system.gpu }} • ${{ matrix.system.os }} strategy: # fail-fast: false matrix: - runner: ['mi300a'] - os: ['ubuntu-22.04'] - build-type: ['Release'] + system: + - { gpu: 'mi300a', runner: 'rocprofiler-mi300a-dind', os: 'ubuntu-22.04', build-type: 'Release', gpu-target: 'gfx94X' } - runs-on: rocprofiler-mi300a-dind + runs-on: ${{ matrix.system.runner }} container: - image: docker.io/rocm/rocprofiler-private:${{ matrix.os }}-gfx94X-latest + image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-${{ matrix.system.gpu-target }}-latest credentials: username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }} password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }} @@ -79,7 +79,7 @@ jobs: # define this for containers env: GIT_DISCOVERY_ACROSS_FILESYSTEM: 1 - GPU_RUNNER: 'rocprofiler-mi300a-dind' + GPU_RUNNER: ${{ matrix.system.runner }} GCC_COMPILER_VERSION: 11 ROCPROFILER_PC_SAMPLING_BETA_ENABLED: 1 @@ -88,8 +88,8 @@ 
jobs: shell: bash working-directory: /tmp run: | - tar -xf /opt/rocm-gfx94X.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0 - ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }} + tar -xf ${{ env.ROCM_PATH }}-${{ matrix.system.gpu-target }}.tar.gz -C ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} + ln -s ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} ${{ env.ROCM_PATH }} echo "ROCm installed to: ${{ env.ROCM_PATH }}" - name: Clone ROCProfiler SDK & AQLProfile & ROCProfiler Register & ROCR-Runtime @@ -199,11 +199,11 @@ jobs: echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; } for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done - cat /opt/rocm/.info/version + cat ${{ env.ROCM_PATH }}/.info/version ls -la - name: Enable PC Sampling - if: ${{ contains(matrix.runner, 'mi200') || contains(matrix.runner, 'mi300a') }} + if: ${{ contains(matrix.system.gpu, 'mi200') || contains(matrix.system.gpu, 'mi300a') }} shell: bash working-directory: projects/rocprofiler-sdk run: | @@ -212,7 +212,7 @@ jobs: - name: Setup ccache uses: hendrikmuhs/ccache-action@63069e3931dedbf3b63792097479563182fe70d1 # v1.2.18 with: - key: ccache-${{ matrix.os }}-rocprofiler-mi300a-dind-${{ matrix.runner }} + key: ccache-${{ matrix.system.os }}-${{ matrix.system.runner }}-${{ matrix.system.gpu }} max-size: 2G save: true @@ -223,7 +223,7 @@ jobs: export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:/usr/local/bin:~/.local/bin:$PATH echo -e "Building & Installing ROCDecode..." cmake -B build-rocdecode \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} \ -DCMAKE_CXX_COMPILER=${{ env.ROCM_PATH }}/bin/amdclang++ \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ @@ -234,7 +234,7 @@ jobs: echo -e "ROCDecode Installed Successfully!" 
echo -e "Building & Installing ROCJPEG..." cmake -B build-rocjpeg \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} \ -DCMAKE_CXX_COMPILER=${{ env.ROCM_PATH }}/bin/amdclang++ \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ @@ -253,8 +253,8 @@ jobs: echo "Install ROCProfiler-Register" cmake -B build-rocprofiler-register \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ . @@ -272,8 +272,8 @@ jobs: echo "Install ROCR-Runtime..." cmake -B build \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-7.0.0;${{ env.ROCM_PATH }}-7.0.0/llvm' \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }};${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }}/llvm' \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ . cmake --build build --target all --parallel 16 cmake --build build --target install @@ -288,8 +288,8 @@ jobs: echo "Install Aqlprofile..." cmake -B build-aqlprofile \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ . 
@@ -305,20 +305,20 @@ jobs: LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH python3 ./source/scripts/run-ci.py -B build - --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-codecov + --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-codecov --build-jobs 16 - --site 'rocprofiler-mi300a-dind' + --site ${{ matrix.system.runner }} --gpu-targets ${{ env.GPU_TARGETS }} --coverage all --run-attempt ${{ github.run_attempt }} -- - -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} + -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm' -DPython3_EXECUTABLE=$(which python3) ${{ env.GLOBAL_CMAKE_OPTIONS }} -- - -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" - -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" + -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" + -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" - name: Configure, Build, and Test (Tests Code Coverage) timeout-minutes: 30 @@ -329,20 +329,20 @@ jobs: LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH python3 ./source/scripts/run-ci.py -B build - --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-codecov-tests + --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-codecov-tests --build-jobs 16 - --site 'rocprofiler-mi300a-dind' + --site ${{ matrix.system.runner }} --gpu-targets ${{ env.GPU_TARGETS }} --coverage tests --run-attempt ${{ github.run_attempt }} -- - -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} + -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm' -DPython3_EXECUTABLE=$(which 
python3) ${{ env.GLOBAL_CMAKE_OPTIONS }} -- - -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" - -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" + -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" + -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" - name: Configure, Build, and Test (Samples Code Coverage) timeout-minutes: 30 @@ -353,20 +353,20 @@ jobs: LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH python3 ./source/scripts/run-ci.py -B build - --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-codecov-samples + --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-codecov-samples --build-jobs 16 - --site 'rocprofiler-mi300a-dind' + --site ${{ matrix.system.runner }} --gpu-targets ${{ env.GPU_TARGETS }} --coverage samples --run-attempt ${{ github.run_attempt }} -- - -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} + -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm' -DPython3_EXECUTABLE=$(which python3) ${{ env.GLOBAL_CMAKE_OPTIONS }} -- - -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" - -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" + -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" + -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" - name: Save XML Code Coverage id: save-coverage @@ -492,7 +492,7 @@ jobs: # - unittests # - integration-tests # - ctest -N -LE 'samples|tests' -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" -O ctest.mislabeled.log + ctest -N -LE 'samples|tests' -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" -O ctest.mislabeled.log grep 'Total Tests: 0' ctest.mislabeled.log # # if following fails, then there is overlap between the labels. 
diff --git a/.github/workflows/rocprofiler-sdk-continuous_integration.yml b/.github/workflows/rocprofiler-sdk-continuous_integration.yml index 9fd4f9c9a0..4df949294a 100644 --- a/.github/workflows/rocprofiler-sdk-continuous_integration.yml +++ b/.github/workflows/rocprofiler-sdk-continuous_integration.yml @@ -34,6 +34,9 @@ permissions: env: # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake ROCM_PATH: "/opt/rocm" + ROCM_VERSION: "7.0.0" + PYTHON_VENV_PATH: "rocprofiler-sdk" + PYTHON_VENV_ACTIVATE: "rocprofiler-sdk/bin/activate" GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1201" PATH: "/usr/bin:$PATH" @@ -74,12 +77,12 @@ jobs: fail-fast: false matrix: system: - - { gpu: 'navi4', runner: 'rocprofiler-navi4-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx120X", ci-flags: '--linter clang-tidy', gpu-target: "gfx1201" } - - { gpu: 'navi3', runner: 'rocprofiler-navi3-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx110X", ci-flags: '--linter clang-tidy', gpu-target: "gfx1101" } - - { gpu: 'mi325', runner: 'linux-mi325-1gpu-ossci-rocm', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx94X", ci-flags: '--linter clang-tidy', gpu-target: "gfx942" } + - { gpu: 'navi4', runner: 'rocprofiler-navi4-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', ci-flags: '--linter clang-tidy', gpu-target: 'gfx120X' } + - { gpu: 'navi3', runner: 'rocprofiler-navi3-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', ci-flags: '--linter clang-tidy', gpu-target: 'gfx110X' } + - { gpu: 'mi325', runner: 'linux-mi325-1gpu-ossci-rocm', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', ci-flags: '--linter clang-tidy', gpu-target: 'gfx94X' } runs-on: ${{ matrix.system.runner }} container: - image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-${{ matrix.system.therock-s3 }}-latest + image: 
docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-${{ matrix.system.gpu-target }}-latest credentials: username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }} password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }} @@ -95,8 +98,8 @@ jobs: shell: bash working-directory: /tmp run: | - tar -xf /opt/rocm-${{ matrix.system.therock-s3 }}.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0 - ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }} + tar -xf ${{ env.ROCM_PATH }}-${{ matrix.system.gpu-target }}.tar.gz -C ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} + ln -s ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} ${{ env.ROCM_PATH }} echo "ROCm installed to: ${{ env.ROCM_PATH }}" - name: Clone ROCProfiler SDK & AQLProfile & ROCProfiler Register & ROCR-Runtime @@ -182,7 +185,7 @@ jobs: run: | echo -e "Building & Installing ROCDecode..." cmake -B build-rocdecode \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} \ -DCMAKE_CXX_COMPILER=${{ env.ROCM_PATH }}/bin/amdclang++ \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ @@ -193,7 +196,7 @@ jobs: echo -e "ROCDecode Installed Successfully!" echo -e "Building & Installing ROCJPEG..." 
cmake -B build-rocjpeg \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} \ -DCMAKE_CXX_COMPILER=${{ env.ROCM_PATH }}/bin/amdclang++ \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ @@ -212,8 +215,8 @@ jobs: echo "Install ROCProfiler-Register" cmake -B build-rocprofiler-register \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ . @@ -231,8 +234,8 @@ jobs: echo "Install ROCR-Runtime..." cmake -B build \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-7.0.0;${{ env.ROCM_PATH }}-7.0.0/llvm' \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }};${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }}/llvm' \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ . cmake --build build --target all --parallel 16 cmake --build build --target install @@ -247,8 +250,8 @@ jobs: echo "Install Aqlprofile..." cmake -B build-aqlprofile \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ . 
@@ -264,7 +267,7 @@ jobs: echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; } for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done - cat /opt/rocm/.info/version + cat ${{ env.ROCM_PATH }}/.info/version ls -la - name: Enable PC Sampling @@ -294,7 +297,7 @@ jobs: -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} \ -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-sdk \ -DCPACK_GENERATOR='DEB;RPM;TGZ' \ - -DCPACK_PACKAGING_INSTALL_PREFIX="$(realpath /opt/rocm)" \ + -DCPACK_PACKAGING_INSTALL_PREFIX="$(realpath ${{ env.ROCM_PATH }})" \ -DPython3_EXECUTABLE=$(which python3) \ -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm' \ ${{ env.GLOBAL_CMAKE_OPTIONS }} -- \ @@ -358,8 +361,8 @@ jobs: run: | export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH - CMAKE_PREFIX_PATH=/opt/rocm cmake -B build-samples-deb /opt/rocm/share/rocprofiler-sdk/samples - CMAKE_PREFIX_PATH=/opt/rocm cmake -B build-tests-deb -DGPU_TARGETS="gfx942" /opt/rocm/share/rocprofiler-sdk/tests + CMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} cmake -B build-samples-deb ${{ env.ROCM_PATH }}/share/rocprofiler-sdk/samples + CMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} cmake -B build-tests-deb -DGPU_TARGETS="gfx942" ${{ env.ROCM_PATH }}/share/rocprofiler-sdk/tests cmake --build build-samples-deb --target all --parallel 16 cmake --build build-tests-deb --target all --parallel 16 ctest --test-dir build-samples-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure @@ -379,25 +382,25 @@ jobs: # RHEL/SLES (RPM) job(s) # ----------------------------------------------------------------------------- core-rpm: - name: Core • ${{ matrix.runner }} • ${{ matrix.os }} + name: Core • ${{ 
matrix.system.gpu }} • ${{ matrix.system.os }} strategy: fail-fast: false matrix: - runner: [ 'mi325' ] - os: [ 'rhel-8.8', 'rhel-9.5', 'sles-15.6' ] - build-type: [ 'RelWithDebInfo' ] - ci-flags: [ '' ] - runs-on: linux-mi325-1gpu-ossci-rocm + system: + - { os: 'rhel-8.8', runner: 'linux-mi325-1gpu-ossci-rocm', gpu: 'mi325', gpu-target: 'gfx94X', build-type: 'RelWithDebInfo', ci-flags: '' } + - { os: 'rhel-9.5', runner: 'linux-mi325-1gpu-ossci-rocm', gpu: 'mi325', gpu-target: 'gfx94X', build-type: 'RelWithDebInfo', ci-flags: '' } + - { os: 'sles-15.6', runner: 'linux-mi325-1gpu-ossci-rocm', gpu: 'mi325', gpu-target: 'gfx94X', build-type: 'RelWithDebInfo', ci-flags: '' } + runs-on: ${{ matrix.system.runner }} container: - image: docker.io/rocm/rocprofiler-private:${{ matrix.os }}-gfx94X-latest + image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-${{ matrix.system.gpu-target }}-latest credentials: username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }} password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }} options: --privileged env: GIT_DISCOVERY_ACROSS_FILESYSTEM: 1 - OS_TYPE: ${{ matrix.os }} - GPU_RUNNER: ${{ matrix.runner }} + OS_TYPE: ${{ matrix.system.os }} + GPU_RUNNER: ${{ matrix.system.gpu }} steps: - name: Clone ROCProfiler SDK & AQLProfile & ROCProfiler Register & ROCR-Runtime uses: actions/checkout@v5 @@ -439,8 +442,8 @@ jobs: shell: bash working-directory: /tmp run: | - tar -xf /opt/rocm-gfx94X.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0 - ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }} + tar -xf ${{ env.ROCM_PATH }}-${{ matrix.system.gpu-target }}.tar.gz -C ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} + ln -s ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} ${{ env.ROCM_PATH }} echo "ROCm installed to: ${{ env.ROCM_PATH }}" - name: Install requirements (venv) @@ -449,8 +452,8 @@ jobs: working-directory: projects/rocprofiler-sdk run: | git config --global --add safe.directory '*' - python3 -m venv rocprofiler-sdk - source rocprofiler-sdk/bin/activate + 
python3 -m venv ${{ env.PYTHON_VENV_PATH }} + source ${{ env.PYTHON_VENV_ACTIVATE }} export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH python3 -m pip install --upgrade pip python3 -m pip install --upgrade -r requirements.txt @@ -461,7 +464,7 @@ jobs: ${{ env.ROCM_PATH }}/libexec/*rocprofiler-sdk* - name: Install Curl for RHEL 8.8 - if: ${{ matrix.os == 'rhel-8.8' }} + if: ${{ matrix.system.os == 'rhel-8.8' }} run: | dnf install -y curl ln -s /usr/local/bin/curl /usr/bin/curl @@ -469,7 +472,7 @@ jobs: - name: Setup ccache uses: hendrikmuhs/ccache-action@63069e3931dedbf3b63792097479563182fe70d1 # v1.2.18 with: - key: ccache-${{ matrix.os }}-linux-mi325-1gpu-ossci-rocm-${{ matrix.runner }} + key: ccache-${{ matrix.system.os }}-${{ matrix.system.runner }}-${{ matrix.system.gpu }} max-size: 2G save: true variant: sccache @@ -481,8 +484,8 @@ jobs: echo "Install ROCProfiler-Register" cmake -B build-rocprofiler-register \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/sccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/sccache \ . @@ -495,16 +498,16 @@ jobs: shell: bash working-directory: projects/rocr-runtime run: | - python3 -m venv rocprofiler-sdk - source rocprofiler-sdk/bin/activate + python3 -m venv ${{ env.PYTHON_VENV_PATH }} + source ${{ env.PYTHON_VENV_ACTIVATE }} export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH python3 -m pip install --upgrade pip python3 -m pip install --upgrade cmake echo "Install ROCR-Runtime..." 
cmake -B build \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-7.0.0;${{ env.ROCM_PATH }}-7.0.0/llvm' \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }};${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }}/llvm' \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ . cmake --build build --target all --parallel 16 cmake --build build --target install @@ -515,15 +518,15 @@ jobs: working-directory: projects/aqlprofile run: | echo "Install Aqlprofile." - python3 -m venv rocprofiler-sdk - source rocprofiler-sdk/bin/activate + python3 -m venv ${{ env.PYTHON_VENV_PATH }} + source ${{ env.PYTHON_VENV_ACTIVATE }} export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH python3 -m pip install --upgrade pip python3 -m pip install --upgrade cmake cmake -B build-aqlprofile \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/sccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/sccache \ . @@ -532,7 +535,7 @@ jobs: echo "✅ AQLProfile Installation complete!" 
- name: Enable PC Sampling - if: ${{ contains(matrix.runner, 'mi200') || contains(matrix.runner, 'mi300a') }} + if: ${{ contains(matrix.system.gpu, 'mi200') || contains(matrix.system.gpu, 'mi300a') }} shell: bash working-directory: projects/rocprofiler-sdk run: @@ -542,12 +545,12 @@ jobs: shell: bash working-directory: projects/rocprofiler-sdk run: | - source rocprofiler-sdk/bin/activate + source ${{ env.PYTHON_VENV_ACTIVATE }} echo "PATH: ${PATH}" echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; } for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done - cat /opt/rocm/.info/version + cat ${{ env.ROCM_PATH }}/.info/version ls -la - name: Configure, Build, and Test @@ -555,41 +558,42 @@ jobs: shell: bash working-directory: projects/rocprofiler-sdk run: - source rocprofiler-sdk/bin/activate; + source ${{ env.PYTHON_VENV_ACTIVATE }}; PATH=~/.local/bin:/opt/rh/gcc-toolset-11/root/usr/bin:$PATH python3 ./source/scripts/run-ci.py -B build - --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-core + --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-core --mode ${CI_MODE} --build-jobs 16 - --site 'linux-mi325-1gpu-ossci-rocm' + --site ${{ matrix.system.runner }} --gpu-targets ${{ env.GPU_TARGETS }} --run-attempt ${{ github.run_attempt }} - ${{ matrix.ci-flags }} + ${{ matrix.system.ci-flags }} -- -DROCPROFILER_DEP_ROCMCORE=ON -DROCPROFILER_BUILD_DOCS=OFF - -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} + -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm' -DPython3_EXECUTABLE=$(which python3) ${{ env.GLOBAL_CMAKE_OPTIONS }} -- - -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" - -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" + -LE "${${{ matrix.system.gpu
}}_EXCLUDE_LABEL_REGEX}" + -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" sanitizers: - name: ${{ matrix.sanitizer }} • ${{ matrix.runner }} • ${{ matrix.os }} + name: ${{ matrix.system.sanitizer }} • ${{ matrix.system.gpu }} • ${{ matrix.system.os }} strategy: fail-fast: false matrix: - runner: ['mi325'] - sanitizer: ['AddressSanitizer', 'ThreadSanitizer', 'LeakSanitizer', 'UndefinedBehaviorSanitizer'] - os: ['ubuntu-22.04'] - build-type: ['RelWithDebInfo'] + system: + - { sanitizer: 'AddressSanitizer', os: 'ubuntu-22.04', runner: 'linux-mi325-1gpu-ossci-rocm', gpu: 'mi325', gpu-target: 'gfx94X', build-type: 'RelWithDebInfo' } + - { sanitizer: 'ThreadSanitizer', os: 'ubuntu-22.04', runner: 'linux-mi325-1gpu-ossci-rocm', gpu: 'mi325', gpu-target: 'gfx94X', build-type: 'RelWithDebInfo' } + - { sanitizer: 'LeakSanitizer', os: 'ubuntu-22.04', runner: 'linux-mi325-1gpu-ossci-rocm', gpu: 'mi325', gpu-target: 'gfx94X', build-type: 'RelWithDebInfo' } + - { sanitizer: 'UndefinedBehaviorSanitizer', os: 'ubuntu-22.04', runner: 'linux-mi325-1gpu-ossci-rocm', gpu: 'mi325', gpu-target: 'gfx94X', build-type: 'RelWithDebInfo' } if: ${{ contains(github.event_name, 'pull_request') }} - runs-on: linux-mi325-1gpu-ossci-rocm + runs-on: ${{ matrix.system.runner }} container: - image: docker.io/rocm/rocprofiler-private:${{ matrix.os }}-gfx94X-latest + image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-${{ matrix.system.gpu-target }}-latest credentials: username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }} password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }} @@ -601,7 +605,7 @@ jobs: env: GIT_DISCOVERY_ACROSS_FILESYSTEM: 1 GCC_COMPILER_VERSION: 13 - GPU_RUNNER: ${{ matrix.runner }} + GPU_RUNNER: ${{ matrix.system.gpu }} steps: - name: Install Latest Nightly ROCm @@ -609,8 +613,8 @@ jobs: working-directory: /tmp run: | ls -lah /opt/ - tar -xf /opt/rocm-gfx94X.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0 - ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }} + tar -xf ${{ 
env.ROCM_PATH }}-${{ matrix.system.gpu-target }}.tar.gz -C ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} + ln -s ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} ${{ env.ROCM_PATH }} echo "ROCm installed to: ${{ env.ROCM_PATH }}" - name: Clone ROCProfiler SDK & AQLProfile & ROCProfiler Register & ROCR-Runtime @@ -671,11 +675,11 @@ jobs: run: | which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; } for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done - cat /opt/rocm/.info/version + cat ${{ env.ROCM_PATH }}/.info/version ls -la - name: Enable PC Sampling - if: ${{ contains(matrix.runner, 'mi200') || contains(matrix.runner, 'mi300a') }} + if: ${{ contains(matrix.system.gpu, 'mi200') || contains(matrix.system.gpu, 'mi300a') }} shell: bash working-directory: projects/rocprofiler-sdk run: echo 'ROCPROFILER_PC_SAMPLING_BETA_ENABLED=1' >> $GITHUB_ENV @@ -683,7 +687,7 @@ jobs: - name: Setup ccache uses: hendrikmuhs/ccache-action@63069e3931dedbf3b63792097479563182fe70d1 # v1.2.18 with: - key: ccache-${{ matrix.os }}-linux-mi325-1gpu-ossci-rocm-${{ matrix.runner }}-${{ matrix.sanitizer}} + key: ccache-${{ matrix.system.os }}-${{ matrix.system.runner }}-${{ matrix.system.gpu }}-${{ matrix.system.sanitizer}} max-size: 2G save: true @@ -694,8 +698,8 @@ jobs: echo "Install ROCProfiler-Register" cmake -B build-rocprofiler-register \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ . @@ -711,8 +715,8 @@ jobs: echo "Install ROCR-Runtime..." 
cmake -B build \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-7.0.0;${{ env.ROCM_PATH }}-7.0.0/llvm' \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }};${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }}/llvm' \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ . @@ -727,8 +731,8 @@ jobs: echo "Install Aqlprofile." cmake -B build-aqlprofile \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ - -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ . @@ -743,18 +747,18 @@ jobs: run: sudo sysctl -w vm.mmap_rnd_bits=28; python3 ./source/scripts/run-ci.py -B build - --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-${{ matrix.sanitizer }} + --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-${{ matrix.system.sanitizer }} --build-jobs 16 - --site 'linux-mi325-1gpu-ossci-rocm' + --site ${{ matrix.system.runner }} --gpu-targets ${{ env.GPU_TARGETS }} - --memcheck ${{ matrix.sanitizer }} + --memcheck ${{ matrix.system.sanitizer }} --run-attempt ${{ github.run_attempt }} -- - -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} + -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} -DCMAKE_INSTALL_PREFIX="${{ env.ROCM_PATH }}" -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm' -DPython3_EXECUTABLE=$(which python3) ${{ env.GLOBAL_CMAKE_OPTIONS }} -- - -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" - -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" + -LE "${${{ matrix.system.gpu 
}}_EXCLUDE_LABEL_REGEX}" + -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" diff --git a/projects/rocprofiler-sdk/docker/Dockerfile.ci b/projects/rocprofiler-sdk/docker/Dockerfile.ci index 2dbaab6258..18f59052a8 100644 --- a/projects/rocprofiler-sdk/docker/Dockerfile.ci +++ b/projects/rocprofiler-sdk/docker/Dockerfile.ci @@ -1,18 +1,8 @@ # Build a thin "base with deps" image atop the private runner image ARG BASE_TAG=ubuntu-22.04 FROM rocm/rocprofiler-private:${BASE_TAG} - -# GPU Type from GitHub Actions ARG GPU_TYPE=gfx94X -ENV GPU_TYPE=${GPU_TYPE} - -# Nightly Tarball Keys from GitHub Actions -ARG GFX94X_FILE_NAME -ENV GFX94X_FILE_NAME=${GFX94X_FILE_NAME} -ARG GFX110X_FILE_NAME -ENV GFX110X_FILE_NAME=${GFX110X_FILE_NAME} -ARG GFX120X_FILE_NAME -ENV GFX120X_FILE_NAME=${GFX120X_FILE_NAME} +ARG GPU_TARBALL SHELL ["/bin/bash","-lc"] COPY projects/rocprofiler-sdk/requirements.txt /root/requirements.txt @@ -27,100 +17,81 @@ ENV CMAKE_PREFIX_PATH=/opt/rocm ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:/opt/rocm/bin:/opt/rocm/llvm/bin:/usr/local/bin:~/.local/bin:${PATH} ENV LD_LIBRARY_PATH=/opt/rocm/lib:/opt/rocm/llvm/lib:${LD_LIBRARY_PATH} -RUN set -euo pipefail; \ +# Debian/Ubuntu +RUN set -euo pipefail; \ if [ -f /etc/debian_version ]; then \ - apt-get update && \ - apt-get install -y curl wget gpg python3 python3-pip build-essential coreutils software-properties-common git cmake g++-11 g++-12 libdw-dev libsqlite3-dev libdrm-dev file autoconf pkg-config rpm libzstd-dev && \ - add-apt-repository ppa:git-core/ppa && \ - mkdir -p /etc/apt/keyrings && \ - wget -N -P /tmp/ https://repo.radeon.com/amdgpu-install/.7.0/ubuntu/jammy/amdgpu-install_7.0.70000-1_all.deb && \ - apt-get install -y /tmp/amdgpu-install_7.0.70000-1_all.deb && \ - sed -i "s/\/30.10/\/.30.10/" /etc/apt/sources.list.d/amdgpu*.list && \ - sed -i "s|rocm/apt/7.0|rocm/apt/.apt_7.0|; s|graphics/7.0|graphics/.7.0|" /etc/apt/sources.list.d/rocm.list && \ - apt-get update && \ - apt-get install -y git
rocm-openmp-sdk libva-amdgpu-dev rocm-llvm-dev && \ - python3 -m pip install -U awscli pipx && \ - python3 -m pip install -U --user -r /root/requirements.txt; \ - elif [ $(grep -i "ID=.*rhel" /etc/os-release | wc -l) -gt 0 ]; then \ - dnf clean all || true; \ - dnf install -y perl-ExtUtils-MakeMaker python3-pip || true; \ - if [ $(grep -i "VERSION_ID=\"8.8\"" /etc/os-release | wc -l) -gt 0 ]; then \ - wget https://www.kernel.org/pub/software/scm/git/git-2.51.0.tar.xz; \ - tar -xf git-2.51.0.tar.xz; \ - cd git-2.51.0; \ - rm -rf /etc/yum.repos.d/redhat-partner.repo || true; \ + apt-get update && \ + apt-get install -y curl wget gpg python3 python3-pip build-essential coreutils software-properties-common git cmake g++-11 g++-12 libdw-dev libsqlite3-dev libdrm-dev file autoconf pkg-config rpm libzstd-dev && \ + add-apt-repository ppa:git-core/ppa && \ + mkdir -p /etc/apt/keyrings && \ + wget -N -P /tmp/ https://repo.radeon.com/amdgpu-install/7.0/ubuntu/jammy/amdgpu-install_7.0.70000-1_all.deb && \ + apt-get install -y /tmp/amdgpu-install_7.0.70000-1_all.deb && \ + apt-get update && \ + apt-get install -y git rocm-openmp-sdk libva-amdgpu-dev rocm-llvm-dev && \ + python3 -m pip install -U awscli pipx && \ + python3 -m pip install -U --user -r /root/requirements.txt; \ + fi; + +# RHEL +RUN set -euo pipefail; \ + if [ $(grep -i "ID=.*rhel" /etc/os-release | wc -l) -gt 0 ]; then \ + dnf clean all || true; \ + dnf install -y perl-ExtUtils-MakeMaker python3-pip || true; \ + if [ $(grep -i "VERSION_ID=\"8.8\"" /etc/os-release | wc -l) -gt 0 ]; then \ + wget https://www.kernel.org/pub/software/scm/git/git-2.51.0.tar.xz; \ + tar -xf git-2.51.0.tar.xz; \ + cd git-2.51.0; \ + rm -rf /etc/yum.repos.d/redhat-partner.repo || true; \ + dnf clean all; \ + dnf install -y perl-ExtUtils-MakeMaker || true; \ + export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH; \ + make prefix=/usr all -j 32; \ + make prefix=/usr install; \ + cd ..; rm -rf git-2.51.0*; \ + echo -e 
"[ROCm-7.0.0]\nname=ROCm7.0.0\nbaseurl=https://repo.radeon.com/rocm/el8/7.0/main\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/rocm.repo; \ + echo -e "[amdgpu]\nname=amdgpu\nbaseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.9/main/x86_64/\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo; \ + else \ + rm -rf /etc/yum.repos.d/RHEL-partners.repo; \ + dnf clean all; \ + echo -e "[ROCm-7.0.0]\nname=ROCm7.0.0\nbaseurl=https://repo.radeon.com/rocm/el9/7.0/main\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/rocm.repo; \ + echo -e "[amdgpu]\nname=amdgpu\nbaseurl=https://repo.radeon.com/amdgpu/30.10/rhel/9.6/main/x86_64/\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo; \ + fi; \ dnf clean all; \ - dnf install -y perl-ExtUtils-MakeMaker || true; \ - export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH; \ - make prefix=/usr all -j 32; \ - make prefix=/usr install; \ - cd ..; rm -rf git-2.51.0*; \ - echo -e "[ROCm-7.0.0]\nname=ROCm7.0.0\nbaseurl=https://repo.radeon.com/rocm/el8/7.0_rc1/main\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/rocm.repo; \ - echo -e "[amdgpu]\nname=amdgpu\nbaseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.8/main/x86_64/\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo; \ - else \ - rm -rf /etc/yum.repos.d/RHEL-partners.repo; \ - dnf clean all; \ - echo -e "[ROCm-7.0.0]\nname=ROCm7.0.0\nbaseurl=https://repo.radeon.com/rocm/el9/7.0_rc1/main\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/rocm.repo; \ - echo -e 
"[amdgpu]\nname=amdgpu\nbaseurl=https://repo.radeon.com/amdgpu/30.10_rc1/rhel/9.5/main/x86_64/\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo; \ - fi; \ - dnf clean all; \ - dnf install -y rocm-openmp rocm-openmp-sdk rocm-llvm-devel hipify-clang libsqlite3x-devel elfutils-devel; \ - python3 -m pip install -U awscli pipx; \ - python3 -m venv rocprofiler-sdk; \ - source rocprofiler-sdk/bin/activate; \ - export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:${PATH}; \ - python3 -m pip install --upgrade pip; \ - python3 -m pip install --upgrade -r /root/requirements.txt; \ - else \ - echo -e "[ROCm-7.0.0]\nname=ROCm7.0.0\nbaseurl=https://repo.radeon.com/rocm/zyp/7.0_rc1/main\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/zypp/repos.d/rocm.repo; \ - echo -e "[amdgpu]\nname=amdgpu\nbaseurl=https://repo.radeon.com/amdgpu/30.10_rc1/sle/15.6/main/x86_64/\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/zypp/repos.d/amdgpu.repo; \ - zypper --gpg-auto-import-keys refresh; \ - zypper --non-interactive install -y rocm-openmp rocm-openmp-sdk rocm-llvm-devel hipify-clang sqlite3-devel python3-pip; \ - python3 -m venv rocprofiler-sdk; \ - source rocprofiler-sdk/bin/activate; \ - python3 -m pip install --upgrade pip pipx; \ - python3 -m pipx install awscli; \ - python3 -m pipx ensurepath; \ - source ~/.bashrc; \ - python3 -m pip install --upgrade pip || true; \ - python3 -m pip install --upgrade -r /root/requirements.txt || true; \ - cd /tmp; wget https://www.kernel.org/pub/software/scm/git/git-2.51.0.tar.xz; \ - tar -xf git-2.51.0.tar.xz; cd git-2.51.0; make prefix=/usr all -j 32; make prefix=/usr install; \ - cd /tmp; ln -s -f /usr/bin/git /usr/local/bin/git; rm -rf git-2.51.0*; \ - fi + dnf install -y rocm-openmp rocm-openmp-sdk rocm-llvm-devel hipify-clang libsqlite3x-devel elfutils-devel; \ + python3 -m pip install -U 
awscli pipx; \ + python3 -m venv rocprofiler-sdk; \ + source rocprofiler-sdk/bin/activate; \ + export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:${PATH}; \ + python3 -m pip install --upgrade pip; \ + python3 -m pip install --upgrade -r /root/requirements.txt; \ + fi; -# Nightly Tarball - gfx94X +# SLES +RUN set -euo pipefail; \ + if [ $(grep -i "sles" /etc/os-release | wc -l) -gt 0 ]; then \ + echo -e "[ROCm-7.0.0]\nname=ROCm7.0.0\nbaseurl=https://repo.radeon.com/rocm/zyp/7.0_rc1/main\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/zypp/repos.d/rocm.repo; \ + echo -e "[amdgpu]\nname=amdgpu\nbaseurl=https://repo.radeon.com/amdgpu/30.10_rc1/sle/15.6/main/x86_64/\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/zypp/repos.d/amdgpu.repo; \ + zypper --gpg-auto-import-keys refresh; \ + zypper --non-interactive install -y rocm-openmp rocm-openmp-sdk rocm-llvm-devel hipify-clang sqlite3-devel python3-pip; \ + python3 -m venv rocprofiler-sdk; \ + source rocprofiler-sdk/bin/activate; \ + python3 -m pip install --upgrade pip pipx; \ + python3 -m pipx install awscli; \ + python3 -m pipx ensurepath; \ + source ~/.bashrc; \ + python3 -m pip install --upgrade pip || true; \ + python3 -m pip install --upgrade -r /root/requirements.txt || true; \ + cd /tmp; wget https://www.kernel.org/pub/software/scm/git/git-2.51.0.tar.xz; \ + tar -xf git-2.51.0.tar.xz; cd git-2.51.0; make prefix=/usr all -j 32; make prefix=/usr install; \ + cd /tmp; ln -s -f /usr/bin/git /usr/local/bin/git; rm -rf git-2.51.0*; \ + fi; + +# Nightly Tarball RUN set -euo pipefail; \ - if [ "${GPU_TYPE}" = "gfx94X" ]; then \ if [ $(grep -i "sles" /etc/os-release | wc -l) -gt 0 ]; then \ source rocprofiler-sdk/bin/activate; \ python3 -m pipx ensurepath; \ source ~/.bashrc; \ fi; \ - aws s3 cp "s3://therock-nightly-tarball/${GFX94X_FILE_NAME}" rocm-gfx94X.tar.gz --no-sign-request && \ - mv rocm-gfx94X.tar.gz 
/opt/rocm-gfx94X.tar.gz; \ - fi - -# Nightly Tarball - gfx110X -RUN set -euo pipefail; \ - if [ "${GPU_TYPE}" = "gfx110X" ]; then \ - if [ $(grep -i "sles" /etc/os-release | wc -l) -gt 0 ]; then \ - source rocprofiler-sdk/bin/activate; \ - python3 -m pipx ensurepath; \ - source ~/.bashrc; \ - fi; \ - aws s3 cp "s3://therock-nightly-tarball/${GFX110X_FILE_NAME}" rocm-gfx110X.tar.gz --no-sign-request && \ - mv rocm-gfx110X.tar.gz /opt/rocm-gfx110X.tar.gz; \ - fi - -# Nightly Tarball - gfx120X -RUN set -euo pipefail; \ - if [ "${GPU_TYPE}" = "gfx120X" ]; then \ - if [ $(grep -i "sles" /etc/os-release | wc -l) -gt 0 ]; then \ - source rocprofiler-sdk/bin/activate; \ - python3 -m pipx ensurepath; \ - source ~/.bashrc; \ - fi; \ - aws s3 cp "s3://therock-nightly-tarball/${GFX120X_FILE_NAME}" rocm-gfx120X.tar.gz --no-sign-request && \ - mv rocm-gfx120X.tar.gz /opt/rocm-gfx120X.tar.gz; \ - fi + aws s3 cp "s3://therock-nightly-tarball/${GPU_TARBALL}" rocm-${GPU_TYPE}.tar.gz --no-sign-request && \ + mv rocm-${GPU_TYPE}.tar.gz /opt/rocm-${GPU_TYPE}.tar.gz;