diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml
index ceb5a49d72..9fdd20916d 100644
--- a/.github/workflows/continuous_integration.yml
+++ b/.github/workflows/continuous_integration.yml
@@ -11,8 +11,6 @@ on:
     paths-ignore:
       - '*.md'
       - 'source/docs/**'
-  issue_comment:
-    types: [created]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -31,32 +29,11 @@ jobs:
     strategy:
       fail-fast: true
       matrix:
-        runner: [mi200-runner-set, mi300-runner-set, vega20-runner-set, navi3-runner-set, rocprofiler-runner-set-multi-gpus]
-        vega20:
-          - ${{ github.event_name == 'issue_comment' && contains(github.event.comment.body, 'rerun') && contains(github.event.comment.body, 'vega20') }}
-        navi3:
-          - ${{ github.event_name == 'issue_comment' && contains(github.event.comment.body, 'rerun') && contains(github.event.comment.body, 'navi3') }}
-        mi300:
-          - ${{ github.event_name == 'issue_comment' && contains(github.event.comment.body, 'rerun') && contains(github.event.comment.body, 'mi300') }}
-        mi200:
-          - ${{ github.event_name == 'issue_comment' && contains(github.event.comment.body, 'rerun') && contains(github.event.comment.body, 'mi200') }}
-        general:
-          - ${{ github.event_name == 'pull_request' || github.event_name == 'push' }}
+        runner: [rocprofiler-runner-set-multi-gpus]
         os: [ubuntu-22.04]
         build-type: [RelWithDebInfo]
         ci-flags: ['--linter clang-tidy']
         name-tag: ['']
-        exclude:
-          - mi300: false
-            runner: mi300-runner-set
-          - mi200: false
-            runner: mi200-runner-set
-          - navi3: false
-            runner: navi3-runner-set
-          - vega20: false
-            runner: vega20-runner-set
-          - general: false
-            runner: rocprofiler-runner-set-multi-gpus
 
     runs-on: ${{ matrix.runner }}
 
diff --git a/.github/workflows/rerun.yml b/.github/workflows/rerun.yml
new file mode 100644
index 0000000000..3a493eb171
--- /dev/null
+++ b/.github/workflows/rerun.yml
@@ -0,0 +1,153 @@
+# Re-runs the CI configure/build/test job on one GPU runner set when a pull
+# request receives a "/rerun <target>" comment (<target> is vega20, navi3,
+# mi300 or mi200), then reports the outcome back as a PR comment.
+name: Rerun Command - Continuous Integration
+
+on:
+  issue_comment:
+    types: [created]
+
+concurrency:
+  # For issue_comment events github.ref is always the default branch, so the
+  # group is keyed on the issue/PR number instead; otherwise a comment on any
+  # unrelated issue or PR would cancel a rerun that is still in progress.
+  group: ${{ github.workflow }}-${{ github.event.issue.number }}
+  cancel-in-progress: true
+
+env:
+  # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake
+  ROCM_PATH: "/opt/rocm"
+  GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
+  PATH: "/usr/bin:$PATH"
+
+jobs:
+  rerun:
+    # Only react to "/rerun ..." comments left on pull requests; any other
+    # issue or PR comment skips the job outright.
+    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/rerun ') }}
+    # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
+    strategy:
+      fail-fast: true
+      matrix:
+        runner: [mi200-runner-set, mi300-runner-set, vega20-runner-set, navi3-runner-set]
+        # One single-value flag per target: true only when the comment asks
+        # for that target. The folded, newline-stripped scalar (>-) keeps each
+        # value a lone expression so it evaluates to a boolean that the
+        # `exclude` entries below can match against `false`; extra text such as
+        # a trailing newline would turn the value into a string.
+        vega20:
+          - >-
+            ${{ github.event.issue.pull_request &&
+            github.event.comment.body=='/rerun vega20' &&
+            !github.event.issue.pull_request.closed }}
+        navi3:
+          - >-
+            ${{ github.event.issue.pull_request &&
+            github.event.comment.body=='/rerun navi3' &&
+            !github.event.issue.pull_request.closed }}
+        mi300:
+          - >-
+            ${{ github.event.issue.pull_request &&
+            github.event.comment.body=='/rerun mi300' &&
+            !github.event.issue.pull_request.closed }}
+        mi200:
+          - >-
+            ${{ github.event.issue.pull_request &&
+            github.event.comment.body=='/rerun mi200' &&
+            !github.event.issue.pull_request.closed }}
+        os: [ubuntu-22.04]
+        build-type: [RelWithDebInfo]
+        ci-flags: ['--linter clang-tidy']
+        name-tag: ['']
+        # Drop every runner whose matching flag is false, leaving only the
+        # runner set named in the comment.
+        exclude:
+          - mi300: false
+            runner: mi300-runner-set
+          - mi200: false
+            runner: mi200-runner-set
+          - navi3: false
+            runner: navi3-runner-set
+          - vega20: false
+            runner: vega20-runner-set
+
+    runs-on: ${{ matrix.runner }}
+
+    # define this for containers
+    env:
+      GIT_DISCOVERY_ACROSS_FILESYSTEM: 1
+
+    steps:
+      # NOTE(review): on issue_comment events checkout defaults to the default
+      # branch, not the PR head -- confirm whether the PR ref
+      # (refs/pull/<number>/head) should be checked out here, bearing in mind
+      # that doing so would run PR-authored code on these runners.
+      - uses: actions/checkout@v4
+
+      - name: Install requirements
+        shell: bash
+        run: |
+          git config --global --add safe.directory '*'
+          apt-get update
+          apt-get install -y cmake clang-tidy g++-11 g++-12 python3-pip
+          update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11
+          update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install pytest
+          python3 -m pip install 'cmake>=3.22.0'
+
+      - name: List Files
+        shell: bash
+        run: |
+          which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; }
+          for i in python python3 git cmake ctest; do which-realpath $i; done
+          ls -la
+          cat /sys/class/kfd/kfd/topology/nodes/*/gpu_id || true; /opt/rocm/bin/rocminfo | grep gfx
+
+      - name: Configure, Build, and Test
+        id: build-test
+        timeout-minutes: 30
+        shell: bash
+        # The value is a plain multi-line scalar, so YAML folds the lines
+        # below into a single command line.
+        run:
+          python3 ./source/scripts/run-ci.py -B build
+            --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
+            --build-jobs 16
+            --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
+            --gpu-targets ${{ env.GPU_LIST }}
+            ${{ matrix.ci-flags }}
+            --
+            -DROCPROFILER_DEP_ROCMCORE=ON
+            -DROCPROFILER_BUILD_DOCS=ON
+            -DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
+            -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-sdk
+            -DCPACK_GENERATOR='DEB;RPM;TGZ'
+            -DCPACK_PACKAGING_INSTALL_PREFIX="$(realpath /opt/rocm)"
+            -DPython3_EXECUTABLE=$(which python3)
+
+      # Post a success/failure comment to the PR.
+      # NOTE(review): nothing visible in this workflow sets the `message`
+      # output of the build-test step; presumably run-ci.py writes it --
+      # confirm, otherwise the comment body starts with an empty string.
+      - name: Add comment to PR
+        uses: octokit/request-action@v2.x
+        with:
+          route: POST /repos/{repository}/issues/{issue_number}/comments
+          repository: ${{ github.repository }}
+          issue_number: ${{ github.event.issue.number }}
+          body: "${{ steps.build-test.outputs.message }}, please check ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} for more details!"
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      # Post a failure message when any of the previous steps fail.
+      - name: Add failure comment to PR
+        if: ${{ failure() }}
+        uses: octokit/request-action@v2.x
+        with:
+          route: POST /repos/{repository}/issues/{issue_number}/comments
+          repository: ${{ github.repository }}
+          issue_number: ${{ github.event.issue.number }}
+          body: "Rerun Failed"
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}