From cccc350dc620e61ae2554978b62ab3532dc10bd9 Mon Sep 17 00:00:00 2001 From: Jason Bonnell <166553723+jbonnell-amd@users.noreply.github.com> Date: Wed, 8 Oct 2025 15:18:56 -0400 Subject: [PATCH] [rocprofiler-systems] Add different test coverage for CI/Nightly, add better logging for failures (#1272) * Try outputting LastTest.log * Update if condition for outputting log * Another attempt * Only run Ubuntu Noble on MI355 in push/PR * Try exclude matrix * Move conditional statement in matrix exclusion * Create ci-matrix.yml file * Add needs parameter to ubuntu job * Fix typo in matrix output variable * Add back pull_request_template.md * Add back pull_request_template.md --- ...rofiler-systems-continuous-integration.yml | 39 +++++++++++++++---- .../rocprofiler-systems/.github/ci-matrix.yml | 12 ++++++ 2 files changed, 44 insertions(+), 7 deletions(-) create mode 100644 projects/rocprofiler-systems/.github/ci-matrix.yml diff --git a/.github/workflows/rocprofiler-systems-continuous-integration.yml b/.github/workflows/rocprofiler-systems-continuous-integration.yml index f9f7e71f24..59bc426f49 100644 --- a/.github/workflows/rocprofiler-systems-continuous-integration.yml +++ b/.github/workflows/rocprofiler-systems-continuous-integration.yml @@ -36,18 +36,34 @@ env: ROCM_VERSION: "7.0.0" jobs: + prepare_matrix: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.generate_matrix.outputs.matrix }} + + steps: + - uses: actions/checkout@v5 + with: + sparse-checkout: projects/rocprofiler-systems/.github + + - name: Generate and output matrix + id: generate_matrix + working-directory: projects/rocprofiler-systems/.github + run: | + if [ '${{ github.event_name }}' = 'schedule' ]; then + MATRIX_CONTENT=$(cat ci-matrix.yml | yq '.matrix-nightly' -I=0 -o=json) + else + MATRIX_CONTENT=$(cat ci-matrix.yml | yq '.matrix-ci' -I=0 -o=json) + fi + echo "matrix=${MATRIX_CONTENT}" >> $GITHUB_OUTPUT + ubuntu: name: Ubuntu ${{ matrix.system.os-release }} • ${{ matrix.system.arch }} + needs: prepare_matrix strategy: fail-fast: false matrix: - system: - # MI355 - - { os-release: '22.04', gpu: 'mi355', arch: 'gfx950', runner: 'linux-mi355-1gpu-ossci-rocm' } - - { os-release: '24.04', gpu: 'mi355', arch: 'gfx950', runner: 'linux-mi355-1gpu-ossci-rocm' } - # MI325 - - { os-release: '22.04', gpu: 'mi325', arch: 'gfx94X', runner: 'linux-mi325-1gpu-ossci-rocm' } - - { os-release: '24.04', gpu: 'mi325', arch: 'gfx94X', runner: 'linux-mi325-1gpu-ossci-rocm' } + system: ${{ fromJSON(needs.prepare_matrix.outputs.matrix) }} runs-on: ${{ matrix.system.runner }} env: HIP_PLATFORM: "amd" @@ -65,6 +81,7 @@ jobs: --device /dev/kfd --device /dev/dri --cap-add CAP_SYS_ADMIN + steps: - uses: actions/checkout@v5 with: @@ -122,6 +139,7 @@ jobs: echo "✅ ROCm Installation Complete!" - name: Configure, Build, and Test + id: run_ci timeout-minutes: 30 working-directory: projects/rocprofiler-systems run: | @@ -152,3 +170,10 @@ jobs: -- \ -L "rocm" \ -LE "rccl|runtime|ompvv" + + - name: Output Logs + if: failure() && steps.run_ci.outcome == 'failure' + working-directory: projects/rocprofiler-systems + run: | + echo "❌ Run Failed: Now outputting LastTest.log files for detailed logs..." + cat build/Testing/Temporary/LastTest*.log diff --git a/projects/rocprofiler-systems/.github/ci-matrix.yml b/projects/rocprofiler-systems/.github/ci-matrix.yml new file mode 100644 index 0000000000..a5c1e31da3 --- /dev/null +++ b/projects/rocprofiler-systems/.github/ci-matrix.yml @@ -0,0 +1,12 @@ +# Matrix definitions for rocprofiler-systems-continuous-integration + +matrix-nightly: + # MI355 + - { os-release: '22.04', gpu: 'mi355', arch: 'gfx950', runner: 'linux-mi355-1gpu-ossci-rocm' } + - { os-release: '24.04', gpu: 'mi355', arch: 'gfx950', runner: 'linux-mi355-1gpu-ossci-rocm' } + # MI325 + - { os-release: '22.04', gpu: 'mi325', arch: 'gfx94X', runner: 'linux-mi325-1gpu-ossci-rocm' } + - { os-release: '24.04', gpu: 'mi325', arch: 'gfx94X', runner: 'linux-mi325-1gpu-ossci-rocm' } +matrix-ci: + # MI355 + - { os-release: '24.04', gpu: 'mi355', arch: 'gfx950', runner: 'linux-mi355-1gpu-ossci-rocm' }