[rocprofiler-systems] Add different test coverage for CI/Nightly, add better logging for failures (#1272)

* Try outputting LastTest.log

* Update if condition for outputting log

* Another attempt

* Only run Ubuntu Noble on MI355 in push/PR

* Try exclude matrix

* Move conditional statement in matrix exclusion

* Create ci-matrix.yml file

* Add needs parameter to ubuntu job

* Fix typo in matrix output variable

* Add back pull_request_template.md

* Add back pull_request_template.md
Cette révision appartient à :
Jason Bonnell
2025-10-08 15:18:56 -04:00
révisé par GitHub
Parent 5cc7a7356a
révision cccc350dc6
2 fichiers modifiés avec 44 ajouts et 7 suppressions
+32 -7
Voir le fichier
@@ -36,18 +36,34 @@ env:
ROCM_VERSION: "7.0.0"
jobs:
prepare_matrix:
  # Selects the runner matrix for this run from ci-matrix.yml and publishes it
  # as a single-line JSON job output named `matrix`.
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.generate_matrix.outputs.matrix }}
  steps:
    # Sparse checkout: only the directory holding ci-matrix.yml is fetched.
    - uses: actions/checkout@v5
      with:
        sparse-checkout: projects/rocprofiler-systems/.github
    - name: Generate and output matrix
      id: generate_matrix
      working-directory: projects/rocprofiler-systems/.github
      run: |
        # Scheduled runs use the nightly matrix; every other event uses the
        # CI matrix.
        if [ '${{ github.event_name }}' = 'schedule' ]; then
          MATRIX_KEY='matrix-nightly'
        else
          MATRIX_KEY='matrix-ci'
        fi
        # -I=0 keeps the JSON on one line so it fits a key=value output entry.
        MATRIX_CONTENT=$(yq -I=0 -o=json ".${MATRIX_KEY}" ci-matrix.yml)
        # A missing key makes yq print "null"; fail here with a clear message
        # instead of handing an unusable matrix to the jobs that consume it.
        if [ -z "${MATRIX_CONTENT}" ] || [ "${MATRIX_CONTENT}" = 'null' ]; then
          echo "::error::No '${MATRIX_KEY}' entries found in ci-matrix.yml"
          exit 1
        fi
        echo "matrix=${MATRIX_CONTENT}" >> "$GITHUB_OUTPUT"
ubuntu:
name: Ubuntu ${{ matrix.system.os-release }} • ${{ matrix.system.arch }}
needs: prepare_matrix
strategy:
fail-fast: false
matrix:
system:
# MI355
- { os-release: '22.04', gpu: 'mi355', arch: 'gfx950', runner: 'linux-mi355-1gpu-ossci-rocm' }
- { os-release: '24.04', gpu: 'mi355', arch: 'gfx950', runner: 'linux-mi355-1gpu-ossci-rocm' }
# MI325
- { os-release: '22.04', gpu: 'mi325', arch: 'gfx94X', runner: 'linux-mi325-1gpu-ossci-rocm' }
- { os-release: '24.04', gpu: 'mi325', arch: 'gfx94X', runner: 'linux-mi325-1gpu-ossci-rocm' }
system: ${{ fromJSON(needs.prepare_matrix.outputs.matrix) }}
runs-on: ${{ matrix.system.runner }}
env:
HIP_PLATFORM: "amd"
@@ -65,6 +81,7 @@ jobs:
--device /dev/kfd
--device /dev/dri
--cap-add CAP_SYS_ADMIN
steps:
- uses: actions/checkout@v5
with:
@@ -122,6 +139,7 @@ jobs:
echo "✅ ROCm Installation Complete!"
- name: Configure, Build, and Test
id: run_ci
timeout-minutes: 30
working-directory: projects/rocprofiler-systems
run: |
@@ -152,3 +170,10 @@ jobs:
-- \
-L "rocm" \
-LE "rccl|runtime|ompvv"
# Dumps the LastTest*.log files when the run_ci step has failed, so the
# detailed test output is visible directly in the job log.
- name: Output Logs
  if: failure() && steps.run_ci.outcome == 'failure'
  working-directory: projects/rocprofiler-systems
  run: |
    echo "❌ Run Failed: Now outputting LastTest.log files for detailed logs..."
    # POSIX sh constructs only, so this behaves the same under bash or sh.
    found=0
    for log in build/Testing/Temporary/LastTest*.log; do
      # An unmatched glob is left as a literal pattern; skip it rather than
      # letting cat fail with "No such file or directory".
      [ -f "$log" ] || continue
      found=1
      # Collapsible group per file keeps multiple logs distinguishable.
      echo "::group::$log"
      cat "$log"
      echo "::endgroup::"
    done
    if [ "$found" -eq 0 ]; then
      echo "No LastTest*.log files found under build/Testing/Temporary."
    fi
+12
Voir le fichier
@@ -0,0 +1,12 @@
# Matrix definitions for rocprofiler-systems-continuous-integration
#
# Each entry describes one runner configuration. The workflow reads exactly one
# of the lists below and converts it to JSON for the job matrix.

# Used for scheduled runs.
matrix-nightly:
  # MI355
  - os-release: '22.04'
    gpu: 'mi355'
    arch: 'gfx950'
    runner: 'linux-mi355-1gpu-ossci-rocm'
  - os-release: '24.04'
    gpu: 'mi355'
    arch: 'gfx950'
    runner: 'linux-mi355-1gpu-ossci-rocm'
  # MI325
  - os-release: '22.04'
    gpu: 'mi325'
    arch: 'gfx94X'
    runner: 'linux-mi325-1gpu-ossci-rocm'
  - os-release: '24.04'
    gpu: 'mi325'
    arch: 'gfx94X'
    runner: 'linux-mi325-1gpu-ossci-rocm'

# Used for every non-scheduled event.
matrix-ci:
  # MI355
  - os-release: '24.04'
    gpu: 'mi355'
    arch: 'gfx950'
    runner: 'linux-mi355-1gpu-ossci-rocm'