[rocprof-compute] Merge CDash Nightly and Continuous workflow files (#2279)
* merged code-coverage and continuous workflow files
* fixed runner typos and added build mode
* add actor name to Continuous build
* improve error handling and remove redundant verbose
* fixed workflow file log output
* revert logs output in run_ci.py
* ruff format
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
9a8ed9f45d
Коммит
81720183ad
@@ -1,157 +0,0 @@
|
||||
name: rocprofiler-compute Code Coverage
|
||||
run-name: rocprofiler-compute-code-coverage
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 2 * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
runner_matrix:
|
||||
description: 'Runner configuration'
|
||||
required: false
|
||||
type: string
|
||||
|
||||
env:
|
||||
ROCM_PATH: "/opt/rocm"
|
||||
ROCM_VERSION: "7.0.2"
|
||||
|
||||
jobs:
|
||||
prepare_matrix:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.generate_matrix.outputs.matrix }}
|
||||
|
||||
steps:
|
||||
- name: Generate matrix
|
||||
id: generate_matrix
|
||||
run: |
|
||||
MATRIX='[{"os-release":"22.04","gpu":"mi355","arch":"gfx950","runner":"linux-mi355-1gpu-ossci-rocm","code-name":"jammy"},{"os-release":"24.04","gpu":"mi355","arch":"gfx950","runner":"linux-mi355-1gpu-ossci-rocm","code-name":"noble"},{"os-release":"22.04","gpu":"mi325","arch":"gfx94X","runner":"linux-mi325-1gpu-ossci-rocm","code-name":"jammy"},{"os-release":"24.04","gpu":"mi325","arch":"gfx94X","runner":"linux-mi325-1gpu-ossci-rocm","code-name":"noble"}]'
|
||||
|
||||
if [ -n "${{ github.event.inputs.runner_matrix }}" ]; then
|
||||
MATRIX='${{ github.event.inputs.runner_matrix }}'
|
||||
fi
|
||||
|
||||
echo "matrix=${MATRIX}" >> $GITHUB_OUTPUT
|
||||
|
||||
coverage:
|
||||
name: Coverage • ${{ matrix.system.gpu }} • Ubuntu ${{ matrix.system.os-release }}
|
||||
needs: prepare_matrix
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
system: ${{ fromJSON(needs.prepare_matrix.outputs.matrix) }}
|
||||
runs-on: ${{ matrix.system.runner }}
|
||||
env:
|
||||
HIP_PLATFORM: "amd"
|
||||
OMPI_ALLOW_RUN_AS_ROOT: 1
|
||||
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
|
||||
permissions:
|
||||
packages: read
|
||||
container:
|
||||
image: ghcr.io/rocm/rocprofiler-ubuntu:${{ matrix.system.os-release }}-systems-ci-${{ matrix.system.arch }}
|
||||
options:
|
||||
--privileged
|
||||
--ipc host
|
||||
--group-add video
|
||||
--device /dev/kfd
|
||||
--device /dev/dri
|
||||
--cap-add CAP_SYS_ADMIN
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
sparse-checkout: projects/rocprofiler-compute/
|
||||
|
||||
- name: Update system packages
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get dist-upgrade -y
|
||||
echo "✅ System packages updated!"
|
||||
|
||||
- name: Install amdgpu and dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
ROCM_MAJOR=$(echo ${{ env.ROCM_VERSION }} | sed 's/\./ /g' | awk '{print $1}')
|
||||
ROCM_MINOR=$(echo ${{ env.ROCM_VERSION }} | sed 's/\./ /g' | awk '{print $2}')
|
||||
ROCM_VERSN=$(( (${ROCM_MAJOR}*10000)+(${ROCM_MINOR}*100) ))
|
||||
wget -N -P /tmp/ https://repo.radeon.com/amdgpu-install/${ROCM_MAJOR}.${ROCM_MINOR}/ubuntu/${{ matrix.system.code-name }}/amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb
|
||||
apt-get install -y /tmp/amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb
|
||||
apt-get update
|
||||
apt install -y amd-smi-lib libdw-dev
|
||||
echo "✅ amdgpu and dependencies Installed!"
|
||||
|
||||
- name: Install MPI
|
||||
run: |
|
||||
apt install -y libopenmpi-dev
|
||||
echo "✅ MPI Installed!"
|
||||
|
||||
- name: Install ROCm
|
||||
run: |
|
||||
apt install -y rocm-dev
|
||||
echo "✅ ROCm Installation Complete!"
|
||||
|
||||
- name: Install Python Dependencies
|
||||
working-directory: projects/rocprofiler-compute
|
||||
run: |
|
||||
pip install -r requirements.txt --break-system-packages --ignore-installed
|
||||
pip install -r requirements-test.txt --break-system-packages --ignore-installed
|
||||
echo "✅ Python dependencies installed!"
|
||||
|
||||
- name: Configure, Build, Test, and Upload Coverage
|
||||
id: run_coverage
|
||||
timeout-minutes: 120
|
||||
working-directory: projects/rocprofiler-compute
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set -e
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
git config --global --add safe.directory ${PWD}
|
||||
|
||||
BUILD_NAME="ROCm/rocprofiler-compute-${{ github.ref_name }}-ubuntu-${{ matrix.system.os-release }}-${{ matrix.system.gpu }}"
|
||||
SITE_NAME="${{ matrix.system.runner }}"
|
||||
|
||||
PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH \
|
||||
LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH \
|
||||
python3 ./tools/run-code-coverage.py \
|
||||
--build-name "${BUILD_NAME}" \
|
||||
--site "${SITE_NAME}" \
|
||||
--mode "Nightly" \
|
||||
--build-jobs 16 \
|
||||
--pytest-numprocs 8 \
|
||||
--install
|
||||
|
||||
- name: Check Coverage Configuration
|
||||
if: always()
|
||||
working-directory: projects/rocprofiler-compute
|
||||
run: |
|
||||
echo "=== Checking Build Configuration ==="
|
||||
if [ -f "build/CMakeCache.txt" ]; then
|
||||
echo "CMakeCache.txt contents (relevant variables):"
|
||||
grep -E "ENABLE_TESTS|ENABLE_COVERAGE|INSTALL_TESTS|PYTEST_NUMPROCS" build/CMakeCache.txt || echo "⚠️ Coverage variables not found!"
|
||||
echo ""
|
||||
echo "=== Checking for coverage.xml ==="
|
||||
if [ -f "build/coverage.xml" ]; then
|
||||
echo "✅ coverage.xml found!"
|
||||
ls -lh build/coverage.xml
|
||||
else
|
||||
echo "❌ coverage.xml NOT found!"
|
||||
fi
|
||||
echo ""
|
||||
echo "=== Checking CTest results ==="
|
||||
if [ -f "build/Testing/Temporary/LastTest.log" ]; then
|
||||
echo "Last 50 lines of test log:"
|
||||
tail -n 50 build/Testing/Temporary/LastTest.log
|
||||
fi
|
||||
else
|
||||
echo "❌ build/CMakeCache.txt not found - build may have failed!"
|
||||
fi
|
||||
|
||||
- name: Upload Logs on Failure
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test-logs-${{ matrix.system.gpu }}-${{ matrix.system.os-release }}
|
||||
path: |
|
||||
projects/rocprofiler-compute/build/Testing/Temporary/LastTest*.log
|
||||
projects/rocprofiler-compute/build/coverage.xml
|
||||
retention-days: 7
|
||||
@@ -5,6 +5,15 @@ on:
|
||||
schedule:
|
||||
- cron: '0 6 * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
mode:
|
||||
description: 'Build mode'
|
||||
required: false
|
||||
default: 'continuous'
|
||||
type: choice
|
||||
options:
|
||||
- continuous
|
||||
- nightly
|
||||
push:
|
||||
branches: [ develop ]
|
||||
paths:
|
||||
@@ -49,7 +58,7 @@ jobs:
|
||||
id: generate_matrix
|
||||
working-directory: projects/rocprofiler-compute/.github
|
||||
run: |
|
||||
if [ '${{ github.event_name }}' = 'schedule' ]; then
|
||||
if [ '${{ github.event_name }}' = 'schedule' ] || [ '${{ inputs.mode }}' = 'nightly' ]; then
|
||||
MATRIX_CONTENT=$(cat ci-matrix.yml | yq '.matrix-ubuntu-nightly' -I=0 -o=json)
|
||||
else
|
||||
MATRIX_CONTENT=$(cat ci-matrix.yml | yq '.matrix-ubuntu-ci' -I=0 -o=json)
|
||||
@@ -88,17 +97,20 @@ jobs:
|
||||
- name: Setup Environment
|
||||
id: setup_env
|
||||
run: |
|
||||
if [ '${{ github.event_name }}' = 'schedule' ]; then
|
||||
if [ '${{ github.event_name }}' = 'schedule' ] || [ '${{ inputs.mode }}' = 'nightly' ]; then
|
||||
MODE=Nightly
|
||||
EXCLUDED_TESTS=""
|
||||
ADD_COVERAGE="--coverage"
|
||||
else
|
||||
MODE=Continuous
|
||||
EXCLUDED_TESTS="test_profile_live_attach_detach"
|
||||
ADD_COVERAGE=""
|
||||
fi
|
||||
echo "mode=${MODE}" >> $GITHUB_OUTPUT
|
||||
echo "excluded_tests=${EXCLUDED_TESTS}" >> $GITHUB_OUTPUT
|
||||
echo "add_coverage=${ADD_COVERAGE}" >> $GITHUB_OUTPUT
|
||||
|
||||
if [ '${{ matrix.system.os-release }}' = '24.04']; then
|
||||
if [ '${{ matrix.system.os-release }}' = '24.04' ]; then
|
||||
CODE_NAME=noble
|
||||
else
|
||||
CODE_NAME=jammy
|
||||
@@ -129,13 +141,21 @@ jobs:
|
||||
- name: Install Python Requirements
|
||||
working-directory: projects/rocprofiler-compute
|
||||
run: |
|
||||
pip install -r requirements.txt --break-system-packages --ignore-installed
|
||||
pip install -r requirements-test.txt --break-system-packages --ignore-installed
|
||||
for i in 1 2 3; do
|
||||
pip install -r requirements.txt --break-system-packages --ignore-installed --timeout 60 && break
|
||||
echo "⚠️ pip install attempt $i failed, retrying..."
|
||||
sleep 10
|
||||
done
|
||||
for i in 1 2 3; do
|
||||
pip install -r requirements-test.txt --break-system-packages --ignore-installed --timeout 60 && break
|
||||
echo "⚠️ pip install attempt $i failed, retrying..."
|
||||
sleep 10
|
||||
done
|
||||
echo "✅ pip requirements installed!"
|
||||
|
||||
- name: Configure, Build, and Test
|
||||
id: test
|
||||
timeout-minutes: 90
|
||||
timeout-minutes: ${{ steps.setup_env.outputs.mode == 'Nightly' && 120 || 90 }}
|
||||
working-directory: projects/rocprofiler-compute
|
||||
run: |
|
||||
set -e
|
||||
@@ -145,9 +165,11 @@ jobs:
|
||||
LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:${{ env.ROCM_PATH }}/lib/rocm_sysdeps/lib:$LD_LIBRARY_PATH \
|
||||
python3 ./tools/run-ci.py \
|
||||
--name "ROCm/rocprofiler-compute-${{ github.ref_name }}-ubuntu-${{ matrix.system.os-release }}-${{ matrix.system.gpu }}" \
|
||||
--actor "${{ github.actor }}" \
|
||||
--site ${{ matrix.system.runner }} \
|
||||
--mode ${{ steps.setup_env.outputs.mode }} \
|
||||
--build-jobs 16 \
|
||||
${{ steps.setup_env.outputs.add_coverage }} \
|
||||
-- \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} \
|
||||
@@ -158,8 +180,14 @@ jobs:
|
||||
-E "${{ steps.setup_env.outputs.excluded_tests }}"
|
||||
|
||||
- name: Output Logs
|
||||
if: failure() && steps.test.outcome == 'failure'
|
||||
if: failure()
|
||||
working-directory: projects/rocprofiler-compute
|
||||
run: |
|
||||
echo "❌ Run Failed: Now outputting LastTest.log files for detailed logs..."
|
||||
cat build/Testing/Temporary/LastTest*.log
|
||||
echo "❌ Run Failed: Outputting available log files..."
|
||||
for log in build/Testing/Temporary/LastTest*.log build/Testing/Temporary/LastConfigure*.log; do
|
||||
if [ -f "$log" ]; then
|
||||
echo "=== $log ==="
|
||||
cat "$log"
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user