[rocprofiler-compute] Add Nightly and CI on MI355/MI325 Runners (#1455)
* Initial work in progress for compute CI workflow * Update run-ci.py script location, enable test creation * Add new lines to files * Add coverage file argument to run-ci.py * Remove run-ci.py script usage from rocprofiler-compute-continuous-integration.yml workflow * Add --break-system-packages parameter * Add --ignore-installed to pip install * Checkout specific branch until amdclang issue fixed in develop * Add missing slash to path for cxx compiler * Remove specific branch from checkout action * Use run-ci.py in rocprofiler-compute-continuous-integration.yml * Update install python requirements step * Fix typo in build-name * Update run-ci.py to have toggle for code coverage * Apply ruff formatting * Ruff again * Exclude live attach detach and roofline tests in CI * Add ctest args * Revert run-ci.py changes * Try new run-ci-2.py * Update type of pytest-numprocs argument * Try casting arg to str * Fix typo in arg reference * upgrade pip before running python installs * Use jammy instead of noble for CI * Remove python nproc arg from run-ci-2.py * Switch to MI325 runners for CI * Fix spacing issue * Rename run-ci.py to run-code-coverage.py, add new run-ci.py * Update to ROCm version 7.1.0 to debug sdk issues * Testing out tarball install again * Update regex on tarball version * Update tarball regex on compute * ruff formatting * Revert change to systems CI file * Switch back to rocm-dev install * ruff formatting again * Add ld_lib_path for rocm_sysdeps * Remove excluded tests temporarily * Add back excluded tests, add timeout for test step * Address PR feedback * Add git safe directory lines * Revert dependencies change to debug new failures * Exclude roofline again, rework dependencies * Add in hip-runtime-amd dependency * Install hip dev package * Add TEST_FROM_INSTALL cmake arg to compute CI workflow * Remove test_from_install for now * Enable roofline tests again
이 커밋은 다음에 포함됨:
@@ -112,7 +112,7 @@ jobs:
|
||||
|
||||
PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH \
|
||||
LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH \
|
||||
python3 ./tools/run-ci.py \
|
||||
python3 ./tools/run-code-coverage.py \
|
||||
--build-name "${BUILD_NAME}" \
|
||||
--site "${SITE_NAME}" \
|
||||
--mode "Nightly" \
|
||||
|
||||
@@ -0,0 +1,165 @@
|
||||
# Workflow identity shown in the Actions UI.
name: rocprofiler-compute Continuous Integration
run-name: rocprofiler-compute-ci

on:
  # Daily scheduled run (cron is evaluated in UTC). The jobs below switch to
  # the nightly matrix and "Nightly" mode when the event is `schedule`.
  schedule:
    - cron: '0 6 * * *'

  # Allow manual runs from the Actions tab.
  workflow_dispatch:

  # Pushes to develop that touch this workflow or the rocprofiler-compute
  # project; documentation, GUI, docker and metadata-only changes are excluded.
  push:
    branches:
      - develop
    paths:
      - '.github/workflows/rocprofiler-compute-continuous-integration.yml'
      - 'projects/rocprofiler-compute/**'
      - '!projects/rocprofiler-compute/*.md'
      - '!projects/rocprofiler-compute/docs/**'
      - '!projects/rocprofiler-compute/source/docs/**'
      - '!projects/rocprofiler-compute/source/python/gui/**'
      - '!projects/rocprofiler-compute/docker/**'
      - '!projects/rocprofiler-compute/.wordlist.txt'
      - '!projects/rocprofiler-compute/CMakePresets.json'

  # Pull requests use the same path filter as pushes (any target branch).
  pull_request:
    paths:
      - '.github/workflows/rocprofiler-compute-continuous-integration.yml'
      - 'projects/rocprofiler-compute/**'
      - '!projects/rocprofiler-compute/*.md'
      - '!projects/rocprofiler-compute/docs/**'
      - '!projects/rocprofiler-compute/source/docs/**'
      - '!projects/rocprofiler-compute/source/python/gui/**'
      - '!projects/rocprofiler-compute/docker/**'
      - '!projects/rocprofiler-compute/.wordlist.txt'
      - '!projects/rocprofiler-compute/CMakePresets.json'
|
||||
|
||||
# Workflow-wide environment, visible to every job and step.
env:
  # Quoted so that generic YAML 1.1 parsers keep the literal string "ON"
  # instead of coercing it to a boolean.
  # NOTE(review): the name suggests this was carried over from the
  # rocprofiler-systems workflow - confirm it is still needed here.
  ROCPROFSYS_CI: "ON"
  # Path the ROCm install is symlinked to and used for PATH / LD_LIBRARY_PATH.
  ROCM_PATH: "/opt/rocm"
  # Version used to build the amdgpu-install package URL and the versioned
  # install directory name.
  ROCM_VERSION: "7.0.0"
|
||||
|
||||
jobs:
  # Reads the runner matrix from projects/rocprofiler-compute/.github/ci-matrix.yml
  # and exposes it as a JSON job output for the `ubuntu` job.
  prepare_matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.generate_matrix.outputs.matrix }}

    steps:
      - uses: actions/checkout@v5
        with:
          sparse-checkout: projects/rocprofiler-compute/.github

      - name: Generate and output matrix
        id: generate_matrix
        working-directory: projects/rocprofiler-compute/.github
        run: |
          # Scheduled runs use the nightly matrix; every other event uses the CI matrix.
          if [ '${{ github.event_name }}' = 'schedule' ]; then
            MATRIX_CONTENT=$(cat ci-matrix.yml | yq '.matrix-ubuntu-nightly' -I=0 -o=json)
          else
            MATRIX_CONTENT=$(cat ci-matrix.yml | yq '.matrix-ubuntu-ci' -I=0 -o=json)
          fi
          echo "matrix=${MATRIX_CONTENT}" >> "$GITHUB_OUTPUT"

  # Configures, builds and tests rocprofiler-compute inside a container on each
  # runner described by the matrix produced above.
  ubuntu:
    name: Ubuntu ${{ matrix.system.os-release }} • ${{ matrix.system.arch }}
    needs: prepare_matrix
    strategy:
      fail-fast: false
      matrix:
        system: ${{ fromJSON(needs.prepare_matrix.outputs.matrix) }}
    runs-on: ${{ matrix.system.runner }}
    env:
      HIP_PLATFORM: "amd"
      OMPI_ALLOW_RUN_AS_ROOT: 1
      OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
    permissions:
      packages: read
    container:
      image: ghcr.io/rocm/rocprofiler-ubuntu:${{ matrix.system.os-release }}-systems-ci-${{ matrix.system.arch }}
      # Folded into a single space-separated option string.
      options: >-
        --privileged
        --ipc host
        --group-add video
        --device /dev/kfd
        --device /dev/dri
        --cap-add CAP_SYS_ADMIN

    steps:
      - uses: actions/checkout@v5
        with:
          sparse-checkout: projects/rocprofiler-compute/

      # Publishes three step outputs consumed later: mode, excluded_tests, code_name.
      - name: Setup Environment
        id: setup_env
        run: |
          # Scheduled runs are "Nightly" and exclude nothing; all other events
          # are "Continuous" and skip the live attach/detach test.
          if [ '${{ github.event_name }}' = 'schedule' ]; then
            MODE=Nightly
            EXCLUDED_TESTS=""
          else
            MODE=Continuous
            EXCLUDED_TESTS="test_profile_live_attach_detach"
          fi
          echo "mode=${MODE}" >> "$GITHUB_OUTPUT"
          echo "excluded_tests=${EXCLUDED_TESTS}" >> "$GITHUB_OUTPUT"

          # Map the Ubuntu release to its code name for the package repo URL.
          # The space before `]` is required: `]` must be its own argument,
          # otherwise the test errors out and the else branch is always taken.
          if [ '${{ matrix.system.os-release }}' = '24.04' ]; then
            CODE_NAME=noble
          else
            CODE_NAME=jammy
          fi
          echo "code_name=${CODE_NAME}" >> "$GITHUB_OUTPUT"

      - name: Install amdgpu and dependencies
        shell: bash
        run: |
          # Derive <major>, <minor> and the numeric package suffix
          # (major*10000 + minor*100) from ROCM_VERSION.
          ROCM_MAJOR=$(echo ${{ env.ROCM_VERSION }} | sed 's/\./ /g' | awk '{print $1}')
          ROCM_MINOR=$(echo ${{ env.ROCM_VERSION }} | sed 's/\./ /g' | awk '{print $2}')
          ROCM_VERSN=$(( (${ROCM_MAJOR}*10000)+(${ROCM_MINOR}*100) ))
          wget -N -P /tmp/ https://repo.radeon.com/amdgpu-install/${ROCM_MAJOR}.${ROCM_MINOR}/ubuntu/${{ steps.setup_env.outputs.code_name }}/amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb
          apt-get install -y /tmp/amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb
          apt-get update
          apt install -y amd-smi-lib libdw-dev hip-dev
          echo "✅ amdgpu and ROCm dependencies Installed!"

      # Unpacks the ROCm tarball found under /opt into the versioned directory
      # and symlinks ROCM_PATH to it.
      # NOTE(review): presumably the container image ships the tarball and the
      # target directory already exists (tar -C requires it) - confirm.
      - name: Install Latest Nightly ROCm
        run: |
          set -e
          # Extract the x.y.z version from the tarball file name. The pattern
          # avoids a leading `*`, whose meaning is undefined in POSIX ERE.
          TARBALL_ROCM_VERSION=$(ls /opt/*.tar.gz | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+')
          tar -xf ${{ env.ROCM_PATH }}-${TARBALL_ROCM_VERSION}-${{ matrix.system.arch }}.tar.gz -C ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }}
          ln -s ${{ env.ROCM_PATH }}-${{ env.ROCM_VERSION }} ${{ env.ROCM_PATH }}
          echo "ROCm installed to: ${{ env.ROCM_PATH }}"
          echo "✅ ROCm Installation Complete!"

      - name: Install Python Requirements
        working-directory: projects/rocprofiler-compute
        run: |
          pip install -r requirements.txt --break-system-packages --ignore-installed
          pip install -r requirements-test.txt --break-system-packages --ignore-installed
          echo "✅ pip requirements installed!"

      # Runs tools/run-ci.py with the ROCm toolchain on PATH / LD_LIBRARY_PATH.
      # Arguments after the first `--` are CMake options; arguments after the
      # second `--` are passed on as test-exclusion arguments (-E <regex>).
      - name: Configure, Build, and Test
        id: test
        timeout-minutes: 90
        working-directory: projects/rocprofiler-compute
        run: |
          set -e
          # Mark the checkout as safe so git commands work inside the container.
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
          git config --global --add safe.directory ${PWD}
          PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH \
          LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:${{ env.ROCM_PATH }}/lib/rocm_sysdeps/lib:$LD_LIBRARY_PATH \
          python3 ./tools/run-ci.py \
            --name "ROCm/rocprofiler-compute-${{ github.ref_name }}-ubuntu-${{ matrix.system.os-release }}-${{ matrix.system.gpu }}" \
            --site ${{ matrix.system.runner }} \
            --mode ${{ steps.setup_env.outputs.mode }} \
            --build-jobs 16 \
            -- \
            -DCMAKE_BUILD_TYPE=Release \
            -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} \
            -DENABLE_TESTS=ON \
            -DINSTALL_TESTS=ON \
            -DPYTEST_NUMPROCS=8 \
            -- \
            -E "${{ steps.setup_env.outputs.excluded_tests }}"

      # Only runs when the test step itself failed; dumps the CTest logs.
      - name: Output Logs
        if: failure() && steps.test.outcome == 'failure'
        working-directory: projects/rocprofiler-compute
        run: |
          echo "❌ Run Failed: Now outputting LastTest.log files for detailed logs..."
          cat build/Testing/Temporary/LastTest*.log
|
||||
새 이슈에서 참조
사용자 차단