# AMDSMI CI workflow (.github/workflows/amdsmi-build.yml)
name: AMDSMI CI

# Triggers: pull requests into and pushes to `develop` that touch the AMDSMI
# project tree or this workflow file, plus manual dispatch.
on:
  pull_request:
    branches: [develop]
    paths:
      - 'projects/amdsmi/**'
      - '.github/workflows/amdsmi-build.yml'
  push:
    branches: [develop]
    paths:
      - 'projects/amdsmi/**'
      - '.github/workflows/amdsmi-build.yml'
  workflow_dispatch:

# The workflow only needs to read repository contents.
permissions:
  contents: read

# Environment shared by every job and step.
env:
  DEBIAN_FRONTEND: noninteractive
  # Quoted so the value is unambiguously the string "true" rather than a
  # YAML boolean.
  DEBCONF_NONINTERACTIVE_SEEN: 'true'
  # NOTE(review): BUILD_TYPE and ROCM_DIR do not appear to be referenced by
  # the steps of this workflow (cmake is invoked without a build type and
  # /opt/rocm is spelled out literally) - confirm whether they are still used.
  BUILD_TYPE: Release
  ROCM_DIR: /opt/rocm

jobs:
# ---------------------------------------------------------------------------
# debian-buildinstall: build the AMDSMI .deb in each Debian-family container,
# upload it for pull requests, install it, verify the CLI, then uninstall.
# ---------------------------------------------------------------------------
  debian-buildinstall:
    name: Build
    runs-on:
      - self-hosted
      - ${{ vars.RUNNER_TYPE }}
    continue-on-error: true
    strategy:
      max-parallel: 10
      matrix:
        os: [Ubuntu20, Ubuntu22, Debian10]
    container:
      # The image is looked up from the repository variable <os>_DOCKER_IMAGE.
      image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
      options: >-
        --rm --privileged --device=/dev/kfd --device=/dev/dri
        --group-add video --cap-add=SYS_PTRACE
        --security-opt seccomp=unconfined --shm-size=64G
        --cap-add=SYS_MODULE -v /lib/modules:/lib/modules -u root

    steps:
      - uses: actions/checkout@v4

      # PR_NUMBER and BUILD_DATE are only used to name the uploaded artifact.
      - name: Set Artifact Metadata
        if: github.event_name == 'pull_request'
        run: |
          # Set PR number and date for artifact naming
          echo "PR_NUMBER=PR${{ github.event.pull_request.number }}" >> "$GITHUB_ENV"
          # Set date in MMDDYY-HHMM format (UTC time)
          echo "BUILD_DATE=$(date -u +%m%d%y-%H%M)" >> "$GITHUB_ENV"

      # Exports PROJECT_DIR for the following steps.
      - name: Set Project Directory
        run: |
          # Find the directory containing the main CMakeLists.txt for AMDSMI.
          # Only the first match is kept so PROJECT_DIR is always one path.
          TARGET_DIR=$(find "$GITHUB_WORKSPACE" -path "*/projects/amdsmi/CMakeLists.txt" -exec dirname {} \; | head -n 1)

          if [ -z "$TARGET_DIR" ]; then
            echo "Could not find CMakeLists.txt in projects/amdsmi. Searching root..."
            TARGET_DIR=$(find "$GITHUB_WORKSPACE" -maxdepth 2 -name "CMakeLists.txt" -exec dirname {} \; | head -n 1)
          fi

          # An empty PROJECT_DIR would turn the build folder into "/build";
          # stop here instead.
          if [ -z "$TARGET_DIR" ]; then
            echo "::error::No CMakeLists.txt found under $GITHUB_WORKSPACE"
            exit 1
          fi

          echo "PROJECT_DIR=$TARGET_DIR" >> "$GITHUB_ENV"

      # Debian 10 (buster) packages are served from archive.debian.org.
      - name: Update repositories for Debian10
        if: matrix.os == 'Debian10'
        run: |
          set -e
          echo 'Updating repositories for Debian10 (archived)'
          cat > /etc/apt/sources.list << EOF
          deb http://archive.debian.org/debian buster main
          deb http://archive.debian.org/debian-security buster/updates main
          EOF
          echo 'Acquire::Check-Valid-Until "false";' > /etc/apt/apt.conf.d/99-disable-check-valid-until
          apt update

      - name: Build AMDSMI
        # bash + pipefail are required: every build command is piped through
        # tee, and without pipefail the pipeline reports tee's exit status,
        # which hides cmake/make failures and defeats the retry loop.
        shell: bash
        run: |
          set -eo pipefail
          echo 'Building on ${{ matrix.os }}'
          BUILD_FOLDER="$PROJECT_DIR/build"
          RETRIES=3

          for i in $(seq 1 $RETRIES); do
            echo "Build attempt $i for ${{ matrix.os }}..."
            # Every attempt starts from an empty build tree.
            rm -rf "$BUILD_FOLDER"
            mkdir -p "$BUILD_FOLDER"
            cd "$BUILD_FOLDER"

            # Configure, build, and package
            if cmake "$PROJECT_DIR" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON 2>&1 | tee cmake.log && \
               make -j "$(nproc)" 2>&1 | tee make.log && \
               make package 2>&1 | tee package.log; then

              # Parse and report warnings as GitHub annotations.
              # grep exits 1 when there are no warnings; that must not fail
              # the step now that pipefail is active.
              echo "::group::Build Warnings"
              { grep -i "warning" cmake.log make.log package.log || true; } | while read -r line; do
                echo "::warning::$line"
              done
              echo "::endgroup::"

              echo "Build successful on attempt $i"
              break
            else
              echo "Build failed on attempt $i"
              if [ "$i" -eq "$RETRIES" ]; then
                echo "All $RETRIES build attempts failed. Exiting."
                exit 1
              fi
              # Back off a little longer after each failed attempt.
              sleep $((2 * i))
            fi
          done
          echo "Build completed on ${{ matrix.os }}"

      - name: Upload Debian Package Artifacts
        if: github.event_name == 'pull_request'
        uses: actions/upload-artifact@v4
        with:
          name: amd-smi-lib-deb-${{ matrix.os }}-${{ env.PR_NUMBER }}-${{ env.BUILD_DATE }}
          path: ${{ env.PROJECT_DIR }}/build/amd-smi-lib*99999-local_amd64.deb
          if-no-files-found: warn
          retention-days: 7

      - name: Install AMDSMI
        run: |
          cd "$PROJECT_DIR/build"
          # Debian10 already ran "apt update" in its repository step.
          if [ "${{ matrix.os }}" != "Debian10" ]; then
            apt update
          fi

          RETRIES=3
          for i in $(seq 1 $RETRIES); do
            echo "Installation attempt $i for ${{ matrix.os }}..."
            if apt install -y ./amd-smi-lib*99999-local_amd64.deb; then
              echo "Installation successful on attempt $i"
              # -f keeps the step from failing if the link already exists.
              ln -sf /opt/rocm/bin/amd-smi /usr/local/bin

              # Verify Installation
              echo 'Verifying installation:'
              amd-smi version
              python3 -m pip list | grep amd
              python3 -m pip list | grep pip
              python3 -m pip list | grep setuptools
              echo 'Completed installation on ${{ matrix.os }}'
              break
            else
              echo "Installation failed on attempt $i"
              if [ "$i" -eq "$RETRIES" ]; then
                echo "All $RETRIES installation attempts failed. Exiting."
                exit 1
              fi
              sleep $((2 * i))
            fi
          done
          echo "Install completed on ${{ matrix.os }}"

      # Runs even when earlier steps failed so the package is always removed.
      - name: Uninstall
        if: always()
        run: |
          set -e
          echo 'Uninstalling on ${{ matrix.os }}'
          apt remove -y amd-smi-lib || true
          rm -f /usr/local/bin/amd-smi
          if [ -d /opt/rocm/share/amd_smi ]; then
            echo '/opt/rocm/share/amd_smi exists. Removing.'
            rm -rf /opt/rocm/share/amd_smi
          fi
          echo 'Uninstall done on ${{ matrix.os }}'
# ---------------------------------------------------------------------------
# debian-test: rebuild and install AMDSMI in each Debian-family container,
# then run the CLI smoke commands, the gtest binary, the Python tests and the
# example programs. Logs are written to /tmp/test-results-<os>.
# ---------------------------------------------------------------------------
  debian-test:
    name: Tests
    needs: debian-buildinstall
    runs-on:
      - self-hosted
      - ${{ vars.RUNNER_TYPE }}
    continue-on-error: true
    strategy:
      max-parallel: 10
      matrix:
        os: [Ubuntu20, Ubuntu22, Debian10]
    container:
      # The image is looked up from the repository variable <os>_DOCKER_IMAGE.
      image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
      options: >-
        --rm --privileged --device=/dev/kfd --device=/dev/dri
        --group-add video --cap-add=SYS_PTRACE
        --security-opt seccomp=unconfined --shm-size=64G
        --cap-add=SYS_MODULE -v /lib/modules:/lib/modules -u root

    steps:
      - uses: actions/checkout@v4

      # Exports PROJECT_DIR for the following steps.
      - name: Set Project Directory
        run: |
          # Only the first match is kept so PROJECT_DIR is always one path.
          TARGET_DIR=$(find "$GITHUB_WORKSPACE" -path "*/projects/amdsmi/CMakeLists.txt" -exec dirname {} \; | head -n 1)
          if [ -z "$TARGET_DIR" ]; then
            TARGET_DIR=$(find "$GITHUB_WORKSPACE" -maxdepth 2 -name "CMakeLists.txt" -exec dirname {} \; | head -n 1)
          fi
          # An empty PROJECT_DIR would turn the build folder into "/build";
          # stop here instead.
          if [ -z "$TARGET_DIR" ]; then
            echo "::error::No CMakeLists.txt found under $GITHUB_WORKSPACE"
            exit 1
          fi
          echo "PROJECT_DIR=$TARGET_DIR" >> "$GITHUB_ENV"

      # Debian 10 (buster) packages are served from archive.debian.org.
      - name: Update repositories for Debian10
        if: matrix.os == 'Debian10'
        run: |
          set -e
          echo 'Updating repositories for Debian10 (archived)'
          cat > /etc/apt/sources.list << EOF
          deb http://archive.debian.org/debian buster main
          deb http://archive.debian.org/debian-security buster/updates main
          EOF
          echo 'Acquire::Check-Valid-Until "false";' > /etc/apt/apt.conf.d/99-disable-check-valid-until
          apt update

      - name: Build and Install for Test
        run: |
          set -e
          echo 'Building for test on ${{ matrix.os }}'
          BUILD_FOLDER="$PROJECT_DIR/build"
          RETRIES=3

          for i in $(seq 1 $RETRIES); do
            echo "Build attempt $i for ${{ matrix.os }} test..."
            # Every attempt starts from an empty build tree.
            rm -rf "$BUILD_FOLDER"
            mkdir -p "$BUILD_FOLDER"
            cd "$BUILD_FOLDER"

            if cmake "$PROJECT_DIR" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
               make -j "$(nproc)" && \
               make package; then
              echo "Build successful on attempt $i"
              break
            else
              echo "Build failed on attempt $i"
              if [ "$i" -eq "$RETRIES" ]; then
                echo "All $RETRIES build attempts failed. Exiting."
                exit 1
              fi
              sleep $((2 * i))
            fi
          done

          echo 'Installing for test on ${{ matrix.os }}'
          for i in $(seq 1 $RETRIES); do
            echo "Installation attempt $i for test on ${{ matrix.os }}..."
            if apt install -y "$BUILD_FOLDER"/amd-smi-lib*99999-local_amd64.deb; then
              echo "Installation successful on attempt $i"
              # -f keeps the step from failing if the link already exists.
              ln -sf /opt/rocm/bin/amd-smi /usr/local/bin
              echo 'Install done for test on ${{ matrix.os }}'
              break
            else
              echo "Installation failed on attempt $i"
              if [ "$i" -eq "$RETRIES" ]; then
                echo "All $RETRIES installation attempts failed. Exiting."
                exit 1
              fi
              sleep $((2 * i))
            fi
          done

      # Runs each CLI subcommand with debug logging; the first failing
      # command prints its log and fails the step.
      - name: AMDSMI Command Tests
        shell: bash
        run: |
          set -e
          echo "Running AMDSMI commands on ${{ matrix.os }}"
          RESULTS_DIR=/tmp/test-results-${{ matrix.os }}
          mkdir -p "$RESULTS_DIR"
          commands=(
            "amd-smi version"
            "amd-smi list"
            "amd-smi static"
            "amd-smi firmware"
            "amd-smi ucode"
            "amd-smi bad-pages"
            "amd-smi metric"
            "amd-smi process"
            "amd-smi topology"
            "amd-smi monitor"
            "amd-smi dmon"
            "amd-smi xgmi"
            "amd-smi partition"
          )
          for cmd in "${commands[@]}"; do
            debug_cmd="$cmd --loglevel debug"
            # Log file is the command with spaces replaced by underscores.
            log_file="$RESULTS_DIR/$(echo $cmd | tr ' ' '_').log"
            echo "Running: $debug_cmd"
            if ! eval "$debug_cmd" > "$log_file" 2>&1; then
              echo "Command '$debug_cmd' failed."
              cat "$log_file"
              exit 1
            else
              echo "$debug_cmd passed."
            fi
          done
          echo "AMDSMI commands done on ${{ matrix.os }}"

      # This upload runs before the gtest/Python/example step below.
      - name: Upload AMDSMI Command Test Results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: amdsmi-command-tests-${{ matrix.os }}
          path: /tmp/test-results-${{ matrix.os }}

      - name: Run AMDSMI, Python, and Example Tests
        shell: bash
        run: |
          set -e
          echo 'Running other tests on ${{ matrix.os }}'
          RESULTS_DIR=/tmp/test-results-${{ matrix.os }}
          # Lines of the gtest output that are echoed as a summary.
          SUMMARY_PATTERN="\[==========\]|\[ PASSED \]|\[ SKIPPED \]|\[ FAILED \]"

          # AMDSMI Tests
          echo 'Running AMDSMI tests'
          cd /opt/rocm/share/amd_smi/tests
          # Sourcing this file is expected to define BLACKLIST_ALL_ASICS,
          # which is used as the negative gtest filter below.
          source amdsmitst.exclude

          AMDSMI_RETRIES=3
          for attempt in $(seq 1 $AMDSMI_RETRIES); do
            echo "AMDSMI test attempt $attempt for ${{ matrix.os }}..."
            if ./amdsmitst --gtest_filter="-$(echo ${BLACKLIST_ALL_ASICS})" > "$RESULTS_DIR/amdsmi_tests.log" 2>&1; then
              echo "AMDSMI tests passed on attempt $attempt"
              echo "=============== TEST OUTPUT ==============="
              # "|| true": an empty summary must not abort the step.
              grep -E "$SUMMARY_PATTERN" "$RESULTS_DIR/amdsmi_tests.log" || true
              echo "=============================================="
              echo "AMDSMI tests done"
              break
            else
              TEST_EXIT_CODE=$?
              echo "AMDSMI tests failed on attempt $attempt with exit code $TEST_EXIT_CODE"
              if [ "$attempt" -eq "$AMDSMI_RETRIES" ]; then
                echo "All $AMDSMI_RETRIES AMDSMI test attempts failed. Final failure."
                echo "=============== TEST OUTPUT ==============="
                # "|| true": if the binary produced no summary lines, still
                # exit with the real test exit code below rather than grep's.
                grep -E "$SUMMARY_PATTERN" "$RESULTS_DIR/amdsmi_tests.log" || true
                echo "=============================================="
                echo "AMDSMI tests failed"
                exit $TEST_EXIT_CODE
              else
                echo "Retrying AMDSMI tests in $((2 * attempt)) seconds..."
                sleep $((2 * attempt))
              fi
            fi
          done

          # Python Tests
          echo 'Running Python tests'
          cd /opt/rocm/share/amd_smi/tests/python_unittest
          echo "Running integration tests..."
          if ! ./integration_test.py -v > "$RESULTS_DIR/integration_test_output.txt" 2>&1; then
            echo "Integration tests failed!"
            echo "=============== INTEGRATION TEST OUTPUT ==============="
            tail -100 "$RESULTS_DIR/integration_test_output.txt"
            echo "======================================================="
            exit 1
          else
            echo "Integration tests passed"
          fi

          echo "Running unit tests..."
          if ! ./unit_tests.py -v > "$RESULTS_DIR/unit_test_output.txt" 2>&1; then
            echo "Unit tests failed!"
            echo "=============== UNIT TEST OUTPUT ==============="
            tail -100 "$RESULTS_DIR/unit_test_output.txt"
            echo "================================================"
            exit 1
          else
            echo "Unit tests passed"
          fi

          echo "Python tests done"

          # Example Tests
          echo 'Running Example tests'
          cd "$PROJECT_DIR/example"
          rm -rf build
          cmake -B build -DENABLE_ESMI_LIB=OFF
          make -C build -j "$(nproc)"
          cd build
          # A failing example is reported but does not fail the step.
          ./amd_smi_drm_ex > "$RESULTS_DIR/amd_smi_drm_ex.log" 2>&1 || echo 'amd_smi_drm_ex failed'
          ./amd_smi_nodrm_ex > "$RESULTS_DIR/amd_smi_nodrm_ex.log" 2>&1 || echo 'amd_smi_nodrm_ex failed'
          echo "Example tests done"

      # The steps below print each log into the job output, whether or not
      # the tests passed.
      - name: AMDSMI Test Results
        if: always()
        run: |
          echo "Displaying AMDSMI test results for ${{ matrix.os }}"
          cat /tmp/test-results-${{ matrix.os }}/amdsmi_tests.log || echo "No AMDSMI test results found for ${{ matrix.os }}"

      - name: Integration Test Results
        if: always()
        run: |
          echo "Displaying Integration test results for ${{ matrix.os }}"
          cat /tmp/test-results-${{ matrix.os }}/integration_test_output.txt || echo "No integration test results found for ${{ matrix.os }}"

      - name: Unit Test Results
        if: always()
        run: |
          echo "Displaying Unit Test Results for ${{ matrix.os }}"
          cat /tmp/test-results-${{ matrix.os }}/unit_test_output.txt || echo "No unit test results found for ${{ matrix.os }}"

      - name: Example DRM Test Results
        if: always()
        run: |
          echo "Displaying Example DRM test results for ${{ matrix.os }}"
          cat /tmp/test-results-${{ matrix.os }}/amd_smi_drm_ex.log || echo "No DRM example test results found for ${{ matrix.os }}"

      - name: Example NoDRM Test Results
        if: always()
        run: |
          echo "Displaying Example NoDRM test results for ${{ matrix.os }}"
          cat /tmp/test-results-${{ matrix.os }}/amd_smi_nodrm_ex.log || echo "No NoDRM example test results found for ${{ matrix.os }}"
# ---------------------------------------------------------------------------
# rpm-buildinstall: build the AMDSMI .rpm in each RPM-family container,
# upload it for pull requests, install it, verify the CLI, then uninstall.
# RHEL10 and AlmaLinux8 use a separate build/install path (QA_RPATHS, more
# retries).
# ---------------------------------------------------------------------------
  rpm-buildinstall:
    name: Build
    runs-on:
      - self-hosted
      - ${{ vars.RUNNER_TYPE }}
    continue-on-error: true
    strategy:
      max-parallel: 10
      matrix:
        os:
          - SLES
          - RHEL8
          - RHEL9
          - RHEL10
          - AzureLinux3
          - AlmaLinux8
    container:
      # The image is looked up from the repository variable <os>_DOCKER_IMAGE.
      image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
      options: >-
        --rm --privileged --device=/dev/kfd --device=/dev/dri
        --group-add video --cap-add=SYS_PTRACE
        --security-opt seccomp=unconfined --shm-size=64G
        --cap-add=SYS_MODULE -v /lib/modules:/lib/modules -u root

    steps:
      - uses: actions/checkout@v4

      # PR_NUMBER and BUILD_DATE are only used to name the uploaded artifact.
      - name: Set Artifact Metadata
        if: github.event_name == 'pull_request'
        run: |
          # Set PR number and date for artifact naming
          echo "PR_NUMBER=PR${{ github.event.pull_request.number }}" >> "$GITHUB_ENV"
          # Set date in MMDDYY-HHMM format (UTC time)
          echo "BUILD_DATE=$(date -u +%m%d%y-%H%M)" >> "$GITHUB_ENV"

      # Exports PROJECT_DIR for the following steps.
      - name: Set Project Directory
        run: |
          # Only the first match is kept so PROJECT_DIR is always one path.
          TARGET_DIR=$(find "$GITHUB_WORKSPACE" -path "*/projects/amdsmi/CMakeLists.txt" -exec dirname {} \; | head -n 1)
          if [ -z "$TARGET_DIR" ]; then
            TARGET_DIR=$(find "$GITHUB_WORKSPACE" -maxdepth 2 -name "CMakeLists.txt" -exec dirname {} \; | head -n 1)
          fi
          # An empty PROJECT_DIR would turn the build folder into "/build";
          # stop here instead.
          if [ -z "$TARGET_DIR" ]; then
            echo "::error::No CMakeLists.txt found under $GITHUB_WORKSPACE"
            exit 1
          fi
          echo "PROJECT_DIR=$TARGET_DIR" >> "$GITHUB_ENV"

      # Exports PACKAGE_MANAGER (zypper or dnf) for the install step.
      - name: Set PkgMgr
        run: |
          set -e
          case "${{ matrix.os }}" in
            SLES)
              echo "PACKAGE_MANAGER=zypper" >> "$GITHUB_ENV"
              ;;
            RHEL8|RHEL9|RHEL10|AlmaLinux8|AzureLinux3)
              echo "PACKAGE_MANAGER=dnf" >> "$GITHUB_ENV"
              ;;
            *)
              # Fail loudly when a new matrix entry has no mapping instead of
              # leaving PACKAGE_MANAGER unset.
              echo "::error::No package manager mapping for ${{ matrix.os }}"
              exit 1
              ;;
          esac

      - name: Add more_itertools
        if: matrix.os == 'AzureLinux3'
        run: |
          set -e
          echo 'Installing more_itertools on ${{ matrix.os }}'
          python3 -m pip install more_itertools

      - name: Build AMDSMI(RHEL10 & AlmaLinux8)
        if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8'
        run: |
          set -e
          echo 'Building on ${{ matrix.os }} with retries and QA_RPATHS'
          BUILD_FOLDER="$PROJECT_DIR/build"
          RETRIES=5

          # Set QA_RPATHS to ignore empty (0x0010) and invalid (0x0002) RPATHs
          export QA_RPATHS=$((0x0010 | 0x0002))

          for i in $(seq 1 $RETRIES); do
            echo "Build attempt $i for ${{ matrix.os }} ..."
            # Every attempt starts from an empty build tree.
            rm -rf "$BUILD_FOLDER"
            mkdir -p "$BUILD_FOLDER"
            cd "$BUILD_FOLDER"

            if cmake "$PROJECT_DIR" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
               make -j "$(nproc)" && \
               make package; then
              echo "Build successful on attempt $i"
              break
            else
              echo "Build failed on attempt $i"
              if [ "$i" -eq "$RETRIES" ]; then
                echo "All $RETRIES build attempts failed. Exiting."
                exit 1
              fi
              sleep $((2 * i))
            fi
          done
          echo "Build completed on ${{ matrix.os }}"

      - name: Build AMDSMI
        if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8'
        # bash + pipefail are required: every build command is piped through
        # tee, and without pipefail the pipeline reports tee's exit status,
        # which hides cmake/make failures and defeats the retry loop.
        shell: bash
        run: |
          set -eo pipefail
          echo 'Building on ${{ matrix.os }}'
          BUILD_FOLDER="$PROJECT_DIR/build"
          RETRIES=3

          for i in $(seq 1 $RETRIES); do
            echo "Build attempt $i for ${{ matrix.os }}..."
            # Every attempt starts from an empty build tree.
            rm -rf "$BUILD_FOLDER"
            mkdir -p "$BUILD_FOLDER"
            cd "$BUILD_FOLDER"

            # Capture build output to parse warnings
            if cmake "$PROJECT_DIR" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON 2>&1 | tee cmake.log && \
               make -j "$(nproc)" 2>&1 | tee make.log && \
               make package 2>&1 | tee package.log; then

              # Parse and report warnings as GitHub annotations.
              # grep exits 1 when there are no warnings; that must not fail
              # the step now that pipefail is active.
              echo "::group::Build Warnings"
              { grep -i "warning" cmake.log make.log package.log || true; } | while read -r line; do
                echo "::warning::$line"
              done
              echo "::endgroup::"

              echo "Build successful on attempt $i"
              break
            else
              echo "Build failed on attempt $i"
              if [ "$i" -eq "$RETRIES" ]; then
                echo "All $RETRIES build attempts failed. Exiting."
                exit 1
              fi
              sleep $((2 * i))
            fi
          done
          echo "Build completed on ${{ matrix.os }}"

      # Exactly one of the two build steps above runs for any matrix entry,
      # so a single upload step serves both.
      - name: Upload RPM Package Artifacts
        if: github.event_name == 'pull_request'
        uses: actions/upload-artifact@v4
        with:
          name: amd-smi-lib-rpm-${{ matrix.os }}-${{ env.PR_NUMBER }}-${{ env.BUILD_DATE }}
          path: ${{ env.PROJECT_DIR }}/build/amd-smi-lib-*99999-local*.rpm
          if-no-files-found: warn
          retention-days: 7

      - name: Install AMDSMI(RHEL10 & AlmaLinux8)
        if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8'
        run: |
          cd "$PROJECT_DIR/build"
          dnf install python3-setuptools python3-wheel -y

          RETRIES=3
          for i in $(seq 1 $RETRIES); do
            echo "${{ matrix.os }}: Installation attempt $i..."
            # '*' is quoted so the shell passes it to dnf unexpanded.
            if timeout 10m dnf install -y --skip-broken --disablerepo='*' ./amd-smi-lib-*99999-local*.rpm; then
              echo "Installation successful on attempt $i"
              # -f keeps the step from failing if the link already exists.
              ln -sf /opt/rocm/bin/amd-smi /usr/local/bin

              echo 'Verifying installation:'
              amd-smi version
              python3 -m pip list | grep amd
              python3 -m pip list | grep pip
              python3 -m pip list | grep setuptools
              echo 'Completed installation on ${{ matrix.os }}'
              break
            else
              echo "Installation failed on attempt $i"
              if [ "$i" -eq "$RETRIES" ]; then
                echo "All $RETRIES installation attempts failed. Exiting."
                exit 1
              fi
              sleep $((2 * i))
            fi
          done

      - name: Install AMDSMI
        if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8'
        run: |
          cd "$PROJECT_DIR/build"
          # Quoted so an unset PACKAGE_MANAGER is not a shell syntax error.
          case "$PACKAGE_MANAGER" in
            zypper)
              timeout 10m zypper --no-refresh --no-gpg-checks install -y ./amd-smi-lib-*99999-local*.rpm
              ;;
            dnf)
              dnf install python3-setuptools python3-wheel -y
              RETRIES=3
              for i in $(seq 1 $RETRIES); do
                echo "Attempt $i: Installing AMDSMI package..."
                # '*' is quoted so the shell passes it to dnf unexpanded.
                if timeout 10m dnf install -y --skip-broken --disablerepo='*' ./amd-smi-lib-*99999-local*.rpm; then
                  echo "AMDSMI package installed successfully."
                  break
                else
                  echo "Installation failed on attempt $i. Retrying..."
                  if [ "$i" -eq "$RETRIES" ]; then
                    echo "All $RETRIES attempts failed. Exiting."
                    exit 1
                  fi
                  sleep 10
                fi
              done
              ;;
          esac
          # -f keeps the step from failing if the link already exists.
          ln -sf /opt/rocm/bin/amd-smi /usr/local/bin

          # Verify Installation
          echo 'Verifying installation:'
          amd-smi version
          python3 -m pip list | grep amd
          python3 -m pip list | grep pip
          python3 -m pip list | grep setuptools
          echo 'Completed installation on ${{ matrix.os }}'

      # Runs even when earlier steps failed so the package is always removed.
      - name: Uninstall
        if: always()
        run: |
          set -e
          echo 'Uninstalling on ${{ matrix.os }}'
          case "${{ matrix.os }}" in
            SLES)
              zypper remove -y amd-smi-lib || true
              ;;
            RHEL8|RHEL9|RHEL10|AlmaLinux8|AzureLinux3)
              dnf remove -y amd-smi-lib || true
              ;;
          esac
          rm -f /usr/local/bin/amd-smi
          if [ -d /opt/rocm/share/amd_smi ]; then
            echo '/opt/rocm/share/amd_smi exists. Removing.'
            rm -rf /opt/rocm/share/amd_smi
          fi
          echo 'Uninstall done on ${{ matrix.os }}'
# ---------------------------------------------------------------------------
# rpm-test: rebuild and install AMDSMI in each RPM-family container, then run
# the CLI smoke commands, the gtest binary, the Python tests and the example
# programs. Logs are written to /tmp/test-results-<os>.
# ---------------------------------------------------------------------------
  rpm-test:
    name: Tests
    # NOTE(review): also waits for debian-test, which serialises the two test
    # jobs - presumably to avoid sharing the GPU runners; confirm intent.
    needs: [rpm-buildinstall, debian-test]
    runs-on:
      - self-hosted
      - ${{ vars.RUNNER_TYPE }}
    continue-on-error: true
    strategy:
      max-parallel: 10
      matrix:
        os:
          - SLES
          - RHEL8
          - RHEL9
          - RHEL10
          - AzureLinux3
          - AlmaLinux8
    container:
      # The image is looked up from the repository variable <os>_DOCKER_IMAGE.
      image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
      options: >-
        --rm --privileged --device=/dev/kfd --device=/dev/dri
        --group-add video --cap-add=SYS_PTRACE
        --security-opt seccomp=unconfined --shm-size=64G
        --cap-add=SYS_MODULE -v /lib/modules:/lib/modules -u root

    steps:
      - uses: actions/checkout@v4

      # Exports PROJECT_DIR for the following steps.
      - name: Set Project Directory
        run: |
          # Only the first match is kept so PROJECT_DIR is always one path.
          TARGET_DIR=$(find "$GITHUB_WORKSPACE" -path "*/projects/amdsmi/CMakeLists.txt" -exec dirname {} \; | head -n 1)
          if [ -z "$TARGET_DIR" ]; then
            TARGET_DIR=$(find "$GITHUB_WORKSPACE" -maxdepth 2 -name "CMakeLists.txt" -exec dirname {} \; | head -n 1)
          fi
          # An empty PROJECT_DIR would turn the build folder into "/build";
          # stop here instead.
          if [ -z "$TARGET_DIR" ]; then
            echo "::error::No CMakeLists.txt found under $GITHUB_WORKSPACE"
            exit 1
          fi
          echo "PROJECT_DIR=$TARGET_DIR" >> "$GITHUB_ENV"

      # Exports PACKAGE_MANAGER (zypper or dnf) for the install step.
      - name: Set PkgMgr
        run: |
          set -e
          case "${{ matrix.os }}" in
            SLES)
              echo "PACKAGE_MANAGER=zypper" >> "$GITHUB_ENV"
              ;;
            RHEL8|RHEL9|RHEL10|AlmaLinux8|AzureLinux3)
              echo "PACKAGE_MANAGER=dnf" >> "$GITHUB_ENV"
              ;;
            *)
              # Fail loudly when a new matrix entry has no mapping instead of
              # leaving PACKAGE_MANAGER unset.
              echo "::error::No package manager mapping for ${{ matrix.os }}"
              exit 1
              ;;
          esac

      - name: Add more_itertools
        if: matrix.os == 'AzureLinux3'
        run: |
          set -e
          echo 'Installing more_itertools on ${{ matrix.os }}'
          python3 -m pip install more_itertools

      - name: Build and Install for Tests (RHEL10 & AlmaLinux8)
        if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8'
        run: |
          set -e
          echo 'Building for test on RHEL10/AlmaLinux8 with retries and QA_RPATHS'
          BUILD_FOLDER="$PROJECT_DIR/build"
          RETRIES=5

          # Set QA_RPATHS to ignore empty (0x0010) and invalid (0x0002) RPATHs
          export QA_RPATHS=$((0x0010 | 0x0002))

          for i in $(seq 1 $RETRIES); do
            echo "Build attempt $i for RHEL10/AlmaLinux8 test..."
            # Every attempt starts from an empty build tree.
            rm -rf "$BUILD_FOLDER"
            mkdir -p "$BUILD_FOLDER"
            cd "$BUILD_FOLDER"

            if cmake "$PROJECT_DIR" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
               make -j "$(nproc)" && \
               make package; then
              echo "Build successful on attempt $i"
              break
            else
              echo "Build failed on attempt $i"
              if [ "$i" -eq "$RETRIES" ]; then
                echo "All $RETRIES build attempts failed. Exiting."
                exit 1
              fi
              sleep $((2 * i))
            fi
          done

          echo 'Installing for test on RHEL10/AlmaLinux8'
          dnf install python3-setuptools python3-wheel -y

          for i in $(seq 1 $RETRIES); do
            echo "RHEL10/AlmaLinux8: Installation attempt $i for test..."
            # '*' is quoted so the shell passes it to dnf unexpanded.
            if timeout 10m dnf install -y --skip-broken --disablerepo='*' "$BUILD_FOLDER"/amd-smi-lib-*99999-local*.rpm; then
              echo "Installation successful on attempt $i"
              # -f keeps the step from failing if the link already exists.
              ln -sf /opt/rocm/bin/amd-smi /usr/local/bin
              echo 'Install done for test on RHEL10/AlmaLinux8'
              break
            else
              echo "Installation failed on attempt $i"
              if [ "$i" -eq "$RETRIES" ]; then
                echo "All $RETRIES installation attempts failed. Exiting."
                exit 1
              fi
              sleep $((2 * i))
            fi
          done

      - name: Build and Install for Tests
        if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8'
        run: |
          set -e
          echo 'Building for test on ${{ matrix.os }}'
          BUILD_FOLDER="$PROJECT_DIR/build"
          # Single build attempt from an empty build tree (no retry here).
          rm -rf "$BUILD_FOLDER"
          mkdir -p "$BUILD_FOLDER"
          cd "$BUILD_FOLDER"
          cmake "$PROJECT_DIR" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON
          make -j "$(nproc)"
          make package

          echo 'Installing for test on ${{ matrix.os }}'
          # Quoted so an unset PACKAGE_MANAGER is not a shell syntax error.
          case "$PACKAGE_MANAGER" in
            zypper)
              timeout 10m zypper --no-refresh --no-gpg-checks install -y "$BUILD_FOLDER"/amd-smi-lib-*99999-local*.rpm
              ;;
            dnf)
              dnf install python3-setuptools python3-wheel -y
              RETRIES=3
              for i in $(seq 1 $RETRIES); do
                echo "Attempt $i: Installing..."
                # '*' is quoted so the shell passes it to dnf unexpanded.
                if timeout 10m dnf install -y --skip-broken --disablerepo='*' "$BUILD_FOLDER"/amd-smi-lib-*99999-local*.rpm; then
                  echo "Install successful."
                  break
                else
                  echo "Attempt $i failed. Retrying..."
                  if [ "$i" -eq "$RETRIES" ]; then
                    echo "All attempts failed."
                    exit 1
                  fi
                  sleep 10
                fi
              done
              ;;
          esac
          # -f keeps the step from failing if the link already exists.
          ln -sf /opt/rocm/bin/amd-smi /usr/local/bin
          echo 'Install done for test on ${{ matrix.os }}'

      # Runs each CLI subcommand with debug logging; the first failing
      # command prints its log and fails the step.
      - name: AMDSMI Command Tests
        shell: bash
        run: |
          set -e
          echo "Running AMDSMI commands on ${{ matrix.os }}"
          RESULTS_DIR=/tmp/test-results-${{ matrix.os }}
          mkdir -p "$RESULTS_DIR"
          commands=(
            "amd-smi version"
            "amd-smi list"
            "amd-smi static"
            "amd-smi firmware"
            "amd-smi ucode"
            "amd-smi bad-pages"
            "amd-smi metric"
            "amd-smi process"
            "amd-smi topology"
            "amd-smi monitor"
            "amd-smi dmon"
            "amd-smi xgmi"
            "amd-smi partition"
          )
          for cmd in "${commands[@]}"; do
            debug_cmd="$cmd --loglevel debug"
            # Log file is the command with spaces replaced by underscores.
            log_file="$RESULTS_DIR/$(echo $cmd | tr ' ' '_').log"
            echo "Running: $debug_cmd"
            if ! eval "$debug_cmd" > "$log_file" 2>&1; then
              echo "Command '$debug_cmd' failed."
              cat "$log_file"
              exit 1
            else
              echo "$debug_cmd passed."
            fi
          done
          echo "AMDSMI commands done on ${{ matrix.os }}"

      # This upload runs before the gtest/Python/example step below.
      - name: Upload AMDSMI Command Test Results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: amdsmi-command-tests-${{ matrix.os }}
          path: /tmp/test-results-${{ matrix.os }}

      - name: Run AMDSMI, Python, and Example Tests
        shell: bash
        run: |
          set -e
          echo 'Running other tests on ${{ matrix.os }}'
          RESULTS_DIR=/tmp/test-results-${{ matrix.os }}
          # Lines of the gtest output that are echoed as a summary.
          SUMMARY_PATTERN="\[==========\]|\[ PASSED \]|\[ SKIPPED \]|\[ FAILED \]"

          # AMDSMI Tests
          echo 'Running AMDSMI tests'
          cd /opt/rocm/share/amd_smi/tests
          # Sourcing this file is expected to define BLACKLIST_ALL_ASICS,
          # which is used as the negative gtest filter below.
          source amdsmitst.exclude

          AMDSMI_RETRIES=3
          for attempt in $(seq 1 $AMDSMI_RETRIES); do
            echo "AMDSMI test attempt $attempt for ${{ matrix.os }}..."
            if ./amdsmitst --gtest_filter="-$(echo ${BLACKLIST_ALL_ASICS})" > "$RESULTS_DIR/amdsmi_tests.log" 2>&1; then
              echo "AMDSMI tests passed on attempt $attempt"
              echo "=============== TEST OUTPUT ==============="
              # "|| true": an empty summary must not abort the step.
              grep -E "$SUMMARY_PATTERN" "$RESULTS_DIR/amdsmi_tests.log" || true
              echo "=============================================="
              echo "AMDSMI tests done"
              break
            else
              TEST_EXIT_CODE=$?
              echo "AMDSMI tests failed on attempt $attempt with exit code $TEST_EXIT_CODE"
              if [ "$attempt" -eq "$AMDSMI_RETRIES" ]; then
                echo "All $AMDSMI_RETRIES AMDSMI test attempts failed. Final failure."
                echo "=============== TEST OUTPUT ==============="
                # "|| true": if the binary produced no summary lines, still
                # exit with the real test exit code below rather than grep's.
                grep -E "$SUMMARY_PATTERN" "$RESULTS_DIR/amdsmi_tests.log" || true
                echo "=============================================="
                echo "AMDSMI tests failed"
                exit $TEST_EXIT_CODE
              else
                echo "Retrying AMDSMI tests in $((2 * attempt)) seconds..."
                sleep $((2 * attempt))
              fi
            fi
          done

          # Python Tests
          echo 'Running Python tests'
          cd /opt/rocm/share/amd_smi/tests/python_unittest
          echo "Running integration tests..."
          if ! ./integration_test.py -v > "$RESULTS_DIR/integration_test_output.txt" 2>&1; then
            echo "Integration tests failed!"
            echo "=============== INTEGRATION TEST OUTPUT ==============="
            tail -100 "$RESULTS_DIR/integration_test_output.txt"
            echo "======================================================="
            exit 1
          else
            echo "Integration tests passed"
          fi

          echo "Running unit tests..."
          if ! ./unit_tests.py -v > "$RESULTS_DIR/unit_test_output.txt" 2>&1; then
            echo "Unit tests failed!"
            echo "=============== UNIT TEST OUTPUT ==============="
            tail -100 "$RESULTS_DIR/unit_test_output.txt"
            echo "================================================"
            exit 1
          else
            echo "Unit tests passed"
          fi

          echo "Python tests done"

          # Example Tests
          echo 'Running Example tests'
          cd "$PROJECT_DIR/example"
          rm -rf build
          cmake -B build -DENABLE_ESMI_LIB=OFF
          make -C build -j "$(nproc)"
          cd build
          # A failing example is reported but does not fail the step.
          ./amd_smi_drm_ex > "$RESULTS_DIR/amd_smi_drm_ex.log" 2>&1 || echo 'amd_smi_drm_ex failed'
          ./amd_smi_nodrm_ex > "$RESULTS_DIR/amd_smi_nodrm_ex.log" 2>&1 || echo 'amd_smi_nodrm_ex failed'
          echo "Example tests done"

      # The steps below print each log into the job output, whether or not
      # the tests passed.
      - name: AMDSMI Test Results
        if: always()
        run: |
          echo "Displaying AMDSMI test results for ${{ matrix.os }}"
          cat /tmp/test-results-${{ matrix.os }}/amdsmi_tests.log || echo "No AMDSMI test results found for ${{ matrix.os }}"

      - name: Integration Test Results
        if: always()
        run: |
          echo "Displaying Integration test results for ${{ matrix.os }}"
          cat /tmp/test-results-${{ matrix.os }}/integration_test_output.txt || echo "No integration test results found for ${{ matrix.os }}"

      - name: Unit Test Results
        if: always()
        run: |
          echo "Displaying Unit Test Results for ${{ matrix.os }}"
          cat /tmp/test-results-${{ matrix.os }}/unit_test_output.txt || echo "No unit test results found for ${{ matrix.os }}"

      - name: Example DRM Test Results
        if: always()
        run: |
          echo "Displaying Example DRM test results for ${{ matrix.os }}"
          cat /tmp/test-results-${{ matrix.os }}/amd_smi_drm_ex.log || echo "No DRM example test results found for ${{ matrix.os }}"

      - name: Example NoDRM Test Results
        if: always()
        run: |
          echo "Displaying Example NoDRM test results for ${{ matrix.os }}"
          cat /tmp/test-results-${{ matrix.os }}/amd_smi_nodrm_ex.log || echo "No NoDRM example test results found for ${{ matrix.os }}"