From 757aa016f4612e46772c5268fedeee0a95694aad Mon Sep 17 00:00:00 2001 From: "Williams, Justin" Date: Tue, 29 Apr 2025 12:34:22 -0700 Subject: [PATCH] [SWDEV-500518/SWDEV-527430] CI v4.1 (#316) [SWDEV-500518] CI v4.1 Signed-off-by: Justin Williams --- .github/workflows/abi-check.yml | 19 ++---- .github/workflows/abi-check2.yml | 33 +++-------- .github/workflows/amdsmi-build.yml | 94 ++++++++++++++++++++---------- 3 files changed, 76 insertions(+), 70 deletions(-) diff --git a/.github/workflows/abi-check.yml b/.github/workflows/abi-check.yml index 4eb7140ab3..365e91c51b 100644 --- a/.github/workflows/abi-check.yml +++ b/.github/workflows/abi-check.yml @@ -4,14 +4,12 @@ on: pull_request: branches: - amd-staging - - amd-mainline - release/rocm-rel-* paths: - 'include/amd_smi/amdsmi.h' push: branches: - amd-staging - - amd-mainline - release/rocm-rel-* paths: - 'include/amd_smi/amdsmi.h' @@ -53,12 +51,7 @@ jobs: echo "Pull request head branch: ${{ github.head_ref }}" # Fetch the appropriate base branch - if [ "${{ github.base_ref }}" = "amd-mainline" ]; then - # Pull request to amd-mainline: use amd-mainline as old - git fetch origin amd-mainline - git checkout FETCH_HEAD -- include/amd_smi/amdsmi.h - mv include/amd_smi/amdsmi.h amdsmi_old.h - elif [ "${{ github.base_ref }}" = "amd-staging" ]; then + if [ "${{ github.base_ref }}" = "amd-staging" ]; then # Pull request to amd-staging: use amd-staging as old git fetch origin amd-staging git checkout FETCH_HEAD -- include/amd_smi/amdsmi.h @@ -71,10 +64,7 @@ jobs: # Create a status file with default failure echo "abi_exit_code=1" > $GITHUB_WORKSPACE/abi_status.txt - if [ "${{ github.base_ref }}" = "amd-mainline" ]; then - echo "Comparing amd-mainline (old) with ${{ github.head_ref }} (new)" - abi-compliance-checker -lib amdsmi -old amdsmi_old.h -new amdsmi_new.h -v1 amd-mainline -v2 ${{ github.head_ref }} -report-path abi-report.html && echo "abi_exit_code=0" > $GITHUB_WORKSPACE/abi_status.txt - elif [ "${{ github.base_ref }}" = "amd-staging" ]; then + if [ "${{ github.base_ref }}" = "amd-staging" ]; then echo "Comparing amd-staging (old) with ${{ github.head_ref }} (new)" abi-compliance-checker -lib amdsmi -old amdsmi_old.h -new amdsmi_new.h -v1 amd-staging -v2 ${{ github.head_ref }} -report-path abi-report.html && echo "abi_exit_code=0" > $GITHUB_WORKSPACE/abi_status.txt fi @@ -114,10 +104,11 @@ jobs: source $GITHUB_WORKSPACE/abi_status.txt echo "ABI check exit code: $abi_exit_code" if [ "$abi_exit_code" -ne 0 ]; then - echo "ABI check failed with exit code $abi_exit_code. Check logs for more Details." + echo "::error::⚠️ ABI BREAKAGE FOUND ⚠️ CHECK \"Run ABI Compliance Check\" LOGS OR THE abi-report ARTIFACT ATTACHED TO THIS WORKFLOW FOR MORE DETAILS" + echo "::error::View the HTML report in the Artifacts section of this workflow run for detailed ABI compatibility analysis" exit 1 else - echo "ABI check succeeded" + echo "✅ ABI check succeeded" fi else echo "ABI status file not found at $GITHUB_WORKSPACE/abi_status.txt, assuming failure" diff --git a/.github/workflows/abi-check2.yml b/.github/workflows/abi-check2.yml index 7cbf706df2..4f9065f6cd 100644 --- a/.github/workflows/abi-check2.yml +++ b/.github/workflows/abi-check2.yml @@ -4,14 +4,12 @@ on: pull_request: branches: - amd-staging - - amd-mainline - release/rocm-rel-* paths: - 'include/amd_smi/amdsmi.h' push: branches: - amd-staging - - amd-mainline - release/rocm-rel-* paths: - 'include/amd_smi/amdsmi.h' @@ -53,12 +51,7 @@ jobs: if [ "${{ github.event_name }}" = "pull_request" ]; then # For pull requests, use the target branch as the old file - if [ "${{ github.base_ref }}" = "amd-mainline" ]; then - # Pull request to amd-mainline: use amd-mainline as old - git fetch origin amd-mainline - git checkout FETCH_HEAD -- include/amd_smi/amdsmi.h - mv include/amd_smi/amdsmi.h amdsmi_old.h - elif [ "${{ github.base_ref }}" = "amd-staging" ]; then + if [ "${{ github.base_ref }}" = "amd-staging" ]; then # Pull request to amd-staging: use amd-staging as old git fetch origin amd-staging git checkout FETCH_HEAD -- include/amd_smi/amdsmi.h @@ -66,12 +59,7 @@ jobs: fi elif [ "${{ github.event_name }}" = "push" ]; then # For pushes, determine which branch was pushed to - if [ "${{ github.ref_name }}" = "amd-mainline" ]; then - # Push to amd-mainline: use amd-mainline as old - git fetch origin amd-mainline - git checkout FETCH_HEAD -- include/amd_smi/amdsmi.h - mv include/amd_smi/amdsmi.h amdsmi_old.h - elif [ "${{ github.ref_name }}" = "amd-staging" ]; then + if [ "${{ github.ref_name }}" = "amd-staging" ]; then # Push to amd-staging: use amd-staging as old git fetch origin amd-staging git checkout FETCH_HEAD -- include/amd_smi/amdsmi.h @@ -101,18 +89,12 @@ jobs: # Set comparison message and run the appropriate check if [ "${{ github.event_name }}" = "pull_request" ]; then - if [ "${{ github.base_ref }}" = "amd-mainline" ]; then - COMPARE_MSG="amd-mainline vs ${{ github.head_ref }}" - abi-compliance-checker -l amdsmi -old amdsmi_old.h -new amdsmi_new.h -v1 amd-mainline -v2 ${{ github.head_ref }} -report-path abi-report.html -strict || { EXIT_CODE=$?; echo "abi_exit_code=$EXIT_CODE" > $GITHUB_WORKSPACE/abi_status.txt; exit $EXIT_CODE; } - elif [ "${{ github.base_ref }}" = "amd-staging" ]; then + if [ "${{ github.base_ref }}" = "amd-staging" ]; then COMPARE_MSG="amd-staging vs ${{ github.head_ref }}" abi-compliance-checker -l amdsmi -old amdsmi_old.h -new amdsmi_new.h -v1 amd-staging -v2 ${{ github.head_ref }} -report-path abi-report.html -strict || { EXIT_CODE=$?; echo "abi_exit_code=$EXIT_CODE" > $GITHUB_WORKSPACE/abi_status.txt; exit $EXIT_CODE; } fi elif [ "${{ github.event_name }}" = "push" ]; then - if [ "${{ github.ref_name }}" = "amd-mainline" ]; then - COMPARE_MSG="amd-mainline vs amd-staging" - abi-compliance-checker -l amdsmi -old amdsmi_old.h -new amdsmi_new.h -v1 amd-mainline -v2 amd-staging -report-path abi-report.html -strict || { EXIT_CODE=$?; echo "abi_exit_code=$EXIT_CODE" > $GITHUB_WORKSPACE/abi_status.txt; exit $EXIT_CODE; } - elif [ "${{ github.ref_name }}" = "amd-staging" ]; then + if [ "${{ github.ref_name }}" = "amd-staging" ]; then COMPARE_MSG="amd-staging vs ${{ github.ref_name }}" abi-compliance-checker -l amdsmi -old amdsmi_old.h -new amdsmi_new.h -v1 amd-staging -v2 ${{ github.ref_name }} -report-path abi-report.html -strict || { EXIT_CODE=$?; echo "abi_exit_code=$EXIT_CODE" > $GITHUB_WORKSPACE/abi_status.txt; exit $EXIT_CODE; } fi @@ -191,12 +173,13 @@ jobs: source $GITHUB_WORKSPACE/abi_status.txt echo "ABI check exit code: $abi_exit_code" if [ "$abi_exit_code" -ne 0 ]; then - echo "ABI check failed with exit code $abi_exit_code. Check logs for more Details." + echo "::error::⚠️ CHANGES TO AMDSMI.H FILE FOUND ⚠️ CHECK \"Run ABI Compliance Check\" LOGS OR THE abi-report ARTIFACT ATTACHED TO THIS WORKFLOW FOR MORE DETAILS" + echo "::error::View the HTML report in the Artifacts section of this workflow run for detailed ABI compatibility analysis" exit 1 else - echo "ABI check succeeded" + echo "✅ ABI check succeeded" fi else - echo "ABI status file not found at $GITHUB_WORKSPACE/abi_status.txt, assuming failure" + echo "::error::ABI status file not found at $GITHUB_WORKSPACE/abi_status.txt, assuming failure" exit 1 fi diff --git a/.github/workflows/amdsmi-build.yml b/.github/workflows/amdsmi-build.yml index d2371c0c22..981e0505d4 100644 --- a/.github/workflows/amdsmi-build.yml +++ b/.github/workflows/amdsmi-build.yml @@ -21,6 +21,7 @@ jobs: - ${{ vars.RUNNER_TYPE }} continue-on-error: true strategy: + max-parallel: 10 matrix: os: [Ubuntu20, Ubuntu22, Debian10] container: @@ -121,22 +122,25 @@ jobs: "amd-smi list" "amd-smi static" "amd-smi firmware" + "amd-smi ucode" "amd-smi bad-pages" "amd-smi metric" "amd-smi process" "amd-smi topology" "amd-smi monitor" + "amd-smi dmon" "amd-smi xgmi" "amd-smi partition" ) for cmd in "${commands[@]}"; do - echo "Running: $cmd" - if ! $cmd > /tmp/test-results-${{ matrix.os }}/$(echo $cmd | tr ' ' '_').log 2>&1; then - echo "Command '$cmd' failed." + debug_cmd="$cmd --loglevel debug" + echo "Running: $debug_cmd" + if ! eval "$debug_cmd" > /tmp/test-results-${{ matrix.os }}/$(echo $cmd | tr ' ' '_').log 2>&1; then + echo "Command '$debug_cmd' failed." cat /tmp/test-results-${{ matrix.os }}/$(echo $cmd | tr ' ' '_').log exit 1 else - echo "$cmd passed." + echo "$debug_cmd passed." fi done echo "AMDSMI commands done on ${{ matrix.os }}" @@ -148,25 +152,37 @@ jobs: echo 'Running other tests on ${{ matrix.os }}' # AMDSMI Tests - echo 'AMDSMI tests' + echo 'Running AMDSMI tests' cd /opt/rocm/share/amd_smi/tests source amdsmitst.exclude ./amdsmitst --gtest_filter="-$(echo ${BLACKLIST_ALL_ASICS})" > /tmp/test-results-${{ matrix.os }}/amdsmi_tests.log 2>&1 - if [ $? -ne 0 ]; then + TEST_EXIT_CODE=$? + + # Always show the test output (last 10 lines) + if [ $TEST_EXIT_CODE -ne 0 ]; then + echo "AMDSMI tests failed with exit code $TEST_EXIT_CODE." + echo "=============== TEST OUTPUT ===============" + cat /tmp/test-results-${{ matrix.os }}/amdsmi_tests.log | grep -E "\[==========\]|\[ PASSED \]|\[ SKIPPED \]|\[ FAILED \]" + echo "==============================================" echo "AMDSMI tests failed" - exit 1 + exit $TEST_EXIT_CODE + else + echo "AMDSMI tests passed" + echo "=============== TEST OUTPUT ===============" + cat /tmp/test-results-${{ matrix.os }}/amdsmi_tests.log | grep -E "\[==========\]|\[ PASSED \]|\[ SKIPPED \]|\[ FAILED \]" + echo "==============================================" + echo "AMDSMI tests done" fi - echo "AMDSMI tests done" # Python Tests - echo 'Python tests' + echo 'Running Python tests' cd /opt/rocm/share/amd_smi/tests/python_unittest ./integration_test.py -v > /tmp/test-results-${{ matrix.os }}/integration_test_output.txt 2>&1 ./unit_tests.py -v > /tmp/test-results-${{ matrix.os }}/unit_test_output.txt 2>&1 echo "Python tests done" # Example Tests - echo 'Example tests' + echo 'Running Example tests' cd $GITHUB_WORKSPACE/example rm -rf build cmake -B build -DENABLE_ESMI_LIB=OFF @@ -213,6 +229,7 @@ jobs: - ${{ vars.RUNNER_TYPE }} continue-on-error: true strategy: + max-parallel: 10 matrix: os: - SLES @@ -249,8 +266,8 @@ jobs: echo 'Installing more_itertools on ${{ matrix.os }}' python3 -m pip install more_itertools - - name: Build AMDSMI for RHEL10 - if: matrix.os == 'RHEL10' + - name: Build AMDSMI(RHEL10 & AlmaLinux8) + if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8' run: | set -e echo 'Building on RHEL10 with retries' @@ -279,7 +296,7 @@ jobs: done echo "Build completed on RHEL10" - - name: Build AMDSMI for other RPM distros + - name: Build AMDSMI if: matrix.os != 'RHEL10' run: | set -e @@ -293,8 +310,8 @@ jobs: make package echo "Build completed on ${{ matrix.os }}" - - name: Install AMDSMI on RHEL10 - if: matrix.os == 'RHEL10' + - name: Install AMDSMI(RHEL10 & AlmaLinux8) + if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8' run: | cd $GITHUB_WORKSPACE/build dnf install python3-setuptools python3-wheel -y @@ -323,8 +340,8 @@ jobs: fi done - - name: Install AMDSMI on other RPM distros - if: matrix.os != 'RHEL10' + - name: Install AMDSMI + if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8' run: | cd $GITHUB_WORKSPACE/build case ${{ env.PACKAGE_MANAGER }} in @@ -424,8 +441,8 @@ jobs: echo 'Installing more_itertools on ${{ matrix.os }}' python3 -m pip install more_itertools - - name: Build and Install for RHEL10 Test - if: matrix.os == 'RHEL10' + - name: Build and Install for Tests (RHEL10 & AlmaLinux8) + if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8' run: | set -e echo 'Building for test on RHEL10 with retries' @@ -473,8 +490,8 @@ jobs: fi done - - name: Build and Install for other RPM distros Test - if: matrix.os != 'RHEL10' + - name: Build and Install for Tests + if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8' run: | set -e echo 'Building for test on ${{ matrix.os }}' @@ -524,22 +541,25 @@ jobs: "amd-smi list" "amd-smi static" "amd-smi firmware" + "amd-smi ucode" "amd-smi bad-pages" "amd-smi metric" "amd-smi process" "amd-smi topology" "amd-smi monitor" + "amd-smi dmon" "amd-smi xgmi" "amd-smi partition" ) for cmd in "${commands[@]}"; do - echo "Running: $cmd" - if ! $cmd > /tmp/test-results-${{ matrix.os }}/$(echo $cmd | tr ' ' '_').log 2>&1; then - echo "Command '$cmd' failed." + debug_cmd="$cmd --loglevel debug" + echo "Running: $debug_cmd" + if ! eval "$debug_cmd" > /tmp/test-results-${{ matrix.os }}/$(echo $cmd | tr ' ' '_').log 2>&1; then + echo "Command '$debug_cmd' failed." cat /tmp/test-results-${{ matrix.os }}/$(echo $cmd | tr ' ' '_').log exit 1 else - echo "$cmd passed." + echo "$debug_cmd passed." fi done echo "AMDSMI commands done on ${{ matrix.os }}" @@ -551,25 +571,37 @@ jobs: echo 'Running other tests on ${{ matrix.os }}' # AMDSMI Tests - echo 'AMDSMI tests' + echo 'Running AMDSMI tests' cd /opt/rocm/share/amd_smi/tests source amdsmitst.exclude ./amdsmitst --gtest_filter="-$(echo ${BLACKLIST_ALL_ASICS})" > /tmp/test-results-${{ matrix.os }}/amdsmi_tests.log 2>&1 - if [ $? -ne 0 ]; then + TEST_EXIT_CODE=$? + + # Always show the test output (last 10 lines) + if [ $TEST_EXIT_CODE -ne 0 ]; then + echo "AMDSMI tests failed with exit code $TEST_EXIT_CODE." + echo "=============== TEST OUTPUT ===============" + cat /tmp/test-results-${{ matrix.os }}/amdsmi_tests.log | grep -E "\[==========\]|\[ PASSED \]|\[ SKIPPED \]|\[ FAILED \]" + echo "==============================================" echo "AMDSMI tests failed" - exit 1 + exit $TEST_EXIT_CODE + else + echo "AMDSMI tests passed" + echo "=============== TEST OUTPUT ===============" + cat /tmp/test-results-${{ matrix.os }}/amdsmi_tests.log | grep -E "\[==========\]|\[ PASSED \]|\[ SKIPPED \]|\[ FAILED \]" + echo "==============================================" + echo "AMDSMI tests done" fi - echo "AMDSMI tests done" # Python Tests - echo 'Python tests' + echo 'Running Python tests' cd /opt/rocm/share/amd_smi/tests/python_unittest ./integration_test.py -v > /tmp/test-results-${{ matrix.os }}/integration_test_output.txt 2>&1 ./unit_tests.py -v > /tmp/test-results-${{ matrix.os }}/unit_test_output.txt 2>&1 echo "Python tests done" # Example Tests - echo 'Example tests' + echo 'Running Example tests' cd $GITHUB_WORKSPACE/example rm -rf build cmake -B build -DENABLE_ESMI_LIB=OFF