dfdb64572c
* Adding working single node tests
* Revert to old docker sha
* adding back no perf tests
---------
Co-authored-by: Aravind Ravikumar <arravikum@amd.com>
[ROCm/rccl commit: 4b295c9893]
148 Zeilen
4.8 KiB
YAML
148 Zeilen
4.8 KiB
YAML
# Reusable workflow (entered only through `workflow_call`) that builds the
# TheRock packages inside a manylinux container, uploads the build outputs,
# and then hands off to the multi-node or single-node test workflow depending
# on the requested GPU family.
name: TheRock CI Linux

on:
  workflow_call:
    inputs:
      # GPU family selector; exported to the build job as AMDGPU_FAMILIES and
      # compared against fixed family names to pick a test job.
      amdgpu_families:
        type: string
      # Forwarded unchanged to the test workflows.
      artifact_group:
        type: string
      # Forwarded to build_configure.py through the step environment.
      extra_cmake_options:
        type: string

permissions:
  contents: read

jobs:
  therock-build-linux:
    name: Build Linux Packages
    runs-on: azure-linux-scale-rocm
    permissions:
      # Presumably needed so the AWS credentials step below can assume its
      # role through OIDC - confirm before narrowing.
      id-token: write
    container:
      # Image is pinned by digest.
      image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:1f1ce0ab151146c7f86ee4345be74c42d8ca83200d9d26843e8a71df01ecad4e
      # Mounts the runner's config directory at the path that
      # AWS_SHARED_CREDENTIALS_FILE points into.
      options: -v /runner/config:/home/awsconfig/
    env:
      AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }}
      TEATIME_FORCE_INTERACTIVE: 0
      AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini
      CACHE_DIR: ${{ github.workspace }}/.container-cache
      # The ccache.conf will be written by setup_ccache.py before this gets used.
      CCACHE_CONFIGPATH: ${{ github.workspace }}/.ccache/ccache.conf
    steps:
      # TheRock is checked out into the workspace root at a pinned commit; the
      # two rccl repositories are placed in sub-directories next to it.
      - name: Checkout TheRock repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: "ROCm/TheRock"
          ref: ff46daa79b4c826c4f4676893d0d6586de567dfa # 2026-01-12 commit

      - name: Checkout rccl repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: "ROCm/rccl"
          path: rccl

      - name: Checkout rccl-tests repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: "ROCm/rccl-tests"
          path: rccl-tests

      - name: Install python deps
        run: |
          pip install -r requirements.txt

      # safe.directory must be set before Runner Health Status
      - name: Adjust git config
        run: |
          # Quoted so a workspace path containing whitespace stays one argument.
          git config --global --add safe.directory "$PWD"
          git config fetch.parallel 10

      - name: Setup ccache
        run: |
          ./build_tools/setup_ccache.py \
            --config-preset "github-oss-presubmit" \
            --dir "$(dirname "$CCACHE_CONFIGPATH")" \
            --local-path "$CACHE_DIR/ccache"

      - name: Runner health status
        run: |
          ./build_tools/health_status.py

      - name: Fetch sources
        run: |
          ./build_tools/fetch_sources.py --jobs 12

      - name: Configure Projects
        env:
          amdgpu_families: ${{ env.AMDGPU_FAMILIES }}
          package_version: ADHOCBUILD
          extra_cmake_options: ${{ inputs.extra_cmake_options }}
          BUILD_DIR: build
        run: |
          python3 build_tools/github_actions/build_configure.py

      - name: Build therock-dist
        run: cmake --build build

      - name: Build therock-archives
        run: cmake --build build --target therock-archives

      - name: Report
        # The condition below is disabled in the original, so this step only
        # runs when every earlier step succeeded.
        #if: ${{ !cancelled() }}
        run: |
          echo "Full SDK du:"
          echo "------------"
          du -h -d 1 build/dist/rocm
          echo "Artifact Archives:"
          echo "------------------"
          ls -lh build/artifacts/*.tar.xz
          echo "Artifacts:"
          echo "----------"
          du -h -d 1 build/artifacts
          echo "CCache Stats:"
          echo "-------------"
          ccache -s -v
          tail -v -n +1 .ccache/compiler_check_cache/* > build/logs/ccache_compiler_check_cache.log

      # Runs even after a failed build, but is skipped for pull requests that
      # come from a fork.
      - name: Configure AWS Credentials for non-forked repos
        if: ${{ always() && !github.event.pull_request.head.repo.fork }}
        uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a # v4.3.1
        with:
          aws-region: us-east-2
          role-to-assume: arn:aws:iam::692859939525:role/therock-artifacts-external

      - name: Post Build Upload
        if: always()
        # AMDGPU_FAMILIES is read from the job environment instead of being
        # spliced into the script text with an expression, so the caller's
        # input is never interpreted as shell syntax.
        # NOTE(review): the upload is keyed by AMDGPU_FAMILIES while the test
        # jobs below receive inputs.artifact_group - confirm that callers pass
        # the same value for both.
        run: |
          python3 build_tools/github_actions/post_build_upload.py \
            --run-id ${{ github.run_id }} \
            --artifact-group "$AMDGPU_FAMILIES" \
            --build-dir build \
            --upload

  # Only runs for the gfx950-dcgpu family.
  therock-test-linux-multi-node:
    name: "Test multi-node"
    if: ${{ inputs.amdgpu_families == 'gfx950-dcgpu' }}
    permissions:
      contents: read
      id-token: write
    needs: [therock-build-linux]
    uses: ./.github/workflows/therock-test-packages-multi-node.yml
    with:
      amdgpu_families: ${{ inputs.amdgpu_families }}
      artifact_group: ${{ inputs.artifact_group }}
      test_runs_on: nova-linux-slurm-scale-runner
      artifact_run_id: ${{ github.run_id }}

  # Only runs for the gfx94X-dcgpu family.
  # NOTE(review): unlike the multi-node job this one declares no job-level
  # permissions, so it gets only the workflow-level `contents: read` - confirm
  # the called workflow needs nothing more.
  therock-test-linux-single-node:
    name: "Test single-node"
    if: ${{ inputs.amdgpu_families == 'gfx94X-dcgpu' }}
    needs: [therock-build-linux]
    uses: ./.github/workflows/therock-test-packages-single-node.yml
    with:
      amdgpu_families: ${{ inputs.amdgpu_families }}
      artifact_group: ${{ inputs.artifact_group }}
      test_runs_on: linux-mi325-4gpu-ossci-rocm
      artifact_run_id: ${{ github.run_id }}