# Source: rocm-systems/projects/rccl/.github/workflows/therock-ci-linux.yml
# (148 lines, 4.8 KiB, YAML)

# Reusable workflow: builds TheRock packages on Linux, then hands the
# resulting artifacts to the multi-node / single-node package test workflows.
name: TheRock CI Linux

on:
  workflow_call:
    inputs:
      # GPU family to build for. Exported to the build job as AMDGPU_FAMILIES
      # and compared against fixed family names to decide which test job runs.
      amdgpu_families:
        type: string
      # Artifact group name forwarded to the test workflows.
      artifact_group:
        type: string
      # Extra options made available to the configure step through its
      # `extra_cmake_options` environment variable.
      extra_cmake_options:
        type: string

# Default token scope for all jobs; a job-level `permissions` block replaces it.
permissions:
  contents: read
jobs:
  # Builds the TheRock distribution and archives inside the manylinux build
  # container, then uploads the build outputs for the test jobs that follow.
  therock-build-linux:
    name: Build Linux Packages
    runs-on: azure-linux-scale-rocm
    permissions:
      # Lets the AWS credentials step request an OIDC token for role
      # assumption. A job-level block replaces the workflow default, so no
      # other scope is granted here.
      id-token: write
    container:
      image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:1f1ce0ab151146c7f86ee4345be74c42d8ca83200d9d26843e8a71df01ecad4e
      # Mounts the runner's config directory at the path referenced by
      # AWS_SHARED_CREDENTIALS_FILE below.
      options: -v /runner/config:/home/awsconfig/
    env:
      AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }}
      TEATIME_FORCE_INTERACTIVE: 0
      AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini
      CACHE_DIR: ${{ github.workspace }}/.container-cache
      # The ccache.conf will be written by setup_ccache.py before this gets used.
      CCACHE_CONFIGPATH: ${{ github.workspace }}/.ccache/ccache.conf
    steps:
      # TheRock is checked out at the workspace root, pinned to a fixed commit;
      # rccl and rccl-tests are checked out into sub-directories of it.
      - name: Checkout TheRock repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          repository: "ROCm/TheRock"
          ref: ff46daa79b4c826c4f4676893d0d6586de567dfa  # 2026-01-12 commit

      - name: Checkout rccl repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          repository: "ROCm/rccl"
          path: rccl

      - name: Checkout rccl-tests repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          repository: "ROCm/rccl-tests"
          path: rccl-tests

      - name: Install python deps
        run: |
          pip install -r requirements.txt

      # safe.directory must be set before Runner Health Status
      - name: Adjust git config
        run: |
          git config --global --add safe.directory "$PWD"
          git config fetch.parallel 10

      - name: Setup ccache
        run: |
          ./build_tools/setup_ccache.py \
            --config-preset "github-oss-presubmit" \
            --dir "$(dirname "$CCACHE_CONFIGPATH")" \
            --local-path "$CACHE_DIR/ccache"

      - name: Runner health status
        run: |
          ./build_tools/health_status.py

      - name: Fetch sources
        run: |
          ./build_tools/fetch_sources.py --jobs 12

      # build_configure.py takes no command-line arguments here; its settings
      # are supplied through this step's environment.
      - name: Configure Projects
        env:
          amdgpu_families: ${{ env.AMDGPU_FAMILIES }}
          package_version: ADHOCBUILD
          extra_cmake_options: ${{ inputs.extra_cmake_options }}
          BUILD_DIR: build
        run: |
          python3 build_tools/github_actions/build_configure.py

      - name: Build therock-dist
        run: cmake --build build

      - name: Build therock-archives
        run: cmake --build build --target therock-archives

      - name: Report
        # NOTE(review): the condition below is commented out, so this step is
        # skipped whenever an earlier step fails - confirm that is intended.
        # if: ${{ !cancelled() }}
        run: |
          echo "Full SDK du:"
          echo "------------"
          du -h -d 1 build/dist/rocm
          echo "Artifact Archives:"
          echo "------------------"
          ls -lh build/artifacts/*.tar.xz
          echo "Artifacts:"
          echo "----------"
          du -h -d 1 build/artifacts
          echo "CCache Stats:"
          echo "-------------"
          ccache -s -v
          tail -v -n +1 .ccache/compiler_check_cache/* > build/logs/ccache_compiler_check_cache.log

      # Runs even after a failed build (always()), but never for pull requests
      # that originate from a fork.
      - name: Configure AWS Credentials for non-forked repos
        if: ${{ always() && !github.event.pull_request.head.repo.fork }}
        uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a  # v4.3.1
        with:
          aws-region: us-east-2
          role-to-assume: arn:aws:iam::692859939525:role/therock-artifacts-external

      - name: Post Build Upload
        if: always()
        env:
          # Upload under the same group the test jobs below are told to fetch
          # (`inputs.artifact_group`); fall back to the GPU family when the
          # caller left artifact_group unset. The value is passed through the
          # environment instead of being spliced into the script text so it
          # cannot be interpreted as shell syntax.
          UPLOAD_ARTIFACT_GROUP: ${{ inputs.artifact_group || inputs.amdgpu_families }}
        run: |
          python3 build_tools/github_actions/post_build_upload.py \
            --run-id ${{ github.run_id }} \
            --artifact-group "$UPLOAD_ARTIFACT_GROUP" \
            --build-dir build \
            --upload

  # Multi-node package tests; only run for the gfx950-dcgpu family.
  therock-test-linux-multi-node:
    name: "Test multi-node"
    if: ${{ inputs.amdgpu_families == 'gfx950-dcgpu' }}
    permissions:
      contents: read
      id-token: write
    needs: [therock-build-linux]
    uses: ./.github/workflows/therock-test-packages-multi-node.yml
    with:
      amdgpu_families: ${{ inputs.amdgpu_families }}
      artifact_group: ${{ inputs.artifact_group }}
      test_runs_on: nova-linux-slurm-scale-runner
      # Points the test workflow at the artifacts of this same run.
      artifact_run_id: ${{ github.run_id }}

  # Single-node package tests; only run for the gfx94X-dcgpu family.
  # No job-level permissions are declared, so the workflow-level default
  # applies.
  therock-test-linux-single-node:
    name: "Test single-node"
    if: ${{ inputs.amdgpu_families == 'gfx94X-dcgpu' }}
    needs: [therock-build-linux]
    uses: ./.github/workflows/therock-test-packages-single-node.yml
    with:
      amdgpu_families: ${{ inputs.amdgpu_families }}
      artifact_group: ${{ inputs.artifact_group }}
      test_runs_on: linux-mi325-4gpu-ossci-rocm
      # Points the test workflow at the artifacts of this same run.
      artifact_run_id: ${{ github.run_id }}