diff --git a/.github/scripts/therock_configure_ci.py b/.github/scripts/therock_configure_ci.py index 9169c208eb..851c91a54c 100644 --- a/.github/scripts/therock_configure_ci.py +++ b/.github/scripts/therock_configure_ci.py @@ -5,14 +5,19 @@ Required environment variables: - SUBTREES """ +import fnmatch import json import logging +import subprocess +import sys from therock_matrix import subtree_to_project_map, project_map -from typing import Mapping +import time +from typing import Mapping, Optional, Iterable import os logging.basicConfig(level=logging.INFO) + def set_github_output(d: Mapping[str, str]): """Sets GITHUB_OUTPUT values. See https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/passing-information-between-jobs @@ -20,18 +25,64 @@ def set_github_output(d: Mapping[str, str]): logging.info(f"Setting github output:\n{d}") step_output_file = os.environ.get("GITHUB_OUTPUT", "") if not step_output_file: - logging.warning("Warning: GITHUB_OUTPUT env var not set, can't set github outputs") + logging.warning( + "Warning: GITHUB_OUTPUT env var not set, can't set github outputs" + ) return with open(step_output_file, "a") as f: f.writelines(f"{k}={v}" + "\n" for k, v in d.items()) + +def retry(max_attempts, delay_seconds, exceptions): + def decorator(func): + def newfn(*args, **kwargs): + attempt = 0 + while attempt < max_attempts: + try: + return func(*args, **kwargs) + except exceptions as e: + print(f'Exception {str(e)} thrown when attempting to run , attempt {attempt} of {max_attempts}') + attempt += 1 + if attempt < max_attempts: + backoff = delay_seconds * (2 ** (attempt - 1)) + time.sleep(backoff) + return func(*args, **kwargs) + return newfn + return decorator + +@retry(max_attempts=3, delay_seconds=2, exceptions=(TimeoutError)) +def get_modified_paths(base_ref: str) -> Optional[Iterable[str]]: + """Returns the paths of modified files relative to the base reference.""" + return subprocess.run( + ["git", "diff", "--name-only", base_ref], + stdout=subprocess.PIPE, + check=True, + text=True, + timeout=60, + ).stdout.splitlines() + + +GITHUB_WORKFLOWS_CI_PATTERNS = [ + "therock*", +] + + +def is_path_workflow_file_related_to_ci(path: str) -> bool: + return any( + fnmatch.fnmatch(path, ".github/workflows/" + pattern) + for pattern in GITHUB_WORKFLOWS_CI_PATTERNS + ) or any( + fnmatch.fnmatch(path, ".github/scripts/" + pattern) + for pattern in GITHUB_WORKFLOWS_CI_PATTERNS + ) + + +def check_for_workflow_file_related_to_ci(paths: Optional[Iterable[str]]) -> bool: + if paths is None: + return False + return any(is_path_workflow_file_related_to_ci(p) for p in paths) def retrieve_projects(args): - # TODO(geomin12): #590 Enable TheRock CI for forked PRs - if args.get("is_forked_pr"): - logging.info("Warning: not enabling any projects due to is_forked_pr. Builds/tests for forked PRs are disabled pending: https://github.com/ROCm/rocm-libraries/issues/590") - return [] - if args.get("is_pull_request"): subtrees = args.get("input_subtrees").split("\n") @@ -45,14 +96,21 @@ def retrieve_projects(args): if args.get("is_push"): subtrees = list(subtree_to_project_map.keys()) + # If .github/*/therock* were changed, run all subtrees + base_ref = args.get("base_ref") + modified_paths = get_modified_paths(base_ref) + print("modified_paths (max 200):", modified_paths[:200]) + related_to_therock_ci = check_for_workflow_file_related_to_ci(modified_paths) + if related_to_therock_ci: + subtrees = list(subtree_to_project_map.keys()) + projects = set() # collect the associated subtree to project for subtree in subtrees: if subtree in subtree_to_project_map: projects.add(subtree_to_project_map.get(subtree)) - - # retrieve the subtrees to checkout, cmake options to build, and projects to test + # retrieve the subtrees to checkout, cmake options to build, and projects to test project_to_run = [] for project in projects: if project in project_map: @@ -73,15 +131,14 @@ if __name__ == "__main__": args["is_push"] = github_event_name == "push" args["is_workflow_dispatch"] = github_event_name == "workflow_dispatch" - is_forked_pr = os.getenv("IS_FORKED_PR") - args["is_forked_pr"] = is_forked_pr == "true" - input_subtrees = os.getenv("SUBTREES", "") args["input_subtrees"] = input_subtrees input_projects = os.getenv("PROJECTS", "") args["input_projects"] = input_projects + args["base_ref"] = os.environ.get("BASE_REF", "HEAD^") + logging.info(f"Retrieved arguments {args}") run(args) diff --git a/.github/scripts/therock_matrix.py b/.github/scripts/therock_matrix.py index 7e2e7bbbe1..c2a731764b 100644 --- a/.github/scripts/therock_matrix.py +++ b/.github/scripts/therock_matrix.py @@ -2,22 +2,17 @@ This dictionary is used to map specific file directory changes to the corresponding build flag and tests """ subtree_to_project_map = { - "projects/rocprim": "prim", - "projects/rocthrust": "prim", - "projects/hipcub": "prim", - "projects/rocrand": "rand", - "projects/hiprand": "rand" + "projects/clr": "core", + "projects/hip": "core", + "projects/hip-tests": "core", + "projects/rocminfo": "core", + "projects/rocr-runtime": "core", } project_map = { - "prim": { - "cmake_options": "-DTHEROCK_ENABLE_PRIM=ON -DTHEROCK_ENABLE_ALL=OFF", - "project_to_test": "rocprim, rocthrust, hipcub", - "subtree_checkout": "projects/rocprim\nprojects/hipcub\nprojects/rocthrust", - }, - "rand": { - "cmake_options": "-DTHEROCK_ENABLE_RAND=ON -DTHEROCK_ENABLE_ALL=OFF", - "project_to_test": "rocrand, hiprand", - "subtree_checkout": "projects/rocrand\nprojects/hiprand", + "core": { + "cmake_options": "-DTHEROCK_ENABLE_CORE=ON -DTHEROCK_ENABLE_HIP_RUNTIME=ON -DTHEROCK_ENABLE_ALL=OFF", + "project_to_test": "hip-tests", + "subtree_checkout": "projects/clr\nprojects/hip\nprojects/hip-tests\nprojects/rocminfo\nprojects/rocr-runtime", }, } diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index 7e3b3be942..238e515eda 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -22,37 +22,25 @@ jobs: id-token: write container: image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:044b113562629f4bd2ec5d2e64b32eee11562d48fb1a75d7493daec9dd8d8292 + options: -v /runner/config:/home/awsconfig/ strategy: fail-fast: true env: CACHE_DIR: ${{ github.workspace }}/.container-cache - CCACHE_DIR: "${{ github.workspace }}/.container-cache/ccache" - CCACHE_MAXSIZE: "700M" + CCACHE_CONFIGPATH: ${{ github.workspace }}/.ccache/ccache.conf AMDGPU_FAMILIES: "gfx94X-dcgpu" TEATIME_FORCE_INTERACTIVE: 0 + AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini steps: - - name: Generate a token for rocm-systems - id: generate-token - uses: actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e # v2.0.6 - with: - app-id: ${{ secrets.APP_ID }} - private-key: ${{ secrets.APP_PRIVATE_KEY }} - owner: ${{ github.repository_owner }} - - name: "Checking out repository for rocm-systems" uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - sparse-checkout: | - .github - ${{ inputs.subtree_checkout }} - token: ${{ steps.generate-token.outputs.token }} - name: Checkout TheRock repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: "ROCm/TheRock" path: "TheRock" - ref: 7afbe45f7eaa4f2e9abcb9cd7f1c4042a2d7b638 # 2025-09-24 commit + ref: 83b25f87ea8f163ef37295f28204a9b4ab86f168 # 2025-09-30 commit - name: Install python deps run: | @@ -78,17 +66,21 @@ jobs: - name: Fetch sources timeout-minutes: 30 run: | - ./TheRock/build_tools/fetch_sources.py --jobs 12 --no-include-rocm-systems --no-include-ml-frameworks + ./TheRock/build_tools/fetch_sources.py --jobs 12 --no-include-rocm-systems --no-include-rocm-libraries --no-include-ml-frameworks + - name: Patch rocm-systems + run: | + git -c user.name="therockbot" -c "user.email=therockbot@amd.com" am --whitespace=nowarn ./TheRock/patches/amd-mainline/rocm-systems/*.patch + - name: Configure Projects env: amdgpu_families: ${{ env.AMDGPU_FAMILIES }} package_version: ADHOCBUILD - extra_cmake_options: "-DTHEROCK_USE_EXTERNAL_ROCM_LIBRARIES=ON -DTHEROCK_ROCM_LIBRARIES_SOURCE_DIR=../ ${{ inputs.cmake_options }}" + extra_cmake_options: "-DTHEROCK_ROCM_SYSTEMS_SOURCE_DIR=../ ${{ inputs.cmake_options }}" BUILD_DIR: build run: | - python3 TheRock/build_tools/github_actions/build_configure.py + python TheRock/build_tools/github_actions/build_configure.py - name: Build therock-dist run: cmake --build TheRock/build --target therock-dist @@ -113,41 +105,20 @@ jobs: ccache -s - name: Configure AWS Credentials - if: always() + if: ${{ always() && !github.event.pull_request.head.repo.fork }} uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0 with: aws-region: us-east-2 role-to-assume: arn:aws:iam::692859939525:role/therock-artifacts-external - - name: Create Logs index Files + - name: Post Build Upload if: always() run: | - python3 TheRock/build_tools/github_actions/create_log_index.py \ - --build-dir=TheRock/build \ - --amdgpu-family=${{ env.AMDGPU_FAMILIES }} - - - name: Upload artifacts - run: | - python TheRock/build_tools/github_actions/upload_build_artifacts.py \ + python TheRock/build_tools/github_actions/post_build_upload.py \ --run-id ${{ github.run_id }} \ --amdgpu-family ${{ env.AMDGPU_FAMILIES }} \ - --build-dir TheRock/build - - - name: Upload Logs - if: always() - run: | - python3 TheRock/build_tools/github_actions/upload_build_logs_to_s3.py \ - --build-dir=TheRock/build \ - --run-id ${{ github.run_id }} \ - --amdgpu-family ${{ env.AMDGPU_FAMILIES }} - - - name: Add Links to Job Summary - if: always() - run: | - python TheRock/build_tools/github_actions/upload_build_summary.py \ - --run-id ${{ github.run_id }} \ - --amdgpu-family ${{ env.AMDGPU_FAMILIES }} \ - --build-dir TheRock/build + --build-dir TheRock/build \ + --upload therock-test-linux: name: "Test" diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index b79243a77a..4b98b6ca7e 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -33,28 +33,15 @@ jobs: # To get a fast signal of windows building for TheRock, adding gfx110X AMDGPU_FAMILIES: "gfx110X-dgpu" steps: - - name: Generate a token for rocm-systems - id: generate-token - uses: actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e # v2.0.6 - with: - app-id: ${{ secrets.APP_ID }} - private-key: ${{ secrets.APP_PRIVATE_KEY }} - owner: ${{ github.repository_owner }} - - name: "Checking out repository for rocm-systems" uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - sparse-checkout: | - .github - ${{ inputs.subtree_checkout }} - token: ${{ steps.generate-token.outputs.token }} - name: Checkout TheRock repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: "ROCm/TheRock" path: "TheRock" - ref: 7afbe45f7eaa4f2e9abcb9cd7f1c4042a2d7b638 # 2025-09-24 commit + ref: 83b25f87ea8f163ef37295f28204a9b4ab86f168 # 2025-09-30 commit - name: Set up Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 @@ -65,10 +52,16 @@ jobs: run: | pip install -r TheRock/requirements.txt + - name: Patch rocm-systems + run: | + git -c user.name="therockbot" -c "user.email=therockbot@amd.com" am --whitespace=nowarn ./TheRock/patches/amd-mainline/rocm-systems/*.patch + - name: Install requirements run: | choco install --no-progress -y ccache - choco install --no-progress -y ninja + # ninja pinned due to a bug in the 1.13.0 release: + # https://github.com/ninja-build/ninja/issues/2616 + choco install --no-progress -y ninja --version 1.12.1 choco install --no-progress -y strawberryperl echo "$PATH;C:\Strawberry\c\bin" >> $GITHUB_PATH choco install --no-progress -y awscli @@ -78,6 +71,10 @@ jobs: with: version: '3.62.0' + - uses: iterative/setup-dvc@4bdfd2b0f6f1ad7e08afadb03b1a895c352a5239 # v2.0.0 + with: + version: '3.62.0' + # After other installs, so MSVC get priority in the PATH. - name: Configure MSVC uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 @@ -91,17 +88,18 @@ jobs: timeout-minutes: 30 run: | git config --global core.longpaths true - python ./TheRock/build_tools/fetch_sources.py --jobs 12 --no-include-rocm-systems --no-include-ml-frameworks + python ./TheRock/build_tools/fetch_sources.py --jobs 12 --no-include-rocm-systems --no-include-rocm-libraries --no-include-ml-frameworks + dvc pull - name: Configure Projects env: amdgpu_families: ${{ env.AMDGPU_FAMILIES }} package_version: "ADHOCBUILD" - extra_cmake_options: "-DBUILD_TESTING=OFF -DTHEROCK_USE_EXTERNAL_ROCM_LIBRARIES=ON -DTHEROCK_ROCM_LIBRARIES_SOURCE_DIR=${{ github.workspace }} ${{ inputs.cmake_options }}" + extra_cmake_options: "-DTHEROCK_ROCM_SYSTEMS_SOURCE_DIR=${{ github.workspace }} ${{ inputs.cmake_options }}" run: | # clear cache before build and after download ccache -z - python3 TheRock/build_tools/github_actions/build_configure.py + python TheRock/build_tools/github_actions/build_configure.py - name: Build therock-dist run: cmake --build "${{ env.BUILD_DIR }}" --target therock-dist @@ -135,41 +133,20 @@ jobs: get-disk | Select-object @{Name="Size(GB)";Expression={$_.Size/1GB}} | Write-Host - name: Configure AWS Credentials - if: always() + if: ${{ always() && !github.event.pull_request.head.repo.fork }} uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0 with: aws-region: us-east-2 role-to-assume: arn:aws:iam::692859939525:role/therock-artifacts-external - - name: Create Logs index Files + - name: Post Build Upload if: always() run: | - python3 TheRock/build_tools/github_actions/create_log_index.py \ - --build-dir=${{ env.BUILD_DIR }} \ - --amdgpu-family=${{ env.AMDGPU_FAMILIES }} - - - name: Upload artifacts - run: | - python TheRock/build_tools/github_actions/upload_build_artifacts.py \ + python TheRock/build_tools/github_actions/post_build_upload.py \ --run-id ${{ github.run_id }} \ --amdgpu-family ${{ env.AMDGPU_FAMILIES }} \ - --build-dir ${{ env.BUILD_DIR }} - - - name: Upload Logs - if: always() - run: | - python3 TheRock/build_tools/github_actions/upload_build_logs_to_s3.py \ - --build-dir=${{ env.BUILD_DIR }} \ - --run-id ${{ github.run_id }} \ - --amdgpu-family ${{ env.AMDGPU_FAMILIES }} - - - name: Add Links to Job Summary - if: always() - run: | - python TheRock/build_tools/github_actions/upload_build_summary.py \ - --run-id ${{ github.run_id }} \ - --amdgpu-family ${{ env.AMDGPU_FAMILIES }} \ - --build-dir ${{ env.BUILD_DIR }} + --build-dir ${{ env.BUILD_DIR }} \ + --upload therock-test-windows: name: "Test" @@ -178,5 +155,5 @@ jobs: with: project_to_test: ${{ inputs.project_to_test }} amdgpu_families: ${{ needs.therock-build-windows.outputs.AMDGPU_FAMILIES }} - test_runs_on: "" + test_runs_on: "windows-strix-halo-gpu-rocm" platform: "windows" diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml index 3c1d2f7313..515f1f3b78 100644 --- a/.github/workflows/therock-ci.yml +++ b/.github/workflows/therock-ci.yml @@ -14,7 +14,7 @@ on: inputs: projects: type: string - description: "Insert space-separated list of projects to test or 'all' to test all projects. ex: 'projects/rocprim projects/hipcub'" + description: "Insert space-separated list of projects to test or 'all' to test all projects. ex: 'projects/clr projects/rocminfo'" permissions: contents: read @@ -31,6 +31,10 @@ jobs: setup: name: "Setup" runs-on: ubuntu-24.04 + env: + # The commit being checked out is the merge commit for a PR. Its first + # parent will be the tip of the base branch. + BASE_REF: HEAD^ outputs: projects: ${{ steps.projects.outputs.projects }} steps: @@ -39,14 +43,7 @@ jobs: with: sparse-checkout: .github sparse-checkout-cone-mode: true - - # # will be needed for `patch_monorepo.py` but necessary now - # - name: Checkout TheRock Repository - # uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - # with: - # repository: "ROCm/TheRock" - # path: "TheRock" - # ref: 0730cccf8b98e012d64750f26032bd760360fa45 + fetch-depth: 2 - name: Set up Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 @@ -57,18 +54,12 @@ jobs: run: | python -m pip install --upgrade pip pip install pydantic requests - - # # For our "rocPRIM" MVP, there are no patches to apply. This will be un-commented and used later! - # - name: Patch monorepo - # run: | - # python TheRock/build_tools/github_actions/patch_monorepo.py \ - # --repo rocm-systems - + - name: Detect changed subtrees id: detect if: github.event_name == 'pull_request' env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ github.token }} run: | python .github/scripts/pr_detect_changed_subtrees.py \ --repo "${{ github.repository }}" \ @@ -80,13 +71,11 @@ jobs: env: SUBTREES: ${{ steps.detect.outputs.subtrees }} PROJECTS: ${{ inputs.projects }} - # TODO(geomin12): #590 Enable TheRock CI for forked PRs - IS_FORKED_PR: ${{ github.event.pull_request.head.repo.fork == true }} run: | python .github/scripts/therock_configure_ci.py therock-ci-linux: - name: TheRock CI Linux + name: Linux (${{ matrix.projects.project_to_test }}) permissions: contents: read id-token: write @@ -104,7 +93,7 @@ jobs: subtree_checkout: ${{ matrix.projects.subtree_checkout }} therock-ci-windows: - name: TheRock CI Windows + name: Windows (${{ matrix.projects.project_to_test }}) permissions: contents: read id-token: write diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml index cc9d559c05..03910e4649 100644 --- a/.github/workflows/therock-test-packages.yml +++ b/.github/workflows/therock-test-packages.yml @@ -52,6 +52,14 @@ jobs: test_components: name: 'Test ${{ matrix.components.job_name }}' runs-on: ${{ inputs.test_runs_on }} + container: + image: ${{ inputs.platform == 'linux' && 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:405945a40deaff9db90b9839c0f41d4cba4a383c1a7459b28627047bf6302a26' || null }} + options: --ipc host + --group-add video + --device /dev/kfd + --device /dev/dri + --group-add 992 + --env-file /etc/podinfo/gha-gpu-isolation-settings needs: configure_test_matrix # skip tests if no test matrix to run if: ${{ needs.configure_test_matrix.outputs.components != '[]' }} @@ -94,6 +102,7 @@ jobs: VENV_DIR: ${{ env.VENV_DIR }} FETCH_ARTIFACT_ARGS: ${{ matrix.components.fetch_artifact_args }} PLATFORM: ${{ inputs.platform }} + IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }} - name: Install additional packages if: ${{ inputs.platform == 'linux' && (matrix.components.job_name == 'rocblas' || matrix.components.job_name == 'hipblaslt') }}