Add S3 upload support for Perf and test reports by run ID and architecture (#2020)

* Commits to enable scp report copy

* Added Post report upload step

* Added extra arg for fetch artifacts

* Moved to a specific commit

* Add `id-token: write` permission so the workflow can assume the AWS role used for S3 uploads

* Added comment noting the commit date of the pinned TheRock SHA

---------

Co-authored-by: arravikum <arravikum@amd.com>

[ROCm/rccl commit: 07f8f6d6c6]
Этот коммит содержится в:
Aravind Ravikumar
2025-11-03 19:09:34 -05:00
коммит произвёл GitHub
родитель 6d22ce9b1a
Коммит 4babb01f4d
3 изменённых файлов: 39 добавлений и 4 удалений
+6
Просмотреть файл
@@ -5,6 +5,8 @@ on:
inputs:
amdgpu_families:
type: string
artifact_group:
type: string
extra_cmake_options:
type: string
@@ -122,10 +124,14 @@ jobs:
therock-test-linux-multi-node:
name: "Test multi-node"
if: ${{ inputs.amdgpu_families == 'gfx950-dcgpu' }}
permissions:
contents: read
id-token: write
needs: [therock-build-linux]
uses: ./.github/workflows/therock-test-packages-multi-node.yml
with:
amdgpu_families: ${{ inputs.amdgpu_families }}
artifact_group: ${{ inputs.artifact_group }}
test_runs_on: nova-linux-slurm-scale-runner
artifact_run_id: ${{ github.run_id }}
+1
Просмотреть файл
@@ -57,6 +57,7 @@ jobs:
secrets: inherit
with:
amdgpu_families: ${{ matrix.amdgpu_family }}
artifact_group: ${{ matrix.amdgpu_family }}
extra_cmake_options: >
-DTHEROCK_ENABLE_ALL=OFF
-DTHEROCK_BUILD_TESTING=ON
+32 -4
Просмотреть файл
@@ -5,6 +5,8 @@ on:
inputs:
amdgpu_families:
type: string
artifact_group:
type: string
test_runs_on:
type: string
artifact_run_id:
@@ -13,13 +15,16 @@ on:
inputs:
amdgpu_families:
type: string
artifact_group:
type: string
test_runs_on:
type: string
artifact_run_id:
type: string
permissions:
contents: read
id-token: write
jobs:
test_rccl_multi_node:
@@ -28,6 +33,9 @@ jobs:
defaults:
run:
shell: bash
permissions:
contents: read
id-token: write
env:
VENV_DIR: ${{ github.workspace }}/.venv
ARTIFACT_RUN_ID: "${{ inputs.artifact_run_id }}"
@@ -38,16 +46,17 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: "ROCm/TheRock"
ref: 890c856134d955441790c8ed2d60ad4fb027f4e5
ref: 6ecc2af91fc8a4271a949005d7404bd13278c005 # 2025-10-23 commit
- name: Run setup test environment workflow
uses: './.github/actions/setup_test_environment'
with:
ARTIFACT_RUN_ID: ${{ env.ARTIFACT_RUN_ID }}
AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }}
ARTIFACT_GROUP: ${{ inputs.artifact_group }}
OUTPUT_ARTIFACTS_DIR: ${{ env.OUTPUT_ARTIFACTS_DIR }}
VENV_DIR: ${{ env.VENV_DIR }}
FETCH_ARTIFACT_ARGS: "--rccl"
FETCH_ARTIFACT_ARGS: "--rccl --tests"
IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }}
# The following step uses Slurm to run multi-node RCCL tests on the Slurm MI350X cluster.
@@ -63,6 +72,25 @@ jobs:
--cluster_file ./input/cluster.json \
--config_file ./input/mi350_config.json \
--log-file=/tmp/rccl_log.log \
--html=/home/arravikum/cvs/ci_test_report.html \
--html=/home/arravikum/cvs/test_reports/ci_test_report.html \
--capture=tee-sys \
--self-contained-html"
- name: Configure AWS Credentials for non-forked repos
if: ${{ always() && !github.event.pull_request.head.repo.fork }}
uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a # v4.3.1
with:
aws-region: us-east-2
role-to-assume: arn:aws:iam::692859939525:role/therock-artifacts-external
- name: Post test report upload
if: always()
working-directory: ${{ github.workspace }}
run: |
export PYTHONPATH="${PYTHONPATH}:${{ github.workspace }}/build_tools"
python3 build_tools/github_actions/upload_test_report_script.py \
--run-id "${{ github.run_id }}" \
--amdgpu-family "${{ inputs.amdgpu_families }}" \
--report-path "/home/arravikum/cvs/test_reports" \
--log-destination "/logs/gfx950-dcgpu" \
--index-file-name "index_rccl_test_report.html"