diff --git a/.github/workflows/aqlprofile-continuous_integration.yml b/.github/workflows/aqlprofile-continuous_integration.yml
index bb9aea6ced..f46915d1f1 100644
--- a/.github/workflows/aqlprofile-continuous_integration.yml
+++ b/.github/workflows/aqlprofile-continuous_integration.yml
@@ -1,6 +1,8 @@
 name: AqlProfile Continuous Integration
 
 on:
+  schedule:
+    - cron: '0 7 * * *'
   workflow_dispatch:
   push:
     branches:
@@ -24,8 +26,7 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake
-  PATH: "/usr/bin:$PATH"
+  ROCM_PATH: "/opt/rocm"
   navi3_EXCLUDE_TESTS_REGEX: ""
   vega20_EXCLUDE_TESTS_REGEX: ""
   mi200_EXCLUDE_TESTS_REGEX: ""
@@ -43,17 +44,26 @@ env:
 
 jobs:
   core-deb:
+    name: Core • ${{ matrix.system.gpu }} • ${{ matrix.system.os }}
     # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
     strategy:
       fail-fast: false
       matrix:
         system: [
-          {gpu: 'navi4', runner: 'rocprofiler-navi4', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo'},
-          {gpu: 'navi3', runner: 'rocprofiler-navi3', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo'},
-          {gpu: 'mi3xx', runner: 'rocprofiler-mi3xx', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo'}
+          {gpu: 'navi4', runner: 'rocprofiler-navi4-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx120X"},
+          {gpu: 'navi3', runner: 'rocprofiler-navi3-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx110X"},
+          {gpu: 'mi325', runner: 'linux-mi325-1gpu-ossci-rocm', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx94X"}
         ]
 
     runs-on: ${{ matrix.system.runner }}
+    container:
+      image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-${{ matrix.system.therock-s3 }}-latest
+      credentials:
+        username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }}
+        password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }}
+      env:
+        DEBIAN_FRONTEND: noninteractive
+      options: --privileged --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
 
     permissions:
       contents: read
@@ -62,9 +72,19 @@ jobs:
     # define this for containers
     env:
       GIT_DISCOVERY_ACROSS_FILESYSTEM: 1
+      GPU_RUNNER: ${{ matrix.system.gpu }}
 
     steps:
-    - uses: actions/checkout@v4
+    - name: Install Latest Nightly ROCm
+      shell: bash
+      working-directory: /tmp
+      run: |
+        tar -xf /opt/rocm-${{ matrix.system.therock-s3 }}.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0
+        ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }}
+        echo "ROCm installed to: ${{ env.ROCM_PATH }}"
+        ln -s -f /usr/bin/git /usr/local/bin/git
+
+    - uses: actions/checkout@v5
       with:
         sparse-checkout: projects/aqlprofile
         set-safe-directory: true
@@ -94,25 +114,35 @@ jobs:
       timeout-minutes: 30
       shell: bash
       run:
-        LD_LIBRARY_PATH=$(pwd)/build:$LD_LIBRARY_PATH /usr/bin/ctest --output-on-failure -VV -DCTEST_SOURCE_DIRECTORY="$(pwd)"
-        -DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" -DCTEST_SITE="${RUNNER_HOSTNAME}"
-        -DCTEST_BUILD_NAME=PR_${{ github.ref_name }}_${{ github.repository }}-${{ matrix.os }}-${{ matrix.gpu }}-core
+        PATH=~/.local/bin:/opt/rocm/bin:${PATH}
+        LD_LIBRARY_PATH=$(pwd)/build:/opt/rocm/lib:$LD_LIBRARY_PATH
+        ctest --output-on-failure -DCTEST_SOURCE_DIRECTORY="$(pwd)"
+        -DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" -DCTEST_SITE="${{ matrix.system.runner }}"
+        -DCTEST_BUILD_NAME=PR_${{ github.ref_name }}_${{ github.repository }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-core
         -DCMAKE_CTEST_ARGUMENTS=""
         -DAQLPROFILE_BUILD_TESTS=ON
         -DAQLPROFILE_EXTRA_CONFIGURE_ARGS=""
         -S $(pwd)/projects/aqlprofile/dashboard.cmake
 
   core-rpm:
+    name: Core • ${{ matrix.system.gpu }} • ${{ matrix.system.os }}
     # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
     strategy:
       fail-fast: false
       matrix:
-        runner: ['mi300']
-        os: ['rhel-9', 'sles-15']
-        build-type: ['RelWithDebInfo']
-        ci-flags: ['--linter clang-tidy']
+        system: [
+          {gpu: 'mi325', os: 'rhel-8.8', build-type: 'RelWithDebInfo', container: ''},
+          {gpu: 'mi325', os: 'rhel-9.5', build-type: 'RelWithDebInfo', container: ''},
+          {gpu: 'mi325', os: 'sles-15.6', build-type: 'RelWithDebInfo', container: ''}
+        ]
 
-    runs-on: rocprofiler-${{ matrix.os }}
+    runs-on: linux-mi325-1gpu-ossci-rocm
+    container:
+      image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-gfx94X-latest
+      credentials:
+        username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }}
+        password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }}
+      options: --privileged --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
 
     permissions:
       contents: read
@@ -121,21 +151,38 @@ jobs:
     # define this for containers
     env:
       GIT_DISCOVERY_ACROSS_FILESYSTEM: 1
+      GPU_RUNNER: ${{ matrix.system.gpu }}
 
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v5
       with:
         sparse-checkout: projects/aqlprofile
         set-safe-directory: true
 
+    - name: Install Latest Nightly ROCm using TheRock Tarballs
+      shell: bash
+      working-directory: /tmp
+      run: |
+        tar -xf /opt/rocm-gfx94X.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0
+        ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }}
+        echo "ROCm installed to: ${{ env.ROCM_PATH }}"
+
+
     - name: Install requirements
       timeout-minutes: 10
       shell: bash
-      run: git config --global --add safe.directory '*'
+      run: |
+        git config --global --add safe.directory '*'
+        python3 -m venv rocprofiler-sdk
+        source rocprofiler-sdk/bin/activate
+        export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
+        python3 -m pip install --upgrade pip
+        python3 -m pip install cmake
 
     - name: List Files
       shell: bash
       run: |
+        source rocprofiler-sdk/bin/activate
         echo "PATH: ${PATH}"
         echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
         which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; }
@@ -148,9 +195,12 @@ jobs:
       timeout-minutes: 30
       shell: bash
       run:
-        LD_LIBRARY_PATH=$(pwd)/build:$LD_LIBRARY_PATH ctest --output-on-failure -VV -DCTEST_SOURCE_DIRECTORY="$(pwd)"
-        -DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" -DCTEST_SITE="${RUNNER_HOSTNAME}"
-        -DCTEST_BUILD_NAME=PR_${{ github.ref_name }}_${{ github.repository }}-${{ matrix.os }}-${{ matrix.runner }}-core
+        source rocprofiler-sdk/bin/activate;
+        PATH=~/.local/bin:/opt/rocm/bin:${PATH}
+        LD_LIBRARY_PATH=$(pwd)/build:/opt/rocm/lib:$LD_LIBRARY_PATH
+        ctest --output-on-failure -DCTEST_SOURCE_DIRECTORY="$(pwd)"
+        -DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" -DCTEST_SITE="linux-mi325-1gpu-ossci-rocm"
+        -DCTEST_BUILD_NAME=PR_${{ github.ref_name }}_${{ github.repository }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-core
         -DCMAKE_CTEST_ARGUMENTS=""
         -DAQLPROFILE_BUILD_TESTS=ON
         -DAQLPROFILE_EXTRA_CONFIGURE_ARGS=""
diff --git a/.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml b/.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml
new file mode 100644
index 0000000000..b6016ac22f
--- /dev/null
+++ b/.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml
@@ -0,0 +1,101 @@
+name: rocprofiler-sdk Build CI Docker Images
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 6 * * *'
+  push:
+    branches: [ develop ]
+    paths:
+      - '.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml'
+  pull_request:
+    paths:
+      - '.github/workflows/rocprofiler-sdk-build-ci-docker-images.yml'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+# Needed to push/pull cached Docker layers + GHCR images
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  # -----------------------------------------------------------------------------
+  # Build a cached CI base image per OS using Buildx + GHA cache (Item #6)
+  # -----------------------------------------------------------------------------
+  build-ci-base:
+    name: Build CI Base Image • ${{ matrix.gpu }} • ${{ matrix.os }}
+    runs-on: azure-linux-scale-rocm
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ 'ubuntu-22.04', 'rhel-8.8', 'rhel-9.5', 'sles-15.6' ]
+        gpu: [ 'gfx94X', 'gfx110X', 'gfx120X' ]
+    steps:
+      - name: Checkout (shallow)
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: |
+            projects/rocprofiler-sdk/requirements.txt
+            projects/rocprofiler-sdk/docker/Dockerfile.ci
+
+      - name: Get the latest therock build
+        id: therock
+        run: |
+          sudo apt-get update && sudo apt-get install -y python3-pip
+          python3 -m pip install -U pip
+          python3 -m pip install -U awscli
+          export PATH=~/.local/bin:$PATH
+          KEY=$(aws s3api list-objects-v2 \
+              --bucket therock-nightly-tarball \
+              --no-sign-request \
+              --output json \
+              --query "sort_by(Contents[?contains(Key, 'linux-gfx120X')], &LastModified)[-1].Key")
+          KEY=${KEY//\"/}
+          test -n "$KEY" || { echo "No gfx120X tarball found"; exit 1; }
+          echo "gfx120X=${KEY}" >> $GITHUB_OUTPUT
+          KEY=$(aws s3api list-objects-v2 \
+              --bucket therock-nightly-tarball \
+              --no-sign-request \
+              --output json \
+              --query "sort_by(Contents[?contains(Key, 'linux-gfx94X')], &LastModified)[-1].Key")
+          KEY=${KEY//\"/}
+          test -n "$KEY" || { echo "No gfx94X tarball found"; exit 1; }
+          echo "gfx94X=${KEY}" >> $GITHUB_OUTPUT
+          KEY=$(aws s3api list-objects-v2 \
+              --bucket therock-nightly-tarball \
+              --no-sign-request \
+              --output json \
+              --query "sort_by(Contents[?contains(Key, 'linux-gfx110X')], &LastModified)[-1].Key")
+          KEY=${KEY//\"/}
+          test -n "$KEY" || { echo "No gfx110X tarball found"; exit 1; }
+          echo "gfx110X=${KEY}" >> $GITHUB_OUTPUT
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
+        with:
+          registry: docker.io
+          username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }}
+          password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
+
+      - name: Build & Push (to Docker Hub; cache to GHA)
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
+        with:
+          file: projects/rocprofiler-sdk/docker/Dockerfile.ci  # keep identical to the sparse-checkout path above
+          platforms: linux/amd64
+          push: true
+          build-args: |
+            BASE_TAG=${{ matrix.os }}
+            GPU_TYPE=${{ matrix.gpu }}
+            GFX94X_FILE_NAME=${{ steps.therock.outputs.gfx94X }}
+            GFX110X_FILE_NAME=${{ steps.therock.outputs.gfx110X }}
+            GFX120X_FILE_NAME=${{ steps.therock.outputs.gfx120X }}
+          tags: |
+            docker.io/rocm/rocprofiler-private:${{ matrix.os }}-${{ matrix.gpu }}-latest
+          cache-from: type=gha,scope=rocprofiler-ci-${{ matrix.os }}-${{ matrix.gpu }}
+          cache-to: type=gha,mode=max,scope=rocprofiler-ci-${{ matrix.os }}-${{ matrix.gpu }}
diff --git a/.github/workflows/rocprofiler-sdk-code_coverage.yml b/.github/workflows/rocprofiler-sdk-code_coverage.yml
index 35451648c0..5295af14c6 100644
--- a/.github/workflows/rocprofiler-sdk-code_coverage.yml
+++ b/.github/workflows/rocprofiler-sdk-code_coverage.yml
@@ -54,9 +54,11 @@ env:
   mi3xx_EXCLUDE_LABEL_REGEX: ""
   navi4_EXCLUDE_LABEL_REGEX: ""
   GLOBAL_CMAKE_OPTIONS: ""
+  DISABLE_ROCR_BUILD: "true"
 
 jobs:
   code-coverage:
+    name: Code Coverage • ${{ matrix.runner }} • ${{ matrix.os }}
     strategy:
       # fail-fast: false
       matrix:
@@ -64,21 +66,84 @@ jobs:
         os: ['ubuntu-22.04']
         build-type: ['Release']
 
-    runs-on: rocprofiler-mi300a
+    runs-on: rocprofiler-mi300a-dind
+    container:
+      image: docker.io/rocm/rocprofiler-private:${{ matrix.os }}-gfx94X-latest
+      credentials:
+        username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }}
+        password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }}
+      env:
+        DEBIAN_FRONTEND: noninteractive
+      options: --privileged --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
 
-    # define this for container
+    # define this for containers
     env:
       GIT_DISCOVERY_ACROSS_FILESYSTEM: 1
+      GPU_RUNNER: 'rocprofiler-mi300a-dind'
       GCC_COMPILER_VERSION: 11
       ROCPROFILER_PC_SAMPLING_BETA_ENABLED: 1
 
     steps:
-    - uses: actions/checkout@v4
+    - name: Install Latest Nightly ROCm
+      shell: bash
+      working-directory: /tmp
+      run: |
+        tar -xf /opt/rocm-gfx94X.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0
+        ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }}
+        echo "ROCm installed to: ${{ env.ROCM_PATH }}"
+
+    - name: Clone ROCProfiler SDK & AQLProfile & ROCProfiler Register & ROCR-Runtime
+      uses: actions/checkout@v5
       with:
-        sparse-checkout: projects/rocprofiler-sdk
-        submodules: true
+        sparse-checkout: |
+          projects/rocprofiler-sdk
+          projects/aqlprofile
+          projects/rocprofiler-register
+          projects/rocr-runtime
+        submodules: false
         set-safe-directory: true
 
+    - name: Compute submodule cache key
+      id: submods
+      shell: bash
+      run: |
+        git config --global --add safe.directory '*'
+        git submodule status --recursive | awk '{print $1,$2}' > .git-submodules-status
+        echo "hash=$(sha256sum .git-submodules-status | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"
+        # collect submodule paths for cache 'path'
+        git config --file .gitmodules --get-regexp path | awk '{print $2}' > .git-submodule-paths
+        { echo "paths<<EOF"; cat .git-submodule-paths; echo "EOF"; } >> "$GITHUB_OUTPUT"
+
+    - name: Restore submodule cache
+      uses: actions/cache@v4
+      with:
+        path: |
+          .git/modules
+          ${{ steps.submods.outputs.paths }}
+        key: submods-${{ runner.os }}-${{ steps.submods.outputs.hash }}
+        restore-keys: |
+          submods-${{ runner.os }}-
+          submods-
+
+    - name: Init/Update submodules
+      run: git submodule update --init --recursive --jobs 16
+
+    - name: Clone ROCDecode
+      uses: actions/checkout@v5
+      with:
+        repository: 'ROCm/rocDecode'
+        ref: 'release/rocm-rel-7.0'
+        set-safe-directory: true
+        path: 'rocDecode'
+
+    - name: Clone ROCJPEG
+      uses: actions/checkout@v5
+      with:
+        repository: 'ROCm/rocJPEG'
+        ref: 'release/rocm-rel-7.0'
+        set-safe-directory: true
+        path: 'rocJPEG'
+
     - name: Load Existing XML Code Coverage
       if: github.event_name == 'pull_request'
       id: load-coverage
@@ -102,14 +167,19 @@ jobs:
     - name: Install requirements
       timeout-minutes: 10
       shell: bash
+      working-directory: projects/rocprofiler-sdk
       run: |
-        cd projects/rocprofiler-sdk
         git config --global --add safe.directory '*'
         apt-get update
-        apt-get install -y build-essential cmake python3-pip gcovr wkhtmltopdf xvfb xfonts-base xfonts-75dpi xfonts-100dpi xfonts-utils xfonts-encodings libfontconfig libdw-dev libsqlite3-dev
-        apt-get install -y rccl-dev rccl-unittests rocjpeg-dev rocjpeg-test rocdecode-dev rocdecode-test
+        apt-get install -y build-essential cmake python3-pip gcovr wkhtmltopdf xvfb xfonts-base xfonts-75dpi xfonts-100dpi xfonts-utils xfonts-encodings libfontconfig libdw-dev libsqlite3-dev libdrm-dev file autoconf pkg-config rpm libzstd-dev
         python3 -m pip install -U --user -r requirements.txt
-        rm -rf /opt/rocm/lib/*rocprofiler-sdk* /opt/rocm/lib/cmake/*rocprofiler-sdk* /opt/rocm/share/*rocprofiler-sdk* /opt/rocm/libexec/*rocprofiler-sdk*
+        rm -rf \
+          ${{ env.ROCM_PATH }}/lib/*rocprofiler-sdk* \
+          ${{ env.ROCM_PATH }}/lib/cmake/*rocprofiler-sdk* \
+          ${{ env.ROCM_PATH }}/share/*rocprofiler-sdk* \
+          ${{ env.ROCM_PATH }}/libexec/*rocprofiler-sdk* \
+          ${{ env.ROCM_PATH }}*/lib/python*/site-packages/roctx \
+          ${{ env.ROCM_PATH }}*/lib/python*/site-packages/rocpd
 
     - name: Sync gcov with compilers
       timeout-minutes: 10
@@ -139,20 +209,111 @@ jobs:
       run: |
         echo 'ROCPROFILER_PC_SAMPLING_BETA_ENABLED=1' >> $GITHUB_ENV
 
+    - name: Setup ccache
+      uses: hendrikmuhs/ccache-action@63069e3931dedbf3b63792097479563182fe70d1 # v1.2.18
+      with:
+        key: ccache-${{ matrix.os }}-rocprofiler-mi300a-dind-${{ matrix.runner }}
+        max-size: 2G
+        save: true
+
+    - name: Install Missing ROCm Dependencies
+      shell: bash
+      run: |
+        export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH
+        export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:/usr/local/bin:~/.local/bin:$PATH
+        echo -e "Building & Installing ROCDecode..."
+        cmake -B build-rocdecode \
+          -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \
+          -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} \
+          -DCMAKE_CXX_COMPILER=${{ env.ROCM_PATH }}/bin/amdclang++ \
+          -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+          -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+          ${GITHUB_WORKSPACE}/rocDecode
+        cmake --build build-rocdecode --target all --parallel 16
+        cmake --build build-rocdecode --target install
+        echo -e "ROCDecode Installed Successfully!"
+        echo -e "Building & Installing ROCJPEG..."
+        cmake -B build-rocjpeg \
+          -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \
+          -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} \
+          -DCMAKE_CXX_COMPILER=${{ env.ROCM_PATH }}/bin/amdclang++ \
+          -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+          -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+          ${GITHUB_WORKSPACE}/rocJPEG
+        cmake --build build-rocjpeg --target all --parallel 16
+        cmake --build build-rocjpeg --target install
+        echo -e "ROCJPEG Installed Successfully!"
+
+    - name: Build and Install ROCProfiler-Register
+      shell: bash
+      working-directory: projects/rocprofiler-register
+      run: |
+        export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH
+        export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:/usr/local/bin:~/.local/bin:$PATH
+        echo "Install ROCProfiler-Register"
+        cmake -B build-rocprofiler-register \
+          -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+          -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \
+          -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \
+          -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+          -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+          .
+        cmake --build build-rocprofiler-register --target all --parallel 16
+        cmake --build build-rocprofiler-register --target install
+        echo "✅ ROCProfiler-Register Installation complete!"
+
+    - name: Build and Install ROCR-Runtime
+      if: ${{ !contains(env.DISABLE_ROCR_BUILD, 'true') }}
+      shell: bash
+      working-directory: projects/rocr-runtime
+      run: |
+        export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH
+        export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:/usr/local/bin:~/.local/bin:$PATH
+        echo "Install ROCR-Runtime..."
+        cmake -B build \
+          -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+          -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-7.0.0;${{ env.ROCM_PATH }}-7.0.0/llvm' \
+          -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \
+          .
+        cmake --build build --target all --parallel 16
+        cmake --build build --target install
+        echo "✅ ROCR-Runtime Installation complete!"
+
+    - name: Build and Install Aqlprofile
+      shell: bash
+      working-directory: projects/aqlprofile
+      run: |
+        export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH
+        export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:/usr/local/bin:~/.local/bin:$PATH
+        echo "Install Aqlprofile..."
+        cmake -B build-aqlprofile \
+          -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+          -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \
+          -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \
+          -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+          -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+          .
+        cmake --build build-aqlprofile --target all --parallel 16
+        cmake --build build-aqlprofile --target install
+        echo "✅ AQLProfile Installation complete!"
+
     - name: Configure, Build, and Test (Total Code Coverage)
       timeout-minutes: 30
       shell: bash
       working-directory: projects/rocprofiler-sdk
       run:
+        LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH
+        PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH
         python3 ./source/scripts/run-ci.py -B build
         --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-codecov
         --build-jobs 16
-        --site ${RUNNER_HOSTNAME}
+        --site 'rocprofiler-mi300a-dind'
         --gpu-targets ${{ env.GPU_TARGETS }}
         --coverage all
         --run-attempt ${{ github.run_attempt }}
         --
         -DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
+        -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm'
         -DPython3_EXECUTABLE=$(which python3)
         ${{ env.GLOBAL_CMAKE_OPTIONS }}
         --
@@ -165,15 +326,18 @@ jobs:
       working-directory: projects/rocprofiler-sdk
       run:
         find build -type f | egrep '\.gcda$' | xargs rm &&
+        LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH
+        PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH
         python3 ./source/scripts/run-ci.py -B build
         --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-codecov-tests
         --build-jobs 16
-        --site ${RUNNER_HOSTNAME}
+        --site 'rocprofiler-mi300a-dind'
         --gpu-targets ${{ env.GPU_TARGETS }}
         --coverage tests
         --run-attempt ${{ github.run_attempt }}
         --
         -DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
+        -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm'
         -DPython3_EXECUTABLE=$(which python3)
         ${{ env.GLOBAL_CMAKE_OPTIONS }}
         --
@@ -186,15 +350,18 @@ jobs:
       shell: bash
       run:
         find build -type f | egrep '\.gcda$' | xargs rm &&
+        LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH
+        PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH
         python3 ./source/scripts/run-ci.py -B build
         --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-codecov-samples
         --build-jobs 16
-        --site ${RUNNER_HOSTNAME}
+        --site 'rocprofiler-mi300a-dind'
         --gpu-targets ${{ env.GPU_TARGETS }}
         --coverage samples
         --run-attempt ${{ github.run_attempt }}
         --
         -DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
+        -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm'
         -DPython3_EXECUTABLE=$(which python3)
         ${{ env.GLOBAL_CMAKE_OPTIONS }}
         --
diff --git a/.github/workflows/rocprofiler-sdk-continuous_integration.yml b/.github/workflows/rocprofiler-sdk-continuous_integration.yml
index bfc3183861..35c158a4f0 100644
--- a/.github/workflows/rocprofiler-sdk-continuous_integration.yml
+++ b/.github/workflows/rocprofiler-sdk-continuous_integration.yml
@@ -2,9 +2,10 @@ name: rocprofiler-sdk Continuous Integration
 
 on:
   workflow_dispatch:
+  schedule:
+    - cron: '0 7 * * *'
   push:
-    branches:
-      - develop
+    branches: [ develop ]
     paths:
       - 'projects/rocprofiler-sdk/**'
       - '!projects/rocprofiler-sdk/*.md'
@@ -25,11 +26,17 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
+# Needed to push/pull cached Docker layers + GHCR images
+permissions:
+  contents: read
+  packages: write
+
 env:
   # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc.
with internal handling in cmake ROCM_PATH: "/opt/rocm" GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1201" PATH: "/usr/bin:$PATH" + ## No tests should be excluded here except for extreme emergencies; tests should only be disabled in CMake ## A task should be assigned directly to fix the issues ## Scratch memory tests need to be fixed for ROCm 7.0 release @@ -41,6 +48,7 @@ env: mi325_EXCLUDE_TESTS_REGEX: "" mi3xx_EXCLUDE_TESTS_REGEX: "" navi4_EXCLUDE_TESTS_REGEX: "" + navi3_EXCLUDE_LABEL_REGEX: "" vega20_EXCLUDE_LABEL_REGEX: "" mi200_EXCLUDE_LABEL_REGEX: "" @@ -49,255 +57,593 @@ env: mi325_EXCLUDE_LABEL_REGEX: "" mi3xx_EXCLUDE_LABEL_REGEX: "" navi4_EXCLUDE_LABEL_REGEX: "" + GLOBAL_CMAKE_OPTIONS: "" + DISABLE_ROCR_BUILD: "true" jobs: + # ----------------------------------------------------------------------------- + # Ubuntu / DEB job(s) + # ----------------------------------------------------------------------------- core-deb: - # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix + name: Core • ${{ matrix.system.gpu }} • ${{ matrix.system.os }} strategy: fail-fast: false matrix: - runner: ['navi4', 'navi3', 'mi3xx'] - os: ['ubuntu-22.04'] - build-type: ['RelWithDebInfo'] - - runs-on: rocprofiler-${{ matrix.runner }} - - # define this for containers + system: + - { gpu: 'navi4', runner: 'rocprofiler-navi4-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx120X", ci-flags: '--linter clang-tidy', gpu-target: "gfx1201" } + - { gpu: 'navi3', runner: 'rocprofiler-navi3-dind', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx110X", ci-flags: '--linter clang-tidy', gpu-target: "gfx1101" } + - { gpu: 'mi325', runner: 'linux-mi325-1gpu-ossci-rocm', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', therock-s3: "gfx94X", ci-flags: '--linter clang-tidy', gpu-target: "gfx942" } + runs-on: ${{ 
matrix.system.runner }} + container: + image: docker.io/rocm/rocprofiler-private:${{ matrix.system.os }}-${{ matrix.system.therock-s3 }}-latest + credentials: + username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }} + password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }} + env: + DEBIAN_FRONTEND: noninteractive + options: --privileged env: GIT_DISCOVERY_ACROSS_FILESYSTEM: 1 - CORE_EXT_RUNNER: mi3xx - + CORE_EXT_RUNNER: mi325 + GPU_RUNNER: ${{ matrix.system.gpu }} steps: - - name: Checkout - uses: actions/checkout@v4 - with: - sparse-checkout: projects/rocprofiler-sdk - submodules: true - set-safe-directory: true + - name: Install Latest Nightly ROCm + shell: bash + working-directory: /tmp + run: | + tar -xf /opt/rocm-${{ matrix.system.therock-s3 }}.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0 + ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }} + echo "ROCm installed to: ${{ env.ROCM_PATH }}" - - name: Install requirements - timeout-minutes: 10 - shell: bash - working-directory: projects/rocprofiler-sdk - run: | - git config --global --add safe.directory '*' - apt-get update - apt-get install -y build-essential cmake g++-11 g++-12 python3-pip libdw-dev libsqlite3-dev rccl-dev rccl-unittests rocjpeg-dev rocjpeg-test rocdecode-dev rocdecode-test - update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11 --slave /usr/bin/gcov gcov /usr/bin/gcov-11 - update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12 --slave /usr/bin/gcov gcov /usr/bin/gcov-12 - python3 -m pip install -U --user -r requirements.txt - rm -rf /opt/rocm/lib/*rocprofiler-sdk* /opt/rocm/lib/cmake/*rocprofiler-sdk* /opt/rocm/share/*rocprofiler-sdk* /opt/rocm/libexec/*rocprofiler-sdk* /opt/rocm*/lib/python*/site-packages/roctx /opt/rocm*/lib/python*/site-packages/rocpd + - name: Clone ROCProfiler SDK & AQLProfile & ROCProfiler Register & ROCR-Runtime + uses: actions/checkout@v5 + with: + sparse-checkout: | + 
projects/rocprofiler-sdk + projects/aqlprofile + projects/rocprofiler-register + projects/rocr-runtime + submodules: false + set-safe-directory: true - - name: List Files - shell: bash - working-directory: projects/rocprofiler-sdk - run: | - echo "PATH: ${PATH}" - echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" - which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; } - for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done - cat /opt/rocm/.info/version - ls -la + - name: Compute submodule cache key + id: submods + shell: bash + run: | + git config --global --add safe.directory '*' + git submodule status --recursive | awk '{print $1,$2}' > .git-submodules-status + echo "hash=$(sha256sum .git-submodules-status | cut -d' ' -f1)" >> "$GITHUB_OUTPUT" + # collect submodule paths for cache 'path' + git config --file .gitmodules --get-regexp path | awk '{print $2}' > .git-submodule-paths + { echo "paths<> "$GITHUB_OUTPUT" - - name: Enable PC Sampling - if: ${{ contains(matrix.runner, 'mi200') || contains(matrix.runner, 'mi300a') }} - shell: bash - working-directory: projects/rocprofiler-sdk - run: - echo 'ROCPROFILER_PC_SAMPLING_BETA_ENABLED=1' >> $GITHUB_ENV + - name: Restore submodule cache + uses: actions/cache@v4 + with: + path: | + .git/modules + ${{ steps.submods.outputs.paths }} + key: submods-${{ runner.os }}-${{ steps.submods.outputs.hash }} + restore-keys: | + submods-${{ runner.os }}- + submods- - - name: Configure, Build, and Test - timeout-minutes: 30 - shell: bash - working-directory: projects/rocprofiler-sdk - run: - python3 ./source/scripts/run-ci.py -B build - --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-core - --build-jobs 16 - --site ${RUNNER_HOSTNAME} - --gpu-targets ${{ env.GPU_TARGETS }} - --run-attempt ${{ github.run_attempt }} - -- - -DROCPROFILER_DEP_ROCMCORE=ON - -DROCPROFILER_BUILD_DOCS=OFF - 
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }} - -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-sdk - -DCPACK_GENERATOR='DEB;RPM;TGZ' - -DCPACK_PACKAGING_INSTALL_PREFIX="$(realpath /opt/rocm)" - -DPython3_EXECUTABLE=$(which python3) - ${{ env.GLOBAL_CMAKE_OPTIONS }} - -- - -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" - -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" + - name: Init/Update submodules + run: git submodule update --init --recursive --jobs 16 - - name: Install - if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }} - timeout-minutes: 10 - working-directory: projects/rocprofiler-sdk - run: - cmake --build build --target install --parallel 16 + - name: Clone ROCDecode + uses: actions/checkout@v5 + with: + repository: 'ROCm/rocDecode' + ref: 'release/rocm-rel-7.0' + set-safe-directory: true + path: 'rocDecode' - - name: Build Packaging - if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }} - timeout-minutes: 10 - working-directory: projects/rocprofiler-sdk - run: - cmake --build build --target package --parallel 16 + - name: Clone ROCJPEG + uses: actions/checkout@v5 + with: + repository: 'ROCm/rocJPEG' + ref: 'release/rocm-rel-7.0' + set-safe-directory: true + path: 'rocJPEG' - - name: Test Install Build - if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }} - timeout-minutes: 20 - shell: bash - working-directory: projects/rocprofiler-sdk - run: | - CMAKE_PREFIX_PATH=/opt/rocprofiler-sdk cmake -B build-samples samples - CMAKE_PREFIX_PATH=/opt/rocprofiler-sdk cmake -B build-tests -DGPU_TARGETS="gfx942" tests - export LD_LIBRARY_PATH=/opt/rocprofiler-sdk/lib:${LD_LIBRARY_PATH} - cmake --build build-samples --target all --parallel 16 - cmake --build build-tests --target all --parallel 16 - ctest --test-dir build-samples -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure - ctest --test-dir build-tests -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner 
}}_EXCLUDE_TESTS_REGEX}" --output-on-failure + - name: Install requirements + timeout-minutes: 10 + shell: bash + working-directory: projects/rocprofiler-sdk + run: | + git config --global --add safe.directory '*' + apt-get update + apt-get install -y g++-11 g++-12 cmake python3-pip libdw-dev libsqlite3-dev libdrm-dev file autoconf pkg-config rpm libzstd-dev + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11 --slave /usr/bin/gcov gcov /usr/bin/gcov-11 + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12 --slave /usr/bin/gcov gcov /usr/bin/gcov-12 + python3 -m pip install -U --user -r requirements.txt + rm -rf \ + ${{ env.ROCM_PATH }}/lib/*rocprofiler-sdk* \ + ${{ env.ROCM_PATH }}/lib/cmake/*rocprofiler-sdk* \ + ${{ env.ROCM_PATH }}/share/*rocprofiler-sdk* \ + ${{ env.ROCM_PATH }}/libexec/*rocprofiler-sdk* \ + ${{ env.ROCM_PATH }}*/lib/python*/site-packages/roctx \ + ${{ env.ROCM_PATH }}*/lib/python*/site-packages/rocpd - - name: Install Packages - if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }} - timeout-minutes: 5 - shell: bash - working-directory: projects/rocprofiler-sdk - run: | - export PATH=${PATH}:/usr/local/sbin:/usr/sbin:/sbin - ls -la - ls -la ./build - dpkg --force-all -i ./build/rocprofiler-sdk-roctx_*.deb - dpkg --force-all -i ./build/rocprofiler-sdk-rocpd_*.deb - for i in $(ls -S ./build/rocprofiler-sdk*.deb | egrep -v 'roctx|rocpd'); do dpkg --force-all -i ${i}; done; + - name: Setup ccache + uses: hendrikmuhs/ccache-action@63069e3931dedbf3b63792097479563182fe70d1 # v1.2.18 + with: + key: ccache-${{ matrix.system.os }}-${{ matrix.system.runner }}-${{ matrix.system.gpu }} + max-size: 2G + save: true - - name: Test Installed Packages - if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }} - timeout-minutes: 20 - shell: bash - working-directory: projects/rocprofiler-sdk - run: | - CMAKE_PREFIX_PATH=/opt/rocm cmake -B 
build-samples-deb /opt/rocm/share/rocprofiler-sdk/samples - CMAKE_PREFIX_PATH=/opt/rocm cmake -B build-tests-deb -DGPU_TARGETS="gfx942" /opt/rocm/share/rocprofiler-sdk/tests - cmake --build build-samples-deb --target all --parallel 16 - cmake --build build-tests-deb --target all --parallel 16 - ctest --test-dir build-samples-deb -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure - ctest --test-dir build-tests-deb -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure + - name: Install Missing ROCm Dependencies + shell: bash + run: | + echo -e "Building & Installing ROCDecode..." + cmake -B build-rocdecode \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} \ + -DCMAKE_CXX_COMPILER=${{ env.ROCM_PATH }}/bin/amdclang++ \ + -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ + ${GITHUB_WORKSPACE}/rocDecode + cmake --build build-rocdecode --target all --parallel 16 + cmake --build build-rocdecode --target install + echo -e "ROCDecode Installed Successfully!" + echo -e "Building & Installing ROCJPEG..." + cmake -B build-rocjpeg \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }} \ + -DCMAKE_CXX_COMPILER=${{ env.ROCM_PATH }}/bin/amdclang++ \ + -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ + ${GITHUB_WORKSPACE}/rocJPEG + cmake --build build-rocjpeg --target all --parallel 16 + cmake --build build-rocjpeg --target install + echo -e "ROCJPEG Installed Successfully!" 
- - name: Archive production artifacts - if: ${{ github.event_name == 'workflow_dispatch' && contains(matrix.runner, env.CORE_EXT_RUNNER) }} - uses: actions/upload-artifact@v4 - with: - name: installers-deb - path: | - ${{github.workspace}}/build/*.deb - ${{github.workspace}}/build/*.rpm - ${{github.workspace}}/build/*.tgz + - name: Build and Install ROCProfiler-Register + shell: bash + working-directory: projects/rocprofiler-register + run: | + export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH + export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH + echo "Install ROCProfiler-Register" + cmake -B build-rocprofiler-register \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ + . + cmake --build build-rocprofiler-register --target all --parallel 16 + cmake --build build-rocprofiler-register --target install + echo "✅ ROCProfiler-Register Installation complete!" + - name: Build and Install ROCR-Runtime + if: ${{ !contains(env.DISABLE_ROCR_BUILD, 'true') }} + shell: bash + working-directory: projects/rocr-runtime + run: | + export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH + export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH + echo "Install ROCR-Runtime..." + cmake -B build \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-7.0.0;${{ env.ROCM_PATH }}-7.0.0/llvm' \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + . + cmake --build build --target all --parallel 16 + cmake --build build --target install + echo "✅ ROCR-Runtime Installation complete!" 
+ + - name: Build and Install Aqlprofile + shell: bash + working-directory: projects/aqlprofile + run: | + export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH + export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH + echo "Install Aqlprofile..." + cmake -B build-aqlprofile \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ + . + cmake --build build-aqlprofile --target all --parallel 16 + cmake --build build-aqlprofile --target install + echo "✅ AQLProfile Installation complete!" + + - name: List Files + shell: bash + working-directory: projects/rocprofiler-sdk + run: | + echo "PATH: ${PATH}" + echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" + which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; } + for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done + cat /opt/rocm/.info/version + ls -la + + - name: Enable PC Sampling + if: ${{ contains(matrix.system.gpu, 'mi200') || contains(matrix.system.gpu, 'mi300a') }} + shell: bash + working-directory: projects/rocprofiler-sdk + run: echo 'ROCPROFILER_PC_SAMPLING_BETA_ENABLED=1' >> $GITHUB_ENV + + - name: Configure, Build, and Test + timeout-minutes: 30 + shell: bash + working-directory: projects/rocprofiler-sdk + run: | + LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH \ + PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH \ + python3 ./source/scripts/run-ci.py \ + -B build \ + --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-core \ + --build-jobs 16 \ + --site ${{ matrix.system.runner }} \ + --gpu-targets ${{ env.GPU_TARGETS }} \ + --run-attempt ${{ github.run_attempt }} \ 
+ ${{ matrix.system.ci-flags }} -- \ + -DROCPROFILER_DEP_ROCMCORE=ON \ + -DROCPROFILER_BUILD_DOCS=OFF \ + -DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }} \ + -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-sdk \ + -DCPACK_GENERATOR='DEB;RPM;TGZ' \ + -DCPACK_PACKAGING_INSTALL_PREFIX="$(realpath /opt/rocm)" \ + -DPython3_EXECUTABLE=$(which python3) \ + -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm' \ + ${{ env.GLOBAL_CMAKE_OPTIONS }} -- \ + -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" \ + -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" + + - name: Install + if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + timeout-minutes: 10 + working-directory: projects/rocprofiler-sdk + run: | + export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH + export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH + cmake --build build --target install --parallel 16 + + - name: Build Packaging + if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + timeout-minutes: 10 + working-directory: projects/rocprofiler-sdk + run: | + export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH + export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH + cmake --build build --target package --parallel 16 + + - name: Test Install Build + if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + timeout-minutes: 20 + shell: bash + working-directory: projects/rocprofiler-sdk + run: | + export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH + export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH + CMAKE_PREFIX_PATH=/opt/rocprofiler-sdk cmake -B build-samples samples + CMAKE_PREFIX_PATH=/opt/rocprofiler-sdk cmake -B build-tests -DGPU_TARGETS="gfx942" tests + export LD_LIBRARY_PATH=/opt/rocprofiler-sdk/lib:${LD_LIBRARY_PATH} + cmake --build build-samples --target all --parallel 16 + cmake 
--build build-tests --target all --parallel 16 + ctest --test-dir build-samples -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure + ctest --test-dir build-tests -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure + + - name: Install Packages + if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + timeout-minutes: 5 + shell: bash + working-directory: projects/rocprofiler-sdk + run: | + export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH + export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH + export PATH=${PATH}:/usr/local/sbin:/usr/sbin:/sbin + ls -la + ls -la ./build + dpkg --force-all -i ./build/rocprofiler-sdk-roctx_*.deb + dpkg --force-all -i ./build/rocprofiler-sdk-rocpd_*.deb + for i in $(ls -S ./build/rocprofiler-sdk*.deb | egrep -v 'roctx|rocpd'); do dpkg --force-all -i ${i}; done; + + - name: Test Installed Packages + if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + timeout-minutes: 20 + shell: bash + working-directory: projects/rocprofiler-sdk + run: | + export LD_LIBRARY_PATH=${{ env.ROCM_PATH }}/lib:${{ env.ROCM_PATH }}/llvm/lib:$LD_LIBRARY_PATH + export PATH=${{ env.ROCM_PATH }}/bin:${{ env.ROCM_PATH }}/llvm/bin:$PATH + CMAKE_PREFIX_PATH=/opt/rocm cmake -B build-samples-deb /opt/rocm/share/rocprofiler-sdk/samples + CMAKE_PREFIX_PATH=/opt/rocm cmake -B build-tests-deb -DGPU_TARGETS="gfx942" /opt/rocm/share/rocprofiler-sdk/tests + cmake --build build-samples-deb --target all --parallel 16 + cmake --build build-tests-deb --target all --parallel 16 + ctest --test-dir build-samples-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure + ctest --test-dir build-tests-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu 
}}_EXCLUDE_TESTS_REGEX}" --output-on-failure + + - name: Archive production artifacts + if: ${{ github.event_name == 'workflow_dispatch' && contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }} + uses: actions/upload-artifact@v4 + with: + name: installers-deb + path: | + ${{github.workspace}}/build/*.deb + ${{github.workspace}}/build/*.rpm + ${{github.workspace}}/build/*.tgz + + # ----------------------------------------------------------------------------- + # RHEL/SLES (RPM) job(s) + # ----------------------------------------------------------------------------- core-rpm: - # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix + name: Core • ${{ matrix.runner }} • ${{ matrix.os }} strategy: fail-fast: false matrix: - runner: ['mi300'] - os: ['rhel-9', 'sles-15'] - build-type: ['RelWithDebInfo'] - ci-flags: [''] - - runs-on: rocprofiler-${{ matrix.os }} - - # define this for containers + runner: [ 'mi325' ] + os: [ 'rhel-8.8', 'rhel-9.5', 'sles-15.6' ] + build-type: [ 'RelWithDebInfo' ] + ci-flags: [ '' ] + runs-on: linux-mi325-1gpu-ossci-rocm + container: + image: docker.io/rocm/rocprofiler-private:${{ matrix.os }}-gfx94X-latest + credentials: + username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }} + password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }} + options: --privileged env: GIT_DISCOVERY_ACROSS_FILESYSTEM: 1 OS_TYPE: ${{ matrix.os }} - + GPU_RUNNER: ${{ matrix.runner }} steps: - - name: Checkout - uses: actions/checkout@v4 - with: - sparse-checkout: projects/rocprofiler-sdk + - name: Clone ROCProfiler SDK & AQLProfile & ROCProfiler Register & ROCR-Runtime + uses: actions/checkout@v5 + with: + sparse-checkout: | + projects/rocprofiler-sdk + projects/aqlprofile + projects/rocprofiler-register + projects/rocr-runtime + submodules: false + set-safe-directory: true - - name: Install requirements - timeout-minutes: 10 - shell: bash - working-directory: projects/rocprofiler-sdk - run: | - git 
config --global --add safe.directory '*' - if [ "${OS_TYPE}" == "rhel-9" ]; then - dnf makecache - dnf groupinstall -y "Development Tools" - dnf install -y llvm14-devel - fi - python3 -m pip install --upgrade pip - python3 -m pip install -U --user -r requirements.txt - rm -rf /opt/rocm/lib/*rocprofiler-sdk* /opt/rocm/lib/cmake/*rocprofiler-sdk* /opt/rocm/share/*rocprofiler-sdk* /opt/rocm/libexec/*rocprofiler-sdk* + - name: Compute submodule cache key + id: submods + shell: bash + run: | + git config --global --add safe.directory '*' + git submodule status --recursive | awk '{print $1,$2}' > .git-submodules-status + echo "hash=$(sha256sum .git-submodules-status | cut -d' ' -f1)" >> "$GITHUB_OUTPUT" + # collect submodule paths for cache 'path' + git config --file .gitmodules --get-regexp path | awk '{print $2}' > .git-submodule-paths + { echo "paths<> "$GITHUB_OUTPUT" - - name: List Files - shell: bash - working-directory: projects/rocprofiler-sdk - run: | - echo "PATH: ${PATH}" - echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" - which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; } - for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done - cat /opt/rocm/.info/version - ls -la + - name: Restore submodule cache + uses: actions/cache@v4 + with: + path: | + .git/modules + ${{ steps.submods.outputs.paths }} + key: submods-${{ runner.os }}-${{ steps.submods.outputs.hash }} + restore-keys: | + submods-${{ runner.os }}- + submods- - - name: Enable PC Sampling - if: ${{ contains(matrix.runner, 'mi200') || contains(matrix.runner, 'mi300a') }} - shell: bash - working-directory: projects/rocprofiler-sdk - run: - echo 'ROCPROFILER_PC_SAMPLING_BETA_ENABLED=1' >> $GITHUB_ENV + - name: Init/Update submodules + run: git submodule update --init --recursive --jobs 16 - - name: Configure, Build, and Test - timeout-minutes: 30 - shell: bash - working-directory: projects/rocprofiler-sdk - run: - 
/usr/bin/python3 ./source/scripts/run-ci.py -B build - --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-core - --build-jobs 16 - --site ${RUNNER_HOSTNAME} - --gpu-targets ${{ env.GPU_TARGETS }} - --run-attempt ${{ github.run_attempt }} - ${{ matrix.ci-flags }} - -- - -DROCPROFILER_DEP_ROCMCORE=ON - -DROCPROFILER_BUILD_DOCS=OFF - -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} - -DPython3_EXECUTABLE=$(which python3) - ${{ env.GLOBAL_CMAKE_OPTIONS }} - -- - -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" - -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" + - name: Install Latest Nightly ROCm using TheRock Tarballs + shell: bash + working-directory: /tmp + run: | + tar -xf /opt/rocm-gfx94X.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0 + ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }} + echo "ROCm installed to: ${{ env.ROCM_PATH }}" + + - name: Install requirements (venv) + timeout-minutes: 10 + shell: bash + working-directory: projects/rocprofiler-sdk + run: | + git config --global --add safe.directory '*' + python3 -m venv rocprofiler-sdk + source rocprofiler-sdk/bin/activate + export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH + python3 -m pip install --upgrade pip + python3 -m pip install --upgrade -r requirements.txt + rm -rf \ + ${{ env.ROCM_PATH }}/lib/*rocprofiler-sdk* \ + ${{ env.ROCM_PATH }}/lib/cmake/*rocprofiler-sdk* \ + ${{ env.ROCM_PATH }}/share/*rocprofiler-sdk* \ + ${{ env.ROCM_PATH }}/libexec/*rocprofiler-sdk* + + - name: Install Curl for RHEL 8.8 + if: ${{ matrix.os == 'rhel-8.8' }} + run: | + dnf install -y curl + ln -s /usr/local/bin/curl /usr/bin/curl + + - name: Setup ccache + uses: hendrikmuhs/ccache-action@63069e3931dedbf3b63792097479563182fe70d1 # v1.2.18 + with: + key: ccache-${{ matrix.os }}-linux-mi325-1gpu-ossci-rocm-${{ matrix.runner }} + max-size: 2G + save: true + variant: sccache + + - name: Build and Install ROCProfiler-Register + shell: bash + working-directory: projects/rocprofiler-register + 
run: | + echo "Install ROCProfiler-Register" + cmake -B build-rocprofiler-register \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/sccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/sccache \ + . + cmake --build build-rocprofiler-register --target all --parallel 16 + cmake --build build-rocprofiler-register --target install + echo "✅ ROCProfiler-Register Installation complete!" + + - name: Build and Install ROCR-Runtime + if: ${{ !contains(env.DISABLE_ROCR_BUILD, 'true') }} + shell: bash + working-directory: projects/rocr-runtime + run: | + python3 -m venv rocprofiler-sdk + source rocprofiler-sdk/bin/activate + export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH + python3 -m pip install --upgrade pip + python3 -m pip install --upgrade cmake + echo "Install ROCR-Runtime..." + cmake -B build \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-7.0.0;${{ env.ROCM_PATH }}-7.0.0/llvm' \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + . + cmake --build build --target all --parallel 16 + cmake --build build --target install + echo "✅ ROCR-Runtime Installation complete!" + + - name: Build and Install Aqlprofile + shell: bash + working-directory: projects/aqlprofile + run: | + echo "Install Aqlprofile." + python3 -m venv rocprofiler-sdk + source rocprofiler-sdk/bin/activate + export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH + python3 -m pip install --upgrade pip + python3 -m pip install --upgrade cmake + cmake -B build-aqlprofile \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/sccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/sccache \ + . 
+ cmake --build build-aqlprofile --target all --parallel 16 + cmake --build build-aqlprofile --target install + echo "✅ AQLProfile Installation complete!" + + - name: Enable PC Sampling + if: ${{ contains(matrix.runner, 'mi200') || contains(matrix.runner, 'mi300a') }} + shell: bash + working-directory: projects/rocprofiler-sdk + run: + echo 'ROCPROFILER_PC_SAMPLING_BETA_ENABLED=1' >> $GITHUB_ENV + + - name: List Files + shell: bash + working-directory: projects/rocprofiler-sdk + run: | + source rocprofiler-sdk/bin/activate + echo "PATH: ${PATH}" + echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" + which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; } + for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done + cat /opt/rocm/.info/version + ls -la + + - name: Configure, Build, and Test + timeout-minutes: 30 + shell: bash + working-directory: projects/rocprofiler-sdk + run: + source rocprofiler-sdk/bin/activate; + PATH=~/.local/bin:/opt/rh/gcc-toolset-11/root/usr/bin:$PATH + python3 ./source/scripts/run-ci.py -B build + --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-core + --build-jobs 16 + --site 'linux-mi325-1gpu-ossci-rocm' + --gpu-targets ${{ env.GPU_TARGETS }} + --run-attempt ${{ github.run_attempt }} + ${{ matrix.ci-flags }} + -- + -DROCPROFILER_DEP_ROCMCORE=ON + -DROCPROFILER_BUILD_DOCS=OFF + -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} + -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm' + -DPython3_EXECUTABLE=$(which python3) + ${{ env.GLOBAL_CMAKE_OPTIONS }} + -- + -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" + -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" sanitizers: + name: ${{ matrix.sanitizer }} • ${{ matrix.runner }} • ${{ matrix.os }} strategy: fail-fast: false matrix: - runner: ['mi3xx'] + runner: ['mi325'] sanitizer: ['AddressSanitizer', 'ThreadSanitizer', 'LeakSanitizer', 
'UndefinedBehaviorSanitizer'] os: ['ubuntu-22.04'] build-type: ['RelWithDebInfo'] if: ${{ contains(github.event_name, 'pull_request') }} - runs-on: rocprofiler-${{ matrix.runner }} + runs-on: linux-mi325-1gpu-ossci-rocm + container: + image: docker.io/rocm/rocprofiler-private:${{ matrix.os }}-gfx94X-latest + credentials: + username: ${{ secrets.ROCPROFILER_AZURE_CI_USER }} + password: ${{ secrets.ROCPROFILER_AZURE_CI_PASS }} + env: + DEBIAN_FRONTEND: noninteractive + options: --privileged --cap-add=SYS_PTRACE --security-opt seccomp=unconfined # define this for containers env: GIT_DISCOVERY_ACROSS_FILESYSTEM: 1 GCC_COMPILER_VERSION: 13 + GPU_RUNNER: ${{ matrix.runner }} steps: - - name: Checkout - uses: actions/checkout@v4 + - name: Install Latest Nightly ROCm + shell: bash + working-directory: /tmp + run: | + ls -lah /opt/ + tar -xf /opt/rocm-gfx94X.tar.gz -C ${{ env.ROCM_PATH }}-7.0.0 + ln -s ${{ env.ROCM_PATH }}-7.0.0 ${{ env.ROCM_PATH }} + echo "ROCm installed to: ${{ env.ROCM_PATH }}" + + - name: Clone ROCProfiler SDK & AQLProfile & ROCProfiler Register & ROCR-Runtime + uses: actions/checkout@v5 with: - sparse-checkout: projects/rocprofiler-sdk - submodules: true + sparse-checkout: | + projects/rocprofiler-sdk + projects/aqlprofile + projects/rocprofiler-register + projects/rocr-runtime + submodules: false set-safe-directory: true + - name: Compute submodule cache key + id: submods + shell: bash + run: | + git config --global --add safe.directory '*' + git submodule status --recursive | awk '{print $1,$2}' > .git-submodules-status + echo "hash=$(sha256sum .git-submodules-status | cut -d' ' -f1)" >> "$GITHUB_OUTPUT" + # collect submodule paths for cache 'path' + git config --file .gitmodules --get-regexp path | awk '{print $2}' > .git-submodule-paths + { echo "paths<> "$GITHUB_OUTPUT" + + - name: Restore submodule cache + uses: actions/cache@v4 + with: + path: | + .git/modules + ${{ steps.submods.outputs.paths }} + key: submods-${{ runner.os }}-${{ 
steps.submods.outputs.hash }} + restore-keys: | + submods-${{ runner.os }}- + submods- + + - name: Init/Update submodules + run: git submodule update --init --recursive --jobs 16 + - name: Install requirements timeout-minutes: 10 shell: bash @@ -305,14 +651,14 @@ jobs: run: | git config --global --add safe.directory '*' apt-get update - apt-get install -y build-essential cmake python3-pip libasan8 libtsan2 software-properties-common clang-15 libdw-dev libsqlite3-dev + apt-get install -y build-essential cmake python3-pip libasan8 libtsan2 software-properties-common clang-15 libdw-dev libsqlite3-dev libdrm-dev file autoconf pkg-config add-apt-repository ppa:ubuntu-toolchain-r/test apt-get update apt-get upgrade -y apt-get install -y gcc-${{ env.GCC_COMPILER_VERSION }} g++-${{ env.GCC_COMPILER_VERSION }} update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${{ env.GCC_COMPILER_VERSION }} 100 --slave /usr/bin/g++ g++ /usr/bin/g++-${{ env.GCC_COMPILER_VERSION }} --slave /usr/bin/gcov gcov /usr/bin/gcov-${{ env.GCC_COMPILER_VERSION }} python3 -m pip install -U --user -r requirements.txt - rm -rf /opt/rocm/lib/*rocprofiler-sdk* /opt/rocm/lib/cmake/*rocprofiler-sdk* /opt/rocm/share/*rocprofiler-sdk* /opt/rocm/libexec/*rocprofiler-sdk* + rm -rf ${{ env.ROCM_PATH }}/lib/*rocprofiler-sdk* ${{ env.ROCM_PATH }}/lib/cmake/*rocprofiler-sdk* ${{ env.ROCM_PATH }}/share/*rocprofiler-sdk* ${{ env.ROCM_PATH }}/libexec/*rocprofiler-sdk* ${{ env.ROCM_PATH }}*/lib/python*/site-packages/roctx ${{ env.ROCM_PATH }}*/lib/python*/site-packages/rocpd - name: List Files shell: bash @@ -329,21 +675,79 @@ jobs: working-directory: projects/rocprofiler-sdk run: echo 'ROCPROFILER_PC_SAMPLING_BETA_ENABLED=1' >> $GITHUB_ENV + - name: Setup ccache + uses: hendrikmuhs/ccache-action@63069e3931dedbf3b63792097479563182fe70d1 # v1.2.18 + with: + key: ccache-${{ matrix.os }}-linux-mi325-1gpu-ossci-rocm-${{ matrix.runner }}-${{ matrix.sanitizer}} + max-size: 2G + save: true + + - name: Build and 
Install ROCProfiler-Register + shell: bash + working-directory: projects/rocprofiler-register + run: | + echo "Install ROCProfiler-Register" + cmake -B build-rocprofiler-register \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ + . + cmake --build build-rocprofiler-register --target all --parallel 16 + cmake --build build-rocprofiler-register --target install + echo "✅ ROCProfiler-Register Installation complete!" + + - name: Build and Install ROCR-Runtime + if: ${{ !contains(env.DISABLE_ROCR_BUILD, 'true') }} + shell: bash + working-directory: projects/rocr-runtime + run: | + echo "Install ROCR-Runtime..." + cmake -B build \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }}-7.0.0;${{ env.ROCM_PATH }}-7.0.0/llvm' \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ + . + cmake --build build --target all --parallel 16 + cmake --build build --target install + echo "✅ ROCR-Runtime Installation complete!" + + - name: Build and Install Aqlprofile + shell: bash + working-directory: projects/aqlprofile + run: | + echo "Install Aqlprofile." + cmake -B build-aqlprofile \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_PREFIX_PATH=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_INSTALL_PREFIX=${{ env.ROCM_PATH }}-7.0.0 \ + -DCMAKE_C_COMPILER_LAUNCHER=/usr/bin/ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/bin/ccache \ + . + cmake --build build-aqlprofile --target all --parallel 16 + cmake --build build-aqlprofile --target install + echo "✅ Installation complete!" 
+ - name: Configure, Build, and Test timeout-minutes: 45 shell: bash working-directory: projects/rocprofiler-sdk run: + sudo sysctl -w vm.mmap_rnd_bits=28; python3 ./source/scripts/run-ci.py -B build --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-${{ matrix.sanitizer }} --build-jobs 16 - --site ${RUNNER_HOSTNAME} + --site 'linux-mi325-1gpu-ossci-rocm' --gpu-targets ${{ env.GPU_TARGETS }} --memcheck ${{ matrix.sanitizer }} --run-attempt ${{ github.run_attempt }} -- -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -DCMAKE_INSTALL_PREFIX="${{ env.ROCM_PATH }}" + -DCMAKE_PREFIX_PATH='${{ env.ROCM_PATH }};${{ env.ROCM_PATH }}/llvm' -DPython3_EXECUTABLE=$(which python3) ${{ env.GLOBAL_CMAKE_OPTIONS }} -- diff --git a/projects/aqlprofile/Readme.txt b/projects/aqlprofile/Readme.txt index fbc12559d2..1f35407207 100644 --- a/projects/aqlprofile/Readme.txt +++ b/projects/aqlprofile/Readme.txt @@ -62,6 +62,3 @@ $ /opt/rocm/opencl/bin/clang -cl-std=CL2.0 -include /opt/rocm/opencl/include/ope With newer device-libs layout, use this recompile command: $ /opt/rocm/opencl/bin/clang -cl-std=CL2.0 -include /opt/rocm/opencl/include/opencl-c.h --hip-device-lib-path=/opt/rocm/amdgcn/bitcode -target amdgcn-amd-amdhsa -mcpu=gfx906 vector_add_kernel.cl -o vector_add_kernel.so - -### ROCm 5.7 -Added support for GFX10/GFX11 diff --git a/projects/aqlprofile/dashboard.cmake b/projects/aqlprofile/dashboard.cmake index 18b61e7ac4..ec6f0c4a8d 100644 --- a/projects/aqlprofile/dashboard.cmake +++ b/projects/aqlprofile/dashboard.cmake @@ -8,10 +8,10 @@ macro(dashboard_submit) endif() endmacro() -set(CTEST_PROJECT_NAME "aqlprofile") -set(CTEST_NIGHTLY_START_TIME "01:00:00 UTC") -set(CTEST_DROP_METHOD "http") -set(CTEST_DROP_SITE "cdash.rocprofiler.amd.com") +set(CTEST_PROJECT_NAME "AQLProfile") +set(CTEST_NIGHTLY_START_TIME "05:00:00 UTC") +set(CTEST_DROP_METHOD "https") +set(CTEST_DROP_SITE "my.cdash.org") set(CTEST_DROP_LOCATION 
"/submit.php?project=${CTEST_PROJECT_NAME}") set(CTEST_DROP_SITE_CDASH TRUE) @@ -62,13 +62,13 @@ endif() macro(handle_error _message _ret) if(NOT ${${_ret}} EQUAL 0) dashboard_submit(PARTS Done RETURN_VALUE _submit_ret) - message(WARNING "${_message} failed: ${${_ret}}") + message(AUTHOR_WARNING "${_message} failed: ${${_ret}}") endif() endmacro() ctest_start(Continuous) -ctest_update(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _update_ret) +ctest_update(SOURCE "${CTEST_SOURCE_DIRECTORY}" RETURN_VALUE _update_ret) handle_error("Update" _update_ret) ctest_configure(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _configure_ret) @@ -76,12 +76,12 @@ dashboard_submit(PARTS Start Update Configure RETURN_VALUE _submit_ret) handle_error("Configure" _configure_ret) -ctest_build(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _build_ret) +ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _build_ret) dashboard_submit(PARTS Build RETURN_VALUE _submit_ret) handle_error("Build" _build_ret) -ctest_test(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _test_ret) +ctest_test(BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _test_ret) dashboard_submit(PARTS Test RETURN_VALUE _submit_ret) handle_error("Testing" _test_ret) @@ -90,5 +90,5 @@ dashboard_submit(PARTS Done RETURN_VALUE _submit_ret) if(_submit_ret EQUAL 0) message(STATUS "Dashboard submission successful.") else() - message(WARNING "Dashboard submission failed with code ${_submit_ret}.") + message(AUTHOR_WARNING "Dashboard submission failed with code ${_submit_ret}.") endif() diff --git a/projects/rocprofiler-sdk/README.md b/projects/rocprofiler-sdk/README.md index adc70859e5..469920f6c2 100644 --- a/projects/rocprofiler-sdk/README.md +++ b/projects/rocprofiler-sdk/README.md @@ -125,9 +125,3 @@ Please report issues on GitHub OR send an email to [!WARNING] -> To use 
ROCprofiler-SDK, obtain the latest mainline version of AQLprofile from [here](https://github.com/ROCm/aqlprofile). diff --git a/projects/rocprofiler-sdk/docker/Dockerfile.ci b/projects/rocprofiler-sdk/docker/Dockerfile.ci new file mode 100644 index 0000000000..2dbaab6258 --- /dev/null +++ b/projects/rocprofiler-sdk/docker/Dockerfile.ci @@ -0,0 +1,126 @@ +# Build a thin "base with deps" image atop the private runner image +ARG BASE_TAG=ubuntu-22.04 +FROM rocm/rocprofiler-private:${BASE_TAG} + +# GPU Type from GitHub Actions +ARG GPU_TYPE=gfx94X +ENV GPU_TYPE=${GPU_TYPE} + +# Nightly Tarball Keys from GitHub Actions +ARG GFX94X_FILE_NAME +ENV GFX94X_FILE_NAME=${GFX94X_FILE_NAME} +ARG GFX110X_FILE_NAME +ENV GFX110X_FILE_NAME=${GFX110X_FILE_NAME} +ARG GFX120X_FILE_NAME +ENV GFX120X_FILE_NAME=${GFX120X_FILE_NAME} + +SHELL ["/bin/bash","-lc"] +COPY projects/rocprofiler-sdk/requirements.txt /root/requirements.txt + +ENV DEBIAN_FRONTEND=noninteractive +ENV CMAKE_HIP_PLATFORM=amd +ENV HIP_PLATFORM=amd +ENV HIP_RUNTIME=rocclr +ENV HIP_COMPILER=amdclang++ +ENV LLVM_PATH=/opt/rocm/llvm +ENV CMAKE_PREFIX_PATH=/opt/rocm +ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:/opt/rocm/bin:/opt/rocm/llvm/bin:/usr/local/bin:~/.local/bin:${PATH} +ENV LD_LIBRARY_PATH=/opt/rocm/lib:/opt/rocm/llvm/lib:${LD_LIBRARY_PATH} + +RUN set -euo pipefail; \ + if [ -f /etc/debian_version ]; then \ + apt-get update && \ + apt-get install -y curl wget gpg python3 python3-pip build-essential coreutils software-properties-common git cmake g++-11 g++-12 libdw-dev libsqlite3-dev libdrm-dev file autoconf pkg-config rpm libzstd-dev && \ + add-apt-repository ppa:git-core/ppa && \ + mkdir -p /etc/apt/keyrings && \ + wget -N -P /tmp/ https://repo.radeon.com/amdgpu-install/.7.0/ubuntu/jammy/amdgpu-install_7.0.70000-1_all.deb && \ + apt-get install -y /tmp/amdgpu-install_7.0.70000-1_all.deb && \ + sed -i "s/\/30.10/\/.30.10/" /etc/apt/sources.list.d/amdgpu*.list && \ + sed -i "s|rocm/apt/7.0|rocm/apt/.apt_7.0|; 
s|graphics/7.0|graphics/.7.0|" /etc/apt/sources.list.d/rocm.list && \ + apt-get update && \ + apt-get install -y git rocm-openmp-sdk libva-amdgpu-dev rocm-llvm-dev && \ + python3 -m pip install -U awscli pipx && \ + python3 -m pip install -U --user -r /root/requirements.txt; \ + elif [ $(grep -i "ID=.*rhel" /etc/os-release | wc -l) -gt 0 ]; then \ + dnf clean all || true; \ + dnf install -y perl-ExtUtils-MakeMaker python3-pip || true; \ + if [ $(grep -i "VERSION_ID=\"8.8\"" /etc/os-release | wc -l) -gt 0 ]; then \ + wget https://www.kernel.org/pub/software/scm/git/git-2.51.0.tar.xz; \ + tar -xf git-2.51.0.tar.xz; \ + cd git-2.51.0; \ + rm -rf /etc/yum.repos.d/redhat-partner.repo || true; \ + dnf clean all; \ + dnf install -y perl-ExtUtils-MakeMaker || true; \ + export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH; \ + make prefix=/usr all -j 32; \ + make prefix=/usr install; \ + cd ..; rm -rf git-2.51.0*; \ + echo -e "[ROCm-7.0.0]\nname=ROCm7.0.0\nbaseurl=https://repo.radeon.com/rocm/el8/7.0_rc1/main\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/rocm.repo; \ + echo -e "[amdgpu]\nname=amdgpu\nbaseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.8/main/x86_64/\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo; \ + else \ + rm -rf /etc/yum.repos.d/RHEL-partners.repo; \ + dnf clean all; \ + echo -e "[ROCm-7.0.0]\nname=ROCm7.0.0\nbaseurl=https://repo.radeon.com/rocm/el9/7.0_rc1/main\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/rocm.repo; \ + echo -e "[amdgpu]\nname=amdgpu\nbaseurl=https://repo.radeon.com/amdgpu/30.10_rc1/rhel/9.5/main/x86_64/\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo; \ + fi; \ + dnf clean all; \ + dnf install -y rocm-openmp rocm-openmp-sdk rocm-llvm-devel hipify-clang libsqlite3x-devel 
elfutils-devel; \ + python3 -m pip install -U awscli pipx; \ + python3 -m venv rocprofiler-sdk; \ + source rocprofiler-sdk/bin/activate; \ + export PATH=/opt/rh/gcc-toolset-11/root/usr/bin:${PATH}; \ + python3 -m pip install --upgrade pip; \ + python3 -m pip install --upgrade -r /root/requirements.txt; \ + else \ + echo -e "[ROCm-7.0.0]\nname=ROCm7.0.0\nbaseurl=https://repo.radeon.com/rocm/zyp/7.0_rc1/main\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/zypp/repos.d/rocm.repo; \ + echo -e "[amdgpu]\nname=amdgpu\nbaseurl=https://repo.radeon.com/amdgpu/30.10_rc1/sle/15.6/main/x86_64/\nenabled=1\npriority=50\ngpgcheck=1\ngpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/zypp/repos.d/amdgpu.repo; \ + zypper --gpg-auto-import-keys refresh; \ + zypper --non-interactive install -y rocm-openmp rocm-openmp-sdk rocm-llvm-devel hipify-clang sqlite3-devel python3-pip; \ + python3 -m venv rocprofiler-sdk; \ + source rocprofiler-sdk/bin/activate; \ + python3 -m pip install --upgrade pip pipx; \ + python3 -m pipx install awscli; \ + python3 -m pipx ensurepath; \ + source ~/.bashrc; \ + python3 -m pip install --upgrade pip || true; \ + python3 -m pip install --upgrade -r /root/requirements.txt || true; \ + cd /tmp; wget https://www.kernel.org/pub/software/scm/git/git-2.51.0.tar.xz; \ + tar -xf git-2.51.0.tar.xz; cd git-2.51.0; make prefix=/usr all -j 32; make prefix=/usr install; \ + cd /tmp; ln -s -f /usr/bin/git /usr/local/bin/git; rm -rf git-2.51.0*; \ + fi + +# Nightly Tarball - gfx94X +RUN set -euo pipefail; \ + if [ "${GPU_TYPE}" = "gfx94X" ]; then \ + if [ $(grep -i "sles" /etc/os-release | wc -l) -gt 0 ]; then \ + source rocprofiler-sdk/bin/activate; \ + python3 -m pipx ensurepath; \ + source ~/.bashrc; \ + fi; \ + aws s3 cp "s3://therock-nightly-tarball/${GFX94X_FILE_NAME}" rocm-gfx94X.tar.gz --no-sign-request && \ + mv rocm-gfx94X.tar.gz /opt/rocm-gfx94X.tar.gz; \ + fi + +# Nightly Tarball - gfx110X +RUN set -euo 
pipefail; \ + if [ "${GPU_TYPE}" = "gfx110X" ]; then \ + if [ $(grep -i "sles" /etc/os-release | wc -l) -gt 0 ]; then \ + source rocprofiler-sdk/bin/activate; \ + python3 -m pipx ensurepath; \ + source ~/.bashrc; \ + fi; \ + aws s3 cp "s3://therock-nightly-tarball/${GFX110X_FILE_NAME}" rocm-gfx110X.tar.gz --no-sign-request && \ + mv rocm-gfx110X.tar.gz /opt/rocm-gfx110X.tar.gz; \ + fi + +# Nightly Tarball - gfx120X +RUN set -euo pipefail; \ + if [ "${GPU_TYPE}" = "gfx120X" ]; then \ + if [ $(grep -i "sles" /etc/os-release | wc -l) -gt 0 ]; then \ + source rocprofiler-sdk/bin/activate; \ + python3 -m pipx ensurepath; \ + source ~/.bashrc; \ + fi; \ + aws s3 cp "s3://therock-nightly-tarball/${GFX120X_FILE_NAME}" rocm-gfx120X.tar.gz --no-sign-request && \ + mv rocm-gfx120X.tar.gz /opt/rocm-gfx120X.tar.gz; \ + fi diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp index edc98375f3..424b237e47 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp @@ -953,7 +953,27 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipDrvLaunchKernelEx) #if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 12 ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemGetHandleForAddressRange) #endif - +#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 14 +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipModuleGetFunctionCount) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D8) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D8Async) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D16) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D16Async) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D32) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D32Async) 
+ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamGetAttribute) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamSetAttribute) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipModuleLoadFatBinary) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpyBatchAsync) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpy3DBatchAsync) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpy3DPeer) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpy3DPeerAsync) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipGetDriverEntryPoint) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipGetDriverEntryPoint_spt) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemPrefetchAsync_v2) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemAdvise_v2) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamGetId) +#endif #if HIP_RUNTIME_API_TABLE_STEP_VERSION == 0 static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 442); #elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 1 @@ -982,6 +1002,8 @@ static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 476); static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 477); #elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 13 static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 477); +#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 14 +static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 496); #else # if !defined(ROCPROFILER_UNSAFE_NO_VERSION_CHECK) && \ (defined(ROCPROFILER_CI) && ROCPROFILER_CI > 0) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_args.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_args.h index f036d90109..cfcd3218ca 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_args.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_args.h @@ -3147,6 +3147,144 @@ typedef union rocprofiler_hip_api_args_t unsigned long long flags; } 
hipMemGetHandleForAddressRange; #endif +#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 14 + struct + { + unsigned int* count; + hipModule_t mod; + } hipModuleGetFunctionCount; + struct + { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned char value; + size_t width; + size_t height; + } hipMemsetD2D8; + struct + { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned char value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemsetD2D8Async; + struct + { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned short value; + size_t width; + size_t height; + } hipMemsetD2D16; + struct + { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned short value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemsetD2D16Async; + struct + { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned int value; + size_t width; + size_t height; + } hipMemsetD2D32; + struct + { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned int value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemsetD2D32Async; + struct + { + hipStream_t stream; + hipLaunchAttributeID attr; + const hipLaunchAttributeValue* value_out; + } hipStreamGetAttribute; + struct + { + hipStream_t stream; + hipLaunchAttributeID attr; + const hipLaunchAttributeValue* value; + } hipStreamSetAttribute; + struct + { + hipModule_t* module; + const void* fatbin; + } hipModuleLoadFatBinary; + struct + { + void** dsts; + void** srcs; + size_t* sizes; + size_t count; + hipMemcpyAttributes* attrs; + size_t* attrsIdxs; + size_t numAttrs; + size_t* failIdx; + hipStream_t stream; + } hipMemcpyBatchAsync; + struct + { + size_t numOps; + hipMemcpy3DBatchOp* opList; + size_t* failIdx; + unsigned long long flags; + hipStream_t stream; + } hipMemcpy3DBatchAsync; + struct + { + hipMemcpy3DPeerParms* p; + } hipMemcpy3DPeer; + struct + { + hipMemcpy3DPeerParms* p; + hipStream_t stream; + } hipMemcpy3DPeerAsync; + struct + { + const char* symbol; + void** funcPtr; + unsigned long long flags; + 
hipDriverEntryPointQueryResult* driverStatus; + } hipGetDriverEntryPoint; + struct + { + const char* symbol; + void** funcPtr; + unsigned long long flags; + hipDriverEntryPointQueryResult* driverStatus; + } hipGetDriverEntryPoint_spt; + struct + { + const void* dev_ptr; + size_t count; + hipMemLocation location; + unsigned int flags; + hipStream_t stream; + } hipMemPrefetchAsync_v2; + struct + { + const void* dev_ptr; + size_t count; + hipMemoryAdvise advice; + hipMemLocation location; + } hipMemAdvise_v2; + struct + { + hipStream_t stream; + unsigned long long* streamId; + } hipStreamGetId; +#endif } rocprofiler_hip_api_args_t; ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/runtime_api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/runtime_api_id.h index 0b78889d14..1bccfa1f2c 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/runtime_api_id.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/runtime_api_id.h @@ -530,6 +530,27 @@ typedef enum rocprofiler_hip_runtime_api_id_t // NOLINT(performance-enum-size) #endif #if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 12 ROCPROFILER_HIP_RUNTIME_API_ID_hipMemGetHandleForAddressRange, +#endif +#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 14 + ROCPROFILER_HIP_RUNTIME_API_ID_hipModuleGetFunctionCount, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D8, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D8Async, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D16, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D16Async, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D32, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D32Async, + ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamGetAttribute, + ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamSetAttribute, + ROCPROFILER_HIP_RUNTIME_API_ID_hipModuleLoadFatBinary, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpyBatchAsync, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpy3DBatchAsync, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpy3DPeer, 
+ ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpy3DPeerAsync, + ROCPROFILER_HIP_RUNTIME_API_ID_hipGetDriverEntryPoint, + ROCPROFILER_HIP_RUNTIME_API_ID_hipGetDriverEntryPoint_spt, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemPrefetchAsync_v2, + ROCPROFILER_HIP_RUNTIME_API_ID_hipMemAdvise_v2, + ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamGetId, #endif ROCPROFILER_HIP_RUNTIME_API_ID_LAST, } rocprofiler_hip_runtime_api_id_t; diff --git a/projects/rocprofiler-sdk/source/lib/common/regex.cpp b/projects/rocprofiler-sdk/source/lib/common/regex.cpp index a8604fb9b4..6c335b713a 100644 --- a/projects/rocprofiler-sdk/source/lib/common/regex.cpp +++ b/projects/rocprofiler-sdk/source/lib/common/regex.cpp @@ -168,9 +168,11 @@ struct Parser { case 'd': return make_cls([](unsigned char x) { return std::isdigit(x) != 0; }); case 'D': return make_cls([](unsigned char x) { return std::isdigit(x) == 0; }); - case 'w': return make_cls([](unsigned char x) { return std::isalnum(x) || x == '_'; }); + case 'w': + return make_cls([](unsigned char x) { return (std::isalnum(x) != 0) || x == '_'; }); case 'W': - return make_cls([](unsigned char x) { return !(std::isalnum(x) || x == '_'); }); + return make_cls( + [](unsigned char x) { return !((std::isalnum(x) != 0) || x == '_'); }); case 's': return make_cls([](unsigned char x) { return std::isspace(x) != 0; }); case 'S': return make_cls([](unsigned char x) { return std::isspace(x) == 0; }); case 'n': return Node('\n'); @@ -204,13 +206,34 @@ struct Parser char e = get(); if(e == 'd' || e == 'D' || e == 'w' || e == 'W' || e == 's' || e == 'S') { - special_preds.push_back( - e == 'd' ? [](unsigned char x) { return std::isdigit(x) != 0; } - : e == 'D' ? [](unsigned char x) { return std::isdigit(x) == 0; } - : e == 'w' ? [](unsigned char x) { return std::isalnum(x) || x == '_'; } - : e == 'W' ? [](unsigned char x) { return !(std::isalnum(x) || x == '_'); } - : e == 's' ?
[](unsigned char x) { return std::isspace(x) != 0; } - : [](unsigned char x) { return std::isspace(x) == 0; }); + switch(e) + { + case 'd': + special_preds.emplace_back( + [](unsigned char x) { return std::isdigit(x) != 0; }); + break; + case 'D': + special_preds.emplace_back( + [](unsigned char x) { return std::isdigit(x) == 0; }); + break; + case 'w': + special_preds.emplace_back( + [](unsigned char x) { return (std::isalnum(x) != 0) || x == '_'; }); + break; + case 'W': + special_preds.emplace_back([](unsigned char x) { + return !((std::isalnum(x) != 0) || x == '_'); + }); + break; + case 's': + special_preds.emplace_back( + [](unsigned char x) { return std::isspace(x) != 0; }); + break; + case 'S': // non-whitespace: complement of the 's' class + special_preds.emplace_back( + [](unsigned char x) { return std::isspace(x) == 0; }); + break; + } continue; } else @@ -250,7 +273,7 @@ struct Parser auto specials = std::move(special_preds); auto pred = [rs, ss, specials, negate](unsigned char x) { bool in = false; - for(auto& r : rs) + for(const auto& r : rs) { if(r.a <= x && x <= r.b) { @@ -271,7 +294,7 @@ struct Parser } if(!in) { - for(auto& sp : specials) + for(const auto& sp : specials) { if(sp(x)) { @@ -918,7 +941,9 @@ struct CaptureMatcher } }; -static int +namespace +{ +int count_captures(const Node& n) { switch(n.kind) @@ -938,7 +963,7 @@ count_captures(const Node& n) } // Expand replacement with captures for a single match span [b,e) -static std::string +std::string expand_replacement(std::string_view text, const std::vector>& groups, size_t b, @@ -983,12 +1008,12 @@ expand_replacement(std::string_view text, } // $1..$99 (ECMAScript semantics: if two digits are present, always consume both) - if(std::isdigit(static_cast(n1))) + if(std::isdigit(static_cast(n1)) != 0) { int idx = n1 - '0'; size_t j = i + 2; - if(j < repl.size() && std::isdigit(static_cast(repl[j]))) + if(j < repl.size() && (std::isdigit(static_cast(repl[j])) != 0)) { int d2 = repl[j] - '0'; idx = idx * 10 + d2; // ALWAYS consume the second digit if present @@ -1014,6 +1039,7 @@ expand_replacement(std::string_view text, return out; } +} //
namespace // ============================ Public API =========================== diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py index 816b50b263..e109b954c3 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py @@ -46,13 +46,23 @@ def check_function_availability(connection, function_name): """ cursor = connection.cursor() - # Query pragma_function_list to check for the function - cursor.execute( - "SELECT EXISTS(SELECT 1 FROM pragma_function_list WHERE name=?)", (function_name,) - ) - result = cursor.fetchone()[0] - - return bool(result) + try: + # Try the modern approach first (SQLite 3.30.0+) + cursor.execute( + "SELECT EXISTS(SELECT 1 FROM pragma_function_list WHERE name=?)", + (function_name,), + ) + result = cursor.fetchone()[0] + return bool(result) + except Exception: + # Fallback for older SQLite versions (Workaround for RHEL 8) + # Try to execute a simple query using the function to see if it exists + try: + cursor.execute(f"SELECT {function_name}(1)") + return True + except Exception as err: + # A wrong-arity error still proves the function is registered + return "wrong number of arguments" in str(err).lower() def get_temp_view_names(connection: RocpdImportData) -> List[str]: diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.hpp index a2996fe8b2..2db3af9f22 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.hpp @@ -165,7 +165,7 @@ struct SemaphoreGuard sem_t* sem = nullptr; std::string name; - SemaphoreGuard(const std::string& sem_name) + SemaphoreGuard(const std::string& sem_name) // NOLINT : name(sem_name) {} diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/abi.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/abi.cpp index 5269f98a46..ea8b5dae96 100644 ---
a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/abi.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/abi.cpp @@ -571,6 +571,28 @@ ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipDrvLaunchKernelEx_fn, 475) ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemGetHandleForAddressRange_fn, 476) #endif +#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 14 +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipModuleGetFunctionCount_fn, 477); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemsetD2D8_fn, 478); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemsetD2D8Async_fn, 479); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemsetD2D16_fn, 480); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemsetD2D16Async_fn, 481); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemsetD2D32_fn, 482); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemsetD2D32Async_fn, 483); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipStreamGetAttribute_fn, 484); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipStreamSetAttribute_fn, 485); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipModuleLoadFatBinary_fn, 486); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemcpyBatchAsync_fn, 487); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemcpy3DBatchAsync_fn, 488); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemcpy3DPeer_fn, 489); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemcpy3DPeerAsync_fn, 490); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipGetDriverEntryPoint_fn, 491); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipGetDriverEntryPoint_spt_fn, 492); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemPrefetchAsync_v2_fn, 493); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemAdvise_v2_fn, 494); +ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipStreamGetId_fn, 495); +#endif + #if HIP_RUNTIME_API_TABLE_STEP_VERSION == 0 ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 442) #elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 1 @@ -599,6 +621,8 @@ ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 476) 
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 477) #elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 13 ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 477) +#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 14 +ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 496) #else INTERNAL_CI_ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 0) #endif diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/details/format.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/details/format.hpp index beef1ac6e9..f65cf016a6 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/details/format.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/details/format.hpp @@ -263,6 +263,9 @@ struct formatter : rocprofiler::hip::details::base_formatt ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemAllocationType, Invalid); ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemAllocationType, Pinned); ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemAllocationType, Max); +#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 14 + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemAllocationType, Uncached); +#endif ROCP_SDK_HIP_FORMAT_DFLT_CASE(hipMemAllocationType); } return fmt::format_to(ctx.out(), "Unknown"); @@ -279,6 +282,11 @@ struct formatter : rocprofiler::hip::details::base_formatter { ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemLocationType, Invalid); ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemLocationType, Device); +#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 14 + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemLocationType, Host); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemLocationType, HostNuma); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemLocationType, HostNumaCurrent); +#endif ROCP_SDK_HIP_FORMAT_DFLT_CASE(hipMemLocationType); } return fmt::format_to(ctx.out(), "Unknown"); @@ -428,6 +436,197 @@ ROCP_SDK_HIP_FORMATTER(HIP_LAUNCH_CONFIG_st, v.numAttrs, '}') #endif + +#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 14 +template <> +struct formatter : rocprofiler::hip::details::base_formatter +{ + template + auto format(const 
hipMemcpy3DOperand& v, Ctx& ctx) const + { + switch(v.type) + { + case hipMemcpyOperandTypePointer: + return fmt::format_to( + ctx.out(), + "{{type=Pointer, ptr={}, rowLength={}, layerHeight={}, locHint={}}}", + static_cast(v.op.ptr.ptr), + v.op.ptr.rowLength, + v.op.ptr.layerHeight, + v.op.ptr.locHint); + + case hipMemcpyOperandTypeArray: + return fmt::format_to(ctx.out(), + "{{type=Array, array={}, offset={}}}", + static_cast(v.op.array.array), + v.op.array.offset); + + default: + return fmt::format_to(ctx.out(), "{{type=UNKNOWN({})}}", static_cast(v.type)); + } + } +}; +ROCP_SDK_HIP_FORMATTER(hipAccessPolicyWindow, + "{{base_ptr={}, hitProp={}, hitRatio={}, missProp={}, num_bytes={}}}", + static_cast(v.base_ptr), + v.hitProp, + v.hitRatio, + v.missProp, + v.num_bytes) +template <> +struct formatter : rocprofiler::hip::details::base_formatter +{ + template + auto format(hipSynchronizationPolicy v, Ctx& ctx) const + { + switch(v) + { + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipSyncPolicy, Auto); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipSyncPolicy, Spin); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipSyncPolicy, Yield); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipSyncPolicy, BlockingSync); + ROCP_SDK_HIP_FORMAT_DFLT_CASE(hipSyncPolicy); + } + return fmt::format_to(ctx.out(), "Unknown"); + } +}; +ROCP_SDK_HIP_FORMATTER(hipLaunchMemSyncDomainMap, "{{default={}, remote={}}}", v.default_, v.remote) +template <> +struct formatter : rocprofiler::hip::details::base_formatter +{ + template + auto format(hipLaunchMemSyncDomain v, Ctx& ctx) const + { + switch(v) + { + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipLaunchMemSyncDomain, Default); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipLaunchMemSyncDomain, Remote); + ROCP_SDK_HIP_FORMAT_DFLT_CASE(hipLaunchMemSyncDomain); + } + return fmt::format_to(ctx.out(), "Unknown"); + } +}; +template <> +struct formatter : rocprofiler::hip::details::base_formatter +{ + template + auto format(hipMemcpySrcAccessOrder v, Ctx& ctx) const + { + switch(v) + { + 
ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemcpySrcAccessOrder, Invalid); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemcpySrcAccessOrder, Stream); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemcpySrcAccessOrder, DuringApiCall); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemcpySrcAccessOrder, Any); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipMemcpySrcAccessOrder, Max); + ROCP_SDK_HIP_FORMAT_DFLT_CASE(hipMemcpySrcAccessOrder); + } + return fmt::format_to(ctx.out(), "Unknown"); + } +}; +template <> +struct formatter : rocprofiler::hip::details::base_formatter +{ + template + auto format(hipAccessProperty v, Ctx& ctx) const + { + switch(v) + { + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipAccessProperty, Normal); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipAccessProperty, Streaming); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipAccessProperty, Persisting); + ROCP_SDK_HIP_FORMAT_DFLT_CASE(hipAccessProperty); + } + return fmt::format_to(ctx.out(), "Unknown"); + } +}; +ROCP_SDK_HIP_FORMATTER(hipOffset3D, "{{x={}, y={}, z={}}}", v.x, v.y, v.z) +template <> +struct formatter : rocprofiler::hip::details::base_formatter +{ + template + auto format(hipLaunchAttributeID v, Ctx& ctx) const + { + switch(v) + { + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipLaunchAttribute, AccessPolicyWindow); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipLaunchAttribute, Cooperative); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipLaunchAttribute, SynchronizationPolicy); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipLaunchAttribute, Priority); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipLaunchAttribute, MemSyncDomainMap); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipLaunchAttribute, MemSyncDomain); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipLaunchAttribute, Max); + ROCP_SDK_HIP_FORMAT_DFLT_CASE(hipLaunchAttributeID); + } + return fmt::format_to(ctx.out(), "Unknown"); + } +}; +template <> +struct formatter : rocprofiler::hip::details::base_formatter +{ + template + auto format(hipLaunchAttributeValue v, Ctx& ctx) const + { + return fmt::format_to( + ctx.out(), + "{{accessPolicyWindow={}, cooperative={}, priority={}, syncPolicy={}, " + 
"memSyncDomainMap={}, memSyncDomain={}}}", + v.accessPolicyWindow, + v.cooperative, + v.priority, + v.syncPolicy, + v.memSyncDomainMap, + v.memSyncDomain); + } +}; +ROCP_SDK_HIP_FORMATTER(hipMemcpyAttributes, + "{}srcAccessOrder={}, srcLocHint={}, dstLocHint={}, flags={}{}", + '{', + v.srcAccessOrder, + v.srcLocHint, + v.dstLocHint, + v.flags, + '}') +ROCP_SDK_HIP_FORMATTER(hipMemcpy3DBatchOp, + "{}src={}, dst={}, extent={}, srcAccessOrder={}, flags={}{}", + '{', + v.src, + v.dst, + v.extent, + v.srcAccessOrder, + v.flags, + '}') +ROCP_SDK_HIP_FORMATTER(hipMemcpy3DPeerParms, + "{}srcArray={}, srcPos={}, srcPtr={}, srcDevice={}, dstArray={}, dstPos={}, " + "dstPtr={}, dstDevice={}, extent={}{}", + '{', + static_cast(v.srcArray), + v.srcPos, + v.srcPtr, + v.srcDevice, + static_cast(v.dstArray), + v.dstPos, + v.dstPtr, + v.dstDevice, + v.extent, + '}') +template <> +struct formatter : rocprofiler::hip::details::base_formatter +{ + template + auto format(hipDriverEntryPointQueryResult v, Ctx& ctx) const + { + switch(v) + { + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipDriverEntryPoint, Success); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipDriverEntryPoint, SymbolNotFound); + ROCP_SDK_HIP_FORMAT_CASE_STMT(hipDriverEntryPoint, VersionNotSufficent); + ROCP_SDK_HIP_FORMAT_DFLT_CASE(hipDriverEntryPointQueryResult); + } + return fmt::format_to(ctx.out(), "Unknown"); + } +}; +#endif } // namespace fmt #undef ROCP_SDK_HIP_FORMATTER diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.def.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.def.cpp index e116a1eee1..dbf9ca05a1 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.def.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.def.cpp @@ -597,6 +597,28 @@ HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNT #if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 12 HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, 
ROCPROFILER_HIP_RUNTIME_API_ID_hipMemGetHandleForAddressRange, hipMemGetHandleForAddressRange, hipMemGetHandleForAddressRange_fn, handle, dptr, size, handleType, flags); #endif + +#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 14 +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipModuleGetFunctionCount, hipModuleGetFunctionCount, hipModuleGetFunctionCount_fn, count, mod); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D8, hipMemsetD2D8, hipMemsetD2D8_fn, dst, dstPitch, value, width, height); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D8Async, hipMemsetD2D8Async, hipMemsetD2D8Async_fn, dst, dstPitch, value, width, height, stream); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D16, hipMemsetD2D16, hipMemsetD2D16_fn, dst, dstPitch, value, width, height); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D16Async, hipMemsetD2D16Async, hipMemsetD2D16Async_fn, dst, dstPitch, value, width, height, stream); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D32, hipMemsetD2D32, hipMemsetD2D32_fn, dst, dstPitch, value, width, height); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemsetD2D32Async, hipMemsetD2D32Async, hipMemsetD2D32Async_fn, dst, dstPitch, value, width, height, stream); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamGetAttribute, hipStreamGetAttribute, hipStreamGetAttribute_fn, stream, attr, value_out); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamSetAttribute, hipStreamSetAttribute, hipStreamSetAttribute_fn, stream, attr, value); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, 
ROCPROFILER_HIP_RUNTIME_API_ID_hipModuleLoadFatBinary, hipModuleLoadFatBinary, hipModuleLoadFatBinary_fn, module, fatbin); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpyBatchAsync, hipMemcpyBatchAsync, hipMemcpyBatchAsync_fn, dsts, srcs, sizes, count, attrs, attrsIdxs, numAttrs, failIdx, stream); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpy3DBatchAsync, hipMemcpy3DBatchAsync, hipMemcpy3DBatchAsync_fn, numOps, opList, failIdx, flags, stream); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpy3DPeer, hipMemcpy3DPeer, hipMemcpy3DPeer_fn, p); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemcpy3DPeerAsync, hipMemcpy3DPeerAsync, hipMemcpy3DPeerAsync_fn, p, stream); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipGetDriverEntryPoint, hipGetDriverEntryPoint, hipGetDriverEntryPoint_fn, symbol, funcPtr, flags, driverStatus); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipGetDriverEntryPoint_spt, hipGetDriverEntryPoint_spt, hipGetDriverEntryPoint_spt_fn, symbol, funcPtr, flags, driverStatus); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemPrefetchAsync_v2, hipMemPrefetchAsync_v2, hipMemPrefetchAsync_v2_fn, dev_ptr, count, location, flags, stream); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemAdvise_v2, hipMemAdvise_v2, hipMemAdvise_v2_fn, dev_ptr, count, advice, location); +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamGetId, hipStreamGetId, hipStreamGetId_fn, stream, streamId); +#endif // clang-format on #else diff --git a/projects/rocprofiler-sdk/source/lib/tests/common/regex.cpp
b/projects/rocprofiler-sdk/source/lib/tests/common/regex.cpp index 7e9101229f..2dce06ff06 100644 --- a/projects/rocprofiler-sdk/source/lib/tests/common/regex.cpp +++ b/projects/rocprofiler-sdk/source/lib/tests/common/regex.cpp @@ -21,7 +21,9 @@ struct StdRes size_t e = 0; }; -static std::optional +namespace +{ +std::optional TryStdMatch(std::string_view text, std::string_view pat) { try @@ -34,7 +36,7 @@ TryStdMatch(std::string_view text, std::string_view pat) } } -static std::optional +std::optional TryStdSearch(std::string_view text, std::string_view pat) { try @@ -55,7 +57,7 @@ TryStdSearch(std::string_view text, std::string_view pat) } } -static std::optional +std::optional TryStdReplace(std::string_view text, std::string_view pat, std::string_view repl) { try @@ -67,6 +69,7 @@ TryStdReplace(std::string_view text, std::string_view pat, std::string_view repl return std::nullopt; } } +} // namespace // ----------------------------- Tests ------------------------------- @@ -107,7 +110,7 @@ TEST(regex_parity, literals_and_escapes) TEST(regex_parity, dot_and_anchors) { - auto cmp = [&](std::string s, std::string p) { + auto cmp = [&](const std::string& s, const std::string& p) { auto sm = TryStdMatch(s, p); if(!sm) return; EXPECT_EQ(R::regex_match(s, p), *sm); @@ -279,7 +282,7 @@ TEST(regex_parity, env_patterns_from_issue) "(.*)%q\\{([A-Z0-9_]+)\\}(.*)" // should NOT match here }; - for(auto& p : pats) + for(const auto& p : pats) { auto ss = TryStdSearch(fpath, p); ASSERT_TRUE(ss.has_value()); @@ -650,7 +653,9 @@ TEST(regex_compatibility, thread_safety) } }; - std::vector threads; + auto threads = std::vector{}; + threads.reserve(4); + for(int i = 0; i < 4; ++i) { threads.emplace_back(worker); diff --git a/projects/rocprofiler-sdk/source/scripts/run-ci.py b/projects/rocprofiler-sdk/source/scripts/run-ci.py index fe13d5f1ec..b8dee89788 100755 --- a/projects/rocprofiler-sdk/source/scripts/run-ci.py +++ b/projects/rocprofiler-sdk/source/scripts/run-ci.py @@ -14,8 +14,8 
@@ import multiprocessing # and default value for CTEST_SUBMIT_URL # _PROJECT_NAME = "rocprofiler-v2-internal" # _BASE_URL = "10.194.116.31/cdash" -_PROJECT_NAME = "rocprofiler-sdk" -_BASE_URL = "cdash.rocprofiler.amd.com" +_PROJECT_NAME = "rocprofiler-sdk-alt" +_BASE_URL = "my.cdash.org" _GCOVR_GENERATE_CMD = None # these are various default values @@ -193,9 +193,9 @@ def generate_custom(args, cmake_args, ctest_args): set(CTEST_PROJECT_NAME "{_PROJECT_NAME}") set(CTEST_NIGHTLY_START_TIME "05:00:00 UTC") - set(CTEST_DROP_METHOD "http") + set(CTEST_DROP_METHOD "https") set(CTEST_DROP_SITE_CDASH TRUE) - set(CTEST_SUBMIT_URL "http://{SUBMIT_URL}") + set(CTEST_SUBMIT_URL "https://{SUBMIT_URL}") set(CTEST_UPDATE_TYPE git) set(CTEST_UPDATE_VERSION_ONLY TRUE)