From abcb5dc0e3480811da7d8f18d3ca904d6e1a2db7 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Tue, 10 Dec 2024 14:35:02 -0500 Subject: [PATCH 01/12] Add installers for rocm-6.3 and rhel-9.5 and update installer script template (#50) * Add installers for rocm-6.3 and rhel-9.5 * Updated the template "rocprof-sys-install.py.in". Fixed the installer for the "rocm-x.y.z" style tags. --------- Signed-off-by: David Galiffi [ROCm/rocprofiler-systems commit: 7e2242414ccbee021f47f2f8b26c0655dc95129f] --- .../.github/workflows/containers.yml | 37 ++++++++++++++++--- .../.github/workflows/cpack.yml | 37 ++++++++++++------- .../.github/workflows/redhat.yml | 4 +- .../cmake/Templates/rocprof-sys-install.py.in | 3 +- .../scripts/write-rocprof-sys-install.cmake | 32 ++++++++++++++-- 5 files changed, 87 insertions(+), 26 deletions(-) diff --git a/projects/rocprofiler-systems/.github/workflows/containers.yml b/projects/rocprofiler-systems/.github/workflows/containers.yml index f5ff05f0ae..cf57d613ac 100644 --- a/projects/rocprofiler-systems/.github/workflows/containers.yml +++ b/projects/rocprofiler-systems/.github/workflows/containers.yml @@ -91,6 +91,9 @@ jobs: - os-distro: "ubuntu" os-version: "20.04" rocm-version: "6.2" + - os-distro: "ubuntu" + os-version: "20.04" + rocm-version: "6.3" # ubuntu 22.04 - os-distro: "ubuntu" os-version: "22.04" @@ -98,6 +101,9 @@ jobs: - os-distro: "ubuntu" os-version: "22.04" rocm-version: "6.2" + - os-distro: "ubuntu" + os-version: "22.04" + rocm-version: "6.3" # opensuse 15.5 - os-distro: "opensuse" os-version: "15.5" @@ -105,6 +111,9 @@ jobs: - os-distro: "opensuse" os-version: "15.5" rocm-version: "6.2" + - os-distro: "opensuse" + os-version: "15.5" + rocm-version: "6.3" # opensuse 15.6 - os-distro: "opensuse" os-version: "15.6" @@ -112,12 +121,15 @@ jobs: - os-distro: "opensuse" os-version: "15.6" rocm-version: "6.2" - # RHEL 8.8 + - os-distro: "opensuse" + os-version: "15.6" + rocm-version: "6.3" + # RHEL 8.9 - os-distro: "rhel" - 
os-version: "8.8" + os-version: "8.9" rocm-version: "0.0" - os-distro: "rhel" - os-version: "8.8" + os-version: "8.9" rocm-version: "6.2" # RHEL 8.10 - os-distro: "rhel" @@ -126,12 +138,15 @@ jobs: - os-distro: "rhel" os-version: "8.10" rocm-version: "6.2" - # RHEL 9.2 - os-distro: "rhel" - os-version: "9.2" + os-version: "8.10" + rocm-version: "6.3" + # RHEL 9.3 + - os-distro: "rhel" + os-version: "9.3" rocm-version: "0.0" - os-distro: "rhel" - os-version: "9.2" + os-version: "9.3" rocm-version: "6.2" # RHEL 9.4 - os-distro: "rhel" @@ -140,6 +155,16 @@ jobs: - os-distro: "rhel" os-version: "9.4" rocm-version: "6.2" + - os-distro: "rhel" + os-version: "9.4" + rocm-version: "6.3" + # RHEL 9.5 + - os-distro: "rhel" + os-version: "9.5" + rocm-version: "0.0" + - os-distro: "rhel" + os-version: "9.5" + rocm-version: "6.3" steps: - uses: actions/checkout@v4 diff --git a/projects/rocprofiler-systems/.github/workflows/cpack.yml b/projects/rocprofiler-systems/.github/workflows/cpack.yml index 263d5101be..29f20ab7c0 100644 --- a/projects/rocprofiler-systems/.github/workflows/cpack.yml +++ b/projects/rocprofiler-systems/.github/workflows/cpack.yml @@ -40,6 +40,9 @@ jobs: - os-distro: "ubuntu" os-version: "20.04" rocm-version: "6.2" + - os-distro: "ubuntu" + os-version: "20.04" + rocm-version: "6.3" # ubuntu 22.04 - os-distro: "ubuntu" os-version: "22.04" @@ -47,6 +50,9 @@ jobs: - os-distro: "ubuntu" os-version: "22.04" rocm-version: "6.2" + - os-distro: "ubuntu" + os-version: "22.04" + rocm-version: "6.3" # opensuse 15.5 - os-distro: "opensuse" os-version: "15.5" @@ -54,6 +60,9 @@ jobs: - os-distro: "opensuse" os-version: "15.5" rocm-version: "6.2" + - os-distro: "opensuse" + os-version: "15.5" + rocm-version: "6.3" # opensuse 15.6 - os-distro: "opensuse" os-version: "15.6" @@ -61,13 +70,9 @@ jobs: - os-distro: "opensuse" os-version: "15.6" rocm-version: "6.2" - # RHEL 8.8 - - os-distro: "rhel" - os-version: "8.8" - rocm-version: "0.0" - - os-distro: "rhel" - os-version: 
"8.8" - rocm-version: "6.2" + - os-distro: "opensuse" + os-version: "15.6" + rocm-version: "6.3" # RHEL 8.9 - os-distro: "rhel" os-version: "8.9" @@ -82,13 +87,9 @@ jobs: - os-distro: "rhel" os-version: "8.10" rocm-version: "6.2" - # RHEL 9.2 - os-distro: "rhel" - os-version: "9.2" - rocm-version: "0.0" - - os-distro: "rhel" - os-version: "9.2" - rocm-version: "6.2" + os-version: "8.10" + rocm-version: "6.3" # RHEL 9.3 - os-distro: "rhel" os-version: "9.3" @@ -103,6 +104,16 @@ jobs: - os-distro: "rhel" os-version: "9.4" rocm-version: "6.2" + - os-distro: "rhel" + os-version: "9.4" + rocm-version: "6.3" + # RHEL 9.5 + - os-distro: "rhel" + os-version: "9.5" + rocm-version: "0.0" + - os-distro: "rhel" + os-version: "9.5" + rocm-version: "6.3" steps: - name: Free Disk Space diff --git a/projects/rocprofiler-systems/.github/workflows/redhat.yml b/projects/rocprofiler-systems/.github/workflows/redhat.yml index aee533bf03..0e9491760d 100644 --- a/projects/rocprofiler-systems/.github/workflows/redhat.yml +++ b/projects/rocprofiler-systems/.github/workflows/redhat.yml @@ -46,8 +46,8 @@ jobs: fail-fast: false matrix: compiler: ['g++'] - os-release: [ '8.8', '8.10', '9.2', '9.4' ] - rocm-version: [ '0.0', '6.2' ] + os-release: [ '8.10', '9.2', '9.4' ] + rocm-version: [ '0.0', '6.2', '6.3' ] build-type: ['Release'] steps: diff --git a/projects/rocprofiler-systems/cmake/Templates/rocprof-sys-install.py.in b/projects/rocprofiler-systems/cmake/Templates/rocprof-sys-install.py.in index 6d07cd5a45..364d2612ff 100755 --- a/projects/rocprofiler-systems/cmake/Templates/rocprof-sys-install.py.in +++ b/projects/rocprofiler-systems/cmake/Templates/rocprof-sys-install.py.in @@ -11,6 +11,7 @@ from urllib import request from urllib.error import HTTPError rocprofsys_version = "@ROCPROFSYS_VERSION@" +rocprofsys_git_tag = "@ROCPROFSYS_GIT_TAG@" _rocm_path = os.environ.get("ROCM_PATH", "/opt/rocm") _rocm_version = None @@ -232,7 +233,7 @@ if __name__ == "__main__": ) script = 
f"rocprofiler-systems-{rocprofsys_version}-{os_distrib}-{os_version}{rocm_version}{extensions}.sh" - url = f"https://github.com/ROCm/rocprofiler-systems/releases/download/v{rocprofsys_version}/{script}" + url = f"https://github.com/ROCm/rocprofiler-systems/releases/download/{rocprofsys_git_tag}/{script}" download_dir = ( tempfile.mkdtemp(prefix="rocprof-sys-install-") if args.download_path is None diff --git a/projects/rocprofiler-systems/scripts/write-rocprof-sys-install.cmake b/projects/rocprofiler-systems/scripts/write-rocprof-sys-install.cmake index 9552722a71..a8ce50e09a 100644 --- a/projects/rocprofiler-systems/scripts/write-rocprof-sys-install.cmake +++ b/projects/rocprofiler-systems/scripts/write-rocprof-sys-install.cmake @@ -7,14 +7,38 @@ if(NOT DEFINED ROCPROFSYS_VERSION) ROCPROFSYS_VERSION "${FULL_VERSION_STRING}") endif() +find_package(Git) + +if(Git_FOUND AND EXISTS ".git") + execute_process( + COMMAND ${GIT_EXECUTABLE} describe --tags + OUTPUT_VARIABLE ROCPROFSYS_GIT_TAG + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE _GIT_DESCRIBE_RESULT + ERROR_QUIET) + if(NOT _GIT_DESCRIBE_RESULT EQUAL 0) + execute_process( + COMMAND ${GIT_EXECUTABLE} describe + OUTPUT_VARIABLE ROCPROFSYS_GIT_TAG + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE _GIT_DESCRIBE_RESULT + ERROR_QUIET) + endif() +else() + message( + STATUS + "Git not found or .git directory not found; using version ${ROCPROFSYS_VERSION}" + ) + set(ROCPROFSYS_GIT_TAG "v${ROCPROFSYS_VERSION}") +endif() + if(NOT DEFINED OUTPUT_DIR) set(OUTPUT_DIR ${CMAKE_CURRENT_LIST_DIR}) endif() -message( - STATUS - "Writing ${OUTPUT_DIR}/rocprofiler-systems-install.py for rocprofiler-systems v${ROCPROFSYS_VERSION}" - ) +message(STATUS "Writing ${OUTPUT_DIR}/rocprofiler-systems-install.py.") +message(STATUS "rocprofiler-systems version: ${ROCPROFSYS_VERSION}.") +message(STATUS "rocprofiler-systems git describe: ${ROCPROFSYS_GIT_TAG}") configure_file(${CMAKE_CURRENT_LIST_DIR}/../cmake/Templates/rocprof-sys-install.py.in 
${OUTPUT_DIR}/rocprofiler-systems-install.py @ONLY) From b73bd13a860b069ca5bdf13a0912d655da737e6f Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Wed, 11 Dec 2024 19:36:04 -0500 Subject: [PATCH 02/12] Adding installer for Ubuntu 24.04 (#14) * Add installers for ubuntu 24.04 * Formatting change to the ubuntu-focal and ubuntu-jammy workflows * Initial Ubuntu 24.04 workflow - just build test [ROCm/rocprofiler-systems commit: 398ea62629a81f5e6c209875072aa1f53de91ab4] --- .../.github/workflows/containers.yml | 12 ++ .../.github/workflows/cpack.yml | 10 ++ .../.github/workflows/ubuntu-focal.yml | 7 +- .../.github/workflows/ubuntu-jammy.yml | 4 +- .../.github/workflows/ubuntu-noble.yml | 112 ++++++++++++++++++ .../docker/Dockerfile.ubuntu | 9 +- .../docker/Dockerfile.ubuntu.ci | 12 +- .../docker/build-docker.sh | 3 + .../lib/rocprof-sys/library/runtime.cpp | 4 +- 9 files changed, 163 insertions(+), 10 deletions(-) create mode 100644 projects/rocprofiler-systems/.github/workflows/ubuntu-noble.yml diff --git a/projects/rocprofiler-systems/.github/workflows/containers.yml b/projects/rocprofiler-systems/.github/workflows/containers.yml index cf57d613ac..86d12fcf4d 100644 --- a/projects/rocprofiler-systems/.github/workflows/containers.yml +++ b/projects/rocprofiler-systems/.github/workflows/containers.yml @@ -33,6 +33,8 @@ jobs: version: "20.04" - distro: "ubuntu" version: "22.04" + - distro: "ubuntu" + version: "24.04" - distro: "opensuse" version: "15.5" - distro: "opensuse" @@ -104,6 +106,16 @@ jobs: - os-distro: "ubuntu" os-version: "22.04" rocm-version: "6.3" + # ubuntu 24.04 + - os-distro: "ubuntu" + os-version: "24.04" + rocm-version: "0.0" + - os-distro: "ubuntu" + os-version: "24.04" + rocm-version: "6.2" + - os-distro: "ubuntu" + os-version: "24.04" + rocm-version: "6.3" # opensuse 15.5 - os-distro: "opensuse" os-version: "15.5" diff --git a/projects/rocprofiler-systems/.github/workflows/cpack.yml b/projects/rocprofiler-systems/.github/workflows/cpack.yml index 
29f20ab7c0..5bb01cea1f 100644 --- a/projects/rocprofiler-systems/.github/workflows/cpack.yml +++ b/projects/rocprofiler-systems/.github/workflows/cpack.yml @@ -53,6 +53,16 @@ jobs: - os-distro: "ubuntu" os-version: "22.04" rocm-version: "6.3" + # ubuntu 24.04 + - os-distro: "ubuntu" + os-version: "24.04" + rocm-version: "0.0" + - os-distro: "ubuntu" + os-version: "24.04" + rocm-version: "6.2" + - os-distro: "ubuntu" + os-version: "24.04" + rocm-version: "6.3" # opensuse 15.5 - os-distro: "opensuse" os-version: "15.5" diff --git a/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml b/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml index 67c72611ec..31e813d1d0 100644 --- a/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml +++ b/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml @@ -428,7 +428,12 @@ jobs: max_attempts: 5 command: | sudo apt-get update && - sudo apt-get install -y autoconf bison build-essential clang environment-modules gettext libomp-dev libtool m4 python3-pip texinfo ${{ matrix.compiler }} ${{ matrix.deps }} && + sudo apt-get install -y \ + autoconf autotools-dev bash-completion bison build-essential \ + bzip2 cmake curl environment-modules flex gettext git-core gnupg2 \ + gzip libiberty-dev libomp-dev libpapi-dev libpfm4-dev libtool locales \ + lsb-release m4 python3-pip texinfo unzip wget zip zlib1g-dev \ + ${{ matrix.deps }} ${{ matrix.compiler }} && if [ "${{ matrix.mpi }}" = "mpich" ]; then sudo apt-get install -y libmpich-dev mpich; fi && if [ "${{ matrix.mpi }}" = "openmpi" ]; then sudo apt-get install -y libopenmpi-dev openmpi-bin libfabric-dev; fi && wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && diff --git a/projects/rocprofiler-systems/.github/workflows/ubuntu-jammy.yml b/projects/rocprofiler-systems/.github/workflows/ubuntu-jammy.yml index 0bdc77ff6f..47026737c0 100644 --- 
a/projects/rocprofiler-systems/.github/workflows/ubuntu-jammy.yml +++ b/projects/rocprofiler-systems/.github/workflows/ubuntu-jammy.yml @@ -111,7 +111,9 @@ jobs: apt-get update && apt-get install -y software-properties-common && apt-get upgrade -y && - apt-get install -y autoconf bison build-essential clang environment-modules gettext libfabric-dev libiberty-dev libomp-dev libopenmpi-dev libtool m4 openmpi-bin python3-pip texinfo ${{ matrix.compiler }} && + apt-get install -y autoconf bison build-essential clang environment-modules \ + gettext libfabric-dev libiberty-dev libomp-dev libopenmpi-dev libtool m4 \ + openmpi-bin python3-pip texinfo ${{ matrix.compiler }} && python3 -m pip install --upgrade pip && python3 -m pip install --upgrade numpy perfetto dataclasses && python3 -m pip install 'cmake==3.18.4' && diff --git a/projects/rocprofiler-systems/.github/workflows/ubuntu-noble.yml b/projects/rocprofiler-systems/.github/workflows/ubuntu-noble.yml new file mode 100644 index 0000000000..30e03dd784 --- /dev/null +++ b/projects/rocprofiler-systems/.github/workflows/ubuntu-noble.yml @@ -0,0 +1,112 @@ +name: Ubuntu 24.04 (GCC, Python, ROCm) +run-name: ubuntu-noble + +on: + push: + branches: [ amd-mainline, amd-staging, release/** ] + paths-ignore: + - '*.md' + - 'docs/**' + - 'source/docs/**' + - 'source/python/gui/**' + - '.github/workflows/docs.yml' + - '.github/workflows/cpack.yml' + - '.github/workflows/containers.yml' + - '.github/workflows/formatting.yml' + - '.github/workflows/weekly-mainline-sync.yml' + - 'docker/**' + pull_request: + branches: [ amd-mainline, amd-staging, release/** ] + paths-ignore: + - '*.md' + - 'docs/**' + - 'source/docs/**' + - 'source/python/gui/**' + - '.github/workflows/docs.yml' + - '.github/workflows/cpack.yml' + - '.github/workflows/containers.yml' + - '.github/workflows/formatting.yml' + - '.github/workflows/weekly-mainline-sync.yml' + - 'docker/**' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + 
cancel-in-progress: true + +env: + ROCPROFSYS_CI: ON + ROCPROFSYS_TMPDIR: "%env{PWD}%/testing-tmp" + +jobs: + ubuntu-noble: + runs-on: ubuntu-20.04 + container: + image: dgaliffiamd/rocprofiler-systems:ci-base-ubuntu-24.04 + strategy: + fail-fast: false + matrix: + compiler: ['g++'] + build-type: ['Release', 'Debug'] + strip: ['OFF'] + build-dyninst: ['OFF'] + rocm-version: ['0.0', '6.3'] + + env: + ROCPROFSYS_CI: 'ON' + + steps: + - uses: actions/checkout@v4 + + - name: Install Packages + timeout-minutes: 25 + uses: nick-fields/retry@v3 + with: + retry_wait_seconds: 30 + timeout_minutes: 25 + max_attempts: 5 + command: | + apt-get -y update && apt-get upgrade -y && + apt-get install -y \ + libiberty-dev clang libomp-dev libopenmpi-dev libfabric-dev \ + openmpi-bin ${{ matrix.compiler }} && + for i in 8 9 10 11 12; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done + + - name: Install ROCm Packages + if: ${{ matrix.rocm-version > 0 }} + timeout-minutes: 30 + shell: bash + run: | + ROCM_VERSION=${{ matrix.rocm-version }} + ROCM_MAJOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $1}') + ROCM_MINOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $2}') + ROCM_VERSN=$(( (${ROCM_MAJOR}*10000)+(${ROCM_MINOR}*100) )) + echo "ROCM_MAJOR=${ROCM_MAJOR} ROCM_MINOR=${ROCM_MINOR} ROCM_VERSN=${ROCM_VERSN}" + wget -q https://repo.radeon.com/amdgpu-install/${{ matrix.rocm-version }}/ubuntu/noble/amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb + apt-get install -y ./amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb + apt-get update + apt-get install -y rocm-dev + + - name: Configure + timeout-minutes: 30 + shell: bash + run: | + git config --global --add safe.directory ${PWD} && + cmake --version + USE_ROCM=OFF + if [ ${{ matrix.rocm-version }} != "0.0" ]; then USE_ROCM=ON; fi + cmake -B build \ + -DCMAKE_C_COMPILER=$(echo '${{ matrix.compiler }}' | sed 's/+/c/g') \ + -DCMAKE_CXX_COMPILER=${{ 
matrix.compiler }} \ + -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-systems \ + -DROCPROFSYS_BUILD_TESTING=ON \ + -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl" \ + -DROCPROFSYS_USE_HIP=${USE_ROCM} \ + -DROCPROFSYS_USE_PYTHON=ON \ + -DROCPROFSYS_STRIP_LIBRARIES=${{ matrix.strip }} \ + -DROCPROFSYS_PYTHON_PREFIX=/opt/conda/envs \ + -DROCPROFSYS_PYTHON_ENVS="py3.8;py3.9;py3.10;py3.11;py3.12" + + - name: Build + timeout-minutes: 115 + run: cmake --build build --parallel 2 diff --git a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu index 02e5c58fd9..d41fcb9c85 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu +++ b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu @@ -25,10 +25,15 @@ ENV PATH ${HOME}/.local/bin:${PATH} RUN apt-get update && \ apt-get dist-upgrade -y && \ apt-get install -y apt-utils autoconf autotools-dev bash-completion bison \ - build-essential cmake curl git-core gnupg2 libnuma1 libopenmpi-dev \ + build-essential cmake curl flex gettext git-core gnupg2 libnuma1 libopenmpi-dev \ libpapi-dev libpfm4-dev librpm-dev libtool libudev1 lsb-release m4 \ python3-pip rpm texinfo wget && \ - python3 -m pip install 'cmake==3.18.4' + OS_VERSION=$(cat /etc/os-release | grep VERSION_ID | sed 's/=/ /'1 | awk '{print $NF}' | sed 's/"//g') && \ + if [ "${OS_VERSION}" == "24.04" ]; then \ + python3 -m pip install --break-system-packages 'cmake==3.18.4'; \ + else \ + python3 -m pip install 'cmake==3.18.4'; \ + fi RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ wget https://repo.radeon.com/amdgpu-install/${ROCM_VERSION}/ubuntu/${ROCM_REPO_DIST}/${AMDGPU_DEB} && \ diff --git a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci index c505bc32d0..5058ef81c1 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci +++ 
b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci @@ -26,11 +26,15 @@ ENV CMAKE_PREFIX_PATH /usr/local:${CMAKE_PREFIX_PATH} RUN apt-get update && \ apt-get dist-upgrade -y && \ apt-get install -y autoconf autotools-dev bash-completion bison build-essential \ - bzip2 cmake curl environment-modules git-core gnupg2 gzip libiberty-dev \ - libpapi-dev libpfm4-dev libtool locales lsb-release m4 python3-pip texinfo \ - unzip wget zip zlib1g-dev && \ + bzip2 cmake curl environment-modules flex gettext git-core gnupg2 gzip \ + libiberty-dev libpapi-dev libpfm4-dev libtool locales lsb-release m4 \ + python3-pip texinfo unzip wget zip zlib1g-dev && \ apt-get autoclean && \ - python3 -m pip install 'cmake==3.18.4' + if [ "${OS_VERSION}" == "24.04" ]; then \ + python3 -m pip install --break-system-packages 'cmake==3.18.4'; \ + else \ + python3 -m pip install 'cmake==3.18.4'; \ + fi COPY ./dyninst-source /tmp/dyninst diff --git a/projects/rocprofiler-systems/docker/build-docker.sh b/projects/rocprofiler-systems/docker/build-docker.sh index 79656307d6..94d7fb5c66 100755 --- a/projects/rocprofiler-systems/docker/build-docker.sh +++ b/projects/rocprofiler-systems/docker/build-docker.sh @@ -176,6 +176,9 @@ do case "${ROCM_VERSION}" in 6.*) case "${VERSION}" in + 24.04) + ROCM_REPO_DIST="noble" + ;; 22.04) ROCM_REPO_DIST="jammy" ;; diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/runtime.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/runtime.cpp index db0827102a..373234c3e0 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/runtime.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/runtime.cpp @@ -63,11 +63,11 @@ auto& get_sampling_on_child_threads_history(int64_t _idx = utility::get_thread_index()) { static auto _v = utility::get_filled_array( - []() { return utility::get_reserved_vector(32); }); + []() { return utility::get_reserved_vector(64); }); if(_idx >= ROCPROFSYS_MAX_THREADS) { - 
static thread_local auto _tl_v = utility::get_reserved_vector(32); + static thread_local auto _tl_v = utility::get_reserved_vector(128); return _tl_v; } From ab379457a18ec9081e67c4a5ed921940a58f5ef5 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Fri, 13 Dec 2024 14:52:10 -0500 Subject: [PATCH 03/12] Allow ElfUtils_CONFIG_OPTIONS to provide additional configuration options (#58) Signed-off-by: David Galiffi [ROCm/rocprofiler-systems commit: c76fb0cb81b933985b5950cc7b73c43959f1c29c] --- projects/rocprofiler-systems/cmake/ElfUtils.cmake | 3 ++- projects/rocprofiler-systems/external/dyninst | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/projects/rocprofiler-systems/cmake/ElfUtils.cmake b/projects/rocprofiler-systems/cmake/ElfUtils.cmake index 15d46bae4a..0d9c9a46e2 100644 --- a/projects/rocprofiler-systems/cmake/ElfUtils.cmake +++ b/projects/rocprofiler-systems/cmake/ElfUtils.cmake @@ -112,7 +112,8 @@ externalproject_add( CXXFLAGS=-fPIC\ -O3\ -Wno-error=null-dereference [=[LDFLAGS=-Wl,-rpath='$$ORIGIN']=] /configure --enable-install-elfh --prefix=${_eu_root} --disable-libdebuginfod --disable-debuginfod --disable-nls - --enable-thread-safety --enable-silent-rules --libdir=${_eu_root}/lib + --enable-thread-safety --enable-silent-rules ${ElfUtils_CONFIG_OPTIONS} + --libdir=${_eu_root}/lib BUILD_COMMAND ${MAKE_COMMAND} install -s BUILD_BYPRODUCTS "${_eu_build_byproducts}" INSTALL_COMMAND "") diff --git a/projects/rocprofiler-systems/external/dyninst b/projects/rocprofiler-systems/external/dyninst index ccf4c24749..e539e0a02c 160000 --- a/projects/rocprofiler-systems/external/dyninst +++ b/projects/rocprofiler-systems/external/dyninst @@ -1 +1 @@ -Subproject commit ccf4c247497a24742e418883350df5beb74ecec9 +Subproject commit e539e0a02ca2e53fb6cce3345f2f1ac03054ac65 From 95b8f8fdd9889e3b356bd82708bb8e17a68b8a68 Mon Sep 17 00:00:00 2001 From: Peter Park Date: Fri, 13 Dec 2024 15:59:07 -0500 Subject: [PATCH 04/12] docs: Fix docutils warnings (#59) * 
fix typo * fix `Lexing literal_block` docutils warning * fix `Title underline too short` docutils warning * use consistent file type * fix `Malformed table` error * improve index.rst and front-load TOC [ROCm/rocprofiler-systems commit: 39468e886720cdf97a9fd9c2a3b5975e3242be7c] --- .../conceptual/rocprof-sys-feature-set.rst | 8 ++-- .../how-to/configuring-runtime-options.rst | 2 +- .../how-to/general-tips-using-rocprof-sys.rst | 4 +- .../how-to/performing-causal-profiling.rst | 38 ++++++++--------- .../docs/how-to/profiling-python-scripts.rst | 8 ++-- .../docs/how-to/sampling-call-stack.rst | 3 +- .../understanding-rocprof-sys-output.rst | 2 +- .../docs/how-to/using-rocprof-sys-api.rst | 6 +-- projects/rocprofiler-systems/docs/index.rst | 30 ++++++------- projects/rocprofiler-systems/docs/license.md | 4 -- projects/rocprofiler-systems/docs/license.rst | 8 ++++ .../docs/reference/development-guide.rst | 12 +++--- .../docs/reference/rocprof-sys-glossary.rst | 42 +++++++++---------- .../docs/sphinx/_toc.yml.in | 22 +++++----- .../docs/tutorials/video-tutorials.rst | 6 +-- .../docs/what-is-rocprof-sys.rst | 4 +- 16 files changed, 102 insertions(+), 97 deletions(-) delete mode 100644 projects/rocprofiler-systems/docs/license.md create mode 100644 projects/rocprofiler-systems/docs/license.rst diff --git a/projects/rocprofiler-systems/docs/conceptual/rocprof-sys-feature-set.rst b/projects/rocprofiler-systems/docs/conceptual/rocprof-sys-feature-set.rst index b26e8f1335..5f6307774c 100644 --- a/projects/rocprofiler-systems/docs/conceptual/rocprof-sys-feature-set.rst +++ b/projects/rocprofiler-systems/docs/conceptual/rocprof-sys-feature-set.rst @@ -2,9 +2,9 @@ :description: ROCm Systems Profiler feature set documentation and reference :keywords: rocprof-sys, rocprofiler-systems, Omnitrace, ROCm, profiler, feature set, use cases, tracking, visualization, tool, Instinct, accelerator, AMD -*************************************** -The ROCm Systems Profiler feature set and use 
cases -*************************************** +******************************************** +ROCm Systems Profiler features and use cases +******************************************** `ROCm Systems Profiler `_ is designed to be highly extensible. Internally, it leverages the `Timemory performance analysis toolkit `_ @@ -129,4 +129,4 @@ broad picture. In terms of CPU analysis, ROCm Systems Profiler does not target any specific vendor. It works just as well on AMD and non-AMD CPUs. With regard to the GPU, ROCm Systems Profiler is currently restricted to HIP and HSA APIs -and kernels running on AMD GPUs. \ No newline at end of file +and kernels running on AMD GPUs. diff --git a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst index bc81688358..f624318a6b 100644 --- a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst +++ b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst @@ -173,7 +173,7 @@ PAPI components from different namespaces: about the PAPI library used by ROCm Systems Profiler (because ROCm Systems Profiler statically links to ``libpapi``). However, all of these tools are installed with the prefix ``rocprof-sys-`` with - underscores replaced with hypens, for example ``papi_avail`` becomes ``rocprof-sys-papi-avail``. + underscores replaced with hyphens, for example ``papi_avail`` becomes ``rocprof-sys-papi-avail``. 
ROCPROFSYS_ROCM_EVENTS ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/projects/rocprofiler-systems/docs/how-to/general-tips-using-rocprof-sys.rst b/projects/rocprofiler-systems/docs/how-to/general-tips-using-rocprof-sys.rst index b74b4b5483..bf0aee9d10 100644 --- a/projects/rocprofiler-systems/docs/how-to/general-tips-using-rocprof-sys.rst +++ b/projects/rocprofiler-systems/docs/how-to/general-tips-using-rocprof-sys.rst @@ -2,9 +2,9 @@ :description: ROCm Systems Profiler general tips and usage documentation and reference :keywords: rocprof-sys, rocprofiler-systems, Omnitrace, ROCm, tips, how to, profiler, tracking, visualization, tool, Instinct, accelerator, AMD -********************************** +******************************************** General tips for using ROCm Systems Profiler -********************************** +******************************************** Follow these general guidelines when using ROCm Systems Profiler. For an explanation of the terms used in this topic, see the :doc:`ROCm Systems Profiler glossary <../reference/rocprof-sys-glossary>`. 
diff --git a/projects/rocprofiler-systems/docs/how-to/performing-causal-profiling.rst b/projects/rocprofiler-systems/docs/how-to/performing-causal-profiling.rst index a2629b156a..c95a6d12a4 100644 --- a/projects/rocprofiler-systems/docs/how-to/performing-causal-profiling.rst +++ b/projects/rocprofiler-systems/docs/how-to/performing-causal-profiling.rst @@ -97,32 +97,32 @@ This can happen in three different ways: Key concepts ----------------------------------- -+------------------+-------------------------------------+----------------------------------+--------------------------------------------+ -| Concept | Setting | Options | Description | -+==================+=====================================+==================================+============================================+ ++------------------+--------------------------------------+----------------------------------+--------------------------------------------+ +| Concept | Setting | Options | Description | ++==================+======================================+==================================+============================================+ | Backend | ``ROCPROFSYS_CAUSAL_BACKEND`` | ``perf``, ``timer`` | Backend for recording samples required | -| | | | to calculate the virtual speed-up | -+------------------+-------------------------------------+----------------------------------+--------------------------------------------+ +| | | | to calculate the virtual speed-up | ++------------------+--------------------------------------+----------------------------------+--------------------------------------------+ | Mode | ``ROCPROFSYS_CAUSAL_MODE`` | ``function``, ``line`` | Select an entire function or individual | -| | | | line of code for causal experiments | -+------------------+-------------------------------------+----------------------------------+--------------------------------------------+ +| | | | line of code for causal experiments | 
++------------------+--------------------------------------+----------------------------------+--------------------------------------------+ | End-to-end | ``ROCPROFSYS_CAUSAL_END_TO_END`` | Boolean | Perform a single experiment during the | -| | | | entire run (does not require | -| | | | progress points) | -+------------------+-------------------------------------+----------------------------------+--------------------------------------------+ +| | | | entire run (does not require | +| | | | progress points) | ++------------------+--------------------------------------+----------------------------------+--------------------------------------------+ | Fixed speed-up | ``ROCPROFSYS_CAUSAL_FIXED_SPEEDUP`` | one or more values from [0, 100] | Virtual speed-up or pool of virtual | -| | | | speed-ups to randomly select | -+------------------+-------------------------------------+----------------------------------+--------------------------------------------+ +| | | | speed-ups to randomly select | ++------------------+--------------------------------------+----------------------------------+--------------------------------------------+ | Binary scope | ``ROCPROFSYS_CAUSAL_BINARY_SCOPE`` | regular expression(s) | Dynamic binaries containing code for | -| | | | experiments | -+------------------+-------------------------------------+----------------------------------+--------------------------------------------+ +| | | | experiments | ++------------------+--------------------------------------+----------------------------------+--------------------------------------------+ | Source scope | ``ROCPROFSYS_CAUSAL_SOURCE_SCOPE`` | regular expression(s) | ```` and/or ``:`` | -| | | | containing code to include in experiments | -+------------------+-------------------------------------+----------------------------------+--------------------------------------------+ +| | | | containing code to include in experiments | 
++------------------+--------------------------------------+----------------------------------+--------------------------------------------+ | Function scope | ``ROCPROFSYS_CAUSAL_FUNCTION_SCOPE`` | regular expression(s) | Restricts experiments to matching | -| | | | functions (function mode) or lines of | -| | | | code within matching functions (line mode) | -+------------------+-------------------------------------+----------------------------------+--------------------------------------------+ +| | | | functions (function mode) or lines of | +| | | | code within matching functions (line mode) | ++------------------+--------------------------------------+----------------------------------+--------------------------------------------+ .. note:: diff --git a/projects/rocprofiler-systems/docs/how-to/profiling-python-scripts.rst b/projects/rocprofiler-systems/docs/how-to/profiling-python-scripts.rst index 9b0b3efaa8..695e070fbd 100644 --- a/projects/rocprofiler-systems/docs/how-to/profiling-python-scripts.rst +++ b/projects/rocprofiler-systems/docs/how-to/profiling-python-scripts.rst @@ -28,7 +28,7 @@ be the same size. ``OS`` is the operating system, and ``ABI`` is the application binary interface, for example, ``libpyrocprofsys.cpython-38-x86_64-linux-gnu.so``. -Getting Started +Getting started ======================================== The ROCm Systems Profiler Python package is installed in ``lib/pythonX.Y/site-packages/rocprofsys``. @@ -44,7 +44,7 @@ Both the ``share/rocprofiler-systems/setup-env.sh`` script and the module file i environment variable. Running ROCm Systems Profiler on a Python script -======================================== +================================================ ROCm Systems Profiler provides an ``rocprof-sys-python`` helper bash script which ensures ``PYTHONPATH`` is properly set and the correct Python interpreter is used. 
@@ -200,7 +200,7 @@ And then run using the command ``rocprof-sys-python -b -- ./example.py``, ROCm S |-----------------------------------------------------------| ROCm Systems Profiler Python source instrumentation -======================================== +=================================================== Starting with the unmodified ``example.py`` script above, import the ``rocprofsys`` module: @@ -268,7 +268,7 @@ original ``rocprofsys-python ./example.py`` results: numerous functions called when more complex modules are imported, such as ``import numpy``. ROCm Systems Profiler Python source instrumentation configuration -------------------------------------------------------------- +----------------------------------------------------------------- Within the Python source code, the profiler can be configured by directly modifying the ``rocprof-sys.profiler.config`` data fields. diff --git a/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst b/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst index 0a4417d632..f8702373e0 100644 --- a/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst +++ b/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst @@ -343,7 +343,7 @@ An rocprof-sys-sample example Here is the full output from the previous ``rocprof-sys-sample -PTDH -E all -o rocprof-sys-output %tag% -- ./parallel-overhead-locks 30 4 100`` command: -.. code-block:: shell +.. 
code-block:: shell-session $ rocprof-sys-sample -PTDH -E all -o rocprof-sys-output %tag% -c -- ./parallel-overhead-locks 30 4 100 @@ -403,3 +403,4 @@ Here is the full output from the previous [rocprof-sys][1785877][metadata]> Outputting 'rocprof-sys-output/2024-07-15_16.21/parallel-overhead-locksmetadata-1785877.json' and 'rocprof-sys-output/2024-07-15_16.21/parallel-overhead-locksfunctions-1785877.json' [rocprof-sys][1785877][0][rocprofsys_finalize] Finalized: 0.054582 sec wall_clock, 0.000 MB peak_rss, -1.798 MB page_rss, 0.040000 sec cpu_clock, 73.3 % cpu_util [989.312] perfetto.cc:60128 Tracing session 1 ended, total sessions:0 + diff --git a/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst b/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst index 22549e247c..66cb931202 100644 --- a/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst +++ b/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst @@ -238,7 +238,7 @@ Metadata JSON Sample } Configuring the ROCm Systems Profiler output -======================================== +============================================ ROCm Systems Profiler includes a core set of options for controlling the format and contents of the output files. For additional information, see the guide on diff --git a/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst b/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst index 78b4c80880..7de4c11c99 100644 --- a/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst +++ b/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst @@ -10,7 +10,7 @@ The following example shows how a program can use the ROCm Systems Profiler API for run-time analysis. 
ROCm Systems Profiler user API example program -======================================== +============================================== You can use the ROCm Systems Profiler API to define custom regions to profile and trace. The following C++ program demonstrates this technique by calling several functions from the @@ -157,7 +157,7 @@ ROCm Systems Profiler API, such as ``rocprofsys_user_push_region`` and } Linking the ROCm Systems Profiler libraries to another program -======================================================= +============================================================== To link the ``rocprofiler-systems-user-library`` to another program, use the following CMake and ``g++`` directives. @@ -186,7 +186,7 @@ Output from the API example program First, instrument and run the program. -.. code-block:: shell +.. code-block:: shell-session $ rocprof-sys-instrument -l --min-instructions=8 -E custom_push_region -o -- ./user-api ... diff --git a/projects/rocprofiler-systems/docs/index.rst b/projects/rocprofiler-systems/docs/index.rst index d11bd33913..c498487be1 100644 --- a/projects/rocprofiler-systems/docs/index.rst +++ b/projects/rocprofiler-systems/docs/index.rst @@ -2,17 +2,17 @@ :description: ROCm Systems Profiler documentation and reference :keywords: rocprof-sys, rocprofiler-systems, Omnitrace, ROCm, profiler, tracking, visualization, tool, Instinct, accelerator, AMD -*********************** +*********************************** ROCm Systems Profiler documentation -*********************** +*********************************** -ROCm Systems Profiler, formerly known as "Omnitrace", is designed for the high-level profiling and comprehensive tracing +ROCm Systems Profiler is designed for the high-level profiling and comprehensive tracing of applications running on the CPU or the CPU and GPU. It supports dynamic binary instrumentation, call-stack sampling, and various other features for determining which function and line number are currently executing. 
To learn more, see :doc:`what-is-rocprof-sys` -The code is open and hosted at ``_. - +ROCm Systems Profiler is open source and hosted at ``__. +It is the successor to ``__. .. grid:: 2 :gutter: 3 @@ -22,17 +22,12 @@ The code is open and hosted at ``_. * :doc:`Quick start <./install/quick-start>` * :doc:`ROCm Systems Profiler installation <./install/install>` - -The documentation is structured as follows: +Use the following topics to learn more about the advantages of ROCm Systems Profiler in application +profiling, how it supports performance analysis, and how to leverage its capabilities in practice: .. grid:: 2 :gutter: 3 - .. grid-item-card:: Tutorials - - * `GitHub examples `_ - * :doc:`Video tutorials <./tutorials/video-tutorials>` - .. grid-item-card:: How to * :doc:`Configuring and validating the ROCm Systems Profiler environment <./how-to/configuring-validating-environment>` @@ -48,19 +43,24 @@ The documentation is structured as follows: .. grid-item-card:: Conceptual * :doc:`Data collection modes <./conceptual/data-collection-modes>` - * :doc:`The ROCm Systems Profiler feature set <./conceptual/rocprof-sys-feature-set>` + * :doc:`Features and use cases <./conceptual/rocprof-sys-feature-set>` .. grid-item-card:: Reference * :doc:`Development guide <./reference/development-guide>` - * :doc:`ROCm Systems Profiler glossary <./reference/rocprof-sys-glossary>` + * :doc:`Glossary <./reference/rocprof-sys-glossary>` * :doc:`API library <./doxygen/html/files>` * :doc:`Class member functions <./doxygen/html/functions>` * :doc:`Globals <./doxygen/html/globals>` * :doc:`Classes, structures, and interfaces <./doxygen/html/annotated>` + .. grid-item-card:: Tutorials + + * `GitHub examples `_ + * :doc:`Video tutorials <./tutorials/video-tutorials>` + To contribute to the documentation, refer to `Contributing to ROCm `_. You can find licensing information on the -`Licensing `_ page. \ No newline at end of file +`Licensing `_ page. 
diff --git a/projects/rocprofiler-systems/docs/license.md b/projects/rocprofiler-systems/docs/license.md deleted file mode 100644 index 1f8761f246..0000000000 --- a/projects/rocprofiler-systems/docs/license.md +++ /dev/null @@ -1,4 +0,0 @@ -# License - -```{include} ../LICENSE -``` diff --git a/projects/rocprofiler-systems/docs/license.rst b/projects/rocprofiler-systems/docs/license.rst new file mode 100644 index 0000000000..a65784da98 --- /dev/null +++ b/projects/rocprofiler-systems/docs/license.rst @@ -0,0 +1,8 @@ +.. meta:: + :description: ROCm Systems Profiler license + +******* +License +******* + +.. include:: ../LICENSE diff --git a/projects/rocprofiler-systems/docs/reference/development-guide.rst b/projects/rocprofiler-systems/docs/reference/development-guide.rst index 1f47fbfa0c..2a6c881d54 100644 --- a/projects/rocprofiler-systems/docs/reference/development-guide.rst +++ b/projects/rocprofiler-systems/docs/reference/development-guide.rst @@ -16,7 +16,7 @@ Executables This section lists the ROCm Systems Profiler executables. 
rocprof-sys-avail: `source/bin/rocprof-sys-avail `_ -------------------------------------------------------------------------------------------------------------------------------- +----------------------------------------------------------------------------------------------------------------------------------------------- The ``main`` routine of ``rocprof-sys-avail`` has three important sections: @@ -25,7 +25,7 @@ The ``main`` routine of ``rocprof-sys-avail`` has three important sections: * Printing hardware counters rocprof-sys-sample: `source/bin/rocprof-sys-sample `_ ----------------------------------------------------------------------------------------------------------------------------------- +-------------------------------------------------------------------------------------------------------------------------------------------------- * Requires a command-line format of ``rocprof-sys-sample -- `` * Translates command-line options into environment variables @@ -33,7 +33,7 @@ rocprof-sys-sample: `source/bin/rocprof-sys-sample `` and a modified environment rocprof-sys-casual: `source/bin/rocprof-sys-causal `_ ----------------------------------------------------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------------------------------------------------------------------- When there is exactly one causal profiling configuration variant (which enables debugging), ``rocprof-sys-casual`` has a nearly identical design to ``rocprof-sys-sample`` @@ -46,7 +46,7 @@ the following actions take place for each variant: * the parent process waits for the child process to finish rocprof-sys-instrument: `source/bin/rocprof-sys-instrument `_ ----------------------------------------------------------------------------------------------------------------------------------------------- 
+-------------------------------------------------------------------------------------------------------------------------------------------------------------- * Requires a command-line format of ``rocprof-sys-instrument -- `` * Allows the user to provide options specifying whether to perform runtime instrumentation, use binary rewrite, or @@ -95,7 +95,7 @@ librocprof-sys: `source/lib/rocprof-sys `_ --------------------------------------------------------------------------------------------------------------------------------- +----------------------------------------------------------------------------------------------------------------------------------------- This is a lightweight, front-end library for ``librocprof-sys`` which serves three primary purposes: @@ -106,7 +106,7 @@ This is a lightweight, front-end library for ``librocprof-sys`` which serves thr * Coordinates communication between ``librocprof-sys-user`` and ``librocprof-sys`` librocprof-sys-user: `source/lib/rocprof-sys-user `_ --------------------------------------------------------------------------------------------------------------------------------- +----------------------------------------------------------------------------------------------------------------------------------------------- * Provides a set of functions and types for the users to add to their code, for example, disabling data collection globally or on a specific thread or diff --git a/projects/rocprofiler-systems/docs/reference/rocprof-sys-glossary.rst b/projects/rocprofiler-systems/docs/reference/rocprof-sys-glossary.rst index f14919fb75..f259bb851c 100644 --- a/projects/rocprofiler-systems/docs/reference/rocprof-sys-glossary.rst +++ b/projects/rocprofiler-systems/docs/reference/rocprof-sys-glossary.rst @@ -2,9 +2,9 @@ :description: ROCm Systems Profiler glossary and reference :keywords: rocprof-sys, rocprofiler-systems, Omnitrace, ROCm, glossary, terminology, profiler, tracking, visualization, tool, Instinct, accelerator, 
AMD -******************* -ROCm Systems Profiler Glossary -******************* +******** +Glossary +******** This topic explains the terminology necessary to use ROCm Systems Profiler. The list below provides a basic glossary for those who @@ -13,59 +13,59 @@ when certain terms have different contextual meanings, for example, the ROCm Systems Profiler meaning of the term "module" when instrumenting Python. -**Binary** +Binary A file written in the Executable and Linkable Format (ELF). This is the standard file format for executable files, shared libraries, etc. -**Binary instrumentation** +Binary instrumentation Inserting callbacks to instrumentation into an existing binary. This can be performed statically or dynamically. -**Static binary instrumentation** +Static binary instrumentation Loads an existing binary, determines instrumentation points, and generates a new binary with instrumentation directly embedded. It is applicable to executables and libraries but limited to only the functions defined in the binary. This is also known as **Binary rewrite**. -**Dynamic binary instrumentation** +Dynamic binary instrumentation Loads an existing binary into memory, inserts instrumentation, and runs the binary. It is limited to executables but is capable of instrumenting linked libraries. This is also known as **Runtime instrumentation**. -**Statistical sampling** +Statistical sampling At periodic intervals, the application is paused and the current call-stack of the CPU is recorded along with various other metrics. It uses timers that measure either (A) real clock time or (B) the CPU time used by the current thread and the CPU time expended on behalf of the thread by the system. This is also known as simply **sampling**. 
- **Sampling rate** + Sampling rate * The period at which (A) or (B) are triggered (in units of ``# interrupts / second``) * Higher values increase the number of samples - **Sampling delay** + Sampling delay * How long to wait before (A) and (B) begin triggering at their designated rate - **Sampling duration** + Sampling duration * The amount of time (in real-time) after the start of the application to record samples. * After this time limit has been reached, no more samples are recorded. -**Process sampling** +Process sampling At periodic (real-time) intervals, a background thread records global metrics without interrupting the current process. These metrics include, but are not limited to: CPU frequency, CPU memory high-water mark (i.e. peak memory usage), GPU temperature, and GPU power usage. - **Sampling rate** + Sampling rate * The real-time period for recording metrics (in units of ``# measurements / second``) * Higher values increase the number of samples - **Sampling delay** + Sampling delay * How long to wait (in real-time) before recording samples - **Sampling duration** + Sampling duration * The amount of time (in real-time) after the start of the application to record samples. * After this time limit has been reached, no more samples are recorded. -**Module** +Module With respect to binary instrumentation, a module is defined as either the filename (such as ``foo.c``) or library name (``libfoo.so``) which contains the definition of one or more functions. @@ -74,18 +74,18 @@ when instrumenting Python. the definition of one or more functions. The full path to this file typically contains the name of the "Python module". -**Basic block** +Basic block A straight-line code sequence with no branches in (except for the entry) and no branches out (except for the exit). -**Address range** +Address range The instructions for a function in a binary start at certain address with the ELF file and end at a certain address. The range is ``end - start``. 
The address range is a decent approximation for the "cost" of a function. For example, a larger address range approximately equates to more instructions. -**Instrumentation traps** +Instrumentation traps On the x86 architecture, because instructions are of variable size, an instruction might be too small for Dyninst to replace it with the normal code sequence used to call instrumentation. When instrumentation is placed at points other @@ -93,10 +93,10 @@ when instrumenting Python. the instrumentation fits. (By default, ``rocprof-sys-instrument`` avoids instrumentation which requires a trap.) -**Overlapping functions** +Overlapping functions Due to language constructs or compiler optimizations, it might be possible for multiple functions to overlap (that is, share part of the same function body) or for a single function to have multiple entry points. In practice, it's impossible to determine the difference between multiple overlapping functions and a single function with multiple entry points. (By default, ``rocprof-sys-instrument`` - avoids instrumenting overlapping functions.) \ No newline at end of file + avoids instrumenting overlapping functions.) 
diff --git a/projects/rocprofiler-systems/docs/sphinx/_toc.yml.in b/projects/rocprofiler-systems/docs/sphinx/_toc.yml.in index 486613a72b..e6fb884da4 100644 --- a/projects/rocprofiler-systems/docs/sphinx/_toc.yml.in +++ b/projects/rocprofiler-systems/docs/sphinx/_toc.yml.in @@ -15,13 +15,6 @@ subtrees: - file: install/install.rst title: ROCm Systems Profiler installation guide - - caption: Tutorials - entries: - - url: https://github.com/ROCm/rocprofiler-systems/tree/amd-mainline/examples - title: GitHub examples - - file: tutorials/video-tutorials.rst - title: Video tutorials - - caption: How to entries: - file: how-to/configuring-validating-environment.rst @@ -45,17 +38,17 @@ subtrees: - caption: Conceptual entries: + - file: conceptual/rocprof-sys-feature-set.rst + title: Features and use cases - file: conceptual/data-collection-modes.rst title: Data collection modes - - file: conceptual/rocprof-sys-feature-set.rst - title: The ROCm Systems Profiler feature set and use cases - caption: Reference entries: - file: reference/development-guide.rst title: Development guide - file: reference/rocprof-sys-glossary.rst - title: ROCm Systems Profiler glossary + title: Glossary - file: doxygen/html/files title: API library - file: doxygen/html/functions @@ -65,6 +58,13 @@ subtrees: - file: doxygen/html/annotated title: Classes, structures, and interfaces + - caption: Tutorials + entries: + - url: https://github.com/ROCm/rocprofiler-systems/tree/amd-mainline/examples + title: GitHub examples + - file: tutorials/video-tutorials.rst + title: Video tutorials + - caption: About entries: - - file: license.md + - file: license.rst diff --git a/projects/rocprofiler-systems/docs/tutorials/video-tutorials.rst b/projects/rocprofiler-systems/docs/tutorials/video-tutorials.rst index 71ef2b0c05..37fcecb9ad 100644 --- a/projects/rocprofiler-systems/docs/tutorials/video-tutorials.rst +++ b/projects/rocprofiler-systems/docs/tutorials/video-tutorials.rst @@ -23,8 +23,8 @@ Instrumenting a 
binary

-Writing an ROCm Systems Profiler configuration file -======================================== +Writing a ROCm Systems Profiler configuration file +================================================== .. raw:: html @@ -35,4 +35,4 @@ Visualization and features of Perfetto traces .. raw:: html -

\ No newline at end of file +

diff --git a/projects/rocprofiler-systems/docs/what-is-rocprof-sys.rst b/projects/rocprofiler-systems/docs/what-is-rocprof-sys.rst index 09ec88a66a..fb2a4ed16b 100644 --- a/projects/rocprofiler-systems/docs/what-is-rocprof-sys.rst +++ b/projects/rocprofiler-systems/docs/what-is-rocprof-sys.rst @@ -2,9 +2,9 @@ :description: ROCm Systems Profiler introduction, explanation, and reference :keywords: rocprof-sys, rocprofiler-systems, Omnitrace, ROCm, profiler, explanation, introduction, what is, tracking, visualization, tool, Instinct, accelerator, AMD -****************** +****************************** What is ROCm Systems Profiler? -****************** +****************************** ROCm Systems Profiler is designed for the high-level profiling and comprehensive tracing of applications running on the CPU or the CPU and GPU. It supports dynamic binary From 417d22ee3e808ca88a8428c79ca7c0ae970a320f Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Fri, 13 Dec 2024 17:35:22 -0500 Subject: [PATCH 05/12] Update VERSION to 0.2.0 [ROCm/rocprofiler-systems commit: d3725df81608f590912188d4322c359b4b1703f1] --- projects/rocprofiler-systems/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/rocprofiler-systems/VERSION b/projects/rocprofiler-systems/VERSION index 6e8bf73aa5..0ea3a944b3 100644 --- a/projects/rocprofiler-systems/VERSION +++ b/projects/rocprofiler-systems/VERSION @@ -1 +1 @@ -0.1.0 +0.2.0 From b29cfac106f1cf41915f6ecc2175adfd7d143db7 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Fri, 13 Dec 2024 18:48:39 -0500 Subject: [PATCH 06/12] Update to use rocprofiler-sdk (#55) - Renames the CMake option "ROCPROFSYS_USE_HIP" to "ROCPROFSYS_USE_ROCM" - Remove the "ROCPROFSYS_USE_ROCM_SMI" option. Controlled with the "ROCPROFSYS_USE_ROCM" option, instead. - Runtime configuration can still toggle ROCPROFSYS_USE_ROCM_SMI to disable the sampling.
- Rename ROCPROFSYS_HIP_VERSION macro to ROCPROFSYS_ROCM_VERSION and remove blocks for `ROCPROFSYS_ROCM_VERSION < 60000` - Remove ROCPROFSYS_USE_ROCTRACER and ROCPROFSYS_USE_ROCPROFILER - Update test cases - Update docker files and workflows to install cmake 3.21, which is required for the rocprofiler-sdk findPackage script. - Removed rocm-6.2 from workflows due to a rocprofiler-sdk API change. [ROCm/rocprofiler-systems commit: 88aa2d3cbe13bdfb3dc8ebf2669556bb331d6a53] --- .../.github/workflows/containers.yml | 40 +- .../.github/workflows/cpack.yml | 35 - .../.github/workflows/opensuse.yml | 4 +- .../.github/workflows/redhat.yml | 8 +- .../.github/workflows/ubuntu-focal.yml | 26 +- .../.github/workflows/ubuntu-jammy.yml | 21 +- .../.github/workflows/ubuntu-noble.yml | 2 +- projects/rocprofiler-systems/CMakeLists.txt | 43 +- .../cmake/ConfigCPack.cmake | 15 +- projects/rocprofiler-systems/cmake/PAPI.cmake | 32 +- .../rocprofiler-systems/cmake/Packages.cmake | 94 +- .../cmake/Templates/modulefile.in | 4 - .../cmake/Templates/setup-env.sh.in | 9 - .../docker/Dockerfile.opensuse | 2 +- .../docker/Dockerfile.opensuse.ci | 2 +- .../docker/Dockerfile.rhel | 2 +- .../docker/Dockerfile.rhel.ci | 2 +- .../docker/Dockerfile.ubuntu | 4 +- .../docker/Dockerfile.ubuntu.ci | 4 +- .../how-to/configuring-runtime-options.rst | 14 +- .../docs/how-to/sampling-call-stack.rst | 16 +- .../docs/install/install.rst | 32 +- .../scripts/build-release.sh | 8 +- .../source/bin/CMakeLists.txt | 15 +- .../source/bin/rocprof-sys-avail/avail.cpp | 21 +- .../bin/rocprof-sys-avail/generate_config.cpp | 2 +- .../bin/rocprof-sys-avail/info_type.cpp | 2 - .../source/bin/rocprof-sys-causal/impl.cpp | 9 - .../bin/rocprof-sys-instrument/CMakeLists.txt | 2 + .../rocprof-sys-instrument/internal_libs.cpp | 26 +- .../rocprof-sys-instrument.cpp | 10 +- .../source/bin/rocprof-sys-sample/impl.cpp | 96 +- .../source/lib/CMakeLists.txt | 15 +- .../source/lib/common/CMakeLists.txt | 4 +- 
.../source/lib/common/defines.h.in | 24 +- .../source/lib/common/setup.hpp | 142 -- .../source/lib/common/static_object.hpp | 207 +++ .../source/lib/common/synchronized.hpp | 167 +++ .../source/lib/core/CMakeLists.txt | 10 +- .../source/lib/core/argparse.cpp | 97 +- .../source/lib/core/categories.hpp | 32 +- .../source/lib/core/components/fwd.hpp | 10 +- .../source/lib/core/config.cpp | 257 +--- .../source/lib/core/config.hpp | 62 +- .../lib/core/containers/stable_vector.hpp | 18 +- .../source/lib/core/gpu.cpp | 418 +----- .../source/lib/core/gpu.hpp | 4 +- .../source/lib/core/hip_runtime.hpp | 27 +- .../source/lib/core/perfetto.hpp | 1 + .../source/lib/core/rccl.hpp | 10 +- .../source/lib/core/rocprofiler-sdk.cpp | 576 ++++++++ .../source/lib/core/rocprofiler-sdk.hpp | 70 + .../source/lib/core/state.cpp | 6 +- .../source/lib/core/utility.hpp | 9 + .../source/lib/rocprof-sys-dl/CMakeLists.txt | 3 +- .../source/lib/rocprof-sys-dl/dl.cpp | 189 ++- .../source/lib/rocprof-sys-dl/dl/dl.hpp | 24 +- .../source/lib/rocprof-sys-dl/main.c | 33 +- .../rocprofiler-systems/categories.h | 16 +- .../source/lib/rocprof-sys/library.cpp | 45 +- .../lib/rocprof-sys/library/CMakeLists.txt | 27 +- .../library/components/CMakeLists.txt | 12 - .../library/components/category_region.hpp | 8 +- .../components/pthread_create_gotcha.cpp | 6 +- .../library/components/rocprofiler.cpp | 193 --- .../library/components/rocprofiler.hpp | 241 --- .../library/components/roctracer.cpp | 396 ----- .../library/components/roctracer.hpp | 117 -- .../source/lib/rocprof-sys/library/rocm.cpp | 210 +-- .../source/lib/rocprof-sys/library/rocm.hpp | 40 +- .../rocprof-sys/library/rocm/CMakeLists.txt | 7 - .../library/rocm/hsa_rsrc_factory.cpp | 1027 ------------- .../library/rocm/hsa_rsrc_factory.hpp | 582 -------- .../lib/rocprof-sys/library/rocm_smi.hpp | 5 +- .../rocprof-sys/library/rocprofiler-sdk.cpp | 1308 +++++++++++++++++ .../{rocprofiler.hpp => rocprofiler-sdk.hpp} | 62 +- 
.../library/rocprofiler-sdk/CMakeLists.txt | 9 + .../library/rocprofiler-sdk/counters.cpp | 135 ++ .../library/rocprofiler-sdk/counters.hpp | 168 +++ .../library/rocprofiler-sdk/fwd.cpp | 270 ++++ .../library/rocprofiler-sdk/fwd.hpp | 252 ++++ .../lib/rocprof-sys/library/rocprofiler.cpp | 834 ----------- .../lib/rocprof-sys/library/roctracer.cpp | 967 ------------ .../lib/rocprof-sys/library/roctracer.hpp | 89 -- .../lib/rocprof-sys/library/runtime.hpp | 1 - .../tests/rocprof-sys-rocm-tests.cmake | 36 +- .../tests/rocprof-sys-testing.cmake | 23 +- 87 files changed, 3842 insertions(+), 6261 deletions(-) create mode 100644 projects/rocprofiler-systems/source/lib/common/static_object.hpp create mode 100644 projects/rocprofiler-systems/source/lib/common/synchronized.hpp create mode 100644 projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp create mode 100644 projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.hpp delete mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/rocprofiler.cpp delete mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/rocprofiler.hpp delete mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/roctracer.cpp delete mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/roctracer.hpp delete mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/CMakeLists.txt delete mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/hsa_rsrc_factory.cpp delete mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/hsa_rsrc_factory.hpp create mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp rename projects/rocprofiler-systems/source/lib/rocprof-sys/library/{rocprofiler.hpp => rocprofiler-sdk.hpp} (54%) create mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/CMakeLists.txt 
create mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp create mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.hpp create mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp create mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp delete mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler.cpp delete mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/roctracer.cpp delete mode 100644 projects/rocprofiler-systems/source/lib/rocprof-sys/library/roctracer.hpp diff --git a/projects/rocprofiler-systems/.github/workflows/containers.yml b/projects/rocprofiler-systems/.github/workflows/containers.yml index 86d12fcf4d..09bb556028 100644 --- a/projects/rocprofiler-systems/.github/workflows/containers.yml +++ b/projects/rocprofiler-systems/.github/workflows/containers.yml @@ -39,12 +39,10 @@ jobs: version: "15.5" - distro: "opensuse" version: "15.6" - - distro: "rhel" - version: "8.8" - distro: "rhel" version: "8.10" - distro: "rhel" - version: "9.2" + version: "9.3" - distro: "rhel" version: "9.4" @@ -90,9 +88,6 @@ jobs: - os-distro: "ubuntu" os-version: "20.04" rocm-version: "0.0" - - os-distro: "ubuntu" - os-version: "20.04" - rocm-version: "6.2" - os-distro: "ubuntu" os-version: "20.04" rocm-version: "6.3" @@ -100,9 +95,6 @@ jobs: - os-distro: "ubuntu" os-version: "22.04" rocm-version: "0.0" - - os-distro: "ubuntu" - os-version: "22.04" - rocm-version: "6.2" - os-distro: "ubuntu" os-version: "22.04" rocm-version: "6.3" @@ -110,9 +102,6 @@ jobs: - os-distro: "ubuntu" os-version: "24.04" rocm-version: "0.0" - - os-distro: "ubuntu" - os-version: "24.04" - rocm-version: "6.2" - os-distro: "ubuntu" os-version: "24.04" rocm-version: "6.3" @@ -120,9 +109,6 @@ jobs: - os-distro: "opensuse" os-version: "15.5" rocm-version: "0.0" - - os-distro: 
"opensuse" - os-version: "15.5" - rocm-version: "6.2" - os-distro: "opensuse" os-version: "15.5" rocm-version: "6.3" @@ -130,43 +116,19 @@ jobs: - os-distro: "opensuse" os-version: "15.6" rocm-version: "0.0" - - os-distro: "opensuse" - os-version: "15.6" - rocm-version: "6.2" - os-distro: "opensuse" os-version: "15.6" rocm-version: "6.3" - # RHEL 8.9 - - os-distro: "rhel" - os-version: "8.9" - rocm-version: "0.0" - - os-distro: "rhel" - os-version: "8.9" - rocm-version: "6.2" - # RHEL 8.10 - os-distro: "rhel" os-version: "8.10" rocm-version: "0.0" - - os-distro: "rhel" - os-version: "8.10" - rocm-version: "6.2" - os-distro: "rhel" os-version: "8.10" rocm-version: "6.3" - # RHEL 9.3 - - os-distro: "rhel" - os-version: "9.3" - rocm-version: "0.0" - - os-distro: "rhel" - os-version: "9.3" - rocm-version: "6.2" # RHEL 9.4 - os-distro: "rhel" os-version: "9.4" rocm-version: "0.0" - - os-distro: "rhel" - os-version: "9.4" - rocm-version: "6.2" - os-distro: "rhel" os-version: "9.4" rocm-version: "6.3" diff --git a/projects/rocprofiler-systems/.github/workflows/cpack.yml b/projects/rocprofiler-systems/.github/workflows/cpack.yml index 5bb01cea1f..e922318f70 100644 --- a/projects/rocprofiler-systems/.github/workflows/cpack.yml +++ b/projects/rocprofiler-systems/.github/workflows/cpack.yml @@ -37,9 +37,6 @@ jobs: - os-distro: "ubuntu" os-version: "20.04" rocm-version: "0.0" - - os-distro: "ubuntu" - os-version: "20.04" - rocm-version: "6.2" - os-distro: "ubuntu" os-version: "20.04" rocm-version: "6.3" @@ -47,9 +44,6 @@ jobs: - os-distro: "ubuntu" os-version: "22.04" rocm-version: "0.0" - - os-distro: "ubuntu" - os-version: "22.04" - rocm-version: "6.2" - os-distro: "ubuntu" os-version: "22.04" rocm-version: "6.3" @@ -57,9 +51,6 @@ jobs: - os-distro: "ubuntu" os-version: "24.04" rocm-version: "0.0" - - os-distro: "ubuntu" - os-version: "24.04" - rocm-version: "6.2" - os-distro: "ubuntu" os-version: "24.04" rocm-version: "6.3" @@ -67,9 +58,6 @@ jobs: - os-distro: "opensuse" 
os-version: "15.5" rocm-version: "0.0" - - os-distro: "opensuse" - os-version: "15.5" - rocm-version: "6.2" - os-distro: "opensuse" os-version: "15.5" rocm-version: "6.3" @@ -77,43 +65,20 @@ jobs: - os-distro: "opensuse" os-version: "15.6" rocm-version: "0.0" - - os-distro: "opensuse" - os-version: "15.6" - rocm-version: "6.2" - os-distro: "opensuse" os-version: "15.6" rocm-version: "6.3" - # RHEL 8.9 - - os-distro: "rhel" - os-version: "8.9" - rocm-version: "0.0" - - os-distro: "rhel" - os-version: "8.9" - rocm-version: "6.2" # RHEL 8.10 - os-distro: "rhel" os-version: "8.10" rocm-version: "0.0" - - os-distro: "rhel" - os-version: "8.10" - rocm-version: "6.2" - os-distro: "rhel" os-version: "8.10" rocm-version: "6.3" - # RHEL 9.3 - - os-distro: "rhel" - os-version: "9.3" - rocm-version: "0.0" - - os-distro: "rhel" - os-version: "9.3" - rocm-version: "6.2" # RHEL 9.4 - os-distro: "rhel" os-version: "9.4" rocm-version: "0.0" - - os-distro: "rhel" - os-version: "9.4" - rocm-version: "6.2" - os-distro: "rhel" os-version: "9.4" rocm-version: "6.3" diff --git a/projects/rocprofiler-systems/.github/workflows/opensuse.yml b/projects/rocprofiler-systems/.github/workflows/opensuse.yml index 89ae9ba727..dad6a343ba 100644 --- a/projects/rocprofiler-systems/.github/workflows/opensuse.yml +++ b/projects/rocprofiler-systems/.github/workflows/opensuse.yml @@ -66,7 +66,7 @@ jobs: fi python3 -m pip install --upgrade pip && python3 -m pip install --upgrade numpy perfetto dataclasses && - python3 -m pip install 'cmake==3.18.4' && + python3 -m pip install 'cmake==3.21' && for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done - name: Configure Env @@ -93,7 +93,7 @@ jobs: -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-systems -DROCPROFSYS_BUILD_TESTING=ON -DROCPROFSYS_USE_MPI=OFF - -DROCPROFSYS_USE_HIP=OFF + -DROCPROFSYS_USE_ROCM=OFF -DROCPROFSYS_USE_OMPT=OFF -DROCPROFSYS_USE_PYTHON=ON -DROCPROFSYS_INSTALL_PERFETTO_TOOLS=OFF 
diff --git a/projects/rocprofiler-systems/.github/workflows/redhat.yml b/projects/rocprofiler-systems/.github/workflows/redhat.yml index 0e9491760d..d582f14dc5 100644 --- a/projects/rocprofiler-systems/.github/workflows/redhat.yml +++ b/projects/rocprofiler-systems/.github/workflows/redhat.yml @@ -46,8 +46,8 @@ jobs: fail-fast: false matrix: compiler: ['g++'] - os-release: [ '8.10', '9.2', '9.4' ] - rocm-version: [ '0.0', '6.2', '6.3' ] + os-release: [ '8.10', '9.3', '9.4' ] + rocm-version: [ '0.0', '6.3' ] build-type: ['Release'] steps: @@ -70,7 +70,7 @@ jobs: fi python3 -m pip install --upgrade pip && python3 -m pip install --upgrade numpy perfetto dataclasses && - python3 -m pip install 'cmake==3.18.4' && + python3 -m pip install 'cmake==3.21' && for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done - name: Install ROCm Packages @@ -108,7 +108,7 @@ jobs: -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-systems -DROCPROFSYS_BUILD_TESTING=ON -DROCPROFSYS_USE_MPI=OFF - -DROCPROFSYS_USE_HIP=${USE_HIP} + -DROCPROFSYS_USE_ROCM=${USE_HIP} -DROCPROFSYS_USE_OMPT=OFF -DROCPROFSYS_USE_PYTHON=ON -DROCPROFSYS_USE_MPI_HEADERS=ON diff --git a/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml b/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml index 31e813d1d0..04eb1577f4 100644 --- a/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml +++ b/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml @@ -100,7 +100,7 @@ jobs: chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && python3 -m pip install --upgrade numpy perfetto dataclasses && - python3 -m pip install 'cmake==3.18.4' && + python3 -m pip install 'cmake==3.21' && for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done && apt-get -y --purge autoremove && apt-get -y clean && @@ -145,7 +145,7 @@ jobs: 
-DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-systems -DROCPROFSYS_BUILD_TESTING=ON -DROCPROFSYS_USE_MPI=OFF - -DROCPROFSYS_USE_HIP=OFF + -DROCPROFSYS_USE_ROCM=OFF -DROCPROFSYS_USE_OMPT=OFF -DROCPROFSYS_USE_PAPI=OFF -DROCPROFSYS_USE_PYTHON=${{ matrix.python }} @@ -245,16 +245,10 @@ jobs: fail-fast: false matrix: compiler: ['g++'] - rocm-version: ['6.2'] + rocm-version: ['6.3'] mpi-headers: ['OFF'] build-jobs: ['3'] - ctest-exclude: ['-LE "mpi-example|transpose"'] - include: - - compiler: 'g++' - rocm-version: 'latest' - mpi-headers: 'ON' - build-jobs: '2' - ctest-exclude: '-LE transpose' + ctest-exclude: ['-LE "transpose"'] env: BUILD_TYPE: MinSizeRel @@ -282,7 +276,7 @@ jobs: chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && python3 -m pip install --upgrade numpy perfetto dataclasses && - python3 -m pip install 'cmake==3.18.4' && + python3 -m pip install 'cmake==3.21' && for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done && apt-get -y --purge autoremove && apt-get -y clean && @@ -336,7 +330,7 @@ jobs: -DROCPROFSYS_BUILD_EXTRA_OPTIMIZATIONS=OFF -DROCPROFSYS_BUILD_LTO=OFF -DROCPROFSYS_USE_MPI=OFF - -DROCPROFSYS_USE_HIP=ON + -DROCPROFSYS_USE_ROCM=ON -DROCPROFSYS_MAX_THREADS=64 -DROCPROFSYS_USE_PAPI=OFF -DROCPROFSYS_USE_OMPT=OFF @@ -440,7 +434,7 @@ jobs: chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && python3 -m pip install --upgrade numpy perfetto dataclasses && - python3 -m pip install 'cmake==3.18.4' && + python3 -m pip install 'cmake==3.21' && sudo apt-get -y --purge autoremove && sudo apt-get -y clean @@ -477,7 +471,7 @@ jobs: -DROCPROFSYS_BUILD_TESTING=ON -DROCPROFSYS_BUILD_DYNINST=ON -DROCPROFSYS_USE_MPI=${USE_MPI} - -DROCPROFSYS_USE_HIP=OFF + -DROCPROFSYS_USE_ROCM=OFF -DROCPROFSYS_USE_PYTHON=${{ matrix.python }} -DROCPROFSYS_USE_OMPT=${{ matrix.ompt }} -DROCPROFSYS_USE_PAPI=${{ matrix.papi }} @@ 
-593,7 +587,7 @@ jobs: chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && python3 -m pip install --upgrade numpy perfetto dataclasses && - python3 -m pip install 'cmake==3.18.4' && + python3 -m pip install 'cmake==3.21' && for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done && apt-get -y --purge autoremove && apt-get -y clean && @@ -625,7 +619,7 @@ jobs: -DROCPROFSYS_USE_PYTHON=ON -DROCPROFSYS_USE_OMPT=ON -DROCPROFSYS_USE_PAPI=ON - -DROCPROFSYS_USE_HIP=OFF + -DROCPROFSYS_USE_ROCM=OFF -DROCPROFSYS_USE_RCCL=OFF -DROCPROFSYS_MAX_THREADS=64 -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl" diff --git a/projects/rocprofiler-systems/.github/workflows/ubuntu-jammy.yml b/projects/rocprofiler-systems/.github/workflows/ubuntu-jammy.yml index 47026737c0..1debacf5fb 100644 --- a/projects/rocprofiler-systems/.github/workflows/ubuntu-jammy.yml +++ b/projects/rocprofiler-systems/.github/workflows/ubuntu-jammy.yml @@ -75,22 +75,7 @@ jobs: static-libgcc: 'OFF' static-libstdcxx: 'OFF' build-dyninst: 'OFF' - rocm-version: '6.2' - - compiler: 'g++' - hip: 'ON' - mpi: 'OFF' - ompt: 'OFF' - papi: 'OFF' - python: 'ON' - lto: 'OFF' - strip: 'OFF' - hidden: 'ON' - build-type: 'Release' - mpi-headers: 'OFF' - static-libgcc: 'OFF' - static-libstdcxx: 'OFF' - build-dyninst: 'OFF' - rocm-version: 'latest' + rocm-version: '6.3' env: OMPI_ALLOW_RUN_AS_ROOT: 1 @@ -116,7 +101,7 @@ jobs: openmpi-bin python3-pip texinfo ${{ matrix.compiler }} && python3 -m pip install --upgrade pip && python3 -m pip install --upgrade numpy perfetto dataclasses && - python3 -m pip install 'cmake==3.18.4' && + python3 -m pip install 'cmake==3.21' && for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done - name: Install ROCm Packages @@ -183,7 +168,7 @@ jobs: -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-systems-dev -DROCPROFSYS_BUILD_TESTING=ON 
-DROCPROFSYS_USE_MPI=${{ matrix.mpi }} - -DROCPROFSYS_USE_HIP=${{ matrix.hip }} + -DROCPROFSYS_USE_ROCM=${{ matrix.hip }} -DROCPROFSYS_USE_OMPT=${{ matrix.ompt }} -DROCPROFSYS_USE_PAPI=${{ matrix.papi }} -DROCPROFSYS_USE_PYTHON=${{ matrix.python }} diff --git a/projects/rocprofiler-systems/.github/workflows/ubuntu-noble.yml b/projects/rocprofiler-systems/.github/workflows/ubuntu-noble.yml index 30e03dd784..68ef66484e 100644 --- a/projects/rocprofiler-systems/.github/workflows/ubuntu-noble.yml +++ b/projects/rocprofiler-systems/.github/workflows/ubuntu-noble.yml @@ -101,7 +101,7 @@ jobs: -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-systems \ -DROCPROFSYS_BUILD_TESTING=ON \ -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl" \ - -DROCPROFSYS_USE_HIP=${USE_ROCM} \ + -DROCPROFSYS_USE_ROCM=${USE_ROCM} \ -DRCOPROFSYS_USE_PYTHON=ON \ -DROCPROFSYS_STRIP_LIBRARIES=${{ matrix.strip }} \ -DROCPROFSYS_PYTHON_PREFIX=/opt/conda/envs \ diff --git a/projects/rocprofiler-systems/CMakeLists.txt b/projects/rocprofiler-systems/CMakeLists.txt index a3e3b60d39..68d13c4dc2 100644 --- a/projects/rocprofiler-systems/CMakeLists.txt +++ b/projects/rocprofiler-systems/CMakeLists.txt @@ -176,18 +176,11 @@ rocprofiler_systems_add_option(ROCPROFSYS_USE_CLANG_TIDY "Enable clang-tidy" OFF rocprofiler_systems_add_option(ROCPROFSYS_USE_BFD "Enable BFD support (map call-stack samples to LOC)" ON) rocprofiler_systems_add_option(ROCPROFSYS_USE_MPI "Enable MPI support" OFF) -rocprofiler_systems_add_option(ROCPROFSYS_USE_HIP "Enable HIP support" ON) +rocprofiler_systems_add_option(ROCPROFSYS_USE_ROCM "Enable ROCm support" ON) rocprofiler_systems_add_option(ROCPROFSYS_USE_PAPI "Enable HW counter support via PAPI" ON) -rocprofiler_systems_add_option(ROCPROFSYS_USE_ROCTRACER "Enable roctracer support" - ${ROCPROFSYS_USE_HIP}) -rocprofiler_systems_add_option(ROCPROFSYS_USE_ROCPROFILER "Enable rocprofiler support" - ${ROCPROFSYS_USE_HIP}) -rocprofiler_systems_add_option( - ROCPROFSYS_USE_ROCM_SMI "Enable rocm-smi 
support for power/temp/etc. sampling" - ${ROCPROFSYS_USE_HIP}) rocprofiler_systems_add_option(ROCPROFSYS_USE_RCCL "Enable RCCL support" - ${ROCPROFSYS_USE_HIP}) + ${ROCPROFSYS_USE_ROCM}) rocprofiler_systems_add_option( ROCPROFSYS_USE_MPI_HEADERS "Enable wrapping MPI functions w/o enabling MPI dependency" ON) @@ -217,30 +210,10 @@ elseif("$ENV{ROCPROFSYS_CI}") endif() endif() -if(NOT ROCPROFSYS_USE_HIP) - set(ROCPROFSYS_USE_ROCTRACER - OFF - CACHE BOOL "Disabled via ROCPROFSYS_USE_HIP=OFF" FORCE) - set(ROCPROFSYS_USE_ROCPROFILER - OFF - CACHE BOOL "Disabled via ROCPROFSYS_USE_HIP=OFF" FORCE) - set(ROCPROFSYS_USE_ROCM_SMI - OFF - CACHE BOOL "Disabled via ROCPROFSYS_USE_HIP=OFF" FORCE) +if(NOT ROCPROFSYS_USE_ROCM) set(ROCPROFSYS_USE_RCCL OFF - CACHE BOOL "Disabled via ROCPROFSYS_USE_HIP=OFF" FORCE) -elseif( - ROCPROFSYS_USE_HIP - AND NOT ROCPROFSYS_USE_ROCTRACER - AND NOT ROCPROFSYS_USE_ROCPROFILER - AND NOT ROCPROFSYS_USE_ROCM_SMI - AND NOT ROCPROFSYS_USE_RCCL) - rocprofiler_systems_message( - AUTHOR_WARNING - "Setting ROCPROFSYS_USE_HIP=OFF because roctracer, rocprofiler, rccl, and rocm-smi options are disabled" - ) - set(ROCPROFSYS_USE_HIP OFF) + CACHE BOOL "Disabled via ROCPROFSYS_USE_ROCM=OFF" FORCE) endif() if(ROCPROFSYS_BUILD_TESTING) @@ -378,14 +351,6 @@ endif() # # ------------------------------------------------------------------------------# -if(NOT ROCPROFSYS_USE_ROCTRACER AND NOT ROCPROFSYS_USE_ROCPROFILER) - set(ROCPROFSYS_HSA_ENV "# ") -endif() - -if(NOT ROCPROFSYS_USE_ROCPROFILER) - set(ROCPROFSYS_ROCP_ENV "# ") -endif() - configure_file( ${PROJECT_SOURCE_DIR}/LICENSE ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/doc/${PROJECT_NAME}/LICENSE diff --git a/projects/rocprofiler-systems/cmake/ConfigCPack.cmake b/projects/rocprofiler-systems/cmake/ConfigCPack.cmake index ce5fa73146..bb10029b64 100644 --- a/projects/rocprofiler-systems/cmake/ConfigCPack.cmake +++ b/projects/rocprofiler-systems/cmake/ConfigCPack.cmake @@ -54,9 +54,7 @@ 
set(ROCPROFSYS_CPACK_SYSTEM_NAME CACHE STRING "System name, e.g. Linux or Ubuntu-20.04") set(ROCPROFSYS_CPACK_PACKAGE_SUFFIX "") -if(ROCPROFSYS_USE_HIP - OR ROCPROFSYS_USE_ROCTRACER - OR ROCPROFSYS_USE_ROCM_SMI) +if(ROCPROFSYS_USE_ROCM) set(ROCPROFSYS_CPACK_PACKAGE_SUFFIX "${ROCPROFSYS_CPACK_PACKAGE_SUFFIX}-ROCm-${ROCmVersion_NUMERIC_VERSION}") endif() @@ -159,19 +157,12 @@ if(NOT ROCPROFSYS_BUILD_DYNINST) endif() endif() if(ROCmVersion_FOUND) - set(_ROCPROFILER_SUFFIX " (>= 1.0.0.${ROCmVersion_NUMERIC_VERSION})") - set(_ROCTRACER_SUFFIX " (>= 1.0.0.${ROCmVersion_NUMERIC_VERSION})") set(_ROCM_SMI_SUFFIX " (>= ${ROCmVersion_MAJOR_VERSION}.0.0.${ROCmVersion_NUMERIC_VERSION})") endif() -if(ROCPROFSYS_USE_ROCM_SMI) +if(ROCPROFSYS_USE_ROCM) list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocm-smi-lib${_ROCM_SMI_SUFFIX}") -endif() -if(ROCPROFSYS_USE_ROCTRACER) - list(APPEND _DEBIAN_PACKAGE_DEPENDS "roctracer-dev${_ROCTRACER_SUFFIX}") -endif() -if(ROCPROFSYS_USE_ROCPROFILER) - list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocprofiler-dev${_ROCPROFILER_SUFFIX}") + list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocprofiler-sdk (>= ${rocprofiler-sdk_VERSION})") endif() if(ROCPROFSYS_USE_MPI) if("${ROCPROFSYS_MPI_IMPL}" STREQUAL "openmpi") diff --git a/projects/rocprofiler-systems/cmake/PAPI.cmake b/projects/rocprofiler-systems/cmake/PAPI.cmake index 780dd101d1..394edd43f9 100644 --- a/projects/rocprofiler-systems/cmake/PAPI.cmake +++ b/projects/rocprofiler-systems/cmake/PAPI.cmake @@ -109,13 +109,6 @@ set(_ROCPROFSYS_PAPI_COMPONENTS ) if(ROCPROFSYS_PAPI_AUTO_COMPONENTS) - # rocm - if(ROCPROFSYS_USE_HIP - OR ROCPROFSYS_USE_ROCTRACER - OR ROCPROFSYS_USE_ROCM_SMI) - list(APPEND _ROCPROFSYS_PAPI_COMPONENTS rocm) - endif() - # lmsensors find_path(ROCPROFSYS_PAPI_LMSENSORS_ROOT_DIR NAMES include/sensors/sensors.h include/sensors.h) @@ -209,28 +202,35 @@ externalproject_add( BUILD_IN_SOURCE 1 PATCH_COMMAND ${CMAKE_COMMAND} -E env CC=${PAPI_C_COMPILER} - CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation LIBS=-lrt 
LDFLAGS=-lrt - ${ROCPROFSYS_PAPI_EXTRA_ENV} /configure --quiet + CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation\ -Wno-use-after-free LIBS=-lrt + LDFLAGS=-lrt ${ROCPROFSYS_PAPI_EXTRA_ENV} /configure --quiet --prefix=${ROCPROFSYS_PAPI_INSTALL_DIR} --with-static-lib=yes --with-shared-lib=no --with-perf-events --with-tests=no --with-components=${_ROCPROFSYS_PAPI_COMPONENTS} --libdir=${ROCPROFSYS_PAPI_INSTALL_DIR}/lib CONFIGURE_COMMAND - ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation + ${CMAKE_COMMAND} -E env + CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation\ -Wno-use-after-free ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} static install -s -j ${ROCPROFSYS_PAPI_CONFIGURE_JOBS} - BUILD_COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation - ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} utils install-utils -s + BUILD_COMMAND + ${CMAKE_COMMAND} -E env + CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation\ -Wno-use-after-free + ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} utils install-utils -s INSTALL_COMMAND "" BUILD_BYPRODUCTS "${_ROCPROFSYS_PAPI_BUILD_BYPRODUCTS}") # target for re-executing the installation add_custom_target( rocprofiler-systems-papi-install - COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation - ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} static install -s - COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation - ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} utils install-utils -s + COMMAND + ${CMAKE_COMMAND} -E env + CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation\ -Wno-use-after-free + ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} static install -s + COMMAND + ${CMAKE_COMMAND} -E env + CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation\ -Wno-use-after-free + ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} utils install-utils -s WORKING_DIRECTORY ${ROCPROFSYS_PAPI_SOURCE_DIR}/src COMMENT "Installing PAPI...") diff --git a/projects/rocprofiler-systems/cmake/Packages.cmake 
b/projects/rocprofiler-systems/cmake/Packages.cmake index eadbfe030e..c508a459d2 100644 --- a/projects/rocprofiler-systems/cmake/Packages.cmake +++ b/projects/rocprofiler-systems/cmake/Packages.cmake @@ -15,14 +15,12 @@ rocprofiler_systems_add_interface_library(rocprofiler-systems-threading rocprofiler_systems_add_interface_library( rocprofiler-systems-dyninst "Provides flags and libraries for Dyninst (dynamic instrumentation)") -rocprofiler_systems_add_interface_library(rocprofiler-systems-hip - "Provides flags and libraries for HIP") +rocprofiler_systems_add_interface_library(rocprofiler-systems-rocm + "Provides flags and libraries for ROCm") rocprofiler_systems_add_interface_library(rocprofiler-systems-roctracer "Provides flags and libraries for roctracer") rocprofiler_systems_add_interface_library(rocprofiler-systems-rocprofiler "Provides flags and libraries for rocprofiler") -rocprofiler_systems_add_interface_library(rocprofiler-systems-rocm-smi - "Provides flags and libraries for rocm-smi") rocprofiler_systems_add_interface_library( rocprofiler-systems-rccl "Provides flags for ROCm Communication Collectives Library (RCCL)") @@ -50,10 +48,7 @@ rocprofiler_systems_add_interface_library(rocprofiler-systems-compile-definition # libraries with relevant compile definitions set(ROCPROFSYS_EXTENSION_LIBRARIES - rocprofiler-systems::rocprofiler-systems-hip - rocprofiler-systems::rocprofiler-systems-roctracer - rocprofiler-systems::rocprofiler-systems-rocprofiler - rocprofiler-systems::rocprofiler-systems-rocm-smi + rocprofiler-systems::rocprofiler-systems-rocm rocprofiler-systems::rocprofiler-systems-rccl rocprofiler-systems::rocprofiler-systems-bfd rocprofiler-systems::rocprofiler-systems-mpi @@ -127,14 +122,11 @@ endforeach() # ----------------------------------------------------------------------------------------# # -# hip version +# ROCm Version # # ----------------------------------------------------------------------------------------# -if(ROCPROFSYS_USE_HIP - 
OR ROCPROFSYS_USE_ROCTRACER - OR ROCPROFSYS_USE_ROCPROFILER - OR ROCPROFSYS_USE_ROCM_SMI) +if(ROCPROFSYS_USE_ROCM) find_package(ROCmVersion) if(NOT ROCmVersion_FOUND) @@ -164,13 +156,13 @@ if(ROCPROFSYS_USE_HIP endif() set(ROCPROFSYS_ROCM_VERSION ${ROCmVersion_FULL_VERSION}) - set(ROCPROFSYS_HIP_VERSION_MAJOR ${ROCmVersion_MAJOR_VERSION}) - set(ROCPROFSYS_HIP_VERSION_MINOR ${ROCmVersion_MINOR_VERSION}) - set(ROCPROFSYS_HIP_VERSION_PATCH ${ROCmVersion_PATCH_VERSION}) - set(ROCPROFSYS_HIP_VERSION ${ROCmVersion_TRIPLE_VERSION}) + set(ROCPROFSYS_ROCM_VERSION_MAJOR ${ROCmVersion_MAJOR_VERSION}) + set(ROCPROFSYS_ROCM_VERSION_MINOR ${ROCmVersion_MINOR_VERSION}) + set(ROCPROFSYS_ROCM_VERSION_PATCH ${ROCmVersion_PATCH_VERSION}) + set(ROCPROFSYS_ROCM_VERSION ${ROCmVersion_TRIPLE_VERSION}) - if(ROCPROFSYS_HIP_VERSION_MAJOR GREATER_EQUAL 4 AND ROCPROFSYS_HIP_VERSION_MINOR - GREATER 3) + if(ROCPROFSYS_ROCM_VERSION_MAJOR GREATER_EQUAL 4 AND ROCPROFSYS_ROCM_VERSION_MINOR + GREATER 3) set(roctracer_kfdwrapper_LIBRARY) endif() @@ -181,64 +173,30 @@ if(ROCPROFSYS_USE_HIP rocprofiler_systems_add_feature(ROCPROFSYS_ROCM_VERSION "ROCm version used by rocprofiler-systems") else() - set(ROCPROFSYS_HIP_VERSION "0.0.0") - set(ROCPROFSYS_HIP_VERSION_MAJOR 0) - set(ROCPROFSYS_HIP_VERSION_MINOR 0) - set(ROCPROFSYS_HIP_VERSION_PATCH 0) + set(ROCPROFSYS_ROCM_VERSION "0.0.0") + set(ROCPROFSYS_ROCM_VERSION_MAJOR 0) + set(ROCPROFSYS_ROCM_VERSION_MINOR 0) + set(ROCPROFSYS_ROCM_VERSION_PATCH 0) endif() # ----------------------------------------------------------------------------------------# # -# HIP +# ROCm # # ----------------------------------------------------------------------------------------# -if(ROCPROFSYS_USE_HIP) - find_package(hip ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) - rocprofiler_systems_target_compile_definitions(rocprofiler-systems-hip - INTERFACE ROCPROFSYS_USE_HIP) - target_link_libraries(rocprofiler-systems-hip INTERFACE hip::host) -endif() +if(ROCPROFSYS_USE_ROCM) + 
find_package(rocprofiler-sdk ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) + rocprofiler_systems_target_compile_definitions(rocprofiler-systems-rocm + INTERFACE ROCPROFSYS_USE_ROCM) + target_link_libraries(rocprofiler-systems-rocm + INTERFACE rocprofiler-sdk::rocprofiler-sdk) -# ----------------------------------------------------------------------------------------# -# -# roctracer -# -# ----------------------------------------------------------------------------------------# - -if(ROCPROFSYS_USE_ROCTRACER) - find_package(roctracer ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) - rocprofiler_systems_target_compile_definitions(rocprofiler-systems-roctracer - INTERFACE ROCPROFSYS_USE_ROCTRACER) - target_link_libraries( - rocprofiler-systems-roctracer - INTERFACE roctracer::roctracer rocprofiler-systems::rocprofiler-systems-hip) -endif() - -# ----------------------------------------------------------------------------------------# -# -# rocprofiler -# -# ----------------------------------------------------------------------------------------# -if(ROCPROFSYS_USE_ROCPROFILER) - find_package(rocprofiler ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) - rocprofiler_systems_target_compile_definitions(rocprofiler-systems-rocprofiler - INTERFACE ROCPROFSYS_USE_ROCPROFILER) - target_link_libraries(rocprofiler-systems-rocprofiler - INTERFACE rocprofiler::rocprofiler) -endif() - -# ----------------------------------------------------------------------------------------# -# -# rocm-smi -# -# ----------------------------------------------------------------------------------------# - -if(ROCPROFSYS_USE_ROCM_SMI) find_package(rocm-smi ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) - rocprofiler_systems_target_compile_definitions(rocprofiler-systems-rocm-smi - INTERFACE ROCPROFSYS_USE_ROCM_SMI) - target_link_libraries(rocprofiler-systems-rocm-smi INTERFACE rocm-smi::rocm-smi) + target_link_libraries(rocprofiler-systems-rocm INTERFACE rocm-smi::rocm-smi) + + # find_package(amd-smi 
${rocprofiler_systems_FIND_QUIETLY} REQUIRED) + # target_link_libraries(rocprofiler-systems-rocm INTERFACE amd-smi::amd-smi) endif() # ----------------------------------------------------------------------------------------# diff --git a/projects/rocprofiler-systems/cmake/Templates/modulefile.in b/projects/rocprofiler-systems/cmake/Templates/modulefile.in index 9e8b7c75ef..cf45f889d6 100644 --- a/projects/rocprofiler-systems/cmake/Templates/modulefile.in +++ b/projects/rocprofiler-systems/cmake/Templates/modulefile.in @@ -14,7 +14,3 @@ prepend-path PATH "${ROOT}/bin" prepend-path LD_LIBRARY_PATH "${ROOT}/@CMAKE_INSTALL_LIBDIR@" prepend-path PYTHONPATH "${ROOT}/@CMAKE_INSTALL_PYTHONDIR@" setenv @PROJECT_NAME_UNDERSCORED@_DIR "${ROOT}/@CMAKE_INSTALL_DATAROOTDIR@/cmake/@PROJECT_NAME@" - -# @ROCPROFSYS_HSA_ENV@setenv HSA_TOOLS_LIB "${ROOT}/@CMAKE_INSTALL_LIBDIR@/@CMAKE_SHARED_LIBRARY_PREFIX@rocprof-sys@CMAKE_SHARED_LIBRARY_SUFFIX@" -# @ROCPROFSYS_HSA_ENV@setenv HSA_TOOLS_REPORT_LOAD_FAILURE 1 -# @ROCPROFSYS_ROCP_ENV@setenv ROCP_TOOL_LIB "${ROOT}/@CMAKE_INSTALL_LIBDIR@/@CMAKE_SHARED_LIBRARY_PREFIX@rocprof-sys@CMAKE_SHARED_LIBRARY_SUFFIX@" diff --git a/projects/rocprofiler-systems/cmake/Templates/setup-env.sh.in b/projects/rocprofiler-systems/cmake/Templates/setup-env.sh.in index b6c4a97de0..882c7838c0 100644 --- a/projects/rocprofiler-systems/cmake/Templates/setup-env.sh.in +++ b/projects/rocprofiler-systems/cmake/Templates/setup-env.sh.in @@ -26,12 +26,3 @@ export LD_LIBRARY_PATH export PYTHONPATH export CMAKE_PREFIX_PATH export @PROJECT_NAME_UNDERSCORED@_DIR - -# ROCm environment variables -# @ROCPROFSYS_HSA_ENV@HSA_TOOLS_LIB="${BASEDIR}/@CMAKE_INSTALL_LIBDIR@/@CMAKE_SHARED_LIBRARY_PREFIX@rocprof-sys-dl@CMAKE_SHARED_LIBRARY_SUFFIX@" -# @ROCPROFSYS_HSA_ENV@HSA_TOOLS_REPORT_LOAD_FAILURE=1 -# @ROCPROFSYS_ROCP_ENV@ROCP_TOOL_LIB="${BASEDIR}/@CMAKE_INSTALL_LIBDIR@/@CMAKE_SHARED_LIBRARY_PREFIX@rocprof-sys@CMAKE_SHARED_LIBRARY_SUFFIX@" - -# @ROCPROFSYS_HSA_ENV@export 
HSA_TOOLS_LIB -# @ROCPROFSYS_HSA_ENV@export HSA_TOOLS_REPORT_LOAD_FAILURE -# @ROCPROFSYS_ROCP_ENV@export ROCP_TOOL_LIB diff --git a/projects/rocprofiler-systems/docker/Dockerfile.opensuse b/projects/rocprofiler-systems/docker/Dockerfile.opensuse index 2dbb1b5d43..31594cf2c1 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.opensuse +++ b/projects/rocprofiler-systems/docker/Dockerfile.opensuse @@ -25,7 +25,7 @@ RUN zypper --non-interactive update -y && \ zypper --non-interactive install -y -t pattern devel_basis && \ zypper --non-interactive install -y binutils-gold cmake curl dpkg-devel \ gcc-c++ git libnuma-devel openmpi3-devel python3-pip rpm-build wget && \ - python3 -m pip install 'cmake==3.18.4' + python3 -m pip install 'cmake==3.21' ARG ROCM_VERSION=0.0 ARG AMDGPU_RPM=6.2/sle/15.6/amdgpu-install-6.2.60200-1.noarch.rpm diff --git a/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci b/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci index 9d7ac120ce..2267cf6a66 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci +++ b/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci @@ -31,7 +31,7 @@ RUN zypper --non-interactive update -y && \ gcc-c++ git libnuma-devel openmpi3-devel papi-devel python3-pip \ rpm-build wget && \ zypper --non-interactive clean --all && \ - python3 -m pip install 'cmake==3.18.4' + python3 -m pip install 'cmake==3.21' COPY ./dyninst-source /tmp/dyninst diff --git a/projects/rocprofiler-systems/docker/Dockerfile.rhel b/projects/rocprofiler-systems/docker/Dockerfile.rhel index 298d69ed6a..1a6e2342fa 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.rhel +++ b/projects/rocprofiler-systems/docker/Dockerfile.rhel @@ -18,7 +18,7 @@ RUN yum groupinstall -y "Development Tools" && \ yum install -y --allowerasing cmake curl dpkg-devel numactl-devel openmpi-devel \ papi-devel python3-pip texinfo wget which zlib-devel && \ yum clean all && \ - python3 -m pip install 'cmake==3.18.4' + python3 
-m pip install 'cmake==3.21' ARG ROCM_VERSION=0.0 ARG AMDGPU_RPM=6.2/rhel/9.4/amdgpu-install-6.2.60202-1.el9.noarch.rpm diff --git a/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci b/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci index 2429a2bbf5..6b09697396 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci +++ b/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci @@ -22,7 +22,7 @@ RUN yum groupinstall -y "Development Tools" && \ yum install -y --allowerasing cmake curl dpkg-devel numactl-devel \ openmpi-devel papi-devel python3-pip texinfo wget which zlib-devel && \ yum clean all && \ - python3 -m pip install 'cmake==3.18.4' + python3 -m pip install 'cmake==3.21' COPY ./dyninst-source /tmp/dyninst diff --git a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu index d41fcb9c85..a98d692242 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu +++ b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu @@ -30,9 +30,9 @@ RUN apt-get update && \ python3-pip rpm texinfo wget && \ OS_VERSION=$(cat /etc/os-release | grep VERSION_ID | sed 's/=/ /'1 | awk '{print $NF}' | sed 's/"//g') && \ if [ "${OS_VERSION}" == "24.04" ]; then \ - python3 -m pip install --break-system-packages 'cmake==3.18.4'; \ + python3 -m pip install --break-system-packages 'cmake==3.21'; \ else \ - python3 -m pip install 'cmake==3.18.4'; \ + python3 -m pip install 'cmake==3.21'; \ fi RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ diff --git a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci index 5058ef81c1..36c26f6bdc 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci +++ b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci @@ -31,9 +31,9 @@ RUN apt-get update && \ python3-pip texinfo unzip wget zip zlib1g-dev && \ apt-get autoclean && \ if [ "${OS_VERSION}" == "24.04" ]; then \ - python3 -m pip install 
--break-system-packages 'cmake==3.18.4' \ + python3 -m pip install --break-system-packages 'cmake==3.21' \ else \ - python3 -m pip install 'cmake==3.18.4'; \ + python3 -m pip install 'cmake==3.21'; \ fi COPY ./dyninst-source /tmp/dyninst diff --git a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst index f624318a6b..5e50242969 100644 --- a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst +++ b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst @@ -228,7 +228,7 @@ Generating a default configuration file ROCPROFSYS_PROFILE = false ROCPROFSYS_USE_SAMPLING = false ROCPROFSYS_USE_PROCESS_SAMPLING = true - ROCPROFSYS_USE_ROCTRACER = true + ROCPROFSYS_USE_ROCM = true ROCPROFSYS_USE_ROCM_SMI = true ROCPROFSYS_USE_KOKKOSP = false ROCPROFSYS_USE_CODE_COVERAGE = false @@ -248,9 +248,6 @@ Generating a default configuration file ROCPROFSYS_PERFETTO_FILE = perfetto-trace.proto ROCPROFSYS_PERFETTO_FILL_POLICY = discard ROCPROFSYS_PERFETTO_SHMEM_SIZE_HINT_KB = 4096 - ROCPROFSYS_ROCTRACER_HSA_ACTIVITY = false - ROCPROFSYS_ROCTRACER_HSA_API = false - ROCPROFSYS_ROCTRACER_HSA_API_TYPES = ROCPROFSYS_SAMPLING_CPUS = ROCPROFSYS_SAMPLING_DELAY = 0.5 ROCPROFSYS_SAMPLING_FREQ = 10 @@ -363,13 +360,10 @@ Viewing the setting descriptions | ROCPROFSYS_PERFETTO_FILL_POLICY | Behavior when perfetto buffer is ful... | | ROCPROFSYS_PERFETTO_SHMEM_SIZE_HINT_KB | Hint for shared-memory buffer size i... | | ROCPROFSYS_PRECISION | Set the global output precision for ... | - | ROCPROFSYS_ROCTRACER_HSA_ACTIVITY | Enable HSA activity tracing support | - | ROCPROFSYS_ROCTRACER_HSA_API | Enable HSA API tracing support | - | ROCPROFSYS_ROCTRACER_HSA_API_TYPES | HSA API type to collect | | ROCPROFSYS_SAMPLING_CPUS | CPUs to collect frequency informatio... | | ROCPROFSYS_SAMPLING_DELAY | Number of seconds to wait before the... 
| | ROCPROFSYS_SAMPLING_FREQ | Number of software interrupts per se... | - | ROCPROFSYS_SAMPLING_GPUS | Devices to query when ROCPROFSYS_USE_... | + | ROCPROFSYS_SAMPLING_GPUS | Devices to query when ROCPROFSYS_USE... | | ROCPROFSYS_SCIENTIFIC | Set the global numerical reporting t... | | ROCPROFSYS_STRICT_CONFIG | Throw errors for unknown setting nam... | | ROCPROFSYS_SUPPRESS_CONFIG | Disable processing of setting config... | @@ -391,13 +385,13 @@ Viewing the setting descriptions | ROCPROFSYS_TRACE | Enable perfetto backend | | ROCPROFSYS_USE_PID | Enable tagging filenames with proces... | | ROCPROFSYS_USE_ROCM_SMI | Enable sampling GPU power, temp, uti... | - | ROCPROFSYS_USE_ROCTRACER | Enable ROCM tracing | + | ROCPROFSYS_USE_ROCM | Enable ROCM tracing | | ROCPROFSYS_USE_SAMPLING | Enable statistical sampling of call-... | | ROCPROFSYS_USE_PROCESS_SAMPLING | Enable a background thread which sam... | | ROCPROFSYS_PROFILE | Enable timemory backend | | ROCPROFSYS_VERBOSE | Verbosity level | | ROCPROFSYS_WIDTH | Set the global output width for comp... 
| - |-----------------------------------------|-----------------------------------------| + |------------------------------------------|-----------------------------------------| Viewing components ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst b/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst index f8702373e0..3821abd589 100644 --- a/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst +++ b/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst @@ -268,8 +268,6 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ $ rocprof-sys-sample -- ./parallel-overhead-locks 30 4 100 - HSA_TOOLS_LIB=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1 - HSA_TOOLS_REPORT_LOAD_FAILURE=1 LD_PRELOAD=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1 ROCPROFSYS_USE_PROCESS_SAMPLING=false ROCPROFSYS_USE_SAMPLING=true @@ -283,8 +281,6 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ $ rocprof-sys-sample -PTDH -I all -- ./parallel-overhead-locks 30 4 100 - HSA_TOOLS_LIB=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1 - HSA_TOOLS_REPORT_LOAD_FAILURE=1 KOKKOS_PROFILE_LIBRARY=/opt/rocprofiler-systems/lib/librocprof-sys.so.1.7.1 LD_PRELOAD=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1 ROCPROFSYS_CPU_FREQ_ENABLED=true @@ -298,9 +294,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ ROCPROFSYS_USE_PROCESS_SAMPLING=true ROCPROFSYS_USE_RCCLP=true ROCPROFSYS_USE_ROCM_SMI=true - ROCPROFSYS_USE_ROCPROFILER=true - ROCPROFSYS_USE_ROCTRACER=true - ROCPROFSYS_USE_ROCTX=true + ROCPROFSYS_USE_ROCM=true ROCPROFSYS_USE_SAMPLING=true ROCPROFSYS_PROFILE=true OMP_TOOL_LIBRARIES=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1 @@ -330,9 +324,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ 
ROCPROFSYS_USE_PROCESS_SAMPLING=true ROCPROFSYS_USE_RCCLP=false ROCPROFSYS_USE_ROCM_SMI=false - ROCPROFSYS_USE_ROCPROFILER=false - ROCPROFSYS_USE_ROCTRACER=false - ROCPROFSYS_USE_ROCTX=false + ROCPROFSYS_USE_ROCM=false ROCPROFSYS_USE_SAMPLING=true ROCPROFSYS_PROFILE=true ... @@ -363,9 +355,7 @@ Here is the full output from the previous ROCPROFSYS_USE_PROCESS_SAMPLING=true ROCPROFSYS_USE_RCCLP=false ROCPROFSYS_USE_ROCM_SMI=false - ROCPROFSYS_USE_ROCPROFILER=false - ROCPROFSYS_USE_ROCTRACER=false - ROCPROFSYS_USE_ROCTX=false + ROCPROFSYS_USE_ROCM=false ROCPROFSYS_USE_SAMPLING=true [rocprof-sys][dl][1785877] rocprofsys_main [rocprof-sys][1785877][rocprofsys_init_tooling] Instrumentation mode: Sampling diff --git a/projects/rocprofiler-systems/docs/install/install.rst b/projects/rocprofiler-systems/docs/install/install.rst index 285f635bcf..dd3c67db0c 100644 --- a/projects/rocprofiler-systems/docs/install/install.rst +++ b/projects/rocprofiler-systems/docs/install/install.rst @@ -241,8 +241,8 @@ Installing ROCm Systems Profiler ----------------------------------- ROCm Systems Profiler has CMake configuration options for MPI support (``ROCPROFSYS_USE_MPI`` or -``ROCPROFSYS_USE_MPI_HEADERS``), HIP kernel tracing (``ROCPROFSYS_USE_ROCTRACER``), -ROCm device sampling (``ROCPROFSYS_USE_ROCM_SMI``), OpenMP-Tools (``ROCPROFSYS_USE_OMPT``), +``ROCPROFSYS_USE_MPI_HEADERS``), +ROCm tracing and sampling (``ROCPROFSYS_USE_ROCM``), OpenMP-Tools (``ROCPROFSYS_USE_OMPT``), hardware counters via PAPI (``ROCPROFSYS_USE_PAPI``), among other features. Various additional features can be enabled via the ``TIMEMORY_USE_*`` `CMake options `_. @@ -256,22 +256,20 @@ in `the Perfetto UI `_. .. 
code-block:: shell git clone https://github.com/ROCm/rocprofiler-systems.git rocprof-sys-source - cmake \ - -B rocprof-sys-build \ + cmake \ + -B rocprof-sys-build \ -D CMAKE_INSTALL_PREFIX=/opt/rocprofiler-systems \ - -D ROCPROFSYS_USE_HIP=ON \ - -D ROCPROFSYS_USE_ROCM_SMI=ON \ - -D ROCPROFSYS_USE_ROCTRACER=ON \ - -D ROCPROFSYS_USE_PYTHON=ON \ - -D ROCPROFSYS_USE_OMPT=ON \ - -D ROCPROFSYS_USE_MPI_HEADERS=ON \ - -D ROCPROFSYS_BUILD_PAPI=ON \ - -D ROCPROFSYS_BUILD_LIBUNWIND=ON \ - -D ROCPROFSYS_BUILD_DYNINST=ON \ - -D DYNINST_BUILD_TBB=ON \ - -D DYNINST_BUILD_BOOST=ON \ - -D DYNINST_BUILD_ELFUTILS=ON \ - -D DYNINST_BUILD_LIBIBERTY=ON \ + -D ROCPROFSYS_USE_ROCM=ON \ + -D ROCPROFSYS_USE_PYTHON=ON \ + -D ROCPROFSYS_USE_OMPT=ON \ + -D ROCPROFSYS_USE_MPI_HEADERS=ON \ + -D ROCPROFSYS_BUILD_PAPI=ON \ + -D ROCPROFSYS_BUILD_LIBUNWIND=ON \ + -D ROCPROFSYS_BUILD_DYNINST=ON \ + -D DYNINST_BUILD_TBB=ON \ + -D DYNINST_BUILD_BOOST=ON \ + -D DYNINST_BUILD_ELFUTILS=ON \ + -D DYNINST_BUILD_LIBIBERTY=ON \ rocprof-sys-source cmake --build rocprof-sys-build --target all --parallel 8 cmake --build rocprof-sys-build --target install diff --git a/projects/rocprofiler-systems/scripts/build-release.sh b/projects/rocprofiler-systems/scripts/build-release.sh index e8f419aa9a..e95de36e87 100755 --- a/projects/rocprofiler-systems/scripts/build-release.sh +++ b/projects/rocprofiler-systems/scripts/build-release.sh @@ -372,7 +372,7 @@ if [ "${IS_DOCKER}" -ne 0 ]; then git config --global --add safe.directory ${PWD verbose-run echo "Build rocprofiler-systems installers with generators: ${GENERATORS}" -build-and-package ${WITH_CORE} ${DISTRO}-core -DROCPROFSYS_USE_HIP=OFF -DROCPROFSYS_USE_MPI=OFF -build-and-package ${WITH_MPI} ${DISTRO}-${MPI_IMPL} -DROCPROFSYS_USE_HIP=OFF -DROCPROFSYS_USE_MPI=ON -build-and-package ${WITH_ROCM} ${DISTRO}-rocm-${ROCM_VERSION} -DROCPROFSYS_USE_HIP=ON -DROCPROFSYS_USE_MPI=OFF -build-and-package ${WITH_ROCM_MPI} ${DISTRO}-rocm-${ROCM_VERSION}-${MPI_IMPL} 
-DROCPROFSYS_USE_HIP=ON -DROCPROFSYS_USE_MPI=ON +build-and-package ${WITH_CORE} ${DISTRO}-core -DROCPROFSYS_USE_ROCM=OFF -DROCPROFSYS_USE_MPI=OFF +build-and-package ${WITH_MPI} ${DISTRO}-${MPI_IMPL} -DROCPROFSYS_USE_ROCM=OFF -DROCPROFSYS_USE_MPI=ON +build-and-package ${WITH_ROCM} ${DISTRO}-rocm-${ROCM_VERSION} -DROCPROFSYS_USE_ROCM=ON -DROCPROFSYS_USE_MPI=OFF +build-and-package ${WITH_ROCM_MPI} ${DISTRO}-rocm-${ROCM_VERSION}-${MPI_IMPL} -DROCPROFSYS_USE_ROCM=ON -DROCPROFSYS_USE_MPI=ON diff --git a/projects/rocprofiler-systems/source/bin/CMakeLists.txt b/projects/rocprofiler-systems/source/bin/CMakeLists.txt index de27963dca..3ac0cd0786 100644 --- a/projects/rocprofiler-systems/source/bin/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/bin/CMakeLists.txt @@ -1,17 +1,8 @@ # executable RPATH -if(ROCPROFSYS_USE_ROCPROFILER - AND rocprofiler_LIBRARY_DIR - AND ROCmVersion_TRIPLE_VERSION VERSION_LESS 5.2.0 - AND NOT CMAKE_INSTALL_RPATH_USE_LINK_PATH) - set(ROCPROFSYS_EXE_INSTALL_RPATH - "\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}:\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}:${rocprofiler_LIBRARY_DIR}" - ) -else() - set(ROCPROFSYS_EXE_INSTALL_RPATH - "\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}:\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}" - ) -endif() +set(ROCPROFSYS_EXE_INSTALL_RPATH + "\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}:\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}" + ) # executables add_subdirectory(rocprof-sys-avail) diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/avail.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/avail.cpp index 9f5d67a9ac..2d8c1a7562 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/avail.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/avail.cpp @@ -33,8 +33,7 @@ #include "api.hpp" #include "core/config.hpp" #include "core/gpu.hpp" -#include "core/hip_runtime.hpp" -#include "library/rocprofiler.hpp" +#include "library/rocm.hpp" #include #include @@ 
-119,7 +118,7 @@ write_hw_counter_info(std::ostream&, const array_t& = {}, namespace { // initialize HIP before main so that librocprof-sys is not HSA_TOOLS_LIB -int gpu_count = rocprofsys::gpu::hip_device_count(); +int gpu_count = rocprofsys::gpu::device_count(); // statically allocated shared_ptrs to prevent use after free errors auto timemory_manager = tim::manager::master_instance(); @@ -508,15 +507,15 @@ main(int argc, char** argv) return EXIT_FAILURE; } -#if ROCPROFSYS_USE_HIP > 0 +#if ROCPROFSYS_USE_ROCM > 0 if(gpu_count > 0) { size_t _num_metrics = 0; try { - // call to rocm_metrics() will add choices to ROCPROFSYS_ROCM_EVENTS setting + // call to rocm_events() will add choices to ROCPROFSYS_ROCM_EVENTS setting // so always perform this call even if list of HW counters is not requested - _num_metrics = rocprofsys::rocprofiler::rocm_metrics().size(); + _num_metrics = rocprofsys::rocm::rocm_events().size(); } catch(std::runtime_error& _e) { verbprintf(0, "Retrieving the GPU HW counters failed: %s", _e.what()); @@ -615,9 +614,9 @@ main(int argc, char** argv) } } - signal(SIGABRT, &dump_log_abort); - signal(SIGSEGV, &dump_log_abort); - signal(SIGQUIT, &dump_log_abort); + // signal(SIGABRT, &dump_log_abort); + // signal(SIGSEGV, &dump_log_abort); + // signal(SIGQUIT, &dump_log_abort); if(!os) os = &std::cout; @@ -641,6 +640,8 @@ main(int argc, char** argv) } dump_log(); + const_cast&>(tim::settings::shared_instance()).reset(); + return 0; } @@ -1076,7 +1077,7 @@ write_hw_counter_info(std::ostream& os, const array_t& options, auto _papi_events = tim::papi::available_events_info(); auto _rocm_events = - (gpu_count > 0) ? rocprofsys::rocprofiler::rocm_metrics() : hwcounter_info_t{}; + (gpu_count > 0) ? 
rocprofsys::rocm::rocm_events() : hwcounter_info_t{}; if(alphabetical) { diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/generate_config.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/generate_config.cpp index ac2b7738db..0aa0f9eeb2 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/generate_config.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/generate_config.cpp @@ -339,7 +339,7 @@ generate_config(std::string _config_file, const std::set& _config_f for(const auto* itr : { "ROCPROFSYS_CONFIG", "ROCPROFSYS_MODE", "ROCPROFSYS_TRACE", "ROCPROFSYS_PROFILE", "ROCPROFSYS_USE_SAMPLING", - "ROCPROFSYS_USE_PROCESS_SAMPLING", "ROCPROFSYS_USE_ROCTRACER", + "ROCPROFSYS_USE_PROCESS_SAMPLING", "ROCPROFSYS_USE_ROCM", "ROCPROFSYS_USE_ROCM_SMI", "ROCPROFSYS_USE_KOKKOSP", "ROCPROFSYS_USE_OMPT", "ROCPROFSYS_USE", "ROCPROFSYS_OUTPUT" }) { diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/info_type.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/info_type.cpp index 49a85d0657..d7d7d4e494 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/info_type.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/info_type.cpp @@ -29,8 +29,6 @@ #include "library/components/fork_gotcha.hpp" #include "library/components/mpi_gotcha.hpp" #include "library/components/pthread_gotcha.hpp" -#include "library/components/rocprofiler.hpp" -#include "library/components/roctracer.hpp" #include #include diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-causal/impl.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-causal/impl.cpp index be0da1ee8d..855b20307d 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-causal/impl.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-causal/impl.cpp @@ -752,10 +752,6 @@ parse_args(int argc, char** argv, std::vector& _env, parser.end_group(); -#if ROCPROFSYS_HIP_VERSION > 0 && 
ROCPROFSYS_HIP_VERSION < 50300 - update_env(_env, "HSA_ENABLE_INTERRUPT", 0); -#endif - auto _inpv = std::vector{}; auto _outv = std::vector{}; bool _hash = false; @@ -824,11 +820,6 @@ parse_args(int argc, char** argv, std::vector& _env, add_default_env(_env, "ROCPROFSYS_USE_MPIP", true); #endif -#if defined(ROCPROFSYS_USE_ROCTRACER) && ROCPROFSYS_USE_ROCTRACER > 0 - add_default_env(_env, "ROCPROFSYS_ROCTRACER_HIP_API", true); - add_default_env(_env, "ROCPROFSYS_ROCTRACER_HSA_API", true); -#endif - #if defined(ROCPROFSYS_USE_RCCL) && ROCPROFSYS_USE_RCCL > 0 add_default_env(_env, "ROCPROFSYS_USE_RCCLP", true); #endif diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/CMakeLists.txt b/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/CMakeLists.txt index ecacdbb90b..3e32a39363 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/CMakeLists.txt @@ -35,6 +35,8 @@ target_link_libraries( timemory::timemory-extensions timemory::timemory-core) +add_target_flag_if_avail(rocprofiler-systems-instrument "-Wno-deprecated-declarations") + set_target_properties( rocprofiler-systems-instrument PROPERTIES BUILD_RPATH "\$ORIGIN:\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}" diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/internal_libs.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/internal_libs.cpp index 3bda766905..5d4dd66842 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/internal_libs.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/internal_libs.cpp @@ -312,13 +312,25 @@ get_internal_basic_libs_impl() "liblzma.so" }; // shared libraries used by rocprof-sys - const auto _omni_libs = strview_init_t{ - "libstdc++.so.6", "libgotcha.so", "libunwind-coredump.so", - "libunwind-generic.so", "libunwind-ptrace.so", "libunwind-setjmp.so", - "libunwind.so", 
"libunwind-x86_64.so", "librocm_smi64.so", - "libroctx64.so", "librocmtools.so", "libroctracer64.so", - "librocprofiler64.so", "libpapi.so", "libpfm.so" - }; + const auto _omni_libs = strview_init_t{ "libstdc++.so.6", + "libgotcha.so", + "libunwind-coredump.so", + "libunwind-generic.so", + "libunwind-ptrace.so", + "libunwind-setjmp.so", + "libunwind.so", + "libunwind-x86_64.so", + "librocm_smi64.so", + "libroctx64.so", + "librocmtools.so", + "libroctracer64.so", + "librocprofiler64.so", + "libpapi.so", + "libpfm.so", + "librocprofiler-register.so", + "librocprofiler-sdk.so", + "librocprofiler-sdk-roctx.so", + "libamd_smi.so" }; // shared libraries potentially used by timemory const auto _3rdparty_libs = strview_init_t{ "libcaliper.so", diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp index 93a2da5ae1..e8ce55473b 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp @@ -357,10 +357,12 @@ main(int argc, char** argv) itr.find("rocprof-sys") != std::string::npos || itr.find("rocprofiler-systems") != std::string::npos || std::regex_search( - itr, std::regex{ "lib(dyninstAPI|stackwalk|pcontrol|patchAPI|parseAPI|" - "instructionAPI|symtabAPI|dynDwarf|common|dynElf|tbb|" - "tbbmalloc|tbbmalloc_proxy|gotcha|libunwind|roctracer|" - "hsa-runtime|amdhip|rocm_smi)\\.(so|a)" })) + itr, std::regex{ + "lib(dyninstAPI|stackwalk|pcontrol|patchAPI|parseAPI|" + "instructionAPI|symtabAPI|dynDwarf|common|dynElf|tbb|tbbmalloc|" + "tbbmalloc_proxy|gotcha|libunwind|roctracer64|hsa-runtime|amdhip|" + "amd_comgr|rocm_smi64|rocprofiler64|rocprofiler-register|" + "rocprofiler-sdk|rocprofiler-sdk-roctx|amd_smi)\\.(so|a)" })) { if(!find(filepath::dirname(itr), lib_search_paths)) 
lib_search_paths.emplace_back(filepath::dirname(itr)); diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp index da9da3a5fa..c951455199 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp @@ -44,14 +44,6 @@ #include #include -#if !defined(ROCPROFSYS_USE_ROCTRACER) -# define ROCPROFSYS_USE_ROCTRACER 0 -#endif - -#if !defined(ROCPROFSYS_USE_ROCPROFILER) -# define ROCPROFSYS_USE_ROCPROFILER 0 -#endif - namespace color = tim::log::color; using namespace timemory::join; using tim::get_env; @@ -140,17 +132,6 @@ get_initial_environment() update_env(_env, "ROCPROFSYS_USE_SAMPLING", (_mode != "causal")); -#if defined(ROCPROFSYS_USE_ROCTRACER) || defined(ROCPROFSYS_USE_ROCPROFILER) - update_env(_env, "HSA_TOOLS_LIB", _dl_libpath); - if(!getenv("HSA_TOOLS_REPORT_LOAD_FAILURE")) - update_env(_env, "HSA_TOOLS_REPORT_LOAD_FAILURE", "1"); -#endif - -#if defined(ROCPROFSYS_USE_ROCPROFILER) - update_env(_env, "ROCP_TOOL_LIB", _omni_libpath); - if(!getenv("ROCP_HSA_INTERCEPT")) update_env(_env, "ROCP_HSA_INTERCEPT", "1"); -#endif - #if defined(ROCPROFSYS_USE_OMPT) if(!getenv("OMP_TOOL_LIBRARIES")) update_env(_env, "OMP_TOOL_LIBRARIES", _dl_libpath, UPD_APPEND); @@ -357,14 +338,6 @@ parse_args(int argc, char** argv, std::vector& _env) %{INDENT}% 0 avoid triggering the bug, potentially at the cost of reduced performance %{INDENT}% 1 do not modify how ROCm is notified about kernel completion)"; - auto _realtime_reqs = (get_env("HSA_ENABLE_INTERRUPT", std::string{}, false).empty()) - ? 
std::vector{ "hsa-interrupt" } - : std::vector{}; - -#if ROCPROFSYS_USE_ROCTRACER == 0 && ROCPROFSYS_USE_ROCPROFILER == 0 - _realtime_reqs.clear(); -#endif - const auto* _trace_policy_desc = R"(Policy for new data when the buffer size limit is reached: %{INDENT}%- discard : new data is ignored @@ -720,7 +693,6 @@ parse_args(int argc, char** argv, std::vector& _env) parser.add_argument({ "--realtime" }, _realtime_desc) .min_count(0) - .required(std::move(_realtime_reqs)) .action([&](parser_t& p) { auto _v = p.get>("realtime"); update_env(_env, "ROCPROFSYS_SAMPLING_REALTIME", true); @@ -741,10 +713,20 @@ parse_args(int argc, char** argv, std::vector& _env) } }); - std::set _backend_choices = { "all", "kokkosp", "mpip", - "ompt", "rcclp", "rocm-smi", - "roctracer", "rocprofiler", "roctx", - "mutex-locks", "spin-locks", "rw-locks" }; + std::set _backend_choices = { "all", + "kokkosp", + "mpip", + "ompt", + "rcclp", + "rocm-smi", + "roctracer", + "rocprofiler", + "roctx", + "mutex-locks", + "spin-locks", + "rw-locks", + "rocprofiler-sdk", + "rocm" }; #if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS) _backend_choices.erase("mpip"); @@ -758,17 +740,10 @@ parse_args(int argc, char** argv, std::vector& _env) _backend_choices.erase("rcclp"); #endif -#if !defined(ROCPROFSYS_USE_ROCM_SMI) +#if !defined(ROCPROFSYS_USE_ROCM) + _backend_choices.erase("rocm"); _backend_choices.erase("rocm-smi"); -#endif - -#if !defined(ROCPROFSYS_USE_ROCTRACER) - _backend_choices.erase("roctracer"); - _backend_choices.erase("roctx"); -#endif - -#if !defined(ROCPROFSYS_USE_ROCPROFILER) - _backend_choices.erase("rocprofiler"); + _backend_choices.erase("rocprofiler-sdk"); #endif parser.start_group("BACKEND OPTIONS", @@ -784,11 +759,9 @@ parse_args(int argc, char** argv, std::vector& _env) _update("ROCPROFSYS_USE_KOKKOSP", _v.count("kokkosp") > 0); _update("ROCPROFSYS_USE_MPIP", _v.count("mpip") > 0); _update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0); + 
_update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0); _update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0); - _update("ROCPROFSYS_USE_ROCTX", _v.count("roctx") > 0); _update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0); - _update("ROCPROFSYS_USE_ROCTRACER", _v.count("roctracer") > 0); - _update("ROCPROFSYS_USE_ROCPROFILER", _v.count("rocprofiler") > 0); _update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); @@ -810,27 +783,18 @@ parse_args(int argc, char** argv, std::vector& _env) _update("ROCPROFSYS_USE_KOKKOSP", _v.count("kokkosp") > 0); _update("ROCPROFSYS_USE_MPIP", _v.count("mpip") > 0); _update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0); + _update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0); _update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0); - _update("ROCPROFSYS_USE_ROCTX", _v.count("roctx") > 0); _update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0); - _update("ROCPROFSYS_USE_ROCTRACER", _v.count("roctracer") > 0); - _update("ROCPROFSYS_USE_ROCPROFILER", _v.count("rocprofiler") > 0); _update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); - if(_v.count("all") > 0 || - (_v.count("roctracer") > 0 && _v.count("rocprofiler") > 0)) - { - remove_env(_env, "HSA_TOOLS_LIB"); - remove_env(_env, "HSA_TOOLS_REPORT_LOAD_FAILURE"); - } - - if(_v.count("all") > 0 || _v.count("rocprofiler") > 0) - { - remove_env(_env, "ROCP_TOOL_LIB"); - remove_env(_env, "ROCP_HSA_INTERCEPT"); - } + // if(_v.count("all") > 0 || _v.count("rocprofiler") > 0) + // { + // remove_env(_env, "ROCP_TOOL_LIB"); + // remove_env(_env, "ROCP_HSA_INTERCEPT"); + // } if(_v.count("all") > 0 || _v.count("ompt") > 0) remove_env(_env, "OMP_TOOL_LIBRARIES"); @@ 
-850,18 +814,6 @@ parse_args(int argc, char** argv, std::vector& _env) update_env(_env, "ROCPROFSYS_PAPI_EVENTS", _events); }); -#if defined(ROCPROFSYS_USE_ROCPROFILER) - parser - .add_argument({ "-G", "--gpu-events" }, - "Set the GPU hardware counter events to record (ref: " - "`rocprof-sys-avail -H -c GPU`)") - .action([&](parser_t& p) { - auto _events = - join(array_config{ "," }, p.get>("gpu-events")); - update_env(_env, "ROCPROFSYS_ROCM_EVENTS", _events); - }); -#endif - parser.start_group("MISCELLANEOUS OPTIONS", ""); parser .add_argument({ "-i", "--inlines" }, diff --git a/projects/rocprofiler-systems/source/lib/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/CMakeLists.txt index 6d93bf7865..13f320d39a 100644 --- a/projects/rocprofiler-systems/source/lib/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/CMakeLists.txt @@ -12,15 +12,7 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.20) cmake_policy(SET CMP0115 NEW) endif() -if(ROCPROFSYS_USE_ROCPROFILER - AND rocprofiler_LIBRARY_DIR - AND ROCmVersion_TRIPLE_VERSION VERSION_LESS 5.2.0 - AND NOT CMAKE_INSTALL_RPATH_USE_LINK_PATH) - set(ROCPROFSYS_LIB_INSTALL_RPATH - "\$ORIGIN:\$ORIGIN/${PROJECT_NAME}:${rocprofiler_LIBRARY_DIR}") -else() - set(ROCPROFSYS_LIB_INSTALL_RPATH "\$ORIGIN:\$ORIGIN/${PROJECT_NAME}") -endif() +set(ROCPROFSYS_LIB_INSTALL_RPATH "\$ORIGIN:\$ORIGIN/${PROJECT_NAME}") # ------------------------------------------------------------------------------# # @@ -50,10 +42,7 @@ target_link_libraries( $ $ $ - $ - $ - $ - $ + $ $ $ $ diff --git a/projects/rocprofiler-systems/source/lib/common/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/common/CMakeLists.txt index b9c6f1919d..b4237c9d48 100644 --- a/projects/rocprofiler-systems/source/lib/common/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/common/CMakeLists.txt @@ -19,7 +19,9 @@ target_sources( ${CMAKE_CURRENT_SOURCE_DIR}/environment.hpp ${CMAKE_CURRENT_SOURCE_DIR}/invoke.hpp 
${CMAKE_CURRENT_SOURCE_DIR}/join.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/setup.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/setup.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/static_object.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/synchronized.hpp) get_filename_component(COMMON_SOURCE_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" DIRECTORY) get_filename_component(COMMON_BINARY_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}" DIRECTORY) diff --git a/projects/rocprofiler-systems/source/lib/common/defines.h.in b/projects/rocprofiler-systems/source/lib/common/defines.h.in index b8eff840ad..776a5f105a 100644 --- a/projects/rocprofiler-systems/source/lib/common/defines.h.in +++ b/projects/rocprofiler-systems/source/lib/common/defines.h.in @@ -42,10 +42,10 @@ #define ROCPROFSYS_COMPILER_STRING ROCPROFSYS_COMPILER_ID " v" ROCPROFSYS_COMPILER_VERSION #define ROCPROFSYS_DEFAULT_ROCM_PATH "@ROCmVersion_DIR@" -#define ROCPROFSYS_HIP_VERSION_STRING "@ROCPROFSYS_HIP_VERSION@" -#define ROCPROFSYS_HIP_VERSION_MAJOR @ROCPROFSYS_HIP_VERSION_MAJOR@ -#define ROCPROFSYS_HIP_VERSION_MINOR @ROCPROFSYS_HIP_VERSION_MINOR@ -#define ROCPROFSYS_HIP_VERSION_PATCH @ROCPROFSYS_HIP_VERSION_PATCH@ +#define ROCPROFSYS_ROCM_VERSION_STRING "@ROCPROFSYS_ROCM_VERSION@" +#define ROCPROFSYS_ROCM_VERSION_MAJOR @ROCPROFSYS_ROCM_VERSION_MAJOR@ +#define ROCPROFSYS_ROCM_VERSION_MINOR @ROCPROFSYS_ROCM_VERSION_MINOR@ +#define ROCPROFSYS_ROCM_VERSION_PATCH @ROCPROFSYS_ROCM_VERSION_PATCH@ // these can be set via defining the variable in CMake, e.g.: // cmake -D ROCPROFSYS_CACHELINE_SIZE=N /path/to/source @@ -63,15 +63,15 @@ ((10000 * ROCPROFSYS_VERSION_MAJOR) + (100 * ROCPROFSYS_VERSION_MINOR) + \ ROCPROFSYS_VERSION_PATCH) -#define ROCPROFSYS_HIP_VERSION \ - ((10000 * ROCPROFSYS_HIP_VERSION_MAJOR) + (100 * ROCPROFSYS_HIP_VERSION_MINOR) + \ - ROCPROFSYS_HIP_VERSION_PATCH) +#define ROCPROFSYS_ROCM_VERSION \ + ((10000 * ROCPROFSYS_ROCM_VERSION_MAJOR) + (100 * ROCPROFSYS_ROCM_VERSION_MINOR) + \ + ROCPROFSYS_ROCM_VERSION_PATCH) -#if ROCPROFSYS_HIP_VERSION_MAJOR > 
0 -# define ROCPROFSYS_HIP_VERSION_COMPAT_STRING \ - "v@ROCPROFSYS_HIP_VERSION_MAJOR@.@ROCPROFSYS_HIP_VERSION_MINOR@.x" +#if ROCPROFSYS_ROCM_VERSION_MAJOR > 0 +# define ROCPROFSYS_ROCM_VERSION_COMPAT_STRING \ + "v@ROCPROFSYS_ROCM_VERSION_MAJOR@.@ROCPROFSYS_ROCM_VERSION_MINOR@.x" #else -# define ROCPROFSYS_HIP_VERSION_COMPAT_STRING "" +# define ROCPROFSYS_ROCM_VERSION_COMPAT_STRING "" #endif // this should be passed to argparse::argument_parser::enable_version @@ -83,7 +83,7 @@ { \ { "", ROCPROFSYS_LIBRARY_ARCH }, { "compiler", ROCPROFSYS_COMPILER_STRING }, \ { \ - "rocm", ROCPROFSYS_HIP_VERSION_COMPAT_STRING \ + "rocm", ROCPROFSYS_ROCM_VERSION_COMPAT_STRING \ } \ } #endif diff --git a/projects/rocprofiler-systems/source/lib/common/setup.hpp b/projects/rocprofiler-systems/source/lib/common/setup.hpp index 0c2d1ad69a..1cf5e734c6 100644 --- a/projects/rocprofiler-systems/source/lib/common/setup.hpp +++ b/projects/rocprofiler-systems/source/lib/common/setup.hpp @@ -109,148 +109,6 @@ get_environ(int _verbose, std::string _search_paths = {}, _omnilib = common::path::find_path(_omnilib, _verbose, _search_paths); _omnilib_dl = common::path::find_path(_omnilib_dl, _verbose, _search_paths); -#if defined(ROCPROFSYS_USE_ROCTRACER) && ROCPROFSYS_USE_ROCTRACER > 0 - _data.emplace_back(env_config{ "HSA_TOOLS_LIB", _omnilib.c_str(), 0 }); -#endif - -#if defined(ROCPROFSYS_USE_ROCPROFILER) && ROCPROFSYS_USE_ROCPROFILER > 0 -# if ROCPROFSYS_HIP_VERSION >= 50200 -# define ROCPROFILER_METRICS_DIR "lib/rocprofiler" -# else -# define ROCPROFILER_METRICS_DIR "rocprofiler/lib" -# endif -# if ROCPROFSYS_HIP_VERSION <= 50500 -# define ROCPROFILER_LIBNAME "librocprofiler64.so" -# else -# define ROCPROFILER_LIBNAME "librocprofiler64.so.1" -# endif - - _data.emplace_back(env_config{ "HSA_TOOLS_LIB", _omnilib.c_str(), 0 }); - _data.emplace_back(env_config{ "ROCP_TOOL_LIB", _omnilib.c_str(), 0 }); - _data.emplace_back(env_config{ "ROCPROFILER_LOG", "1", 0 }); - _data.emplace_back(env_config{ 
"ROCP_HSA_INTERCEPT", "1", 0 }); - _data.emplace_back(env_config{ "HSA_TOOLS_REPORT_LOAD_FAILURE", "1", 0 }); - - auto _possible_rocp_metrics = std::vector{}; - auto _possible_rocprof_libs = std::vector{}; - for(const auto* itr : { "ROCPROFSYS_ROCM_PATH", "ROCM_PATH" }) - { - if(getenv(itr)) - { - _possible_rocp_metrics.emplace_back( - common::join('/', getenv(itr), "lib/rocprofiler")); - _possible_rocprof_libs.emplace_back( - common::join('/', getenv(itr), "lib/rocprofiler", ROCPROFILER_LIBNAME)); - _possible_rocp_metrics.emplace_back( - common::join('/', getenv(itr), "rocprofiler/lib")); - _possible_rocprof_libs.emplace_back( - common::join('/', getenv(itr), "rocprofiler/lib", ROCPROFILER_LIBNAME)); - } - } - - // default path - _possible_rocp_metrics.emplace_back( - common::join('/', ROCPROFSYS_DEFAULT_ROCM_PATH, "lib/rocprofiler")); - _possible_rocp_metrics.emplace_back( - common::join('/', ROCPROFSYS_DEFAULT_ROCM_PATH, "rocprofiler/lib")); - - auto _realpath_and_unique = [](const auto& _inp_v) { - auto _out_v = decltype(_inp_v){}; - for(auto& itr : _inp_v) - { - if(path::exists(itr)) _out_v.emplace_back(path::realpath(itr)); - } - - _out_v.erase(std::unique(_out_v.begin(), _out_v.end()), _out_v.end()); - return _out_v; - }; - - _possible_rocprof_libs = _realpath_and_unique(_possible_rocprof_libs); - - for(const auto& itr : _possible_rocprof_libs) - { - if(path::exists(itr)) - { - _data.emplace_back( - env_config{ "ROCPROFSYS_ROCPROFILER_LIBRARY", itr.c_str(), 0 }); - _possible_rocp_metrics.emplace( - _possible_rocp_metrics.begin(), - common::join('/', path::dirname(itr), "../../lib/rocprofiler")); - _possible_rocp_metrics.emplace(_possible_rocp_metrics.begin(), - common::join('/', path::dirname(itr))); - } - } - - _possible_rocp_metrics = _realpath_and_unique(_possible_rocp_metrics); - - auto _env_rocp_metrics = get_env("ROCP_METRICS", ""); - if(!_env_rocp_metrics.empty()) - { - if(!path::exists(_env_rocp_metrics)) - throw std::runtime_error(join("", "Error! 
ROCP_METRICS file \"", - _env_rocp_metrics, "\" does not exist")); - _possible_rocp_metrics.clear(); - _possible_rocp_metrics.emplace_back( - common::join('/', path::dirname(_env_rocp_metrics))); - } - - auto _found_rocp_metrics = (!_env_rocp_metrics.empty()) - ? get_env("ROCPROFSYS_ROCP_METRICS_FORCE_VALID", false) - : false; - - if(!_found_rocp_metrics) - { - for(const auto& itr : _possible_rocp_metrics) - { - auto _metrics_path = join('/', itr, "metrics.xml"); - if(path::exists(itr) && path::exists(_metrics_path) && - path::exists(join('/', itr, "gfx_metrics.xml"))) - { - _found_rocp_metrics = true; - _data.emplace_back( - env_config{ "ROCP_METRICS", _metrics_path.c_str(), 0 }); - break; - } - } - } - - // handle error - if(!_found_rocp_metrics) - { - auto _msg = std::stringstream{}; - _msg << std::boolalpha; - if(!_env_rocp_metrics.empty()) - { - auto _env_rocp_metrics_dir = path::dirname(_env_rocp_metrics); - auto _rocp_metrics_xml = join('/', _env_rocp_metrics_dir, "metrics.xml"); - auto _rocp_gfx_metrics_xml = - join('/', _env_rocp_metrics_dir, "gfx_metrics.xml"); - _msg << "Error! ROCP_METRICS=\"" << _env_rocp_metrics - << "\" in the environment but the directory (" << _env_rocp_metrics_dir - << ") does not contain " - "metrics.xml (found: " - << path::exists(_rocp_metrics_xml) << ") and/or gfx_metrics.xml (found: " - << path::exists(_rocp_gfx_metrics_xml) - << "). To ignore this error, set " - "ROCPROFSYS_ROCP_METRICS_FORCE_VALID=true in the environment"; - } - else - { - _msg - << "Error! ROCP_METRICS not set in environment and rocprof-sys could not " - "find a suitable path. Please set ROCP_METRICS=/path/to/metrics.xml " - "in the environment. 
This file is typically located in the same " - "folder as the librocprofiler64.so library.\nAdditional note: " - "metrics.xml typically contains:\n\t#include " - "\"gfx_metrics.xml\"\nMake sure the provided path also contains this " - "file.\nExample:\n\texport ROCP_METRICS=" - << ROCPROFSYS_DEFAULT_ROCM_PATH << "/" << ROCPROFILER_METRICS_DIR - << "/metrics.xml\n"; - } - throw std::runtime_error(_msg.str()); - } -#endif - #if defined(ROCPROFSYS_USE_OMPT) && ROCPROFSYS_USE_OMPT > 0 if(get_env("ROCPROFSYS_USE_OMPT", true)) { diff --git a/projects/rocprofiler-systems/source/lib/common/static_object.hpp b/projects/rocprofiler-systems/source/lib/common/static_object.hpp new file mode 100644 index 0000000000..567ce2c7f6 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/common/static_object.hpp @@ -0,0 +1,207 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace rocprofsys +{ +inline namespace common +{ +using static_dtor_func_t = void (*)(); + +void +destroy_static_objects(); + +void +register_static_dtor(static_dtor_func_t&&); + +namespace +{ +struct anonymous +{}; +} // namespace + +struct do_not_destroy +{}; + +template +constexpr size_t +static_buffer_size() +{ + return sizeof(Tp); +} + +/** + * @brief This struct is used to create static singleton objects which have the properties + * of a heap-allocated static object without a memory leak. + * + * @tparam Tp Data type of singleton + * @tparam ContextT Used to differentiate singletons in different translation units (if + * using default parameter) or to ensure the singleton can be accessed in different + * translation units (not recommended) as long as this type is not in an anonymous + * namespace + * + * This template works by creating a buffer of at least `sizeof(Tp)` bytes in the binary + * and does a placement new into that buffer. The object created is NOT heap allocated; + * the address of the object is an address in between the library load address and the + * load address + size of library. + */ +template +struct static_object +{ + static_object() = delete; + ~static_object() = delete; + static_object(const static_object&) = delete; + static_object(static_object&&) noexcept = delete; + static_object& operator=(const static_object&) = delete; + static_object& operator=(static_object&&) noexcept = delete; + + template + static Tp*& construct(Args&&... args); + + template + static Tp*& construct(do_not_destroy&&, Args&&...
args); + + static Tp* get() { return m_object; } + + static constexpr bool is_trivial_standard_layout(); + +private: + static Tp* m_object; + static std::array()> m_buffer; +}; + +template +Tp* static_object::m_object = nullptr; + +template +std::array()> + static_object::m_buffer = {}; + +template +constexpr bool +static_object::is_trivial_standard_layout() +{ + return (std::is_standard_layout::value && std::is_trivial::value); +} + +template +template +Tp*& +static_object::construct(Args&&... args) +{ + if constexpr(!is_trivial_standard_layout()) + { + static auto _once = std::once_flag{}; + std::call_once(_once, []() { + register_static_dtor([]() { + if(static_object::m_object) + { + static_object::m_object->~Tp(); + static_object::m_object = nullptr; + } + }); + }); + } + + if(m_object) + { + std::cerr + << "reconstructing static object. Use get() function to retrieve pointer" + << std::endl; + abort(); + } + + m_object = new(m_buffer.data()) Tp{ std::forward(args)... }; + return m_object; +} + +template +template +Tp*& +static_object::construct(do_not_destroy&&, Args&&... args) +{ + if(m_object) + { + std::cerr + << "reconstructing static object. Use get() function to retrieve pointer" + << std::endl; + abort(); + } + + m_object = new(m_buffer.data()) Tp{ std::forward(args)... 
}; + return m_object; +} + +namespace +{ +inline auto*& +get_static_object_stack() +{ + static auto* _v = new std::stack{}; + return _v; +} +} // namespace + +inline void +destroy_static_objects() +{ + static auto _sync = std::mutex{}; + auto _lk = std::unique_lock{ _sync }; + + auto*& _stack = get_static_object_stack(); + if(_stack) + { + while(!_stack->empty()) + { + auto& itr = _stack->top(); + if(itr) itr(); + _stack->pop(); + } + + delete _stack; + _stack = nullptr; + } +} + +inline void +register_static_dtor(static_dtor_func_t&& _func) +{ + static auto _sync = std::mutex{}; + auto _lk = std::unique_lock{ _sync }; + + auto*& _stack = get_static_object_stack(); + if(_stack) + { + _stack->push(_func); + } +} +} // namespace common +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/common/synchronized.hpp b/projects/rocprofiler-systems/source/lib/common/synchronized.hpp new file mode 100644 index 0000000000..99b4aa1e5b --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/common/synchronized.hpp @@ -0,0 +1,167 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include +#include +#include + +namespace rocprofsys +{ +inline namespace common +{ +/** + * Sychronized is a wrapper that adds lock based write/read + * protection around a datatype. The protected data is accessed + * only by rlock/wlock. rlock(lambda) gets a reader lock of the + * protected value, passing the protected value to the lambda as a + * const. wlock(lambda) gets a writer lock on the protective value + * and does the same. The reason for this class is to make it less + * error prone to access shared data and more obvious when a lock + * is being held. + * + * Example usage: + * + * synchronized x(9); + * x.rlock([](const auto& data){ + * // data = 9 + * }); + * + * x.wlock([](auto& data){ + * // set data to new value + * }); + */ +template +class synchronized +{ +public: + using value_type = LockedType; + using this_type = synchronized; + + synchronized() = default; + ~synchronized() = default; + + explicit synchronized(value_type&& data) + : m_data{ std::move(data) } + {} + + synchronized(synchronized&& data) noexcept = default; + synchronized& operator=(synchronized&& data) noexcept = default; + + // Do not allow this data structure to be copied, std::move only. + synchronized(const synchronized&) = delete; + + template + decltype(auto) rlock(FuncT&& lambda, Args&&... args) const; + + template + decltype(auto) wlock(FuncT&& lambda, Args&&... args); + + // This overload to wlock allows a synchronized map whose keys map to synchronized + // data to use a read lock on the key data and then a write lock on the mapped data. + template = 0> + decltype(auto) wlock(FuncT&& lambda, Args&&... args) const; + + // Upgradable lock. 
If read returns false, write will be called with a unique_lock. + // Essentially a helper function that does .rlock() followed by .wlock(). + template + bool ulock(ReadFuncT&& read, WriteFuncT&& write, Args&&... args); + +private: + mutable std::shared_mutex m_mutex = {}; + value_type m_data = {}; +}; + +// +// member definitions +// +template +template +decltype(auto) +synchronized::rlock(FuncT&& lambda, Args&&... args) const +{ + static_assert(std::is_invocable::value, + "function must accept const reference to locked type"); + + auto lock = std::shared_lock{ m_mutex }; + return std::forward(lambda)(m_data, std::forward(args)...); +} + +template +template +decltype(auto) +synchronized::wlock(FuncT&& lambda, Args&&... args) +{ + static_assert(std::is_invocable::value, + "function must accept reference to locked type"); + + auto lock = std::unique_lock{ m_mutex }; + return std::forward(lambda)(m_data, std::forward(args)...); +} + +// This overload to wlock allows a synchronized map whose keys map to synchronized data to +// use a read lock on the key data and then a write lock on the mapped data. +template +template > +decltype(auto) +synchronized::wlock(FuncT&& lambda, Args&&... args) const +{ + return const_cast(this)->wlock(std::forward(lambda), + std::forward(args)...); +} + +// Upgradable lock. If read returns false, write will be called with a unique_lock. +// Essentially a helper function that does .rlock() followed by .wlock(). +template +template +bool +synchronized::ulock(ReadFuncT&& read, WriteFuncT&& write, + Args&&... 
args) +{ + static_assert(std::is_invocable::value, + "read function must accept const reference to locked type"); + static_assert(std::is_invocable::value, + "write function must accept reference to locked type"); + + using read_return_type = std::invoke_result_t; + using write_return_type = std::invoke_result_t; + + static_assert(std::is_same::value, + "read and write functions must return same type"); + static_assert(std::is_same::value, + "read/write functions must return bool"); + + { + auto lock = std::shared_lock{ m_mutex }; + if(read(m_data, std::forward(args)...)) return true; + } + + auto lock = std::unique_lock{ m_mutex }; + return write(m_data, std::forward(args)...); +} +} // namespace common +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt index 6e3b534163..184229642c 100644 --- a/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt @@ -14,6 +14,7 @@ set(core_sources ${CMAKE_CURRENT_LIST_DIR}/mproc.cpp ${CMAKE_CURRENT_LIST_DIR}/perf.cpp ${CMAKE_CURRENT_LIST_DIR}/perfetto.cpp + ${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.cpp ${CMAKE_CURRENT_LIST_DIR}/state.cpp ${CMAKE_CURRENT_LIST_DIR}/timemory.cpp ${CMAKE_CURRENT_LIST_DIR}/utility.cpp) @@ -29,13 +30,13 @@ set(core_headers ${CMAKE_CURRENT_LIST_DIR}/dynamic_library.hpp ${CMAKE_CURRENT_LIST_DIR}/exception.hpp ${CMAKE_CURRENT_LIST_DIR}/gpu.hpp - ${CMAKE_CURRENT_LIST_DIR}/hip_runtime.hpp ${CMAKE_CURRENT_LIST_DIR}/locking.hpp ${CMAKE_CURRENT_LIST_DIR}/mproc.hpp ${CMAKE_CURRENT_LIST_DIR}/perf.hpp ${CMAKE_CURRENT_LIST_DIR}/perfetto.hpp ${CMAKE_CURRENT_LIST_DIR}/rccl.hpp ${CMAKE_CURRENT_LIST_DIR}/redirect.hpp + ${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.hpp ${CMAKE_CURRENT_LIST_DIR}/state.hpp ${CMAKE_CURRENT_LIST_DIR}/timemory.hpp ${CMAKE_CURRENT_LIST_DIR}/utility.hpp) @@ -54,6 +55,10 @@ add_subdirectory(containers) 
target_include_directories(rocprofiler-systems-core-library BEFORE PRIVATE ${CMAKE_CURRENT_LIST_DIR}) +target_include_directories( + rocprofiler-systems-core-library + PRIVATE ${PROJECT_SOURCE_DIR}/external/timemory/source/timemory/tpls/cereal) + target_link_libraries(rocprofiler-systems-core-library PRIVATE rocprofiler-systems::rocprofiler-systems-interface-library) target_link_libraries( @@ -67,8 +72,7 @@ target_link_libraries( $ $ $ - $ - $ + $ $ $ $ diff --git a/projects/rocprofiler-systems/source/lib/core/argparse.cpp b/projects/rocprofiler-systems/source/lib/core/argparse.cpp index ee195cb438..01c0d229f0 100644 --- a/projects/rocprofiler-systems/source/lib/core/argparse.cpp +++ b/projects/rocprofiler-systems/source/lib/core/argparse.cpp @@ -222,17 +222,6 @@ init_parser(parser_data& _data) _data.dl_libpath = get_realpath(get_internal_libpath("librocprof-sys-dl.so").c_str()); _data.omni_libpath = get_realpath(get_internal_libpath("librocprof-sys.so").c_str()); -#if defined(ROCPROFSYS_USE_ROCTRACER) || defined(ROCPROFSYS_USE_ROCPROFILER) - update_env(_data, "HSA_TOOLS_LIB", _data.dl_libpath); - if(!getenv("HSA_TOOLS_REPORT_LOAD_FAILURE")) - update_env(_data, "HSA_TOOLS_REPORT_LOAD_FAILURE", "1"); -#endif - -#if defined(ROCPROFSYS_USE_ROCPROFILER) - update_env(_data, "ROCP_TOOL_LIB", _data.omni_libpath); - if(!getenv("ROCP_HSA_INTERCEPT")) update_env(_data, "ROCP_HSA_INTERCEPT", "1"); -#endif - #if defined(ROCPROFSYS_USE_OMPT) if(!getenv("OMP_TOOL_LIBRARIES")) update_env(_data, "OMP_TOOL_LIBRARIES", _data.dl_libpath, UPD_PREPEND); @@ -300,15 +289,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data) %{INDENT}% 0 avoid triggering the bug, potentially at the cost of reduced performance %{INDENT}% 1 do not modify how ROCm is notified about kernel completion)"; - auto _realtime_reqs = - (tim::get_env("HSA_ENABLE_INTERRUPT", std::string{}, false).empty()) - ? 
strvec_t{ "hsa-interrupt" } - : strvec_t{}; - -#if ROCPROFSYS_USE_ROCTRACER == 0 && ROCPROFSYS_USE_ROCPROFILER == 0 - _realtime_reqs.clear(); -#endif - const auto* _trace_policy_desc = R"(Policy for new data when the buffer size limit is reached: %{INDENT}%- discard : new data is ignored @@ -579,45 +559,29 @@ add_core_arguments(parser_t& _parser, parser_data& _data) _backend_choices.erase("rcclp"); #endif -#if !defined(ROCPROFSYS_USE_ROCM_SMI) +#if !defined(ROCPROFSYS_USE_ROCM) + _backend_choices.erase("amd-smi"); _backend_choices.erase("rocm-smi"); -#endif - -#if !defined(ROCPROFSYS_USE_ROCTRACER) - _backend_choices.erase("roctracer"); - _backend_choices.erase("roctx"); -#endif - -#if !defined(ROCPROFSYS_USE_ROCPROFILER) - _backend_choices.erase("rocprofiler"); + _backend_choices.erase("rocprofiler-sdk"); + _backend_choices.erase("rocm"); #endif if(gpu::device_count() == 0) { + // remove GPU-specific backends _backend_choices.erase("rcclp"); + _backend_choices.erase("amd-smi"); _backend_choices.erase("rocm-smi"); - _backend_choices.erase("roctracer"); - _backend_choices.erase("rocprofiler"); + _backend_choices.erase("rocprofiler-sdk"); + _backend_choices.erase("rocm"); #if defined(ROCPROFSYS_USE_RCCL) update_env(_data, "ROCPROFSYS_USE_RCCLP", false); #endif -#if defined(ROCPROFSYS_USE_ROCM_SMI) +#if defined(ROCPROFSYS_USE_ROCM) update_env(_data, "ROCPROFSYS_USE_ROCM_SMI", false); -#endif - -#if defined(ROCPROFSYS_USE_ROCTRACER) - update_env(_data, "ROCPROFSYS_USE_ROCTRACER", false); - update_env(_data, "ROCPROFSYS_USE_ROCTX", false); - update_env(_data, "ROCPROFSYS_ROCTRACER_HSA_ACTIVITY", false); - update_env(_data, "ROCPROFSYS_ROCTRACER_HIP_ACTIVITY", false); - _backend_choices.erase("roctracer"); - _backend_choices.erase("roctx"); -#endif - -#if defined(ROCPROFSYS_USE_ROCPROFILER) - update_env(_data, "ROCPROFSYS_USE_ROCPROFILER", false); + update_env(_data, "ROCPROFSYS_USE_ROCM", false); #endif } @@ -640,11 +604,9 @@ add_core_arguments(parser_t& _parser, 
parser_data& _data) _update("ROCPROFSYS_USE_KOKKOSP", _v.count("kokkosp") > 0); _update("ROCPROFSYS_USE_MPIP", _v.count("mpip") > 0); _update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0); + _update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0); _update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0); - _update("ROCPROFSYS_USE_ROCTX", _v.count("roctx") > 0); _update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0); - _update("ROCPROFSYS_USE_ROCTRACER", _v.count("roctracer") > 0); - _update("ROCPROFSYS_USE_ROCPROFILER", _v.count("rocprofiler") > 0); _update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); @@ -676,28 +638,13 @@ add_core_arguments(parser_t& _parser, parser_data& _data) _update("ROCPROFSYS_USE_KOKKOSP", _v.count("kokkosp") > 0); _update("ROCPROFSYS_USE_MPIP", _v.count("mpip") > 0); _update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0); + _update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0); _update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0); - _update("ROCPROFSYS_USE_ROCTX", _v.count("roctx") > 0); _update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0); - _update("ROCPROFSYS_USE_ROCTRACER", _v.count("roctracer") > 0); - _update("ROCPROFSYS_USE_ROCPROFILER", _v.count("rocprofiler") > 0); _update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); - if(_v.count("all") > 0 || - (_v.count("roctracer") > 0 && _v.count("rocprofiler") > 0)) - { - remove_env(_data, "HSA_TOOLS_LIB"); - remove_env(_data, "HSA_TOOLS_REPORT_LOAD_FAILURE"); - } - - if(_v.count("all") > 0 || _v.count("rocprofiler") > 0) - { - remove_env(_data, "ROCP_TOOL_LIB"); - remove_env(_data, "ROCP_HSA_INTERCEPT"); - } - if(_v.count("all") > 0 || _v.count("ompt") > 0) 
remove_env(_data, "OMP_TOOL_LIBRARIES"); @@ -1126,7 +1073,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data) _parser.add_argument({ "--sample-realtime" }, _realtime_desc) .min_count(0) .dtype("[freq] [delay] [tids...]") - .required(std::move(_realtime_reqs)) .action([&](parser_t& p) { auto _v = p.get>("sample-realtime"); update_env(_data, "ROCPROFSYS_SAMPLING_REALTIME", true); @@ -1210,25 +1156,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data) _data.processed_environs.emplace("papi_events"); } -#if defined(ROCPROFSYS_USE_ROCPROFILER) - if(_data.environ_filter("gpu_events", _data)) - { - _parser - .add_argument({ "-G", "--gpu-events" }, - "Set the GPU hardware counter events to record (ref: " - "`rocprof-sys-avail -H -c GPU`)") - .min_count(1) - .dtype("[EVENT ...]") - .action([&](parser_t& p) { - auto _events = join(array_config_t{ "," }, p.get("gpu-events")); - update_env(_data, "ROCPROFSYS_ROCM_EVENTS", _events); - }); - - _data.processed_environs.emplace("gpu_events"); - _data.processed_environs.emplace("rocm_events"); - } -#endif - add_group_arguments(_parser, "category", _data, true); add_group_arguments(_parser, "io", _data, true); add_group_arguments(_parser, "perfetto", _data, true); diff --git a/projects/rocprofiler-systems/source/lib/core/categories.hpp b/projects/rocprofiler-systems/source/lib/core/categories.hpp index 5eb633db23..0f09f4f1b4 100644 --- a/projects/rocprofiler-systems/source/lib/core/categories.hpp +++ b/projects/rocprofiler-systems/source/lib/core/categories.hpp @@ -91,19 +91,21 @@ ROCPROFSYS_DEFINE_CATEGORY(project, rocprofsys, ROCPROFSYS_CATEGORY_NONE, "rocpr ROCPROFSYS_DEFINE_CATEGORY(category, host, ROCPROFSYS_CATEGORY_HOST, "host", "Host-side function tracing") ROCPROFSYS_DEFINE_CATEGORY(category, user, ROCPROFSYS_CATEGORY_USER, "user", "User-defined regions") ROCPROFSYS_DEFINE_CATEGORY(category, python, ROCPROFSYS_CATEGORY_PYTHON, "python", "Python regions") -ROCPROFSYS_DEFINE_CATEGORY(category, device_hip, 
ROCPROFSYS_CATEGORY_DEVICE_HIP, "device_hip", "Device-side functions submitted via HIP API") -ROCPROFSYS_DEFINE_CATEGORY(category, device_hsa, ROCPROFSYS_CATEGORY_DEVICE_HSA, "device_hsa", "Device-side functions submitted via HSA API") -ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hip, ROCPROFSYS_CATEGORY_ROCM_HIP, "rocm_hip", "Host-side HIP functions") -ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hsa, ROCPROFSYS_CATEGORY_ROCM_HSA, "rocm_hsa", "Host-side HSA functions") -ROCPROFSYS_DEFINE_CATEGORY(category, rocm_roctx, ROCPROFSYS_CATEGORY_ROCM_ROCTX, "rocm_roctx", "ROCTx labels") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm, ROCPROFSYS_CATEGORY_ROCM, "rocm", "General ROCm tracing") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hip_api, ROCPROFSYS_CATEGORY_ROCM_HIP_API, "rocm_hip_api", "ROCm HIP functions") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hsa_api, ROCPROFSYS_CATEGORY_ROCM_HSA_API, "rocm_hsa_api", "ROCm HSA functions") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_kernel_dispatch, ROCPROFSYS_CATEGORY_ROCM_KERNEL_DISPATCH, "rocm_kernel_dispatch", "ROCm Kernel dispatch") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_memory_copy, ROCPROFSYS_CATEGORY_ROCM_MEMORY_COPY, "rocm_memory_copy", "ROCm Async Memory Copy") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_scratch_memory, ROCPROFSYS_CATEGORY_ROCM_SCRATCH_MEMORY, "rocm_scratch_memory", "ROCm kernel scratch memory reallocations") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_page_migration, ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION, "rocm_page_migration", "ROCm memory page migration") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_counter_collection, ROCPROFSYS_CATEGORY_ROCM_COUNTER_COLLECTION, "rocm_counter_collection", "ROCm device counter collection") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_marker_api, ROCPROFSYS_CATEGORY_ROCM_MARKER_API, "rocm_marker_api", "ROCTx labels") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi, ROCPROFSYS_CATEGORY_ROCM_SMI, "rocm_smi", "rocm-smi data") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_busy, 
ROCPROFSYS_CATEGORY_ROCM_SMI_BUSY, "device_busy", "Busy percentage of a GPU device") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_temp, ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP, "device_temp", "Temperature of a GPU device") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_power, ROCPROFSYS_CATEGORY_ROCM_SMI_POWER, "device_power", "Power consumption of a GPU device") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_memory_usage, ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE, "device_memory_usage", "Memory usage of a GPU device") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl, ROCPROFSYS_CATEGORY_ROCM_RCCL, "rccl", "ROCm Communication Collectives Library (RCCL) regions") -ROCPROFSYS_DEFINE_CATEGORY(category, roctracer, ROCPROFSYS_CATEGORY_ROCTRACER, "roctracer", "Kernel tracing provided by roctracer") -ROCPROFSYS_DEFINE_CATEGORY(category, rocprofiler, ROCPROFSYS_CATEGORY_ROCPROFILER, "rocprofiler", "HW counter data provided by rocprofiler") ROCPROFSYS_DEFINE_CATEGORY(category, pthread, ROCPROFSYS_CATEGORY_PTHREAD, "pthread", "POSIX threading functions") ROCPROFSYS_DEFINE_CATEGORY(category, kokkos, ROCPROFSYS_CATEGORY_KOKKOS, "kokkos", "KokkosTools regions") ROCPROFSYS_DEFINE_CATEGORY(category, mpi, ROCPROFSYS_CATEGORY_MPI, "mpi", "MPI regions") @@ -151,19 +153,21 @@ using name = perfetto_category; ROCPROFSYS_PERFETTO_CATEGORY(category::user), \ ROCPROFSYS_PERFETTO_CATEGORY(category::python), \ ROCPROFSYS_PERFETTO_CATEGORY(category::sampling), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::device_hip), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::device_hsa), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_hip), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_hsa), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_roctx), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_hip_api), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_hsa_api), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_kernel_dispatch), \ + 
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_memory_copy), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_scratch_memory), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_page_migration), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_counter_collection), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_marker_api), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_busy), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_temp), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_power), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_memory_usage), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rccl), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::roctracer), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::rocprofiler), \ ROCPROFSYS_PERFETTO_CATEGORY(category::pthread), \ ROCPROFSYS_PERFETTO_CATEGORY(category::kokkos), \ ROCPROFSYS_PERFETTO_CATEGORY(category::mpi), \ diff --git a/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp b/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp index b5726086cf..8e9343d9d2 100644 --- a/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp +++ b/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp @@ -96,14 +96,6 @@ struct functors; } // namespace component } // namespace rocprofsys -#if !defined(ROCPROFSYS_USE_ROCTRACER) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::roctracer, false_type) -#endif - -#if !defined(ROCPROFSYS_USE_ROCPROFILER) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::rocprofiler, false_type) -#endif - #if !defined(ROCPROFSYS_USE_RCCL) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, category::rocm_rccl, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::rcclp_handle, false_type) @@ -124,7 +116,7 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_cpu_clock, fa ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_percent, false_type) #endif -#if 
!defined(TIMEMORY_USE_LIBUNWIND) || !defined(ROCPROFSYS_USE_ROCM_SMI) +#if !defined(TIMEMORY_USE_LIBUNWIND) || !defined(ROCPROFSYS_USE_ROCM) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_temp, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_power, false_type) diff --git a/projects/rocprofiler-systems/source/lib/core/config.cpp b/projects/rocprofiler-systems/source/lib/core/config.cpp index 38fbc4e5ed..b5f249d4ca 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.cpp +++ b/projects/rocprofiler-systems/source/lib/core/config.cpp @@ -22,6 +22,7 @@ #include "config.hpp" #include "common/defines.h" +#include "common/static_object.hpp" #include "constraint.hpp" #include "debug.hpp" #include "defines.hpp" @@ -29,9 +30,9 @@ #include "mproc.hpp" #include "perf.hpp" #include "perfetto.hpp" +#include "rocprofiler-sdk.hpp" #include "utility.hpp" -#include #include #include #include @@ -52,6 +53,7 @@ #include #include #include +#include #include #include @@ -60,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +70,7 @@ #include #include #include +#include #include #include @@ -76,6 +80,11 @@ using settings = tim::settings; namespace { +int verbose_value = tim::get_env("ROCPROFSYS_VERBOSE", 0, false); +bool debug_value = tim::get_env("ROCPROFSYS_DEBUG", false, false); +bool is_ci_value = tim::get_env("ROCPROFSYS_CI", false, false); +auto configure_once = std::once_flag{}; + TIMEMORY_NOINLINE bool& _settings_are_configured() { @@ -83,6 +92,14 @@ _settings_are_configured() return _v; } +auto*& +get_config_impl() +{ + static auto*& _v = common::static_object>::construct( + common::do_not_destroy{}, settings::shared_instance()); + return _v; +} + auto get_config() { @@ -97,7 +114,7 @@ get_config() std::string get_setting_name(std::string _v) { - static const auto _prefix = 
tim::string_view_t{ "rocprofsys_" }; + constexpr auto _prefix = tim::string_view_t{ "rocprofsys_" }; for(auto& itr : _v) itr = tolower(itr); auto _pos = _v.find(_prefix); @@ -195,7 +212,7 @@ configure_settings(bool _init) if(settings_are_configured()) return; - if(get_is_continuous_integration() && get_state() < State::Init) + if(is_ci_value && get_state() < State::Init) { timemory_print_demangled_backtrace<64>(); ROCPROFSYS_THROW("config::configure_settings() called before " @@ -220,17 +237,17 @@ configure_settings(bool _init) tim::manager::add_metadata("ROCPROFSYS_COMPILER_VERSION", ROCPROFSYS_COMPILER_VERSION); -#if ROCPROFSYS_HIP_VERSION > 0 - tim::manager::add_metadata("ROCPROFSYS_HIP_VERSION", ROCPROFSYS_HIP_VERSION_STRING); - tim::manager::add_metadata("ROCPROFSYS_HIP_VERSION_MAJOR", - ROCPROFSYS_HIP_VERSION_MAJOR); - tim::manager::add_metadata("ROCPROFSYS_HIP_VERSION_MINOR", - ROCPROFSYS_HIP_VERSION_MINOR); - tim::manager::add_metadata("ROCPROFSYS_HIP_VERSION_PATCH", - ROCPROFSYS_HIP_VERSION_PATCH); +#if ROCPROFSYS_ROCM_VERSION > 0 + tim::manager::add_metadata("ROCPROFSYS_ROCM_VERSION", ROCPROFSYS_ROCM_VERSION_STRING); + tim::manager::add_metadata("ROCPROFSYS_ROCM_VERSION_MAJOR", + ROCPROFSYS_ROCM_VERSION_MAJOR); + tim::manager::add_metadata("ROCPROFSYS_ROCM_VERSION_MINOR", + ROCPROFSYS_ROCM_VERSION_MINOR); + tim::manager::add_metadata("ROCPROFSYS_ROCM_VERSION_PATCH", + ROCPROFSYS_ROCM_VERSION_PATCH); #endif - auto _config = settings::shared_instance(); + auto _config = *get_config_impl(); // if using timemory, default to perfetto being off auto _default_perfetto_v = !tim::get_env("ROCPROFSYS_PROFILE", false, false); @@ -294,24 +311,15 @@ configure_settings(bool _init) "Enable causal profiling analysis", false, "backend", "causal", "analysis"); - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_ROCTRACER", + ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_ROCM", "Enable ROCm API and kernel tracing", true, "backend", - "roctracer", "rocm"); - - 
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_ROCPROFILER", - "Enable ROCm hardware counters", true, "backend", - "rocprofiler", "rocm"); + "rocm"); ROCPROFSYS_CONFIG_SETTING( bool, "ROCPROFSYS_USE_ROCM_SMI", "Enable sampling GPU power, temp, utilization, and memory usage", true, "backend", "rocm_smi", "rocm", "process_sampling"); - ROCPROFSYS_CONFIG_SETTING( - bool, "ROCPROFSYS_USE_ROCTX", - "Enable ROCtx API. Warning! Out-of-order ranges may corrupt perfetto flamegraph", - false, "backend", "roctracer", "rocm", "roctx"); - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_SAMPLING", "Enable statistical sampling of call-stack", false, "backend", "sampling"); @@ -616,41 +624,7 @@ configure_settings(bool _init) "sampling", "hardware_counters") ->set_choices(perf::get_config_choices()); - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_ROCTRACER_HIP_API", - "Enable HIP API tracing support", true, "roctracer", "rocm", - "advanced"); - - ROCPROFSYS_CONFIG_SETTING( - bool, "ROCPROFSYS_ROCTRACER_HIP_API_BACKTRACE", - "Enable annotating the perfetto debug annotation with backtraces", false, - "roctracer", "rocm", "perfetto", "advanced"); - - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_ROCTRACER_HIP_ACTIVITY", - "Enable HIP activity tracing support", true, "roctracer", - "rocm", "advanced"); - - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_ROCTRACER_HSA_ACTIVITY", - "Enable HSA activity tracing support", false, "roctracer", - "rocm", "advanced"); - - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_ROCTRACER_HSA_API", - "Enable HSA API tracing support", false, "roctracer", - "rocm", "advanced"); - - ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_ROCTRACER_HSA_API_TYPES", - "HSA API type to collect", "", "roctracer", "rocm", - "advanced"); - - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_ROCTRACER_DISCARD_BARRIERS", - "Skip barrier marker events in traces", false, "roctracer", - "rocm", "advanced"); - - ROCPROFSYS_CONFIG_SETTING( - std::string, "ROCPROFSYS_ROCM_EVENTS", - "ROCm 
hardware counters. Use ':device=N' syntax to specify collection on device " - "number N, e.g. ':device=0'. If no device specification is provided, the event " - "is collected on every available device", - "", "rocprofiler", "rocm", "hardware_counters"); + rocprofiler_sdk::config_settings(_config); ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_ROCM_SMI_METRICS", "rocm-smi metrics to collect: busy, temp, power, mem_usage", @@ -670,12 +644,6 @@ configure_settings(bool _init) "default to the value of ROCPROFSYS_COLLAPSE_PROCESSES", false, "perfetto", "data", "advanced"); - ROCPROFSYS_CONFIG_SETTING( - bool, "ROCPROFSYS_PERFETTO_ROCTRACER_PER_STREAM", - "Separate roctracer GPU side traces (copies, kernels) into separate " - "tracks based on the stream they're enqueued into", - true, "perfetto", "roctracer", "rocm", "advanced"); - ROCPROFSYS_CONFIG_SETTING( std::string, "ROCPROFSYS_PERFETTO_FILL_POLICY", "Behavior when perfetto buffer is full. 'discard' will ignore new entries, " @@ -704,18 +672,6 @@ configure_settings(bool _init) "feature may dramatically reduce the size of the trace", true, "perfetto", "data", "debugging", "advanced"); - ROCPROFSYS_CONFIG_SETTING( - bool, "ROCPROFSYS_PERFETTO_COMPACT_ROCTRACER_ANNOTATIONS", - "When PERFETTO_ANNOTATIONS, USE_ROCTRACER, and ROCTRACER_HIP_API are all " - "enabled, enabling this option will result in the arg information for HIP API " - "calls to all be within one annotation (e.g., args=\"stream=0x0, dst=0x1F, " - "sizeBytes=64, src=0x08, kind=1\"). When disabled, each parameter will be an " - "individual annotation (e.g. stream, dst, sizeBytes, etc.). 
The benefit of the " - "former is that it is faster to serialize and consumes less file space; the " - "benefit of the latter is that it becomes much easier to find slices in the " - "trace with the same value", - false, "perfetto", "data", "debugging", "roctracer", "rocm", "advanced"); - ROCPROFSYS_CONFIG_SETTING( uint64_t, "ROCPROFSYS_THREAD_POOL_SIZE", "Max number of threads for processing background tasks", @@ -1045,6 +1001,10 @@ configure_settings(bool _init) settings::suppress_config() = true; + if(auto opt = get_setting_value("ROCPROFSYS_VERBOSE"); opt) verbose_value = *opt; + if(auto opt = get_setting_value("ROCPROFSYS_DEBUG"); opt) debug_value = *opt; + if(auto opt = get_setting_value("ROCPROFSYS_CI"); opt) is_ci_value = *opt; + if(get_env("ROCPROFSYS_MONOCHROME", _config->get("ROCPROFSYS_MONOCHROME"))) tim::log::monochrome() = true; @@ -1106,6 +1066,10 @@ configure_settings(bool _init) ROCPROFSYS_BASIC_VERBOSE(2, "configuration complete\n"); + if(auto opt = get_setting_value("ROCPROFSYS_VERBOSE"); opt) verbose_value = *opt; + if(auto opt = get_setting_value("ROCPROFSYS_DEBUG"); opt) debug_value = *opt; + if(auto opt = get_setting_value("ROCPROFSYS_CI"); opt) is_ci_value = *opt; + _settings_are_configured() = true; } @@ -1140,8 +1104,6 @@ configure_mode_settings(const std::shared_ptr& _config) _set("ROCPROFSYS_PROFILE", false); _set("ROCPROFSYS_USE_CAUSAL", false); _set("ROCPROFSYS_USE_ROCM_SMI", false); - _set("ROCPROFSYS_USE_ROCTRACER", false); - _set("ROCPROFSYS_USE_ROCPROFILER", false); _set("ROCPROFSYS_USE_KOKKOSP", false); _set("ROCPROFSYS_USE_RCCLP", false); _set("ROCPROFSYS_USE_OMPT", false); @@ -1164,12 +1126,11 @@ configure_mode_settings(const std::shared_ptr& _config) if(gpu::device_count() == 0) { -#if ROCPROFSYS_HIP_VERSION > 0 - ROCPROFSYS_BASIC_VERBOSE(1, "No HIP devices were found: disabling roctracer, " - "rocprofiler, and rocm_smi...\n"); +#if ROCPROFSYS_ROCM_VERSION > 0 + ROCPROFSYS_BASIC_VERBOSE( + 1, "No ROCm devices were found: 
disabling rocm and rocm_smi...\n"); #endif - _set("ROCPROFSYS_USE_ROCPROFILER", false); - _set("ROCPROFSYS_USE_ROCTRACER", false); + _set("ROCPROFSYS_USE_ROCM", false); _set("ROCPROFSYS_USE_ROCM_SMI", false); } @@ -1202,9 +1163,8 @@ configure_mode_settings(const std::shared_ptr& _config) _set("ROCPROFSYS_USE_TRACE", false); _set("ROCPROFSYS_PROFILE", false); _set("ROCPROFSYS_USE_CAUSAL", false); + _set("ROCPROFSYS_USE_ROCM", false); _set("ROCPROFSYS_USE_ROCM_SMI", false); - _set("ROCPROFSYS_USE_ROCTRACER", false); - _set("ROCPROFSYS_USE_ROCPROFILER", false); _set("ROCPROFSYS_USE_KOKKOSP", false); _set("ROCPROFSYS_USE_RCCLP", false); _set("ROCPROFSYS_USE_OMPT", false); @@ -1389,22 +1349,9 @@ configure_disabled_settings(const std::shared_ptr& _config) _handle_use_option("ROCPROFSYS_USE_OMPT", "ompt"); _handle_use_option("ROCPROFSYS_USE_RCCLP", "rcclp"); _handle_use_option("ROCPROFSYS_USE_ROCM_SMI", "rocm_smi"); - _handle_use_option("ROCPROFSYS_USE_ROCTRACER", "roctracer"); - _handle_use_option("ROCPROFSYS_USE_ROCPROFILER", "rocprofiler"); + _handle_use_option("ROCPROFSYS_USE_ROCM", "rocm"); -#if !defined(ROCPROFSYS_USE_ROCTRACER) || ROCPROFSYS_USE_ROCTRACER == 0 - _config->find("ROCPROFSYS_USE_ROCTRACER")->second->set_hidden(true); - for(const auto& itr : _config->disable_category("roctracer")) - _config->find(itr)->second->set_hidden(true); -#endif - -#if !defined(ROCPROFSYS_USE_ROCPROFILER) || ROCPROFSYS_USE_ROCPROFILER == 0 - _config->find("ROCPROFSYS_USE_ROCPROFILER")->second->set_hidden(true); - for(const auto& itr : _config->disable_category("rocprofiler")) - _config->find(itr)->second->set_hidden(true); -#endif - -#if !defined(ROCPROFSYS_USE_ROCM_SMI) || ROCPROFSYS_USE_ROCM_SMI == 0 +#if !defined(ROCPROFSYS_USE_ROCM) || ROCPROFSYS_USE_ROCM == 0 _config->find("ROCPROFSYS_USE_ROCM_SMI")->second->set_hidden(true); for(const auto& itr : _config->disable_category("rocm_smi")) _config->find(itr)->second->set_hidden(true); @@ -1567,7 +1514,7 @@ 
print_banner(std::ostream& _os) { "tag", ROCPROFSYS_GIT_DESCRIBE }, { "", ROCPROFSYS_LIBRARY_ARCH }, { "compiler", ROCPROFSYS_COMPILER_STRING }, - { "rocm", ROCPROFSYS_HIP_VERSION_COMPAT_STRING } }); + { "rocm", ROCPROFSYS_ROCM_VERSION_COMPAT_STRING } }); // () if(!_properties.empty()) @@ -1797,10 +1744,7 @@ get_debug_env() bool get_is_continuous_integration() { - if(!settings_are_configured()) - return tim::get_env("ROCPROFSYS_CI", false, false); - static auto _v = get_config()->find("ROCPROFSYS_CI"); - return static_cast&>(*_v->second).get(); + return is_ci_value; } bool @@ -1818,8 +1762,8 @@ get_debug_finalize() bool get_debug() { - static auto _v = get_config()->find("ROCPROFSYS_DEBUG"); - return static_cast&>(*_v->second).get(); + std::call_once(configure_once, []() { (void) get_config(); }); + return debug_value; } bool @@ -1842,15 +1786,15 @@ get_verbose_env() int get_verbose() { - static auto _v = get_config()->find("ROCPROFSYS_VERBOSE"); - return static_cast&>(*_v->second).get(); + std::call_once(configure_once, []() { (void) get_config(); }); + return verbose_value; } bool& get_use_perfetto() { - static auto _v = get_config()->find("ROCPROFSYS_TRACE"); - return static_cast&>(*_v->second).get(); + static auto _v = get_config()->at("ROCPROFSYS_TRACE"); + return static_cast&>(*_v).get(); } bool& @@ -1867,43 +1811,10 @@ get_use_causal() return static_cast&>(*_v->second).get(); } -bool -get_use_roctracer() -{ -#if defined(ROCPROFSYS_USE_ROCTRACER) && ROCPROFSYS_USE_ROCTRACER > 0 - static auto _v = get_config()->find("ROCPROFSYS_USE_ROCTRACER"); - return static_cast&>(*_v->second).get(); -#else - return false; -#endif -} - -bool -get_perfetto_roctracer_per_stream() -{ -#if defined(ROCPROFSYS_USE_ROCTRACER) && ROCPROFSYS_USE_ROCTRACER > 0 - static auto _v = get_config()->find("ROCPROFSYS_PERFETTO_ROCTRACER_PER_STREAM"); - return static_cast&>(*_v->second).get(); -#else - return false; -#endif -} - -bool -get_use_rocprofiler() -{ -#if 
defined(ROCPROFSYS_USE_ROCPROFILER) && ROCPROFSYS_USE_ROCPROFILER > 0 - static auto _v = get_config()->find("ROCPROFSYS_USE_ROCPROFILER"); - return static_cast&>(*_v->second).get(); -#else - return false; -#endif -} - bool get_use_rocm_smi() { -#if defined(ROCPROFSYS_USE_ROCM_SMI) && ROCPROFSYS_USE_ROCM_SMI > 0 +#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 static auto _v = get_config()->find("ROCPROFSYS_USE_ROCM_SMI"); return static_cast&>(*_v->second).get(); #else @@ -1911,17 +1822,6 @@ get_use_rocm_smi() #endif } -bool -get_use_roctx() -{ -#if defined(ROCPROFSYS_USE_ROCTRACER) && ROCPROFSYS_USE_ROCTRACER > 0 - static auto _v = get_config()->find("ROCPROFSYS_USE_ROCTX"); - return static_cast&>(*_v->second).get(); -#else - return false; -#endif -} - bool& get_use_sampling() { @@ -2031,34 +1931,6 @@ get_sampling_cputime_signal() return static_cast&>(*_v->second).get(); } -bool -get_trace_hip_api() -{ - static auto _v = get_config()->find("ROCPROFSYS_ROCTRACER_HIP_API"); - return static_cast&>(*_v->second).get(); -} - -bool -get_trace_hip_activity() -{ - static auto _v = get_config()->find("ROCPROFSYS_ROCTRACER_HIP_ACTIVITY"); - return static_cast&>(*_v->second).get(); -} - -bool -get_trace_hsa_api() -{ - static auto _v = get_config()->find("ROCPROFSYS_ROCTRACER_HSA_API"); - return static_cast&>(*_v->second).get(); -} - -bool -get_trace_hsa_activity() -{ - static auto _v = get_config()->find("ROCPROFSYS_ROCTRACER_HSA_ACTIVITY"); - return static_cast&>(*_v->second).get(); -} - size_t get_perfetto_shmem_size_hint() { @@ -2176,14 +2048,6 @@ get_thread_pool_size() return _v; } -std::string -get_trace_hsa_api_types() -{ - static std::string _v = - get_config()->get("ROCPROFSYS_ROCTRACER_HSA_API_TYPES"); - return _v; -} - std::string& get_perfetto_backend() { @@ -2360,7 +2224,7 @@ get_process_sampling_duration() std::string get_sampling_gpus() { -#if defined(ROCPROFSYS_USE_ROCM_SMI) && ROCPROFSYS_USE_ROCM_SMI > 0 +#if defined(ROCPROFSYS_USE_ROCM) && 
ROCPROFSYS_USE_ROCM > 0 static auto _v = get_config()->find("ROCPROFSYS_SAMPLING_GPUS"); return static_cast&>(*_v->second).get(); #else @@ -2375,13 +2239,6 @@ get_trace_thread_locks() return static_cast&>(*_v->second).get(); } -std::string -get_rocm_events() -{ - static auto _v = get_config()->find("ROCPROFSYS_ROCM_EVENTS"); - return static_cast&>(*_v->second).get(); -} - bool get_trace_thread_rwlocks() { diff --git a/projects/rocprofiler-systems/source/lib/core/config.hpp b/projects/rocprofiler-systems/source/lib/core/config.hpp index 2ccbce234a..609ca9816d 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.hpp +++ b/projects/rocprofiler-systems/source/lib/core/config.hpp @@ -101,17 +101,22 @@ get_exe_realpath(); template bool -set_setting_value(const std::string& _name, Tp&& _v) +set_setting_value(const std::string& _name, Tp&& _v, + settings::update_type _upd = settings::update_type::user) { - auto _user_upd = tim::settings::update_type::user; - auto _instance = tim::settings::shared_instance(); - auto _setting = _instance->find(_name); + auto* _instance = tim::settings::instance(); + if(!_instance) return false; + + auto _setting = _instance->find(_name); if(_setting == _instance->end()) return false; if(!_setting->second) return false; + auto& itr = _setting->second; - auto _upd = itr->set_user_updated(); - auto _success = itr->set(std::forward(_v), _user_upd); - if(!_success) itr->set_updated(_upd); + auto _old_upd = itr->get_updated_type(); + + auto _success = itr->set(std::forward(_v), _upd); + if(!_success) itr->set_updated(_old_upd); + return _success; } @@ -119,10 +124,13 @@ template bool set_default_setting_value(const std::string& _name, Tp&& _v) { - auto _instance = tim::settings::shared_instance(); - auto _setting = _instance->find(_name); + auto* _instance = tim::settings::instance(); + if(!_instance) return false; + + auto _setting = _instance->find(_name); if(_setting == _instance->end()) return false; if(!_setting->second) return 
false; + if(_setting->second->get_config_updated() || _setting->second->get_environ_updated()) return false; return _setting->second->set(std::forward(_v)); @@ -132,10 +140,12 @@ template std::optional get_setting_value(const std::string& _name) { - auto _instance = tim::settings::shared_instance(); - if(!_instance) return std::optional{}; + auto* _instance = tim::settings::instance(); + if(!_instance) return std::nullopt; + auto _setting = _instance->find(_name); if(_setting == _instance->end() || !_setting->second) return std::optional{}; + auto&& _ret = _setting->second->get(); return (_ret.first) ? std::optional{ _ret.second } : std::optional{}; } @@ -194,18 +204,9 @@ get_use_timemory() ROCPROFSYS_HOT; bool& get_use_causal() ROCPROFSYS_HOT; -bool -get_use_roctracer() ROCPROFSYS_HOT; - -bool -get_use_rocprofiler() ROCPROFSYS_HOT; - bool get_use_rocm_smi() ROCPROFSYS_HOT; -bool -get_use_roctx(); - bool& get_use_sampling() ROCPROFSYS_HOT; @@ -236,18 +237,6 @@ get_sampling_keep_internal(); bool get_use_rcclp(); -bool -get_trace_hip_api(); - -bool -get_trace_hip_activity(); - -bool -get_trace_hsa_api(); - -bool -get_trace_hsa_activity(); - size_t get_perfetto_shmem_size_hint(); @@ -272,9 +261,6 @@ get_perfetto_annotations() ROCPROFSYS_HOT; uint64_t get_thread_pool_size(); -std::string -get_trace_hsa_api_types(); - std::string& get_perfetto_backend(); @@ -282,9 +268,6 @@ get_perfetto_backend(); std::string get_perfetto_output_filename(); -bool -get_perfetto_roctracer_per_stream() ROCPROFSYS_HOT; - double get_trace_delay(); @@ -360,9 +343,6 @@ get_trace_thread_barriers(); bool get_trace_thread_join(); -std::string -get_rocm_events(); - bool get_use_tmp_files(); diff --git a/projects/rocprofiler-systems/source/lib/core/containers/stable_vector.hpp b/projects/rocprofiler-systems/source/lib/core/containers/stable_vector.hpp index f52cf7ebfc..929044d696 100644 --- a/projects/rocprofiler-systems/source/lib/core/containers/stable_vector.hpp +++ 
b/projects/rocprofiler-systems/source/lib/core/containers/stable_vector.hpp @@ -209,7 +209,7 @@ public: void push_back(Tp&& t); template - void emplace_back(Args&&... args); + decltype(auto) emplace_back(Args&&... args); reference operator[](size_type i); @@ -229,6 +229,14 @@ private: storage_type m_chunks; }; +template +template +decltype(auto) +stable_vector::emplace_back(Args&&... args) +{ + return last_chunk().emplace_back(std::forward(args)...); +} + template stable_vector::stable_vector(size_type count, const Tp& value) { @@ -332,14 +340,6 @@ stable_vector::push_back(Tp&& t) last_chunk().push_back(std::move(t)); } -template -template -void -stable_vector::emplace_back(Args&&... args) -{ - last_chunk().emplace_back(std::forward(args)...); -} - template typename stable_vector::reference stable_vector::operator[](size_type i) diff --git a/projects/rocprofiler-systems/source/lib/core/gpu.cpp b/projects/rocprofiler-systems/source/lib/core/gpu.cpp index 1dfc7c468b..091464f878 100644 --- a/projects/rocprofiler-systems/source/lib/core/gpu.cpp +++ b/projects/rocprofiler-systems/source/lib/core/gpu.cpp @@ -20,22 +20,19 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
+#define ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN \ + namespace tim \ + { \ + namespace cereal \ + { +#define ROCPROFILER_SDK_CEREAL_NAMESPACE_END \ + } \ + } // namespace ::tim::cereal + #include "common/defines.h" -#if !defined(ROCPROFSYS_USE_ROCM_SMI) -# define ROCPROFSYS_USE_ROCM_SMI 0 -#endif - -#if !defined(ROCPROFSYS_USE_HIP) -# define ROCPROFSYS_USE_HIP 0 -#endif - -#include "core/hip_runtime.hpp" - -#if ROCPROFSYS_USE_HIP > 0 -# if !defined(TIMEMORY_USE_HIP) -# define TIMEMORY_USE_HIP 1 -# endif +#if !defined(ROCPROFSYS_USE_ROCM) +# define ROCPROFSYS_USE_ROCM 0 #endif #include "debug.hpp" @@ -44,24 +41,11 @@ #include -#if ROCPROFSYS_USE_ROCM_SMI > 0 +#if ROCPROFSYS_USE_ROCM > 0 # include -#endif - -#if ROCPROFSYS_USE_HIP > 0 -# include - -# if !defined(ROCPROFSYS_HIP_RUNTIME_CALL) -# define ROCPROFSYS_HIP_RUNTIME_CALL(err) \ - { \ - if(err != ::tim::hip::success_v && (int) err != 0) \ - { \ - ROCPROFSYS_THROW( \ - "[%s:%d] Warning! HIP API call failed with code %i :: %s\n", \ - __FILE__, __LINE__, (int) err, hipGetErrorString(err)); \ - } \ - } -# endif +# include +# include +# include #endif namespace rocprofsys @@ -70,9 +54,7 @@ namespace gpu { namespace { -namespace scope = ::tim::scope; - -#if ROCPROFSYS_USE_ROCM_SMI > 0 +#if ROCPROFSYS_USE_ROCM > 0 # define ROCPROFSYS_ROCM_SMI_CALL(ERROR_CODE) \ ::rocprofsys::gpu::check_rsmi_error(ERROR_CODE, __FILE__, __LINE__) @@ -108,99 +90,47 @@ rsmi_init() return _rsmi_init; } -#endif +#endif // ROCPROFSYS_USE_ROCM > 0 -#if ROCPROFSYS_HIP_VERSION >= 60000 -template ::value, int> = 0> -void -device_prop_serialize(ArchiveT& archive, const char* name, const ArgT& arg) +int32_t +query_rocm_gpu_agents() { - namespace cereal = tim::cereal; - using cereal::make_nvp; - archive(make_nvp(name, arg)); -} - -template -void -device_prop_serialize(ArchiveT& archive, const char* name, ArgT arg[N]) -{ - if constexpr(!std::is_same::value && - !std::is_same::value) - { - namespace cereal = tim::cereal; - using cereal::make_nvp; - 
auto data = std::array{}; - for(size_t i = 0; i < N; ++i) - data[i] = arg[i]; - archive(make_nvp(name, data)); - } - else - { - device_prop_serialize(archive, name, std::string{ arg }); - } -} - -template -void -device_prop_serialize(ArchiveT& archive, const char* name, hipUUID_t arg) -{ - constexpr auto N = sizeof(arg.bytes); - namespace cereal = tim::cereal; - using cereal::make_nvp; - auto data = std::array{}; - data.fill('\0'); - for(size_t i = 0; i < N; ++i) - data[i] = arg.bytes[i]; - auto str_v = std::string_view{ data.data() }; - auto str = std::string{ str_v }.substr(0, str_v.find('\0')); - archive(make_nvp(name, str)); -} - -template -void -device_prop_serialize(ArchiveT& archive, const char* name, hipDeviceArch_t arg) -{ - namespace cereal = tim::cereal; - using cereal::make_nvp; - -# define ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(NAME) \ - { \ - auto val = arg.NAME; \ - archive(make_nvp(#NAME, val)); \ + int32_t _dev_cnt = 0; +#if ROCPROFSYS_USE_ROCM > 0 + auto iterator = [](rocprofiler_agent_version_t /*version*/, const void** agents, + size_t num_agents, void* user_data) -> rocprofiler_status_t { + auto* _cnt = static_cast(user_data); + for(size_t i = 0; i < num_agents; ++i) + { + const auto* _agent = static_cast(agents[i]); + if(_agent && _agent->type == ROCPROFILER_AGENT_TYPE_GPU) *_cnt += 1; } + return ROCPROFILER_STATUS_SUCCESS; + }; - archive.setNextName(name); - archive.startNode(); - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalInt32Atomics) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalFloatAtomicExch) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasSharedInt32Atomics) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasSharedFloatAtomicExch) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasFloatAtomicAdd) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalInt64Atomics) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasSharedInt64Atomics) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasDoubles) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasWarpVote) - 
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasWarpBallot) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasWarpShuffle) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasFunnelShift) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasThreadFenceSystem) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasSyncThreadsExt) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasSurfaceFuncs) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(has3dGrid) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasDynamicParallelism) - archive.finishNode(); - -# undef ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH -} + try + { + rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator, + sizeof(rocprofiler_agent_v0_t), &_dev_cnt); + } catch(std::exception& _e) + { + ROCPROFSYS_BASIC_VERBOSE( + 1, "Exception thrown getting the rocm agents: %s. _dev_cnt=%d\n", _e.what(), + _dev_cnt); + } + // rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator, + // sizeof(rocprofiler_agent_v0_t), &_dev_cnt); #endif + return _dev_cnt; +} } // namespace int -hip_device_count() +rocm_device_count() { -#if ROCPROFSYS_USE_HIP > 0 - return ::tim::hip::device_count(); +#if ROCPROFSYS_USE_ROCM > 0 + static int _num_devices = query_rocm_gpu_agents(); + return _num_devices; #else return 0; #endif @@ -209,7 +139,7 @@ hip_device_count() int rsmi_device_count() { -#if ROCPROFSYS_USE_ROCM_SMI > 0 +#if ROCPROFSYS_USE_ROCM > 0 if(!rsmi_init()) return 0; static auto _num_devices = []() { @@ -234,11 +164,8 @@ rsmi_device_count() int device_count() { -#if ROCPROFSYS_USE_ROCM_SMI > 0 - // store as static since calls after rsmi_shutdown will return zero - return rsmi_device_count(); -#elif ROCPROFSYS_USE_HIP > 0 - return ::tim::hip::device_count(); +#if ROCPROFSYS_USE_ROCM > 0 + return rocm_device_count(); #else return 0; #endif @@ -246,251 +173,44 @@ device_count() template void -add_hip_device_metadata(ArchiveT& ar) +add_device_metadata(ArchiveT& ar) { namespace cereal = tim::cereal; using cereal::make_nvp; -#if ROCPROFSYS_USE_HIP > 0 - int _device_count 
= 0; - int _current_device = 0; - hipError_t _device_count_err = hipGetDeviceCount(&_device_count); +#if ROCPROFSYS_USE_ROCM > 0 + using agent_vec_t = std::vector; - if(_device_count_err != hipSuccess) return; - - hipError_t _current_device_err = hipGetDevice(&_current_device); - - scope::destructor _dtor{ [_current_device, _current_device_err]() { - if(_current_device_err == hipSuccess) + auto _agents_vec = agent_vec_t{}; + auto iterator = [](rocprofiler_agent_version_t /*version*/, const void** agents, + size_t num_agents, void* user_data) -> rocprofiler_status_t { + auto* _agents_vec_v = static_cast(user_data); + _agents_vec_v->reserve(num_agents); + for(size_t i = 0; i < num_agents; ++i) { - ROCPROFSYS_HIP_RUNTIME_CALL(hipSetDevice(_current_device)); + const auto* _agent = static_cast(agents[i]); + if(_agent) _agents_vec_v->emplace_back(*_agent); } - } }; + return ROCPROFILER_STATUS_SUCCESS; + }; + rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator, + sizeof(rocprofiler_agent_v0_t), &_agents_vec); - if(_current_device_err != hipSuccess || _device_count == 0) return; - - ar.setNextName("hip_device_properties"); - ar.startNode(); - ar.makeArray(); - - scope::destructor _prop_dtor{ [&ar]() { ar.finishNode(); } }; - for(int dev = 0; dev < _device_count; ++dev) - { - auto _device_prop = hipDeviceProp_t{}; - int _driver_version = 0; - int _runtime_version = 0; - ROCPROFSYS_HIP_RUNTIME_CALL(hipSetDevice(dev)); - ROCPROFSYS_HIP_RUNTIME_CALL(hipGetDeviceProperties(&_device_prop, dev)); - ROCPROFSYS_HIP_RUNTIME_CALL(hipDriverGetVersion(&_driver_version)); - ROCPROFSYS_HIP_RUNTIME_CALL(hipRuntimeGetVersion(&_runtime_version)); - - ar.startNode(); - -# if ROCPROFSYS_HIP_VERSION < 60000 - using intvec_t = std::vector; - -# define ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(NAME) \ - ar(make_nvp(#NAME, _device_prop.NAME)); - -# define ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP_ARRAY(NAME, ...) 
\ - ar(make_nvp(NAME, __VA_ARGS__)); - - ar(make_nvp("name", std::string{ _device_prop.name })); - ar(make_nvp("driver_version", _driver_version)); - ar(make_nvp("runtime_version", _runtime_version)); - ar(make_nvp("capability.major_version", _device_prop.major)); - ar(make_nvp("capability.minor_version", _device_prop.minor)); - - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(totalGlobalMem) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(totalConstMem) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(clockRate) - -# if ROCPROFSYS_HIP_VERSION >= 50000 - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryClockRate) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryBusWidth) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(l2CacheSize) -# endif - - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlock) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(regsPerBlock) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(warpSize) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(multiProcessorCount) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerMultiProcessor) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerBlock) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP_ARRAY( - "maxThreadsDim", - intvec_t{ _device_prop.maxThreadsDim[0], _device_prop.maxThreadsDim[1], - _device_prop.maxThreadsDim[2] }) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP_ARRAY( - "maxGridSize", - intvec_t{ _device_prop.maxGridSize[0], _device_prop.maxGridSize[1], - _device_prop.maxGridSize[2] }) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memPitch) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(textureAlignment) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(kernelExecTimeoutEnabled) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(integrated) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(canMapHostMemory) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(ECCEnabled) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeLaunch) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceLaunch) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciDomainID) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciBusID) - 
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciDeviceID) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(computeMode) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gcnArch) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gcnArchName) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(isMultiGpuBoard) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(clockInstructionRate) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccess) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccessUsesHostPageTables) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(directManagedMemAccessFromHost) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(concurrentManagedAccess) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(concurrentKernels) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSharedMemoryPerMultiProcessor) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(asicRevision) -# else -# define ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(NAME) \ - device_prop_serialize(ar, #NAME, _device_prop.NAME); - - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(name) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(uuid) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(luid) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(luidDeviceNodeMask) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(totalGlobalMem) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlock) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(regsPerBlock) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(warpSize) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memPitch) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerBlock) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxThreadsDim) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxGridSize) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(clockRate) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(totalConstMem) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(major) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(minor) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(textureAlignment) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(texturePitchAlignment) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(deviceOverlap) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(multiProcessorCount) - 
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(kernelExecTimeoutEnabled) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(integrated) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(canMapHostMemory) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(computeMode) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture1D) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DMipmap) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DLinear) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture2D) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DMipmap) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DLinear) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DGather) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture3D) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture3DAlt) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTextureCubemap) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DLayered) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DLayered) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTextureCubemapLayered) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurface1D) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurface2D) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurface3D) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurface1DLayered) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurface2DLayered) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurfaceCubemap) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurfaceCubemapLayered) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(surfaceAlignment) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(concurrentKernels) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(ECCEnabled) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciBusID) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciDeviceID) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciDomainID) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(tccDriver) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(asyncEngineCount) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(unifiedAddressing) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryClockRate) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryBusWidth) - 
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(l2CacheSize) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(persistingL2CacheMaxSize) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerMultiProcessor) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(streamPrioritiesSupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(globalL1CacheSupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(localL1CacheSupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerMultiprocessor) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(regsPerMultiprocessor) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(managedMemory) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(isMultiGpuBoard) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(multiGpuBoardGroupID) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(hostNativeAtomicSupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(singleToDoublePrecisionPerfRatio) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccess) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(concurrentManagedAccess) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(computePreemptionSupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(canUseHostPointerForRegisteredMem) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeLaunch) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceLaunch) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlockOptin) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccessUsesHostPageTables) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(directManagedMemAccessFromHost) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxBlocksPerMultiProcessor) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(accessPolicyMaxWindowSize) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(reservedSharedMemPerBlock) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(hostRegisterSupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(sparseHipArraySupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(hostRegisterReadOnlySupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(timelineSemaphoreInteropSupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryPoolsSupported) - 
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMASupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMAFlushWritesOptions) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMAWritesOrdering) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryPoolSupportedHandleTypes) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(deferredMappingHipArraySupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(ipcEventSupported) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(clusterLaunch) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(unifiedFunctionPointers) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gcnArchName) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSharedMemoryPerMultiProcessor) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(clockInstructionRate) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(arch) - // ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(hdpMemFlushCntl) - // ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(hdpRegFlushCntl) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedFunc) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedGridDim) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedBlockDim) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedSharedMem) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(isLargeBar) - ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(asicRevision) -# endif - - const auto _compute_mode_descr = std::array{ - "Default (multiple host threads can use ::hipSetDevice() with device " - "simultaneously)", - "Exclusive (only one host thread in one process is able to use " - "::hipSetDevice() with this device)", - "Prohibited (no host thread can use ::hipSetDevice() with this device)", - "Exclusive Process (many threads in one process is able to use " - "::hipSetDevice() with this device)", - "Unknown", - nullptr - }; - - auto _compute_mode = std::min(_device_prop.computeMode, 5); - ar(make_nvp("computeModeDescription", - std::string{ _compute_mode_descr.at(_compute_mode) })); - - ar.finishNode(); - } + ar(make_nvp("rocm_agents", _agents_vec)); #else 
(void) ar; #endif } void -add_hip_device_metadata() +add_device_metadata() { if(device_count() == 0) return; ROCPROFSYS_METADATA([](auto& ar) { try { - add_hip_device_metadata(ar); + add_device_metadata(ar); } catch(std::runtime_error& _e) { ROCPROFSYS_VERBOSE(2, "%s\n", _e.what()); diff --git a/projects/rocprofiler-systems/source/lib/core/gpu.hpp b/projects/rocprofiler-systems/source/lib/core/gpu.hpp index 0989284b4d..cf8cfe6168 100644 --- a/projects/rocprofiler-systems/source/lib/core/gpu.hpp +++ b/projects/rocprofiler-systems/source/lib/core/gpu.hpp @@ -30,12 +30,12 @@ int device_count(); int -hip_device_count(); +rocm_device_count(); int rsmi_device_count(); void -add_hip_device_metadata(); +add_device_metadata(); } // namespace gpu } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/hip_runtime.hpp b/projects/rocprofiler-systems/source/lib/core/hip_runtime.hpp index 9ec902dd7d..24dbff5a78 100644 --- a/projects/rocprofiler-systems/source/lib/core/hip_runtime.hpp +++ b/projects/rocprofiler-systems/source/lib/core/hip_runtime.hpp @@ -24,7 +24,7 @@ #include "core/defines.hpp" -#if defined(ROCPROFSYS_USE_HIP) && ROCPROFSYS_USE_HIP > 0 +#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 # if defined(HIP_INCLUDE_HIP_HIP_RUNTIME_H) || \ defined(HIP_INCLUDE_HIP_HIP_RUNTIME_API_H) @@ -35,22 +35,17 @@ # define HIP_PROF_HIP_API_STRING 1 // following must be included before for ROCm 6.0+ -# if ROCPROFSYS_HIP_VERSION >= 60000 -# if defined(USE_PROF_API) -# undef USE_PROF_API -# endif -# include -# include -// must be included after hip_runtime_api.h -# include -// must be included after hip_runtime_api.h -# include -// must be included after hip_runtime_api.h -# include -# else -# include -# include +# if defined(USE_PROF_API) +# undef USE_PROF_API # endif +# include +# include +// must be included after hip_runtime_api.h +# include +// must be included after hip_runtime_api.h +# include +// must be included after hip_runtime_api.h 
+# include # include #endif diff --git a/projects/rocprofiler-systems/source/lib/core/perfetto.hpp b/projects/rocprofiler-systems/source/lib/core/perfetto.hpp index 53c11effb3..10e8d3a9e9 100644 --- a/projects/rocprofiler-systems/source/lib/core/perfetto.hpp +++ b/projects/rocprofiler-systems/source/lib/core/perfetto.hpp @@ -104,6 +104,7 @@ perfetto_counter_track::emplace(size_t _idx, const std::string& _v, for(const auto& itr : _name_data) { _missing.emplace_back(std::make_tuple(*itr, itr->c_str(), false)); + // TODO: _missing.emplace_back(*itr, itr->c_str(), false); } } auto _index = _track_data.size(); diff --git a/projects/rocprofiler-systems/source/lib/core/rccl.hpp b/projects/rocprofiler-systems/source/lib/core/rccl.hpp index 68ef13e243..53aec9476d 100644 --- a/projects/rocprofiler-systems/source/lib/core/rccl.hpp +++ b/projects/rocprofiler-systems/source/lib/core/rccl.hpp @@ -23,13 +23,7 @@ #pragma once #include "core/defines.hpp" -#include "core/hip_runtime.hpp" -#if defined(ROCPROFSYS_USE_HIP) && ROCPROFSYS_USE_HIP > 0 && \ - defined(ROCPROFSYS_USE_RCCL) && ROCPROFSYS_USE_RCCL > 0 -# if ROCPROFSYS_HIP_VERSION == 0 || ROCPROFSYS_HIP_VERSION >= 50200 -# include -# else -# include -# endif +#if defined(ROCPROFSYS_USE_RCCL) && ROCPROFSYS_USE_RCCL > 0 +# include #endif diff --git a/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp b/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp new file mode 100644 index 0000000000..c53bc25a75 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp @@ -0,0 +1,576 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "core/rocprofiler-sdk.hpp" +#include "core/config.hpp" +#include "core/debug.hpp" +#include "timemory.hpp" +#include + +#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 + +# include +# include + +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include + +# define ROCPROFILER_CALL(result) \ + { \ + rocprofiler_status_t CHECKSTATUS = (result); \ + if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ + { \ + auto msg = std::stringstream{}; \ + std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ + msg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " \ + << "rocprofiler-sdk call [" << #result \ + << "] failed with error code " << CHECKSTATUS \ + << " :: " << status_msg; \ + ROCPROFSYS_WARNING(0, "%s\n", msg.str().c_str()); \ + } \ + } + +namespace rocprofsys +{ +namespace rocprofiler_sdk +{ +namespace +{ +std::string +get_setting_name(std::string _v) +{ + constexpr auto _prefix = tim::string_view_t{ "rocprofsys_" }; + for(auto& itr : _v) + itr = tolower(itr); + auto _pos = _v.find(_prefix); + if(_pos == 0) return _v.substr(_prefix.length()); + return _v; +} + +# define ROCPROFSYS_CONFIG_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, ...) \ + [&]() { \ + auto _ret = _config->insert( \ + ENV_NAME, get_setting_name(ENV_NAME), DESCRIPTION, \ + TYPE{ INITIAL_VALUE }, \ + std::set{ "custom", "rocprofsys", "librocprof-sys", \ + __VA_ARGS__ }); \ + if(!_ret.second) \ + { \ + ROCPROFSYS_PRINT("Warning! 
Duplicate setting: %s / %s\n", \ + get_setting_name(ENV_NAME).c_str(), ENV_NAME); \ + } \ + return _config->find(ENV_NAME)->second; \ + }() + +template +std::string +to_lower(const Tp& _val) +{ + auto _v = std::string{ _val }; + for(auto& itr : _v) + itr = ::tolower(itr); + return _v; +} + +struct operation_options +{ + std::string operations_include = {}; + std::string operations_exclude = {}; + std::string operations_annotate_backtrace = {}; +}; + +auto callback_operation_option_names = + std::unordered_map{}; +auto buffered_operation_option_names = + std::unordered_map{}; + +std::unordered_set +get_operations_impl(rocprofiler_callback_tracing_kind_t kindv, + const std::string& optname = {}) +{ + static const auto callback_tracing_info = + rocprofiler::sdk::get_callback_tracing_names(); + + if(optname.empty()) + { + auto _ret = std::unordered_set{}; + for(auto iitr : callback_tracing_info[kindv].items()) + { + if(iitr.second && *iitr.second != "none") _ret.emplace(iitr.first); + } + return _ret; + } + + auto _val = get_setting_value(optname); + + ROCPROFSYS_CONDITIONAL_ABORT_F(!_val, "no setting %s\n", optname.c_str()); + + if(_val->empty()) return std::unordered_set{}; + + auto _ret = std::unordered_set{}; + for(const auto& itr : tim::delimit(*_val, " ,;:\n\t")) + { + for(auto iitr : callback_tracing_info[kindv].items()) + { + auto _re = std::regex{ itr, std::regex_constants::icase }; + if(iitr.second && std::regex_search(iitr.second->data(), _re)) + { + ROCPROFSYS_PRINT_F("%s ('%s') matched: %s\n", optname.c_str(), + itr.c_str(), iitr.second->data()); + _ret.emplace(iitr.first); + } + } + } + + return _ret; +} + +std::unordered_set +get_operations_impl(rocprofiler_buffer_tracing_kind_t kindv, + const std::string& optname = {}) +{ + static const auto buffered_tracing_info = + rocprofiler::sdk::get_buffer_tracing_names(); + + if(optname.empty()) + { + auto _ret = std::unordered_set{}; + for(auto iitr : buffered_tracing_info[kindv].items()) + { + if(iitr.second && 
*iitr.second != "none") _ret.emplace(iitr.first); + } + return _ret; + } + + auto _val = get_setting_value(optname); + + ROCPROFSYS_CONDITIONAL_ABORT_F(!_val, "no setting %s\n", optname.c_str()); + + if(_val->empty()) return std::unordered_set{}; + + auto _ret = std::unordered_set{}; + for(const auto& itr : tim::delimit(*_val, " ,;:\n\t")) + { + for(auto iitr : buffered_tracing_info[kindv].items()) + { + auto _re = std::regex{ itr, std::regex_constants::icase }; + if(iitr.second && std::regex_search(iitr.second->data(), _re)) + { + ROCPROFSYS_PRINT_F("%s ('%s') matched: %s\n", optname.c_str(), + itr.c_str(), iitr.second->data()); + _ret.emplace(iitr.first); + } + } + } + return _ret; +} + +std::vector +get_operations_impl(const std::unordered_set& _complete, + const std::unordered_set& _include, + const std::unordered_set& _exclude) +{ + auto _convert = [](const auto& _dset) { + auto _dret = std::vector{}; + _dret.reserve(_dset.size()); + for(auto itr : _dset) + _dret.emplace_back(itr); + std::sort(_dret.begin(), _dret.end()); + return _dret; + }; + + if(_include.empty() && _exclude.empty()) return _convert(_complete); + + auto _ret = (_include.empty()) ? 
_complete : _include; + for(auto itr : _exclude) + _ret.erase(itr); + + return _convert(_ret); +} + +} // namespace + +void +config_settings(const std::shared_ptr& _config) +{ + // const auto agents = std::vector{}; + const auto buffered_tracing_info = rocprofiler::sdk::get_buffer_tracing_names(); + const auto callback_tracing_info = rocprofiler::sdk::get_callback_tracing_names(); + + auto _skip_domains = + std::unordered_set{ "none", + "correlation_id_retirement", + "marker_core_api", + "marker_control_api", + "marker_name_api", + "code_object" }; + + auto _domain_choices = std::vector{}; + auto _add_domain = [&_domain_choices, &_skip_domains](std::string_view _domain) { + auto _v = to_lower(_domain); + + if(_skip_domains.count(_v) == 0) + { + auto itr = std::find(_domain_choices.begin(), _domain_choices.end(), _v); + if(itr == _domain_choices.end()) _domain_choices.emplace_back(_v); + } + }; + + static auto _option_names = std::unordered_set{}; + auto _add_operation_settings = [&_config, &_skip_domains]( + std::string_view _domain_name, const auto& _domain, + auto& _operation_option_names) { + auto _v = to_lower(_domain_name); + + if(_skip_domains.count(_v) > 0) return; + + auto _op_option_name = JOIN('_', "ROCPROFSYS_ROCM", _domain_name, "OPERATIONS"); + auto _eop_option_name = + JOIN('_', "ROCPROFSYS_ROCM", _domain_name, "OPERATIONS_EXCLUDE"); + auto _bt_option_name = + JOIN('_', "ROCPROFSYS_ROCM", _domain_name, "OPERATIONS_ANNOTATE_BACKTRACE"); + + auto _op_choices = std::vector{}; + for(auto itr : _domain.operations) + _op_choices.emplace_back(std::string{ itr }); + + if(_op_choices.empty()) return; + + _operation_option_names.emplace( + _domain.value, + operation_options{ _op_option_name, _eop_option_name, _bt_option_name }); + + if(_option_names.emplace(_op_option_name).second) + { + ROCPROFSYS_CONFIG_SETTING( + std::string, _op_option_name.c_str(), + "Inclusive filter for domain operations (for API domains, this selects " + "the functions to trace) [regex 
supported]", + std::string{}, "rocm", "rocprofiler-sdk", "advanced") + ->set_choices(_op_choices); + } + + if(_option_names.emplace(_eop_option_name).second) + { + ROCPROFSYS_CONFIG_SETTING( + std::string, _eop_option_name.c_str(), + "Exclusive filter for domain operations applied after the inclusive " + "filter (for API domains, removes function from trace) [regex supported]", + std::string{}, "rocm", "rocprofiler-sdk", "advanced") + ->set_choices(_op_choices); + } + + if(_option_names.emplace(_bt_option_name).second) + { + ROCPROFSYS_CONFIG_SETTING( + std::string, _bt_option_name.c_str(), + "Specification of domain operations which will record a backtrace (for " + "API domains, this is a list of function names) [regex supported]", + std::string{}, "rocm", "rocprofiler-sdk", "advanced") + ->set_choices(_op_choices); + } + }; + + _domain_choices.reserve(buffered_tracing_info.size()); + _domain_choices.reserve(callback_tracing_info.size()); + _add_domain("hip_api"); + _add_domain("hsa_api"); + _add_domain("marker_api"); + + for(const auto& itr : buffered_tracing_info) + _add_domain(itr.name); + + for(const auto& itr : callback_tracing_info) + _add_domain(itr.name); + + std::sort(_domain_choices.begin(), _domain_choices.end()); + + namespace join = ::timemory::join; + auto _domain_description = + JOIN("", "Specification of ROCm domains to trace/profile. Choices: ", + join::join(join::array_config{ ", ", "", "" }, _domain_choices)); + + ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_ROCM_DOMAINS", _domain_description, + std::string{ "hip_runtime_api,marker_api,kernel_dispatch," + "memory_copy,scratch_memory,page_migration" }, + "rocm", "rocprofiler-sdk") + ->set_choices(_domain_choices); + + ROCPROFSYS_CONFIG_SETTING( + std::string, "ROCPROFSYS_ROCM_EVENTS", + "ROCm hardware counters. Use ':device=N' syntax to specify collection on device " + "number N, e.g. ':device=0'. 
If no device specification is provided, the event " + "is collected on every available device", + "", "rocm", "hardware_counters"); + + _skip_domains.emplace("kernel_dispatch"); + _skip_domains.emplace("page_migration"); + _skip_domains.emplace("scratch_memory"); + + _add_operation_settings( + "MARKER_API", callback_tracing_info[ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API], + callback_operation_option_names); + + for(const auto& itr : callback_tracing_info) + _add_operation_settings(itr.name, itr, callback_operation_option_names); + + for(const auto& itr : buffered_tracing_info) + _add_operation_settings(itr.name, itr, buffered_operation_option_names); +} + +std::unordered_set +get_callback_domains() +{ + const auto callback_tracing_info = rocprofiler::sdk::get_callback_tracing_names(); + const auto supported = std::unordered_set{ + ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API, + ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API, + ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API, + ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API, + ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API, + ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, + ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API, + ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, + }; + + auto _data = std::unordered_set{}; + auto _domains = + tim::delimit(config::get_setting_value("ROCPROFSYS_ROCM_DOMAINS") + .value_or(std::string{}), + " ,;:\t\n"); + + const auto valid_choices = + settings::instance()->at("ROCPROFSYS_ROCM_DOMAINS")->get_choices(); + + auto invalid_domain = [&valid_choices](const auto& domainv) { + return !std::any_of(valid_choices.begin(), valid_choices.end(), + [&domainv](const auto& aitr) { return (aitr == domainv); }); + }; + + for(const auto& itr : _domains) + { + if(invalid_domain(itr)) + { + ROCPROFSYS_THROW("unsupported ROCPROFSYS_ROCM_DOMAINS value: %s\n", + itr.c_str()); + } + + if(itr == "hsa_api") + { + for(auto eitr : { ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API, + 
ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API, + ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API, + ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API }) + _data.emplace(eitr); + } + else if(itr == "hip_api") + { + for(auto eitr : { ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, + ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API }) + _data.emplace(eitr); + } + else if(itr == "marker_api" || itr == "roctx") + { + _data.emplace(ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API); + } + else + { + for(size_t idx = 0; idx < callback_tracing_info.size(); ++idx) + { + auto ditr = callback_tracing_info[idx]; + auto dval = static_cast(idx); + if(itr == to_lower(ditr.name) && supported.count(dval) > 0) + { + _data.emplace(dval); + break; + } + } + } + } + + return _data; +} + +std::unordered_set +get_buffered_domains() +{ + const auto buffer_tracing_info = rocprofiler::sdk::get_buffer_tracing_names(); + const auto supported = std::unordered_set{ + ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, + ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, + ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, + ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, + }; + + auto _data = std::unordered_set{}; + auto _domains = + tim::delimit(config::get_setting_value("ROCPROFSYS_ROCM_DOMAINS") + .value_or(std::string{}), + " ,;:\t\n"); + const auto valid_choices = + settings::instance()->at("ROCPROFSYS_ROCM_DOMAINS")->get_choices(); + + auto invalid_domain = [&valid_choices](const auto& domainv) { + return !std::any_of(valid_choices.begin(), valid_choices.end(), + [&domainv](const auto& aitr) { return (aitr == domainv); }); + }; + + for(const auto& itr : _domains) + { + if(invalid_domain(itr)) + { + ROCPROFSYS_THROW("unsupported ROCPROFSYS_ROCM_DOMAINS value: %s\n", + itr.c_str()); + } + + if(itr == "hsa_api") + { + for(auto eitr : { ROCPROFILER_BUFFER_TRACING_HSA_CORE_API, + ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API, + ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API, + ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API }) + 
_data.emplace(eitr); + } + else if(itr == "hip_api") + { + for(auto eitr : { ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, + ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API }) + _data.emplace(eitr); + } + else if(itr == "marker_api" || itr == "roctx") + { + _data.emplace(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API); + } + else + { + for(size_t idx = 0; idx < buffer_tracing_info.size(); ++idx) + { + auto ditr = buffer_tracing_info[idx]; + auto dval = static_cast(idx); + if(itr == to_lower(ditr.name) && supported.count(dval) > 0) + { + _data.emplace(dval); + break; + } + } + } + } + + return _data; +} + +std::vector +get_rocm_events() +{ + return tim::delimit( + get_setting_value("ROCPROFSYS_ROCM_EVENTS").value_or(std::string{}), + " ,;\t\n"); +} + +std::vector +get_operations(rocprofiler_callback_tracing_kind_t kindv) +{ + ROCPROFSYS_CONDITIONAL_ABORT_F( + callback_operation_option_names.count(kindv) == 0, + "callback_operation_operation_names does not have value for %i\n", kindv); + + auto _complete = get_operations_impl(kindv); + auto _include = get_operations_impl( + kindv, callback_operation_option_names.at(kindv).operations_include); + auto _exclude = get_operations_impl( + kindv, callback_operation_option_names.at(kindv).operations_exclude); + + return get_operations_impl(_complete, _include, _exclude); +} + +std::vector +get_operations(rocprofiler_buffer_tracing_kind_t kindv) +{ + ROCPROFSYS_CONDITIONAL_ABORT_F( + buffered_operation_option_names.count(kindv) == 0, + "buffered_operation_option_names does not have value for %i\n", kindv); + + auto _complete = get_operations_impl(kindv); + auto _include = get_operations_impl( + kindv, buffered_operation_option_names.at(kindv).operations_include); + auto _exclude = get_operations_impl( + kindv, buffered_operation_option_names.at(kindv).operations_exclude); + + return get_operations_impl(_complete, _include, _exclude); +} + +std::unordered_set +get_backtrace_operations(rocprofiler_callback_tracing_kind_t kindv) +{ + 
ROCPROFSYS_CONDITIONAL_ABORT_F( + callback_operation_option_names.count(kindv) == 0, + "callback_operation_operation_names does not have value for %i\n", kindv); + + auto _data = get_operations_impl( + kindv, callback_operation_option_names.at(kindv).operations_annotate_backtrace); + auto _ret = std::unordered_set{}; + _ret.reserve(_data.size()); + for(auto itr : _data) + _ret.emplace(itr); + return _ret; +} + +std::unordered_set +get_backtrace_operations(rocprofiler_buffer_tracing_kind_t kindv) +{ + ROCPROFSYS_CONDITIONAL_ABORT_F( + buffered_operation_option_names.count(kindv) == 0, + "buffered_operation_option_names does not have value for %i\n", kindv); + + auto _data = get_operations_impl( + kindv, buffered_operation_option_names.at(kindv).operations_annotate_backtrace); + auto _ret = std::unordered_set{}; + _ret.reserve(_data.size()); + for(auto itr : _data) + _ret.emplace(itr); + return _ret; +} +} // namespace rocprofiler_sdk +} // namespace rocprofsys + +#else + +namespace rocprofsys +{ +namespace rocprofiler_sdk +{ +void +config_settings(const std::shared_ptr&) +{} +} // namespace rocprofiler_sdk +} // namespace rocprofsys + +#endif diff --git a/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.hpp b/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.hpp new file mode 100644 index 0000000000..5ceee14e2a --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.hpp @@ -0,0 +1,70 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "core/timemory.hpp" + +#if defined(ROCPROFSYS_USE_ROCM) +# include +# include +#endif + +#include +#include +#include +#include +#include + +namespace rocprofsys +{ +namespace rocprofiler_sdk +{ +void +config_settings(const std::shared_ptr&); + +#if defined(ROCPROFSYS_USE_ROCM) + +std::unordered_set +get_callback_domains(); + +std::unordered_set +get_buffered_domains(); + +std::vector +get_operations(rocprofiler_callback_tracing_kind_t kindv); + +std::vector +get_operations(rocprofiler_buffer_tracing_kind_t kindv); + +std::vector +get_rocm_events(); + +std::unordered_set +get_backtrace_operations(rocprofiler_callback_tracing_kind_t kindv); + +std::unordered_set +get_backtrace_operations(rocprofiler_buffer_tracing_kind_t kindv); + +#endif +} // namespace rocprofiler_sdk +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/state.cpp b/projects/rocprofiler-systems/source/lib/core/state.cpp index 7434b7de18..fd8c4d4403 100644 --- a/projects/rocprofiler-systems/source/lib/core/state.cpp +++ b/projects/rocprofiler-systems/source/lib/core/state.cpp @@ -21,6 +21,7 @@ // SOFTWARE. 
#include "state.hpp" +#include "common/static_object.hpp" #include "config.hpp" #include "debug.hpp" #include "utility.hpp" @@ -35,8 +36,9 @@ namespace auto& get_state_value() { - static auto _v = std::atomic{ State::PreInit }; - return _v; + static auto*& _v = common::static_object>::construct( + common::do_not_destroy{}, State::PreInit); + return *_v; } ThreadState& diff --git a/projects/rocprofiler-systems/source/lib/core/utility.hpp b/projects/rocprofiler-systems/source/lib/core/utility.hpp index ac0f71fe6d..8ac877edc0 100644 --- a/projects/rocprofiler-systems/source/lib/core/utility.hpp +++ b/projects/rocprofiler-systems/source/lib/core/utility.hpp @@ -74,6 +74,15 @@ get_reserved_vector(size_t _n) return _v; } +/// returns a vector with a preallocated buffer +template +inline decltype(auto) +get_reserved_vector(std::vector&& _v, size_t _n) +{ + _v.reserve(_n); + return std::forward>(_v); +} + template struct offset_index_sequence; diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/CMakeLists.txt index 5a36c71128..565b50e6c9 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/CMakeLists.txt @@ -25,7 +25,8 @@ target_include_directories( PUBLIC $ $ $ - $) + $ + PRIVATE ${rocprofiler-sdk_INCLUDE_DIR}) target_link_libraries( rocprofiler-systems-dl-library PUBLIC $ diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl.cpp index 77d59f0e73..3d2f18e016 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl.cpp @@ -54,6 +54,14 @@ #include #include +#if !defined(ROCPROFSYS_USE_ROCM) +# define ROCPROFSYS_USE_ROCM 0 +#endif + +#if ROCPROFSYS_USE_ROCM > 0 +# include +#endif + 
//--------------------------------------------------------------------------------------// #define ROCPROFSYS_DLSYM(VARNAME, HANDLE, FUNCNAME) \ @@ -79,6 +87,7 @@ //--------------------------------------------------------------------------------------// using main_func_t = int (*)(int, char**, char**); +using init_func_t = void (*)(void); std::ostream& operator<<(std::ostream& _os, const SpaceHandle& _handle) @@ -360,14 +369,8 @@ struct ROCPROFSYS_INTERNAL_API indirect ROCPROFSYS_DLSYM(kokkosp_dual_view_modify_f, m_omnihandle, "kokkosp_dual_view_modify"); -#if ROCPROFSYS_USE_ROCTRACER > 0 - ROCPROFSYS_DLSYM(hsa_on_load_f, m_omnihandle, "OnLoad"); - ROCPROFSYS_DLSYM(hsa_on_unload_f, m_omnihandle, "OnUnload"); -#endif - -#if ROCPROFSYS_USE_ROCPROFILER > 0 - ROCPROFSYS_DLSYM(rocp_on_load_tool_prop_f, m_omnihandle, "OnLoadToolProp"); - ROCPROFSYS_DLSYM(rocp_on_unload_tool_f, m_omnihandle, "OnUnloadTool"); +#if ROCPROFSYS_USE_ROCM > 0 + ROCPROFSYS_DLSYM(rocprofiler_configure_f, m_omnihandle, "rocprofiler_configure"); #endif #if ROCPROFSYS_USE_OMPT == 0 @@ -460,16 +463,9 @@ public: void (*kokkosp_dual_view_sync_f)(const char*, const void* const, bool) = nullptr; void (*kokkosp_dual_view_modify_f)(const char*, const void* const, bool) = nullptr; - // HSA functions -#if ROCPROFSYS_USE_ROCTRACER > 0 - bool (*hsa_on_load_f)(HsaApiTable*, uint64_t, uint64_t, const char* const*) = nullptr; - void (*hsa_on_unload_f)() = nullptr; -#endif - - // ROCP functions -#if ROCPROFSYS_USE_ROCPROFILER > 0 - void (*rocp_on_load_tool_prop_f)(void* settings) = nullptr; - void (*rocp_on_unload_tool_f)() = nullptr; +#if ROCPROFSYS_USE_ROCM > 0 + rocprofiler_tool_configure_result_t* (*rocprofiler_configure_f)( + uint32_t, const char*, uint32_t, rocprofiler_client_id_t*) = nullptr; #endif // OpenMP functions @@ -644,13 +640,18 @@ extern "C" bool _invoked = false; ROCPROFSYS_DL_INVOKE_STATUS(_invoked, get_indirect().rocprofsys_init_f, a, b, c); + if(_invoked) { dl::get_active() = true; 
dl::get_inited() = true; dl::_rocprofsys_dl_verbose = dl::get_rocprofsys_dl_env(); - if(dl::get_instrumented() < dl::InstrumentMode::PythonProfile) + + if(dl::get_instrumented() >= dl::InstrumentMode::None && + dl::get_instrumented() < dl::InstrumentMode::PythonProfile) + { dl::rocprofsys_postinit((c) ? std::string{ c } : std::string{}); + } } } @@ -1069,43 +1070,17 @@ extern "C" //----------------------------------------------------------------------------------// // - // HSA + // ROCm // //----------------------------------------------------------------------------------// -#if ROCPROFSYS_USE_ROCTRACER > 0 - bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) +#if ROCPROFSYS_USE_ROCM > 0 + rocprofiler_tool_configure_result_t* rocprofiler_configure( + uint32_t version, const char* runtime_version, uint32_t priority, + rocprofiler_client_id_t* client_id) { - return ROCPROFSYS_DL_INVOKE(get_indirect().hsa_on_load_f, table, runtime_version, - failed_tool_count, failed_tool_names); - } - - void OnUnload() { return ROCPROFSYS_DL_INVOKE(get_indirect().hsa_on_unload_f); } -#endif - - //----------------------------------------------------------------------------------// - // - // ROCP - // - //----------------------------------------------------------------------------------// - -#if ROCPROFSYS_USE_ROCPROFILER > 0 - void OnLoadToolProp(void* settings) - { - ROCPROFSYS_DL_LOG( - -16, - "invoking %s(rocprofiler_settings_t*) within librocprof-sys-dl.so " - "will cause a silent failure for rocprofiler. 
ROCP_TOOL_LIB " - "should be set to librocprof-sys.so\n", - __FUNCTION__); - abort(); - return ROCPROFSYS_DL_INVOKE(get_indirect().rocp_on_load_tool_prop_f, settings); - } - - void OnUnloadTool() - { - return ROCPROFSYS_DL_INVOKE(get_indirect().rocp_on_unload_tool_f); + return ROCPROFSYS_DL_INVOKE(get_indirect().rocprofiler_configure_f, version, + runtime_version, priority, client_id); } #endif @@ -1227,7 +1202,9 @@ rocprofsys_preinit() void rocprofsys_postinit(std::string _exe) { - switch(get_instrumented()) + InstrumentMode instrumentMode = get_instrumented(); + + switch(instrumentMode) { case InstrumentMode::None: case InstrumentMode::BinaryRewrite: @@ -1393,20 +1370,122 @@ verify_instrumented_preloaded() bool _handle_preload = rocprofsys_preload(); main_func_t main_real = nullptr; +init_func_t init_real = nullptr; } // namespace } // namespace dl } // namespace rocprofsys extern "C" { + void rocprofsys_main_init(void) ROCPROFSYS_INTERNAL_API; int rocprofsys_main(int argc, char** argv, char** envp) ROCPROFSYS_INTERNAL_API; + + void rocprofsys_set_main_init(init_func_t) ROCPROFSYS_INTERNAL_API; void rocprofsys_set_main(main_func_t) ROCPROFSYS_INTERNAL_API; + void rocprofsys_set_main_init(init_func_t _init_real) + { + ::rocprofsys::dl::init_real = _init_real; + } + void rocprofsys_set_main(main_func_t _main_real) { ::rocprofsys::dl::main_real = _main_real; } + // void rocprofsys_main_init(int argc, char** argv, char** envp) + // { + // ROCPROFSYS_DL_LOG(0, "%s\n", __FUNCTION__); + // using ::rocprofsys::common::get_env; + // using ::rocprofsys::dl::get_default_mode; + + // // prevent re-entry + // static int _reentry = 0; + // if(_reentry > 0) return -1; + // _reentry = 1; + + // int ret = 0; + + // if(::rocprofsys::dl::init_real) + // { + // if(envp) + // { + // size_t _idx = 0; + // while(envp[_idx] != nullptr) + // { + // auto _env_v = std::string_view{ envp[_idx++] }; + // if(_env_v.find("ROCPROFSYS") != 0 && + // _env_v.find("librocprof-sys") == 
std::string_view::npos) + // continue; + // auto _pos = _env_v.find('='); + // if(_pos < _env_v.length()) + // { + // auto _var = std::string{ _env_v }.substr(0, _pos); + // auto _val = std::string{ _env_v }.substr(_pos + 1); + // ROCPROFSYS_DL_LOG(1, "%s(%s, %s)\n", "rocprofsys_set_env", + // _var.c_str(), _val.c_str()); + // setenv(_var.c_str(), _val.c_str(), 0); + // } + // } + // } + + // ret = (*::rocprofsys::dl::init_real)(argc, argv, envp); + // } + // else + // { + // ROCPROFSYS_DL_LOG( + // 0, "%s\n", + // "Unsuccessful wrapping of init: nullptr to real init function"); + // } + + // auto _mode = get_env("ROCPROFSYS_MODE", get_default_mode()); + // rocprofsys_init(_mode.c_str(), + // dl::get_instrumented() == dl::InstrumentMode::BinaryRewrite, + // argv[0]); + + // return ret; + // } + + // int rocprofsys_main(int argc, char** argv, char** envp) + // { + // ROCPROFSYS_DL_LOG(0, "%s\n", __FUNCTION__); + + // // prevent re-entry + // static int _reentry = 0; + // if(_reentry > 0) return -1; + // _reentry = 1; + + // if(!::rocprofsys::dl::main_real) + // throw std::runtime_error("[rocprof-sys][dl] Unsuccessful wrapping of main: + // " + // "nullptr to real main function"); + + // rocprofsys_push_trace(basename(argv[0])); + + // int ret = (*::rocprofsys::dl::main_real)(argc, argv, envp); + + // rocprofsys_pop_trace(basename(argv[0])); + // rocprofsys_finalize(); + + // return ret; + // } + + void rocprofsys_main_init(void) + { + ROCPROFSYS_DL_LOG(0, "[%s].\n", __FUNCTION__); + + if(::rocprofsys::dl::init_real) + { + // Call real init function + (*::rocprofsys::dl::init_real)(); + } + else + { + ROCPROFSYS_DL_LOG( + 0, "Unsuccessful wrapping of init: real_init function is nullptr.\n"); + } + } + int rocprofsys_main(int argc, char** argv, char** envp) { ROCPROFSYS_DL_LOG(0, "%s\n", __FUNCTION__); @@ -1420,7 +1499,7 @@ extern "C" if(!::rocprofsys::dl::main_real) throw std::runtime_error("[rocprof-sys][dl] Unsuccessful wrapping of main: " - "nullptr to real main 
function"); + "real_main function is nullptr."); if(envp) { @@ -1455,4 +1534,4 @@ extern "C" return ret; } -} +} // extern "C" diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl/dl.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl/dl.hpp index 1a34612b13..cfd269c7fd 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl/dl.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl/dl.hpp @@ -53,12 +53,8 @@ # define ROCPROFSYS_USE_OMPT 0 #endif -#if !defined(ROCPROFSYS_USE_ROCTRACER) -# define ROCPROFSYS_USE_ROCTRACER 0 -#endif - -#if !defined(ROCPROFSYS_USE_ROCPROFILER) -# define ROCPROFSYS_USE_ROCPROFILER 0 +#if !defined(ROCPROFSYS_USE_ROCM) +# define ROCPROFSYS_USE_ROCM 0 #endif //--------------------------------------------------------------------------------------// @@ -177,20 +173,12 @@ extern "C" const char*) ROCPROFSYS_PUBLIC_API; # endif -# if ROCPROFSYS_USE_ROCTRACER > 0 - // HSA - struct HsaApiTable; - bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) ROCPROFSYS_PUBLIC_API; - void OnUnload() ROCPROFSYS_PUBLIC_API; +# if ROCPROFSYS_USE_ROCM > 0 + struct rocprofiler_tool_configure_result_t; + struct rocprofiler_client_id_t; # endif -# if ROCPROFSYS_USE_ROCPROFILER > 0 - // ROCP - void OnLoadToolProp(void* settings) ROCPROFSYS_PUBLIC_API; - void OnUnloadTool() ROCPROFSYS_PUBLIC_API; -# endif -#endif +#endif // ROCPROFSYS_DL_SOURCE } namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/main.c b/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/main.c index 7391a4e7a0..a654efaade 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/main.c +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/main.c @@ -37,20 +37,19 @@ // local type definitions // typedef int (*main_func_t)(int, char**, char**); -typedef int (*start_main_t)(int (*)(int, char**, char**), int, char**, 
- int (*)(int, char**, char**), void (*)(void), void (*)(void), - void*); +typedef void (*init_func_t)(void); +typedef int (*start_main_t)(int (*)(int, char**, char**), int, char**, void (*)(void), + void (*)(void), void (*)(void), void*); // // local function declarations // int -rocprofsys_libc_start_main(int (*)(int, char**, char**), int, char**, - int (*)(int, char**, char**), void (*)(void), void (*)(void), - void*) ROCPROFSYS_INTERNAL_API; +rocprofsys_libc_start_main(int (*)(int, char**, char**), int, char**, void (*)(void), + void (*)(void), void (*)(void), void*) ROCPROFSYS_INTERNAL_API; int -__libc_start_main(int (*)(int, char**, char**), int, char**, int (*)(int, char**, char**), +__libc_start_main(int (*)(int, char**, char**), int, char**, void (*)(void), void (*)(void), void (*)(void), void*) ROCPROFSYS_PUBLIC_API; // @@ -79,12 +78,18 @@ basename(const char*); extern void rocprofsys_set_main(main_func_t) ROCPROFSYS_INTERNAL_API; +extern void +rocprofsys_set_main_init(init_func_t func) ROCPROFSYS_INTERNAL_API; + +extern void +rocprofsys_main_init(void) ROCPROFSYS_INTERNAL_API; + extern int rocprofsys_main(int argc, char** argv, char** envp) ROCPROFSYS_INTERNAL_API; int rocprofsys_libc_start_main(int (*_main)(int, char**, char**), int _argc, char** _argv, - int (*_init)(int, char**, char**), void (*_fini)(void), + void (*_init)(void), void (*_fini)(void), void (*_rtld_fini)(void), void* _stack_end) { int _preload = rocprofsys_preload_library(); @@ -97,8 +102,9 @@ rocprofsys_libc_start_main(int (*_main)(int, char**, char**), int _argc, char** // get the address of this function void* _this_func = __builtin_return_address(0); - // Save the real main function address + // Save the real main function addresses rocprofsys_set_main(_main); + rocprofsys_set_main_init(_init); // Find the real __libc_start_main() start_main_t user_main = dlsym(RTLD_NEXT, "__libc_start_main"); @@ -115,6 +121,10 @@ rocprofsys_libc_start_main(int (*_main)(int, char**, char**), int 
_argc, char** } else { + // return user_main(rocprofsys_main, _argc, _argv, + // rocprofsys_main_init, _fini, + // _rtld_fini, _stack_end); + // call rocprof-sys main function wrapper return user_main(rocprofsys_main, _argc, _argv, _init, _fini, _rtld_fini, _stack_end); @@ -129,9 +139,10 @@ rocprofsys_libc_start_main(int (*_main)(int, char**, char**), int _argc, char** int __libc_start_main(int (*_main)(int, char**, char**), int _argc, char** _argv, - int (*_init)(int, char**, char**), void (*_fini)(void), - void (*_rtld_fini)(void), void* _stack_end) + void (*_init)(void), void (*_fini)(void), void (*_rtld_fini)(void), + void* _stack_end) { + // intercept the main function return rocprofsys_libc_start_main(_main, _argc, _argv, _init, _fini, _rtld_fini, _stack_end); } diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h b/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h index d559799ba0..fbc17bc7cf 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h @@ -43,19 +43,21 @@ extern "C" ROCPROFSYS_CATEGORY_PYTHON, ROCPROFSYS_CATEGORY_USER, ROCPROFSYS_CATEGORY_HOST, - ROCPROFSYS_CATEGORY_DEVICE_HIP, - ROCPROFSYS_CATEGORY_DEVICE_HSA, - ROCPROFSYS_CATEGORY_ROCM_HIP, - ROCPROFSYS_CATEGORY_ROCM_HSA, - ROCPROFSYS_CATEGORY_ROCM_ROCTX, + ROCPROFSYS_CATEGORY_ROCM, + ROCPROFSYS_CATEGORY_ROCM_HIP_API, + ROCPROFSYS_CATEGORY_ROCM_HSA_API, + ROCPROFSYS_CATEGORY_ROCM_KERNEL_DISPATCH, + ROCPROFSYS_CATEGORY_ROCM_MEMORY_COPY, + ROCPROFSYS_CATEGORY_ROCM_SCRATCH_MEMORY, + ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION, + ROCPROFSYS_CATEGORY_ROCM_COUNTER_COLLECTION, + ROCPROFSYS_CATEGORY_ROCM_MARKER_API, ROCPROFSYS_CATEGORY_ROCM_SMI, ROCPROFSYS_CATEGORY_ROCM_SMI_BUSY, ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP, ROCPROFSYS_CATEGORY_ROCM_SMI_POWER, 
ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE, ROCPROFSYS_CATEGORY_ROCM_RCCL, - ROCPROFSYS_CATEGORY_ROCTRACER, - ROCPROFSYS_CATEGORY_ROCPROFILER, ROCPROFSYS_CATEGORY_SAMPLING, ROCPROFSYS_CATEGORY_PTHREAD, ROCPROFSYS_CATEGORY_KOKKOS, diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp index e5d1017144..92ee2b4874 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp @@ -26,6 +26,7 @@ // #include "api.hpp" #include "common/setup.hpp" +#include "common/static_object.hpp" #include "core/categories.hpp" #include "core/components/fwd.hpp" #include "core/concepts.hpp" @@ -46,13 +47,12 @@ #include "library/components/mpi_gotcha.hpp" #include "library/components/numa_gotcha.hpp" #include "library/components/pthread_gotcha.hpp" -#include "library/components/rocprofiler.hpp" #include "library/coverage.hpp" #include "library/ompt.hpp" #include "library/process_sampler.hpp" #include "library/ptl.hpp" #include "library/rcclp.hpp" -#include "library/rocprofiler.hpp" +#include "library/rocprofiler-sdk.hpp" #include "library/runtime.hpp" #include "library/sampling.hpp" #include "library/thread_data.hpp" @@ -399,10 +399,6 @@ rocprofsys_init_library_hidden() if(_debug_init) config::set_setting_value("ROCPROFSYS_DEBUG", _debug_value); } }; - tim::trait::runtime_enabled::set(get_use_roctracer()); - tim::trait::runtime_enabled::set(get_use_roctracer() && - get_use_timemory()); - ROCPROFSYS_CONDITIONAL_BASIC_PRINT_F(_debug_init, "\n"); } @@ -718,13 +714,6 @@ rocprofsys_finalize_hidden(void) } } - if(get_use_roctracer()) - { - ROCPROFSYS_VERBOSE_F(1, "Flushing roctracer...\n"); - // ensure that roctracer is flushed before setting the state to finalized - comp::roctracer::flush(); - } - set_state(State::Finalized); push_enable_sampling_on_child_threads(false); @@ -785,6 +774,14 @@ rocprofsys_finalize_hidden(void) 
ompt::shutdown(); } +#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 + // TODO: option for rocm + { + ROCPROFSYS_VERBOSE_F(1, "Shutting down ROCm...\n"); + rocprofiler_sdk::shutdown(); + } +#endif + ROCPROFSYS_DEBUG_F("Stopping and destroying instrumentation bundles...\n"); for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) { @@ -835,24 +832,6 @@ rocprofsys_finalize_hidden(void) process_sampler::shutdown(); } - if(get_use_roctracer()) - { - ROCPROFSYS_VERBOSE_F(1, "Shutting down roctracer...\n"); - // ensure that threads running roctracer callbacks shutdown - comp::roctracer::shutdown(); - - // join extra thread(s) used by roctracer - ROCPROFSYS_VERBOSE_F(2, "Waiting on roctracer tasks...\n"); - tasking::join(); - } - - if(get_use_rocprofiler()) - { - ROCPROFSYS_VERBOSE_F(1, "Shutting down rocprofiler...\n"); - rocprofiler::post_process(); - rocprofiler::rocm_cleanup(); - } - if(get_use_causal()) { ROCPROFSYS_VERBOSE_F(1, "Shutting down causal sampling...\n"); @@ -919,7 +898,7 @@ rocprofsys_finalize_hidden(void) process_sampler::post_process(); } - // shutdown tasking before timemory is finalized, especially the roctracer thread-pool + // shutdown tasking before timemory is finalized ROCPROFSYS_VERBOSE_F(1, "Shutting down thread-pools...\n"); tasking::shutdown(); @@ -991,6 +970,8 @@ rocprofsys_finalize_hidden(void) tim::signals::enable_signal_detection( { tim::signals::sys_signal::SegFault, tim::signals::sys_signal::Stop }, [](int) {}); + + common::destroy_static_objects(); } //======================================================================================// diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/CMakeLists.txt index 776b06b5e2..5084c43958 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/CMakeLists.txt @@ -23,8 +23,7 @@ 
set(library_headers ${CMAKE_CURRENT_LIST_DIR}/rcclp.hpp ${CMAKE_CURRENT_LIST_DIR}/rocm.hpp ${CMAKE_CURRENT_LIST_DIR}/rocm_smi.hpp - ${CMAKE_CURRENT_LIST_DIR}/rocprofiler.hpp - ${CMAKE_CURRENT_LIST_DIR}/roctracer.hpp + ${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.hpp ${CMAKE_CURRENT_LIST_DIR}/runtime.hpp ${CMAKE_CURRENT_LIST_DIR}/sampling.hpp ${CMAKE_CURRENT_LIST_DIR}/thread_data.hpp @@ -35,37 +34,23 @@ set(library_headers target_sources(rocprofiler-systems-object-library PRIVATE ${library_sources} ${library_headers}) -if(ROCPROFSYS_USE_ROCTRACER OR ROCPROFSYS_USE_ROCPROFILER) - target_sources(rocprofiler-systems-object-library - PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rocm.cpp) -endif() - -if(ROCPROFSYS_USE_ROCTRACER) - target_sources(rocprofiler-systems-object-library - PRIVATE ${CMAKE_CURRENT_LIST_DIR}/roctracer.cpp) -endif() - if(ROCPROFSYS_USE_RCCL) target_sources(rocprofiler-systems-object-library PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rcclp.cpp) endif() -if(ROCPROFSYS_USE_ROCPROFILER) +if(ROCPROFSYS_USE_ROCM) target_sources( rocprofiler-systems-object-library - PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rocprofiler.cpp - ${CMAKE_CURRENT_LIST_DIR}/rocprofiler.hpp) -endif() - -if(ROCPROFSYS_USE_ROCM_SMI) - target_sources(rocprofiler-systems-object-library - PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rocm_smi.cpp) + PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rocm.cpp + ${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.cpp + ${CMAKE_CURRENT_LIST_DIR}/rocm_smi.cpp) + add_subdirectory(rocprofiler-sdk) endif() add_subdirectory(causal) add_subdirectory(components) add_subdirectory(coverage) -add_subdirectory(rocm) add_subdirectory(tracing) set(ndebug_sources diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/CMakeLists.txt index 16f841a8a7..4c66da1551 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/CMakeLists.txt +++ 
b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/CMakeLists.txt @@ -28,8 +28,6 @@ set(component_headers ${CMAKE_CURRENT_LIST_DIR}/mpi_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/numa_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/rcclp.hpp - ${CMAKE_CURRENT_LIST_DIR}/rocprofiler.hpp - ${CMAKE_CURRENT_LIST_DIR}/roctracer.hpp ${CMAKE_CURRENT_LIST_DIR}/pthread_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/pthread_create_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/pthread_mutex_gotcha.hpp) @@ -37,16 +35,6 @@ set(component_headers target_sources(rocprofiler-systems-object-library PRIVATE ${component_sources} ${component_headers}) -if(ROCPROFSYS_USE_ROCPROFILER) - target_sources(rocprofiler-systems-object-library - PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rocprofiler.cpp) -endif() - -if(ROCPROFSYS_USE_ROCTRACER) - target_sources(rocprofiler-systems-object-library - PRIVATE ${CMAKE_CURRENT_LIST_DIR}/roctracer.cpp) -endif() - if(ROCPROFSYS_USE_RCCL) target_sources(rocprofiler-systems-object-library PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rcclp.cpp) diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/category_region.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/category_region.hpp index ddb571048a..77d22faef8 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/category_region.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/category_region.hpp @@ -64,13 +64,13 @@ using tim::type_list; // these categories increment push/pop counts, which are used for sanity checks since // they should ALWAYS be popped if they were pushed using tracing_count_categories_t = - type_list; + type_list; // convert these categories to throughput points using causal_throughput_categories_t = - type_list; + type_list; // define this outside of category region functions so that the // static thread_local is global instead of per-template instantiation diff --git 
a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/pthread_create_gotcha.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/pthread_create_gotcha.cpp index 39a370f405..fcf354721e 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/pthread_create_gotcha.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/pthread_create_gotcha.cpp @@ -28,7 +28,6 @@ #include "core/utility.hpp" #include "library/causal/delay.hpp" #include "library/components/category_region.hpp" -#include "library/components/roctracer.hpp" #include "library/runtime.hpp" #include "library/sampling.hpp" #include "library/thread_data.hpp" @@ -61,7 +60,7 @@ shutdown(); namespace component { -using bundle_t = tim::lightweight_tuple; +using bundle_t = tim::lightweight_tuple; using category_region_t = tim::lightweight_tuple>; namespace @@ -82,7 +81,6 @@ inline void start_bundle(bundle_t& _bundle, int64_t _tid, Args&&... _args) { if(!get_use_timemory() && !get_use_perfetto()) return; - trait::runtime_enabled::set(get_use_roctracer()); ROCPROFSYS_BASIC_VERBOSE_F(3, "starting bundle '%s' in thread %li...\n", _bundle.key().c_str(), _tid); if constexpr(sizeof...(Args) > 0) @@ -619,5 +617,3 @@ pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, } } // namespace component } // namespace rocprofsys - -TIMEMORY_INITIALIZE_STORAGE(component::roctracer_data) diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/rocprofiler.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/rocprofiler.cpp deleted file mode 100644 index 0253436adc..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/rocprofiler.cpp +++ /dev/null @@ -1,193 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#include "library/components/rocprofiler.hpp" -#include "core/common.hpp" -#include "core/config.hpp" -#include "core/debug.hpp" -#include "core/defines.hpp" -#include "core/dynamic_library.hpp" -#include "core/perfetto.hpp" -#include "core/redirect.hpp" -#include "library/rocprofiler.hpp" -#include "library/sampling.hpp" -#include "library/thread_data.hpp" - -#include -#include -#include -#include - -#include - -#include -#include -#include - -namespace rocprofsys -{ -namespace component -{ -namespace -{ -auto& -rocprofiler_activity_count() -{ - static std::atomic _v{ 0 }; - return _v; -} -} // namespace - -unique_ptr_t& -rocm_data(int64_t _tid) -{ - using thread_data_t = thread_data; - return thread_data_t::instance(construct_on_thread{ _tid }); -} - -rocm_event::rocm_event(uint32_t _dev, uint32_t _thr, uint32_t _queue, - std::string _event_name, rocm_metric_type _begin, - rocm_metric_type _end, uint32_t _feature_count, void* _features_v) -: device_id{ _dev } -, thread_id{ _thr } -, queue_id{ _queue } -, entry{ _begin } -, exit{ _end } -, name(std::move(_event_name)) -{ - feature_values.reserve(_feature_count); - feature_names.reserve(_feature_count); - auto* _features = static_cast(_features_v); - for(uint32_t i = 0; i < _feature_count; ++i) - { - const rocprofiler_feature_t* p = &_features[i]; - feature_names.emplace_back(i); - switch(p->data.kind) - { - // Output metrics results - case ROCPROFILER_DATA_KIND_UNINIT: break; - case ROCPROFILER_DATA_KIND_BYTES: - feature_values.emplace_back( - rocm_feature_value{ p->data.result_bytes.size }); - break; - case ROCPROFILER_DATA_KIND_INT32: - feature_values.emplace_back(rocm_feature_value{ p->data.result_int32 }); - break; - case ROCPROFILER_DATA_KIND_FLOAT: - feature_values.emplace_back(rocm_feature_value{ p->data.result_float }); - break; - case ROCPROFILER_DATA_KIND_DOUBLE: - feature_values.emplace_back(rocm_feature_value{ p->data.result_double }); - break; - case ROCPROFILER_DATA_KIND_INT64: - 
feature_values.emplace_back(rocm_feature_value{ p->data.result_int64 }); - break; - } - } -} - -std::string -rocm_event::as_string() const -{ - std::stringstream _ss{}; - _ss << name << ", device: " << device_id << ", queue: " << queue_id - << ", thread: " << thread_id << ", entry: " << entry << ", exit = " << exit; - _ss.precision(3); - _ss << std::fixed; - for(size_t i = 0; i < feature_names.size(); ++i) - { - auto _name = rocprofsys::rocprofiler::get_data_labels().at(device_id).at( - feature_names.at(i)); - _ss << ", " << _name << " = "; - auto _as_string = [&_ss](auto&& itr) { _ss << std::setw(4) << itr; }; - std::visit(_as_string, feature_values.at(i)); - } - return _ss.str(); -} - -void -rocprofiler::preinit() -{ - rocprofiler_data::label() = "rocprofiler"; - rocprofiler_data::description() = "ROCm hardware counters"; -} - -void -rocprofiler::start() -{ - if(tracker_type::start() == 0) setup(); -} - -void -rocprofiler::stop() -{ - if(tracker_type::stop() == 0) shutdown(); -} - -bool -rocprofiler::is_setup() -{ - return rocprofsys::rocprofiler::is_setup(); -} - -void -rocprofiler::add_setup(const std::string&, std::function&&) -{} - -void -rocprofiler::add_shutdown(const std::string&, std::function&&) -{} - -void -rocprofiler::remove_setup(const std::string&) -{} - -void -rocprofiler::remove_shutdown(const std::string&) -{} - -void -rocprofiler::setup() -{ - ROCPROFSYS_VERBOSE_F(1, "rocprofiler is setup\n"); -} - -void -rocprofiler::shutdown() -{ - rocprofsys::rocprofiler::post_process(); - rocprofsys::rocprofiler::rocm_cleanup(); - ROCPROFSYS_VERBOSE_F(1, "rocprofiler is shutdown\n"); -} - -scope::transient_destructor -rocprofiler::protect_flush_activity() -{ - return scope::transient_destructor([]() { --rocprofiler_activity_count(); }, - []() { ++rocprofiler_activity_count(); }); -} -} // namespace component -} // namespace rocprofsys - -ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(rocprofiler, false, void) 
-ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(rocprofiler_data, true, - tim::component::rocprofiler_value) diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/rocprofiler.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/rocprofiler.hpp deleted file mode 100644 index 27326e9dda..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/rocprofiler.hpp +++ /dev/null @@ -1,241 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#pragma once - -#include "core/components/fwd.hpp" -#include "core/defines.hpp" -#include "library/thread_data.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace rocprofsys -{ -namespace component -{ -using rocm_metric_type = unsigned long long; -using rocm_info_entry = ::tim::hardware_counters::info; -using rocm_feature_value = std::variant; - -struct rocm_counter -{ - std::array counters; -}; - -struct rocm_event -{ - using value_type = rocm_feature_value; - - uint32_t device_id = 0; - uint32_t thread_id = 0; - uint32_t queue_id = 0; - rocm_metric_type entry = 0; - rocm_metric_type exit = 0; - std::string name = {}; - std::vector feature_names = {}; - std::vector feature_values = {}; - - rocm_event() = default; - rocm_event(uint32_t _dev, uint32_t _thr, uint32_t _queue, std::string _event_name, - rocm_metric_type begin, rocm_metric_type end, uint32_t _feature_count, - void* _features); - - std::string as_string() const; - - friend std::ostream& operator<<(std::ostream& _os, const rocm_event& _v) - { - return (_os << _v.as_string()); - } - - friend bool operator<(const rocm_event& _lhs, const rocm_event& _rhs) - { - return std::tie(_lhs.device_id, _lhs.queue_id, _lhs.entry, _lhs.thread_id) < - std::tie(_rhs.device_id, _rhs.queue_id, _rhs.entry, _rhs.thread_id); - } -}; - -using rocm_data_t = std::vector; -using rocm_data_tracker = data_tracker; - -rocprofsys::unique_ptr_t& -rocm_data(int64_t _tid = threading::get_id()); - -using rocprofiler_value = typename rocm_event::value_type; -using rocprofiler_data = data_tracker; - -struct rocprofiler -: base -, private policy::instance_tracker -{ - using value_type = void; - using base_type = base; - using tracker_type = policy::instance_tracker; - - ROCPROFSYS_DEFAULT_OBJECT(rocprofiler) - - static void preinit(); - static void global_init() { setup(); } - 
static void global_finalize() { shutdown(); } - - static bool is_setup(); - static void setup(); - static void shutdown(); - static void add_setup(const std::string&, std::function&&); - static void add_shutdown(const std::string&, std::function&&); - static void remove_setup(const std::string&); - static void remove_shutdown(const std::string&); - - void start(); - void stop(); - - // this function protects rocprofiler_flush_activty from being called - // when rocprof-sys exits during a callback - [[nodiscard]] static scope::transient_destructor protect_flush_activity(); -}; - -#if !defined(ROCPROFSYS_USE_ROCPROFILER) -inline void -rocprofiler::setup() -{} - -inline void -rocprofiler::shutdown() -{} - -inline bool -rocprofiler::is_setup() -{ - return false; -} -#endif -} // namespace component -} // namespace rocprofsys - -namespace tim -{ -namespace component -{ -using ::rocprofsys::component::rocm_data_tracker; -using ::rocprofsys::component::rocm_feature_value; -using ::rocprofsys::component::rocprofiler_data; -using ::rocprofsys::component::rocprofiler_value; -} // namespace component -} // namespace tim - -namespace tim -{ -namespace operation -{ -template <> -struct set_storage -{ - using T = component::rocm_data_tracker; - static constexpr size_t max_threads = 4096; - using type = T; - using storage_array_t = std::array*, max_threads>; - friend struct get_storage; - - ROCPROFSYS_DEFAULT_OBJECT(set_storage) - - auto operator()(storage*, size_t) const {} - auto operator()(type&, size_t) const {} - auto operator()(storage* _v) const { get().fill(_v); } - -private: - static storage_array_t& get() - { - static storage_array_t _v = { nullptr }; - return _v; - } -}; - -template <> -struct get_storage -{ - using type = component::rocm_data_tracker; - - ROCPROFSYS_DEFAULT_OBJECT(get_storage) - - auto operator()(const type&) const - { - return operation::set_storage::get().at(0); - } - - auto operator()() const - { - type _obj{}; - return (*this)(_obj); - } - - auto 
operator()(size_t _idx) const - { - return operation::set_storage::get().at(_idx); - } - - auto operator()(type&, size_t _idx) const { return (*this)(_idx); } -}; -} // namespace operation -} // namespace tim - -#if !defined(ROCPROFSYS_USE_ROCPROFILER) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::rocprofiler_data, false_type) -#endif - -TIMEMORY_SET_COMPONENT_API(component::rocprofiler_data, project::timemory, - category::timing, os::supports_unix) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_timing_category, component::rocprofiler_data, - false_type) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_timing_units, component::rocprofiler_data, - false_type) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_units, component::rocprofiler_data, false_type) -TIMEMORY_STATISTICS_TYPE(component::rocprofiler_data, component::rocprofiler_value) -TIMEMORY_STATISTICS_TYPE(component::rocm_data_tracker, component::rocm_feature_value) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_units, component::rocm_data_tracker, false_type) - -#if !defined(ROCPROFSYS_EXTERN_COMPONENTS) || \ - (defined(ROCPROFSYS_EXTERN_COMPONENTS) && ROCPROFSYS_EXTERN_COMPONENTS > 0) - -# include - -ROCPROFSYS_DECLARE_EXTERN_COMPONENT(rocprofiler, false, void) -ROCPROFSYS_DECLARE_EXTERN_COMPONENT(rocprofiler_data, true, double) - -#endif diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/roctracer.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/roctracer.cpp deleted file mode 100644 index 29ba18385f..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/roctracer.cpp +++ /dev/null @@ -1,396 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#include "library/components/roctracer.hpp" -#include "core/common.hpp" -#include "core/config.hpp" -#include "core/debug.hpp" -#include "core/defines.hpp" -#include "core/dynamic_library.hpp" -#include "core/redirect.hpp" -#include "library/roctracer.hpp" -#include "library/runtime.hpp" -#include "library/thread_data.hpp" -#include "library/thread_info.hpp" - -#include -#include - -#define HIP_PROF_HIP_API_STRING 1 - -#include -#include - -#if ROCPROFSYS_HIP_VERSION < 50300 -# include -#endif - -#define AMD_INTERNAL_BUILD 1 -#include - -namespace rocprofsys -{ -namespace component -{ -namespace -{ -auto& -roctracer_activity_count() -{ - static std::atomic _v{ 0 }; - return _v; -} -} // namespace - -void -roctracer::preinit() -{ - roctracer_data::label() = "roctracer"; - roctracer_data::description() = "ROCm tracer (activity API)"; -} - -void -roctracer::start() -{ - if(tracker_type::start() == 0) setup(nullptr); -} - -void -roctracer::stop() -{ - if(tracker_type::stop() == 0) shutdown(); -} - -bool -roctracer::is_setup() -{ - return roctracer_is_setup(); -} - -void -roctracer::add_setup(const std::string& _lbl, std::function&& _func) -{ - roctracer_setup_routines().emplace_back(_lbl, std::move(_func)); -} - -void -roctracer::add_shutdown(const std::string& _lbl, std::function&& _func) -{ - roctracer_shutdown_routines().emplace_back(_lbl, std::move(_func)); -} - -void -roctracer::remove_setup(const std::string& _lbl) -{ - auto& _data = roctracer_setup_routines(); - for(auto itr = _data.begin(); itr != _data.end(); ++itr) - { - if(itr->first == _lbl) - { - _data.erase(itr); - break; - } - } -} - -void -roctracer::remove_shutdown(const std::string& _lbl) -{ - auto& _data = roctracer_setup_routines(); - for(auto itr = _data.begin(); itr != _data.end(); ++itr) - { - if(itr->first == _lbl) - { - _data.erase(itr); - break; - } - } -} - -void -roctracer::setup(void* table, bool on_load_trace) -{ - if(!get_use_roctracer()) return; - - auto_lock_t _lk{ type_mutex() }; - 
if(roctracer_is_setup()) return; - roctracer_is_setup() = true; - - ROCPROFSYS_VERBOSE_F(1, "setting up roctracer...\n"); - ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); - - dynamic_library _amdhip64{ "ROCPROFSYS_ROCTRACER_LIBAMDHIP64", - find_library_path("libamdhip64.so", - { "ROCPROFSYS_ROCM_PATH", "ROCM_PATH" }, - { ROCPROFSYS_DEFAULT_ROCM_PATH }) }; - -#if ROCPROFSYS_HIP_VERSION_MAJOR == 4 && ROCPROFSYS_HIP_VERSION_MINOR < 4 - dynamic_library _kfdwrapper{ - "ROCPROFSYS_ROCTRACER_LIBKFDWRAPPER", - find_library_path("libkfdwrapper64.so", { "ROCPROFSYS_ROCM_PATH", "ROCM_PATH" }, - { ROCPROFSYS_DEFAULT_ROCM_PATH }, - { "roctracer/lib", "roctracer/lib64", "lib", "lib64" }) - }; -#endif - - ROCPROFSYS_ROCTRACER_CALL(roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, nullptr)); - - // Allocating tracing pool - roctracer_properties_t properties{}; - memset(&properties, 0, sizeof(roctracer_properties_t)); - // properties.mode = 0x1000; - properties.buffer_size = 0x100; - properties.buffer_callback_fun = hip_activity_callback; - ROCPROFSYS_ROCTRACER_CALL(roctracer_open_pool(&properties)); - -#if ROCPROFSYS_HIP_VERSION_MAJOR == 4 && ROCPROFSYS_HIP_VERSION_MINOR >= 4 - // HIP 4.5.0 has an invalid warning - redirect _rd{ std::cerr, "roctracer_enable_callback(), get_op_end(), invalid domain " - "ID(4) in: roctracer_enable_callback(hip_api_callback, " - "nullptr)roctracer_enable_activity_expl(), get_op_end(), " - "invalid domain ID(4) in: roctracer_enable_activity()" }; -#endif - - if(get_trace_hip_api()) - { - ROCPROFSYS_ROCTRACER_CALL(roctracer_enable_domain_callback( - ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr)); - } - - if(get_use_roctx()) - { - ROCPROFSYS_ROCTRACER_CALL(roctracer_enable_domain_callback( - ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, nullptr)); - } - - if(get_trace_hip_activity()) - { - // Enable HIP activity tracing - ROCPROFSYS_ROCTRACER_CALL( - roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS)); - } - - if(table != nullptr) - { - 
ROCPROFSYS_VERBOSE(1 || on_load_trace, "[OnLoad] setting up HSA...\n"); - - bool trace_hsa_api = get_trace_hsa_api(); - - // Enable HSA API callbacks/activity - if(trace_hsa_api) - { - std::vector hsa_api_vec = - tim::delimit(get_trace_hsa_api_types()); - - // initialize HSA tracing - roctracer_set_properties( - static_cast(ACTIVITY_DOMAIN_HSA_API), (void*) table); - - if(!hsa_api_vec.empty()) - { - for(const auto& itr : hsa_api_vec) - { - uint32_t cid = HSA_API_ID_NUMBER; - const char* api = itr.c_str(); - ROCPROFSYS_ROCTRACER_CALL(roctracer_op_code( - static_cast(ACTIVITY_DOMAIN_HSA_API), api, - &cid, nullptr)); - ROCPROFSYS_ROCTRACER_CALL(roctracer_enable_op_callback( - static_cast(ACTIVITY_DOMAIN_HSA_API), cid, - hsa_api_callback, nullptr)); - - ROCPROFSYS_VERBOSE(1 || on_load_trace, " HSA-trace(%s)", api); - } - } - else - { - ROCPROFSYS_VERBOSE(1 || on_load_trace, " HSA-trace()\n"); - ROCPROFSYS_ROCTRACER_CALL(roctracer_enable_domain_callback( - static_cast(ACTIVITY_DOMAIN_HSA_API), - hsa_api_callback, nullptr)); - } - } - - bool trace_hsa_activity = get_trace_hsa_activity(); - // Enable HSA GPU activity - if(trace_hsa_activity) - { -#if ROCPROFSYS_HIP_VERSION < 50300 - using namespace roctracer; - // initialize HSA tracing - const char* output_prefix = nullptr; - hsa_ops_properties_t ops_properties{ - table, reinterpret_cast(hsa_activity_callback), - nullptr, output_prefix - }; -#elif ROCPROFSYS_HIP_VERSION < 50301 - hsa_ops_properties_t ops_properties; - ops_properties.table = table; - ops_properties.reserved1[0] = reinterpret_cast(&hsa_activity_callback); - ops_properties.reserved1[1] = nullptr; - ops_properties.reserved1[2] = nullptr; -#else - hsa_ops_properties_t ops_properties{ - table, reinterpret_cast(&hsa_activity_callback), nullptr, nullptr - }; -#endif - roctracer_set_properties( - static_cast(ACTIVITY_DOMAIN_HSA_OPS), &ops_properties); - - ROCPROFSYS_VERBOSE(1 || on_load_trace, " HSA-activity-trace()\n"); - 
ROCPROFSYS_ROCTRACER_CALL(roctracer_enable_op_activity( - static_cast(ACTIVITY_DOMAIN_HSA_OPS), HSA_OP_ID_COPY)); - } - } - - // callback for HSA - for(auto& itr : roctracer_setup_routines()) - itr.second(); - - // make sure all async callbacks are allocated - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - hip_exec_activity_callbacks(i); - - ROCPROFSYS_VERBOSE_F(1, "roctracer is setup\n"); -} - -void -roctracer::flush() -{ - auto wait_for_activity_flush_completion = []() { - uint16_t nitr = 0; - while(roctracer_activity_count() > 0 && nitr++ < 10) - std::this_thread::sleep_for(std::chrono::milliseconds{ 100 }); - }; - - // a flush may already be happening - wait_for_activity_flush_completion(); - - if(roctracer_activity_count() == 0) - { - ROCPROFSYS_VERBOSE_F(2, "executing roctracer_flush_activity()...\n"); - ROCPROFSYS_ROCTRACER_CALL(roctracer_flush_activity()); - // wait to make sure flush completes - std::this_thread::sleep_for(std::chrono::milliseconds{ 100 }); - wait_for_activity_flush_completion(); - } - else - { - ROCPROFSYS_CI_FAIL(true, - "roctracer_activity_count() != 0 (== %li). 
" - "roctracer::shutdown() most likely called during abort", - roctracer_activity_count().load()); - } - - ROCPROFSYS_VERBOSE_F(2, "executing hip_exec_activity_callbacks(0..%zu)\n", - thread_info::get_peak_num_threads()); - // make sure all async operations are executed - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - hip_exec_activity_callbacks(i); - - ROCPROFSYS_VERBOSE_F(2, "roctracer flush completed\n"); -} - -void -roctracer::shutdown() -{ - auto_lock_t _lk{ type_mutex() }; - if(!roctracer_is_setup()) return; - - roctracer_is_setup() = false; - - ROCPROFSYS_VERBOSE_F(1, "shutting down roctracer...\n"); - - // callback for hsa - ROCPROFSYS_VERBOSE_F(2, "executing %zu roctracer_shutdown_routines...\n", - roctracer_shutdown_routines().size()); - for(auto& itr : roctracer_shutdown_routines()) - itr.second(); - -#if ROCPROFSYS_HIP_VERSION_MAJOR == 4 && ROCPROFSYS_HIP_VERSION_MINOR >= 4 - ROCPROFSYS_DEBUG_F("redirecting roctracer warnings\n"); - // HIP 4.5.0 has an invalid warning - redirect _rd{ - std::cerr, "roctracer_disable_callback(), get_op_end(), invalid domain ID(4) " - "in: roctracer_disable_callback()roctracer_disable_activity(), " - "get_op_end(), invalid domain ID(4) in: roctracer_disable_activity()" - }; -#endif - - if(get_trace_hip_api()) - { - ROCPROFSYS_VERBOSE_F( - 2, - "executing roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)...\n"); - ROCPROFSYS_ROCTRACER_CALL( - roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); - } - - if(get_use_roctx()) - { - ROCPROFSYS_VERBOSE_F( - 2, "executing roctracer_disable_domain_activity(ACTIVITY_DOMAIN_ROCTX)...\n"); - ROCPROFSYS_ROCTRACER_CALL( - roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); - } - - if(get_trace_hip_activity()) - { - ROCPROFSYS_VERBOSE_F( - 2, - "executing roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS)...\n"); - ROCPROFSYS_ROCTRACER_CALL( - roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS)); - } - - if(get_trace_hsa_api()) - 
{ - ROCPROFSYS_VERBOSE_F( - 2, - "executing roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_API)...\n"); - ROCPROFSYS_ROCTRACER_CALL( - roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); - } - - if(get_trace_hsa_api()) - { - ROCPROFSYS_VERBOSE_F( - 2, "executing roctracer_disable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, " - "HSA_OP_ID_COPY)...\n"); - ROCPROFSYS_ROCTRACER_CALL( - roctracer_disable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY)); - } - - ROCPROFSYS_VERBOSE_F(1, "roctracer is shutdown\n"); -} - -scope::transient_destructor -roctracer::protect_flush_activity() -{ - return scope::transient_destructor([]() { --roctracer_activity_count(); }, - []() { ++roctracer_activity_count(); }); -} -} // namespace component -} // namespace rocprofsys - -ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(roctracer, false, void) -ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(roctracer_data, true, double) diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/roctracer.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/roctracer.hpp deleted file mode 100644 index 93c9f6055a..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/roctracer.hpp +++ /dev/null @@ -1,117 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -#include "core/common.hpp" -#include "core/components/fwd.hpp" -#include "core/defines.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -ROCPROFSYS_COMPONENT_ALIAS(roctracer_data, - ::tim::component::data_tracker) - -namespace rocprofsys -{ -namespace component -{ -struct roctracer -: base -, private policy::instance_tracker -{ - using value_type = void; - using base_type = base; - using tracker_type = policy::instance_tracker; - - ROCPROFSYS_DEFAULT_OBJECT(roctracer) - - static void preinit(); - static void global_finalize() { shutdown(); } - - static bool is_setup(); - static void setup(void* hsa_api_table, bool on_load_trace = false); - static void flush(); - static void shutdown(); - static void add_setup(const std::string&, std::function&&); - static void add_shutdown(const std::string&, std::function&&); - static void remove_setup(const std::string&); - static void remove_shutdown(const std::string&); - - void start(); - void stop(); - - // this function protects roctracer_flush_activty from being called - // when rocprof-sys exits during a callback - [[nodiscard]] static scope::transient_destructor protect_flush_activity(); -}; - -#if !defined(ROCPROFSYS_USE_ROCTRACER) -inline void -roctracer::setup(void*, bool) -{} - -inline void -roctracer::flush() -{} - -inline void -roctracer::shutdown() -{} - -inline bool -roctracer::is_setup() -{ - return false; -} -#endif -} // namespace component -} // namespace 
rocprofsys - -#if !defined(ROCPROFSYS_USE_ROCTRACER) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::roctracer_data, false_type) -#endif - -TIMEMORY_SET_COMPONENT_API(rocprofsys::component::roctracer_data, project::timemory, - category::timing, os::supports_unix) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_timing_category, component::roctracer_data, true_type) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_timing_units, component::roctracer_data, true_type) - -#if defined(ROCPROFSYS_USE_ROCTRACER) && ROCPROFSYS_USE_ROCTRACER > 0 -# if !defined(ROCPROFSYS_EXTERN_COMPONENTS) || \ - (defined(ROCPROFSYS_EXTERN_COMPONENTS) && ROCPROFSYS_EXTERN_COMPONENTS > 0) - -# include - -ROCPROFSYS_DECLARE_EXTERN_COMPONENT(roctracer, false, void) -ROCPROFSYS_DECLARE_EXTERN_COMPONENT(roctracer_data, true, double) - -# endif -#endif diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm.cpp index bd8505708d..7afc4b3efb 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm.cpp @@ -25,12 +25,8 @@ #include "core/debug.hpp" #include "core/dynamic_library.hpp" #include "core/gpu.hpp" -#include "library/components/rocprofiler.hpp" -#include "library/components/roctracer.hpp" -#include "library/rocm/hsa_rsrc_factory.hpp" #include "library/rocm_smi.hpp" -#include "library/rocprofiler.hpp" -#include "library/roctracer.hpp" +#include "library/rocprofiler-sdk.hpp" #include "library/runtime.hpp" #include "library/thread_data.hpp" #include "library/tracing.hpp" @@ -46,208 +42,18 @@ #include #include -#if defined(ROCPROFSYS_USE_ROCPROFILER) && ROCPROFSYS_USE_ROCPROFILER > 0 -# include +#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 +# include #endif -using namespace rocprofsys; - namespace rocprofsys { namespace rocm { -std::mutex rocm_mutex = {}; -bool is_loaded = false; -bool on_load_trace = 
(get_env("ROCP_ONLOAD_TRACE", 0) > 0); +std::vector +rocm_events() +{ + return rocprofiler_sdk::get_rocm_events_info(); +} } // namespace rocm } // namespace rocprofsys - -#if defined(ROCPROFSYS_USE_ROCPROFILER) && ROCPROFSYS_USE_ROCPROFILER > 0 -std::ostream& -operator<<(std::ostream& _os, const rocprofiler_settings_t& _v) -{ -# define ROCPROF_SETTING_FIELD_STR(NAME) JOIN('=', # NAME, _v.NAME) - - _os << JOIN( - ", ", ROCPROF_SETTING_FIELD_STR(intercept_mode), - ROCPROF_SETTING_FIELD_STR(code_obj_tracking), - ROCPROF_SETTING_FIELD_STR(memcopy_tracking), - ROCPROF_SETTING_FIELD_STR(trace_size), ROCPROF_SETTING_FIELD_STR(trace_local), - ROCPROF_SETTING_FIELD_STR(timeout), ROCPROF_SETTING_FIELD_STR(timestamp_on), - ROCPROF_SETTING_FIELD_STR(hsa_intercepting), - ROCPROF_SETTING_FIELD_STR(k_concurrent), ROCPROF_SETTING_FIELD_STR(opt_mode), - ROCPROF_SETTING_FIELD_STR(obj_dumping)); - return _os; -} -#endif - -// HSA-runtime tool on-load method -extern "C" -{ -#if defined(ROCPROFSYS_USE_ROCPROFILER) && ROCPROFSYS_USE_ROCPROFILER > 0 - void OnUnloadTool() - { - ROCPROFSYS_BASIC_VERBOSE_F(2 || rocm::on_load_trace, "Unloading...\n"); - - rocm::lock_t _lk{ rocm::rocm_mutex, std::defer_lock }; - if(!_lk.owns_lock()) _lk.lock(); - - if(!rocm::is_loaded) - { - ROCPROFSYS_BASIC_VERBOSE_F(1 || rocm::on_load_trace, - "rocprofiler is not loaded\n"); - return; - } - rocm::is_loaded = false; - - _lk.unlock(); - - // stop_top_level_timer_if_necessary(); - // Final resources cleanup - rocprofsys::rocprofiler::rocm_cleanup(); - } - - void OnLoadToolProp(rocprofiler_settings_t* settings) - { - using ::rocprofiler::util::HsaRsrcFactory; - - if(!config::get_use_rocprofiler() || config::get_rocm_events().empty()) return; - - ROCPROFSYS_BASIC_VERBOSE_F(2 || rocm::on_load_trace, "Loading...\n"); - - rocm::lock_t _lk{ rocm::rocm_mutex, std::defer_lock }; - if(!_lk.owns_lock()) _lk.lock(); - - if(rocm::is_loaded) - { - ROCPROFSYS_BASIC_VERBOSE_F(1 || rocm::on_load_trace, - "rocprofiler is 
already loaded\n"); - return; - } - rocm::is_loaded = true; - - _lk.unlock(); - - // Enable timestamping - settings->timestamp_on = 1; - settings->intercept_mode = 1; - settings->hsa_intercepting = 1; - settings->k_concurrent = 0; - settings->obj_dumping = 0; - // settings->code_obj_tracking = 0; - // settings->memcopy_tracking = 0; - // settings->trace_local = 1; - // settings->opt_mode = 1; - // settings->trace_size = 0; - // settings->timeout = 0; - - ROCPROFSYS_BASIC_VERBOSE_F(1 || rocm::on_load_trace, "rocprofiler settings: %s\n", - JOIN("", *settings).c_str()); - - // Initialize profiling - rocprofsys::rocprofiler::rocm_initialize(); - HsaRsrcFactory::Instance().PrintGpuAgents("ROCm"); - } -#endif - - bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) - { - tim::consume_parameters(table, runtime_version, failed_tool_count, - failed_tool_names); - - static bool _once = false; - if(_once) return true; - _once = true; - - ROCPROFSYS_BASIC_VERBOSE_F(2 || rocm::on_load_trace, "Loading...\n"); - ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); - - if(!tim::get_env("ROCPROFSYS_INIT_TOOLING", true)) return true; - if(!tim::settings::enabled()) return true; - - roctracer_is_init() = true; - ROCPROFSYS_BASIC_VERBOSE_F(1 || rocm::on_load_trace, "Loading ROCm tooling...\n"); - - if(!config::settings_are_configured() && get_state() < State::Active) - rocprofsys_init_tooling_hidden(); - - ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal); - -#if ROCPROFSYS_HIP_VERSION < 50300 - ROCPROFSYS_VERBOSE_F(1 || rocm::on_load_trace, - "Computing the roctracer clock skew...\n"); - (void) rocprofsys::get_clock_skew(); -#endif - - if(get_use_process_sampling() && get_use_rocm_smi()) - { - ROCPROFSYS_VERBOSE_F(1 || rocm::on_load_trace, - "Setting rocm_smi state to active...\n"); - rocm_smi::set_state(State::Active); - } - - comp::roctracer::setup(static_cast(table), rocm::on_load_trace); - -#if 
defined(ROCPROFSYS_USE_ROCPROFILER) && ROCPROFSYS_USE_ROCPROFILER > 0 - bool _force_rocprofiler_init = - tim::get_env("ROCPROFSYS_FORCE_ROCPROFILER_INIT", false, false); -#else - bool _force_rocprofiler_init = false; -#endif - - bool _success = true; - bool _is_empty = - (config::settings_are_configured() && config::get_rocm_events().empty()); - if(_force_rocprofiler_init || (get_use_rocprofiler() && !_is_empty)) - { -#if ROCPROFSYS_HIP_VERSION < 50500 - auto _rocprof = dynamic_library{ - "ROCPROFSYS_ROCPROFILER_LIBRARY", - find_library_path( - "librocprofiler64.so", { "ROCPROFSYS_ROCM_PATH", "ROCM_PATH" }, - { ROCPROFSYS_DEFAULT_ROCM_PATH }, - { "lib", "lib64", "rocprofiler/lib", "rocprofiler/lib64" }), - (RTLD_LAZY | RTLD_GLOBAL), false - }; - - ROCPROFSYS_VERBOSE_F(1 || rocm::on_load_trace, - "Loading rocprofiler library (%s=%s)...\n", - _rocprof.envname.c_str(), _rocprof.filename.c_str()); - _rocprof.open(); - - on_load_t _rocprof_load = nullptr; - _success = _rocprof.invoke("OnLoad", _rocprof_load, table, runtime_version, - failed_tool_count, failed_tool_names); - ROCPROFSYS_CONDITIONAL_PRINT_F(!_success, - "Warning! Invoking rocprofiler's OnLoad " - "failed! ROCPROFSYS_ROCPROFILER_LIBRARY=%s\n", - _rocprof.filename.c_str()); - ROCPROFSYS_CI_THROW(!_success, - "Warning! Invoking rocprofiler's OnLoad " - "failed! ROCPROFSYS_ROCPROFILER_LIBRARY=%s\n", - _rocprof.filename.c_str()); -#endif - } - else - { - using ::rocprofiler::util::HsaRsrcFactory; - - HsaRsrcFactory::Instance().PrintGpuAgents("ROCm"); - } - - gpu::add_hip_device_metadata(); - - ROCPROFSYS_BASIC_VERBOSE_F(2 || rocm::on_load_trace, "Loading... %s\n", - (_success) ? "Done" : "Failed"); - return _success; - } - - // HSA-runtime on-unload method - void OnUnload() - { - ROCPROFSYS_BASIC_VERBOSE_F(2 || rocm::on_load_trace, "Unloading...\n"); - rocprofsys_finalize_hidden(); - ROCPROFSYS_BASIC_VERBOSE_F(2 || rocm::on_load_trace, "Unloading... 
Done\n"); - } -} diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm.hpp index b5b1808f65..131f0d13ea 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm.hpp @@ -23,36 +23,48 @@ #pragma once #include "core/defines.hpp" +#include "core/timemory.hpp" -#if defined(ROCPROFSYS_USE_ROCPROFILER) && ROCPROFSYS_USE_ROCPROFILER > 0 -# include +#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 +# include +# include #endif #include #include +#include namespace rocprofsys { namespace rocm { -using lock_t = std::unique_lock; +using hardware_counter_info = ::tim::hardware_counters::info; -extern std::mutex rocm_mutex; -extern bool is_loaded; +std::vector +rocm_events(); + +#if !defined(ROCPROFSYS_USE_ROCM) || ROCPROFSYS_USE_ROCM == 0 +inline std::vector +rocm_events() +{ + return std::vector(); +} +#endif } // namespace rocm } // namespace rocprofsys extern "C" { - struct HsaApiTable; - using on_load_t = bool (*)(HsaApiTable*, uint64_t, uint64_t, const char* const*); + struct rocprofiler_tool_configure_result_t; + struct rocprofiler_client_id_t; - bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) ROCPROFSYS_PUBLIC_API; - void OnUnload() ROCPROFSYS_PUBLIC_API; + using rocprofiler_configure_t = + rocprofiler_tool_configure_result_t* (*) (uint32_t version, + const char* runtime_version, + uint32_t priority, + rocprofiler_client_id_t* client_id); -#if defined(ROCPROFSYS_USE_ROCPROFILER) && ROCPROFSYS_USE_ROCPROFILER > 0 - void OnLoadToolProp(rocprofiler_settings_t* settings) ROCPROFSYS_PUBLIC_API; - void OnUnloadTool() ROCPROFSYS_PUBLIC_API; -#endif + rocprofiler_tool_configure_result_t* rocprofiler_configure( + uint32_t version, const char* runtime_version, uint32_t priority, + rocprofiler_client_id_t* 
client_id) ROCPROFSYS_PUBLIC_API; } diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/CMakeLists.txt deleted file mode 100644 index 0136d5d185..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -if(ROCPROFSYS_USE_ROCPROFILER OR ROCPROFSYS_USE_ROCTRACER) - target_sources( - rocprofiler-systems-object-library - PRIVATE ${CMAKE_CURRENT_LIST_DIR}/hsa_rsrc_factory.hpp - ${CMAKE_CURRENT_LIST_DIR}/hsa_rsrc_factory.cpp) -endif() diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/hsa_rsrc_factory.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/hsa_rsrc_factory.cpp deleted file mode 100644 index ecb010cdec..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/hsa_rsrc_factory.cpp +++ /dev/null @@ -1,1027 +0,0 @@ -/****************************************************************************** -Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*******************************************************************************/ - -#include "library/rocm/hsa_rsrc_factory.hpp" -#include "core/debug.hpp" -#include "core/defines.hpp" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace rocprofiler -{ -namespace util -{ -// Demangle C++ symbol name -static const char* -cpp_demangle(const char* symname) -{ - size_t size = 0; - int status; - const char* ret = abi::__cxa_demangle(symname, nullptr, &size, &status); - return (ret != nullptr) ? ret : strdup(symname); -} - -// Callback function to get available in the system agents -hsa_status_t -HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) -{ - hsa_status_t status = HSA_STATUS_ERROR; - HsaRsrcFactory* hsa_rsrc = reinterpret_cast(data); - const AgentInfo* agent_info = hsa_rsrc->AddAgentInfo(agent); - if(agent_info != nullptr) status = HSA_STATUS_SUCCESS; - return status; -} - -// This function checks to see if the provided -// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true, -// the function adds an additional requirement that the pool have the -// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false, -// pools must NOT have this property. -// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is -// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but -// no pool was found meeting the requirements. If an error is encountered, we -// return that error. 
-static hsa_status_t -FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) -{ - hsa_status_t err; - hsa_amd_segment_t segment; - uint32_t flag; - - if(nullptr == data) - { - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - - err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info( - pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); - CHECK_STATUS("hsa_amd_memory_pool_get_info", err); - if(HSA_AMD_SEGMENT_GLOBAL != segment) - { - return HSA_STATUS_SUCCESS; - } - - err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info( - pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); - CHECK_STATUS("hsa_amd_memory_pool_get_info", err); - - uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; - - if((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) - { - return HSA_STATUS_SUCCESS; - } - - *(reinterpret_cast(data)) = pool; - return HSA_STATUS_INFO_BREAK; -} - -// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that -// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT -// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT -hsa_status_t -FindStandardPool(hsa_amd_memory_pool_t pool, void* data) -{ - return FindGlobalPool(pool, data, false); -} - -// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that -// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS -// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT -hsa_status_t -FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) -{ - return FindGlobalPool(pool, data, true); -} - -// Constructor of the class -HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) -: initialize_hsa_(initialize_hsa) -{ - hsa_status_t status; - - cpu_pool_ = nullptr; - kern_arg_pool_ = nullptr; - - InitHsaApiTable(nullptr); - - // Initialize the Hsa Runtime - if(initialize_hsa_) - { - status = hsa_api_.hsa_init(); - CHECK_STATUS("Error in hsa_init", status); - } - - // Discover the set of Gpu devices available on the 
platform - status = hsa_api_.hsa_iterate_agents(GetHsaAgentsCallback, this); - CHECK_STATUS("Error Calling hsa_iterate_agents", status); - if(cpu_pool_ == nullptr) - CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR); - if(kern_arg_pool_ == nullptr) - CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); - - // Get AqlProfile API table - aqlprofile_api_ = {}; - memset(&aqlprofile_api_, 0, sizeof(aqlprofile_api_)); -#ifdef ROCP_LD_AQLPROFILE - status = LoadAqlProfileLib(&aqlprofile_api_); -#else - status = hsa_api_.hsa_system_get_major_extension_table( - HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, - sizeof(aqlprofile_api_), &aqlprofile_api_); -#endif - CHECK_STATUS("aqlprofile API table load failed", status); - - // Get Loader API table - loader_api_ = {}; - memset(&loader_api_, 0, sizeof(loader_api_)); - status = hsa_api_.hsa_system_get_major_extension_table( - HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_); - CHECK_STATUS("loader API table query failed", status); - - // Instantiate HSA timer - timer_ = new HsaTimer(&hsa_api_); - CHECK_STATUS("HSA timer allocation failed", - (timer_ == nullptr) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); - - // Time correlation - const uint32_t corr_iters = 1000; - for(unsigned time_id = 0; time_id < HsaTimer::TIME_ID_NUMBER; time_id += 1) - { - CorrelateTime((HsaTimer::time_id_t) time_id, corr_iters); - } - - // System timeout - timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) - ? 
timeout_ns_ - : timer_->ns_to_sysclock(timeout_ns_); - - // To dump code objects - to_dump_code_obj_ = getenv("ROCP_DUMP_CODEOBJ"); -} - -// Destructor of the class -HsaRsrcFactory::~HsaRsrcFactory() -{ - delete timer_; - for(const auto* p : cpu_list_) - delete p; - for(const auto* p : gpu_list_) - delete p; - if(initialize_hsa_) - { - hsa_status_t status = hsa_api_.hsa_shut_down(); - try - { - CHECK_STATUS("Error in hsa_shut_down", status); - } catch(std::runtime_error& _e) - { - fflush(stderr); - fprintf(stderr, "%s\n", _e.what()); - fflush(stderr); - abort(); - } - } -} - -void -HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) -{ - std::lock_guard lck(mutex_); - - if(hsa_api_.hsa_init == nullptr) - { - if(table != nullptr) - { - hsa_api_.hsa_init = table->core_->hsa_init_fn; - hsa_api_.hsa_shut_down = table->core_->hsa_shut_down_fn; - hsa_api_.hsa_agent_get_info = table->core_->hsa_agent_get_info_fn; - hsa_api_.hsa_iterate_agents = table->core_->hsa_iterate_agents_fn; - - hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn; - hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn; - hsa_api_.hsa_queue_load_read_index_relaxed = - table->core_->hsa_queue_load_read_index_relaxed_fn; - hsa_api_.hsa_queue_load_write_index_relaxed = - table->core_->hsa_queue_load_write_index_relaxed_fn; - hsa_api_.hsa_queue_add_write_index_scacq_screl = - table->core_->hsa_queue_add_write_index_scacq_screl_fn; - - hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn; - hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn; - hsa_api_.hsa_signal_load_relaxed = table->core_->hsa_signal_load_relaxed_fn; - hsa_api_.hsa_signal_store_relaxed = table->core_->hsa_signal_store_relaxed_fn; - hsa_api_.hsa_signal_wait_scacquire = - table->core_->hsa_signal_wait_scacquire_fn; - hsa_api_.hsa_signal_store_screlease = - table->core_->hsa_signal_store_screlease_fn; - - hsa_api_.hsa_code_object_reader_create_from_file = - 
table->core_->hsa_code_object_reader_create_from_file_fn; - hsa_api_.hsa_executable_create_alt = - table->core_->hsa_executable_create_alt_fn; - hsa_api_.hsa_executable_load_agent_code_object = - table->core_->hsa_executable_load_agent_code_object_fn; - hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; - hsa_api_.hsa_executable_destroy = table->core_->hsa_executable_destroy_fn; - hsa_api_.hsa_executable_get_symbol = - table->core_->hsa_executable_get_symbol_fn; - hsa_api_.hsa_executable_symbol_get_info = - table->core_->hsa_executable_symbol_get_info_fn; - hsa_api_.hsa_executable_iterate_symbols = - table->core_->hsa_executable_iterate_symbols_fn; - - hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn; - hsa_api_.hsa_system_get_major_extension_table = - table->core_->hsa_system_get_major_extension_table_fn; - - hsa_api_.hsa_amd_agent_iterate_memory_pools = - table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn; - hsa_api_.hsa_amd_memory_pool_get_info = - table->amd_ext_->hsa_amd_memory_pool_get_info_fn; - hsa_api_.hsa_amd_memory_pool_allocate = - table->amd_ext_->hsa_amd_memory_pool_allocate_fn; - hsa_api_.hsa_amd_agents_allow_access = - table->amd_ext_->hsa_amd_agents_allow_access_fn; - hsa_api_.hsa_amd_memory_async_copy = - table->amd_ext_->hsa_amd_memory_async_copy_fn; - - hsa_api_.hsa_amd_signal_async_handler = - table->amd_ext_->hsa_amd_signal_async_handler_fn; - hsa_api_.hsa_amd_profiling_set_profiler_enabled = - table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn; - hsa_api_.hsa_amd_profiling_get_async_copy_time = - table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn; - hsa_api_.hsa_amd_profiling_get_dispatch_time = - table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn; - } - else - { - hsa_api_.hsa_init = hsa_init; - hsa_api_.hsa_shut_down = hsa_shut_down; - hsa_api_.hsa_agent_get_info = hsa_agent_get_info; - hsa_api_.hsa_iterate_agents = hsa_iterate_agents; - - hsa_api_.hsa_queue_create = 
hsa_queue_create; - hsa_api_.hsa_queue_destroy = hsa_queue_destroy; - hsa_api_.hsa_queue_load_read_index_relaxed = - hsa_queue_load_read_index_relaxed; - hsa_api_.hsa_queue_load_write_index_relaxed = - hsa_queue_load_write_index_relaxed; - hsa_api_.hsa_queue_add_write_index_scacq_screl = - hsa_queue_add_write_index_scacq_screl; - - hsa_api_.hsa_signal_create = hsa_signal_create; - hsa_api_.hsa_signal_destroy = hsa_signal_destroy; - hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed; - hsa_api_.hsa_signal_store_relaxed = hsa_signal_store_relaxed; - hsa_api_.hsa_signal_wait_scacquire = hsa_signal_wait_scacquire; - hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease; - - hsa_api_.hsa_code_object_reader_create_from_file = - hsa_code_object_reader_create_from_file; - hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt; - hsa_api_.hsa_executable_load_agent_code_object = - hsa_executable_load_agent_code_object; - hsa_api_.hsa_executable_freeze = hsa_executable_freeze; - hsa_api_.hsa_executable_destroy = hsa_executable_destroy; - hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol; - hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info; - hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols; - - hsa_api_.hsa_system_get_info = hsa_system_get_info; - hsa_api_.hsa_system_get_major_extension_table = - hsa_system_get_major_extension_table; - - hsa_api_.hsa_amd_agent_iterate_memory_pools = - hsa_amd_agent_iterate_memory_pools; - hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info; - hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate; - hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access; - hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy; - - hsa_api_.hsa_amd_signal_async_handler = hsa_amd_signal_async_handler; - hsa_api_.hsa_amd_profiling_set_profiler_enabled = - hsa_amd_profiling_set_profiler_enabled; - 
hsa_api_.hsa_amd_profiling_get_async_copy_time = - hsa_amd_profiling_get_async_copy_time; - hsa_api_.hsa_amd_profiling_get_dispatch_time = - hsa_amd_profiling_get_dispatch_time; - } - } -} - -hsa_status_t -HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) -{ - void* handle = dlopen(kAqlProfileLib, RTLD_NOW); - if(handle == nullptr) - { - fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); - return HSA_STATUS_ERROR; - } - dlerror(); /* Clear any existing error */ - - api->hsa_ven_amd_aqlprofile_error_string = - (decltype(::hsa_ven_amd_aqlprofile_error_string)*) dlsym( - handle, "hsa_ven_amd_aqlprofile_error_string"); - api->hsa_ven_amd_aqlprofile_validate_event = - (decltype(::hsa_ven_amd_aqlprofile_validate_event)*) dlsym( - handle, "hsa_ven_amd_aqlprofile_validate_event"); - api->hsa_ven_amd_aqlprofile_start = (decltype(::hsa_ven_amd_aqlprofile_start)*) dlsym( - handle, "hsa_ven_amd_aqlprofile_start"); - api->hsa_ven_amd_aqlprofile_stop = (decltype(::hsa_ven_amd_aqlprofile_stop)*) dlsym( - handle, "hsa_ven_amd_aqlprofile_stop"); -#ifdef AQLPROF_NEW_API - api->hsa_ven_amd_aqlprofile_read = (decltype(::hsa_ven_amd_aqlprofile_read)*) dlsym( - handle, "hsa_ven_amd_aqlprofile_read"); -#endif - api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = - (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*) dlsym( - handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); - api->hsa_ven_amd_aqlprofile_get_info = - (decltype(::hsa_ven_amd_aqlprofile_get_info)*) dlsym( - handle, "hsa_ven_amd_aqlprofile_get_info"); - api->hsa_ven_amd_aqlprofile_iterate_data = - (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*) dlsym( - handle, "hsa_ven_amd_aqlprofile_iterate_data"); - - return HSA_STATUS_SUCCESS; -} - -// Add system agent info -const AgentInfo* -HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) -{ - // Determine if device is a Gpu agent - hsa_status_t status; - AgentInfo* agent_info = nullptr; - - hsa_device_type_t type; - status = 
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); - CHECK_STATUS("Error Calling hsa_agent_get_info", status); - - if(type == HSA_DEVICE_TYPE_CPU) - { - agent_info = new AgentInfo{}; - agent_info->dev_id = agent; - agent_info->dev_type = HSA_DEVICE_TYPE_CPU; - agent_info->dev_index = cpu_list_.size(); - - status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, - &agent_info->cpu_pool); - if((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == nullptr)) - cpu_pool_ = &agent_info->cpu_pool; - status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, - &agent_info->kern_arg_pool); - if((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == nullptr)) - kern_arg_pool_ = &agent_info->kern_arg_pool; - agent_info->gpu_pool = {}; - - cpu_list_.push_back(agent_info); - cpu_agents_.push_back(agent); - } - - if(type == HSA_DEVICE_TYPE_GPU) - { - agent_info = new AgentInfo{}; - agent_info->dev_id = agent; - agent_info->dev_type = HSA_DEVICE_TYPE_GPU; - hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name); - const int gfxip_label_len = - std::min(strlen(agent_info->name) - 2, sizeof(agent_info->gfxip) - 1); - memcpy(agent_info->gfxip, agent_info->name, gfxip_label_len); - agent_info->gfxip[gfxip_label_len] = '\0'; - hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, - &agent_info->max_wave_size); - hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, - &agent_info->max_queue_size); - hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); - agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? 
true : false; - hsa_api_.hsa_agent_get_info( - agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), - &agent_info->cu_num); - hsa_api_.hsa_agent_get_info( - agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), - &agent_info->waves_per_cu); - hsa_api_.hsa_agent_get_info( - agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), - &agent_info->simds_per_cu); - hsa_api_.hsa_agent_get_info( - agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), - &agent_info->se_num); - hsa_api_.hsa_agent_get_info( - agent, - static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), - &agent_info->shader_arrays_per_se); - - agent_info->cpu_pool = {}; - agent_info->kern_arg_pool = {}; - status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, - &agent_info->gpu_pool); - CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status); - - // GFX8 and GFX9 SGPR/VGPR block sizes - agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 1 : 2; - agent_info->sgpr_block_size = 8; - agent_info->vgpr_block_size = 4; - - // Set GPU index - /*uint32_t driver_node_id; - status = hsa_api_.hsa_agent_get_info( - agent, static_cast(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), - &driver_node_id); - CHECK_STATUS("hsa_agent_get_info(gpu hsa_driver_node_id)", status); - agent_info->dev_index = driver_node_id;*/ - // disable this change above (found in the rocprofiler library) - // because it breaks the lookup for rocprofiler_pool_fetch - // lookup in rocprofiler.cpp. 
On my system (one AMD GPU and one NVIDIA GPU), - // it has a value of 1, not 0 and the pool size is 1 - agent_info->dev_index = gpu_list_.size(); - gpu_list_.push_back(agent_info); - gpu_agents_.push_back(agent); - } - - if(agent_info) agent_map_[agent.handle] = agent_info; - - return agent_info; -} - -// Return systen agent info -const AgentInfo* -HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) -{ - const AgentInfo* agent_info = nullptr; - auto it = agent_map_.find(agent.handle); - if(it != agent_map_.end()) - { - agent_info = it->second; - } - return agent_info; -} - -// Get the count of Hsa Gpu Agents available on the platform -// -// @return uint32_t Number of Gpu agents on platform -// -uint32_t -HsaRsrcFactory::GetCountOfGpuAgents() -{ - return uint32_t(gpu_list_.size()); -} - -// Get the count of Hsa Cpu Agents available on the platform -// -// @return uint32_t Number of Cpu agents on platform -// -uint32_t -HsaRsrcFactory::GetCountOfCpuAgents() -{ - return uint32_t(cpu_list_.size()); -} - -// Get the AgentInfo handle of a Gpu device -// -// @param idx Gpu Agent at specified index -// -// @param agent_info Output parameter updated with AgentInfo -// -// @return bool true if successful, false otherwise -// -bool -HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) -{ - // Determine if request is valid - uint32_t size = uint32_t(gpu_list_.size()); - if(idx >= size) - { - return false; - } - - // Copy AgentInfo from specified index - *agent_info = gpu_list_[idx]; - - return true; -} - -// Get the AgentInfo handle of a Cpu device -// -// @param idx Cpu Agent at specified index -// -// @param agent_info Output parameter updated with AgentInfo -// -// @return bool true if successful, false otherwise -// -bool -HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) -{ - // Determine if request is valid - uint32_t size = uint32_t(cpu_list_.size()); - if(idx >= size) - { - return false; - } - - // Copy AgentInfo from 
specified index - *agent_info = cpu_list_[idx]; - return true; -} - -// Create a Queue object and return its handle. The queue object is expected -// to support user requested number of Aql dispatch packets. -// -// @param agent_info Gpu Agent on which to create a queue object -// -// @param num_Pkts Number of packets to be held by queue -// -// @param queue Output parameter updated with handle of queue object -// -// @return bool true if successful, false otherwise -// -bool // NOLINTNEXTLINE(readability-convert-member-functions-to-static) -HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, - hsa_queue_t** queue) -{ - hsa_status_t status; - status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, - nullptr, nullptr, UINT32_MAX, UINT32_MAX, queue); - return (status == HSA_STATUS_SUCCESS); -} - -// Create a Signal object and return its handle. -// @param value Initial value of signal object -// @param signal Output parameter updated with handle of signal object -// @return bool true if successful, false otherwise -bool // NOLINTNEXTLINE(readability-convert-member-functions-to-static) -HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) -{ - hsa_status_t status; - status = hsa_api_.hsa_signal_create(value, 0, nullptr, signal); - return (status == HSA_STATUS_SUCCESS); -} - -// Allocate memory for use by a kernel of specified size in specified -// agent's memory region. -// @param agent_info Agent from whose memory region to allocate -// @param size Size of memory in terms of bytes -// @return uint8_t* Pointer to buffer, null if allocation fails. 
-uint8_t* // NOLINTNEXTLINE(readability-convert-member-functions-to-static) -HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) -{ - hsa_status_t status = HSA_STATUS_ERROR; - uint8_t* buffer = nullptr; - size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, - reinterpret_cast(&buffer)); - uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : nullptr; - return ptr; -} - -// Allocate memory to pass kernel parameters. -// Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo -// parameter. -// @param agent_info Agent from whose memory region to allocate -// @param size Size of memory in terms of bytes -// @return uint8_t* Pointer to buffer, null if allocation fails. -uint8_t* -HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size_t size) -{ - hsa_status_t status = HSA_STATUS_ERROR; - uint8_t* buffer = nullptr; - if(!cpu_agents_.empty()) - { - size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, - reinterpret_cast(&buffer)); - // Both the CPU and GPU can access the kernel arguments - if(status == HSA_STATUS_SUCCESS) - { - hsa_agent_t ag_list[1] = { agent_info->dev_id }; - status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, nullptr, buffer); - } - } - uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : nullptr; - return ptr; -} - -// Allocate system memory accessible by both CPU and GPU -// @param agent_info Agent from whose memory region to allocate -// @param size Size of memory in terms of bytes -// @return uint8_t* Pointer to buffer, null if allocation fails. 
-uint8_t* -HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) -{ - hsa_status_t status = HSA_STATUS_ERROR; - uint8_t* buffer = nullptr; - size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - if(!cpu_agents_.empty()) - { - status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, - reinterpret_cast(&buffer)); - // Both the CPU and GPU can access the memory - if(status == HSA_STATUS_SUCCESS) - { - hsa_agent_t ag_list[1] = { agent_info->dev_id }; - status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, nullptr, buffer); - } - } - uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : nullptr; - return ptr; -} - -// Allocate memory for command buffer. -// @param agent_info Agent from whose memory region to allocate -// @param size Size of memory in terms of bytes -// @return uint8_t* Pointer to buffer, null if allocation fails. -uint8_t* -HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size) -{ - size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP) - ? 
reinterpret_cast( - mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_SHARED | MAP_ANONYMOUS, 0, 0)) - : AllocateSysMemory(agent_info, size); - return ptr; -} - -// Wait signal -hsa_signal_value_t -HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, - const hsa_signal_value_t& signal_value) const -{ - const hsa_signal_value_t exp_value = signal_value - 1; - hsa_signal_value_t ret_value = signal_value; - while(true) - { - ret_value = hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, - signal_value, timeout_, - HSA_WAIT_STATE_BLOCKED); - if(ret_value == exp_value) break; - if(ret_value != signal_value) - { - std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" - << signal_value << "), ret_value(" << ret_value << ")" << std::endl - << std::flush; - abort(); - } - } - return ret_value; -} - -// Wait signal with signal value restore -void -HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, - const hsa_signal_value_t& signal_value) const -{ - SignalWait(signal, signal_value); - hsa_api_.hsa_signal_store_relaxed(const_cast(signal), signal_value); -} - -// Copy data from GPU to host memory -bool -HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) -{ - hsa_status_t status = HSA_STATUS_ERROR; - if(!cpu_agents_.empty()) - { - hsa_signal_t s = {}; - status = hsa_api_.hsa_signal_create(1, 0, nullptr, &s); - CHECK_STATUS("hsa_signal_create()", status); - status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, - 0, nullptr, s); - CHECK_STATUS("hsa_amd_memory_async_copy()", status); - SignalWait(s, 1); - status = hsa_api_.hsa_signal_destroy(s); - CHECK_STATUS("hsa_signal_destroy()", status); - } - return (status == HSA_STATUS_SUCCESS); -} - -bool -HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dst, const void* src, - size_t size) -{ - return Memcpy(agent_info->dev_id, dst, src, size); -} - -// Memory free method -bool -HsaRsrcFactory::FreeMemory(void* 
ptr) -{ - const hsa_status_t status = hsa_memory_free(ptr); - CHECK_STATUS("hsa_memory_free", status); - return (status == HSA_STATUS_SUCCESS); -} - -// Loads an Assembled Brig file and Finalizes it into Device Isa -// @param agent_info Gpu device for which to finalize -// @param brig_path File path of the Assembled Brig file -// @param kernel_name Name of the kernel to finalize -// @param code_desc Handle of finalized Code Descriptor that could -// be used to submit for execution -// @return bool true if successful, false otherwise -bool // NOLINTNEXTLINE(readability-convert-member-functions-to-static) -HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, - const char* kernel_name, hsa_executable_t* executable, - hsa_executable_symbol_t* code_desc) -{ - hsa_status_t status = HSA_STATUS_ERROR; - - // Build the code object filename - std::string filename(brig_path); - std::clog << "Code object filename: " << filename << std::endl; - - // Open the file containing code object - hsa_file_t file_handle = open(filename.c_str(), O_RDONLY); - if(file_handle == -1) - { - std::cerr << "Error: failed to load '" << filename << "'" << std::endl; - assert(false); - return false; - } - - // Create code object reader - hsa_code_object_reader_t code_obj_rdr = { 0 }; - status = hsa_api_.hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr); - if(status != HSA_STATUS_SUCCESS) - { - std::cerr << "Failed to create code object reader '" << filename << "'" - << std::endl; - return false; - } - - // Create executable. - status = hsa_api_.hsa_executable_create_alt( - HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, nullptr, executable); - CHECK_STATUS("Error in creating executable object", status); - - // Load code object. - status = hsa_api_.hsa_executable_load_agent_code_object( - *executable, agent_info->dev_id, code_obj_rdr, nullptr, nullptr); - CHECK_STATUS("Error in loading executable object", status); - - // Freeze executable. 
- status = hsa_api_.hsa_executable_freeze(*executable, ""); - CHECK_STATUS("Error in freezing executable object", status); - - // Get symbol handle. - hsa_executable_symbol_t kernelSymbol; - status = hsa_api_.hsa_executable_get_symbol(*executable, nullptr, kernel_name, - agent_info->dev_id, 0, &kernelSymbol); - CHECK_STATUS("Error in looking up kernel symbol", status); - - close(file_handle); - - // Update output parameter - *code_desc = kernelSymbol; - return true; -} - -// Print the various fields of Hsa Gpu Agents -bool -HsaRsrcFactory::PrintGpuAgents(const std::string&) -{ - std::vector _agents = {}; - for(const auto* itr : gpu_list_) - { - if(itr) _agents.emplace_back(*itr); - } - - ROCPROFSYS_METADATA([_agents](auto& ar) { - namespace cereal = ::tim::cereal; - - ar.setNextName("rocm_agents"); - ar.startNode(); - ar.makeArray(); - for(auto itr : _agents) - { - ar.startNode(); - ar(cereal::make_nvp("name", std::string{ itr.name }), - cereal::make_nvp("is_apu", itr.is_apu), - cereal::make_nvp("hsa_profile", itr.profile), - cereal::make_nvp("max_wave_size", itr.max_wave_size), - cereal::make_nvp("max_queue_size", itr.max_queue_size), - cereal::make_nvp("cu_number", itr.cu_num), - cereal::make_nvp("waves_per_cu", itr.waves_per_cu), - cereal::make_nvp("simds_per_cu", itr.simds_per_cu), - cereal::make_nvp("se_num", itr.se_num), - cereal::make_nvp("shader_arrays_per_se", itr.shader_arrays_per_se)); - ar.finishNode(); - } - ar.finishNode(); - }); - - return true; -} - -void* -HsaRsrcFactory::GetSlotPointer(hsa_queue_t* queue, const uint64_t& idx) -{ - const uint32_t slot_size_b = CMD_SLOT_SIZE_B; - const uint32_t slot_idx = (uint32_t)(idx % queue->size); - void* queue_slot = reinterpret_cast((uintptr_t)(queue->base_address) + - (slot_idx * slot_size_b)); - return queue_slot; -} - -void* -HsaRsrcFactory::GetReadPointer(hsa_queue_t* queue) -{ - const uint64_t read_idx = hsa_api_.hsa_queue_load_read_index_relaxed(queue); - return GetSlotPointer(queue, read_idx); -} - 
-uint64_t -HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) -{ - const uint32_t slot_size_b = CMD_SLOT_SIZE_B; - - // adevance command queue - const uint64_t write_idx = hsa_api_.hsa_queue_add_write_index_scacq_screl(queue, 1); - while((write_idx - hsa_api_.hsa_queue_load_read_index_relaxed(queue)) >= queue->size) - { - sched_yield(); - } - - const uint32_t slot_idx = (uint32_t)(write_idx % queue->size); - uint32_t* queue_slot = reinterpret_cast((uintptr_t)(queue->base_address) + - (slot_idx * slot_size_b)); - const uint32_t* slot_data = reinterpret_cast(packet); - - // Copy buffered commands into the queue slot. - // Overwrite the AQL invalid header (first dword) last. - // This prevents the slot from being read until it's fully written. - memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t)); - std::atomic* header_atomic_ptr = - reinterpret_cast*>(&queue_slot[0]); - header_atomic_ptr->store(slot_data[0], std::memory_order_release); - - // ringdoor bell - hsa_api_.hsa_signal_store_relaxed(queue->doorbell_signal, write_idx); - - return write_idx; -} - -uint64_t -HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) -{ - const uint32_t slot_size_b = CMD_SLOT_SIZE_B; - if((size_bytes & (slot_size_b - 1)) != 0) - { - fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes); - abort(); - } - - const char* begin = reinterpret_cast(packet); - const char* end = begin + size_bytes; - uint64_t write_idx = 0; - for(const char* ptr = begin; ptr < end; ptr += slot_size_b) - { - write_idx = Submit(queue, ptr); - } - - return write_idx; -} - -const char* -HsaRsrcFactory::GetKernelNameRef(uint64_t addr) -{ - std::lock_guard lck(mutex_); - const auto it = symbols_map_->find(addr); - if(it == symbols_map_->end()) - { - fprintf(stderr, - "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx) is not found\n", - addr); - abort(); - } - return it->second; -} - -void 
-HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) -{ - std::lock_guard lck(mutex_); - executable_tracking_on_ = true; - table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; - table->core_->hsa_executable_destroy_fn = hsa_executable_destroy_interceptor; -} - -hsa_status_t -HsaRsrcFactory::executable_symbols_cb(hsa_executable_t /*exec*/, - hsa_executable_symbol_t symbol, void* data) -{ - hsa_symbol_kind_t value = (hsa_symbol_kind_t) 0; - hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info( - symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); - CHECK_STATUS("Error in getting symbol info", status); - if(value == HSA_SYMBOL_KIND_KERNEL) - { - uint64_t addr = 0; - uint32_t len = 0; - status = hsa_api_.hsa_executable_symbol_get_info( - symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); - CHECK_STATUS("Error in getting kernel object", status); - status = hsa_api_.hsa_executable_symbol_get_info( - symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len); - CHECK_STATUS("Error in getting name len", status); - char* symname = new char[len + 1]; - status = hsa_api_.hsa_executable_symbol_get_info( - symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, symname); - CHECK_STATUS("Error in getting kernel name", status); - symname[len] = 0; - if(data == nullptr) - { - const char* name = cpp_demangle(symname); - auto ret = symbols_map_->insert({ addr, name }); - if(ret.second == false) - { - delete[] ret.first->second; - ret.first->second = name; - } - } - else - { - symbols_map_->erase(addr); - } - delete[] symname; - } - return HSA_STATUS_SUCCESS; -} - -hsa_status_t -HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, - const char* options) -{ - std::lock_guard lck(mutex_); - if(symbols_map_ == nullptr) symbols_map_ = new symbols_map_t; - hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols( - executable, executable_symbols_cb, nullptr); - CHECK_STATUS("Error in iterating executable symbols", status); - return 
hsa_api_.hsa_executable_freeze(executable, options); -} - -hsa_status_t -HsaRsrcFactory::hsa_executable_destroy_interceptor(hsa_executable_t executable) -{ - std::lock_guard lck(mutex_); - if(symbols_map_ != nullptr) - { - hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols( - executable, executable_symbols_cb, (void*) 1); - CHECK_STATUS("Error in iterating executable symbols", status); - } - return hsa_api_.hsa_executable_destroy(executable); -} - -std::atomic HsaRsrcFactory::instance_{}; -HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; -HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX; -hsa_pfn_t HsaRsrcFactory::hsa_api_{}; -bool HsaRsrcFactory::executable_tracking_on_ = false; -HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = nullptr; -void* HsaRsrcFactory::to_dump_code_obj_ = nullptr; - -} // namespace util -} // namespace rocprofiler diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/hsa_rsrc_factory.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/hsa_rsrc_factory.hpp deleted file mode 100644 index 9e255ce98a..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm/hsa_rsrc_factory.hpp +++ /dev/null @@ -1,582 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -#include "core/exception.hpp" - -#define AMD_INTERNAL_BUILD 1 - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define HSA_ARGUMENT_ALIGN_BYTES 16 -#define HSA_QUEUE_ALIGN_BYTES 64 -#define HSA_PACKET_ALIGN_BYTES 64 -#define HSA_MESSAGE_LENGTH 4096 - -#define CHECK_STATUS(msg, status) \ - do \ - { \ - if((status) != HSA_STATUS_SUCCESS) \ - { \ - const char* emsg = 0; \ - hsa_status_string(status, &emsg); \ - char _buffer[HSA_MESSAGE_LENGTH]; \ - snprintf(_buffer, HSA_MESSAGE_LENGTH - 1, "%s: %s", msg, \ - emsg ? emsg : ""); \ - throw ::rocprofsys::exception(_buffer); \ - } \ - } while(0) - -#define CHECK_ITER_STATUS(msg, status) \ - do \ - { \ - if((status) != HSA_STATUS_INFO_BREAK) \ - { \ - const char* emsg = 0; \ - hsa_status_string(status, &emsg); \ - char _buffer[HSA_MESSAGE_LENGTH]; \ - snprintf(_buffer, HSA_MESSAGE_LENGTH - 1, "%s: %s", msg, \ - emsg ? 
emsg : ""); \ - throw ::rocprofsys::exception(_buffer); \ - } \ - } while(0) - -namespace rocprofiler -{ -namespace util -{ -static const size_t MEM_PAGE_BYTES = 0x1000; -static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; -typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t; - -struct hsa_pfn_t -{ - decltype(::hsa_init)* hsa_init; - decltype(::hsa_shut_down)* hsa_shut_down; - decltype(::hsa_agent_get_info)* hsa_agent_get_info; - decltype(::hsa_iterate_agents)* hsa_iterate_agents; - - decltype(::hsa_queue_create)* hsa_queue_create; - decltype(::hsa_queue_destroy)* hsa_queue_destroy; - decltype(::hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed; - decltype(::hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed; - decltype( - ::hsa_queue_add_write_index_scacq_screl)* hsa_queue_add_write_index_scacq_screl; - - decltype(::hsa_signal_create)* hsa_signal_create; - decltype(::hsa_signal_destroy)* hsa_signal_destroy; - decltype(::hsa_signal_load_relaxed)* hsa_signal_load_relaxed; - decltype(::hsa_signal_store_relaxed)* hsa_signal_store_relaxed; - decltype(::hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire; - decltype(::hsa_signal_store_screlease)* hsa_signal_store_screlease; - - decltype(::hsa_code_object_reader_create_from_file)* - hsa_code_object_reader_create_from_file; - decltype(::hsa_executable_create_alt)* hsa_executable_create_alt; - decltype( - ::hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object; - decltype(::hsa_executable_freeze)* hsa_executable_freeze; - decltype(::hsa_executable_destroy)* hsa_executable_destroy; - decltype(::hsa_executable_get_symbol)* hsa_executable_get_symbol; - decltype(::hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info; - decltype(::hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols; - - decltype(::hsa_system_get_info)* hsa_system_get_info; - decltype( - ::hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table; 
- - decltype(::hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools; - decltype(::hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info; - decltype(::hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate; - decltype(::hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access; - decltype(::hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy; - - decltype(::hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler; - decltype( - ::hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled; - decltype( - ::hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time; - decltype(::hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time; -}; - -// Encapsulates information about a Hsa Agent such as its -// handle, name, max queue size, max wavefront size, etc. -struct AgentInfo -{ - // Handle of Agent - hsa_agent_t dev_id; - - // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2 - uint32_t dev_type; - - // APU flag - bool is_apu; - - // Agent system index - uint32_t dev_index; - - // GFXIP name - char gfxip[64]; - - // Name of Agent whose length is less than 64 - char name[64]; - - // Max size of Wavefront size - uint32_t max_wave_size; - - // Max size of Queue buffer - uint32_t max_queue_size; - - // Hsail profile supported by agent - hsa_profile_t profile; - - // CPU/GPU/kern-arg memory pools - hsa_amd_memory_pool_t cpu_pool; - hsa_amd_memory_pool_t gpu_pool; - hsa_amd_memory_pool_t kern_arg_pool; - - // The number of compute unit available in the agent. - uint32_t cu_num; - - // Maximum number of waves possible in a Compute Unit. 
- uint32_t waves_per_cu; - - // Number of SIMD's per compute unit CU - uint32_t simds_per_cu; - - // Number of Shader Engines (SE) in Gpu - uint32_t se_num; - - // Number of Shader Arrays Per Shader Engines in Gpu - uint32_t shader_arrays_per_se; - - // SGPR/VGPR/LDS block sizes - uint32_t sgpr_block_dflt; - uint32_t sgpr_block_size; - uint32_t vgpr_block_size; - static const uint32_t lds_block_size = 128 * 4; -}; - -// HSA timer class -// Provides current HSA timestampa and system-clock/ns conversion API -class HsaTimer -{ -public: - typedef uint64_t timestamp_t; - static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; - typedef long double freq_t; - - enum time_id_t - { - TIME_ID_CLOCK_REALTIME = 0, - TIME_ID_CLOCK_REALTIME_COARSE = 1, - TIME_ID_CLOCK_MONOTONIC = 2, - TIME_ID_CLOCK_MONOTONIC_COARSE = 3, - TIME_ID_CLOCK_MONOTONIC_RAW = 4, - TIME_ID_NUMBER - }; - - HsaTimer(const hsa_pfn_t* hsa_api) - : hsa_api_(hsa_api) - { - timestamp_t sysclock_hz = 0; - hsa_status_t status = hsa_api_->hsa_system_get_info( - HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); - CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status); - sysclock_factor_ = (freq_t) 1000000000 / (freq_t) sysclock_hz; - } - - // Methods for system-clock/ns conversion - timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const - { - return timestamp_t((freq_t) sysclock * sysclock_factor_); - } - timestamp_t ns_to_sysclock(const timestamp_t& time) const - { - return timestamp_t((freq_t) time / sysclock_factor_); - } - - // Method for timespec/ns conversion - static timestamp_t timespec_to_ns(const timespec& time) - { - return ((timestamp_t) time.tv_sec * 1000000000) + time.tv_nsec; - } - - // Return timestamp in 'ns' - timestamp_t timestamp_ns() const - { - timestamp_t sysclock; - hsa_status_t status = - hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); - CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)", status); - return 
sysclock_to_ns(sysclock); - } - - // Return time in 'ns' - timestamp_t clocktime_ns(clockid_t clock_id) const - { - timespec time; - clock_gettime(clock_id, &time); - return timespec_to_ns(time); - } - - // Return pair of correlated values of profiling timestamp and time with - // correlation error for a given time ID and number of iterations - void correlated_pair_ns(time_id_t time_id, uint32_t iters, timestamp_t* timestamp_v, - timestamp_t* time_v, timestamp_t* error_v) - { - clockid_t clock_id = 0; - switch(time_id) - { - case TIME_ID_CLOCK_REALTIME: clock_id = CLOCK_REALTIME; break; - case TIME_ID_CLOCK_REALTIME_COARSE: clock_id = CLOCK_REALTIME_COARSE; break; - case TIME_ID_CLOCK_MONOTONIC: clock_id = CLOCK_MONOTONIC; break; - case TIME_ID_CLOCK_MONOTONIC_COARSE: clock_id = CLOCK_MONOTONIC_COARSE; break; - case TIME_ID_CLOCK_MONOTONIC_RAW: clock_id = CLOCK_MONOTONIC_RAW; break; - default: CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR); - } - - std::vector ts_vec(iters); - std::vector tm_vec(iters); - const uint32_t steps = iters - 1; - - for(uint32_t i = 0; i < iters; ++i) - { - hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]); - clock_gettime(clock_id, &tm_vec[i]); - } - - const timestamp_t ts_base = sysclock_to_ns(ts_vec.front()); - const timestamp_t tm_base = timespec_to_ns(tm_vec.front()); - const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps); - - timestamp_t ts_accum = 0; - timestamp_t tm_accum = 0; - for(uint32_t i = 0; i < iters; ++i) - { - ts_accum += (ts_vec[i] - ts_base); - tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base); - } - - *timestamp_v = (ts_accum / iters) + ts_base + error; - *time_v = (tm_accum / iters) + tm_base; - *error_v = error; - } - -private: - // Timestamp frequency factor - freq_t sysclock_factor_; - // HSA API table - const hsa_pfn_t* const hsa_api_; -}; - -class HsaRsrcFactory -{ -public: - static const size_t CMD_SLOT_SIZE_B = 0x40; - typedef std::recursive_mutex 
mutex_t; - typedef HsaTimer::timestamp_t timestamp_t; - - static HsaRsrcFactory* Create(bool initialize_hsa = true) - { - std::lock_guard lck(mutex_); - HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed); - if(obj == nullptr) - { - obj = new HsaRsrcFactory(initialize_hsa); - instance_.store(obj, std::memory_order_release); - } - return obj; - } - - static HsaRsrcFactory& Instance() - { - HsaRsrcFactory* obj = instance_.load(std::memory_order_acquire); - if(obj == nullptr) obj = Create(false); - hsa_status_t status = (obj != nullptr) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; - CHECK_STATUS("HsaRsrcFactory::Instance() failed", status); - return *obj; - } - - static void Destroy() - { - std::lock_guard lck(mutex_); - if(instance_) delete instance_.load(); - instance_ = nullptr; - } - - // Return system agent info - const AgentInfo* GetAgentInfo(const hsa_agent_t agent); - - // Get the count of Hsa Gpu Agents available on the platform - // @return uint32_t Number of Gpu agents on platform - uint32_t GetCountOfGpuAgents(); - - // Get the count of Hsa Cpu Agents available on the platform - // @return uint32_t Number of Cpu agents on platform - uint32_t GetCountOfCpuAgents(); - - // Get the AgentInfo handle of a Gpu device - // @param idx Gpu Agent at specified index - // @param agent_info Output parameter updated with AgentInfo - // @return bool true if successful, false otherwise - bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info); - - // Get the AgentInfo handle of a Cpu device - // @param idx Cpu Agent at specified index - // @param agent_info Output parameter updated with AgentInfo - // @return bool true if successful, false otherwise - bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info); - - // Create a Queue object and return its handle. The queue object is expected - // to support user requested number of Aql dispatch packets. 
- // @param agent_info Gpu Agent on which to create a queue object - // @param num_Pkts Number of packets to be held by queue - // @param queue Output parameter updated with handle of queue object - // @return bool true if successful, false otherwise - bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue); - - // Create a Signal object and return its handle. - // @param value Initial value of signal object - // @param signal Output parameter updated with handle of signal object - // @return bool true if successful, false otherwise - bool CreateSignal(uint32_t value, hsa_signal_t* signal); - - // Allocate local GPU memory - // @param agent_info Agent from whose memory region to allocate - // @param size Size of memory in terms of bytes - // @return uint8_t* Pointer to buffer, null if allocation fails. - uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size); - - // Allocate memory tp pass kernel parameters - // Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo - // parameter. - // @param agent_info Agent from whose memory region to allocate - // @param size Size of memory in terms of bytes - // @return uint8_t* Pointer to buffer, null if allocation fails. - uint8_t* AllocateKernArgMemory(const AgentInfo* agent_info, size_t size); - - // Allocate system memory accessible from both CPU and GPU - // Memory is alocated accessible to all CPU agents and AgentInfo parameter is ignored. - // @param agent_info Agent from whose memory region to allocate - // @param size Size of memory in terms of bytes - // @return uint8_t* Pointer to buffer, null if allocation fails. - uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size); - - // Allocate memory for command buffer. - // @param agent_info Agent from whose memory region to allocate - // @param size Size of memory in terms of bytes - // @return uint8_t* Pointer to buffer, null if allocation fails. 
- uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size); - - // Wait signal - hsa_signal_value_t SignalWait(const hsa_signal_t& signal, - const hsa_signal_value_t& signal_value) const; - - // Wait signal with signal value restore - void SignalWaitRestore(const hsa_signal_t& signal, - const hsa_signal_value_t& signal_value) const; - - // Copy data from GPU to host memory - bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size); - bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size); - - // Memory free method - static bool FreeMemory(void* ptr); - - // Loads an Assembled Brig file and Finalizes it into Device Isa - // @param agent_info Gpu device for which to finalize - // @param brig_path File path of the Assembled Brig file - // @param kernel_name Name of the kernel to finalize - // @param code_desc Handle of finalized Code Descriptor that could - // be used to submit for execution - // @return true if successful, false otherwise - bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, - const char* kernel_name, hsa_executable_t* hsa_exec, - hsa_executable_symbol_t* code_desc); - - // Print the various fields of Hsa Gpu Agents - bool PrintGpuAgents(const std::string& header); - - // Utils for submitting AQL packet to a given queue - static void* GetSlotPointer(hsa_queue_t* queue, const uint64_t& idx); - static void* GetReadPointer(hsa_queue_t* queue); - static uint64_t Submit(hsa_queue_t* queue, const void* packet); - static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); - - // Enable executables loading tracking - static bool IsExecutableTracking() { return executable_tracking_on_; } - static void EnableExecutableTracking(HsaApiTable* table); - static const char* GetKernelNameRef(uint64_t addr); - - // Initialize HSA API table - void static InitHsaApiTable(HsaApiTable* table); - static const hsa_pfn_t* HsaApi() { return &hsa_api_; } - - // Return 
AqlProfile API table - typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t; - const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } - - // Return Loader API table - const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; } - - // Methods for system-clock/ns conversion and timestamp in 'ns' - timestamp_t SysclockToNs(const timestamp_t& sysclock) const - { - return timer_->sysclock_to_ns(sysclock); - } - timestamp_t NsToSysclock(const timestamp_t& time) const - { - return timer_->ns_to_sysclock(time); - } - timestamp_t TimestampNs() const { return timer_->timestamp_ns(); } - - timestamp_t GetSysTimeout() const { return timeout_; } - static timestamp_t GetTimeoutNs() { return timeout_ns_; } - static void SetTimeoutNs(const timestamp_t& time) - { - std::lock_guard lck(mutex_); - timeout_ns_ = time; - if(instance_ != nullptr) - Instance().timeout_ = Instance().timer_->ns_to_sysclock(time); - } - - void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) - { - timestamp_t timestamp_v = 0; - timestamp_t time_v = 0; - timestamp_t error_v = 0; - timer_->correlated_pair_ns(time_id, iters, &timestamp_v, &time_v, &error_v); - time_shift_[time_id] = time_v - timestamp_v; - time_error_[time_id] = error_v; - } - - hsa_status_t GetTimeVal(uint32_t time_id, uint64_t time_stamp, uint64_t* time_value) - { - if(time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; - *time_value = time_stamp + time_shift_[time_id]; - return HSA_STATUS_SUCCESS; - } - - hsa_status_t GetTimeErr(uint32_t time_id, uint64_t* err) - { - *err = time_error_[time_id]; - return HSA_STATUS_SUCCESS; - } - -private: - // System agents iterating callback - static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data); - - // Callback function to find and bind kernarg region of an agent - static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data); - - // Load AQL profile HSA extension library directly - static hsa_status_t
LoadAqlProfileLib(aqlprofile_pfn_t* api); - - // Constructor of the class. Will initialize the Hsa Runtime and - // query the system topology to get the list of Cpu and Gpu devices - explicit HsaRsrcFactory(bool initialize_hsa); - - // Destructor of the class - ~HsaRsrcFactory(); - - // Add an instance of AgentInfo representing a Hsa Gpu agent - const AgentInfo* AddAgentInfo(const hsa_agent_t agent); - - // To mmap command buffer memory - static const bool CMD_MEMORY_MMAP = false; - - // HSA was initialized - const bool initialize_hsa_; - - static std::atomic instance_; - static mutex_t mutex_; - - // Used to maintain a list of Hsa Gpu Agent Info - std::vector gpu_list_; - std::vector gpu_agents_; - - // Used to maintain a list of Hsa Cpu Agent Info - std::vector cpu_list_; - std::vector cpu_agents_; - - // System agents map - std::map agent_map_; - - // Executables loading tracking - typedef std::map symbols_map_t; - static symbols_map_t* symbols_map_; - static bool executable_tracking_on_; - static void* to_dump_code_obj_; - static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, - const char* options); - static hsa_status_t hsa_executable_destroy_interceptor(hsa_executable_t executable); - static hsa_status_t executable_symbols_cb(hsa_executable_t exec, - hsa_executable_symbol_t symbol, void* data); - - // HSA runtime API table - static hsa_pfn_t hsa_api_; - - // AqlProfile API table - aqlprofile_pfn_t aqlprofile_api_; - - // Loader API table - hsa_ven_amd_loader_1_00_pfn_t loader_api_; - - // System timeout, ns - static timestamp_t timeout_ns_; - // System timeout, sysclock - timestamp_t timeout_; - - // HSA timer - HsaTimer* timer_; - - // Time shift array to support time conversion - timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER]; - timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER]; - - // CPU/kern-arg memory pools - hsa_amd_memory_pool_t* cpu_pool_; - hsa_amd_memory_pool_t* kern_arg_pool_; -}; - -} // namespace util -} // 
namespace rocprofiler diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.hpp index 09d67e7517..ef1b3d4302 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.hpp @@ -128,7 +128,8 @@ private: static bool shutdown(); }; -#if !defined(ROCPROFSYS_USE_ROCM_SMI) +#if !defined(ROCPROFSYS_USE_ROCM) || ROCPROFSYS_USE_ROCM == 0 + inline void setup() {} @@ -154,7 +155,7 @@ inline void set_state(State) {} } // namespace rocm_smi } // namespace rocprofsys -#if defined(ROCPROFSYS_USE_ROCM_SMI) && ROCPROFSYS_USE_ROCM_SMI > 0 +#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 # if !defined(ROCPROFSYS_EXTERN_COMPONENTS) || \ (defined(ROCPROFSYS_EXTERN_COMPONENTS) && ROCPROFSYS_EXTERN_COMPONENTS > 0) diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp new file mode 100644 index 0000000000..cb1d5627a4 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp @@ -0,0 +1,1308 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "library/rocprofiler-sdk.hpp" +#include "api.hpp" +#include "common/synchronized.hpp" +#include "core/config.hpp" +#include "core/containers/stable_vector.hpp" +#include "core/debug.hpp" +#include "core/gpu.hpp" +#include "core/perfetto.hpp" +#include "core/rocprofiler-sdk.hpp" +#include "core/state.hpp" +#include "library/components/category_region.hpp" +#include "library/rocm_smi.hpp" +#include "library/rocprofiler-sdk/counters.hpp" +#include "library/rocprofiler-sdk/fwd.hpp" +#include "library/thread_info.hpp" +#include "library/tracing.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace rocprofsys +{ +namespace rocprofiler_sdk +{ +namespace +{ +using tool_agent_vec_t = std::vector; +client_data* tool_data = new client_data{}; + +void +thread_precreate(rocprofiler_runtime_library_t /*lib*/, void* /*tool_data*/) +{ + push_thread_state(ThreadState::Internal); +} + +void +thread_postcreate(rocprofiler_runtime_library_t /*lib*/, void* /*tool_data*/) +{ + pop_thread_state(); +} + +// this function creates a rocprofiler profile config on the first entry +std::vector +create_agent_profile(rocprofiler_agent_id_t agent_id, + const std::vector& counters, + // const tool_agent_vec_t& gpu_agents, + // const 
agent_counter_info_map_t& counters_info, + // agent_counter_profile_map_t& data) + client_data* data = tool_data) +{ + using counter_vec_t = std::vector; + + // check if already created + if(data->agent_counter_profiles.find(agent_id) != data->agent_counter_profiles.end()) + return counter_vec_t{}; + + auto profile = std::optional{}; + auto expected_v = counters.size(); + auto found_v = std::vector{}; + auto counters_v = counter_vec_t{}; + const auto* tool_agent_v = data->get_gpu_tool_agent(agent_id); + + constexpr auto device_qualifier = std::string_view{ ":device=" }; + for(const auto& itr : counters) + { + auto name_v = itr; + if(auto pos = std::string::npos; + (pos = itr.find(device_qualifier)) != std::string::npos) + { + name_v = itr.substr(0, pos); + auto dev_id_s = itr.substr(pos + device_qualifier.length()); + + ROCPROFSYS_CONDITIONAL_ABORT(dev_id_s.empty() || + dev_id_s.find_first_not_of("0123456789") != + std::string::npos, + "invalid device qualifier format (':device=N) " + "where N is the GPU id: %s\n", + itr.c_str()); + + auto dev_id_v = std::stoul(dev_id_s); + + ROCPROFSYS_PRINT_F("tool agent device id=%lu, name=%s, device_id=%lu\n", + tool_agent_v->device_id, name_v.c_str(), dev_id_v); + // skip this counter if the counter is for a specific device id (which + // doesn't this agent's device id) + if(dev_id_v != tool_agent_v->device_id) + { + --expected_v; // is not expected + continue; + } + } + + auto _old_name_v = name_v; + name_v = + std::regex_replace(name_v, std::regex{ "^(.*)(\\[)([0-9]+)(\\])$" }, "$1"); + if(name_v != _old_name_v) + ROCPROFSYS_PRINT_F("tool agent device id=%lu, old_name=%s, name=%s\n", + tool_agent_v->device_id, _old_name_v.c_str(), + name_v.c_str()); + + // search the gpu agent counter info for a counter with a matching name + for(const auto& citr : data->agent_counter_info.at(agent_id)) + { + if(name_v == std::string_view{ citr.name }) + { + counters_v.emplace_back(citr.id); + found_v.emplace_back(itr); + } + } + } + + 
if(counters_v.size() != expected_v) + { + auto requested_counters = + timemory::join::join(timemory::join::array_config{ ", ", "", "" }, counters); + auto found_counters = + timemory::join::join(timemory::join::array_config{ ", ", "", "" }, found_v); + + ROCPROFSYS_ABORT_F( + "Unable to find all counters for agent %i (gpu-%li, %s) in %s. Found: %s\n", + tool_agent_v->agent->node_id, tool_agent_v->device_id, + tool_agent_v->agent->name, requested_counters.c_str(), + found_counters.c_str()); + } + + if(!counters_v.empty()) + { + auto profile_v = rocprofiler_profile_config_id_t{}; + ROCPROFILER_CALL(rocprofiler_create_profile_config( + agent_id, counters_v.data(), counters_v.size(), &profile_v)); + profile = profile_v; + } + + data->agent_counter_profiles.emplace(agent_id, profile); + + return counters_v; +} + +const kernel_symbol_data_t* +get_kernel_symbol_info(uint64_t _kernel_id) +{ + return tool_data->get_kernel_symbol_info(_kernel_id); +} + +// Implementation of rocprofiler_callback_tracing_operation_args_cb_t +int +save_args(rocprofiler_callback_tracing_kind_t /*kind*/, int32_t /*operation*/, + uint32_t /*arg_number*/, const void* const /*arg_value_addr*/, + int32_t /*arg_indirection_count*/, const char* /*arg_type*/, + const char* arg_name, const char* arg_value_str, + int32_t /*arg_dereference_count*/, void* data) +{ + auto* argvec = static_cast(data); + argvec->emplace_back(arg_name, arg_value_str); + return 0; +} + +auto& +get_marker_pushed_ranges() +{ + static thread_local auto _v = std::vector{}; + return _v; +} + +auto& +get_marker_started_ranges() +{ + static thread_local auto _v = std::vector{}; + return _v; +} + +template +void +tool_tracing_callback_start(CategoryT, rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* /*user_data*/, + rocprofiler_timestamp_t /*ts*/) +{ + auto _name = tool_data->callback_tracing_info.at(record.kind, record.operation); + + if constexpr(std::is_same::value) + { + if(record.kind == 
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API) + { + auto* _data = static_cast( + record.payload); + + switch(record.operation) + { + case ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA: + { + _name = _data->args.roctxRangePushA.message; + auto _hash = tim::add_hash_id(_name); + get_marker_pushed_ranges().emplace_back(_hash); + break; + } + case ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA: + { + _name = _data->args.roctxRangeStartA.message; + auto _hash = tim::add_hash_id(_name); + get_marker_started_ranges().emplace_back(_hash); + break; + } + case ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA: + { + _name = _data->args.roctxMarkA.message; + tim::add_hash_id(_name); + break; + } + default: + { + break; + } + } + } + } + + if(get_use_timemory()) + { + component::category_region::start( + _name); + } +} + +template +void +tool_tracing_callback_stop( + CategoryT, rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, rocprofiler_timestamp_t ts, + std::optional>& _bt_data) +{ + auto _name = tool_data->callback_tracing_info.at(record.kind, record.operation); + + if constexpr(std::is_same::value) + { + if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API) + { + auto* _data = static_cast( + record.payload); + + switch(record.operation) + { + case ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop: + { + ROCPROFSYS_CONDITIONAL_ABORT_F( + get_marker_pushed_ranges().empty(), + "roctxRangePop does not have corresponding roctxRangePush on " + "this thread"); + + auto _hash = get_marker_pushed_ranges().back(); + _name = tim::get_hash_identifier_fast(_hash); + get_marker_pushed_ranges().pop_back(); + break; + } + case ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStop: + { + ROCPROFSYS_CONDITIONAL_ABORT_F( + get_marker_started_ranges().empty(), + "roctxRangeStop does not have corresponding roctxRangeStart on " + "this thread"); + + auto _hash = get_marker_started_ranges().back(); + _name = tim::get_hash_identifier_fast(_hash); + 
get_marker_started_ranges().pop_back(); + break; + } + case ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA: + { + _name = _data->args.roctxMarkA.message; + break; + } + default: + { + break; + } + } + } + } + + if(get_use_timemory()) + { + component::category_region::stop( + _name); + } + + if(get_use_perfetto()) + { + auto args = callback_arg_array_t{}; + if(config::get_perfetto_annotations()) + { + rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, 2, + &args); + } + + uint64_t _beg_ts = user_data->value; + uint64_t _end_ts = ts; + + tracing::push_perfetto_ts( + CategoryT{}, _name.data(), _beg_ts, + ::perfetto::Flow::ProcessScoped(record.correlation_id.internal), + [&](::perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "begin_ns", _beg_ts); + + for(const auto& [key, val] : args) + tracing::add_perfetto_annotation(ctx, key, val); + + if(_bt_data && !_bt_data->empty()) + { + const std::string _unk = "??"; + size_t _bt_cnt = 0; + for(const auto& itr : *_bt_data) + { + auto _linfo = itr.lineinfo.get(); + const auto* _func = (itr.name.empty()) ? &_unk : &itr.name; + const auto* _loc = + (_linfo && !_linfo.location.empty()) + ? &_linfo.location + : ((itr.location.empty()) ? &_unk : &itr.location); + auto _line = (_linfo && _linfo.line > 0) + ? join("", _linfo.line) + : ((itr.lineno == 0) ? std::string{ "?" } + : join("", itr.lineno)); + auto _entry = + join("", demangle(*_func), " @ ", + join(':', ::basename(_loc->c_str()), _line)); + if(_bt_cnt < 10) + { + // Prepend zero for better ordering in UI. Only one zero + // is ever necessary since stack depth is limited to 16. 
+ tracing::add_perfetto_annotation( + ctx, join("", "frame#0", _bt_cnt++), _entry); + } + else + { + tracing::add_perfetto_annotation( + ctx, join("", "frame#", _bt_cnt++), _entry); + } + } + } + } + }); + tracing::pop_perfetto_ts( + CategoryT{}, _name.data(), _end_ts, [&](::perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + tracing::add_perfetto_annotation(ctx, "end_ns", _end_ts); + }); + } +} + +void +tool_control_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* /*user_data*/, void* /*callback_data*/) +{ + if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API) + { + if(record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerPause && + record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + { + stop(); + } + else if(record.operation == + ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerResume && + record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) + { + start(); + } + } +} + +void +tool_code_object_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* /*user_data*/, void* /*callback_data*/) +{ + auto ts = rocprofiler_timestamp_t{}; + ROCPROFILER_CALL(rocprofiler_get_timestamp(&ts)); + + if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT) + { + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + { + if(record.operation == ROCPROFILER_CODE_OBJECT_LOAD) + { + auto data_v = + *static_cast( + record.payload); + tool_data->code_object_records.wlock([ts, &record, &data_v](auto& _data) { + _data.emplace_back( + code_object_callback_record_t{ ts, record, data_v }); + }); + } + else if(record.operation == + ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER) + { + auto data_v = *static_cast(record.payload); + tool_data->kernel_symbol_records.wlock( + [ts, &record, &data_v](auto& _data) { + _data.emplace_back( + new kernel_symbol_callback_record_t{ ts, record, data_v }); + }); + } + } + return; + } +} + +auto& +get_kernel_dispatch_timestamps() +{ + static auto _v = 
std::unordered_map{}; + return _v; +} + +void +tool_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, void* /*callback_data*/) +{ + auto ts = rocprofiler_timestamp_t{}; + ROCPROFILER_CALL(rocprofiler_get_timestamp(&ts)); + + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + { + user_data->value = ts; + + switch(record.kind) + { + case ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API: + case ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API: + case ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API: + case ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API: + { + tool_tracing_callback_start(category::rocm_hsa_api{}, record, user_data, + ts); + break; + } + case ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API: + case ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API: + { + tool_tracing_callback_start(category::rocm_hip_api{}, record, user_data, + ts); + break; + } + case ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API: + { + tool_tracing_callback_start(category::rocm_marker_api{}, record, + user_data, ts); + break; + } + case ROCPROFILER_CALLBACK_TRACING_NONE: + case ROCPROFILER_CALLBACK_TRACING_LAST: + case ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API: + case ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API: + case ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT: + case ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY: + case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH: + case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY: + case ROCPROFILER_CALLBACK_TRACING_RCCL_API: + { + ROCPROFSYS_CI_ABORT(true, "unhandled callback record kind: %i\n", + record.kind); + break; + } + } + } + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) + { + using backtrace_entry_vec_t = std::vector; + + constexpr size_t bt_stack_depth = 16; + constexpr size_t bt_ignore_depth = 3; + constexpr bool bt_with_signal_frame = true; + + auto _bt_data = std::optional{}; + if(config::get_use_perfetto() && config::get_perfetto_annotations() && + 
tool_data->backtrace_operations.at(record.kind).count(record.operation) > 0) + { + auto _backtrace = tim::get_unw_stack(); + _bt_data = backtrace_entry_vec_t{}; + _bt_data->reserve(_backtrace.size()); + for(auto itr : _backtrace) + { + if(itr) + { + if(auto _val = binary::lookup_ipaddr_entry(itr->address()); + _val) + { + _bt_data->emplace_back(std::move(*_val)); + } + } + } + } + + switch(record.kind) + { + case ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API: + case ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API: + case ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API: + case ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API: + { + tool_tracing_callback_stop(category::rocm_hsa_api{}, record, user_data, + ts, _bt_data); + break; + } + case ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API: + case ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API: + { + tool_tracing_callback_stop(category::rocm_hip_api{}, record, user_data, + ts, _bt_data); + break; + } + case ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API: + { + tool_tracing_callback_stop(category::rocm_marker_api{}, record, user_data, + ts, _bt_data); + break; + } + case ROCPROFILER_CALLBACK_TRACING_NONE: + case ROCPROFILER_CALLBACK_TRACING_LAST: + case ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API: + case ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API: + case ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT: + case ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY: + case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH: + case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY: + case ROCPROFILER_CALLBACK_TRACING_RCCL_API: + { + ROCPROFSYS_CI_ABORT(true, "unhandled callback record kind: %i\n", + record.kind); + break; + } + } + } + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_NONE) + { + if(record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH && + record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE) + { + auto* _data = + static_cast( + record.payload); + + // save for post-processing + get_kernel_dispatch_timestamps().emplace( + 
_data->dispatch_info.dispatch_id, + timing_interval{ _data->start_timestamp, _data->end_timestamp }); + } + } + else + { + ROCPROFSYS_CI_ABORT(true, "unhandled callback record phase: %i\n", record.phase); + } +} + +using kernel_dispatch_bundle_t = tim::lightweight_tuple; + +void +tool_tracing_buffered(rocprofiler_context_id_t /*context*/, + rocprofiler_buffer_id_t /*buffer_id*/, + rocprofiler_record_header_t** headers, size_t num_headers, + void* /*user_data*/, uint64_t /*drop_count*/) +{ + if(num_headers == 0 || headers == nullptr) return; + + for(size_t i = 0; i < num_headers; ++i) + { + auto* header = headers[i]; + + if(ROCPROFSYS_LIKELY(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING)) + { + if(header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) + { + auto* record = + static_cast( + header->payload); + + const auto* _kern_sym_data = + get_kernel_symbol_info(record->dispatch_info.kernel_id); + + auto _name = tim::demangle(_kern_sym_data->kernel_name); + auto _corr_id = record->correlation_id.internal; + auto _beg_ns = record->start_timestamp; + auto _end_ns = record->end_timestamp; + auto _agent_id = record->dispatch_info.agent_id; + auto _queue_id = record->dispatch_info.queue_id; + const auto* _agent = tool_data->get_gpu_tool_agent(_agent_id); + + if(get_use_timemory()) + { + const auto& _tinfo = thread_info::get(record->thread_id, SystemTID); + auto _tid = _tinfo->index_data->sequent_value; + + auto _bundle = kernel_dispatch_bundle_t{ _name }; + + _bundle.push(_tid).start().stop(); + _bundle.get([_beg_ns, _end_ns](tim::component::wall_clock* _wc) { + _wc->set_value(_end_ns - _beg_ns); + _wc->set_accum(_end_ns - _beg_ns); + }); + _bundle.pop(); + } + + if(get_use_perfetto()) + { + auto _track_desc = [](int32_t _device_id_v, int64_t _queue_id_v) { + return JOIN("", "GPU Kernel Dispatch [", _device_id_v, "] Queue ", + _queue_id_v); + }; + + const auto _track = tracing::get_perfetto_track( + category::rocm_kernel_dispatch{}, _track_desc, 
_agent->device_id, + _queue_id.handle); + + tracing::push_perfetto( + category::rocm_kernel_dispatch{}, _name.c_str(), _track, _beg_ns, + ::perfetto::Flow::ProcessScoped(_corr_id), + [&](::perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "begin_ns", + _beg_ns); + tracing::add_perfetto_annotation(ctx, "end_ns", _end_ns); + tracing::add_perfetto_annotation(ctx, "corr_id", + _corr_id); + tracing::add_perfetto_annotation( + ctx, "node_id", _agent->agent->logical_node_id); + tracing::add_perfetto_annotation(ctx, "queue", + _queue_id.handle); + tracing::add_perfetto_annotation( + ctx, "dispatch_id", + record->dispatch_info.dispatch_id); + tracing::add_perfetto_annotation( + ctx, "kernel_id", record->dispatch_info.kernel_id); + tracing::add_perfetto_annotation( + ctx, "private_segment_size", + record->dispatch_info.private_segment_size); + tracing::add_perfetto_annotation( + ctx, "group_segment_size", + record->dispatch_info.group_segment_size); + tracing::add_perfetto_annotation( + ctx, "workgroup_size", + JOIN("", "(", + JOIN(',', record->dispatch_info.workgroup_size.x, + record->dispatch_info.workgroup_size.y, + record->dispatch_info.workgroup_size.z), + ")")); + tracing::add_perfetto_annotation( + ctx, "grid_size", + JOIN("", "(", + JOIN(',', record->dispatch_info.grid_size.x, + record->dispatch_info.grid_size.y, + record->dispatch_info.grid_size.z), + ")")); + } + }); + tracing::pop_perfetto(category::rocm_kernel_dispatch{}, _name.c_str(), + _track, _end_ns); + } + } + else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) + { + auto* record = + static_cast( + header->payload); + + auto _corr_id = record->correlation_id.internal; + auto _beg_ns = record->start_timestamp; + auto _end_ns = record->end_timestamp; + auto _dst_agent_id = record->dst_agent_id; + auto _src_agent_id = record->src_agent_id; + const auto* _dst_agent = tool_data->get_agent(_dst_agent_id); + const auto* _src_agent = 
tool_data->get_agent(_src_agent_id); + auto _name = + tool_data->buffered_tracing_info.at(record->kind, record->operation); + + if(get_use_timemory()) + { + const auto& _tinfo = thread_info::get(record->thread_id, SystemTID); + auto _tid = _tinfo->index_data->sequent_value; + + auto _bundle = kernel_dispatch_bundle_t{ _name }; + + _bundle.push(_tid).start().stop(); + _bundle.get([_beg_ns, _end_ns](tim::component::wall_clock* _wc) { + _wc->set_value(_end_ns - _beg_ns); + _wc->set_accum(_end_ns - _beg_ns); + }); + _bundle.pop(); + } + + if(get_use_perfetto()) + { + auto _track_desc = [](int32_t _device_id_v, + rocprofiler_thread_id_t _tid) { + const auto& _tid_v = thread_info::get(_tid, SystemTID); + return JOIN("", "GPU Memory Copy to Agent [", _device_id_v, + "] Thread ", _tid_v->index_data->sequent_value); + }; + + const auto _track = tracing::get_perfetto_track( + category::rocm_memory_copy{}, _track_desc, + _dst_agent->logical_node_id, record->thread_id); + + tracing::push_perfetto( + category::rocm_memory_copy{}, _name.data(), _track, _beg_ns, + ::perfetto::Flow::ProcessScoped(_corr_id), + [&](::perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "begin_ns", + _beg_ns); + tracing::add_perfetto_annotation(ctx, "end_ns", _end_ns); + tracing::add_perfetto_annotation(ctx, "corr_id", + _corr_id); + tracing::add_perfetto_annotation( + ctx, "dst_agent", _dst_agent->logical_node_id); + tracing::add_perfetto_annotation( + ctx, "src_agent", _src_agent->logical_node_id); + } + }); + tracing::pop_perfetto(category::rocm_memory_copy{}, "", _track, + _end_ns); + } + } + else + { + ROCPROFSYS_THROW( + "unexpected rocprofiler_record_header_t buffer tracing category " + "kind. category: %i, kind: %i\n", + header->category, header->kind); + } + } + else + { + ROCPROFSYS_THROW("unexpected rocprofiler_record_header_t tracing category " + "kind. 
category: %i, kind: %i\n", + header->category, header->kind); + } + } +} + +auto& +get_counter_dispatch_data() +{ + static auto _v = + container::stable_vector{}; + return _v; +} + +auto& +get_counter_dispatch_records() +{ + static auto _v = std::vector{}; + return _v; +} + +using counter_storage_map_t = + std::unordered_map; +using agent_counter_storage_map_t = + std::unordered_map; + +auto*& +get_counter_storage() +{ + static auto* _v = new agent_counter_storage_map_t{}; + return _v; +} + +void +counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_data, + rocprofiler_record_counter_t* record_data, size_t record_count, + rocprofiler_user_data_t /*user_data*/, + void* /*callback_data_arg*/) +{ + auto* _agent_counter_storage = get_counter_storage(); + if(!_agent_counter_storage) return; + + static auto _mtx = std::mutex{}; + auto _lk = std::unique_lock{ _mtx }; + + auto _dispatch_id = dispatch_data.dispatch_info.dispatch_id; + auto _agent_id = dispatch_data.dispatch_info.agent_id; + auto _scope = scope::get_default(); + auto _interval = timing_interval{}; + auto _aggregate = + std::unordered_map{}; + for(size_t i = 0; i < record_count; ++i) + { + auto _counter_id = rocprofiler_counter_id_t{}; + ROCPROFILER_CALL( + rocprofiler_query_record_counter_id(record_data[i].id, &_counter_id)); + + if(!_aggregate.emplace(_counter_id, record_data[i]).second) + { + _aggregate[_counter_id].counter_value += record_data[i].counter_value; + } + } + + if(_agent_counter_storage->count(_agent_id) == 0) + _agent_counter_storage->emplace(_agent_id, counter_storage_map_t{}); + + if(get_kernel_dispatch_timestamps().count(_dispatch_id) > 0) + { + _interval = get_kernel_dispatch_timestamps().at(_dispatch_id); + get_kernel_dispatch_timestamps().erase(_dispatch_id); + } + + for(const auto& itr : _aggregate) + { + if(_agent_counter_storage->at(_agent_id).count(itr.first) == 0) + { + const auto* _agent = tool_data->get_gpu_tool_agent(_agent_id); + const auto* _info = 
tool_data->get_tool_counter_info(_agent_id, itr.first); + + ROCPROFSYS_CONDITIONAL_ABORT_F( + !_agent, "unable to find tool agent for agent (id=%zu)\n", + _agent_id.handle); + ROCPROFSYS_CONDITIONAL_ABORT_F( + !_info, + "unable to find counter info for counter (id=%zu) on agent (id=%zu)\n", + itr.first.handle, _agent_id.handle); + + auto _dev_id = static_cast(_agent->device_id); + + _agent_counter_storage->at(_agent_id).emplace( + itr.first, counter_storage{ tool_data, _dev_id, 0, _info->name }); + } + + auto _event = counter_event{ counter_dispatch_record{ + &dispatch_data, _dispatch_id, itr.first, itr.second } }; + + _agent_counter_storage->at(_agent_id).at(itr.first)(_event, _interval, _scope); + } +} + +void +dispatch_counting_service_callback( + rocprofiler_dispatch_counting_service_data_t dispatch_data, + rocprofiler_profile_config_id_t* config, rocprofiler_user_data_t* /*user_data*/, + void* callback_data_arg) +{ + auto* _data = as_client_data(callback_data_arg); + if(!_data || !config) return; + + if(auto itr = + _data->agent_counter_profiles.find(dispatch_data.dispatch_info.agent_id); + itr != _data->agent_counter_profiles.end() && itr->second) + { + *config = *itr->second; + } +} + +// int +// external_correlation_id_callback( +// rocprofiler_thread_id_t /*thr_id*/, rocprofiler_context_id_t /*ctx_id*/, +// rocprofiler_external_correlation_id_request_kind_t /*kind*/, +// rocprofiler_tracing_operation_t /*op*/, uint64_t /*internal_corr_id*/, +// rocprofiler_user_data_t* external_corr_id, void* /*user_data*/) +// { +// auto* _data = new kernel_dispatch_bundle_t{ "kernel_dispatch" }; +// _data->push(); +// external_corr_id->ptr = _data; +// return 0; +// } + +// void +// agent_counter_profile_callback(rocprofiler_context_id_t context_id, +// rocprofiler_agent_id_t agent, +// rocprofiler_agent_set_profile_callback_t set_config, void*) +// { +// if(!agent_counter_profiles) return; +// if(auto itr = agent_counter_profiles->find(agent); +// itr != 
agent_counter_profiles->end() && itr->second) +// set_config(context_id, *itr->second); +// } + +bool +is_initialized(rocprofiler_context_id_t ctx) +{ + return (ctx.handle > 0); +} + +bool +is_active(rocprofiler_context_id_t ctx) +{ + int status = 0; + auto errc = rocprofiler_context_is_active(ctx, &status); + return (errc == ROCPROFILER_STATUS_SUCCESS && status > 0); +} + +bool +is_valid(rocprofiler_context_id_t ctx) +{ + int status = 0; + auto errc = rocprofiler_context_is_valid(ctx, &status); + return (errc == ROCPROFILER_STATUS_SUCCESS && status > 0); +} + +void +flush() +{ + if(!tool_data) return; + + for(auto itr : tool_data->get_buffers()) + { + if(itr.handle > 0) + { + auto status = rocprofiler_flush_buffer(itr); + if(status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY) + { + ROCPROFILER_CALL(status); + } + } + } +} + +int +tool_init(rocprofiler_client_finalize_t fini_func, void* user_data) +{ + auto domains = settings::instance()->at("ROCPROFSYS_ROCM_DOMAINS"); + + ROCPROFSYS_VERBOSE_F(1, "rocprof-sys ROCm Domains:\n"); + for(const auto& itr : domains->get_choices()) + ROCPROFSYS_VERBOSE_F(1, "- %s\n", itr.c_str()); + + auto _callback_domains = rocprofiler_sdk::get_callback_domains(); + auto _buffered_domain = rocprofiler_sdk::get_buffered_domains(); + auto _counter_events = rocprofiler_sdk::get_rocm_events(); + + auto* _data = as_client_data(user_data); + _data->client_fini = fini_func; + + _data->initialize(); + if(!_counter_events.empty()) _data->initialize_event_info(); + + ROCPROFILER_CALL(rocprofiler_create_context(&_data->primary_ctx)); + + ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( + _data->primary_ctx, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, nullptr, 0, + tool_code_object_callback, _data)); + + for(auto itr : { ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API, + ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API, + ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API, + ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API, + 
ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API, + ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, + ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API }) + { + if(_callback_domains.count(itr) > 0) + { + auto _ops = rocprofiler_sdk::get_operations(itr); + _data->backtrace_operations.emplace( + itr, rocprofiler_sdk::get_backtrace_operations(itr)); + ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( + _data->primary_ctx, itr, _ops.data(), _ops.size(), tool_tracing_callback, + _data)); + } + } + + constexpr auto buffer_size = 8192; + constexpr auto watermark = 7936; + + if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) > 0) + { + ROCPROFILER_CALL(rocprofiler_create_buffer( + _data->primary_ctx, buffer_size, watermark, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, tool_tracing_buffered, tool_data, + &_data->kernel_dispatch_buffer)); + + ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( + _data->primary_ctx, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, + _data->kernel_dispatch_buffer)); + + // auto external_corr_id_request_kinds = + // std::array{ + // ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH + // }; + + // ROCPROFILER_CALL(rocprofiler_configure_external_correlation_id_request_service( + // _data->primary_ctx, external_corr_id_request_kinds.data(), + // external_corr_id_request_kinds.size(), external_correlation_id_callback, + // _data)); + } + + if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) > 0) + { + ROCPROFILER_CALL(rocprofiler_create_buffer( + _data->primary_ctx, buffer_size, watermark, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, tool_tracing_buffered, tool_data, + &_data->memory_copy_buffer)); + + auto _ops = + rocprofiler_sdk::get_operations(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY); + + ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( + _data->primary_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, + (_ops.empty()) ? 
nullptr : _ops.data(), _ops.size(), + _data->memory_copy_buffer)); + } + + if(!_counter_events.empty()) + { + for(const auto& itr : _data->gpu_agents) + { + _data->agent_events.emplace( + itr.agent->id, + create_agent_profile(itr.agent->id, _counter_events, _data)); + } + + ROCPROFILER_CALL(rocprofiler_create_context(&_data->counter_ctx)); + + auto _operations = std::array{ + ROCPROFILER_KERNEL_DISPATCH_COMPLETE + }; + + ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( + _data->counter_ctx, ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH, + _operations.data(), _operations.size(), tool_tracing_callback, _data)); + + ROCPROFILER_CALL(rocprofiler_configure_callback_dispatch_counting_service( + _data->counter_ctx, dispatch_counting_service_callback, _data, + counter_record_callback, _data)); + + // ROCPROFILER_CALL(rocprofiler_create_buffer( + // counter_ctx, buffer_size, watermark, + // ROCPROFILER_BUFFER_POLICY_LOSSLESS, tool_tracing_buffered, tool_data, + // &counter_collection_buffer)); + + // for(const auto& itr : *agent_counter_profiles) + // { + // ROCPROFILER_CALL(rocprofiler_configure_agent_profile_counting_service( + // counter_ctx, counter_collection_buffer, itr.first, + // agent_counter_profile_callback, nullptr)); + // } + } + + for(const auto& itr : _data->get_buffers()) + { + if(itr.handle > 0) + { + auto client_thread = rocprofiler_callback_thread_t{}; + ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread)); + ROCPROFILER_CALL(rocprofiler_assign_callback_thread(itr, client_thread)); + } + } + + // throwaway context for handling the profiler control API. 
If primary_ctx were used, + // we would get profiler pause callback but never get profiler resume callback + { + auto _local_ctx = rocprofiler_context_id_t{ 0 }; + ROCPROFILER_CALL(rocprofiler_create_context(&_local_ctx)); + ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( + _local_ctx, ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API, nullptr, 0, + tool_control_callback, _data)); + } + + if(!is_valid(_data->primary_ctx)) + { + // notify rocprofiler that initialization failed and all the contexts, buffers, + // etc. created should be ignored + return -1; + } + + gpu::add_device_metadata(); + + if(config::get_use_process_sampling() && config::get_use_rocm_smi()) + { + ROCPROFSYS_VERBOSE_F(1, "Setting rocm_smi state to active...\n"); + rocm_smi::set_state(State::Active); + } + + start(); + + // no errors + return 0; +} + +void +tool_fini(void* callback_data) +{ + static std::atomic_flag _once = ATOMIC_FLAG_INIT; + if(_once.test_and_set()) return; + + flush(); + stop(); + + if(config::get_use_process_sampling() && config::get_use_rocm_smi()) + rocm_smi::shutdown(); + + if(get_counter_storage()) + { + auto _storages = std::vector{}; + for(const auto& citr : *get_counter_storage()) + { + for(const auto& itr : citr.second) + _storages.emplace_back(&itr.second); + } + + std::sort(_storages.begin(), _storages.end(), + [](const counter_storage* lhs, const counter_storage* rhs) { + return *lhs < *rhs; + }); + + for(const auto* itr : _storages) + itr->write(); + _storages.clear(); + + get_counter_storage()->clear(); + delete get_counter_storage(); + get_counter_storage() = nullptr; + } + + auto* _data = as_client_data(callback_data); + _data->client_id = nullptr; + _data->client_fini = nullptr; + + delete tool_data; + tool_data = nullptr; +} +} // namespace + +void +setup() +{ + if(int status = 0; + rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0) + { + ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure)); + } 
+} + +void +shutdown() +{ + // shutdown + if(tool_data && tool_data->client_id && tool_data->client_fini) + tool_data->client_fini(*tool_data->client_id); +} + +void +config() +{} + +void +post_process() +{} + +void +sample() +{} + +void +start() +{ + if(!tool_data) return; + + for(auto itr : tool_data->get_contexts()) + { + if(is_initialized(itr) && !is_active(itr)) + { + ROCPROFILER_CALL(rocprofiler_start_context(itr)); + } + } +} + +void +stop() +{ + if(!tool_data) return; + + for(auto itr : tool_data->get_contexts()) + { + if(is_initialized(itr) && is_active(itr)) + { + ROCPROFILER_CALL(rocprofiler_stop_context(itr)); + } + } +} + +std::vector +get_rocm_events_info() +{ + if(!tool_data) + { + auto _tool_data_v = client_data{}; + _tool_data_v.initialize_event_info(); + return _tool_data_v.events_info; + } + + if(tool_data->events_info.empty()) tool_data->initialize_event_info(); + + return tool_data->events_info; +} +} // namespace rocprofiler_sdk +} // namespace rocprofsys + +extern "C" rocprofiler_tool_configure_result_t* +rocprofiler_configure(uint32_t version, const char* runtime_version, uint32_t priority, + rocprofiler_client_id_t* id) +{ + // only activate once + { + static bool _first = true; + if(!_first) return nullptr; + _first = false; + } + + if(!tim::get_env("ROCPROFSYS_INIT_TOOLING", true)) return nullptr; + if(!tim::settings::enabled()) return nullptr; + + if(!rocprofsys::config::settings_are_configured() && + rocprofsys::get_state() < rocprofsys::State::Active) + rocprofsys_init_tooling_hidden(); + + // set the client name + id->name = "rocprofsys"; + + // ensure tool data exists + if(!rocprofsys::rocprofiler_sdk::tool_data) + rocprofsys::rocprofiler_sdk::tool_data = + new rocprofsys::rocprofiler_sdk::client_data{}; + + // store client info + rocprofsys::rocprofiler_sdk::tool_data->client_id = id; + + // compute major/minor/patch version info + uint32_t major = version / 10000; + uint32_t minor = (version % 10000) / 100; + uint32_t patch = 
version % 100; + + // generate info string + auto info = std::stringstream{}; + info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." + << patch << " (" << runtime_version << ")"; + + ROCPROFSYS_VERBOSE_F(0, "%s\n", info.str().c_str()); + ROCPROFSYS_VERBOSE_F(2, "client_id=%u, priority=%u\n", id->handle, priority); + + ROCPROFILER_CALL(rocprofiler_at_internal_thread_create( + rocprofsys::rocprofiler_sdk::thread_precreate, + rocprofsys::rocprofiler_sdk::thread_postcreate, + ROCPROFILER_LIBRARY | ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY | + ROCPROFILER_MARKER_LIBRARY, + nullptr)); + + // create configure data + static auto cfg = + rocprofiler_tool_configure_result_t{ sizeof(rocprofiler_tool_configure_result_t), + &::rocprofsys::rocprofiler_sdk::tool_init, + &::rocprofsys::rocprofiler_sdk::tool_fini, + rocprofsys::rocprofiler_sdk::tool_data }; + + // return pointer to configure data + return &cfg; +} diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.hpp similarity index 54% rename from projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler.hpp rename to projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.hpp index a87784c8b0..4853e4d420 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.hpp @@ -1,6 +1,6 @@ // MIT License // -// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,67 +22,39 @@ #pragma once -#include "core/defines.hpp" #include "core/timemory.hpp" -#include "library/components/rocprofiler.hpp" -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include namespace rocprofsys { -namespace rocprofiler +namespace rocprofiler_sdk { -std::map> -get_data_labels(); +using hardware_counter_info = ::tim::hardware_counters::info; void -rocm_initialize(); +setup(); void -rocm_cleanup(); +shutdown(); -bool& -is_setup(); +void +config(); void post_process(); -std::vector -rocm_metrics(); +void +sample(); -#if !defined(ROCPROFSYS_USE_ROCPROFILER) || ROCPROFSYS_USE_ROCPROFILER == 0 -inline void -post_process() -{} +void +start(); -inline void -rocm_cleanup() -{} +void +stop(); -inline std::vector -rocm_metrics() -{ - return std::vector{}; -} -#endif - -} // namespace rocprofiler +std::vector +get_rocm_events_info(); +} // namespace rocprofiler_sdk } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/CMakeLists.txt new file mode 100644 index 0000000000..97446e34c4 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/CMakeLists.txt @@ -0,0 +1,9 @@ +# +set(rocprofiler_sdk_sources ${CMAKE_CURRENT_LIST_DIR}/counters.cpp + ${CMAKE_CURRENT_LIST_DIR}/fwd.cpp) + +set(rocprofiler_sdk_headers ${CMAKE_CURRENT_LIST_DIR}/counters.hpp + ${CMAKE_CURRENT_LIST_DIR}/fwd.hpp) + +target_sources(rocprofiler-systems-object-library PRIVATE ${rocprofiler_sdk_sources} + ${rocprofiler_sdk_headers}) diff --git 
a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp new file mode 100644 index 0000000000..fad295f406 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp @@ -0,0 +1,135 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/rocprofiler-sdk/counters.hpp" +#include "common/synchronized.hpp" +#include "core/debug.hpp" +#include "core/timemory.hpp" +#include "library/rocprofiler-sdk/fwd.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace rocprofsys +{ +namespace rocprofiler_sdk +{ +namespace +{ +std::string +get_counter_description(const client_data* tool_data, std::string_view _v) +{ + const auto& _info = tool_data->events_info; + for(const auto& itr : _info) + { + if(itr.symbol().find(_v) == 0 || itr.short_description().find(_v) == 0) + { + return itr.long_description(); + } + } + return std::string{}; +} +} // namespace + +void +counter_event::operator()(const client_data* tool_data, ::perfetto::CounterTrack* _track, + timing_interval _timing, scope::config _scope) const +{ + if(!record.dispatch_data) return; + + const auto& _dispatch_info = record.dispatch_data->dispatch_info; + const auto* _kern_sym_data = + tool_data->get_kernel_symbol_info(_dispatch_info.kernel_id); + + auto _bundle = counter_bundle_t{ tim::demangle(_kern_sym_data->kernel_name), _scope }; + + _bundle.push(_dispatch_info.queue_id.handle) + .start() + .store(record.record_counter.counter_value); + + _bundle.stop().pop(_dispatch_info.queue_id.handle); + + if(_track && _timing.start > 0 && _timing.end > _timing.start) + { + TRACE_COUNTER(trait::name::value, *_track, + _timing.start, record.record_counter.counter_value); + TRACE_COUNTER(trait::name::value, *_track, + _timing.end, 0); + } +} + +counter_storage::counter_storage(const client_data* _tool_data, uint64_t _devid, + size_t _idx, std::string_view _name) +: tool_data{ _tool_data } +, device_id{ _devid } +, index{ static_cast(_idx) } +, metric_name{ _name } +, metric_description{ get_counter_description(_tool_data, metric_name) } +{ + auto _metric_name = std::string{ _name }; + _metric_name = + std::regex_replace(_metric_name, std::regex{ 
"(.*)\\[([0-9]+)\\]" }, "$1_$2"); + storage_name = JOIN('-', "rocprof", "device", device_id, _metric_name); + storage = std::make_unique(tim::standalone_storage{}, index, + storage_name); + { + constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_COUNT; + track_name = JOIN(" ", "GPU", _metric_name, JOIN("", '[', device_id, ']')); + track = std::make_unique( + ::perfetto::StaticString(track_name.c_str())); + track->set_is_incremental(false); + track->set_unit(_unit); + track->set_unit_multiplier(1); + } +} + +void +counter_storage::operator()(const counter_event& _event, timing_interval _timing, + scope::config _scope) const +{ + operation::set_storage{}(storage.get()); + _event(tool_data, track.get(), _timing, _scope); +} + +void +counter_storage::write() const +{ + operation::set_storage{}(storage.get()); + counter_data_tracker::label() = metric_name; + counter_data_tracker::description() = metric_description; + storage->write(); +} +} // namespace rocprofiler_sdk +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.hpp new file mode 100644 index 0000000000..bcd1ddcdde --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.hpp @@ -0,0 +1,168 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "common/synchronized.hpp" +#include "core/debug.hpp" +#include "core/perfetto.hpp" +#include "core/timemory.hpp" +#include "library/rocprofiler-sdk/fwd.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace rocprofsys +{ +namespace rocprofiler_sdk +{ +struct counter_dispatch_record +{ + const rocprofiler_dispatch_counting_service_data_t* dispatch_data = nullptr; + rocprofiler_dispatch_id_t dispatch_id = 0; + rocprofiler_counter_id_t counter_id = {}; + rocprofiler_record_counter_t record_counter = {}; +}; + +struct counter_data_tag +{}; + +using counter_data_tracker = component::data_tracker; +using counter_storage_type = typename counter_data_tracker::storage_type; +using counter_bundle_t = tim::lightweight_tuple; +using counter_track_type = ::perfetto::CounterTrack; + +struct counter_event +{ + ROCPROFSYS_DEFAULT_OBJECT(counter_event) + + explicit counter_event(counter_dispatch_record&& _v) + : record{ _v } + {} + + void operator()(const client_data* tool_data, counter_track_type*, + timing_interval _timing, scope::config _scope) const; + + counter_dispatch_record record = {}; +}; + +struct counter_storage +{ + const client_data* tool_data = nullptr; + uint64_t device_id = 0; + int64_t index = 0; + std::string metric_name = {}; + std::string metric_description = {}; + std::string storage_name = {}; + std::string track_name = {}; + std::unique_ptr storage = {}; + std::unique_ptr track = {}; + + counter_storage(const client_data* _tool_data, uint64_t _devid, size_t _idx, + std::string_view _name); + + ~counter_storage() = default; + counter_storage(const counter_storage&) = delete; + counter_storage(counter_storage&&) = default; + counter_storage& operator=(const counter_storage&) = delete; + counter_storage& operator=(counter_storage&&) = default; + + friend bool operator<(const counter_storage& lhs, const counter_storage& rhs) + { + return 
std::tie(lhs.storage_name, lhs.device_id, lhs.index) < + std::tie(rhs.storage_name, rhs.device_id, rhs.index); + } + + void operator()(const counter_event& _event, timing_interval _timing, + scope::config _scope = scope::get_default()) const; + + void write() const; +}; +} // namespace rocprofiler_sdk +} // namespace rocprofsys + +namespace tim +{ +namespace operation +{ +template <> +struct set_storage<::rocprofsys::rocprofiler_sdk::counter_data_tracker> +{ + static constexpr size_t max_threads = 4096; + using type = ::rocprofsys::rocprofiler_sdk::counter_data_tracker; + using storage_array_t = std::array*, max_threads>; + friend struct get_storage; + + ROCPROFSYS_DEFAULT_OBJECT(set_storage) + + auto operator()(storage* _v, size_t _idx) const { get().at(_idx) = _v; } + auto operator()(type&, size_t) const {} + auto operator()(storage* _v) const { get().fill(_v); } + +private: + static storage_array_t& get() + { + static storage_array_t _v = { nullptr }; + return _v; + } +}; + +template <> +struct get_storage<::rocprofsys::rocprofiler_sdk::counter_data_tracker> +{ + using type = ::rocprofsys::rocprofiler_sdk::counter_data_tracker; + + ROCPROFSYS_DEFAULT_OBJECT(get_storage) + + auto operator()(const type&) const + { + return operation::set_storage::get().at(0); + } + + auto operator()() const + { + type _obj{}; + return (*this)(_obj); + } + + auto operator()(size_t _idx) const + { + return operation::set_storage::get().at(_idx); + } + + auto operator()(type&, size_t _idx) const { return (*this)(_idx); } +}; +} // namespace operation +} // namespace tim diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp new file mode 100644 index 0000000000..4120c27b5a --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp @@ -0,0 +1,270 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, 
Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "library/rocprofiler-sdk/fwd.hpp" +#include "core/debug.hpp" +#include "core/state.hpp" + +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace rocprofsys +{ +namespace rocprofiler_sdk +{ +namespace +{ +using tool_agent_vec_t = std::vector; + +rocprofiler_status_t +dimensions_info_callback(rocprofiler_counter_id_t /*id*/, + const rocprofiler_record_dimension_info_t* dim_info, + long unsigned int num_dims, void* user_data) +{ + auto* dimensions_info = + static_cast*>(user_data); + dimensions_info->reserve(num_dims); + for(size_t j = 0; j < num_dims; j++) + dimensions_info->emplace_back(dim_info[j]); + + return ROCPROFILER_STATUS_SUCCESS; +} + +rocprofiler_status_t +counters_supported_callback(rocprofiler_agent_id_t agent_id, + rocprofiler_counter_id_t* counters, size_t num_counters, + void* user_data) +{ + using value_type = typename agent_counter_info_map_t::mapped_type; + + auto* data_v = static_cast(user_data); + data_v->emplace(agent_id, value_type{}); + for(size_t i = 0; i < num_counters; ++i) + { + auto _info = rocprofiler_counter_info_v0_t{}; + auto _dim_info = std::vector{}; + + ROCPROFILER_CALL(rocprofiler_query_counter_info( + counters[i], ROCPROFILER_COUNTER_INFO_VERSION_0, &_info)); + + // populate local vector + ROCPROFILER_CALL(rocprofiler_iterate_counter_dimensions( + counters[i], dimensions_info_callback, &_dim_info)); + + if(!_info.is_constant) + data_v->at(agent_id).emplace_back(agent_id, _info, std::move(_dim_info)); + } + return ROCPROFILER_STATUS_SUCCESS; +} + +agent_counter_info_map_t +get_agent_counter_info(const tool_agent_vec_t& _agents) +{ + auto _data = agent_counter_info_map_t{}; + + for(auto itr : _agents) + { + ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( + itr.agent->id, counters_supported_callback, &_data)); + + std::sort(_data.at(itr.agent->id).begin(), _data.at(itr.agent->id).end(), + [](const auto& lhs, const auto& rhs) { + return (lhs.id.handle < 
rhs.id.handle); + }); + + for(auto& citr : _data.at(itr.agent->id)) + { + std::sort(citr.dimension_info.begin(), citr.dimension_info.end(), + [](const auto& lhs, const auto& rhs) { return (lhs.id < rhs.id); }); + } + } + + return _data; +} +} // namespace + +rocprofiler_tool_counter_info_t::rocprofiler_tool_counter_info_t( + rocprofiler_agent_id_t _agent_id, parent_type _info, dimension_info_vec_t&& _dim_info) +: parent_type{ _info } +, agent_id{ _agent_id } +, dimension_info{ std::move(_dim_info) } +{} + +void +client_data::initialize() +{ + buffered_tracing_info = rocprofiler::sdk::get_buffer_tracing_names(); + callback_tracing_info = rocprofiler::sdk::get_callback_tracing_names(); + + static constexpr auto supported_agent_info_version = ROCPROFILER_AGENT_INFO_VERSION_0; + + rocprofiler_query_available_agents_cb_t iterate_cb = + [](rocprofiler_agent_version_t version, const void** agents_arr, + size_t num_agents, void* user_data) { + ROCPROFSYS_CONDITIONAL_ABORT(version != supported_agent_info_version, + "rocprofiler agent info version != expected " + "agent info version (=%i). 
value: %i\n", + supported_agent_info_version, version); + + auto _agents_v = std::vector{}; + for(size_t i = 0; i < num_agents; ++i) + { + const auto* _agent = + static_cast(agents_arr[i]); + _agents_v.emplace_back(*_agent); + } + + auto* tool_data_v = as_client_data(user_data); + tool_data_v->set_agents(std::move(_agents_v)); + + return ROCPROFILER_STATUS_SUCCESS; + }; + + ROCPROFILER_CALL(rocprofiler_query_available_agents( + supported_agent_info_version, iterate_cb, sizeof(rocprofiler_agent_t), this)); +} + +void +client_data::initialize_event_info() +{ + if(agents.empty()) initialize(); + + if(agent_counter_info.size() != gpu_agents.size()) + agent_counter_info = get_agent_counter_info(gpu_agents); + + try + { + using qualifier_t = tim::hardware_counters::qualifier; + using qualifier_vec_t = std::vector; + + for(const auto& aitr : gpu_agents) + { + auto _dev_index = aitr.device_id; + auto _device_qualifier_sym = JOIN("", ":device=", _dev_index); + auto _device_qualifier = + tim::hardware_counters::qualifier{ true, static_cast(_dev_index), + _device_qualifier_sym, + JOIN(" ", "Device", _dev_index) }; + + auto _counter_info = agent_counter_info.at(aitr.agent->id); + std::sort(_counter_info.begin(), _counter_info.end(), + [](const rocprofiler_tool_counter_info_t& lhs, + const rocprofiler_tool_counter_info_t& rhs) { + if(lhs.is_constant && rhs.is_constant) + return lhs.id < rhs.id; + else if(lhs.is_constant) + return true; + else if(rhs.is_constant) + return false; + + if(!lhs.is_derived && !rhs.is_derived) + return lhs.id < rhs.id; + else if(!lhs.is_derived) + return true; + else if(!rhs.is_derived) + return false; + + return lhs.id < rhs.id; + }); + + for(const auto& ditr : _counter_info) + { + auto _long_desc = std::string{ ditr.description }; + auto _units = std::string{}; + auto _pysym = std::string{}; + if(ditr.is_constant) + { + continue; + } + else if(ditr.is_derived) + { + auto _sym = JOIN("", ditr.name, _device_qualifier_sym); + auto _short_desc = 
JOIN("", "Derived counter: ", ditr.expression); + events_info.emplace_back(hardware_counter_info( + true, tim::hardware_counters::api::rocm, events_info.size(), 0, + _sym, _pysym, _short_desc, _long_desc, _units, + qualifier_vec_t{ _device_qualifier })); + } + else + { + auto _dim_info = std::vector{}; + + for(const auto& itr : ditr.dimension_info) + { + auto _info = (itr.instance_size > 1) + ? JOIN("", itr.name, "[", 0, ":", + itr.instance_size - 1, "]") + : std::string{}; + if(!_info.empty()) _dim_info.emplace_back(_info); + } + + auto _sym = JOIN("", ditr.name, _device_qualifier_sym); + auto _short_desc = JOIN("", ditr.name, " on device ", _dev_index); + if(!_dim_info.empty()) + { + namespace join = ::timemory::join; + _short_desc += JOIN( + "", ". ", + join::join(join::array_config{ ", ", "", "" }, _dim_info)); + } + events_info.emplace_back(hardware_counter_info( + true, tim::hardware_counters::api::rocm, events_info.size(), 0, + _sym, _pysym, _short_desc, _long_desc, _units, + qualifier_vec_t{ _device_qualifier })); + } + } + } + } catch(std::exception& _e) + { + ROCPROFSYS_WARNING_F(1, "Constructing ROCm event info failed: %s\n", _e.what()); + } +} + +void +client_data::set_agents(agent_vec_t&& _agents_v) +{ + agents = std::move(_agents_v); + + std::sort(agents.begin(), agents.end(), + [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; }); + + cpu_agents.clear(); + gpu_agents.clear(); + + for(const auto& itr : agents) + { + if(itr.type == ROCPROFILER_AGENT_TYPE_CPU) + cpu_agents.emplace_back(tool_agent{ cpu_agents.size(), &itr }); + else if(itr.type == ROCPROFILER_AGENT_TYPE_GPU) + gpu_agents.emplace_back(tool_agent{ gpu_agents.size(), &itr }); + } +} +} // namespace rocprofiler_sdk +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp new file mode 100644 index 
0000000000..4a702cb985 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp @@ -0,0 +1,252 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "common/synchronized.hpp" +#include "core/timemory.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace rocprofsys +{ +namespace rocprofiler_sdk +{ +using hardware_counter_info = ::tim::hardware_counters::info; + +using kernel_symbol_data_t = + rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t; +using kernel_symbol_map_t = + std::unordered_map; +using callback_arg_array_t = std::vector>; + +struct code_object_callback_record_t +{ + uint64_t timestamp = 0; + rocprofiler_callback_tracing_record_t record = {}; + rocprofiler_callback_tracing_code_object_load_data_t payload = {}; +}; + +struct kernel_symbol_callback_record_t +{ + uint64_t timestamp = 0; + rocprofiler_callback_tracing_record_t record = {}; + kernel_symbol_data_t payload = {}; +}; + +struct rocprofiler_tool_counter_info_t : rocprofiler_counter_info_v0_t +{ + using this_type = rocprofiler_tool_counter_info_t; + using parent_type = rocprofiler_counter_info_v0_t; + using dimension_info_vec_t = std::vector; + + rocprofiler_tool_counter_info_t(rocprofiler_agent_id_t _agent_id, parent_type _info, + dimension_info_vec_t&& _dim_info); + + rocprofiler_tool_counter_info_t() = default; + ~rocprofiler_tool_counter_info_t() = default; + rocprofiler_tool_counter_info_t(const rocprofiler_tool_counter_info_t&) = default; + rocprofiler_tool_counter_info_t(rocprofiler_tool_counter_info_t&&) noexcept = default; + rocprofiler_tool_counter_info_t& operator=(const rocprofiler_tool_counter_info_t&) = + default; + rocprofiler_tool_counter_info_t& operator =( + rocprofiler_tool_counter_info_t&&) noexcept = default; + + rocprofiler_agent_id_t agent_id = {}; + std::vector dimension_info = {}; +}; + +struct tool_agent +{ + uint64_t device_id = 0; + const rocprofiler_agent_v0_t* agent = nullptr; +}; + +struct timing_interval +{ + rocprofiler_timestamp_t start = 0; + rocprofiler_timestamp_t end = 0; +}; + +using 
agent_counter_info_map_t = + std::unordered_map>; + +using agent_counter_profile_map_t = + std::unordered_map>; + +using counter_id_vec_t = std::vector; + +using agent_counter_id_map_t = + std::unordered_map; + +using backtrace_operation_map_t = + std::unordered_map>; + +struct client_data +{ + static constexpr size_t num_buffers = 3; + static constexpr size_t num_contexts = 2; + + using buffer_name_info_t = rocprofiler::sdk::buffer_name_info_t; + using callback_name_info_t = rocprofiler::sdk::callback_name_info_t; + using kernel_symbol_vec_t = std::vector; + using code_object_vec_t = std::vector; + using buffer_id_vec_t = std::array; + using context_id_vec_t = std::array; + using agent_vec_t = std::vector; + + rocprofiler_client_id_t* client_id = nullptr; + rocprofiler_client_finalize_t client_fini = nullptr; + rocprofiler_context_id_t primary_ctx = { 0 }; + rocprofiler_context_id_t counter_ctx = { 0 }; + rocprofiler_buffer_id_t kernel_dispatch_buffer = { 0 }; + rocprofiler_buffer_id_t memory_copy_buffer = { 0 }; + rocprofiler_buffer_id_t counter_collection_buffer = { 0 }; + std::vector agents = {}; + std::vector cpu_agents = {}; + std::vector gpu_agents = {}; + std::vector events_info = {}; + agent_counter_id_map_t agent_events = {}; + agent_counter_info_map_t agent_counter_info = {}; + agent_counter_profile_map_t agent_counter_profiles = {}; + common::synchronized code_object_records = {}; + common::synchronized kernel_symbol_records = {}; + buffer_name_info_t buffered_tracing_info = {}; + callback_name_info_t callback_tracing_info = {}; + backtrace_operation_map_t backtrace_operations = {}; + + void initialize(); + void initialize_event_info(); + void set_agents(agent_vec_t&& agents); + context_id_vec_t get_contexts() const; + buffer_id_vec_t get_buffers() const; + const rocprofiler_agent_t* get_agent(rocprofiler_agent_id_t _id) const; + const tool_agent* get_gpu_tool_agent(rocprofiler_agent_id_t id) const; + const kernel_symbol_data_t* 
get_kernel_symbol_info(uint64_t _kernel_id) const; + const rocprofiler_tool_counter_info_t* get_tool_counter_info( + rocprofiler_agent_id_t _agent_id, rocprofiler_counter_id_t _counter_id) const; +}; + +inline client_data::context_id_vec_t +client_data::get_contexts() const +{ + return context_id_vec_t{ + primary_ctx, + counter_ctx, + }; +} + +inline client_data::buffer_id_vec_t +client_data::get_buffers() const +{ + return buffer_id_vec_t{ + kernel_dispatch_buffer, + memory_copy_buffer, + counter_collection_buffer, + }; +} + +inline const rocprofiler_agent_t* +client_data::get_agent(rocprofiler_agent_id_t _id) const +{ + for(const auto& itr : agents) + if(itr.id == _id) return &itr; + return nullptr; +} + +inline const tool_agent* +client_data::get_gpu_tool_agent(rocprofiler_agent_id_t id) const +{ + for(const auto& itr : gpu_agents) + if(id == itr.agent->id) return &itr; + return nullptr; +} + +inline const kernel_symbol_data_t* +client_data::get_kernel_symbol_info(uint64_t _kernel_id) const +{ + return kernel_symbol_records.rlock( + [_kernel_id](const auto& _data) -> const kernel_symbol_data_t* { + for(const auto& itr : _data) + { + if(_kernel_id == itr->payload.kernel_id) + { + return &itr->payload; + break; + } + } + return nullptr; + }); +} + +inline const rocprofiler_tool_counter_info_t* +client_data::get_tool_counter_info(rocprofiler_agent_id_t _agent_id, + rocprofiler_counter_id_t _counter_id) const +{ + for(const auto& itr : agent_counter_info.at(_agent_id)) + { + if(itr.id == _counter_id) return &itr; + } + return nullptr; +} + +inline constexpr client_data* +as_client_data(void* _ptr) +{ + return static_cast(_ptr); +} +} // namespace rocprofiler_sdk +} // namespace rocprofsys + +#if !defined(ROCPROFILER_CALL) +# define ROCPROFILER_CALL(result) \ + { \ + rocprofiler_status_t ROCPROFSYS_VARIABLE(_rocp_status_, __LINE__) = \ + (result); \ + if(ROCPROFSYS_VARIABLE(_rocp_status_, __LINE__) != \ + ROCPROFILER_STATUS_SUCCESS) \ + { \ + auto msg = 
std::stringstream{}; \ + std::string status_msg = rocprofiler_get_status_string( \ + ROCPROFSYS_VARIABLE(_rocp_status_, __LINE__)); \ + msg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " \ + << "rocprofiler-sdk call [" << #result \ + << "] failed with error code " \ + << ROCPROFSYS_VARIABLE(_rocp_status_, __LINE__) \ + << " :: " << status_msg; \ + ROCPROFSYS_WARNING(0, "%s\n", msg.str().c_str()); \ + } \ + } +#endif diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler.cpp deleted file mode 100644 index 5e155b874b..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler.cpp +++ /dev/null @@ -1,834 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#include "library/rocprofiler.hpp" -#include "core/common.hpp" -#include "core/config.hpp" -#include "core/debug.hpp" -#include "core/gpu.hpp" -#include "core/perfetto.hpp" -#include "library/rocm.hpp" -#include "library/rocm/hsa_rsrc_factory.hpp" - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace rocprofsys -{ -namespace rocprofiler -{ -namespace -{ -using ::rocprofiler::util::AgentInfo; -using ::rocprofiler::util::HsaRsrcFactory; - -auto& -get_event_names() -{ - static auto _v = std::map>{}; - return _v; -} -} // namespace - -// Error handler -void -fatal(const std::string& msg) -{ - ROCPROFSYS_PRINT_F("\n"); - ROCPROFSYS_PRINT_F("%s\n", msg.c_str()); - abort(); -} - -// Check returned HSA API status -const char* -rocm_error_string(hsa_status_t _status) -{ - const char* _err_string = nullptr; - if(_status != HSA_STATUS_SUCCESS) rocprofiler_error_string(&_err_string); - return _err_string; -} - -// Check returned HSA API status -bool -rocm_check_status(hsa_status_t _status, const std::set& _nonfatal = {}) -{ - if(_status != HSA_STATUS_SUCCESS) - { - if(_nonfatal.count(_status) == 0) - fatal(JOIN(" :: ", "ERROR", rocm_error_string(_status))); - - ROCPROFSYS_PRINT_F("Warning! 
%s\n", rocm_error_string(_status)); - return false; - } - return true; -} - -// Context stored entry type -struct context_entry_t -{ - bool valid; - hsa_agent_t agent; - rocprofiler_group_t group; - rocprofiler_callback_data_t data; -}; - -// Context callback arg -struct callbacks_arg_t -{ - rocprofiler_pool_t** pools; -}; - -// Handler callback arg -struct handler_arg_t -{ - rocprofiler_feature_t* features; - unsigned feature_count; -}; - -bool& -is_setup() -{ - static bool _v = false; - return _v; -} - -std::map> -get_data_labels() -{ - auto _v = std::map>{}; - for(const auto& itr : get_event_names()) - { - _v[itr.first] = {}; - for(auto vitr : itr.second) - _v[itr.first].emplace_back(std::string_view{ vitr.name }); - } - return _v; -} - -// Dump stored context entry -void -rocm_dump_context_entry(context_entry_t* entry, rocprofiler_feature_t* features, - unsigned feature_count) -{ - volatile std::atomic* valid = - reinterpret_cast*>(&entry->valid); - while(valid->load() == false) - sched_yield(); - - const rocprofiler_dispatch_record_t* record = entry->data.record; - - if(!record) return; // there is nothing to do here. 
- - auto _queue_id = entry->data.queue_id; - auto _thread_id = entry->data.thread_id; - auto _dev_id = HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index; - auto _kernel_name = std::string{ entry->data.kernel_name }; - auto _pos = _kernel_name.find_last_of(')'); - if(_pos != std::string::npos) _kernel_name = _kernel_name.substr(0, _pos + 1); - - rocprofiler_group_t& group = entry->group; - if(group.context == nullptr) - { - fatal("context is nullptr\n"); - } - - if(feature_count > 0) - { - rocm_check_status(rocprofiler_group_get_data(&group)); - rocm_check_status(rocprofiler_get_metrics(group.context)); - } - - auto _evt = - component::rocm_event{ _dev_id, _thread_id, _queue_id, _kernel_name, - record->begin, record->end, feature_count, features }; - - component::rocm_data()->emplace_back(_evt); -} - -// Profiling completion handler -// Dump and delete the context entry -// Return true if the context was dumped successfully -bool -rocm_context_handler(const rocprofiler_pool_entry_t* entry, void* arg) -{ - // Context entry - context_entry_t* ctx_entry = reinterpret_cast(entry->payload); - handler_arg_t* handler_arg = reinterpret_cast(arg); - - // rocm::lock_t _lk{ rocm::rocm_mutex, std::defer_lock }; - // if(!_lk.owns_lock()) _lk.lock(); - - rocm_dump_context_entry(ctx_entry, handler_arg->features, handler_arg->feature_count); - - return true; -} - -// Kernel disoatch callback -hsa_status_t -rocm_dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* arg, - rocprofiler_group_t* group) -{ - // Passed tool data - hsa_agent_t agent = callback_data->agent; - - // Open profiling context - const unsigned gpu_id = HsaRsrcFactory::Instance().GetAgentInfo(agent)->dev_index; - callbacks_arg_t* callbacks_arg = reinterpret_cast(arg); - rocprofiler_pool_t* pool = callbacks_arg->pools[gpu_id]; - rocprofiler_pool_entry_t pool_entry{}; - rocm_check_status(rocprofiler_pool_fetch(pool, &pool_entry)); - // Profiling context entry - rocprofiler_t* 
context = pool_entry.context; - context_entry_t* entry = reinterpret_cast(pool_entry.payload); - - // Get group[0] - rocm_check_status(rocprofiler_get_group(context, 0, group)); - - // Fill profiling context entry - entry->agent = agent; - entry->group = *group; - entry->data = *callback_data; - entry->data.kernel_name = strdup(callback_data->kernel_name); - reinterpret_cast*>(&entry->valid)->store(true); - - return HSA_STATUS_SUCCESS; -} - -unsigned -metrics_input(unsigned _device, rocprofiler_feature_t** ret) -{ - // Profiling feature objects - auto _events = tim::delimit(config::get_rocm_events(), ", ;\t\n"); - std::vector _features = {}; - auto _this_device = JOIN("", ":device=", _device); - for(auto itr : _events) - { - ROCPROFSYS_VERBOSE_F(3, "Processing feature '%s' for device %u...\n", itr.c_str(), - _device); - auto _pos = itr.find(":device="); - if(_pos != std::string::npos) - { - if(itr.find(_this_device) != std::string::npos) - { - _features.emplace_back(itr.substr(0, _pos)); - } - } - else - { - _features.emplace_back(itr); - } - } - const unsigned feature_count = _features.size(); - rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count]; - memset(features, 0, feature_count * sizeof(rocprofiler_feature_t)); - - // PMC events - for(unsigned i = 0; i < feature_count; ++i) - { - ROCPROFSYS_VERBOSE_F(3, "Adding feature '%s' for device %u...\n", - _features.at(i).c_str(), _device); - features[i].kind = ROCPROFILER_FEATURE_KIND_METRIC; - features[i].name = strdup(_features.at(i).c_str()); - features[i].parameters = nullptr; - features[i].parameter_count = 0; - } - - *ret = features; - return feature_count; -} - -using info_data = std::vector; - -hsa_status_t -info_data_callback(const rocprofiler_info_data_t info, void* arg) -{ - using qualifier_t = tim::hardware_counters::qualifier; - using qualifier_vec_t = std::vector; - auto* _data = static_cast(arg); - auto _dev_index = info.agent_index; - - switch(info.kind) - { - case 
ROCPROFILER_INFO_KIND_METRIC: - { - auto _device_qualifier_sym = JOIN("", ":device=", _dev_index); - auto _device_qualifier = - tim::hardware_counters::qualifier{ true, static_cast(_dev_index), - _device_qualifier_sym, - JOIN(" ", "Device", _dev_index) }; - auto _long_desc = std::string{ info.metric.description }; - auto _units = std::string{}; - auto _pysym = std::string{}; - if(info.metric.expr != nullptr) - { - auto _sym = JOIN("", info.metric.name, _device_qualifier_sym); - auto _short_desc = JOIN("", "Derived counter: ", info.metric.expr); - _data->emplace_back(component::rocm_info_entry( - true, tim::hardware_counters::api::rocm, _data->size(), 0, _sym, - _pysym, _short_desc, _long_desc, _units, - qualifier_vec_t{ _device_qualifier })); - } - else - { - if(info.metric.instances == 1) - { - auto _sym = JOIN("", info.metric.name, _device_qualifier_sym); - auto _short_desc = - JOIN("", info.metric.name, " on device ", _dev_index); - _data->emplace_back(component::rocm_info_entry( - true, tim::hardware_counters::api::rocm, _data->size(), 0, _sym, - _pysym, _short_desc, _long_desc, _units, - qualifier_vec_t{ _device_qualifier })); - } - else - { - for(uint32_t i = 0; i < info.metric.instances; ++i) - { - auto _instance_qualifier_sym = JOIN("", '[', i, ']'); - auto _instance_qualifier = - tim::hardware_counters::qualifier{ true, static_cast(i), - _instance_qualifier_sym, - JOIN(" ", "Instance", i) }; - auto _sym = JOIN("", info.metric.name, _instance_qualifier_sym, - _device_qualifier_sym); - auto _short_desc = JOIN("", info.metric.name, " instance ", i, - " on device ", _dev_index); - _data->emplace_back(component::rocm_info_entry( - true, tim::hardware_counters::api::rocm, _data->size(), 0, - _sym, _pysym, _short_desc, _long_desc, _units, - qualifier_vec_t{ _device_qualifier, _instance_qualifier })); - } - } - } - break; - } - default: printf("wrong info kind %u\n", info.kind); return HSA_STATUS_ERROR; - } - return HSA_STATUS_SUCCESS; -} - -std::vector 
-rocm_metrics() -{ - std::vector _data = {}; - try - { - (void) HsaRsrcFactory::Instance(); - } catch(std::runtime_error& _e) - { - ROCPROFSYS_VERBOSE_F(0, "%s\n", _e.what()); - return _data; - } - - // Available GPU agents - const unsigned gpu_count = HsaRsrcFactory::Instance().GetCountOfGpuAgents(); - - std::vector _gpu_agents(gpu_count, nullptr); - for(unsigned i = 0; i < gpu_count; ++i) - { - const AgentInfo* _agent = _gpu_agents[i]; - const AgentInfo** _agent_p = &_agent; - HsaRsrcFactory::Instance().GetGpuAgentInfo(i, _agent_p); - - if(!rocm_check_status(rocprofiler_iterate_info( - &_agent->dev_id, ROCPROFILER_INFO_KIND_METRIC, - info_data_callback, reinterpret_cast(&_data)), - { HSA_STATUS_ERROR_NOT_INITIALIZED })) - { - ROCPROFSYS_WARNING_F(-1, "rocprofiler_iterate_info failed for gpu agent %u\n", - i); - } - } - - if(gpu_count > 0 && _data.empty()) - { - if(!rocm_check_status(rocprofiler_iterate_info( - nullptr, ROCPROFILER_INFO_KIND_METRIC, - info_data_callback, reinterpret_cast(&_data)), - { HSA_STATUS_ERROR_NOT_INITIALIZED })) - { - ROCPROFSYS_WARNING_F( - -1, "rocprofiler_iterate_info failed for %i gpu agents\n", gpu_count); - } - } - - auto _settings = tim::settings::shared_instance(); - if(_settings) - { - auto ritr = _settings->find("ROCPROFSYS_ROCM_EVENTS"); - if(ritr != _settings->end()) - { - auto _rocm_events = ritr->second; - if(_rocm_events->get_choices().empty()) - { - std::vector _choices = {}; - _choices.reserve(_data.size()); - for(auto itr : _data) - { - if(!itr.symbol().empty()) _choices.emplace_back(itr.symbol()); - } - _rocm_events->set_choices(_choices); - } - } - } - - return _data; -} - -void -rocm_initialize() -{ - // Available GPU agents - const unsigned gpu_count = HsaRsrcFactory::Instance().GetCountOfGpuAgents(); - - (void) rocm_metrics(); - - // Adding dispatch observer - callbacks_arg_t* callbacks_arg = new callbacks_arg_t{}; - callbacks_arg->pools = new rocprofiler_pool_t*[gpu_count]; - for(unsigned gpu_id = 0; gpu_id < 
gpu_count; gpu_id++) - { - // Getting profiling features - rocprofiler_feature_t* features = nullptr; - unsigned feature_count = metrics_input(gpu_id, &features); - - if(features) - { - get_event_names()[gpu_id].clear(); - get_event_names()[gpu_id].reserve(feature_count); - for(unsigned i = 0; i < feature_count; ++i) - get_event_names().at(gpu_id).emplace_back(features[i]); - } - - // Handler arg - handler_arg_t* handler_arg = new handler_arg_t{}; - handler_arg->features = features; - handler_arg->feature_count = feature_count; - - // Context properties - rocprofiler_pool_properties_t properties{}; - properties.num_entries = 100; - properties.payload_bytes = sizeof(context_entry_t); - properties.handler = rocm_context_handler; - properties.handler_arg = handler_arg; - - // Getting GPU device info - const AgentInfo* agent_info = nullptr; - if(HsaRsrcFactory::Instance().GetGpuAgentInfo(gpu_id, &agent_info) == false) - { - fprintf(stderr, "GetGpuAgentInfo failed\n"); - abort(); - } - - // Open profiling pool - rocprofiler_pool_t* pool = nullptr; - uint32_t mode = 0; // ROCPROFILER_MODE_SINGLEGROUP - rocm_check_status(rocprofiler_pool_open(agent_info->dev_id, features, - feature_count, &pool, mode, &properties)); - callbacks_arg->pools[gpu_id] = pool; - } - - rocprofiler_queue_callbacks_t callbacks_ptrs{}; - callbacks_ptrs.dispatch = rocm_dispatch_callback; - int err = rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_arg); - ROCPROFSYS_VERBOSE_F(3, "err=%d, rocprofiler_set_queue_callbacks\n", err); - - is_setup() = true; -} - -void -rocm_cleanup() -{ - // Unregister dispatch callback - rocm_check_status(rocprofiler_remove_queue_callbacks()); - // close profiling pool - // rocm_check_status(rocprofiler_pool_flush(pool)); - // rocm_check_status(rocprofiler_pool_close(pool)); -} - -namespace -{ -using rocm_event = component::rocm_event; -using rocm_data_t = component::rocm_data_t; -using rocm_metric_type = component::rocm_metric_type; -using rocm_feature_value = 
component::rocm_feature_value; -using rocm_data_tracker = component::rocm_data_tracker; - -void -post_process_perfetto() -{ - using counter_track = perfetto_counter_track; - - static bool _once = false; - if(_once) return; - - auto _data = rocm_data_t{}; - auto _device_data = std::map>{}; - auto _device_fields = std::map>{}; - auto _device_range = std::map>{}; - - for(size_t i = 0; i < ROCPROFSYS_MAX_THREADS; ++i) - { - auto& _v = component::rocm_data(i); - if(_v) - { - _data.reserve(_data.size() + _v->size()); - for(auto& itr : *_v) - _data.emplace_back(itr); - } - } - - if(_data.empty()) return; - _once = true; - - std::sort(_data.begin(), _data.end()); - - auto _get_events = [](std::vector& _inp, rocm_metric_type _ts) { - auto _v = std::vector{}; - for(const auto& itr : _inp) - { - if(_ts >= itr->entry && _ts <= itr->exit) _v.emplace_back(itr); - if(_ts > itr->exit) break; - } - return _v; - }; - - { - auto _device_time = std::map>{}; - for(auto& itr : _data) - { - _device_data[itr.device_id].emplace_back(&itr); - _device_time[itr.device_id].emplace(itr.entry); - _device_time[itr.device_id].emplace(itr.exit); - auto _dev_id = itr.device_id; - if(get_use_perfetto() && !counter_track::exists(_dev_id)) - { - auto addendum = [&](auto&& _v) { - return JOIN(" ", "Device", _v, JOIN("", '[', _dev_id, ']')); - }; - for(auto nitr : itr.feature_names) - { - auto _name = get_data_labels().at(itr.device_id).at(nitr); - counter_track::emplace(_dev_id, addendum(_name)); - } - } - } - - for(auto& ditr : _device_time) - { - for(auto itr = ditr.second.begin(); itr != ditr.second.end(); ++itr) - { - auto _next = std::next(itr); - if(_next == ditr.second.end()) continue; - _device_range[ditr.first].emplace(((*_next / 2) + (*itr / 2))); - } - } - } - - for(auto& ditr : _device_range) - { - auto _dev_id = ditr.first; - auto _values = std::vector{}; - auto _ts_sorted_data = _device_data[_dev_id]; - std::sort(_ts_sorted_data.begin(), _ts_sorted_data.end(), - [](auto* _l, auto* _r) { 
return _l->exit < _r->exit; }); - for(const auto& itr : ditr.second) - { - auto _v = _get_events(_ts_sorted_data, itr); - uint64_t _ts = itr; - for(auto* vitr : _v) - { - size_t _n = vitr->feature_values.size(); - if(_values.empty()) - { - _values.reserve(_n); - for(size_t i = 0; i < _n; ++i) - { - _values.emplace_back(vitr->feature_values.at(i)); - } - } - else - { - for(size_t i = 0; i < _n; ++i) - { -#ifdef __GNUC__ -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wdouble-promotion" -#endif - auto _plus = [](auto& _lhs, auto&& _rhs) { _lhs += _rhs; }; - std::visit(_plus, _values.at(i), vitr->feature_values.at(i)); -#ifdef __GNUC__ -# pragma GCC diagnostic pop -#endif - } - } - } - - for(size_t i = 0; i < _values.size(); ++i) - { - auto _trace_counter = [_dev_id, i, _ts](auto&& _val) { - TRACE_COUNTER("kernel_hardware_counter", - counter_track::at(_dev_id, i), _ts, _val); - }; - std::visit(_trace_counter, _values.at(i)); - } - } - } -} - -void -post_process_timemory() -{ - static bool _once = false; - if(_once) return; - - auto _data = rocm_data_t{}; - auto _device_data = std::map>{}; - auto _device_fields = std::map>{}; - auto _device_range = std::map>{}; - - for(size_t i = 0; i < ROCPROFSYS_MAX_THREADS; ++i) - { - auto& _v = component::rocm_data(i); - if(_v) - { - _data.reserve(_data.size() + _v->size()); - for(auto& itr : *_v) - _data.emplace_back(itr); - } - } - - if(_data.empty()) return; - _once = true; - - std::sort(_data.begin(), _data.end()); - - for(auto& itr : _data) - { - _device_data[itr.device_id].emplace_back(&itr); - } - - for(auto& itr : _device_data) - { - // sort according to when it exited - std::sort(itr.second.begin(), itr.second.end(), - [](auto* _lhs, auto* _rhs) { return _lhs->exit < _rhs->exit; }); - } - - using storage_type = typename rocm_data_tracker::storage_type; - using bundle_type = tim::lightweight_tuple; - - auto _info = rocm_metrics(); - static auto _get_description = [&_info](std::string_view _v) { - for(auto& 
itr : _info) - { - if(itr.symbol().find(_v) == 0 || itr.short_description().find(_v) == 0) - { - return itr.long_description(); - } - } - return std::string{}; - }; - - struct local_event - { - rocm_event* parent = nullptr; - mutable std::vector children = {}; - - ROCPROFSYS_DEFAULT_OBJECT(local_event) - - explicit local_event(rocm_event* _v) - : parent{ _v } - {} - - bool operator()(rocm_event* _v) - { - if(!parent) return false; - if(_v->device_id != parent->device_id) return false; - if(_v->entry > parent->entry && _v->exit <= parent->exit) - { - children.emplace_back(_v); - return true; - } - return false; - } - - bool operator<(const local_event& _v) const - { - if(!parent && _v.parent) return true; - if(parent && !_v.parent) return false; - return *parent < *_v.parent; - } - - void operator()(int64_t _index, scope::config _scope) const - { - if(!parent) return; - bundle_type _bundle{ parent->name, _scope }; - _bundle.push(parent->queue_id) - .start() - .store(parent->feature_values.at(_index)); - - std::sort(children.begin(), children.end()); - for(const auto& itr : children) - itr(_index, _scope); - - _bundle.stop().pop(parent->queue_id); - } - }; - - struct local_storage - { - int64_t index = 0; - std::string metric_name = {}; - std::string metric_description = {}; - std::unique_ptr storage = {}; - - local_storage(uint32_t _devid, size_t _idx, std::string_view _name) - : index{ static_cast(_idx) } - , metric_name{ _name } - , metric_description{ _get_description(metric_name) } - { - auto _metric_name = std::string{ _name }; - _metric_name = std::regex_replace( - _metric_name, std::regex{ "(.*)\\[([0-9]+)\\]" }, "$1_$2"); - storage = std::make_unique( - tim::standalone_storage{}, index, - JOIN('-', "rocprof", "device", _devid, _metric_name)); - } - - void operator()(const local_event& _event, scope::config _scope) const - { - operation::set_storage{}(storage.get()); - _event(index, _scope); - } - - void write() const - { - rocm_data_tracker::label() = 
metric_name; - rocm_data_tracker::description() = metric_description; - storage->write(); - } - }; - - auto _local_data = std::map>{}; - auto _scope = scope::get_default(); - - for(auto& ditr : _device_data) - { - ROCPROFSYS_VERBOSE_F(1, "Post-processing %zu entries for device %u...\n", - ditr.second.size(), ditr.first); - auto _storage = std::vector{}; - for(auto& itr : ditr.second) - { - auto _n = itr->feature_names.size(); - if(_n > _storage.size()) - { - _storage.reserve(_n); - for(size_t i = _storage.size(); i < _n; ++i) - _storage.emplace_back( - ditr.first, i, - get_data_labels().at(ditr.first).at(itr->feature_names.at(i))); - } - } - - auto& _local = _local_data[ditr.first]; - _local.reserve(ditr.second.size()); - double _avg = 0.0; - for(auto& itr : ditr.second) - { - if(_local.empty() || itr->entry >= _local.back().parent->exit) - { - _local.emplace_back(itr); - } - else - { - size_t _n = 0; - bool _found = false; - for(auto litr = _local.rbegin(); litr != _local.rend(); ++litr) - { - ++_n; - if((*litr)(itr)) - { - _found = true; - break; - } - } - if(!_found) _local.emplace_back(itr); - _avg += _n; - } - } - - ROCPROFSYS_VERBOSE_F(3, "Average # of iterations before match: %.1f\n", - _avg / ditr.second.size() * 100.0); - - for(auto& sitr : _storage) - { - for(auto& itr : _local) - sitr(itr, _scope); - } - - for(auto& itr : _storage) - itr.write(); - } - - tim::trait::runtime_enabled::set(false); -} -} // namespace - -void -post_process() -{ - if(get_use_perfetto()) post_process_perfetto(); - - if(get_use_timemory()) - { - auto _manager = tim::manager::master_instance(); - if(_manager) - { - _manager->add_cleanup("rocprofiler", &post_process_timemory); - } - else - { - post_process_timemory(); - } - } -} -} // namespace rocprofiler -} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/roctracer.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/roctracer.cpp deleted file mode 100644 index 
b7abcf713a..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/roctracer.cpp +++ /dev/null @@ -1,967 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#include "library/roctracer.hpp" -#include "binary/analysis.hpp" -#include "core/components/fwd.hpp" -#include "core/concepts.hpp" -#include "core/config.hpp" -#include "core/debug.hpp" -#include "core/locking.hpp" -#include "library/components/category_region.hpp" -#include "library/runtime.hpp" -#include "library/sampling.hpp" -#include "library/thread_data.hpp" -#include "library/thread_info.hpp" -#include "library/tracing.hpp" - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#if ROCPROFSYS_HIP_VERSION < 50300 -# include -#endif - -#define AMD_INTERNAL_BUILD 1 -#include - -#if __has_include() || (defined(ROCPROFSYS_USE_HIP) && ROCPROFSYS_USE_HIP > 0) -# include -# define ROCPROFSYS_HIP_API_ARGS 1 -#else -# define ROCPROFSYS_HIP_API_ARGS 0 -#endif - -TIMEMORY_DEFINE_API(roctracer) -namespace rocprofsys -{ -namespace -{ -template -auto& -roctracer_type_mutex() -{ - return tim::type_mutex(); -} - -std::string -hip_api_string(hip_api_id_t id, const hip_api_data_t* data) -{ -#if ROCPROFSYS_HIP_API_ARGS > 0 - std::string _v = hipApiString(id, data); - if(_v.empty()) return _v; - auto _pbeg = _v.find('('); - if(_pbeg == std::string::npos) return _v; - auto _pend = _v.find_last_of(')'); - if(_pend == std::string::npos || _pbeg >= _pend) return _v; - auto _n = (_pend - _pbeg - 1); - return _v.substr(_pbeg + 1, _n); -#else - tim::consume_parameters(id, data); -#endif -} - -int& -get_current_device() -{ - static thread_local int _v = 1; - return _v; -} - -std::unordered_set& -get_roctracer_kernels() -{ - static auto _v = std::unordered_set{}; - return _v; -} - -auto& -get_roctracer_hip_data(int64_t _tid = threading::get_id()) -{ - using data_t = std::unordered_map; - using thread_data_t = thread_data; - return thread_data_t::instance(construct_on_thread{ _tid }); -} - -std::unordered_map& -get_roctracer_key_data() -{ - static auto _v = std::unordered_map{}; - return _v; -} - -std::unordered_map& 
-get_roctracer_tid_data() -{ - static auto _v = std::unordered_map{}; - return _v; -} - -auto& -get_hip_activity_callbacks(int64_t _tid = threading::get_id()) -{ - using thread_data_t = - thread_data>, category::roctracer>; - return thread_data_t::instance(construct_on_thread{ _tid }); -} - -size_t -get_hip_activity_callbacks_size() -{ - using thread_data_t = - thread_data>, category::roctracer>; - return thread_data_t::size(); -} - -using hip_activity_mutex_t = std::decay_t; -using key_data_mutex_t = std::decay_t; - -auto& -get_hip_activity_mutex(int64_t _tid = threading::get_id()) -{ - return tim::type_mutex( - _tid % max_supported_threads); -} -} // namespace - -// -int64_t -get_clock_skew() -{ - static auto _use = tim::get_env("ROCPROFSYS_USE_ROCTRACER_CLOCK_SKEW", true); - if(!_use) return 0; - static auto _v = []() { - auto _gpu_now = []() { - uint64_t _ts = 0; - roctracer_get_timestamp(&_ts); - return _ts; - }; - - // discard (warm-up) - (void) tracing::get_clock_skew(_gpu_now, 1); - - auto _diff = tracing::get_clock_skew(_gpu_now, 10); - ROCPROFSYS_BASIC_VERBOSE(1, "CPU/HIP timestamp skew: %li (used: %s)\n", _diff, - _use ? "yes" : "no"); - return _diff; - }(); - return _v; -} - -// HSA API callback function -void -hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) -{ - if(get_state() != State::Active || !trait::runtime_enabled::get()) - return; - - ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal); - - (void) arg; - const hsa_api_data_t* data = reinterpret_cast(callback_data); - ROCPROFSYS_CONDITIONAL_PRINT_F( - get_debug() && get_verbose() >= 2, "<%-30s id(%u)\tcorrelation_id(%lu) %s>\n", - roctracer_op_string(domain, cid, 0), cid, data->correlation_id, - (data->phase == ACTIVITY_API_PHASE_ENTER) ? 
"on-enter" : "on-exit"); - - static thread_local int64_t begin_timestamp = 0; - - switch(cid) - { - case HSA_API_ID_hsa_init: - case HSA_API_ID_hsa_shut_down: - case HSA_API_ID_hsa_agent_get_exception_policies: - case HSA_API_ID_hsa_agent_get_info: - case HSA_API_ID_hsa_amd_agent_iterate_memory_pools: - case HSA_API_ID_hsa_amd_agent_memory_pool_get_info: - case HSA_API_ID_hsa_amd_coherency_get_type: - case HSA_API_ID_hsa_amd_memory_pool_get_info: - case HSA_API_ID_hsa_amd_pointer_info: - case HSA_API_ID_hsa_amd_pointer_info_set_userdata: - case HSA_API_ID_hsa_amd_profiling_async_copy_enable: - case HSA_API_ID_hsa_amd_profiling_get_async_copy_time: - case HSA_API_ID_hsa_amd_profiling_get_dispatch_time: - case HSA_API_ID_hsa_amd_profiling_set_profiler_enabled: - case HSA_API_ID_hsa_cache_get_info: - case HSA_API_ID_hsa_code_object_get_info: - case HSA_API_ID_hsa_code_object_get_symbol: - case HSA_API_ID_hsa_code_object_get_symbol_from_name: - case HSA_API_ID_hsa_code_object_reader_create_from_memory: - case HSA_API_ID_hsa_code_symbol_get_info: - case HSA_API_ID_hsa_executable_create_alt: - case HSA_API_ID_hsa_executable_freeze: - case HSA_API_ID_hsa_executable_get_info: - case HSA_API_ID_hsa_executable_get_symbol: - case HSA_API_ID_hsa_executable_get_symbol_by_name: - case HSA_API_ID_hsa_executable_symbol_get_info: - case HSA_API_ID_hsa_extension_get_name: - case HSA_API_ID_hsa_ext_image_data_get_info: - case HSA_API_ID_hsa_ext_image_data_get_info_with_layout: - case HSA_API_ID_hsa_ext_image_get_capability: - case HSA_API_ID_hsa_ext_image_get_capability_with_layout: - case HSA_API_ID_hsa_isa_get_exception_policies: - case HSA_API_ID_hsa_isa_get_info: - case HSA_API_ID_hsa_isa_get_info_alt: - case HSA_API_ID_hsa_isa_get_round_method: - case HSA_API_ID_hsa_region_get_info: - case HSA_API_ID_hsa_system_extension_supported: - case HSA_API_ID_hsa_system_get_extension_table: - case HSA_API_ID_hsa_system_get_info: - case HSA_API_ID_hsa_system_get_major_extension_table: - 
case HSA_API_ID_hsa_wavefront_get_info: break; - default: - { - if(data->phase == ACTIVITY_API_PHASE_ENTER) - { - begin_timestamp = comp::wall_clock::record(); - } - else - { - const auto* _name = roctracer_op_string(domain, cid, 0); - const auto end_timestamp = (cid == HSA_API_ID_hsa_shut_down) - ? begin_timestamp - : comp::wall_clock::record(); - - if(begin_timestamp > end_timestamp) return; - - if(get_use_perfetto()) - { - uint64_t _beg_ts = begin_timestamp; - uint64_t _end_ts = end_timestamp; - tracing::push_perfetto_ts(category::rocm_hsa{}, _name, _beg_ts, - [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation( - ctx, "begin_ns", _beg_ts); - } - }); - tracing::pop_perfetto_ts(category::rocm_hsa{}, _name, _end_ts, - [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation( - ctx, "end_ns", _end_ts); - } - }); - } - - if(get_use_timemory()) - { - auto _beg_ns = begin_timestamp; - auto _end_ns = end_timestamp; - if(tasking::roctracer::get_task_group().pool()) - tasking::roctracer::get_task_group().exec( - [_name, _beg_ns, _end_ns]() { - roctracer_hsa_bundle_t _bundle{ _name }; - _bundle.start() - .store(std::plus{}, - static_cast(_end_ns - _beg_ns)) - .stop(); - }); - } - // timemory is disabled in this callback because collecting data in this - // thread causes strange segmentation faults - } - } - } -} - -void -hsa_activity_callback(uint32_t op, const void* vrecord, void* arg) -{ - const auto* record = static_cast(vrecord); - - if(get_state() != State::Active || !trait::runtime_enabled::get()) - return; - - ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal); - - auto&& _protect = comp::roctracer::protect_flush_activity(); - (void) _protect; - - static const char* copy_op_name = "hsa_async_copy"; - static const char* dispatch_op_name = "hsa_dispatch"; - static const char* barrier_op_name = "hsa_barrier"; - const char** _name = nullptr; - - 
switch(op) - { - case HSA_OP_ID_DISPATCH: _name = &dispatch_op_name; break; - case HSA_OP_ID_COPY: _name = ©_op_name; break; - case HSA_OP_ID_BARRIER: _name = &barrier_op_name; break; - default: break; - } - - ROCPROFSYS_CI_FAIL(_name == nullptr, "Error! HSA operation type not handled: %u\n", - op); - - if(!_name) return; - - auto _beg_ns = record->begin_ns + get_clock_skew(); - auto _end_ns = record->end_ns + get_clock_skew(); - - if(get_use_perfetto()) - { - uint64_t _beg = _beg_ns; - uint64_t _end = _end_ns; - tracing::push_perfetto_ts( - category::device_hsa{}, *_name, _beg, [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "begin_ns", _beg); - } - }); - tracing::pop_perfetto_ts( - category::device_hsa{}, *_name, _end, [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "end_ns", _end); - } - }); - } - - auto _func = [_beg_ns, _end_ns, _name]() { - if(get_use_timemory()) - { - roctracer_hsa_bundle_t _bundle{ *_name }; - _bundle.start() - .store(std::plus{}, static_cast(_end_ns - _beg_ns)) - .stop(); - } - }; - - if(tasking::roctracer::get_task_group().pool()) - tasking::roctracer::get_task_group().exec(_func); - - // timemory is disabled in this callback because collecting data in this thread - // causes strange segmentation faults - tim::consume_parameters(arg); -} - -void -hip_exec_activity_callbacks(int64_t _tid) -{ - // guard against initialization of structure when trying to exec - if(static_cast(_tid) >= get_hip_activity_callbacks_size()) return; - - // ROCPROFSYS_ROCTRACER_CALL(roctracer_flush_activity()); - locking::atomic_lock _lk{ get_hip_activity_mutex(_tid) }; - auto& _async_ops = get_hip_activity_callbacks(_tid); - if(!_async_ops) return; - for(auto& itr : *_async_ops) - { - if(itr) itr(); - } - _async_ops->clear(); -} - -namespace -{ -thread_local std::unordered_map gpu_crit_cids = {}; -} - -void 
-roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, - void* /*arg*/) -{ - if(get_state() != State::Active || !trait::runtime_enabled::get()) - return; - - ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal); - - if(domain != ACTIVITY_DOMAIN_ROCTX) return; - - static auto _range_map = std::unordered_map{}; - static auto _range_lock = locking::atomic_mutex{}; - const auto* _data = reinterpret_cast(callback_data); - static thread_local auto _range_stack = std::vector{}; - - switch(cid) - { - case ROCTX_API_ID_roctxRangePushA: - { - if(_data->args.message) - { - auto& itr = _range_stack.emplace_back(std::string{ _data->args.message }); - component::category_region::start(itr.c_str()); - } - break; - } - case ROCTX_API_ID_roctxRangePop: - { - if(!_range_stack.empty()) - { - auto& itr = _range_stack.back(); - component::category_region::stop(itr.c_str()); - _range_stack.pop_back(); - } - else - { - ROCPROFSYS_THROW("Error! roctxRangePop stack is empty! Expected " - "roctxRangePush/roctxRangePop on same thread\n"); - } - break; - } - case ROCTX_API_ID_roctxRangeStartA: - { - { - locking::atomic_lock _lk{ _range_lock, std::defer_lock }; - if(!_lk.owns_lock()) _lk.lock(); - _range_map.emplace(roctx_range_id_t{ _data->args.id }, - std::string{ _data->args.message }); - } - - component::category_region::start(_data->args.message); - break; - } - case ROCTX_API_ID_roctxRangeStop: - { - std::string_view _message = {}; - { - locking::atomic_lock _lk{ _range_lock, std::defer_lock }; - if(!_lk.owns_lock()) _lk.lock(); - auto itr = _range_map.find(roctx_range_id_t{ _data->args.id }); - ROCPROFSYS_CI_THROW(itr == _range_map.end(), - "Error! could not find range with id %lu\n", - _data->args.id); - if(itr == _range_map.end()) - { - ROCPROFSYS_VERBOSE(0, "Warning! 
could not find range with id %lu\n", - _data->args.id); - return; - } - else - { - _message = itr->second; - } - } - - if(!_message.empty()) - { - component::category_region::stop(_message.data()); - } - - break; - } - case ROCTX_API_ID_roctxMarkA: - { - if(_data->args.message) - { - component::category_region::mark( - _data->args.message); - } - break; - } - default: break; - } -} - -// HIP API callback function -void -hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) -{ - if(get_state() != State::Active || !trait::runtime_enabled::get()) - return; - - ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal); - - assert(domain == ACTIVITY_DOMAIN_HIP_API); - const char* op_name = roctracer_op_string(domain, cid, 0); - if(op_name == nullptr) op_name = hip_api_name(cid); - if(op_name == nullptr) return; - assert(std::string{ op_name } == std::string{ hip_api_name(cid) }); - - switch(cid) - { - case HIP_API_ID___hipPushCallConfiguration: - case HIP_API_ID___hipPopCallConfiguration: - case HIP_API_ID_hipDeviceEnablePeerAccess: -#if ROCPROFSYS_HIP_VERSION_MAJOR > 4 || \ - (ROCPROFSYS_HIP_VERSION_MAJOR == 4 && ROCPROFSYS_HIP_VERSION_MINOR >= 3) - case HIP_API_ID_hipImportExternalMemory: - case HIP_API_ID_hipDestroyExternalMemory: -#endif - return; - default: break; - } - - const hip_api_data_t* data = reinterpret_cast(callback_data); - ROCPROFSYS_CONDITIONAL_PRINT_F( - get_debug() && get_verbose() >= 2, "<%-30s id(%u)\tcorrelation_id(%lu) %s>\n", - op_name, cid, data->correlation_id, - (data->phase == ACTIVITY_API_PHASE_ENTER) ? 
"on-enter" : "on-exit"); - - int64_t _ts = comp::wall_clock::record(); - auto _tid = threading::get_id(); - uint64_t _crit_cid = 0; - uint64_t _parent_crit_cid = 0; - uint32_t _depth = 0; - auto _roct_cid = data->correlation_id; - - auto& _device_id = get_current_device(); - - if(data->phase == ACTIVITY_API_PHASE_ENTER) - { - if(cid == HIP_API_ID_hipSetDevice) - get_current_device() = - reinterpret_cast(data->args.hipSetDevice.deviceId) + 1; - - const char* _name = nullptr; - switch(cid) - { - case HIP_API_ID_hipLaunchKernel: - { - _name = hipKernelNameRefByPtr(data->args.hipLaunchKernel.function_address, - data->args.hipLaunchKernel.stream); - break; - } - case HIP_API_ID_hipLaunchCooperativeKernel: - { - _name = - hipKernelNameRefByPtr(data->args.hipLaunchCooperativeKernel.f, - data->args.hipLaunchCooperativeKernel.stream); - if(!_name) - { - _name = - hipKernelNameRefByPtr(data->args.hipLaunchKernel.function_address, - data->args.hipLaunchKernel.stream); - } - break; - } - case HIP_API_ID_hipHccModuleLaunchKernel: - { - _name = hipKernelNameRef(data->args.hipHccModuleLaunchKernel.f); - break; - } - case HIP_API_ID_hipModuleLaunchKernel: - { - _name = hipKernelNameRef(data->args.hipModuleLaunchKernel.f); - break; - } - case HIP_API_ID_hipExtModuleLaunchKernel: - { - _name = hipKernelNameRef(data->args.hipExtModuleLaunchKernel.f); - break; - } - case HIP_API_ID_hipExtLaunchKernel: - { - _name = - hipKernelNameRefByPtr(data->args.hipExtLaunchKernel.function_address, - data->args.hipLaunchKernel.stream); - break; - } - default: break; - } - - if(_name != nullptr) - { - if(get_use_perfetto() || get_use_timemory() || get_use_rocm_smi()) - { - locking::atomic_lock _lk{ roctracer_type_mutex() }; - get_roctracer_key_data().emplace(_roct_cid, _name); - get_roctracer_tid_data().emplace(_roct_cid, _tid); - } - } - - std::tie(_crit_cid, _parent_crit_cid, _depth) = create_cpu_cid_entry(); - - if(get_use_perfetto()) - { - static auto _compact_annotations = - 
config::get_setting_value( - "ROCPROFSYS_PERFETTO_COMPACT_ROCTRACER_ANNOTATIONS") - .value_or(false); - - static auto _enable_backtraces = - config::get_setting_value("ROCPROFSYS_ROCTRACER_HIP_API_BACKTRACE") - .value_or(false); - - constexpr size_t bt_stack_depth = 16; - constexpr size_t bt_ignore_depth = 3; - constexpr bool bt_with_signal_frame = true; - - using backtrace_entry_vec_t = std::vector; - auto _bt_data = std::optional{}; - if(_enable_backtraces && config::get_perfetto_annotations()) - { - auto _backtrace = tim::get_unw_stack(); - _bt_data = backtrace_entry_vec_t{}; - _bt_data->reserve(_backtrace.size()); - for(auto itr : _backtrace) - { - if(itr) - { - if(auto _val = binary::lookup_ipaddr_entry(itr->address()); - _val) - { - _bt_data->emplace_back(std::move(*_val)); - } - } - } - } - - auto _api_id = static_cast(cid); - tracing::push_perfetto_ts( - category::rocm_hip{}, op_name, _ts, - ::perfetto::Flow::ProcessScoped(_roct_cid), - [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "begin_ns", _ts); - tracing::add_perfetto_annotation(ctx, "cid", _crit_cid); - tracing::add_perfetto_annotation(ctx, "pcid", _parent_crit_cid); - tracing::add_perfetto_annotation(ctx, "device", _device_id); - tracing::add_perfetto_annotation(ctx, "tid", _tid); - tracing::add_perfetto_annotation(ctx, "depth", _depth); - tracing::add_perfetto_annotation(ctx, "corr_id", _roct_cid); - if(_compact_annotations) - { - tracing::add_perfetto_annotation( - ctx, "args", hip_api_string(_api_id, data)); - } - else - { - auto _args = std::string{ hip_api_string(_api_id, data) }; - if(!_args.empty()) - { - for(auto itr : tim::delimit(_args, ",")) - { - if(itr.empty()) continue; - auto _bpos = itr.find_first_not_of(' '); - auto _epos = itr.find_last_not_of(' '); - if(_epos > _bpos) - itr = itr.substr(_bpos, (_epos - _bpos) + 1); - auto _pos = itr.find('='); - if(_pos != std::string::npos) - 
tracing::add_perfetto_annotation( - ctx, itr.substr(0, _pos), - itr.substr(_pos + 1)); - } - } - } - - if(_enable_backtraces && _bt_data && !_bt_data->empty()) - { - const std::string _unk = "??"; - size_t _bt_cnt = 0; - for(const auto& itr : *_bt_data) - { - const auto* _func = - (itr.name.empty()) ? &_unk : &itr.name; - const auto* _loc = - (itr.location.empty()) ? &_unk : &itr.location; - auto _line = (itr.lineno == 0) ? std::string{ "?" } - : join("", itr.lineno); - auto _entry = join("", demangle(*_func), " @ ", - join(':', *_loc, _line)); - if(_bt_cnt < 10) - { - // Prepend zero for better ordering in UI. - // Only one zero is ever necessary since stack depth - // is limited to 16. - tracing::add_perfetto_annotation( - ctx, join("", "frame#0", _bt_cnt++), _entry); - } - else - { - tracing::add_perfetto_annotation( - ctx, join("", "frame#", _bt_cnt++), _entry); - } - } - } - } - }); - } - if(get_use_timemory()) - { - auto itr = get_roctracer_hip_data()->emplace( - _roct_cid, roctracer_hip_bundle_t{ op_name }); - if(itr.second) - { - itr.first->second.start(); - } - else if(itr.first != get_roctracer_hip_data()->end()) - { - itr.first->second.stop(); - get_roctracer_hip_data()->erase(itr.first); - } - } - - hip_exec_activity_callbacks(_tid); - } - else if(data->phase == ACTIVITY_API_PHASE_EXIT) - { - hip_exec_activity_callbacks(_tid); - - if(get_use_perfetto()) - { - tracing::pop_perfetto_ts( - category::rocm_hip{}, op_name, _ts, [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "end_ns", _ts); - } - }); - } - if(get_use_timemory()) - { - auto _stop = [&_roct_cid](int64_t _tid_v) { - auto& _data = get_roctracer_hip_data(_tid_v); - auto itr = _data->find(_roct_cid); - if(itr != get_roctracer_hip_data()->end()) - { - itr->second.stop(); - _data->erase(itr); - return true; - } - return false; - }; - if(!_stop(_tid)) - { - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - { - 
if(_stop(i)) break; - } - } - } - } - tim::consume_parameters(arg); -} - -// Activity tracing callback -void -hip_activity_callback(const char* begin, const char* end, void* arg) -{ - if(get_state() != State::Active || !trait::runtime_enabled::get()) - return; - - ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal); - - auto&& _protect = comp::roctracer::protect_flush_activity(); - (void) _protect; - - if(!trait::runtime_enabled::get()) return; - static auto _kernel_names = std::unordered_map{}; - static auto _indexes = std::unordered_map{}; - static auto _skip_barrier_packets = - config::get_setting_value("ROCPROFSYS_ROCTRACER_DISCARD_BARRIERS") - .value_or(false); - const roctracer_record_t* record = reinterpret_cast(begin); - const roctracer_record_t* end_record = - reinterpret_cast(end); - - auto&& _advance_record = [&record]() { - ROCPROFSYS_ROCTRACER_CALL(roctracer_next_record(record, &record)); - }; - - while(record < end_record) - { - // make sure every iteration advances regardless of where return point happens - scope::destructor _next_dtor{ _advance_record }; - - // ROCPROFSYS_CI will enable these asserts and should fail if something relevant - // changes - assert(HIP_OP_ID_DISPATCH == 0); - assert(HIP_OP_ID_COPY == 1); - assert(HIP_OP_ID_BARRIER == 2); - - if(record->domain == ACTIVITY_DOMAIN_HSA_OPS) - { - hsa_activity_callback(record->op, record, arg); - continue; - } - if(record->domain != ACTIVITY_DOMAIN_HIP_OPS) continue; - if(record->op > HIP_OP_ID_BARRIER) continue; - if(_skip_barrier_packets && record->op == HIP_OP_ID_BARRIER) continue; - - const char* op_name = - roctracer_op_string(record->domain, record->op, record->kind); - auto _ns_skew = get_clock_skew(); - uint64_t _beg_ns = record->begin_ns + _ns_skew; - uint64_t _end_ns = record->end_ns + _ns_skew; - auto _roct_cid = record->correlation_id; - - auto& _keys = get_roctracer_key_data(); - auto& _tids = get_roctracer_tid_data(); - - int64_t _tid = 0; // thread id - int32_t _devid = 
record->device_id; // device id - int64_t _queid = record->queue_id; // queue id - uintptr_t _queue = 0; // Host queue (stream) - const char* _name = nullptr; - bool _found = false; - - { - locking::atomic_lock _lk{ roctracer_type_mutex() }; - if(_tids.find(_roct_cid) != _tids.end()) - { - _found = true; - _tid = _tids.at(_roct_cid); - auto itr = _keys.find(_roct_cid); - if(itr != _keys.end()) _name = itr->second; - } - } - - if(_name == nullptr && op_name == nullptr) continue; - if(_name == nullptr) _name = op_name; - - static auto _op_id_names = - std::array{ "DISPATCH", "COPY", "BARRIER" }; - - if(_end_ns < _beg_ns) - { - auto _verbose = []() { return get_verbose() >= 0 || get_debug(); }; - static size_t _n = 0; - static size_t _nmax = - get_env("ROCPROFSYS_ROCTRACER_DISCARD_INVALID", 0); - if(_nmax == 0) std::swap(_end_ns, _beg_ns); - ROCPROFSYS_WARNING_IF_F( - _n < _nmax && _verbose(), - "%4zu :: Discarding kernel roctracer activity record which ended before " - "it started :: %-20s :: %-20s :: cid=%lu, time_ns=(%12lu:%12lu) " - "delta=%li, device=%d, queue=%lu, pid=%u, tid=%lu, op=%s\n", - _n, op_name, _name, record->correlation_id, _beg_ns, _end_ns, - (static_cast(_end_ns) - static_cast(_beg_ns)), _devid, - _queid, record->process_id, _tid, _op_id_names.at(record->op)); - ROCPROFSYS_WARNING_IF_F( - _nmax > 0 && _n == _nmax && _verbose(), - "Suppressing future messages about discarding kernel roctracer activity " - "record which ended before it started. Set " - "ROCPROFSYS_ROCTRACER_DISCARD_INVALID=N to increase/decrease the number " - "of messages. 
If N is set to 0, data will be included after swapping the " - "begin and end values\n"); - if(_end_ns < _beg_ns) - { - ++_n; - continue; - } - } - - // execute this on this thread bc of how perfetto visualization works - if(get_use_perfetto()) - { - if(_kernel_names.find(_name) == _kernel_names.end()) - _kernel_names.emplace(_name, tim::demangle(_name)); - - auto _track_desc = [](int32_t _device_id, int64_t _queue_id) { - if(config::get_perfetto_roctracer_per_stream()) - return JOIN("", "HIP Activity Device ", _device_id, ", Queue ", - _queue_id); - return JOIN("", "HIP Activity Device ", _device_id); - }; - - const auto _track = tracing::get_perfetto_track( - category::device_hip{}, _track_desc, _devid, - (get_perfetto_roctracer_per_stream()) ? _queid : 0); - - assert(_end_ns >= _beg_ns); - tracing::push_perfetto_track( - category::device_hip{}, _kernel_names.at(_name).c_str(), _track, _beg_ns, - ::perfetto::Flow::ProcessScoped(_roct_cid), - [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "begin_ns", _beg_ns); - tracing::add_perfetto_annotation(ctx, "end_ns", _end_ns); - tracing::add_perfetto_annotation(ctx, "corr_id", _roct_cid); - tracing::add_perfetto_annotation(ctx, "device", _devid); - tracing::add_perfetto_annotation(ctx, "queue", _queid); - tracing::add_perfetto_annotation(ctx, "tid", _tid); - tracing::add_perfetto_annotation( - ctx, "stream", JOIN("", "0x", std::hex, _queue)); - tracing::add_perfetto_annotation(ctx, "op", - _op_id_names.at(record->op)); - } - }); - tracing::pop_perfetto_track(category::device_hip{}, "", _track, _end_ns); - } - - if(_found && _name != nullptr && get_use_timemory()) - { - auto _func = [_beg_ns, _end_ns, _name]() { - roctracer_hip_bundle_t _bundle{ _name }; - _bundle.start() - .store(std::plus{}, static_cast(_end_ns - _beg_ns)) - .stop() - .get([&](comp::wall_clock* wc) { - wc->set_value(_end_ns - _beg_ns); - wc->set_accum(_end_ns - _beg_ns); - return 
wc; - }); - _bundle.pop(); - }; - - auto& _async_ops = get_hip_activity_callbacks(_tid); - locking::atomic_lock _lk{ get_hip_activity_mutex(_tid) }; - _async_ops->emplace_back(std::move(_func)); - } - } - - // ensures that all the updates are written - if(get_use_perfetto()) ::perfetto::TrackEvent::Flush(); -} - -bool& -roctracer_is_init() -{ - static bool _v = tim::get_env("ROCPROFSYS_ROCTRACER_IS_INIT", false); - return _v; -} - -bool& -roctracer_is_setup() -{ - static bool _v = false; - return _v; -} - -using roctracer_functions_t = std::vector>>; - -roctracer_functions_t& -roctracer_setup_routines() -{ - static auto _v = roctracer_functions_t{}; - return _v; -} - -roctracer_functions_t& -roctracer_shutdown_routines() -{ - static auto _v = roctracer_functions_t{}; - return _v; -} -} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/roctracer.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/roctracer.hpp deleted file mode 100644 index e0f0a4a163..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/roctracer.hpp +++ /dev/null @@ -1,89 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -#include "core/config.hpp" -#include "core/debug.hpp" -#include "core/hip_runtime.hpp" -#include "core/perfetto.hpp" -#include "library/components/roctracer.hpp" -#include "library/ptl.hpp" - -#include -#include - -// Macro to check ROC-tracer calls status -#define ROCPROFSYS_ROCTRACER_CALL(call) \ - { \ - ROCPROFSYS_DEBUG_F(#call); \ - int err = call; \ - if(err != 0) \ - { \ - ROCPROFSYS_PRINT_F("%s in: %s\n", roctracer_error_string(), #call); \ - } \ - } - -namespace rocprofsys -{ -using roctracer_hip_bundle_t = - tim::component_bundle; -using roctracer_hsa_bundle_t = - tim::component_bundle; -using roctracer_functions_t = std::vector>>; - -// HSA API callback function -void -hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg); - -void -hsa_activity_callback(uint32_t op, const void* record, void* arg); - -void -hip_exec_activity_callbacks(int64_t _tid); - -// HIP API callback function -void -hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg); - -void -roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg); - -// Activity tracing callback -void -hip_activity_callback(const char* begin, const char* end, void*); - -bool& -roctracer_is_init(); - -bool& -roctracer_is_setup(); - -int64_t -get_clock_skew(); - -roctracer_functions_t& -roctracer_setup_routines(); - -roctracer_functions_t& -roctracer_shutdown_routines(); -} // namespace rocprofsys diff --git 
a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/runtime.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/runtime.hpp index 7d64c326e3..5a8ea562eb 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/runtime.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/runtime.hpp @@ -33,7 +33,6 @@ #include "library/components/mpi_gotcha.hpp" #include "library/components/numa_gotcha.hpp" #include "library/components/pthread_gotcha.hpp" -#include "library/components/roctracer.hpp" #include "library/thread_data.hpp" #include diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-rocm-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-rocm-tests.cmake index feed0f1bea..50d4ef9584 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-rocm-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-rocm-tests.cmake @@ -4,9 +4,7 @@ # # -------------------------------------------------------------------------------------- # -set(ROCPROFSYS_ROCM_EVENTS_TEST - "GRBM_COUNT,GPUBusy,SQ_WAVES,SQ_INSTS_VALU,VALUInsts,TCC_HIT_sum,TA_TA_BUSY[0]:device=0,TA_TA_BUSY[11]:device=0" - ) +set(ROCPROFSYS_ROCM_EVENTS_TEST "GRBM_COUNT,SQ_WAVES,SQ_INSTS_VALU,TA_TA_BUSY:device=0") rocprofiler_systems_add_test( NAME transpose @@ -26,7 +24,8 @@ rocprofiler_systems_add_test( args -E uniform_int_distribution - ENVIRONMENT "${_base_environment}") + ENVIRONMENT "${_base_environment}" + RUNTIME_TIMEOUT 480) rocprofiler_systems_add_test( SKIP_REWRITE SKIP_RUNTIME @@ -36,9 +35,7 @@ rocprofiler_systems_add_test( GPU ON NUM_PROCS 1 RUN_ARGS 1 2 2 - ENVIRONMENT - "${_base_environment};ROCPROFSYS_ROCTRACER_HSA_ACTIVITY=OFF;ROCPROFSYS_ROCTRACER_HSA_API=OFF" - ) + ENVIRONMENT "${_base_environment}") rocprofiler_systems_add_test( SKIP_BASELINE SKIP_RUNTIME @@ -64,7 +61,11 @@ rocprofiler_systems_add_test( ENVIRONMENT "${_base_environment}" REWRITE_FAIL_REGEX "0 instrumented loops in procedure transpose") 
-if(ROCPROFSYS_USE_ROCPROFILER) +if(ROCPROFSYS_USE_ROCM) + set(_ROCP_PASS_REGEX + "rocprof-device-0-GRBM_COUNT.txt(.*)rocprof-device-0-SQ_INSTS_VALU.txt(.*)rocprof-device-0-SQ_WAVES.txt(.*)rocprof-device-0-TA_TA_BUSY.txt(.*)" + ) + rocprofiler_systems_add_test( SKIP_BASELINE SKIP_RUNTIME NAME transpose-rocprofiler @@ -76,22 +77,7 @@ if(ROCPROFSYS_USE_ROCPROFILER) REWRITE_ARGS -e -v 2 -E uniform_int_distribution ENVIRONMENT "${_base_environment};ROCPROFSYS_ROCM_EVENTS=${ROCPROFSYS_ROCM_EVENTS_TEST}" - REWRITE_RUN_PASS_REGEX - "rocprof-device-0-GRBM_COUNT.txt(.*)rocprof-device-0-GPUBusy.txt(.*)rocprof-device-0-SQ_WAVES.txt(.*)rocprof-device-0-SQ_INSTS_VALU.txt(.*)rocprof-device-0-VALUInsts.txt(.*)rocprof-device-0-TCC_HIT_sum.txt(.*)rocprof-device-0-TA_TA_BUSY_0.txt(.*)rocprof-device-0-TA_TA_BUSY_11.txt" - ) + REWRITE_RUN_PASS_REGEX "${_ROCP_PASS_REGEX}" + SAMPLING_PASS_REGEX "${_ROCP_PASS_REGEX}") - rocprofiler_systems_add_test( - SKIP_BASELINE SKIP_RUNTIME - NAME transpose-rocprofiler-no-roctracer - TARGET transpose - LABELS "rocprofiler" - MPI ${TRANSPOSE_USE_MPI} - GPU ON - NUM_PROCS ${NUM_PROCS} - REWRITE_ARGS -e -v 2 -E uniform_int_distribution - ENVIRONMENT - "${_base_environment};ROCPROFSYS_USE_ROCTRACER=OFF;ROCPROFSYS_ROCM_EVENTS=${ROCPROFSYS_ROCM_EVENTS_TEST}" - REWRITE_RUN_PASS_REGEX - "rocprof-device-0-GRBM_COUNT.txt(.*)rocprof-device-0-GPUBusy.txt(.*)rocprof-device-0-SQ_WAVES.txt(.*)rocprof-device-0-SQ_INSTS_VALU.txt(.*)rocprof-device-0-VALUInsts.txt(.*)rocprof-device-0-TCC_HIT_sum.txt(.*)rocprof-device-0-TA_TA_BUSY_0.txt(.*)rocprof-device-0-TA_TA_BUSY_11.txt" - REWRITE_RUN_FAIL_REGEX "roctracer.txt|ROCPROFSYS_ABORT_FAIL_REGEX") endif() diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake index 5b92f55df6..fafafec9dc 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake @@ -226,7 +226,7 
@@ endif() # -------------------------------------------------------------------------------------- # set(_VALID_GPU OFF) -if(ROCPROFSYS_USE_HIP AND (NOT DEFINED ROCPROFSYS_CI_GPU OR ROCPROFSYS_CI_GPU)) +if(ROCPROFSYS_USE_ROCM AND (NOT DEFINED ROCPROFSYS_CI_GPU OR ROCPROFSYS_CI_GPU)) set(_VALID_GPU ON) find_program( ROCPROFSYS_ROCM_SMI_EXE @@ -254,7 +254,7 @@ if(ROCPROFSYS_USE_HIP AND (NOT DEFINED ROCPROFSYS_CI_GPU OR ROCPROFSYS_CI_GPU)) endif() endif() -set(LULESH_USE_GPU ${LULESH_USE_HIP}) +set(LULESH_USE_GPU ${LULESH_USE_ROCM}) if(LULESH_USE_CUDA) set(LULESH_USE_GPU ON) endif() @@ -314,8 +314,6 @@ ROCPROFSYS_SAMPLING_FREQ = 300 ROCPROFSYS_SAMPLING_DELAY = 0.05 ROCPROFSYS_SAMPLING_CPUS = 0-${NUM_SAMPLING_PROCS} ROCPROFSYS_SAMPLING_GPUS = $env:HIP_VISIBLE_DEVICES -ROCPROFSYS_ROCTRACER_HSA_API = ON -ROCPROFSYS_ROCTRACER_HSA_ACTIVITY = ON # test-specific values ${_FILE_CONTENTS} @@ -430,18 +428,18 @@ function(ROCPROFILER_SYSTEMS_ADD_TEST) if(TEST_GPU) list(APPEND TEST_LABELS "gpu") - if(NOT "ROCPROFSYS_USE_ROCTRACER=OFF" IN_LIST TEST_ENVIRONMENT) - list(APPEND TEST_LABELS "roctracer") + if(NOT "ROCPROFSYS_USE_ROCM=OFF" IN_LIST TEST_ENVIRONMENT) + list(APPEND TEST_LABELS "rocm") endif() - if(NOT "ROCPROFSYS_USE_ROCM_SMI=OFF" IN_LIST TEST_ENVIRONMENT) + if(NOT "ROCPROFSYS_USE_ROCM=OFF" IN_LIST TEST_ENVIRONMENT) list(APPEND TEST_LABELS "rocm-smi") endif() endif() - if("ROCPROFSYS_USE_ROCTRACER=ON" IN_LIST TEST_ENVIRONMENT AND NOT "roctracer" IN_LIST - TEST_ENVIRONMENT) - list(APPEND TEST_LABELS "roctracer") + if("ROCPROFSYS_USE_ROCM=ON" IN_LIST TEST_ENVIRONMENT AND NOT "rocm" IN_LIST + TEST_ENVIRONMENT) + list(APPEND TEST_LABELS "rocm") endif() if("ROCPROFSYS_USE_ROCM_SMI=ON" IN_LIST TEST_ENVIRONMENT AND NOT "rocm-smi" IN_LIST @@ -449,11 +447,6 @@ function(ROCPROFILER_SYSTEMS_ADD_TEST) list(APPEND TEST_LABELS "rocm-smi") endif() - if("ROCPROFSYS_USE_ROCPROFILER=ON" IN_LIST TEST_ENVIRONMENT - AND NOT "rocprofiler" IN_LIST TEST_ENVIRONMENT) - list(APPEND TEST_LABELS 
"rocprofiler") - endif() - if(TARGET ${TEST_TARGET}) if(DEFINED TEST_MPI AND ${TEST_MPI} From 5a6f6ed83de8c32e319807b20a23c906a08ff9fd Mon Sep 17 00:00:00 2001 From: darren-amd Date: Mon, 16 Dec 2024 14:31:56 -0500 Subject: [PATCH 07/12] Allow disabling of openmp examples (#64) Add flag to disable openmp examples Co-authored-by: David Galiffi --------- Co-authored-by: David Galiffi [ROCm/rocprofiler-systems commit: 888b9a43a0196ee605d85f0f70fb3ebe37ad3491] --- .../examples/openmp/CMakeLists.txt | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/projects/rocprofiler-systems/examples/openmp/CMakeLists.txt b/projects/rocprofiler-systems/examples/openmp/CMakeLists.txt index c7451766f7..2b3c7e6199 100644 --- a/projects/rocprofiler-systems/examples/openmp/CMakeLists.txt +++ b/projects/rocprofiler-systems/examples/openmp/CMakeLists.txt @@ -2,6 +2,15 @@ cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-openmp LANGUAGES CXX) +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if(${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES OR ${_DIR} IN_LIST + ROCPROFSYS_DISABLE_EXAMPLES) + return() + endif() +endif() + file(GLOB common_source ${CMAKE_CURRENT_SOURCE_DIR}/common/*.cpp) add_library(openmp-common OBJECT ${common_source}) target_include_directories(openmp-common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/common) @@ -57,4 +66,10 @@ if(ROCPROFSYS_INSTALL_EXAMPLES) COMPONENT rocprofiler-systems-examples) endif() -add_subdirectory(target) +if(ROCPROFSYS_DISABLE_EXAMPLES) + if(NOT "openmp-target" IN_LIST ROCPROFSYS_DISABLE_EXAMPLES) + add_subdirectory(target) + endif() +else() + add_subdirectory(target) +endif() From 01ba75c945d4fd5d37a59a1ea32d575e7887c18d Mon Sep 17 00:00:00 2001 From: Peter Park Date: Mon, 16 Dec 2024 16:29:15 -0500 Subject: [PATCH 08/12] docs: Add section on building rocprofsys without ROCm (#57) Adds brief information in install.rst regarding Provided 
build scripts What to do when building rocprofiler-systems w/o ROCm Recent changes to the configuration settings as a result of rocprofiler-sdk support. Fixes minor formatting issues in install.rst resulting in Sphinx build warnings --------- Co-authored-by: David Galiffi [ROCm/rocprofiler-systems commit: 02ed6e187e284eff09a82fbdbaee1db0152e9535] --- .../docs/install/install.rst | 68 ++++++++++++++++--- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/projects/rocprofiler-systems/docs/install/install.rst b/projects/rocprofiler-systems/docs/install/install.rst index dd3c67db0c..16889a9960 100644 --- a/projects/rocprofiler-systems/docs/install/install.rst +++ b/projects/rocprofiler-systems/docs/install/install.rst @@ -70,7 +70,7 @@ Other modes of use, such as sampling and causal profiling, are not dependent on might be more portable. Installing ROCm Systems Profiler from binary distributions -================================================ +========================================================== Every ROCm Systems Profiler release provides binary installer scripts of the form: @@ -112,11 +112,11 @@ To install ROCm Systems Profiler using a binary installer script, follow these s ./rocprofiler-systems-1.0.0-ubuntu-18.04-ROCm-405000-OMPT-PAPI.sh --prefix=/opt/rocprofiler-systems --exclude-subdir -Installing ROCm Systems Profiler from source -======================================== +Building ROCm Systems Profiler from source +========================================== ROCm Systems Profiler needs a GCC compiler with full support for C++17 and CMake v3.16 or higher. -The Clang compiler may be used in lieu of the GCC compiler if `Dyninst `_ +The Clang compiler may be used instead of the GCC compiler if `Dyninst `_ is already installed. Build requirements @@ -160,7 +160,6 @@ while Dyninst requires TBB), and the CMake option to build the package alongside .. 
csv-table:: :header: "Third-Party Library", "Minimum Version", "Required By", "CMake Option" - :widths: 15, 10, 12, 40 "Dyninst", "12.0", "ROCm Systems Profiler", "``ROCPROFSYS_BUILD_DYNINST`` (default: OFF)" "Libunwind", "", "ROCm Systems Profiler", "``ROCPROFSYS_BUILD_LIBUNWIND`` (default: ON)" @@ -176,9 +175,8 @@ Optional third-party packages * `ROCm `_ * HIP - * Roctracer for HIP API and kernel tracing - * ROCM-SMI for GPU monitoring - * Rocprofiler for GPU hardware counters + * ROCm SMI Lib for GPU monitoring + * ROCprofiler SDK for GPU hardware counters and ROCm tracing * `PAPI `_ * MPI @@ -237,8 +235,10 @@ Installing Dyninst via Spack spack install --reuse dyninst spack load -r dyninst -Installing ROCm Systems Profiler ------------------------------------ +.. _cmake-options: + +Building and installing ROCm Systems Profiler +--------------------------------------------- ROCm Systems Profiler has CMake configuration options for MPI support (``ROCPROFSYS_USE_MPI`` or ``ROCPROFSYS_USE_MPI_HEADERS``), @@ -275,6 +275,37 @@ in `the Perfetto UI `_. cmake --build rocprof-sys-build --target install source /opt/rocprofiler-systems/share/rocprofiler-systems/setup-env.sh +.. _build-script: + +Using the build script +^^^^^^^^^^^^^^^^^^^^^^ + +This method automates the CMake process with a script that wraps the CMake +commands and handles build logic, environment variables, and packaging. Run +``./scripts/build-release.sh`` with your desired options to generate packages. + +Use ``./scripts/build-release.sh --help`` for more information. + +.. 
code-block:: shell-session + + ./scripts/build-release.sh --help + Options: + --core [+nopython] [+python] Core (Use '+nopython' to build w/o python, use '+python' to python build with python) + --mpi [+nopython] [+python] MPI (Use '+nopython' to build w/o python, use '+python' to python build with python) + --rocm [+nopython] [+python] ROCm (Use '+nopython' to build w/o python, use '+python' to python build with python) + --rocm-mpi [+nopython] [+python] ROCm + MPI (Use '+nopython' to build w/o python, use '+python' to python build with python) + --mpi-impl [openmpi|mpich] MPI implementation + + --lto [on|off] Enable LTO (default: off) + --strip [on|off] Strip libraries (default: off) + --perfetto-tools [on|off] Install perfetto tools (default: on) + --static-libgcc [on|off] Build with static libgcc (default: on) + --static-libstdcxx [on|off] Build with static libstdc++ (default: on) + --hidden-visibility [on|off] Build with hidden visibility (default: on) + --max-threads N Max number of threads supported (default: 2048) + --parallel N Number of parallel build jobs (default: 12) + --generators [STGZ][DEB][RPM][+others] CPack generators (default: stgz deb rpm) + .. _mpi-support-rocprof-sys: MPI support within ROCm Systems Profiler @@ -304,6 +335,20 @@ ROCm Systems Profiler on an application built against OpenMPI causes a segmentat This happens because the value of the ``MPI_COMM_WORLD`` is truncated during the function wrapping before being passed along to the underlying MPI function. +ROCm Systems Profiler without ROCm +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To build ROCm Systems Profiler for use on systems without a GPU or the ROCm runtime, disable ROCm +support using the CMake configuration option ``ROCPROFSYS_USE_ROCM=OFF``. See :ref:`cmake-options` +for more information. + +Alternatively, use the provided build script with the appropriate options. See :ref:`build-script`. 
+For example, to build without ROCm support and create a STGZ installer, use the following command: + +.. code-block:: shell + + ./scripts/build-release.sh --core +python --generators STGZ + .. _post-installation-steps: Post-installation steps @@ -408,4 +453,5 @@ Configuring PAPI to collect hardware counters To use PAPI to collect the majority of hardware counters, ensure the ``/proc/sys/kernel/perf_event_paranoid`` setting has a value less than or equal to ``2``. -For more information, see the :ref:`rocprof-sys_papi_events` section. \ No newline at end of file +For more information, see the :ref:`rocprof-sys_papi_events` section. + From 9be05fc6739104b4dafc89ef26c15e76306568f7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 02:49:43 +0000 Subject: [PATCH 09/12] Bump rocm-docs-core[api_reference] from 1.11.0 to 1.12.0 in /docs/sphinx Bumps [rocm-docs-core[api_reference]](https://github.com/ROCm/rocm-docs-core) from 1.11.0 to 1.12.0. - [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.11.0...v1.12.0) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] [ROCm/rocprofiler-systems commit: fc6900c70e13ec5057b8e3c15030822d4361dc4c] --- projects/rocprofiler-systems/docs/sphinx/requirements.in | 2 +- projects/rocprofiler-systems/docs/sphinx/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/rocprofiler-systems/docs/sphinx/requirements.in b/projects/rocprofiler-systems/docs/sphinx/requirements.in index b49e04e378..ed04a6309d 100644 --- a/projects/rocprofiler-systems/docs/sphinx/requirements.in +++ b/projects/rocprofiler-systems/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core[api_reference]==1.11.0 +rocm-docs-core[api_reference]==1.12.0 diff --git a/projects/rocprofiler-systems/docs/sphinx/requirements.txt b/projects/rocprofiler-systems/docs/sphinx/requirements.txt index ab9bd257a3..1f1081ed2c 100644 --- a/projects/rocprofiler-systems/docs/sphinx/requirements.txt +++ b/projects/rocprofiler-systems/docs/sphinx/requirements.txt @@ -132,7 +132,7 @@ requests==2.32.3 # via # pygithub # sphinx -rocm-docs-core[api-reference]==1.11.0 +rocm-docs-core[api-reference]==1.12.0 # via -r requirements.in six==1.16.0 # via python-dateutil From 2d6b4d99884d88b9a300808fb17636d181a373ab Mon Sep 17 00:00:00 2001 From: Sajina PK Date: Wed, 18 Dec 2024 15:56:48 -0500 Subject: [PATCH 10/12] Enable VCN tracing in Perfetto output (#65) Enable VCN activity tracing on different instances from the GPU metrics fetched using rsmi_dev_gpu_metrics_info_get in the ROCm-SMI library. The tracing can be controlled with ROCPROFSYS_ROCM_SMI_METRICS by setting the value to vcn_activity. Currently, this configuration takes the following values: busy, temp, power, mem_usage, vcn_activity. By default, all five values are enabled.
Signed-off-by: Sajina P Kandy Co-authored-by: Sajina Kandy [ROCm/rocprofiler-systems commit: 3fa37c991e2fa72335e8ca6c7a9bcc7b6fb19066] --- .../conceptual/rocprof-sys-feature-set.rst | 1 + .../docs/data/rocprof-sys-gpu-metrics.png | Bin 0 -> 98355 bytes .../how-to/configuring-runtime-options.rst | 1 + .../understanding-rocprof-sys-output.rst | 8 ++++ .../source/lib/core/categories.hpp | 2 + .../source/lib/core/components/fwd.hpp | 13 +++++++ .../source/lib/core/config.cpp | 13 ++++--- .../rocprofiler-systems/categories.h | 1 + .../lib/rocprof-sys/library/rocm_smi.cpp | 35 +++++++++++++++++- .../lib/rocprof-sys/library/rocm_smi.hpp | 26 ++++++++----- .../lib/rocprof-sys/library/sampling.cpp | 7 ++++ 11 files changed, 90 insertions(+), 17 deletions(-) create mode 100644 projects/rocprofiler-systems/docs/data/rocprof-sys-gpu-metrics.png diff --git a/projects/rocprofiler-systems/docs/conceptual/rocprof-sys-feature-set.rst b/projects/rocprofiler-systems/docs/conceptual/rocprof-sys-feature-set.rst index 5f6307774c..fed24b55b3 100644 --- a/projects/rocprofiler-systems/docs/conceptual/rocprof-sys-feature-set.rst +++ b/projects/rocprofiler-systems/docs/conceptual/rocprof-sys-feature-set.rst @@ -58,6 +58,7 @@ GPU metrics * Power usage * Temperature * Utilization + * VCN activity CPU metrics ======================================== diff --git a/projects/rocprofiler-systems/docs/data/rocprof-sys-gpu-metrics.png b/projects/rocprofiler-systems/docs/data/rocprof-sys-gpu-metrics.png new file mode 100644 index 0000000000000000000000000000000000000000..366d35858cb32da8eaca8871c52b631c67985f4d GIT binary patch literal 98355 zcmeFZcT|&Gw>KI^u^^zLB3(s5M2Penihy)bY0^bHA@rKi6a^Ip1Oy>efzSyxNEcCQ zLVy5)gl3_H5Rew6-zVyR&)#>u=Z^1u=iG6>KW@f=Cs|KhYp%KG{LQwW-qP1#JkEX` z1OhQ?X{y};fsP#jf#|}H(gQ8_ub*E44hMYhXxspm^>UJcABUV&^i)8gidcpn+rz-` zW1gC3J|NIJKHA>_tvi>0fI$0sT52jrfz}J-M}mijcK4SJsF&C%y84KGbh!lNnDz*FCybq=-DoBPGUPZ zBA+9*PA@h!W`v~8Y(plu$0vO}K8N_kv@d^aFQ)Yb)F+~-sQ7huR)CwE+k_ElAmr57 
zn|F0}<-U3Iz6cl=2vq$zf1QQ)rH|=QIB<}Q+|$o%vG{e45Owk-&;&}OKd1~GxS}RY zfdeRO_vnFN=V?D^ACZ{f*Zuwl0!@AWFJI96?dS5hp0coPC;RL&c=GHoyMu*{Ipzpo}@ul%-O@C39_wmG_Hnwh1hR{f4}+jC!R{j8=GN=7}0B3m&56&GxR(T}g?95^ORT z$~qDK#{NXY%a)eR6Hh1}Lumu4v&AJPy75%5xUXMN6SoZ?wNbMCs$b0S9<73~SZZWB zJPN0E_pLDEXa~wRJbx&4H1e zGdU`8^38#8k0f-WdUQ(?Iw_QwLn;g`psiDCnkM_o*f<_(bK&se!zMkWg@gWJ;5m73 z-b~M`*yNyx{8GKArff&6q|H`fXrXIYrLZ8m40=azpVibRRx5SlEnC}{lBPB~SW^dw zJh$;?!1S8}_|dyE1wZdER$2K0i#0%H1eEEfC@hyOd3BAyN_wSb7W7yWe-{{F z>+(0$+C1AuagpX*k<488g-PcRA8vQRCBg2vA^dZ4wzT*6c0Vr%xA-;W<~iL(pB@dE z$_hdkVRq3TnVO`2-myb2Qn1mhL7SVy#jY7%jD$3W(R!)hqn(QRy>gTV2EZu8J9l!6 zinO;jmcYAP>s3jE`RnzTHu-3e#qLd9UK=dW!Tz;!$0M&ChaXuF#QfJZ0iH6^ex~#J z)=j6XqIuUsnL^u#y-P0(iwDyT^4QqBxRvQr4b9CR5lTf#eQ}FlzkIPB@=qIFJF#jl zA#+jXCKHcq@cLs8+O)<*wPLHR@juFMv82iQ{(K@k z1JUYq+?TtSxY)8twW!bV<5IGAet3R&TOB#^bPY~g{YD!7Ik`F~>w*^E-TwH*CW1S3 ztubJcz^ksV&OhQSA%M4>bsvBD*?T-TBRW!eZR_x1`e8)``@l#IBDRj{@Z-lbZ`*FD zer|Fus6)8o2_fm_=^Gi}Lon1^K9eEAA@-Ad5vaLxg9i@|d|g`d7_L&XgQwTv&o?n2 z#_T>K#E46STc|;O*^6rPM+ z@Guz^_&T?_1~8oP0x?3?Kj!qXqJ{`|>x{H56uQL`48=rgQmb-=`m zp^)bJvV7mBT@194V%%L}SGT^nP2xrlI4Nh6A1r&-V8izYp_bMrA}^Z|#28sTr%uaj z=cmsX9ISKJxJl)5;(qqnQE}XQ z^;)W963fAZw6nOrJGV4iPAUcd$lAOUF!k=ii)PI-9XfUbLtLa_wNuuNH9~HU?7+A^ z=c&xzaqMWpXe{+Ki_!&*6ezb_QXfJsp)B#gNN7$j#Y6%!zt$Tn+sJ+?X1AojxWo>l( zMWT=KZR^{(?%V2*ANel1W6*e~?hT?_yV=B%5Jf6k!;3nZXBpGyg1D7J#~$5~u*}^?V;~H2&yDo;%hu-4UTjuWoT=-( zg>2Tx_B=9i!`^c=NeYUo)?jN2)qu6XAwHk$Z{tK;@2_)6BeUWnrF}Ien|Krzn`a0k znXHg=mXUJYM0G%e! 
z3PIkt>7^^;MEh@_W+h@Dc}bklf!V}XEsE!(aelGHr1gbU?gV?Jsxl>b!bk_NTwBwI zu~fC$YA)mB6(=gFwqg)v?fD0?^pLf~%}1P-Z_ydBe;#l;B11g&izlZrzXgBzs;l6k z-0VZ!KdeKx)?6?GSvxzyvfkgH9Mk>qF@2sZCx?{0Cnu2@d1w;oAFj{r`g^5C-kZXPB2dE$}U+xB~Kc3b=BbG6j;RiGaLc^ zu6Lea;hJu5rrc<73fg9VNpHe~D(m3g183L0e&8?<4=yJ%Qo5+9gpezD-WlgqVM z4CA%*bnx}(D#wv?@MaM#Y;0zzd@e?&xtbyldT+G|4)(6fOx)vH z1JT(+EQ&rr!d}wr{av=S>$l*KqO=_SF~^G5D!39N*>vM(zH70$$h&h%_1*>*J!D8NM+m*w1@lUZyA56|a_+;^JOfYf!W_usJhq-LDQ{5TU(4EBH^mva`tV z@K=)1_3nLJaeA~NzixQzGXY&g?R%pVd#Ww+^_M)jy4vfhXhi{bBO#&UAnUahq=eT) zHYxM~RhPTFQ*z_RM(KtG5o1N^aeV()^fXl716a9Q0H?~uC-J9X5K{I(F0eto->xl= z;vW5|j=DH*pvtv=O*$8=ePaj20&;ztfm}31SCxsof4;KH>G!>YL|x06Q}l+x)QM(o zQ#lyXrdU6Oab>ZKk2-%`UN?dLU=i`#U5kv`VM{tjCDLjh#8waD5&xn6V`|puZK6+6 zdr^Bn#2_yhmAc*@%Oxx-+RrbOm(9x}Q_02PDaCu}5Fj<*!Th|B7I^V|{Uu88YjRwyOG;d6QzyT=FdY;M0YnZqEDx@VE8A8jXvT^bh)ohyC1fgiDt$ zjRgy$vrpyOtU!OF+?f^O!Bon;uLp(l%|4H`e6z+Eg3Q?-n0j~E5G&q#e7tuNf|zc2 z1=S6I1Cfx(24tz@winr;NfFND`}4XyM87FR&xMqMaj+rqz6vS0#`RFUHvhmmM7kow zmaa>AuYRT-0xh=%r@%h8w>L;#N5QnwwNy1@7YLF-PlJ+b>lC{gzoS$YxOl5YB^%b4cCxf1JuSxl@8d_YE2Oy&JBDK<_k*@@ zJ;@I>G$8H&@hT2?$JCX)I-Cm4Qn?<29@qdQ5PeYFOcR2|D|h_f!kieRjw`b|doWGk zfj~|bKV+8S!I^?`!~!=x-E%Vcr#Mb`LCo;7zdN z*RLyUYcJTW@JKfy4+R7$q$Lbw#aQRe7tNbkVTT9qwT_7L>W!#HNCpKR9*)QKPrZ!1 ze}g?OVuGO*&7nI09Uk+8{+gT^iG{o(NjHsagd82iwY|Bf(fW}5W38okYfSV<7CkKH zJiQ3Gmzg%WCd?XX+~Q!wK^W{?R*8Al#M;%IR^$c1Z8eu_Ml7B&X+${q& zwMU!YfjE?2r61X3^>Fz>rW_go@lh|1Q{HDlBpzi!vH>fj|3rE&bq6q!z{Pm?>l-bC>6_Ow%X(#` zxEyt&IB^$;s)ZMn0z1~O4W)+29>3#<8HSLBTW(IO`pGD7Z$F?OM?vmNN=sb$MG_f? 
zw2p)#R#eIx9PE#|E|)^RDjKw6PZ}ytCT(7nl{IVr`IRAPe|MwDARV8_Xv)*$*P8FkO7|6{pU-HMjQ6&TtX4d5w!ruW}#jny*Z1+9k zO;dw2vj~8Kx_YHlMf5UcROtCW)Vexf+Ksotnj+dsSM*?2?%2!u#ZGE(>-a}CVpA#9 zqaLz9?3cdeOX90!;nrkncPWU-^z^iju5Y5HZ>tfTg7dwmi;;x)+!=lMw5=$0j>6-( zm@!igS20Y85H zxYEg^vW+{d{;QcgIz~>4%inV-)sO$Av8h_DC+Xao%V$l{W^#aRKrddm^B2%hyKc+2 zU2bKSSA$o!v0IYtI2D3juLzOSm2T+oAi+O5A$fm>yaYQdv2mON@ zakAOnyY`{;%@#foq@lHmJKW{jJc}T!9`X`zB(r2|JTChH1LH;K*cl@J!fC*KbFU6+ReeuSrs zHi*9di(A#cdBc_wSy{?H=;&t#=}5&Tzf2LXhy91rv<(iYtx3Wdjeh&Uq|cn>&)7g; zSI;Lt{OtzwJ;m6ywlrzu`1$okyrs_z%Ybpenf%@j60ZHxgX;FIUZ#UC9*}x47rhIa znvT-Q{9NKT+T?^bD}!v0gqb8JBTq)r+f=)atTmOG)wUkGvKH+Q1{-vCe2&l2@BvIc zI`64l&o(a+xz{`|D=2&}LB3287Sy=Yv&WL)3Ri!+Fy734-4~gCah#9n&q-)|Mt{qH zB*p@f@wC#)g7T%HrcnX%5r=DAQ+$KZ`#is_T6=1ssX+|2&*3p@s`f|nxjB%fI$Vl!gDss%6tpB{~3dL(}Q;||xv!Uhs%of^9 zE%Y<-Fp4VHVk4g-fBl`!PH;%6Y5KrRthbeHdrZpp0e#}S2W`19_N#4BpoF#!V!&&E z6!tTft3mgzfWo!G6E&>o*zdzXc6Ni~USKfsuR@HN2orV^ca4K82TrII5Q}sWhB^}g zS=Rjs+--9o#s$hn2yZthTG2*6U`cf8ls}>BwW9o1GgK)oM9I3swe(o~@9Etzty%Uf z48C;ZD$mjcW@XIzkbW$kByRp3QgnFKO{&u?>G6N=Yk_F$AN$R}cHnZfg-`eIV!()8M`~8lu z8sY$(WbkX!ftz#}>%%Cd2o zpl42S$c>~WFrCWBFL-vH5=XDY`cn~v+uQS3BIrNq+^W4G>nrndjDfd{SX+qUD?2Z zH_J^qTOrEO%l30brJIq=9QP*z)^;^A*%-;rbM1S&bL;(hU{-lI4tyl#z`MA}%K4;;hs!BFxt$Rz@ zqE^TDc7<-LO!GwYmY=Hfcmpo_NMDM?$y|uO3tH*hr{}q|obMR_GFuleW_?X#Vg?EB z1)ZYNRz`5U?;Zrc7MHCWbs8iW;rlwn7&@$DLcI;hrK%xF9cV6)V9{G}s?DB|sC6iu zb`L0=y!>M>5fpdnm!$#H*NsqSwF$0J)6lSxlwr}<>HgfO)PFm~;oc6Yn3^~><$ju* zn_p3J!h|urH@9t0)c4%4$9T#DBMsU!kmm#Co@e?6$MjX5n2rFE8&9eGGS_JrIrHpw zp80k4pMU=w!HWMR-0{w(psWmo0-ozx|8x9T)PfNECxQ}LSt-_B0-*6Z(3bqGJ@;Qz zy$^gmnf_N#&Av=Zx_R`dr>#fRdeWG8xA6bqqi^56 zt4VwG{zlloK){$+l;kYja`)%i|4x5@k8(38O(7`$}I#&bOl<`*_B@mzB}@!kLx>Q1~`L?<)X_&j18J z4h-kNcG^av2v(we62!XJC4}y13|cLb1(iLgo_NbjkeIS|#)TuY_2?EKOELbvL0-v# zEbtf8{5rC#|6b~UHF|nI3jgc-uOq9*zqvr##LP?(@Jr9+rvuT#XVKAlB_*Fzi@y}~ zE&?Gg)Zow<`9L)@h0PP}RHRo7xQC#}l{IMnnbxf#%~&&M?b13Z%K|eGn;oW@8X4V{ z1j!wTu>;q?(IdK88E(rE10fD}G%flo@x1V9fQ 
zLqQ0H1@uYqHl?-_vEigR0PyvY+={_(ITeFP;F94u+VE(1QMsEWBpw+8HmLhmY3W)it-}4UbKg4? zn8*^sb@rX7S5ZY)K&T}s#X*izpl_I(8NBjT%f}UtYj1DAX4oFfcRMcch$OTv6>}Rn~f8cQi+X+3?5><#6JFbVDTPkY1Whf3Bd#_7*K#IbT%#nZv&= z%x__C9i-3mi@Vy%2*%=Fmf^9~MfG^U&K{q;T+@IKPdFHt<&=H{tu zWIVR$uxI@r64e7X%i6)*{k!_AKMPu4fh@;pi>PpGXJ@CR(2#Rzl+<#`{pZYFL9aep z^08SmKR77jFo8B|Rwq{+(1Abvu+E%}l!A%bJ^S%@0AA)|#46HX|YiDp+bPBsBc| zA3S(4^TIG0dfa@{o(t#>(qCCQJcxJ5HbMbF^i(9keAFBWZB13XFj5c9asHx1p;YO3 z_L0MfJIig`N=y&V9sSk%RI0S?HEoSBA^1U_@V{1H6o^A1uzdaaNYL-a#=FH|FrG;v z)O7ENi93EiFOG3(s|Oa*>rFsar`m){c}!dbcuvfJ;qL0E(Q^HEW6L`4JQ7@QG8~V# z0SgKWirIIZeERH}>l*x%;o*Nx(#FClZb4o^Ktn%G=Ax*ms3Z4(OgyF6l8&yv7f?^r z9WQxLb8tZSejJ-x>-)nmOlN(k-W|YY)C4>LqyW-d4V0!xlW@Ii$l;c!>+?h1XSlwC zMMU)9zkgrCD~AXg*~|H`ob~Y^Ld^1D+k@X&{ZF$<46sI7*8$Oa*D!y~2MZYZc`7;NWj(PU2^^Z6e(*w%_vVUcELSZ<0uZh4(4ub(iW&BLekC0D z%PLWzcYbFxuT`Mr^y31u0Bq%{j^=^mQ|D!B+pRtHT2tP>pZul3oxX6)JL&nQ@un04 zFWg*v5!-_eTAfMBjOqI?EA-zkq<;K3g{~E_znH$P~eWxA6qkns^|JC;YU+|k0|7X%+Ra0BL2!lHCHp`=B*F>a# z?h=&gUsDI&gLT504I-eq~xB_!3xTW`-ck6bzIgKBY(#fByy@(cltg zjG(}I*UT)1mRs~6pR@>5OUp6CBTSreiXmH1T3T9+jE%LlwV%0-l&io2b=Tb7JgftC z4!Ln_FbG1CUWt1G6Mf0!EQ-lChDICV+WLP4S-9mc-#_XNA5uEH#V6 zB<1AHgM))hBy&5bg=?Uo7cLDbK!dcI5vzyEOZY8$Un3)mt9p*7&E6+_W9K^ECtJ72cXXjsvqV@<8+dR{k!l3GjFakF3*L< zeV?9AQNCrkM(;LSsm^bZajnX+Cmw>Jc06Sf`2IB-oK)vCn=0lua-B3&-N zO%xLskkxg%m|mvA-pNVF)wRgdr$-X?f(*W{J(&nhoAdm-o~X1mzM2(*@U7mNYK=e% zj|Z>zoCac4go?^qs75!^6%QiB{6aP3D8H0FFV*~59wqmOn;=5$FJLo{TKBjoAF!CH z18y4JSuDi?2-pzgLR8`inr}Hdcql2fHDBU` zPwQdUC$+V;DW+RI{4`*7?QZ$IwbQw4-AYZKTVPfotqt(I86Ia~=n=~!c6W7&LFh?j ziKe`#;HEF^6Es}A-i4~ov%T0v8n5>g-_Oof;enVi(vQZPa>@ID-pQ_7Y$VqC}pFGbhY;I3T&zasrm6Du7Y@R%SE&*$B zZ}6{f%-c8$D%C*Hz@j^@zHHAO0zd&=#(^~MUy9ckIUKeoNJx1nUlT8aJcCSfgEG4 z)({>J^5HcauTC1c*akqGTvN3543H@nQgiw8WriP0?l({+R_`%<8X38ZD_f6E zmGhVs38(%Z6NZ%3Tzs(4|Mw)fymX0@syARfo8F_~r^&2mOM2}7jENCt%e@%XIRFZWupb(*-HklK<~_XMOQ{Q3vQCkLrT*O9G&3&E zhb)d58h3??uIt@taGA%WGgCZXpfC>kt8$4WgN25ASVeOq00+zZQwlrW-#GxAK9!VoxiwoA$T*A-H+}w3Y${ngFj4vBm@RetjHJ`ZCK;H+ib^?$#Ns93g 
zl~BZLcPJ3KqOD*6^ojZX$4c?D(wH>V(V&5YiHS+ds_5*QGq1B(s;yj2T?k3O z=EOGur*mf$Z)wDGmr`!@Oq6j6d;DYW!()D%f7K*L4yo7E5l5J%R{vu>$gQ&D%GA+{+)-l4`w-4!ezjaSnHAe{kuKRCzfd=Z=Yb(uJk-M zR+?4u$`wsu)i~E*8;VBT?0o$AF-7{rDQWWFlG#aySh)J~A>X#-CK1g5)$Zi>61 zN44VRM%y+k*hJ%K>S;Em&_+pPRbnYWCqR5?=*tutuQx$CL(6)=DudmLBXCDC^SBdo zFko0fy>%NP4boq~cHVQ&UOF(F<`d&$O7$;Y;$loD%f%2R!)N~J3pQf)q6R7M9bgO0 z1i*I3*_69~DT7Z^rD4~sfQXz64v>>)c9H4s#64*#kRHv-`v}|>nm;|)a1|u{*Kc=? zDq(X8yEiGUyNYDD@3&z~g#5#)1p^sD?2)iF9hGr4jh&Yjc`(wRMXCvaLRho}xBM^2d&l0?2So_FSe)%s1^8yiwa%L=E3o1Xep5r;f6 zzb&p>m*3Xzznm?yI9TdZ(Qh82iOrMCsj@yZzkC9^)%DHWi}L+j%^};`e**?Ms|5cx z`~2=|A|Ka5UGuACxh)21!~Bi6#d%`d_7%VZy^(~1b~>%3<$#22HncV zFl5L_gJw=201|^XZv=<8Ppiptoen?Eg=ObztNDZ1w7o zh4F@3Ni$VCS@y=K(b3|o@UQIj180mpJwK47HHP+kq?2>?*&74r{l4E&lxOFG&Vy1?Y$=mj)yMO2LD z=H|YZHiz7nXIz{+CS-+T)gV3YkIoh>S|{U?E=|r?B_sxSjNlc12mynPL`mrI>x<2i zfM5_l>(jb^Fl?n$V1-KbKn@zaxD*xwYHK27!wP4duxQbClbZ3csJOUC>VV8}6Cwbr ztt;(f&RH{yBD{4i>XCH&R`g}h*TTP4_S`rVjMG_{80oL^bJQublf$FYR#D?D;Ks2E zzDc(j0kGLFjm?;8!)Tk8m{RGli|Q7x z9lH=gdVoBC5f=J^ATArdNl%K) z)uHPba@!v0rHH8m{<-gS{W&OoIu(DpP&+`U28m|^aur~YPj2{Tz3~kH_FO?B$|tQ_ zAbymN7agyjtKWh%o_100C@Zz)n{s<&$4hXLAEXYHL!by~*$e5R8Fq}dL4Ss714*)f(a1(0Y@PDtN6cJkTlK(oFPBc{F&B!FYxok zpbZEUhGHTfMtY2itwXJvqajB~M;_2e04v$G{ZR>#!V+#MA(KzhCZ=OFU`pnA+8|&> zFSa)~W!-)=?`VS|lmcW;Kdko366GTM_PG8;L5E93n~tn6juTJtp;)hrir$uT8@VSkEIXTK=l;A5?N?Y>=)G7ss}1zf}c)=O`8{Q2X{`f00oqxP#aJ;R9| zK=ny!Av>Y0S{e?8S@Ool<3-HL@Bo%Vf#1Os3U*bHufJ8-h%) zKcS2REG=!Cv!WwJZ zZ*Uyp^jVYD0wk8{ABjb91m<|#`8Nxw5KjP7jOX{;C7qEpjGaRhXX8PfHneJ_|M-YR z{I@HJ{=aZ=DHs4QPX`Bw;gZ)brN47<{)}mgT_J3Ld$L;`tH2AOLDXFJ@DL;YKirg_ z))};rm)m5sRp>TxGW8A4e0Fx$sVUb1u(&H-hl(7#KfT~McJ3cHrN0wbRJ7jv*{;+D z07cz?mr`hS$TqceVYCV|Y<+1|=&tRB2=s&LUjUoJVwm^6zj)QW+Kg$JS zfOfeS^rQ6%Qh4j14^D4;6b@h=FtgH5Igp0PURPH~Gf_zWIT}?CfU*03??*&_WN7GS zw@p|00Xp4&((s`N4yDd)WP*ScR>*9F!A}uw1Dh`^rDAnA%YQ zAi(_LP~egOpu}7H|Cx~6-yiyKV{-q^3%Kbtbj!{aCb=?;LxnJA4!fYKovU3sAKUSh 
zsefnN#krjQ#;ZVV{k!2(zCQdzY?=`aDe+3Om4~=*R#oJ6uAuh@NdLhYV(=ywLW7YH%?D&I9-2B)P;lie56=zdzYXLqVJV|oUL{Jl7FYPfmlJH_xo*1X|?3)3#h%B!y4 zO4NneWBU@PKwCJEFuvMJvG!Ex0*Z`I+TRWfdkG}bn@i@aX{^8Mu!fY@;qX-&Z$Ere zv+zmR*~r<3>LYr=xrp*KX_tp^z7Y50Yi+jk8<$(cl6|mjP~lT` zv%PhWDZ?%M!$RI4^Tc(VxZ7D;g8Qx^oLcu2fGmC+AbCf;du?_yb346peO~FaoWcIq z?N#`CM0Bt8dOg@`Bc=|Igf|o>U|-^`;nwGx_v;mYeB&{znbWMH{7!lJP0zQ&9$>WL z#|BhjP*7?nxAdf!&H9$!qy@OAtDuv_~{eY1*>xTN20j{Ry~+Ntrdc1^(UX^vOB!{wh)_Wjg?CRog{e3cg|! z)^KOBewSa?X~#Ke&9o&UMO-AFjWrz{7QB}U1bl@IcD|};b$2AT?5x_XBp?>!9xPX^ zd|%2)c=E(E=zHt-_hP=uE`iK2zOb-G7p<`>1M98H!`{;l;?vJ~dgEG}f3{RYH@JP= znJ2=MOUxT?TZgXek&pKG&mH@_INaFid9wlKFL8@CGPOwG4}5|*wGj#Kn#5uc!S(%4 z4X&NPCm!bc^3Y-W*PzaIyrtmuSph!z?E*9Zp=ogkWy`STDwxMGvq&OL-aloF=utQE zNROWHL>zZ^_MN+y*J!D--DOQ%-gQfhL_GtUk9>UbsY$l;8Yg(J226S-z&6i1hBdGx z=r+~QJrv|md}EvZEJ1*;G61}}7X8uDOo!O8Twk0UfVQSpTM7ybi+B5`^Gy07pL2-c z@LoOciWQK^ZTP(1FD$n437ThM??Gba;?r;=5-a^Q5{TPho=4}t*~!&S7whM2$c(LB z&?1;NG$GL~pD+_WV(`HqgphInZqBE%(KYE#Sro#i_DVCI_u>aHj9tC)9=S`0ue^5? 
z+Z>!^&(rf#$4OsWHT&$DbHf=qP+2XZuCA6so) zrf$y!Huy2b19cl6t*U}zp<`DBMMPf3Dp_34b8whRYmec+$iONriNE5tq-a*aa7?KLELW_v+QF_5&Z5D+5QpwPfS@5wO{!b|7o-4lQZ0468+iY~fbHukBQ33yKs3 zGpr|uIsYa@JcUeVW_v9>&H#13%WFTbf9&oqcC^+pH;)`sn04J;PWLH2ThtS~H9fc= z$;5V9u#hbt$d$~|U`l4l*OO|^?&~|5;@9c>^vT>s(f*L{jasv>x1z%Q(UyCFz}aH7 z_%JzX6UoHoR)2B8f!(|A>wpfB;m&x+$np~N?sx7K8QT&;0)w7|!>eoi?lqPnADO3@ zH9}p7>+H84uiy|%i6uGh_Dd7HVz92uEOM^yh@6`z656>#-?@2^D$jZ^A2$5n98`<+ z^KGaP8IKjg@6OBBFPO&e>GD74;GMi?!?#SgrPC7HKQqEY|EUwZ*q{^K7|agCb`y2@ z)8(zbW2d`;5;1S0FupqtT2?ZSrxk=tI`hd@baxtZ$n%So`b8(0}^u8~fwBpuCrBO0$SuQS7m^ zhw16HgM!p4mESPCF0Zxnk9=Z|`@E1NwqF~I6x$p-R)fy*wXE2+$&ow$7|5i#a^~M1 z-0zg^k4Eje%A9$f92vg2zxl1wg{)t<843M9(clg{vtKAE$sfbR3)Hz)h9LBql{&YX zHG}u}9O3UrZVw1wxkX*nJ7++Yv(AttGip6aMz>h;c{FWj?~t(jiM!j(&+?=4NoQxr zj(z)1Tr%lw;0bn3BAqk>GTh=BP`dFz4Bv2L`8{}=#ez-IxBU5esk=90&%45#gQ#He zYnW@S+K{nvkNZ;N({FjXZ{PYOGDG)Xk%Ba8rSFfN1hOgX6{MxH3pUB?6c$0A;I%q4 z>Vl1RB2Z%LZv-=*= zub`+XX}@v%W$@ngoz0(BU>N!=n}TO}E()b%?C{FoiR+csQ$!EB#)*48GUMB`A+r64 zxYil?yq-mc_X{)X-jPf#^`N>BYU`ri3lbO3OwBsQXdDh;5NkR>;xelWO}M&AQ-|?kjig@?Dh8bz|!2Vgr2~_g1IEDrdG`kh3|%-xlm!Nw5Lq&LB8- z@@|!XY2XYGIBv2xOdlwS=2xW`&~$dz^ZfAAR`rP1ZLOCj)-C;?J%|++aJ5HWACB4C z|NPM!pMVtNkn%XI*AW;9?>cH>rF<`(Q%gq@ZE8%w#pOq_vFmQ+wYfspmG)uUOB1I6 zxW^Ba;p%9c*Vja90vXL(!Y)|xmmzSi;r`Sw^{z(IP7K88xgpgqgN$lx@XB6$W;Z%) z|M{^Q=S%ZHUK+M1C#ASh(W=i~T=~h)m_jUA#{Gu;R#<)0Nn$g1@RWO1=F{2d;@iV^ zeLlrSEFB$+a9xF_MthHy(4d7*B~3H+x-t1Bsk*O~I-hGqJ2@?u%ekVo&W)~-I9wt( zE4SNq(49&)p{0(WFqE$>0-wJ>XcP3VtgQ5hH0W7ibT;A^#Wqk-J+O*gVlq`Gcf0?d z{$z90WdX2(|AKyE{J?1-%~+fq(h`#Y;p2KT8J#YiAn+o&8;rD-zZq7gyW}*!I~U|S z+6c?HpXIDc$Lf-RY~6tUxpPl>PD0BPteI_;ls2(PGP|H2jl8Rw+YOvmlmNnJLTPDM zjuF|1R`~VTXDl^8*(oPJG3_$o{yk|B>&o1TRT5F!|Cq2+6_t4%N4=-rCKfz$QZ{f- zlhR1NWoR9GCz6r-^0Jo~YJI9&xSvOUB7?T&s1+RwZhq5e&)xwX?UWJw zr&^trcC*~e)OFESs^3}-u+;*s!e52Zt()%PXhoDl5;OG4p%k*S7Pce4a>cvK%Bpse z>bt)O6d?Yrgm=$B&oQy|e&fHTMmh%Qm>f>|BvVAFU#(MS*t{t-(zf%>_RVM1Ibb&f z3RIMAw(a_sQJDQ_e z+;eu>&zNhvm?d2azj{eV)MdV<J!mhbi+^O1E!I 
z{r=e4!75!kA*w??&v>Rgx}f0gMCjg5ZSwTH)-zROJ6Lm#2(^if3CFOu$jcp**1^Vm zvxQFI6COwR)D(s^QNm_KMurP9u*t!@#PbrK;b}^nmf|oa`fV{5P?5L?*hXaUru*w{ zySfzu5!k!rj<&XbM{hcU5AbYCS-Hx$f1B~+kP7vPX~W{NpIdf&H5|FDq-6?_6gZFg zdOO3j%Wo}e2^2D<50Q=Aj%V(QU{(S)aA>hU2euLcQn4!cr);Yz@n2ci7hCGkOfkvc z_0xl^djFYXI&=IZBzi_|L48#uniZS`ILMzxMZr?8Bq)grKIHEPlyEMvi$}9X9m;w) z#_B0;#}Gjg!9puXVp$*olV)@40#FGgb}K5Xa)=yfW_xkGr?SVQwsZO0WAENa{WZk; zsU07OTQ|}ch%Es6;C4pUSZ{=^*gtk7HtSSY#cCT(T02_QUsJfI2IRvb$0zpaj_@f+ zv_KzURIP`nPdBaXzHVbd@{+ipctvUCFe|FA%=Bql}A+*xoSVoKwalFX5O%bMpQAE`y` zseFax$Futvnh+a_fMH{ivH#7`TVnS0=deYd;Q@Br?D6R^n_z1|8JRRJJkxRNs4=w? zsqNWLN!c3>2|$8g4wO9^_8?p`*xkd5Q5t=wS@(BY3~j8egwLKmyOJE%cuC%0pvxmO zP>o*TL}ENk-q)pB0dUI}D!B27&eK>jxMedlDi7ENMBK>O!T42T@~iGNizSyKs&6_F zCJ&;#*zZ8_2qzY!e`b&Q%vuW8aM|1oZ&}cr5NHYAGVAPC5}{cUW8Q5w7}7ypcU)#B zI+(h7e{)s{-HmGjEa+aJYhZ&Ls`cF~ub|T;r9Jh%<=K4LYBq*^`hFx>2n?>ZN^a@S z$PHPE%I8j2IYy_8R**ZBf{u-qMT5_Q6ZSW1m9#9~lmMKezS8q-XypSqROA%oeGgdy z7BCN(ikF3kOOiJx0?kZH%%L$V!hzEsE3E>V-S5?PdK$+844POcZOPl(I{rNx-JIkE zBMUXzw7}O?Qm*?~QyN!*uB=QCcuX5$_i@+uD=6lci1o8GU+(^(Zm)c5nUr3KkF_Aa zvN|G~n-${|gO$0%=9y>YUa*S}`o}I+T1^IuKN?xx4<5bd@iwCtuy4kZ*9;VPpPMmr z^zdrA{U=|xSl6)%zI^Yzy=L0_snoiy_q}7*ea5tLn?M}3>tk(*a z5{7ZgpWlk|(nK=V{7YQDh(cIp;;UEP1V&_ODtiPAy{4~`&1j`x)UmIFl0jbsSF66r z6+wtNKkwC*o;XK8bq)2d@{17;8yQWQZ`f*A=p6O__C^8#%h-i+_jCjA792Y=G&JZ% zmSh))Kf1g`v}q_sVfs&{%2Q|NmZ4|^Mzo<0p@G}{&vr|H2C(R{z2yM7rU&wL_(YeD znA+_63Q(%b!l($4%;6i#{~9Kc7S+PV(JZP2@qLm{uRuRUIaZK5!aVh?tJ~lS8hr7> z->w`Rjd?L?Zn*Apr6Ba^(QutN0{=KW*~@=JxeK>->~FV#-aSp>4okiADE#k@p#Qp? 
z>Hx6$lHR!+%71^%|10Tvu_G716_o;^pTB+dT_<}PhmGvGhopdQYFKN0Nq162?II=d zm51e%J1}FaVj`|_#(#Warqz7HS~nlBClfV#t6h8bJ3Bk$w1y({a`eV3T$&0Jg_^&HW&BYa}>mt8QPQWovix zvVb%e{;H!h;Z>#oH0PH`9OB2oeiPK1$g5!VDTebBu!+d9 z5V;$CC9!Fnsp~ldR(!R7iAQP2ViPxMBS|EbJ4B6AgIL--T#2FU+<$jhzVnkgaUi78 zIpsN*tWieL%Eh30){Sn}jd2m4YnFH9pBrPjZC3_-3_4nDLT^q4dNrTWI)A|<TaMp~c)i$>rW`0Jh~C zpMW+H;(j+c3hcX-a*zZFv5>;aws+Y;p2CeD zbfDgwFK#HSf8>i#Pk_QpDxw#0IzU+@pY)OuTEdS66%rE4t1dxFXT_8+-FtSdB$zt% zf06eVU{Q8$yD%swAPNFXij=gpGy+P8bfePUIg|wg(mixbBi*QobjJY05Yh}?L$lZD z^SqC5e0v}J{rC5M|33D895R42%-ri<_gdF=UgvpURT0o|;}wq5kdA~6FB89pfv^vH zk6(m)9vmvG6l0R=X)M=_=0x`(c~;4+d(TJ^Rjh-3@(8=g3}+|G+GND1iIefgD#KNV&OZNBxx>Qo7xH1(L@+S=#oT)UBD(iZmi5y3*gf$sP5qT`sg zfDz@^-Nw~1SEb9}zQ-MYC^$ME(GQ?gyu)%go*QQTTvueQ){;$+#^s^43Z9^_LE};O zA=TKQcMSS*JmcBbkM-$duj?)i`$+&CH+I6M*(zY^mdUVFu&1cS^@>M2%!rYkQ4^v(Rah z0%~pnIfB9_&ooDD#_2d&A1MkOY$sVEv_f!sh#)E}?;a|VLTpp~;Sq{kIGrv2ww;pua&(q7ZfUos>ki}Q&FNh1> zy}}LDat}V*n3;c#o8d73`hb%he?Xqms#ny${TgTJU=D1%hFCxL{pQ0OvlWT_-!a_O z*+*nIx;h63cezwz{S?cukVtaT%z_N)%-dUC9!V~k$e2H3oy#&F6tca&8v(bimU((Z z_NIu4ffl&`b@`C@16BrQ=|V^uwmz|ES0#x{4j6T{Q91OC-ohs!WGM4-dlvNN&f0u- z?iz!L2xZOD0Hu`C#?^S|t=lTaNUQN$*@doTbHAAZ=W-bE1Y?%I(`VqaHfHL9$o=@p z+;1%qWUQ6L_=xq|Zg@9eIZcs46q!(rX?H+WOpJ6(d*n!>p=9X148SC=Ipmtb?YryE z0w>OwHs%nDjS&+kM-2kzH>Dw7D{cBoJwzn?{gp~p#I%LS-DPa;{fWBn2WN2PyXxPk z1b-k{x@5WDJ8U#7cFLm!8LK8yaRrEK`JyHZ^^EDh<-X71H`KDY!Xlh^<5m^;l}L&B zqax_s4_j%4IN`e&jB;B02JE@2gb450F>YqIrec!lNkYi4U*{+MQN7+FOCwyABC#L% z@OWsRPMY>mP;ul7nH{Yj#Zv&r#i2`NJ$YolRSJrgHM1pV0l!` zD-w2kMP+X5$_CF2`){Y|+cW(TOI1rvt(cb}i|PtQDbqCcq5EI=t8)Kwr&$v*^cIASHg%$PPWQH zvBmc8O;T$Q+VQ0FskfHe1n)a~+M|nOi=z{YY6>cIk{6aX4O}OjD(Vl$1-3h0qq^Cv zcs0|havWPjBbrXmVk?h5lwH=PD`Qd%S26FAzqI-OE^{%ZDy9$evfX%ZnV9D_a)Vt?{(5ev@QJ3mB(d!L%w-AEJ@Ykklwakfo8nQQzT-E zlr>G9%BD(BKi-YWp@fK1oNVbblqOZ(*cgr|-(|LxnG4Ed>IzeHA$dsx)-ma0JRNR+ zGH5Z@l}lQVW(o+5K2Akpnx>;e!5Wk#tP{Jq!O#QqXye$H7e@TsRY~RKzK;10%QbST zvB9`=Um`y{yOo6eMn1Q=E-tyCD33Kl4Q?5BHUxG@HXjj#Wki|(HSJMC;@CKAOT|Y3 
zX08w-URF}y_p%Ioa#3#oFsfXR?}g6f#6C0~`E9hQ^A?;ZYJKC)J6T#zfz~c0cW0X* zV~zV^w_iY1Q9kB9y_^QjTMhJ6g<}sg{BTPx$jT^#^sf+t+n8y@sz^vfD8}oG3XL$& zc$Z~KZsy!uEUcphcdo)07*ZQqAh%j0jBUH$I(BGoGKvhp~<080-&mvUO0N?>Cj2Ez4NqhEFzlNNJkAw;sb@ z&>gX!phKfHS}JhMo|eI$Xgp$8ZrB`e<>{p>m!S7TuWzDa5329nwGvPyrzikxtJe0H zue7DpNxgH~5DISNNpVGBN63XFlozmDdM|jCuxQ=72?bD}|5eM7%V~mwLUP5AE#~^L zIGx6NDN@*ao61Eg4(2H{>+8jy;!(bgA-;WEI&7`J)-{7ZHOBK4!7p7zfKkf%IF*Tw zJ&U{f>z{!WdIlu2hFU;~`EQsNKU9lWqq{wzK0YY-%s#-WtPr_|I7|CXGC%!=8%3qZWPG+2{;J7^!Od)OgLwP7QFl58K#C}+lRAImrDvTS)QGz15O zL5M;!4ZF~i@k1JtYgj-LoJmN=nt3W+)3lp)uV@HhXxlbc^}T)1AF+R9cmIk3*H~o# zH+yPh$sdg#%}MSaZgFW-E0e&APO2*FFnpT(O{C`OquUikY9r>oo)jXn!7d zPwu{J%}g(lyMI!v!)=-~Frd7JLRBsBODc@nZ*A?&g3CM}j9M*fIp)E5BqB@-Uwo$} zQkG4!^Re<15Y>DH#Lj?_kje#&>QX>rmX?w#0pIeYY20Xr@4ryZq_{^%M}xEp2d$z1 z>!^O@abe~D{5)Z)HB6sV$m8Jo*4`fbAo+}@O(P8*d06tT84kD9S*o;M9%S(T?&il_ zJJG?HKs7x=rYjBcxFFLZUy)tgGro%$iQ1WFDe;r!+`#SWBHS>Lg&mE4G91Y8cXU=n zhQP+|+9%a1Xy_v+CUmdi;Z-e+hgu?>t4H)&35%U&QwEFND52tzCGD!vM05GHGM3Sr zZzoRQry3HPAzrWiiX$TYmQ1gc!hnxQbx-3JgrqPeS)Ta>L?&C3*EH-;X0=!dYIOc}Hm2=~6VF z-v|f@j3UU&+MY|%k7|}20QFIodQhHN$+UMsCNi~5?T@2`^_WjHj@xQxCAD-2&{-du zLU$Nn8S-2?W3LHvW_)d($Y}Wehda=FcnC-WI$7dge=IIq+_)EhGFL=O`+3TM$Gb^o z($h9=cl8z>DEMygKy0Hyj2lUF{BrsFp~<|*vP~W9Ag7Zr}<-etA6G8i=C;gspt5fKi5(2Hnbu369360tbz-xTxjEA(=jH-MBth5)@qah zNVo+HQ}#UngJkoiU%4`g?%#!Nel$2?Rl*1LdU{mdNsd`#Aq|3O(+C@442uU~&wuNQ z4xI$IHbdvXTe3#JGhpHE++P(iyP)1HcsK>7IjNAGdI7>JhLDJ`GjC64u3d7SyYo9d z#(HvOds9Y$Akz3C)r|l{M!q)+3|?D`&++?{A33faj!o{GBer8y4Ce842b>Rip5mh@ zN=76k8ChAwBk7q6YkI1HZY`eQId1|m!0?HP=zu7_Jm&q};)3)xN?<Duxwla=3w!q?LB_65e` zj}NH`AW{;54c>0e+ZFSk6$gh|w_f*bze5KEL)!PhiqV>%Qyos1(K>7$M$#~s*PZOm z^U9k&^{@3<*eI51`UqUa?dtEol!d90K>x^5y+N4Ci5S}7 z6Bq+Kq}JjwdBFAt9&vkDP|k=BJuSC5kJZ=<{fzR(WJCAE5nV&Wr#RvHYza2q0k?^W zWK$-EYh#urT+`J>X|*_*%;YN1lN>?;1U*1#^iU?}j`~ zORY_##U+=ADwXwyh1n>-fLsogRfuy>6mE#=ZP={6yS|p5OHepoR0h24;EuD@;o;#X zXdeZK=eDI-SV<&)c)tx*HIIMuD|dGfP#N*()({teygzA90w9Tft{)q){TJ`bV-wVn 
zl|w{pPb9&!+Shn>X{r5b-vdaS*`VQBQKECW#NIM3L2Gc-!RQ^3q^D7vbC}HCdasst}rAT%^`8IJy|HN={YAn*4IbIZ0l}wLNi+GfP9FT1&E- zFLzlh;$dpj>Udf2Zi(_Ud*uO+QkraA`RR1pT7+{#Rf40Llz#|7n{sL{y9j+1E6g3? z^I+DB0y@D1;dE)e9qlF+}G6-8&pc?{(cfol6b(=oktIGyVnu_|};n z=mGZ*(~JI8sWBk2rbh{%lutPIW;h_dLfglK;d4j%CeBO#xtBwnJ~C##`bjNu9e6cf z(g#B0K~4n^lgi~wZ3>}jS+iy~5XOhos?*Y;kDCctk6t1LIX2jRVaZy)6^L=jr@N^c|!$+11oq0Y0lwaoLo4C+|$ zL449d4ey<*)1>-&TFLxqE~F(Gq-Wz`(1_eKA5Kge@$WB_A7VcH3{Rzxeiz-tV15A zs}7e9O1hy@34pZa!jDDeAFM1_cFo$P{mG9zH+;i$IO3frHC}{jM0*{z)9g! zzp;*Bi_cyD*@)rio#G2I>vnYp_i)Mi!u7GDC;+IR zW!zMfvf~X{hwD^YDVAGUcog){lxTB)8}9pZ(P(f)ucG1dGVp{v+TTX)a5uj+@B3O* zUH#2U$6r%My8GklJy=@7>hTCLjE!5}0lS{V*UI{uHSQ;{fKPg{)Roo`5K~vMV`Tuk z8xOOL33pcoRhtA{0tP*ei=wp(&A#s`-drwXjpIn`-+a~pc_Ni(eg5uAi;1_!fOh~u z)T|=LyqP`7RpnGO3pC4W_I39(%Ujb9M$I^jzhp;55CF2H?LvRKALhM=SP!a0yC4HD%|I6!)LBX z=S=piIH~_@F01XCMT5>fXPvH_T32plb+jxhq8GjDXPU+!b$1x8T;nS<1w_@QGiNi} zwHK1h>_X=Q7s_zg8op0W^}b~WV6pL+x)i7(PRa1{m{~7BI)%DcU4Qz8xRn0TcR8S~ zwN(=EnV4e@93Dr)WqZ5OL0E^ENbQ$Z-owu?24#DUIkSP0dGg6=bQNv2PFM|(;nN$r zuKF8ZwWDOsgq&<#JI?b9#BKSuUNj;#$ftvY1I>!UrXjqS&ACD#+;OP?s7V1i zVjB>8G+L=wCUqb3q^+2;QA14pweg>u^s7kQUidrYB1;I9z+8vtemn1!P*~R#_AFvueTX#4E?UowoCP z=bp?yr58j^2SClt`1^;uBf6bLtGf5$4tVX=A0Pj^@uua*B4TJVz8-b1QIno~VUY)& zQ(OiMFFwc!3Sq9yF>CJrhX_23Y4u%HfN9Qzc{lgMLy#xGO3CYBB*3y@S~JfE#E7># zD>uxYYq>mUm**KU&-NgB{f25_j|u+z@FxmzaNwrHox@epumE-^T+Co{zD%BKRSm>V zC7u2G_xkYC?QM_d_uPaRl!F@eUw8@ei!p)n{n5!VKCsv znDug#-^Eb&5DXC`)`qFx5r^5|#!!~GF6IlV%y|Vpc|B|LzBt>=_Q|iLUcp~~uog4* zqJKynMJU&uxwgMDd-Dy+aZV+~v5LO_i^~6XX8))SiX_o-T@OE|I893AbdiWm3j-}D zws#Hj+7idgXi^n?bEwgzwC0rVzvcBfckj?!@>*9|{48n7#XlOsaRry=kE5Cq4-XFn z@V;gK@OQp1kDs(pAd5;CYQM+b5})e^JHu$Wcp>zA`6Bt+UFjeNSDLlx2Y?kZrk~J5 zE}~2)f5*C;kH>4gCgSFl+XK!&|0>4kXjKgj9Tfol2u-kJVYPCt6*SOrOZr#+vQ9=n zPnR6cDermu)))QLucMmK1B`fL@c-1QrRuBQbMSd@QQiCFRX-m-xBTHA`R6Zn$*AuA z`91pa|CuMa1HUgdok0xUAz(p7!@~potjE6Y%9nmx3fsN5{$lv&p!7t2_-d&9Or7qQ zH=>ZlDJHRW_Q&jEs}CMp;`e&$xR&T=q81JkwUGizbfN~#)66kg8M(RD(las&fpr=) 
z@D~OtBHBeQ1nqtCA0Qs4`k;Fa_wb#6;U`op5;Nfh|D# zN+tkHaVVDDq#iVMdt?X+30g3c^_9ng5*GIrje+E3-v|el!rY=V9p< z$Qw?_mSfmJg|N$N-0sqhE@H5D*4gE$?qbnkG>o5@SL17Qb79T?thOt8ku%J`#S$^F zSk5%*ztBffO9ZP!juuyNo~42%xw*qXK)GmVX8Q=3xe z2+3moH=4v>rr)2JFwYe{%psoN40qbv-o9`7u-~9aEPD=|^4AZZ zjf_Z)*iN$gt8g0itRA&X6rLk?*ktXh?&Uu<`qqc#zBCE{Hq{Huv&SR3;IUU2I!Et{ zcz}6!R=RbZw7meq&H`l`1{S8slqpEQ#9RM(nlWNojW%8;n5-+@8SL#*Xhaw-Bv?xn z)H2s`^>^hW-uNJO>wvjvJN z2gE$OcDL|o^3>>)uFtTX;|DwP5HV38%P)QYS+DWw*$G^7j##8YCHc6!xzxM! zhO~4i*O!!6pV%X+QN9&){K|lBdClHA+8SQj4!khtCm|crB_$8h8up5B`^@wIdC0~% z){E8a*MsJodt;#>@(=dnpI|CM(MWQ7^WeS51;Zbj7Q z@gQ6G`0(b3p(O;hOgg6GV1+belf8nM)A5$QS^l}?UQi&HGb0y`X0pewFrV#@JLs~z zGfV$*`j>RfPwXbb)cI$P&YGp|1W#q3nshq@R-aid5-vH?8!iH6@s0_p%X#mhr7PDu&T7vUE|C;xHGR7#3Bw?yb-3oU(@phAj%=c2wF z0te;NKNO&D4I8U<&ObOfkTNpLjGy{s=ubA9z>N7g$8Y9$l1WkbjNNnn+ihEVXqafO#A)!Mwta4k#46G;Cv{bYTIV_c2)winkx(>*41b%M!MkLH<| zM>`0lXoY#c?^Zf4NkKo{<&)^SD9Y{2 zh$2Uug+(AIq3EI`-PKIku7R!hy<@!WwpqMM>#v4-xtW;~lRoEO=$3jG{uflv&d%`3 z;fi0WAsv+!q=ni)Z=heh7(#&(Lu2-5L4SDZ%ShN8p$7wHl5zFB69n5Eu_@ua`iX3z zCv!KJiBvf!RCQYRLSu3xzYoVy%%qJ(9Q_|cFgX7!unQ0`BeZw)M5YV=MX}$Jxi`-o z>iXtkke|9iaN+LbDzT zNGrypsctGTGF(-!QnPqn{@IhK;u6b;%-XfeMYT>Ik>Vv4PqaN^0IRcQfY79kQ2>`6 zH!w6h^VGi(6zID(UcY%=z{izigRvA(PZ?ow>X|%ooPnHw2b*MXz&J&nRwW=Ibh>Uc z@3upIPQ({_Q*1kSmj`Kq$K*m!l4Z;ltSjxfs7coFey?u9zb#H~=-GMvvy@dM>oO+em^(ZPe`PfN~9?9akWUv@8yI}RK6Q~3Wd%;>Ja zoB+7-QlhC%@sfiimwk8jEf{ylnOMWGtk+7f*I0)vzxZiP{l}@Lmbt6K2RWV1s%Kij z2z`kI4G#!P4-Z2ZJA6m)5a1s!o}Wps*UvZ62$tJFkZPu*^ggR7(5`C133r}W%G32c zlZZcU4kC3|_3F%x_@0PLBu-2xTTN>FT9&Cv=fJ$>d?O{| z`F?`ubjdR9V(HyMz%W!iJL`bsPq4J3uhlFh2$1`Y6f=sqd);LD5fw< zz@|R0|5VH?#rv?Z{wyVQUQr!mN$y{j>wlG*{uuu0fAk8jkA-L#4$I{nITnVQ@plWJf#v$kq>G? 
z7PtNSok4a2+7Q9>__A`>xN~yX%&(fQwoz4l$*NuP^BpfO9g1``CzrBAKI?6_9Rm5m&pLlF5B_jw|M|JA*~CQk09QFtOO`72 zY4fY$tiN#x7XOh$@aQ^5F5o1QNx}z3<-NbybAHoMDX&UL^Yg?c$d^B-QD?Vi05MN~ zc4NTo(+8@@bdQR|wR=%>?kqjhHuO-DULzy{o22--1T zZA(*^Hyqyy+5eFFBu5}(U*ort?DtX>?^jcGbb9butL ze)Hgel>aXQhd=Jg|I082S9HoBWtf>G@cmcTKNHx?xk1j-+1(wzxoKxvbPA{u{(w}W zyQNJj;3DU-YgS8YrcsaAohIbOQsfHzWPpqrFE5*EXhhl-)+tEpBSVf5b+iCmXxUrm z?{O*AIp$jngw;aw=ot7&TIM{Z2k8@Q5B79(UOePZXkrj{T5H*qN*2`t}9;yezv z+$WY46V2J!BM&mR?;!fV^0WN{hOdfw_tX_kjc-#%Gd6BbHu{p@u`S1v@a3JuUsoBd zyrB8S+&u4OET{@y+eJmzznHv04FYZY9k7)ln;a;Z>7z(;!jQ{b!hJkcG0vqYSxN1YSrl1!i!{;lu{>9 z8Kg819;S{Nw_mw|-Y~m8|BoOGKDR0?h8g)#r9nnJF)R*tI_X`>0+OEA)>TXeB2XiF zOw$||b7>C70uicIh3uw>HoXGOv+0+${-7&;j)y_flR^rzH5vnX-_-uqdea+GA9BJP zgbJ-3T-+IBaMgAUcGng`9=i2&+!b*6D7Z5xzCFaY9_zner-v>n;+SV~RsU-af2|l) zU{MCh640ea1S`gn5~%T9+x}<%{@R#pA65a-2Q+n8Q1W`jf$X5B#khOy-@+NtXim3t z$qSVpBc;aK4da#sJivEv9BvXz1 zC3ShPBW-dMDM-ZfgX0_7ai2R(4ZN6gL6b{XYUIcI!%5T9gM?vahxDPL+}B_4A8zhs zW$Rz$sVy3C{G`Z~1hXFH8&5vvuC>*xbe&4`n4yzIE(xQ}HItD*lZE6PdWv-_m48U$MTb$m$o`%5S6%s(Ds?;XI;UW) z5KTJTVy%+YL{@_M@?|1ceO4dHJWBwQnB~@sw21QaFwe~~{F;LkX?&t_y|bh31Ujw^ zaXq0Oltz|;D&<|CN0T_8vXG3ptIUTk!SU++O~#P)c$Bsl z82|2F^95Jn_==l5_thLoDgJHZ=UInh=?JexEmn9O`HhGH!`GX-_A2wF*9NQHy zq~h`9ZGtnGZzSNfUb}eI3EGj#;V}^c3ltV=*_THnSghLVaJ?$`IzE3tr=^d$IxbMl zJ|)M_75@JMo>EAL*snr#i18ueO zDTrP+cmqxwuDaKSP3lV0+7krODl79b-&;2*9M5E~7e-G4;E3;ORadLg5gX-+;sD90E-#%H2m zto3x^S$lV(c~xT%PEt#4uk`AI{KhZXAr|hM`IN|9mDy|0Wrd*EY7f(z-WZXfTiO3949}$sA8W zCUp5QmMG5JY=aw!4(N<#z%GkDc{)U9=v%1J`xG*AxLltg|9O55uQ5!h{feBT1X!Ut zJia}zIf=lAPUr(76{tF>uOIj0nlyocDnZ|YHGy5;!C_}^qqv^s3*vQG#a&JZsfHa7 zK=5CvQullPjzN)lbaqla}q#L7(vT zFJCIxpil6}n8dtKHya`%V*2MwGg=*xknz<0P$M!ci5H13u?S%s zo6NVR8#B#^kQV0&u|3*fj-Bd2o*!zS8C?S1(6VG>JAhSbK}`%GLrYZ7Q$*N5ds9Gy zq4z>oJL1E2BwJOe)k%$M>>t_yvhi7TPz;%TF**Bd;!Ue+@1P!kCfsTun84kDbtBt0 zO5M)Q1iXY(ewB!tZ-ymZ_D1vKZhkB*&xud}flpv#zE+}EnD+0%5d$R=88^eUclWLj z?(`q@Hy4tGGs3Oem>FYIeSJMpra+3fuvp$c+pWkYf6*2cDUuvS;DMmG7YSfyDhn25>?6-UFY+juvgn2LPOGIDT^bOpDc2)SW<3Bxx( 
zgZ|#%-t`a8LzNhyZ~b{ZBK?vhKQCdNf;LDn(GV_ah#D&0eJ-L_}ks0kE8j=NjV zH99KM(;oN1KZo_peH zzA`wvlFQ7-rU($lLL%00pH3ow%+EGA+5Z||Go_yS9rQ{O&%EmAQ*5c8FlD^8(Rw(D zS}YG|(<66c$m%Bke+_uGM6L!;%|T)py7~3`>seo{=yJ|_ylK0t&B(m-~28IYbjX~ zDuLO3Y0bf#%R^}^3~9T@BgEr~?!wHB>rkSd&CL>RJK9B^Yi6z<5f!RUBV-lTTM&G)SN54$ZF-N%} zACusR8^Z)~E#AA?=PFVZ*(@2-Zkz1Zx58ozIC>E=F1k-~8%9=gS3zs}EN6*oi7H#e zz0Yc@#1{shYpAQoU}0g!`Nmf6?Ci`x*RRc7G?oEfjopsjf*?ZgnoL^H%~*aDrupvd zEqvY@Q8rfd^WEOwHvVB+M$0$9OO0fr;P z)B_E&<@FC|ZUNh`N{>|__K3{xjx?q45k<2Kn-|tNoJYj2VdKWspLn`^e{CM%q8UAj z9!ZajEL7xwwW~O$!^`!?XKGg;fX;x~a`E>; z&m6j|yWa>D<9_lUGS9nt+d%(PBcD};8n4xcA8U8&_Fm{&Bv$+_jslalr!{-7nnYZ5 zeZ-m%SP3Z{?JnK7tv`BQzwe(j|1Rfyd*Zv!t|&CSfwF$jG+uZM_i*(TmgVRq5YMxS z#Af6a+@FMZc_)e#3!<3dmwXxdJ$K`-ZP%YIF3$Bf`6yFVADd`Z8@MaxB>Jnxyxl1CKvbcG3Y0Q zZVfa-68>R#79S{gaGH=Y9@rSa&zf2l2CS&)Bx=dfEZM1Ky#k8xwoQ{CvuiBNt(*nP z+q;eY-C^nxxh#-<%7?F_FUdB>W=2tGv&~E8$t6{Y#;~4CgTO~Fr2{mdt}$qh^9Ce` zN|}@(N5E-8&8QBaC8GLUN1W?WG*Q^RY8Ao4em0AlgO22K!d4ZGgNCc4!X2uc&scQc zwzIeLXd9dRyo}K8uS>YL`f=nIi(JZpqi1(?L%yyCIpXP?Cu`~2@h%-bQfXC@^#&^Eg6_Pjx9r?uX%j6_^%$>or%!Sb4gC5oSnHdyJJqGzV z2~rc2Sn*}g<8f;CEw0%NXr6J~=UwYwQq{1n=akl#d@Bo6SnIy6Q(HLjxu1o`!#kwrlk zhye>V7S4Q{#M^iJ+9v#XZ7swe8;i@5@Yfy%MzrEwzaFL%f)WJ^9rM{A%Qc?JhIW(M zD$>h)RUXLR7#5vxot?7UkjIvxl-aB+UDP2izhjR12R1MBQ}hu{fzt_%?R=D2p|95B@!0UgSYfY4z2^djg;no zef;hQTzp)zf@9bP{|24x21U|*hcF^bO#-{dYX}zrq{jQt$O|1*5NA=Lp|KP0gG%Q8 z^|>F&IQn*xJR(4}gqjoR4<;o57TqZ8rG>8DtBUFW#oCxO1S)}=oT+d zRF1)sZT#UdzO=}*#@fis;pHnT*6Pl+x`6l4DmKO_GmeS{8vP-|g#oeMp@H+Ex@Zl0qs@bK*KHX9RSxLA(D9%#@woy7$xo>3S|^k@W3$@2!ZbB zkQ^pm6I^CDa}%rT&l5j8GZiBPGH%`RDr*)NC^vU#E|UA((8utUw^&!SPvrD{^=hdvSAGREE0k-hFMJsu&rw=N1D^=Zwz^wL1(Z z-KMYtx$NdL2NU^vFZBd-uI|7!&gdM8SUDfx6UsS{=a<}H7rBRC?+7UJ6@eo;2V~z0 z-R46!=Yk4oh=zY_SX)f{t@J7sSDo#*iDZY~p8^AeI@LIzkvQ00e)3i9&HS6HGv-11 zMyiwspe{S;%D|sC;6Mx=TEOX)FY6(yc7d&JKt*=__g9!%Y4^BGd?-}{=$VecW*6q! 
z_$-RQ5m^tR1W|}NF`;=L{waPH?5$-F?M>%h&05d8m%$%uju!?PSrqv7#*$C3$W^2V zo{%rr8Wh`X&s9T==f{Ji_46ImsOXbiR#VP*BwLZ%58$pXI+k{giJ14iy8ZM-M1>EG zvb&O-R2RI+sYOo8AM2m*IN~E3gPk{y%DhiK#%dgMCf?dI-t= zjY%f|$QxQhzn%VPH8r61yz?XXfblcmZkFA8eSube}f{Th#a2K2sKMx$#0oz zmv1gA+4%O2x-3)eLEH)e{&NbF^JppgU$O)+2WpvT z7dQ;uRB7FRhF`_oztX?lS5MnFHD!@TXM9luJ^IKyB<-hMW$h_MZu3yIq zYm4bN9vn$hSCnocNxFg5&8}V<$W1z_H}GA$bZ^i1g4e>K+}fz&kkIW*-CiLq}&ydHSf^I;p#ZT1^x7cMf-+vR0a ziaaxnlUl9>JtV9dFG~O%&(0a~nTb)JGHWK$nY1ce7$zRlYB|C+N~YtS*qXlhxFXPu zib=zk*{i6Qt?<#1FylZu_h=Nbp!N>-lqVg|zk@LVQ)UtkZgj2|JU+N{xJQkGUI+B> zzYgc_wQ|m9%6@*Nljn2T1RwfybGVFlP9hDn^i-J$@ZC|fcR9||>S!{h=as_zG;7rMA(Z*TWgV5QPAuHYf7&cP2CSvyo>};RS)Dt=xdA*oQKyp z`eril%U%Wx?v0)<*w3P9p^SXIc%;m&1EHM)lwPc_FwYhycGvrBCU&6QKDm5;I+D7! zFXJWxi(dFRjpo&F3K`>8yPRg zP0Nfi6tc$n*-i*ewDeYaW|08L2B#&dtcM?L(AGMjY^UCPcX+LsZg@>mT-3|Mi4~VM zT8#(GNjOxE5O@Ff?b}fhy*zBp+?J^#uCz4ohFUG5`z;}{dg2nmmSZvf!RIUfG0sLPFc5#EM=QX&2@`1P1#7a! z8>@6wtXb|UKMGgB%y@is`*tG7@xVQTo}T9B+4?Dg--ZdpIqmz+FeYFd6!lruCtppZ z(rSuUv0-=0!8m1o*iQ_K#T^4|lnN#w$5LCCbEbPEA?mkgOteerzlT(MbPO(k<##n> zvh<#Oxuej~@y;=*(M8?6t&`Ct8JN zI(AE@(k`9le;2k8*7MobSE$Cr&97MA^$kL|CT(kz%D?nE8t{i?ot(;tFHEoeO35)H zFXS%nfBn0heK!j?3F<8905HF*zI>OUl+z~V zEjMns0(oAtiL_!;vY;z(RYVA**@e#L=4J~_=QlE{B#=i_K(5YOt3+e>5Fr5F0kSQ<1Up;^y$Wq?)w^eGDdEW0aLLL z*qKJ!K;CS-oy)Rt+!GZ)`Vc~iu|AYX%qmrYUs^!PZNG1T9`4*2f1CGjF2@ZvQU zeP7`xmJMx^H>%bhF3th!@AmRBY}y&oQuMM-1=FWR?*KL-t;H)fkCNKjqTY3?1ACxJ zn&f1P(ib}}@wN@0hR&rc4vLfDa}g_ru2>}Rn45yUA1i>Xc(MQo!NbpaSA`Gklp)86M|q8KZe zeqJ^(b5yQ=>=?NBXEMG~e^y$*eL}E3F00tS=tva5!l&SKvNp@%^@EgSt+}Zan}8xb zG^dCAOWY125eYLoO)o1BJ==n=D$2#I3CtE&IB&#OH=q;SV*cb=(&94(a6WC}LtK{D zd%Jh)oULpxh)Vn4knIAlrlIE^&(1azwzfwMyxEXE){z(6{pB}v#8sKbY9J{Uw5%Fq zV|;W@>IN6)}UGD7*11o|gua}1#A^~b5$gIlmYGs+- zmKRvGF`nCynVqF0MEvw3x5gfl3bIfBaP3XIf2WX1N4RQxM-O{vOO9eENE3|)%4-(X z;k7osXA#!X*(oj?(qXS(tEej)lkl&=>hg0>GD0a4MZvjpssxA)P?eQsqRt}Yqa%k) z2KZz|FfsuLMULq}{RoFg!{q|y)+{5q2*@+Yjpk!Gq}szZ6}-V_ME4#^OFr5hdn}}ZU-eL-`T|Eh}A^h!SC}=3{X{* 
z3C!~1ujl<9pB7FYeFB~ppcxRb&ZeST_&ov`Rf!&amBd!)PzvewzJ@Jy9`^XDNzBP& z8Q*qsfXaOF`9a^?uv%B);}iQSSs~%(xRrO) zj1eR^J1Dl?X~&}SAJ6QceF7!+#3U zj}!Hm)>Nqe04cfgZ+W1BNtX`uyLV4Uw8Il81o^ywWDdVZJZcth@|w5hFrDE5_I0px z5tYTZ$(*4wU89+kXj@}apguL5>OzE(4O9BSGkVdUW1Lyteuc`S*9S8pF5o`-y z)Wzq>*#Td9)%>djI5M&#fKh^NOJQ=9pD{z0#8bfI=Hbz%cIwd~a=sX`vb=)sYj?3Y~5S1CJbDn>PEWQ3Dzb#}pJf2{Sw>1j)InGv+G z%)GxKR}eL)Sz|r7hXzI&7#Sl{zhlyU$Qa}>$B@dok}theqSW5qU8YU@D&+m9iF&?A zyMKtL?ERpZ>d;n42UkHK+$VfY_zX#Taw(*%SymWJ!5Ri(B0}eH<=j?<-8UlHsN+#KIWW9OzPGcq z9u^~S&Ej&()P7PP7M;% zGPAupYd8lBonX3njQX^W``d{qx#lrv$maIS1w-~pN-#l*Wh%;!S2^BmITonnU z5x-(!J)$?3Q+HD0@uq&uC#5ar3^k=}w-U7sTM@okyh44xH*)3Y@?m?POar^IWEt}8 zNtGoFh!9LHO3Ice`Nrk0?2nUfbW(zxC;-QnHJl|u2Q;E0^qQDJ&@j=%${|hxJTKCx z8n9`)Wv1^?mLbsPLx~ks&8VB;c@&KIJ{G&avrswUsdAP+~4p2R5lnBuJ^fcLH z)jNOj13JAS3_99dG1-eE3-!x6IX&r$GE!iIS#edpxw6YyRdS^eE9*;nkh>2oCNA#I zT^_4A5FB>=n4P8N=O4Ggv~&Kmi1P4fBvjYf9$D2sQrMbhJ5_dW&&E2LE#E#+W?J%r z6xO_AuK8C~Y2eYLm7k|$$G7ETrQ7lwAZyO^JQAt zv>)w#1<&W2)i>^<6IxiOdWU$1Ed%jEt`yS+`II`T^+xA7Eahugo!{QP`JD0}1Gv~) zl0g!=$EWXu<^KnHZy6Qk`t}VEA&QEqh;9)WkP;A(?hr&eB&9_fq(O231yN9G7`kET z?p9Q~hHjA_N?Q6oFMI#*d)@DP*Lv>te0cfb-b;bG=DN-~&*S*jLCnpqAS$x$b5ZeV zdNOpsM{%+j$)8sX)o^;QNVzk&Sn5S4JUok_AtTlvoarNHnKZZQxLGC(Cr~2?ro^f3 z%TLzVH`tV2RE<`;v}RSN=qq(Q>iv9xwI&^L70V0hek?SHh_Okamx?jZEkJnJTW)QuY`N+h_3v=xxzAw=C zxUMeNb_Jr^_R-n&xHDGcU$6nr+y4(t3D@iQQe)QJM_h)%L=Y5zG$^pXOSH4#JGNi^ zpOlpoLm>0_H4ahue<4W!UDx-EL%5z27Ye| zL>e!_;VeBpJ!(s1a#2O&ns!Wi-?C4gJpJshy%717c;3F_`N+HayU2*~CJvj*%-5{B zgR_N&i|BG|pMIR7Ca^#QCa2D(spsFxM(G!hniv)qW`3EQYpJJm%t@-xUs=%hL`{v{ z0Bq&2&Ibtc3$R2*L>klJ|1fKW+?twtrbdVn{rL=8(A}T4n_0vTs~>oNE615{>xA2w z!b}&@IESOkf#0|n(9>cv4Z4`evP+)uax1}3Q55?Gn+*4qaW*{Vf()&_J8~%^ymLZRza+mw)U9W z_m)&rG6;+g%s!P!sZsGY=*dtJ&7XT5%NNcf7qbsOXN^HfBpm>y!IKA5Us!Z?g}^;0 zgzov0=%3!e+4b{1JMZvM!OTyje?l&U=@w3L%M9$;kpmhQLJk> zz*x01R|bBYuR&m^;k-)`)6(7^MnoqcnE=LB^`f2?ym|Ac7_afq^MQFS+3@R#W2jhI z^k_Y}v;eAqiz)g=>pL-tnUAO@P}Zv%ALZdZ_YnmA9W(u*>ph<7}3Ppqv}TA#Y~WT<&gG090|5{qowwJWMP<@s-$+ 
zs7+)uS&FYDH2Fe?=hjIyPe)B&6I!8}zt^K!_?x7&Zh+spu+4i@k-EKvILjt}zqPw5GMeNbu$!ocg!)z>zZ@ zb$ir`sq(7&k%0pD(es8=wsjVIVD__*udinA2G8Vn6LA;kNkCt|z2GG0+eMjYyHAfk z3k)Xg3l08K3oyT7j9v0QBq!%D$u3pV2&YWEzIheuEUpVACYZsc^&w^$EeHRSgp)mz%ozZxO;q# z^WpI{-}=@TNB#pj%EY+!gLa3<0P6U(Jgo!g9x5Hz*myqzs6)lohY*=|-|pr4yu{P|PL9}^r2+L}3G%*XZY$toK?0NEm$2cDg-Cvy zTXz~T#8I`6GPQ@RtM8QtWL&{k8W}`Mm=tT@prjDY5j-GlR#@Vad0Cd5<~d0YMnchO z4D*KAvdP%l*@LsUFyJ^IqH2~M$D9D?fG#(=X772_KnVzJXE&FY&I*Jb1+TcTF1JWD zC)1vILDiAPqfZ^NmBxka=+fBWWs2vRm=;KsR>{j#&@;_uJiGsdz); zn(JUcW_|L^5UTQVzj3n#Cat`>+98eHW)IR?KQQ`6fFm~>w;fX!W$=%)Y69ATIeXb9 zU^M2lGx@>n&I9g9M+oB}A%U0E7?H zDk@>nEcDAnL$Ym9>j%cs&yPvvQQqENEwK^humqKyAkub=Bd}KHA)jSh1s6EI2e@9;YqYXqn$M z1Osto@>k;wH{RsScB5pPo}y7E*P{3=lEyfeH5V_B%U`{{H-YN~%C?Lv*AgO5WaFsn z-7uH%t;y9^j^`mBM0>M%%(Lm%;%IjE@dOS0jJU8>K+KTNRwFyn)I^j4C zQ?8AYxN9dMtwXHh_#X;ZJRIh&+yG4Qowocso_DfSYNJ^(7oHsLZ=*An>7oSP%FdGu z42>y(QxOyB>-Crjz1My5_io!D&Z~8%c^m`*YR=wkWYbS7Rkl*izGOMJ@p??i{8i={ z`xoC3IvauM)Jg*TErQCjdJXPFUyFNVAI8iNKD|?K$*VY;^B{Q+Np^_J7W*irt-uO= zGEugfb1-PmR28Ro*wWq_3^Xzy$JRg}PeWbdXE-|(SrCF}aY(vrM!Mh$CNXHLE=;*u zhcP!ITAss*fuhEwfF>chCexy!aak0X>>|fwsQ*4*$|amV zQmDT}9Ke7!doLf`UZTkZC)pV;La0{`_FRWaJH82?6c)^r#}?`Dp>HVJ55xl;8ioP% zFY@R9?w*%t{{z`j#&24*J~ zhUN@kWPEu69zIPcEy`C1M!*3RW(2275+&T^jKnBkE;1D5D4ceeH1~`{*%6Y!{5*NL z0^l#CzPuAN#uNYsejs{N*{+=i#ed}NJH6i9e!R8LZ9Qo;0+Cy31CmRa&0coM}p%_iqG*;r4B|ew0I26-0*NyZ57@S?`B? 
zn;mxd@B8Azz%(x@&>SxEgD@Xl+}NlM!rv)>*rAyR=pLB zTbIEZKQYL<;XOT82Xsr+Rfv`=w%bySFfv}dd&hfSo~>Zoc$(AN1iB9420HZ!?n!rL zHg@(j$EAUR+P{hvFUIUE*c+SMtl8g2dt&JB>XR@eGD{awN!e`ouE{5So(yUQsR}4NMtK1}KU7gLdiSoGnLx^KqpeI$8}u zniAKGopQsl?i=}a6rQff_)%Yn5b3QUYFPW@z0>A`6i#_18(l;S0+e0CCpCxbfu3PO zK^M({jzVCwoqglWYnEuO^wWQd4I9X_&S33HgTvcz&!aR7ElJF8r4ZlF)@ITRKwf!c zt4?yq*&b!C9x0glSD?7eC?`+l4>2kv7 z7>V~X)}gB4+tKM9HK|C6Yz)t-)0`0ibt{eqw1w2m&gJ?XwM$HfdXGb9=EBF+QEyv2 z0ZiO)Hj$G_O#|mTQ04!!!?#1EYxKEQ3P%U)*dyQw>GP6fjHnuc#Lwd)vso^7G_2^q z%69D=bwXju5;*$)_EUaDQ{SAM9oX=(~SCQ5XhLYIkJBF=>|6vW@JhvT`HZ;Z8ZnFmtXClIUe7S=C5;{5bQ)Y$ph*)yD`l@4i z^NP9@7%!L=U_ikXI_^efIFW2E=PZpQZ4@rpMUu$3@@*0#$#0N#%|Ej9AM>2QWiPP* z&8W*SR44*Z1qhQkRlP}0RES|l(#G)0hndAcy?ixTr*5CYz=q6Z_1AF7p@ZT17f=9W*k|LhnI zdoogOVN(b#El5E-sHi&+5P9X5RNHAUJ7Vk37k?u^Q)4gHN&pyE;@UM1iug3Z0d1`e ztDZf3_Cx)92fg_!PeDRe%X|kN{RjGs_I+7GU>p20*}Rs6bx|4X7PmS!&SADdedlyN z5QJu<3}f5{uQnaWec7dvZubWtQG;DAL6awM#`Uw<#yK1c`jwA1g#gHWTf0h>{D!^B zTjkIsAK!Ga3simHb+YvL^9$B*^wnU35wwIEjnZ&Y>g+##Q{ejNt$jYU^p}a?ZXztv z4FAE1zv93@E3gI&Lc?UIMNVFymE_sArNl}sayZDs7MQO(!IY_2X)z6^V8sW~ z-sMz_=`j~_O~5b^X9ldK7|sNNzasTFC!4D1YOv3G^fv?U<@<4fk|Nf&jJ}J5;vVd8 ztA7LM|Aj7`l#=4__xHci=>Ok%iOP>3KgN|oO+J`K>F4iXqzNMlc_9oBA;8zTOip6@ zUp|RuEN{Vb{a-)^REKKm?yBXEQ=2W)<;1sMpoMb*t_E`@L5PWySQG8&3jh25e|~%s zJL^4qR=6dJDicr$)w{IK04T^+bJ6rz+~n1hdH+rfq6|->>YXb=fV3_etB(x}%Z6-7@cAA3jm)TvYH3f9{m;3lO14Yg~({&51-)VM=@gW-Oxc#+Gfe3X!Np~K3M z{PP&xsO{o8mhh9^`nBfgG?Q(}*LC5JF=bNMn)ZQg?{YoM17nZMd(eJ9rVh!8oP^^k z5pGW*BI*ZseY_>S*4)%P7k$)zVqBS;PKLCK<9lTi1tRa>7S=a6WbaD?qD|@HL+XSM zZ3;c6W6(_KP@>MB16sQq_~xXyQ1WphZ$dTDx%Q_m+StLlN(a(p(A91zG{T+jNXA|W z2?nN*SKpIK^rSK)A-O%7ugRj5JnCP~A9Y(h5)5gTp8oU}pafxIS6ohxb~|F<2p@j+ zc37JjS4lwELB1(We|8#(YYx~oNm?8Qn!n}oYIU$#e{#E2;e+n&g{IB^SET!IRRn-n z&F*J0)GwactvjJn64#*WZZ>>AA!tz7Raz*aSdy){DA(;3#E7+R`B^uY5S}oz$U5H> zisw_8H#F3==+b#;THyY5Afcm9$nC^&k#t->y<@NHlXsp%{f8A|xV5!QzwLzy=WVBu zSi5@xsp%SkcTiO?TX}&F_+^G`Dqe zTjt*6*j@UEU0fZ63oo{{b_RnUKhh{X)=q_ut+>6&>P9_x&+Y6ms!YJ9J(nJ}_?SFI 
zR&6xAo1IqS+t73T!Eh0f;s8SSL!n+~Vuw9!yrRdjJ!IQtdqpNRQKWa^VAocFW&EZ8 zlVR=X7caud?Hxu3<{m!M>JESma_(R&wvN(%%HjKJXSi zi7-&h2}Y4uPu|YP>|{QpilcpUDCQ*finBiw&p{|*vlJq6?G_5~oUgWiJH5t+6>nC2 zt<~D|C*`5ct#MUXfVqCtti?JX1!vtd42Kth~it3M!9 z@|c|l7^aQ{qIR-PP%U-r z0IvV>b7{?BlPNUu#wZqt6TQzG(W z_nXSsAG*2*Rm$-3tsbvV79zs%@5U;eS?I5?w@z={_+dI)>?uu=b>0$UNA$Rb8aJmH ztllE{yiiwvVp}V)-YiJ_A*J7?hIF^LRp@0Pc__bC-2)Hp;s+yq0fjm$sWsaZhLy=q zPWTR_)%#{g#>#PY)I1qz)(FQ!%0zKiMzZ0_*6u5JG@iHkPh%qLbrZYpFllbMbpxLx+)8rA&;CC5;krDFU*7Z9a!v_~@0% zdOawop?uP#`oo70VZNUo814{YEgr4a9axzS~A6)hD&^FQYDb%S3 zEJK_OVYY0&q%pox&#`gN zU{&&%00lxqb+Qw;Mx9+;RDe9NNerpL&|ca)Sq!)(k3XJ`;&;k}jA%O`7HKxxB9FD- zqo}x;2FTOmoxk9RUOwFFRgfzx6@Ps--TY!N8Vh*=ZUg%%jC(thij_#?=jVR+fx;OX zScM`9uIN$X#^u(c>C$pWyapZC!r0 zTC;bGH7halGau+Pz`1*B6Y$j)?KSzewF0yK$oP=E>f5EOI!*v3JnywM)h7Oni+vEY zC6hR`fw`joar7X5?yY<6Sb*;yVv zmVY+ zl=y(Y3{nJG#$8=*Y$^V6PzCv>Ld8wI`;0DVMmcuxHmbA2W^#5?*7_^Y8MGJKpYP`s z1fUNBh3oBA{P!UVM&A>&{V+5rem>Fje+5m2$rfReVY^V<M%$4e$=9i$q@3-M_PEkHr`?Ze>^tD}85`M*C_M{=JZR_op zwl?mjEB$<;7A_oPOeUCVCLx&q_w9!@6;3s_4EK6nFRuR zK*yk4MP1JW%UsNn*{MW+E0V`M8aihrQM3oPEBA>=m8#-a**RA#d|u$SF*mvNFD7q2 z4EKGrLrWS4FzH7lHUyjXd%Cn;-->k85BOsK(o1}^yYzb-p*^zrn%fk(DGu=C3hJ%4 zl=ye=0A-Iq=mCQzmT4=VX_m@fG4)EborV!5^TMk}!N>z}cfYGaci)>nl+;=9;CMB& zI_1$hiX_0|U1Wu%9oc=#;7W01LWL4c7~tV?*s@(FR=Rb#cpMDsWX3zs(;#oBgP)bj zb_4N3nB#ZF`a$&%Apy&^Um}J3mS0{|0B1P;B2t3yNOR4&BPZUMhKWf|R7}P=CL_Z) zzmz0rs87Xqb8BVWp z(c@hvn!X{+{w9_3oM*8%IVnQM>;=_lchKQ3d;fg}-y;W^IghaMw_h#^!C;C&R<8rg zB@Oe$gyHTVAhQpZBh>KFSwbFlLaJ)dd(1wRyzc_FPuSNveKl(rO{ux;k8QIfSVyTE zy%)2u@pys-ckubw2NZaH6^848NI>wAlLz+wi}|#V7jqK$7}_tb0va?s`7Q+^Ap>mc z%XgJU1MJ7rT^2|$W#~ZToTe7|+8#EL;fCw={ik6zLO{!a#De4iPlY^H^?Kdsw_Au= z2*<|8a%cwMx#Yo-8v?+rC?natr|$0XaWltRVGyo<#M!CZRj&N;M7|ocq)42F{_)#j z1aL}<&uk)R&X`tkz)GlDgX^<^@R5KVi{i&OBfHod@O#&n!B6@rGl>Rhyzlew* zIcGnArc}qEcwWA`lLc{T8VI_O<`|bk%M7`}^mlwMJg} zySIQrd5%xgfl}TRd*M{$^i0H04J*UwMM_xG&>-glk$ zVcgbYk2oY+b(!6xn<)jSJwO>lK;5}Se)GSRr-u_=j{;KNL7qDzTvkVEN}M53YYW$| 
zP)40KS}zy=PWXEsABAo-jB~f`W)A?>ASx*V^ySn5GGT&#>pZeb#!GC&IK?2PE!^6WI-M`+{=PW?ImptzP zn)v^rRc9K<@H#QYy!5NCt~MB*R#WiH<^M8+@es^a!;h;n$K;$Ua@bP{3Dr_ElVC(s zqewYbT^z<&1@V>K_e;zyoW-pKX+V1ztdzy7mD3j-`KKQrqesTu@}`ODa&V=E3Wz< z^7kK2Ki)DZA!u&jZr`$S>q!=8d(_~C2B*vKug@cZ`k9S6%QgYbVG4e3RyQaCgOQ`Q z!V3;IAZ)$UZx~yW9Hf?FHknu2PX%0+cjs41xf!8g|?0Wf{p3;K@dwl2L2U6Qntqh_O+e*jun)oJ75JVFq$%VPn_ZrE~KS z_eSf5Bkp_Dp_#M7pZFhr^sx`abKt14E$fyOuUbs1CPlI&<>qYmJ) zVg@T{4+yDcw3bSDw%54=(~@qRD7JJBs)KX+HLrRWAQbSWlFavYJ%FVg%6X`vTN?cj zLpi(f+Z)_mmcQ>HaNO@_#a>u;Hwy~f1Kn@G>i_z6nv4Vg@b&V12kC-Z{L`s& z**4H*151L0oNs2{V_X|}erK#{0dEupcqnOaI(<1Ypg1^6i<*A=jbQLqg&BbL8 z%~9fKmcU3b1u{64_npfFDs77 zCxO6c=_gO-6ZtfI=G_;CN~;2UAwfRpjmNVJCz}EIjAm(RMSy6N6;E%A1A6Aeuv|bk6|r$6ufsnw#r^<{OAmhzok`w@RxAX~pZ$dULC zIgJ?1BKXXRV;}$)0z>9l_H?bP)-}y0ZmR%0;Vs~ge~~j<(xDQyIGmf3nVS4AN~Fm) zBVoWgV(GN58TKtBxB^_*$~t*W;E7Eobi*TdX*+@>n#L$8<(^9qB2K1VUA_dnwcDec zs#>F;%X;%AAXxRt;cn%b45_L<9A3%;!|@ij>t^<2B)6-q8i>CvYFvJ==sLnKZ`=Z8 zY|4ugjH?UBC?4|7f#5+TdCF#mIp66{12rSK@M3qc-TZr(x+Y-<V{_|f{ z%^8-;Wh})J+A=Q!L$se4OWSFfnVDTCAu%r{p{dPcobKqqI8-#tgq%f;2XY z3Cqqaf-a*<7Wf5$_O&|}rh^mO2T6Q~LiSb|%*c&-t0#j=Ks0CqigzY|4SLA2|18-5 zSyWNy)i;pFCn@a6PbwrLUY}vCec#+qfNxHI4*<()SdoE3F;oERejSKJmX=B)16JDW zx@d4y!=Ja}J&Ik59aVFech!v5PwihSk2F{qsHB#ha$xQ%1qmWPj&sa!f_usdkQVu? 
z7Vjw&WI>-mX2zKyCZQxFCUWBJDP5VUGz?IQ`NSus3I3E6Ce2bhAZuHS@l?kV-~D)x zWh~HzT;7WR#JJ&JD?o@V+&Zd&O-2HhUg#!Yk+F-BRUZs?OcQ;t_VQJoCdwRvl=jPLO&_{Sd~@<(NR) zDM&f>jDX(<8-^?nYv(YV8ocMqZDGA#aY6miI_0K=YCXxuww~NGth^)8XwS-eDnxN@ z&}6G>fOZ2<90JzC9OS3?XK&*ZX4n&8nHj~TwRwi%2Tqu5hv6y<#T|zW2p(e*J+)=H zA#Lkq&pnmUNj4fq|Gwn?+f7?n$FK&{!fD?YleRw+6fs)MbCRJ|@^S+0TP=deyO+gRAz}*Rty`$%9T(GTA*({Ovyuij| z?@{v~fW=uXEE_poJlJ<<=i_$RE1DLRJ&2`BoqP33L1CEjkn`%%jT;<5K%fN5YMu2`4p&gxksOVY8!I1 z;%rK43Qy2`#Bd5xxxjCB_3BMvI$>Sn$Vllo0-KN#>N?{M?2WCjVY@<6`l9kv9gT(u zd4fgyyIUcA%WX=hyh4+?Kjj zm8A#WUsy%_IglgK9}USl`KNbfUK=OT-eY(8+Hcn+#~s}uB?llQ{ZK;TIC@LXQjWXC zxuTozl>=XSQVB1S(}2|p9NtFQf`D2YdZN5z%VD#=b6Qz8@%{Mv^wJVly~m6$ufw+m z-j`kua3WND2w?p-NmDc>Kos?u=88R9d2=l=J&L}D>+Dg7w1{J-E#DC#<>k*JocF_H|oW2%I+RvKYP?q;}xvB(Uprya{3m zpD<^dHO;1(z(bAYWaj3xnQ>F|8+-a3u6s0crXy}2RX`oWm-%S}QQ^~sWWm=G$myUk zpl#PxK&NP%2|wjNiCHt+c*C+}RFz-s_giR?_vY`m{Brlew4eK>pQJj+qia{N?a+_> z@f0|Ml9NI=ld3z~TQ9M&5E9+bWFjfC-Luy1kl&g30B>2 z14=+`=7QH2KKu+una<6uOAoW<@@$x_bJUQ#Y#W{s;hgvGHpCku%E2_QZBAm z5z=#l0ZJR)wdK)^&7GV+sj8mxkeI4p)dr2!X1qpx?V602XNq+At1z{1Knol-hGt^2K?F97|L3(9u|FO5e-tu*lJL5}$`04H zYub?^H|)3t1$9J4MRNu!{p7~!tm4Vh+{r0_zz2#VdS3YnQl1HO)t}B2u!^un#nK@V z{sxV}oj%N69ZJ}5Q3(Tp(7q!?J9p2VA&7!dBBC=HKb{!)Qycroo%sD1(En&(p`?KR z`|Im}L-z4sZwEm~*h*VYTCvRcRj-)sxLEOJ_8H_&nc>r35ok(ZCE`{fT)ffKgZZsQ z6hRD!kEwZ6zRC-1K-vuNhjaL)gwLJp(pG4#@42dRFJ@2q{{2%fWWBcE% zIkj%WnURjwfst^X0U?-3qX6o;_y`;v9LjQ+h%Nh zJTtImt7Y%JxzW=ImA&8AIJ)ncouVK(8nctThW~Yw6`@hN$L873!d9Yi1K)DlWS`;3 z{-&7y;@p(~WS-qy9Z$@G_TZ;d?!n<}D3Ip>pji9$gUd#koEL2SWmBBM5cp)6`L%&s z>smZH9q2@II8ByiWJ5rO+Zzr&Fyenu;Jr_ADL2yZ-3)3jd;|Z2H*j|uz z%=?6*KOz^y>L!2+CL|_?@!7Ly{dGVhfBFxjMNpBK^_)KMtGG`#;>opY7S#(0&Ea3W zMY2!ZSc2iwY{}LZb%`)$ti0N1zEX_GrrQaOeC0*1(mBJyxA=G9g_;@Xrjp)PcKTY{ z#39!I$0Udr;RQ%hbKU-$ADDVyQC%r@uWfE)IiEZXD7R@hzS5xYqFHZN+6SviCMdlNvfv(_4uSmhv#VOZQ5Rc4 zi!?h5A83Vpu-9>@gv21hBzpm|Pr?rB;rhg-JM+04z55*-?|oi&(RVmuRFbY-C5|9J za0{5O-uI7(7^U94g?snY!360mO~>$H;7SjP3uOfPMBC)f9mmnp_&-~UM@gYArL11Ob)60`;GddGE 
zR5{8s$W&i=ZII>Xk4LEsdGkIM$42fW9 zLgBUglVcJ_KCBEa_XezASrnh6X<5c;Ww^;c<7~R3%ME-=aVzTNY=H???v9>345GnB z8tjl)nv8B<9j3%Ttr+smO!GZUi{ z4+ZBXa9Dr#!4Cx;6})AjnyuQJj25a-tEaWs-i$Ktj;S52W( zUY!vtz+om9`V9MJt+J6L!5)crj0 z>;a&VOY#*0L;Qv2Nw7gYCEHXo)YN5txRr|zbyz5fiZ|a2T#}uHH0I^f-eOu#A6jsn zCJo>7g50gpf4SmFt;!*=1!QeF&O4SU1W5w7@@BpOe)DYW6F)3Y0SLbb5r)|ic}C3 zVNgppZ`?p!n;M*u??RgUB(=mpK#d2}U8b6bVWgK(>e$lArFswfn!t>UT?HX`mFFa< zBR9v&Ot0rzf z&jSKkv6bl;!Tk!GP|yo?(eq?@}yVS8}!`k4tL%Dh(A}WH43B=sqf$*ldJ@g6+|#l z(s7*s*ER<@y9z~jUMy|Cr!|neVEpwslEI2RUVFFSyW|b2lu(3z1XVPil&NBilMl>> z8TxEFl4GM8==k3D6$c7C>-Ij&d|SvrBZAhi+?=~1XOsvf?WiI+ZWZ1#PQxEnQD^;h z9h<~)ngR&Pc#Sn#;qZJArrT@m$uy`tKB%(SQ{b<51??|EQpFo@i(Zy7`$wE8@4SeA(fVrl6#l!0sfkv?hnO zW|iEq|2^Wh6bF9}0_M`EvAZV+T@LKw8q*@4yzVoRxA&hKRnqr|;kka

6GGlzx8~ z>;mAh0m#VyhzH*hql@by4p=dk`y5*voe~K$z)3l_#B*A(+L1v$Qxa}sG6qn%m3s;0 zLgl&v^GoN2d2vKC16Vh9LGp&NoS3YqCI8^XZXgJx;pQ&aKC^(-^9athf4sEGi+B`w|u(C(AS5Pw+Bm{n&vzB(hrO? z&K>VGxk}LObGiH-Tgsp>GOo(=biX4W;c*hWwQ!k9?m0;tNcEg;1pd%pLR*3ur%3wL z58_4C8XAZn-&T`AlUYTH=fiRiykCYeC4V+f*kXJ%%kCUtc>_y|-?D`+_?{LIwD zfJ~H4-dGA`|F)l2BpF-m&MVECEE(TYJx^-ZY&zam2)Q29?SkaD(=<6O%mXOLY_pYJ z=m{(wOh+|AN{b8}r8R%SIBv7TLAnq~5O|}an5H^nTKyBann7BNKN5+Wez+s;)O57H zXEju#11U?oK$@3MfQg~BhsWJY%aZ+|P+t>3h<&t;4M1qjGt$wiU!YElTpTJ}dxOMM z-{Dv~dsd+ZOomQ%+6`flmEY$}<@MmmNj-b+T($sP>3q%OJ(~^L zP3@D7mnb=}?7Ub37bOyIt0zFa2qplRSdD9fBxM@lhE-Bl20CEoBI6Dcv%$PnY-B(J zm?I5I8n-b0GKWV;a>8EwNKo%4B_*ky^U5~(o{4tf(oSAANJsIug#1j^fWrNFnWKuz z>P@4IUgyAc2l|efZ&GU)XcDgDWC#QO&qUGus~MiWgRN*VQlhy^epfSVR z*787)rn22?T-rlrvbemZ zCmdtNEbEC;^6kC7%F(!Sf*;3A4fr8tDZ{PB&CCk9^~J}K*<91)%0SRumh?sv)d0G( zI><7M2;^@m2)GBUBL9Dyv)(9)to@=>foO_^I7IQ6_RmU^RC*-W$pSiUr?)_ zKN>0K)8V2bd@G^yODZ*5Sz&_Lx1k9Bov%;z|t|L`c@}I^K#gGJdP6Abc_7?z#;x8JM zUzK%+GH&MHOr5%#JC@Sx9VcUxq4DsQVQhOC&>yE+pmxH`dZhWlOcAwgA3U#UUlpB8 znUG6Orf^z-r%l96m1G=MSJz(|WZ74^MHDj66DZxNs43VwtR^QG6KNk76=%6^s#j1t zGxAePw%5C<(CpK+8oDdACwk287;p%}SFe!J&eZDY z@KE+tB2w42Wor@9H7oeHc7P;;K~$v9STjFzO&#fL#_^BchGB$;Pz>;d+p)3BKss^3 zo1;#v!Qe=gt;k@A<0}aV#;DN5PtPu2dB|oIh8Zo+j*@P>{;6jg7(S5>%ZiSw;UHEx zEu&%xmyH39Am#UX4@OXVhA6cZ; zG=>4s3rK2{%6iH#AWh)#@7U&bU;}ov{?mrTysD1j1=f zryji!e{tOaG3r$0nU=@h|Dw=i8`u$93t0 zX3QN%N}emDnD>u#Zv0BZJ?F*v8YHJLESQ3j&i)^nlPO$s8f7}bh@NKMbOxm3c6UDk z9=mwU=SE|%$khnF_8*s9SOab>_8e#q(<3`t?AUgJzKP@l5ah-i6K5%#@f8$tIHRd= zeXYR}k#3}Vi*sZ_Ly;7i-|N%w2$Nj2&jUF@Bc7~X;y5gw(}P}pEVgcG!26%BLnIh+ z4w|Rqr1>B--Jb*ESU^`P+byL!cHdi4*@TIMI4&!h*!luoj{R%O9)D%Pxn`J3`m0`5q?r^AWQsTiNUbaXBQO( zdt0SXjwg|`!3Wmd;fBnd1jh@kX8q}ytW#!26txO+s!4ohfg!I8Lz6!?M01BC2jW+OlB1! 
zEcRbAyKXPimgbOolsnbQ!K}24#gqa#qE+FrF!8^EM=w0XSr$J#9ptUO(MbIey@%9p zfvM`lr(s6_f~ym;S~^*RKEw3Zw``D)JoR|p1&UjRV7FfXE5C_Aa{l~ReR9CkY#IMYD9_xGvJ=>*s8_7Dj6DA^@V158=Xjv)oh9q|%KYVB|&~MBEGhwd& z3~>6Zw2CfzQvHE8Rc12ak{Tf_F=_Y;^0we8Zd=7A!5s1iU4ZczGjR(1%3#sfTdo75 z)UQC4`h6>C)yyJejR~DKT&2%G#X$-r_%%O9%K&14;Jd8zKNY70a6S2kPN2AS=mqgy z1W-Iz#VU>smyefCW%tr@ulNgeHG&-kM42}~h%OX^eXO*S$t**TM9fD-+WaEEFb+u z3|LqqjR41Ps01(dgAsU#eElnxEQJeNLWu%LY~CONj}W?WXYiDHuKl<4KsdPIL&3>GFqlejNVLji-q20u7F`i`~#byEPZe{}QAHr-RbtO^(BjgL2Au&Yq& z8MHFJEdEdxk6=F}(oe~I_i4J(azJRy(T%x__$Cb>}Hs?>j9f>2b_&zJa3V((tO zsD};MX&FBd?Dl0J@I&97v&=X?K>AB@2lfLS6cqti=qOR&&ZDDpt9F#T*j4)_qesX4 zzEtq{d5a*tqN0KaERfeSztg9Z_L|=FfDjxoc<005;NUM8AkYE_#qS@KnXmf-aLt%3+x zAKfwk_vE-Qm0ujW28Xs$bzP2@+HTeFE3KN>rha__s@kFHJ^0SRo@QK zps#mxjrHNcFB*s~1pW?)2{c%(lNuY(W(Iz40kJy0QeCLe5fIEKJU4Y1$Aqh*t*yOZ zW%`g2v}4%V*wT7y;0Olcnc)o$4d!~nux)QW-`{snTU`|%iAqb8GBwTUH;>d_F{u7K zddSNHa?)QyXc6IJLy$ezpiSPpFyE&u9gbWu_4Im_teaUXP%yNDY#7}-*~2Gg%j2Gk znjGiCy+QoUYlXnjrw?-5_rcJl)*q%P^|A3W>f}F>%|r~pG8FCg!aZ(^(aOcpbqO4M zecRgyZJV+QOBlIMa+=d_Cm4kwGBqy6J`?aj4Cvk>E>|PU4^1$nX2d17g2nX1xnI`2Vz|UK+F_$!*JEWQ8Ja_&pch~NbLR}C@S3x%qN*t2xwe$uY zod&Ys{3xJTP_TbJ0u5;DT8~D6^sS<&E1E9Lg-xlz>K76mObJJ91sg2yM;*L@3r|pU z(kaA0!4`4FdI~JKgAaS~MaIA2T=?ag!5ZWMVN)7ai!gZPU}zl~RSr$VHWV2E${Bnh zuK$kNH#t5%`oed^EA%*iW9}Q|pJ5OaVV`4)0D-4$&LYCSLLt$#aJeX^EB zK3cXKj^?B#;cpJ<0=r9vA<+>Yos@~7YHl8Rt7UZP?F5=5U_)aMJlQfJj9f`~XPqn@ z|A$>CoE$^fTOU>@SPcOzvWYV+)*cimo2bdiGCr*{lDn*yJEFtD!yWIrp^r~s_Bb~) zUP%72l0V4NKa)LwXi8x`w2cEPd$Uoiv$NNDIveNFAV77vjJ;E3G841j1~fj)9Kfmq zDQ7NE2*{T4>n6n6y3NLq-tTkPb^5dx3Id~nb8#@TtnNM-gv5+EF%7cV+|k)+MwCkd z`)s>Lfh>8&Qo1>cxHx-xnG`ryGehq@R0W2qT~aX3eIQ4J3b-)2C=;2q%9`T^-3+%D zdNzIrd{OiA4y&BPOxD-C?+lx;nfKlDd-YiqyF%qJvs(k_B9cgpC~z~0*J~4y&J_+7 zJD*;lErk0BbLFS!rdq%!sjnD#ZWuZaQuGN4wzn`7EtpGYK^DTUOoRjXhwt(4YBLsLeZ1 zmxl_+wsb+RICCy`YPjc?U=}6C%I3}v7bVTgZ_9xV4IgK?T6zzRN7(U14CF$Pq=ROE zFa745awO{iVedWTn%dg!(STS`5jTi#dXXYs=^X@>UX%_2L~4N0i_e%1{ zyqaF%o=Jtt5DRKC$`F_GC-ZoxCdUD-mi8jmj4SY2^h16v_TcO7u|fy$joW>-axzTu 
z&xfX&_?cpIbeY~vBx(uGrzlNre+K;dgz4=whpnLUVq#Nw7e+Sx$;XiAPvSn7Iy@FW z_!taVX#eclBHKm6G@OK)u$x5|W2VfGC@-z!;_^PYBQi`Eq_y%d>uYwSV&Z5>$V zmo0fAlOfR7Y=W()&@3T$nPP7%X($C`8E^G{nYADDW;J#>64Txo3+&gpAzd#FP|-Bi zRM)3vt<7y#n+cf#zMfm7yq087hRMV(0%I=p+G|$PQk~<)kKLs*H)}&!RCMlVTU~8g zb96%i;c%;T^`6BQjvA$eJI_B|3@$F3jx;+)qcPNM;An>pnuTTZ; zx`w#4>O+O@`8+TREwZj|7AE=HQ(5aj9m7GjtOcaJsvjG((h=8h+-My?mc^tQfxjYieG1F9te81VxoUIdvl+JJoy^I*= z2YQS~m3o*w#Z>&V+IE|uRhU-Y5@ zOXHg8lD)X@4n;7#+cKTFX$&st8zw}?PBh~TYW_kB!cL{sAx?qVI#uY)e5+vAeXaD_e`HfGP@@4Cu8Ruxu{5#RIi>UqtVPsf}Ar(Ol5M_#p z0M&<#Qz1=Yt;1Ngd7Qniq1?w~?09s~m*>)@JAF<(xyd~qS!bG=i7+#5_wSZ=P|nO5 zzCdd9q1*z`=>wFzPwk4(&L3lW2cbxy=bWbMs4FY*G;pGh-hsCG*sQbOu5fsNrzbH!lXz_P~tg=xl-xr?eVo)AzIWctIa`Vp4xMm zCjWF_@C`bfRnKB|#Py6#9f;m<$lklFRvHP!AY3=w`5Cymd(@5>f5WVaFMi2q#g0YN z8tklgI!k8lUurFtF}#)$R)Yl}+i28)*7E$aQFg^O=;OyyFSt!IJnb>Cs?kvvvw*D{ z%8`;_WbG%cbR7|CcbyI}08`KYvl#doXn$?r)mXlaT;VqUs$OKguN?kdN(o!P21J*L zSL>DSM_9^ob0=p|?n<0opJVnVa`n`nx3Dsk>3#!+k^~3*D=>!lSUuKCqf#IYSA*db z8jG&=+P9DaK3Q*totVpHjhLO|gpn$#pcAd#Zo&OMBam2?NthO4A&`P?-!jwXC4fKa zcxdX3?n^=JL|JG5x1#hVCSWJSpyAEEmhZ@TL(CFfALx60yYH|(KW~V*C?_YCv286#12OD9u}LY|chEHbyb0 zZ>eN->d^y@NNj{`{I>!|-Kar5>$;ba>XXv-5Pto`c#x}b9m@qShb|hlqy$V~{Ay#o z{kAQta!`%he%Cg#9oRh@_0Ct_aekGWX4V9p7J`8aRjfA#3)aG~H{^x|td2xEBx8Lu zpLlBBto5>pLz$1OVHHp3H05V&gdbpqbMYHSZQ-Yw!yMqZ$=?%M4XjpS>Wq7-TdtlP z`y%B_Z}lc zJGS!nXExzR6+QX?#x>|+ES4>n-v3^X0Kx-}{Q_)YVjq+8zkdZ>hHD1D4eREgLKR62 zX!*ThOXu4UU zXGv;%-$u3D>=$ZpWevCU{SCG^Xxq)11s)mrXBR87_xAUj=HQjaAHqekA&*9Wl**gH z(o(=>GArWtOvGn;`4h75di?NnGuvW+`HS+}9hgZ%BL8~O@_9X&B$tv<-(YtyO73GC zAdjd9&sX4R3I!Nu00lTGpFl#T5_x(usKx;V3(H^y-D*Kft0`?Yl2S1O0cojRf%f{p=sVu?fL+W|V8_tq%Zk`GR{X91I zBj=p<=0>O$8KaqlnSfjhxs0%oI|6n;ZqoD<0>QZ zjwSKojC^hN#%Z~qs67;b!pj-?kte;<5E-y>cPMP1{jd$%U%1(+AK(1p1CXg$=pNV- zOk7SxH9QD7n>;*#J%_1!leo2}vhQFEacZ?57!QrK4o!=_BIA6wu>723bfUjVb$uH zJR*Q?!Ka>L>kJmmJXA%6=ENlXEb}TblCPSae?R~5jAF<1M4@FMKJggnHS5V!a&``Q zDvaL5%I8^V4#@!%&Kd`b#+)f?v)5@AauRYmjFhs9Ich`FrObS)Z{r0c-Kf&r;y3|= 
zs&c57wWme(XZJji4fzRSK6r`qGn@Smxqv?IaBpw(ZD^J7e9ru zhufA~=cIlN1$4T3O2{KJDj?!&z>0&9I7<59Ekp=mMs$z>SpJZ6uH1aG!S#qN=zzgG z-*IuU17-QX@sTDDO$N9nOgf&MFfOESd03J#0>zL2HTaHQmwW%jm0T4D&di<1i#DKE z%d|RtPp3LkX{0}Rq%mO_>2@BL79+oWi!*`{rY2I>%R0aj9nk^ zU$1)$z2{~CKlK6*{;ww`C0SZrq< zA0}^EPqZ!BQRW>Ko?;Pm2I$&~Gw4ZM>d60-D8b?f z2>D~7+aJWDO!5ALf~k9i?&B;e{{Z)m&{4KdI4HQAAIDL`Z+ph@^Z2X?Rzj7daMi}uR`8Vp&pozf*y35byC0$@Iwh!;~o@T_MhXH z;g#)R+46{Q-|JJ!d0!6|j=Psvt}7m;YF6IPIX`c?(v=5VKYz$8n0zrSHd#M;fz`U9 z5OKVx@8K4Q^0P4M?*#e^rfquko}M+>Zr?xHV_S=B-Bjr6V`Eih|59j;J7f#0%L+=OgTHvZZu6;pWZTs?Vme>EB zv`JXUHUNXhv9w@d#l^c3nn8I;2m*3_ZYTBHgt_Y_DL~XcVVd_4STY-f&eDxRqd-|; zaL#b^rj*#h!p+9PZSGxCanCAQ3#9~+Aw2J``p(1k+S<==l^{#kYcKz@cPwK(F7N;= zg-4LCGFGuR0y0p_%%Sau#%BcOGF+(pnY<}zPC5??B4+5W_G-dxDNx^Xy3YKy#g7^u zxQbX4wLhlXO}+vy9RhfKi^Xv1Cb-+@;=7t($W%0b3Ja1N@cO!_;JP+o#K6mb9493q z?7SLWLSBj$04#F5hh=h6oh~*~-5yZ{j5ednJEj#ZqP@!LUd%v7GZ@%8L`gIoj2}sy z%q!R4fjgnE2CW$lue>aBSALH5M8b+9=%|V!;y$HbE8pGuL!)!QTcQmJ=gT-&@@XiK z^=xYGjKW>wmI-3f%>eyJtv!5ibwQ?H={GKU8=vs>PK#*iyUcG@YyJ~V=R*krOE~`y z9PtjwKIt8tbkhvX0fmO(L+enWS{+^dI2x-$)u+U2igio`yOJuCVL`T}1lgJ(kV4PZ zE`D&1kjhF@LKkfnEb6|~w9uV03%E={)?*d2_Vz_t7kg{Hy!Ka$JB@oY9}@lX(QDFM z{^jo+<51Aa$VfF`4~5Jt)<%JPU6aABft$FRwqJ(HftDTwWo$(EG0d%Qof{JOa~})G zI&cpf0fPxfHlk~PJ_h-let}xv%9tL|uT#5-!@RoOlal(rv)tDk=uIk=nF9}oXqG#2 zIa~CULRRS+I>2V5hks@`rwxgj?E2ZHYui!rZUpO2kU-&*KeP)L51q7mV971p)Ad+& z>H+J408C=Gk(YdSp^%5vZ45%}({0;YM3OVBHEc)9yQRZz(hu-o#>;8R(JOHjZ>)yu ztPFtG>G_Z=g2U|$`?lHlP!~e=+=*uVTqk5~8p}a?!wH+2x1^G0C1*yyPaz2amq0!1 zKn|Ekaw9Z=-5Qp%gWtYz97;$X4iwVoL`vbr%$i$krYv0HB_@X)01#($8TUwmY3`DN z#sEqnqJovBqH?I((V=B#6_)-fL^)T}7q5x1F;`D@4lrs4vKrwbAXk)LJl^7s-rX(@ zFe0@`sNJl7l$e|;uQQ0qCnc4-nb`FpVkl(|1NU`y46J^9IO*p?dL8(N6J2sJw+A^Ia$ zU=NRtw5enNuEl|mJePu#G@MGCl1{5QGnrhk5hw$!{;*;u+WiIo=PX*5dXtf(d`stY z7%=Z$RizEWfSrKlb}!r1<06z*w*o*0=)WBj%ePj;1Qy<%9)1#OTt2_Ue>W4bmD6D6 z#BVJ69sv&QQrWw!E-o%ZD5nhk1;9XWs!J1(Qh5oC0qAo2;SrM3hgee;-LVI|-L%=^ z<$Vs7wu^5uaH7yF(YIBx#&*f+oqASvGGr&N%g4=(3P793h=mkU^0XYvKY4M8vQ|05 
zV>hLwk5s53h8vQ~#a*nJnPmmZd=DBp-$rJbEnb6BQLWiy3gc^-vQt1(>CztK8->mf zEXCu8Sv}Y3aL5VV2M&-5z)RX&pL{qI~&1*d3b2r#3?=hj|N`!=C}!V&msTRCB3d2Wc8{4Iqn?+ zS^->FXc}DymZXJV#!yLZrY9G{MwA?^`npdqa2|V>04{AIPwsS>r%1}yHpT>j6hd=m zb*XA;X}NK1JLN1@CA770H9BmQRLfs3QIEk0*pHask!x13gY;Q$xgW_OATX$Q88c7< z@Yp#{W=FCD;OQizr4@y;8Y&t^;6TZQ!VRQi$Vopm{P1CZ0O$4+tlZ6PJ+>NhxQAXq z`W<~*lPBT-1%5+Hq*!ryztiNxPU{~-6F@{lCRW`qh6TSt&5pa;nrghX1lzyQko4BE zBJXb-Qrq;c#fg*S!bPuzX7MJkQ=|8zE{!rUBF7V1#6N*=tQ2g?&geoflx(~^tJvZ_ zZOSwgS1Mr5X62;-!q6L8)3IkW*YIKXbK;N2gj|AvJ~~=Qy=O_3>}hvFh=!`*!l1Nc z<&rWQ5G%rzy5S5{lgSpP-IFp8?F#YE0=-H_DJio4hhzd98!Y&vPY<8IF?-1(Ndhxp zPMt1yu@kY@>-tYY@QD0S#-=@v*XeB@M zdkc1^F+9abS2A=&!vQ0|^W=FU_|r)nk#yWfPXnS!ofpqHTWp`*NJ{P14C z_w)BTg9YA6T3sZ1hif%C%WFCXNH4a6>sQ$>ynp@v>O(f3U@APK3phkN7k##Zd|cL; z%p`?yi0Up+bUK;Li*yP)%(X?kj=mOOb6--}cUpa*UE@|=QkvbuUXe{qGNeP~vU-QT z5Rm_uulWe&8@c!FD6rmIJj^2olTZ`W;PF5KJeJ=~syBa`h0G*$G{=nR}-=hc0{PF(TxiZeKW=E)ZKw zO-*f@X$}Ph{8oUmk5^Z+8Pq`26{4R=-aT5qh#hA0r+^kdlAvb&vQD}F2>T2K@EM^2 z>T*NoIq5E@o{XgvjwN_!De~{7tfEnhYL18Kjm4w;7{2#uTk9kB!|u>8dB)mX$6V#R z0pv*1VE_8)jT32urcdAaH8cM-%DDSx1q#23&Zt}FI9g*G`KHe`^X0@e7x-9kkn=aT zwhs_Qs$254-*~4Y6M1V5+hC-fme|IJbXabHk5|jX7`)fr8I=`J#Cts13NJe zHBdH}F+RELrv=bDT|Q2CWwH+DTWmb)aF$x7efQ*#5rJZdIy$~DWB5#7oY#N6HnqZe zz4_h1Wo~3KknJQUzS2D%eoVFOv~bunAC@ryS`H0bDESRk``s*u@|RPSl2owbB5@|sPEY3mS_zZBPQtJcVISqGkP zi@~dyjgCgHlYUFq(Sepkq;Im=@fY1q!C`SrMD1*1PJR|UW@Wc0IGhkty%+eZ-7GiU zd+XTEt*lj?s#2L_X`)f6V#+)=|Mlw&mUth6VL3H`lnNeo7pCLP*Tpnc;qD3+Su`+~NL)BqMb=e*T2uB^@+CThy6JeGJl z`7n`czIn;X&IFXv-TE8h;Q^G{`PuJR^HA?1&T+ByIHZE$ZrB5M^}ZLK&P!P>bMB!M zGk~M@j;qlX)y@MHzokfhxec9@lT%hv7PZ=fs@BxnG-bTkJG3K~ty-=ZIX;a>>P8oY zX%J;AypYzXqCzh-!%x#d9Z+1|#Y%At;ZbP&W>plO=@6ctz0fyDVtOg0zc1pjBed)cZ`C>!U= z#39!f1_#)uXrph@|Jmvpvm;kKrV?rqq6ISPe>l6 zT{N41MO7|Q5T=DMk^Oae?I?f0&vO<{T%G-q2m32I4d~@6D96GWaz${MYbp4R%WI(X z9?hz3u(=JRmh-Mnq!9ABchZ#~>HnIP{uNhXZx)ft#`y6@lej+7u_Lc{$`O2hIZM`M zXl?e}sT79+ev>~uaeMjnu*>JwqaMO&4gL1mvzl3RstFpB6?=6x7$K21|J7ajGxrY0 
zcUDAc0+lFOSCLmkJ-vWNEY$gwqVLGYGiZ_8i-S`OC+YJwa&p!?ckc9G@V+^1BZ$-S zl9xc>MnBIIF(5`lGGEI{+zOniwqxPJOP#vr*?0~jP)w|jo6U!3oPo?k4a+`G@_H9; zYw&L#cnBn5Cgke67tGsz+tW9CzP6sz z=1$uZWN2U@SxXy#MOr-H8bzL;cjwyNEuLE%Pq!9eUbV|-0P5xX^*n{QgQ4OXa4zo- zu-sGcw-<1dMLc&uX92z_MBql8qj15-caHdE>UWO#GaS!tqM)vU0X{qzWy@BZL-e85Yy?S6D+5mQ)AI`AMrEg4tBSqC4s7U`$hCA)aspM7@+6e5CAWIby$ z2vg{gYy5lT(~pkCy)4S>nzQcZ?1mYjX!&sp{3TXsJOK_3lJsgF~%S< za&*)KHo;dH0~Y~Ub(4cvEWH#brIt}sOXENzipzbc`FyIZN)oSo+&M$AWa&n5=)$uv z^xYfrVOW!kcy{cj%l+$5vsfmww$8BWp*2Jj@vypYhuv!SritMP{5|gdS zdu*Q?{%k91I9t-m|7}?7E2x@fUuICcB1jgWbjaLi`t@mX!YE=8fzJy`N@VggS?GeI z_OPVnua`e$0R}Au;^Y168SH~s(!UYDy`@qpffT2YH};iZ$3NZzp0%~*Xv}}5r!UqrvZAr=JSi{nrkbr={re=?SgIy6Vv($m(JZ^C?#4_S-r?V-S{=YcvvFykQqex_t^iJMQh4Mrv~ z)E}S(Eni3+t-VtPawDM86wIACph1q)@j`qOY}b}#wj8;zUM5Kike1w8^b%R>Ml;jW z2#|m$yd6Hik(9{mzX{Ii@8kSG81UG$_3{4uRq*Ki?;{3&{tw>3(P)(mhg@Od;w{<;TvjO^m9WLkp6DC?1=prXKS2KRWoWj>$Rf*0*I}}f&1vpw=>9x>OS5OCZOGGOLf$E|3i7fUz_50 zRy-LoZ4$TT!F$Cb^HpmYDbeHhq~x!MypRt=V3<2_z;Dif^n&93*8%$jSEzVz48)|p zk;E^(p-5?PzJDFt{J+hWr5|x15=NNXyPmma zGH&;IOHDgC{$k9(W3m5qnp;A9M;$skx{v+%IKJU$IP(wyaBohEbKw5HzR}V^AYH)2 zk7lZEY8NPGs!)S#`(NK^X3#?>XQy@qxBcb@pzJRS^aDr-A1CMQ zQnV5qP?@i)u112Z&=_+r&W}5P5q0zIBNetU>&qtXlM3cwbl^BYK8XpRis7 z{T7@eu?(=N`z!i*$cu-aT0h76+|Qx=`GmCx0DAj1)cc85xv~5>(dxvSi^;02m(`_1d_#U~QEW42+#%%4JkToCFJ|`>e zPemHcF0uN)u5@oVq>ZZrGeS;Iu0`y|@p(ySXY}6TVI*Q4*b;^SUt|Dm^LlzhIxc(K z;r>1{@&?&;Jf>2sRpb5zh;2JQb;7Qwxbq!a+Qc^=hLqy+Z6)DUjd<)MY;%xPaHPNZkm#u@+r)AvfhCO?ynpp5Dc|R z5>!QFDW)G4Evxg%h+;%fbvL82t%|#cbMA>U76-K7ws(lA4XC5dg==dQ^Y`1-fcAdd z%_V6k+9Zz=GqiCtk#@O#nm;+e(r}3-Q*?AR7krp!WO(~dBbF$A#GbX-lMxG1k&$^l z>e!=LqTz_!Rj}89|7)T*s_Wfm5EUIea{oHy|M;RsPli!h$xGiNKC%05D7YNZ4aYlL z`!z4$t)~L7h)&&GQ|!7;NjQl1Jqe2nh1fgNjAIV5-$82lz1AX{S8HC&$3t9;Ya{f$ zqA7qR-L6~O3JeEKj@SouxakfF)1wCx1=dVAt4I^uN=0o@FwL~cgK^Mb4+zqrdz*t% zWutATbY{ZqxR^8t%*rhaY%B;L37tuHw~h>&Wb99fSzKO`8)xE&inhfF3X;8kLnm zh6ccs9>`Dh`1>fHPTq5hKA%D{dc1hpG#NH^*K zh`LPdi|JBOvd+7Qy2#zh3B**#n%&{k*F%cL)$8gdO_q1)_l?K-F|E!v=i(+*JrGsx 
z%jPCG)6^YNbztC;Icf$&E;N0pP7C7kM+e)B){@+A9xW#?h^{TztT|Mo|FHMW&r7FM zbtP*9%Kc|J7EC_#BF@c9Nx(>78K3ZrHwlcG<&%T}WtG|ba)aho{s5lFen*4t%4GH6 z;gLLW-*u_URTypYnlvt&ajoa9&nVyI;9j3N(Gv_!&itG{5zUlZ1yYBO<_^YD+pMYngVj`Fm3-!EVEiT(0|P8Arb?PFc5yVJvQwL9&&}xGb)Z z-rHFjvuS_*XCv4B2Cb=IF5EzWoZTAv>uuIR$e- z;haf!uskO>x5)Q}{!#62n>&R_K_b57s?6}scF7nS<7Ew^{;KHJJ5Z>ep2_NGcE%)C z6|GyBt{-bTzfj$w3^LF_hSH7fHO|&XOCtgjQlyM6H!}e=@IK9?b11xOz$Y4Hm^_!a zyUi{s{wZ58&RH!u<^wBoa8P7O;k@q28F^X20L?((QY!C>PH=+ybp;hcf`Bmm9S4B* z_A96ZZ{9x$U3QDE!+;r>{ihv~W2N4EQsQpwEAY=iU^ERDDnZ;^G@(mCImWdlM+^up zG(EO`<&q;Q)mxt(iG_Nco!@0uc>USCLwMo)zD}W+R(pC5!gT+P&rw`7H47TM-Dl*> zU06^9STuaZMb2CXc6A2yEetu_u$=6OZn(?0mk5(F zAa3AqPiLo#j_nFM_r%_QV@Ad~AbKLEpht}lg8=oKgJ3DeK3akT^Kd$_O96$!99vP) zQmv@+`J|$@kx8yGv#H2v>tSNb$5_ZnN`m}IAP~mwMM=#?tqSi(0T||Pa(FE~NH>q% zu#Q*fY^GkUZBLy-co;?KLrnr+oXjZs6Pm*-snPpO=7SG%4$YIypSCrqubG)R!2&h( ztrJ$MOlJqhBBzrJ$@9xceTq&F)(1=wqZ-UvTd&|B6<;WX)0(9KyVly-Q`?90C=ZwdlEN)$Q$^*k5e{V3-1CeQ0Um1H1=ojmCBL%JP`C=``iU({*9flYV!q zNUv&z-X?67OgHM->9`s=`+eulOQFwONN@4uX3IsXO;1v-W1$<>; z+2P{S&-3QCsb#76o<9#LHR{pOeIuJTkWJzwA|l6n-qp?ZwR;+i65<>)L>$*wHCbU!Sv{8+ zrdspjo^J_=ubc(xeYm>3g#ysw0FHs?Xi%2_G%;aNS6|P-$=P{iY-Z+{n3y>99l`w( zK0+uO@ueh)KSMdG6%h~fYM#8NU}q^Zy`S*(s}i^P^6HJ5?fHbDR9X|f_7 zWjbsF+JS`$9;tB*<$ka*AN`rf>?Vt-S)6J;TxLy*C7H*-@6%@6NHJrAi#Z0Tec)Sv@fO8&y>_-IN0 zK7FL8>~kNrJdmPWZ#z=3u3faizbK&pW^Q>fQQ>~{!VY{EM0Ba}d-6SEIYpoJnu`_= zP3+ex`9T~&`igt@II!mS-LMT3qZOW8!SS(7Z)DwBs@Ce(J-Lu5U+CzBe>i$!xJ*#r zy2iRJSEu-)GsAi)c^;MNPQ%d7G3toF$6~2tBo9opyQbr7)1BAl4x3&VNj%6~<(>B^ z*IKK9hq0X)miv@i>F*@)#DP`LJbPHpGk#-N@&!9JVy6Uk`4WjTAt9;AS6l}|Gp|A! 
z>nhVI{Hdz_aVAEJ_l>NFqW#NVyBVF#%K4`+lxxF)$Jl2P1NK9wc;NORm%kQ2JEpxk zb0TcRcepjqvFDuSUpQ8}v8A6*-4b@m&4t?t0jij>?9(>oXvu|JAV~kjp+~#>u2L4m zb)|Wf(+IvhEkx|i;zeoEB23-7r<;$oHDL8ZlldNO`PsD3o1f-xm$Zs&8mNwx4y|Y0 zWTl3lkG0$WE0@6tBK+O;Um z;DQk?UWUt7r9U#YH04HH%X1K0KAnF7SAXVx{9>y@>Y@LDR96U{*eqAp%S@I4~7xJgcbG+a|t z6Ql%pM#5q4`2}MZSAmV6eI-YABV4zX%a9pKuNHkSuu4S4YCv`PME;B{nP>Le zV2pVsceTp_G;NB4a|8-U_CLqPrLRr;pmyuga8X-hYYy9MHA>(${%v=&uv!TgPAz&G zykg~`yt-8~HVSQ+7#Q&9zG4zHFHq5fLxc34eR7!KUty6cVmohM?%qbgRbXJVZ2oK!>Nb+wriS242Bs#yoAAfl$A3@cW<=t z)}P)2WlVw2>Rl=8AzHoqc_EG{RR%PlsXx*PcF3rPUVtwnnThNiVm z`G$;ftVWTGSGMEoHHOl$+Zd?y>_t(zM$XzhdI_FKH5W-N(e(5!Eq&xO0o`6jrgE@M z^PUL2@_prclm57~At>SdM@Js*yrQ-P>~k?Ck{vV5>#-`BlA9c%4f2tZ>fp-^%@v<5 z+=x8<9HXK#D<{F~HF7HvQ8MTp4_=<@o;PpJlx7?^#1#%-w5$gqsFEzM%Gmo zpH-q#x?ZaP16cJV{`-WmG{7V}$?y2^@2SB51++S=1~5{Co(boGrGH|q9H$fnbTHoZ zvF5|JvZ76)`^_gW78x_u6UOsfcI|cRlop4xUep83R-;Ua99QD|E%4VrB|v=R^op5j zb84*^XL5U~WVebig2&bj}`!4|aicC`C_n*CA0BzvkpAZm;G~(;$gPbS%8_35t z`u~^uRZO9P5+A*ZOk%)PKTuklN0)qKw*Nb$!vcZuc*Ncq`Ok!$+yD?HO4LaJOo|Vf zp>gMH1%4|I(9!_PDTd~1hd*Lbn=4m2wwUk2r{Cu^z|xcgt^z>mw-n@IB9>L^{W<*#&;RYm|Bt)@8Ze|a0-;d* zdl_J(rM>$;CFLh72!P{l*yk?-LqTD*2qE9^n7vuY@ZL2+4Jdu}`xsqrv&gM2M|^zz z!UrUROJ4nC-_f=>_r2Zkkl_D(VY&u+F8FX88MsAUUheS)7fJi*@cT;iE)ZOi88D0; zFAU-!e<62d+i$Sr_@P|tj3%gSGT`|*Tt5}VA}x;7IlOq+sHv3!@N4^*>Bu`s99&=j zstr6|3$QqW6xr{GLQ2`5FKXChwsQ*kz{|)R}qK<1{LiLYHAdu-$MAVfKc8uMrVIvjc zs4IlUrmS+E0&Kcph#64Y1&&K9h**YdC$~+g9#oDgI~kvipP%0n$WDU4Z&upC{UsMk zEU&B_2~2t|d&?-kY5Eu%E#?FiU-G&M#?vd;0Lf^BpA{VFSCbMIocUqV6~z_Ayz(rb_$fOMYC~Y-!59u}PR)ocm)`C+0fYVLU8YopQ`eKjW6HVrPM=(z|mOpuL-8gxDqa8<_Yc`VJG%)UdX3BMhMX z71CK*EXExu)Q=c(i4fC-9l%C*=$Y9>EZWuja&*o1N9tYVNBWVU(gONkZB^-(uCwL% zqt+#29R1&36E~pd-r3p;!kOOwRt$8H3iX?d;86{Kj*I>j~cicRq$N0W{K4A?Tq=o zEhAHW`w`u}oH(nI8dXWD=g8=a*0z?2Q^aje{a7K@1jIg^SaPX<|I_g_zz%rSU+`$z zwSN_V3xMvYuC6hSdhRtot0O*U5R_ea3snEY*-2DW_iZCA347d!Jpl1Q{&N7{H!${r4}Zk_hTb zF?>b={xyepqQ8Fa0Wony42kPW?RwO{?z$`VWP3vm+$^nWvMSysrE9oH9)^mF1 
zJ9=G}u}5VdB~^P~>&*tIn+D*b^|o0$B|bZLZxuIilmC7@pRQNhfAbcbBU5)gepVifhSVSsk1zTC#_hPz(YP|}nQaHwyewy;KNoCtpz)VQ z;Tq0)PoI`WFG*&|6l($*PSJ^7%n}Ph+%_n6pz|%^7i@O7K^Wy8Ge8YJ3+W> zareo~j57SiZ!0nl(A^tTPJi+c6!$$;H+imm-@0RShuS zKBbCoOpWHJB^7kVWMvv8t`)&rcZ#k*mdn3+>qD(1M1~c2vE0pt6L}GDernFy4F^b| z{W`o49$8*!>IMw;7BBsHM&OAGJg|gyiq-XaSBPAPv3`=zos^rV0IXN+kW(E?E<) zE#J{OD;o%GIZI5lK?iar^r@$^rCPQ4l_k#Zhj0+H|8}UH#ukH{|4*;2>>-Z?EgQHe=|^#dwY9E*sL}@EQ~%lIJp19hoPC$ zv+fKqC$taha)D`=3TD?t_G33LzSW#bJLDbbKdBdgv2%Wp4=cWj?tPvGw27aTnZGlS zf1%9$OQ8KD9byFn(&+5>SR&Kk2R1K_Z`es)i8Rp2;|=7M?G&&I7($CS$7JclWpAt7 zuSBU_yZHC}As_Al?u#FC5B|bxAP$>fHF{b9zFovhur9F=k+00 zKQAq%t4+-S0B@eEf7PyRXqX(`BcWw#I*0$l@b}NOE2GF^+z$S4Qh`&)6FOj>oZkR+ zwm~LDU!{J@buv#E=)YY8b&#Ly4p(C~mQ1{s>uP@*&+i9B1$bGx;GZe9D?EV7U!=@j zFdJ-`UdAdT^rKgq1fzfXxX{vy6IlGNk=#&!s=O_`?lAH?0#j>4$jt$&t$oXU&0srh zuHStZ{CMQ}QQw$DWnEngm-|U6mJ~t%VM3WGbf?Ku)hxP`mwmbb>h@5a^v)L1rr?sR z@Pm?Y0NW$Y>K}q8Q+Xij!pjm42q3EI3csgRqM$QRn|j3W5j-_5Xt6rma31bO}2F{}mz%3afYjc#Xeigj2Y4I6HgzY8N&V zdfIxO%Ur^MTP!HX8do@Ut-KvimgSYI!)rT1Dl}7)#XbAw(qU6}N$gBQA%8D39*Tjs8%0bGiu)k|(*wJ54_>a0 zk8jlV#~?3}t8iMQ0d0=K$LD)yuvO39>Gv?U-ki8NC~&DHa&h6eOg~Y(feRc?B09xS zp}^u({rVPG(cNmIW~>bXYW$cIOHC~6eCT|Vq4@=Kqes5HxW8tKw<^Euk%VPVHVbVM zP+f=?G71>IbD&IgZPKRwZQH(mFtjTt_8uBPp9uGn&_JcTS+O29ElqXqs<2Bd;1j2U zgaDJT?D7pH2`h2XW|0u~a#z=)jZy;iJ{8DoHJk?n^eXNAfX{RTMLul=;{2nAo_ZS5b?x8Gb=bhei&F`-@lAz~(n;Om>yamlc zg4W+Kvmp_lGI?byPZlkNt>@RS;aiR?0Mc>0<${1nuImtb&H~N25|$i!g(uv_Q?Ifa zTL0msx;3x7YH6+5Q|u>&CLWk5ly9#NsSlUFyAA({f)7@^jGC{q!%5J*v=4m^_?l3! 
z+;;0-?Z?9J5D|@j62je{daUCs@@*x@e)V4kO24;LJW^}8fG!d%3{Qpem)27~+npXpL$Jhp};H+2+a@mgZn5mq`@yq92f!GHqZrEI(ms8~+n4dQH^!nFsmeVdT@SW8cR zS4&@y-5$Bco=uVN%+5B!O9~8B2t@T9AS;E7Wl_!ExE6C!a%WQ2Tt<~xXSnh-iIkP# zJPdSkwcbLA>-+(CG7;|Wr;)lTNv@z=@wfL z8@(uc4bY&?*P92=q5CC*z5=1U*h~A!*TA_De>-mBNC$a8G2?Dh?h3Sg`+&9G?#9*{ z<448VD3w4BHq2XbF|-lxnIok^G5MT}W1jUgZ_J14)EMj?oOqGdTUDh{eo4NQ8sxaU zr)5d%O>Mz1Yx-mh7!B)Ee=^!rk43oa=CLvhUc?GYad7IUcK-z?sd};HX|%>3h0|Je zvyJSJvUl#@egt|}h!}M?O5?QKLY(Xz#^UY{`tffg@S!~JHXaXHZb=clMqE=<84NI& zx4f+BIsqLMZk^K``H>kd?*^b8fZvr{ogtm`2WTP)hLrQqkxEW&Zh|0fQT*eg2!5F1DQ<^s<$`SuGJo2Fn~a^>570)V5BFLuqWPcHdMT z?3@p=sidAwTM^c0zTjj;cqp>hyUFG9@d+^=3)lco+v6;C`b0shNW3Q$>by3}ygA>= zKLYq+jF4+&WGsNxXUYK>OGn3nrANP4a|4!rXp``b@*)G%Ys|Cf{!Mi|EZ|{qf5O2Z zfBs3MPt{m2f<@U-R)1K$0|?2>6~p9;1aZSg4%eO8QWI$m6cH0ig%)A9a(%|M$CVAk@hJy8c79#uY+6ol0 z(9(e7o~mH_*M)B%I-9#LiuRR!ws&$WX&*oyM&+yua4HiD*1GWbM96yqHc}62LWuI^ zOX;+-va*i(0V5+XDDCc;UFhZVvRXk7Hl9zz?n_(xK0E|0?(pM9JN?F@r3c&0U=xM; zrK8mQuvsWPEm0L)nsd-B0u2p0=aa2l&U@=0HP&wo04%I}+L}i_0(G&=fH45M>kZX| z%_ia#Gd4*Piqb2KeDlFOf8H-YKY!Hg^mt<-B``*M_{DG7>m)@sK8~wA*dmDz>w%Df zO!y#GAVo9Z2or{m=U#xZN$ie>Ga#?X=M;2ZglAb0Pp(@IKV2Pj`4IC*Gb+BvQYp+UDPz?JWv;+J^h1EEnO2Iyl3FJ^8=X%u5|O^{e4pUFA4w_!0GFnCPu`LqTbab; zCn$Q5sW=PTSrI&1yi3{18z16RX4mfUH+}G|MyC9BVi2-`ksiK&=jco@*++nf+_(9O zrgZji8y{CV830j))Yg-e*kh;){)z|xa?ooH_FOZH40a@trfp%h>A2kpi!yiF5F>Tp ztRpRh(*dVgLDGR)r1PlN<%?Tn9(Lel_A zWjx^|>l1v8W0?LYW|7A@ogI;PnuXuMCsO!fv1E+&uNjnGkM{cb>7QKJD@2CZLcgrSS%Z zKaoGx3LaL}jzf*z4t)2DOp+hz(dbl09as9Je~j0=VvzCrfLvGeZLVF<7M50tdtIj7 zwqg_V9F14W6LXM)-)v9a{g~S_CeZOa)xl5FsF^~vTwzue znysC=<<_tF+OleI`F;vwXkU8ajuurol2nm7+X}_R%d28N;*v}69B_H9N_es(^#Mu) zJ^eXqVVt_S#+P(mm!Hl`ZuI$9xfzVrX?t~*Xz#Kr+`b*)ID1v$J2>&%0Vg}$e)bvY z>?zvCO&vAATl%oSlvl+O&GE6pOwd7ej(CYnorn338jjBY9LbjybV?`v!1WzxBZY98 zu+#qi)$tjlSSsHiB?`!25oWt0h5Y%)?H{JaD^h=a5&84~|AlYQ#y>m{|2S6H_pjWz z5%uUFrUb6<(sJa}rx%VLJBDP${`c#nZ)pH1Db?3Uw;Vxaz&~$Do+T?E;~utAa0i!S z!9Xz2>IpCCXGji4af&!A-r~lmtS|j_xOP0cJL|*q`RLldJ((RD<|Jp9T2K&lk^}J@ 
zt^N*`VJYJ7IBb@{7Qgo(85D|S$K94^5I0(s9DD{dNO*K@0GuvX#IvEHA;+HK&qo5; z8U~)~EKYRz|H#CQ82Gnc29=OZCL}Nd%i`h;9B(q0Y(5Mfnw>RcbQ&uMUl%Rd->w$h zS$}}SrUl}2=En7yuQHkzuA8GQj9hzI8>Gx2edc0xkm)Gma!<|3&@pe+J!36odx^{s z7uQ>t2ept*aOR{x5*@A0$o|!aT-E3%XnzNYXRPPWy`hh+E5_xqDn!-PMRJ>cdU?Aw z*HEmszCN5&BTeqst*q6GCrsiznQ!Ps`g{`w~Tie$LB)%EdT z)IicQ>v=4~H<>_!k-yp3=ljkxVs?i=e>mle)c27MM=3F>W_Yr7VBn~l;ULm;?1#dc0@3NwM7HmWQoR#zdkw!8vM7Xjj@ zi@3*!CMVAtdn}eO&2{MZ5NyrjcQtW9Y1^w;uYjhswMUQ$#(w4sD=VvlU*8`%C14ps zz01tPqJBKln+Li;X_=Xs9X=KJdvR}VZMEhZORf$)pcx6|&JFqe*}jmbYO?L<$Ll1w zAFd3aHk9kYw;}t?;W5SdOR{tS+GM9b@VVIh+L%X)9M1$%364Q+>Rp`i)4k<|-4gLf zRUx)xmF%?s`DkI~7p*Pfi7QA#B(B&tno-`}bYwI-cGF&>XJIzL!Z^}e)__g> zNcp>1oFqI|Gv-ZCIZqlohQgD7@P$=dntY*oBQUwtc}hlT0o$ydXVhA3*MFdTcLwvy zQB~$QPCwpu%5QrjB0wk4?J&VYxu=L5UhcB^CmdG2Gnm1Ax0FQ~m5EW}O48CaTq>sYShN7IveY2NhQWSQD;vYb0Cu~ERtde zLaBVVmg@w%V_#^vIZtbzrO;$X#JkYZtTju&MAT`oXj*Q&RHBdVS{ofu+L6%uKnPXL zfCqk)q+q8Q4Jnw>)<L-k1fyl8$Sf1d7O}46j078?*8~;D1`698bu=stxUl5 zb0(cl?`KbY9u-GNgLg;|grjEOPq$7v2{+?wCMr{N}Hj zd_3`s-X{Bn3k|;a@b^Q?FYY{V?G4xu?Xu#|Zn-PTcAUveFU&bzr|?|!Hkpy%ds;lW zzS1axOUTHfM1cChis);1-) z>F);z-}Q4JoSS0<$EsGsI*FzG?$2l@mjy{AL{(I*>Gt8Kh97lMEz^qY=ge`lx%=b#?V$8J!G5iKTK6B z=5wczBQ?K2#B|24Tu6Vc!I)*SA~5n#n{;xIo~#fLj>2)EYM1LWQQHhwQ?1$BygQ&1 z=itnurB&Z1=I{8um3oUf4LURdL!|Dq+EZ#1_G5QVxli!KO%RKA znZKsMORI!pUDw&Wmnw#!x=*7Gck1wkRV?{XwK`N~4dzx#2uRN{-c5e>dmnq+*9la7 z)W^WHWR^fmF)VX7*XQ0=jQLrb$!+4S>renYi?FcH6&ab){A#9fS!U0629hdW!C!u& zOg74qi=fZT&Ct{<{dWE9Fq)}Pj6IQb_6`d-2|rZuh*V&x=jRt|uV!dhs{7`y2MZooLfYw&Oz ztrZ;3da~`kkRIeGdTuRF3Am2E7ivT;&-8T9v`x2k7#WJ){rUAl%-J-ekARO!dS1zd zzu7{bc3P|hl6L&*rxSYarjtdnaQ8uifh3 z(zLQJbM{oRXXxL2sZLwB*JnoYsO5<2;@@L?5FwpQdC_2K7-wW8B7?N!7!|6#BhqqXQRTQ_U&*9CDj+Ag4C7f+oIzj z2#RNpTNgb80>wp$0)QL?#_@386Fgwbb9Hk=3RJl86dG=l%Lx%Q6xb;9l2zK?+?A?h zO%grMb2fh+VcO|;i)t&q#6)bkzPZ>c`YIL1tYRLS*M-JH1nsSlbuX0ySTtcDmx2QHFi@}Rm zWZ9Ui&FF8U{-Ydh|}ihaRedXMbF=@3J=q z^CvAMBj&H-)L(+xhyzflVrb}B@KHj*{l^Oj#@V2wA>rWo;sRX&%iNnAl=en&i;0)p 
zi*yD6KOHt`u!+fI{GKc0nRkAh1=VcTQ^gO1impIJ;k#@ef=EFwZf-j6L2!XPh7mJG zLZ^h&xnYPXCtf`|I$qz`PR~6b5qj^Z*TA04&0lcvc!%^ZN@O2w7?LR+}uiiA=C2%^niHH zg-`!3Ta?c8J$n8P+zy`6&( zw!y4;i+YE8Y;<=5&+%d|4-ai$mLFgqFVeOY__46eg>|KJk5t?i!kR=<{WZz0@PKW5 zewFi%Sp(~x+Ny&&-RgQtV*%b3x2|w)uCKf>Z{}fNG-{5h%gZCW5k9cj$r75UA>I(i zG0m@OS3wSVcZf+90|-GCZcL&l|71_e1882snrO7q1mqRNB)(R2mE^ucUW^1iC3d*E zqj|zW&2YbEe}yNX$%jMK2M+MPPW*kiI_^a??2>J)JF6Lz9oA?jzviezwg$(QgnBhk z9NBHZ2910zgr*;_<$$9SoNxcAV)C?^H-yTQKe55p1{-C&;BzzG&obo7+nFUj#-2Mrc2yf4mC$+H)P5#Koe4nN5WMkkdAUt!vix9q*QzUnO1gr z>2!i3y`E|J)Z5Y@+NM2yM_LLTwOOrX*#{!@Qt-$ zx+1>KVcskQ6WG)(8VT$#+LqGu>%`45wZ9|PYcOG5{12VxQ}w{r@j8V<4(7^z5T#VS zq>~UTP^r`8FSB%$T}e2Xmu1}*+-^ji6gmAmc?f0=(E=Vob zYbMNQWL%u}eTjAQJa0DA!Gz}vdFbQV)YjJK8{xQ9Y!i<9C#z<=H{FA`vIVJN1hY}< zvTE!a1EeS_He&^oa|%L2@@Yd2A&+Wk%jyVE19+ZE?4z@0qX z-E+7y&OWHsC+MM&@=v9iip`r_{)us&(;&Lxd(1_) zZ6{U-+V=9f{36MT_dKGqxsK|ta47E9ZWGV21HOWs*mPmXnb3-;hXpgztT=ne`V8*I za&3H~ylI8L@HaiCV4BO6kajud5}@ zim?i?bj4~3CRv!+No#|y)uMv0- zCin^-gdvq2K1Qfcir$ibcda2gXj?d=(&h>8;a!s=Fe$eC28lZT76Cptj_gj1;Ced} ze|p3-WAf&rp=eQbQ5+#=^?bKT7%Z$HIvsP{&}wh|LblWk{ z36v2yrZgJmFkKA1$q=PaXv@1RL|W&#IPCb`T{Us(sa|jO+y`r(XpjtRjJG#SskyCE zQy1>XQDFlW$3?;5x5;TtlpjyPZbYT@bkv+bUL9K}Z=p_}>or>#sU^(YRaosY&)cnO zN9-SR!wY9Z1*OxibGE0ITkV1D?U&jsZLq&%Uq878z z^u8Alv@s3Kb-@S7W;EVYqqU9$u1q&+8!_`?b&)%bG2E0zCJvBF2aAV+z@;P@=P8J zC0v8aFr??;9KywC76&w%>p$&%)JWEmTm*@e8@%^X>1@4byNxYcTT>NAJB_`IuBXDp zQs3j5nwp9|adU8R5a=ZqUU;XN|4x+mo7fpX4jx8exI{dutX3Ecwb5{r-6Jr%m+DL0 z&%wQ*W2MTJ4bQ$&9_h*tD|n{ldJToYa$t!Z{;Y~ZBqsKz-c2iFe0b_078$;v`f+5i zvXTRqY#L`7p5yB8I4tHQE2Nq+fW=9)*WJEU;FZAO%JgVHT%=kUzmFx{76$yGGv?BR zeKq>?jZq?tCu=X0#Mi!iYZb3$@Z+pIbJ_MoJq|C{M+JO2EL>bSU>;Ur&=Rce~lyH;J?1B#>5e@ghZN``x(EBkXqIvvKq;8qjEwQGb+ zkr;vf?fEh(;H%Vke|K5sEx)JxQ;Qy_ZtpAir~4A>!DzC3W0Q;W-qKT4yiETCsyHvw z3*8`7i?|z7k;KvR(y}samL#wE;%_!xuapS3{Z@MEd%vvK#>C|c&L=~wrCH=-{BD(- zuhj|-jXXa+D6MO(7eWU{NZdkk;8IM}F)acV5x~A})XXt*N0fk;=%CP+6u>_qmd4{N}>)awM?2jhQ@`)D|-wR)!tt2qatXb)Fv| 
zUlbJ8YD)WS^xxXNIQ(P6zVpn=2&iaMpfeh4cE)3;+T{2gS%B$;yZWA4}iNq8I8)b$I- zxI-zpbtBW==j64`=4=FK!=Fdz0Sp{DY3ZcQ3{6dSw^=tv{o~FzV4Od-Pl!4U>Lp_^ zz0t8zHwn|jOzmiw`Z2T4Wa481$|6YK@N0cX|Mj=Jhu!9AbM_7@Ru|LOJVIgBB`b3? zIhUS`=97rt9FxO?%#I(MQXnvW{FFe*C)8U`XZLs5MFmT4Ul+s#MAMjiTkXEIr)l0S z>yu3%K_-@xDn?bwOz(C&0MK47j2&`#`Y^zmeLg&2$k=2hQd?7#r84(PID!>_rc|3x z+g&D!<|!L_!a7?_M%;?R zXf?gHB!kF zP>6lBdE3v$-R*|5a>7zkx$>j2vkP+#&89?P0kj47vxtdxI?t0j z+-U0hwLPEwDGhJBpi+s`Lk;8ftxGPW&!)yA5=^%52?X5g?D6%)^R=Cx_qtGTlUL%g z?56u-LiUv8nu;0G+cDRA{0O>fc&1`??j(t9X{Jk2qx?V$u~SJB(Sf)2L%{P^ptQB<#)AoTL8AFWb(&>8^~8nDbv8Ddje~g(-7-PS)$4o^)91b8~maM%^5Z z>?gl}!lKg#|#W$N9KaxHmm6+a*1&9M#i!U8CGDD}OuS zOJsInN~eg)ipMF3VPe{?k0n7j`au-w{B) zp{@35CBQz#s?DhYY|M13;PaUhKa;d8qD?$qqLOD^I40_5)(m0~FwcUw!`Ghy|6XU5 z;v-c4v6%JWD_AjEKawU7@Jw^k+*Ptv_OV95TTvfG%f2;qs5H0$kVqYVx=D*Z(aVao zw099mA*z0#PdmXk9spea4m{8U`rZJOxO2}~g#{+4|O@-#n6`G$Uo zXW_`nlHq%mwVuHe+;Gxo5oYz2@Z#NtDiUXeU_bVy{b)nWK+8vkeb6Q!#{!%{okBW8 za82r#dlgz0YlQ&#UtxRKfxX%zoU3<@wL*oP}9&K~ZNxPxCMr`42&eRONvI&M( z`<0!pR-ZL{z1b>z+qOXIqP-sYE3~`L?D&}X`S4%I)cN}CxP$)|=jnlmS6O z1VAM+{*dAR2xR}6sQe=}I$QPUeIow>#K8%MXh?@3+kw;G!FiW1YU#LW!mf*7veYoV zr_~%b`InMD|Fq+~q&Ck}ItG#Mod#g*Et}>9;d?ZxH}({Y{9P?#_f1Pg{_SdhlND#bclaQ za3g!S@^gtz#w^~Yo6UTHg&9FOFc2sSWSJNHWrY={cWB6aXG1b$Xo{%S=tGLctkc@T zsb~rm8f(oPZx5{XARf2j!?#HzTh-ead0%biep;-NG+Q~@ALS>9D6QLGW_o_A0TmA` zqAJXlw6+LpMqUord-`3-Cc1?;3sG59_U2I=mE-9#vXS9&ogA-EZZ<3x zx~`-akLGP}<)xT06s&0e1l#>3;k;xJeX+O1EA2>Uz1N$k3*N3Zh>MG-s08!C1@Onm z+-*}*^=_kF=R^--vNa@d@`H}L4!sr| zP0r-4%PBSv@t;=5OaaGBiQU(NIbQ5?QzR&q@`WTPIxynQ7{W zy_tCJl(x#K!)%)_abwN`Sv4$>W#>Wx^L18oV2Nh3k14IUG*Di*@hEDd1sD z)wR~YWD#kyK9^AYb5~QmSSknzcc7130dz^z^_AIk zb4OZlsEh|busXF+rO}L4C1DG^x4lT^Jev%AcJFQ=?~KI=rz4Amw7|#d{$(hQDL~i( z_3b4pi&=WtRx21sXE;x$1oVe$o{(x)4I%_{%S z6(iNIV5cm!D`Do70TyL(&m;e{dx>e)YQe>`Y88eHh08g^0>2Fp&e`7AsB3IQY#1@k z#S&H%vh#+pTsO~=eh!-uv*EEX1@6+1`ZRgU zGZImx&shhfcD{ATH8El@!c+eckW-~YAktu6T~H?h!+aWJ%tOZrDxI0LC&cdFEiR~& zaz1I?)v4K&sUTMH@*+veQZD->T<+H%%YKg~WVD{pD$Rqvje+Ey45)zarPKkZywY+k 
z7|RZH!r0sHYdw>6W9%#zfUOEM8)^VS}k`bB$2n}QN6pgc!M#sA6ZfXV1I*2gQ zmw}k1#4(+>?PUmyL!5dh$4e_!zbTcbD{m#%yk-a4QyrQWz3Nw7Es(8(*6N%fi#y$! z9Ulh+8~=$U?C}jt+Mj-z#UaJTbdCaF!k!iw>lN)6pL})|GOv)~qCqa6_bw>)2HWYS0Nf z-NJJ!mtE4aiOVYo7uFn9v@*>#wDssd)Yof(2atcZ7&d?LRGf9%Xf^trk5$*qPA*NE z-)CE2Nk;^x8q4*j-ZWw-_$BJ{tcB_ph|{=oG}^WW9S^S8u$R`snN%r6`zhLg+n0a{ z+)skqR(@qapBnl=@V|=oLNx0{5j8cnurRnrXIF`W)c|-%R(y7PrR@#SSERtskd~z| z$VnwAAgod17yZ>Kt=-5lna@Znj)c>*K3CkG_1xu#p5*4+nd+CWlY%@~8)*PUhBOVU zdP7j%41Hh58EHJ2n7&?vz!xQtSNC7oyVoxbg&w^nit>BHbe4t;6VGF8Y+SOvwLaP+%Zv)f@7pmZRJo#$?_P-d9CeXp#edZ7CjH@XhVT0T8`uX# z?a7=3NWiD4*;Z(&zEYpDu!DBR!ZF}d|6D@635rsO>fQ*uiNmhzM)eV*HX0U}P572p zsq1ELL3kN|rMNtRmW{DLN5}a33Gxpn^ThYW-MG3!esyzEOP6pfzPp1W+#20V-plQNrHXlWOV1EdB zZ8b1*R_m9Td3hImmgQxxZFxx$tS=mXGFil2+aySCSAHN3^hIDKTYos^&)4X{VEE0Z z-7fea+q9L7W~*`;+yY>juNmI75JP35n-$Z(+{ zJSQctuIMb!mN9GJxKct95>LJR3BxU~tn5JIQG!kPuYZ83k+avwA+$a}*3F{Za^Tw6 z>AzElI-Sj}2l6s-5Q@!C@#_W1$;bnMYG}@D$XP#Krr!Z{y)Wnk2PZw$Zntn&o-7YG zN=Pv4%!~BIFQ{S$IU>Wuxf2NERxigiC(7%K$_Wn(vksRA~pF;mY4J|D#i>PQ1 z{VsdQe+SlrOhdLFvFHr>qDY^aAK?Ez?Ee|?zsBK-t4n&yD2kM&KC9*A9Fp|0mT^;c7Xk3;j& z`$wZhZD9Ciq9}dW?h5lOux&28);2aquV7R@8R3$w9u@zUQ3ct;(S?7cR54}A>wP#Y z{}odeNzmPKiU9aNAUL>~a061OZ=tT%dwVhEamnN@4f(WutIx7|^4Wq!&5Lr`)RUKr z(0)_@qT`IFDaq)_Qah^@S5u_@{%I)m3+}iz&Cqy%c~FD~JkwNTKu?raO$H(4FCeC= zG?he&)>0>HYovT($IV_2qH;%RXlM$R{`Lha2?+^4)0(6I<_oIh@pluJ=6d7)#(JuO zCO*Z#-HD^G&yIiWe`d?T1BHxY=9rz&FNM7nk8BV_X7SOF`6kuS{&N+Sic#nW1lo%! 
z!KtZ&`N><@m0d0^Ee(u|i+c(FK!1P#?hcW4ubmH9IKk~OK%r$HNbcRc*9=LAx$c&F zqVhl@At7O`rM_MP$nuRkd!$8WkM4X@t1c12&CdiehI)R|y?ghNP%Br{z)VO&0uO8n u(sqY{+yI;6*fSwP)PDhJ37-qyrs0Td=*g`y&_^DL{59pPxmSL<|GxlF@#}j4 literal 0 HcmV?d00001 diff --git a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst index 5e50242969..58f8a2122c 100644 --- a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst +++ b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst @@ -464,6 +464,7 @@ Viewing components | sampling_gpu_power | GPU Power Usage via ROCm-SMI. Derived fro... | | sampling_gpu_temp | GPU Temperature via ROCm-SMI. Derived fro... | | sampling_gpu_busy | GPU Utilization (% busy) via ROCm-SMI. De... | + | sampling_vcn_busy | GPU VCN Utilization (% activity) via ROCm... | | sampling_gpu_memory_usage | GPU Memory Usage via ROCm-SMI. Derived fr... | |-----------------------------------|----------------------------------------------| diff --git a/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst b/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst index 66cb931202..de31e938d4 100644 --- a/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst +++ b/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst @@ -325,15 +325,23 @@ this file. .. image:: ../data/rocprof-sys-perfetto.png :alt: Visualization of a performance graph in Perfetto + :width: 800 .. image:: ../data/rocprof-sys-rocm.png :alt: Visualization of ROCm data in Perfetto + :width: 800 .. image:: ../data/rocprof-sys-rocm-flow.png :alt: Visualization of ROCm flow data in Perfetto + :width: 800 .. image:: ../data/rocprof-sys-user-api.png :alt: Visualization of ROCm API calls in Perfetto + :width: 800 + +.. 
image:: ../data/rocprof-sys-gpu-metrics.png + :alt: Visualization of ROCm GPU metrics in Perfetto + :width: 800 Timemory output ======================================== diff --git a/projects/rocprofiler-systems/source/lib/core/categories.hpp b/projects/rocprofiler-systems/source/lib/core/categories.hpp index 0f09f4f1b4..cdf122baf8 100644 --- a/projects/rocprofiler-systems/source/lib/core/categories.hpp +++ b/projects/rocprofiler-systems/source/lib/core/categories.hpp @@ -105,6 +105,7 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_busy, ROCPROFSYS_CATEGORY_ROCM_SMI ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_temp, ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP, "device_temp", "Temperature of a GPU device") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_power, ROCPROFSYS_CATEGORY_ROCM_SMI_POWER, "device_power", "Power consumption of a GPU device") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_memory_usage, ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE, "device_memory_usage", "Memory usage of a GPU device") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_vcn_activity, ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY, "device_vcn_activity", "VCN Activity of a GPU device") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl, ROCPROFSYS_CATEGORY_ROCM_RCCL, "rccl", "ROCm Communication Collectives Library (RCCL) regions") ROCPROFSYS_DEFINE_CATEGORY(category, pthread, ROCPROFSYS_CATEGORY_PTHREAD, "pthread", "POSIX threading functions") ROCPROFSYS_DEFINE_CATEGORY(category, kokkos, ROCPROFSYS_CATEGORY_KOKKOS, "kokkos", "KokkosTools regions") @@ -167,6 +168,7 @@ using name = perfetto_category; ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_temp), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_power), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_memory_usage), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_vcn_activity), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rccl), \ ROCPROFSYS_PERFETTO_CATEGORY(category::pthread), \ ROCPROFSYS_PERFETTO_CATEGORY(category::kokkos), \ diff --git 
a/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp b/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp index 8e9343d9d2..145c568a07 100644 --- a/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp +++ b/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp @@ -82,6 +82,8 @@ struct backtrace_gpu_power {}; struct backtrace_gpu_memory {}; +struct backtrace_gpu_vcn +{}; using sampling_wall_clock = data_tracker; using sampling_cpu_clock = data_tracker; using sampling_percent = data_tracker; @@ -89,6 +91,7 @@ using sampling_gpu_busy = data_tracker; using sampling_gpu_temp = data_tracker; using sampling_gpu_power = data_tracker; using sampling_gpu_memory = data_tracker; +using sampling_gpu_vcn = data_tracker; template @@ -121,6 +124,7 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy, fal ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_temp, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_power, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_memory, false_type) +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_vcn, false_type) #endif TIMEMORY_SET_COMPONENT_API(rocprofsys::component::roctracer, project::rocprofsys, @@ -152,6 +156,9 @@ TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_temp, project::ro tpls::rocm, device::gpu, os::supports_linux, category::temperature, category::sampling, category::process_sampling) +TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_vcn, project::rocprofsys, + tpls::rocm, device::gpu, os::supports_linux, + category::sampling, category::process_sampling) TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::roctracer, "roctracer", "High-precision ROCm API and kernel tracing", "") @@ -180,6 +187,10 @@ TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_power, 
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_temp, "sampling_gpu_temp", "GPU Temperature via ROCm-SMI", "Derived from sampling") +TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_vcn, + "sampling_gpu_vcn", + "GPU VCN Utilization (% activity) via ROCm-SMI", + "Derived from sampling") // statistics type TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_wall_clock, double) @@ -188,6 +199,7 @@ TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy, double) TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_temp, double) TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_power, double) TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_memory, double) +TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_vcn, double) TIMEMORY_STATISTICS_TYPE(rocprofsys::component::comm_data_tracker_t, float) // enable timing units @@ -219,6 +231,7 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy, false ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_temp, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_power, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_memory, false_type) +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_vcn, false_type) // reporting categories (mean) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_mean, component::sampling_percent, false_type) diff --git a/projects/rocprofiler-systems/source/lib/core/config.cpp b/projects/rocprofiler-systems/source/lib/core/config.cpp index b5f249d4ca..c8a9ba6963 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.cpp +++ b/projects/rocprofiler-systems/source/lib/core/config.cpp @@ -317,8 +317,8 @@ configure_settings(bool _init) ROCPROFSYS_CONFIG_SETTING( bool, "ROCPROFSYS_USE_ROCM_SMI", - "Enable sampling GPU power, temp, utilization, and memory usage", true, "backend", - "rocm_smi", "rocm", 
"process_sampling"); + "Enable sampling GPU power, temp, utilization, vcn_activity and memory usage", + true, "backend", "rocm_smi", "rocm", "process_sampling"); ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_SAMPLING", "Enable statistical sampling of call-stack", false, @@ -626,10 +626,11 @@ configure_settings(bool _init) rocprofiler_sdk::config_settings(_config); - ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_ROCM_SMI_METRICS", - "rocm-smi metrics to collect: busy, temp, power, mem_usage", - "busy,temp,power,mem_usage", "backend", "rocm_smi", "rocm", - "process_sampling", "advanced"); + ROCPROFSYS_CONFIG_SETTING( + std::string, "ROCPROFSYS_ROCM_SMI_METRICS", + "rocm-smi metrics to collect: busy, temp, power, vcn_activity, mem_usage", + "busy,temp,power,vcn_activity,mem_usage", "backend", "rocm_smi", "rocm", + "process_sampling", "advanced"); ROCPROFSYS_CONFIG_SETTING(size_t, "ROCPROFSYS_PERFETTO_SHMEM_SIZE_HINT_KB", "Hint for shared-memory buffer size in perfetto (in KB)", diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h b/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h index fbc17bc7cf..82db6199ae 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h @@ -57,6 +57,7 @@ extern "C" ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP, ROCPROFSYS_CATEGORY_ROCM_SMI_POWER, ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE, + ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY, ROCPROFSYS_CATEGORY_ROCM_RCCL, ROCPROFSYS_CATEGORY_SAMPLING, ROCPROFSYS_CATEGORY_PTHREAD, diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.cpp index 202a8cd854..e6fbe380b4 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.cpp +++ 
b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.cpp @@ -127,6 +127,7 @@ data::sample(uint32_t _dev_id) { auto _ts = tim::get_clock_real_now(); assert(_ts < std::numeric_limits::max()); + rsmi_gpu_metrics_t _gpu_metrics; auto _state = get_state().load(); @@ -159,6 +160,13 @@ data::sample(uint32_t _dev_id) &m_power, &power_type) ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).mem_usage, rsmi_dev_memory_usage_get, _dev_id, RSMI_MEM_TYPE_VRAM, &m_mem_usage); + ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).vcn_activity, + rsmi_dev_gpu_metrics_info_get, _dev_id, &_gpu_metrics); + + for(const auto& activity : _gpu_metrics.vcn_activity) + { + if(activity != UINT16_MAX) m_vcn_metrics.push_back(activity); + } #undef ROCPROFSYS_RSMI_GET } @@ -257,6 +265,7 @@ data::post_process(uint32_t _dev_id) using component::sampling_gpu_memory; using component::sampling_gpu_power; using component::sampling_gpu_temp; + using component::sampling_gpu_vcn; if(device_count < _dev_id) return; @@ -273,7 +282,7 @@ data::post_process(uint32_t _dev_id) auto _settings = get_settings(_dev_id); auto _process_perfetto = [&]() { - auto _idx = std::array{}; + auto _idx = std::array{}; { _idx.fill(_idx.size()); uint64_t nidx = 0; @@ -281,6 +290,7 @@ data::post_process(uint32_t _dev_id) if(_settings.temp) _idx.at(1) = nidx++; if(_settings.power) _idx.at(2) = nidx++; if(_settings.mem_usage) _idx.at(3) = nidx++; + if(_settings.vcn_activity) _idx.at(4) = nidx++; } for(auto& itr : _rocm_smi) @@ -301,6 +311,14 @@ data::post_process(uint32_t _dev_id) if(_settings.mem_usage) counter_track::emplace(_dev_id, addendum("Memory Usage"), "megabytes"); + if(_settings.vcn_activity) + { + for(std::size_t i = 0; i < std::size(itr.m_vcn_metrics); ++i) + counter_track::emplace( + _dev_id, + addendum(("VCN Activity on " + std::to_string(i)).c_str()), + "%"); + } } uint64_t _ts = itr.m_ts; if(!_thread_info->is_valid_time(_ts)) continue; @@ -322,6 +340,16 @@ data::post_process(uint32_t _dev_id) 
if(_settings.mem_usage) TRACE_COUNTER("device_memory_usage", counter_track::at(_dev_id, _idx.at(3)), _ts, _usage); + if(_settings.vcn_activity) + { + uint64_t idx = _idx.at(4); + for(const auto& temp : itr.m_vcn_metrics) + { + TRACE_COUNTER("device_vcn_activity", counter_track::at(_dev_id, idx), + _ts, temp); + ++idx; + } + } } }; @@ -411,6 +439,7 @@ setup() key_pair_t{ "temp", get_settings(dev_id).temp }, key_pair_t{ "power", get_settings(dev_id).power }, key_pair_t{ "mem_usage", get_settings(dev_id).mem_usage }, + key_pair_t{ "vcn_activity", get_settings(dev_id).vcn_activity }, }; get_settings(dev_id) = { false, false, false, false }; @@ -491,3 +520,7 @@ ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT( ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT( TIMEMORY_ESC(data_tracker), true, double) + +ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT( + TIMEMORY_ESC(data_tracker), true, + double) diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.hpp index ef1b3d4302..f6ea3f6538 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocm_smi.hpp @@ -71,10 +71,11 @@ device_count(); struct settings { - bool busy = true; - bool temp = true; - bool power = true; - bool mem_usage = true; + bool busy = true; + bool temp = true; + bool power = true; + bool mem_usage = true; + bool vcn_activity = true; }; struct data @@ -99,12 +100,13 @@ struct data static void post_process(uint32_t _dev_id); - uint32_t m_dev_id = std::numeric_limits::max(); - timestamp_t m_ts = 0; - busy_perc_t m_busy_perc = 0; - temp_t m_temp = 0; - power_t m_power = 0; - mem_usage_t m_mem_usage = 0; + uint32_t m_dev_id = std::numeric_limits::max(); + timestamp_t m_ts = 0; + busy_perc_t m_busy_perc = 0; + temp_t m_temp = 0; + power_t m_power = 0; + mem_usage_t m_mem_usage = 0; + std::vector m_vcn_metrics = {}; friend std::ostream& 
operator<<(std::ostream& _os, const data& _v) { @@ -179,5 +181,9 @@ ROCPROFSYS_DECLARE_EXTERN_COMPONENT( TIMEMORY_ESC(data_tracker), true, double) +ROCPROFSYS_DECLARE_EXTERN_COMPONENT( + TIMEMORY_ESC(data_tracker), true, + double) + # endif #endif diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp index 6615fcd145..8bd394cd6d 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp @@ -129,6 +129,7 @@ using component::sampling_gpu_busy; using component::sampling_gpu_memory; using component::sampling_gpu_power; using component::sampling_gpu_temp; +using component::sampling_gpu_vcn; using component::sampling_percent; using component::sampling_wall_clock; } // namespace sampling @@ -1572,6 +1573,12 @@ struct sampling_initialization sampling_gpu_temp::display_unit() = "degC"; sampling_gpu_temp::set_precision(1); sampling_gpu_temp::set_format_flags(sampling_gpu_temp::get_format_flags()); + + sampling_gpu_vcn::label() = "sampling_gpu_vcn_percent"; + sampling_gpu_vcn::description() = "Utilization of VCN(s)"; + sampling_gpu_vcn::set_precision(0); + sampling_gpu_vcn::set_format_flags(sampling_gpu_vcn::get_format_flags() & + std::ios_base::showpoint); } }; } // namespace From 64bb1ea13fbbc628b808a9acf4df86370329e796 Mon Sep 17 00:00:00 2001 From: Pranjal Swarup Date: Wed, 18 Dec 2024 17:34:02 -0500 Subject: [PATCH 11/12] Merge proto files from multiprocess run into one file. 
(#63) - Added script to merge multiprocess output automatically to one file when there are multiprocess proto files written into output folder - Execute the merge multiprocess script from the rank 0 process - Added the scripts folder path to env path, via setup-env.sh - Installed merge_multiprocess_output.sh to /share/rocprofiler-systems/bin dir Co-authored-by: David Galiffi [ROCm/rocprofiler-systems commit: 0263e951ff30b53ffb33248de26512d651ac0856] --- projects/rocprofiler-systems/CMakeLists.txt | 11 +++++ .../cmake/Templates/modulefile.in | 1 + .../cmake/Templates/setup-env.sh.in | 1 + .../scripts/merge-multiprocess-output.sh | 45 +++++++++++++++++++ .../source/lib/core/perfetto.cpp | 19 ++++++++ 5 files changed, 77 insertions(+) create mode 100755 projects/rocprofiler-systems/scripts/merge-multiprocess-output.sh diff --git a/projects/rocprofiler-systems/CMakeLists.txt b/projects/rocprofiler-systems/CMakeLists.txt index 68d13c4dc2..a19be6ac22 100644 --- a/projects/rocprofiler-systems/CMakeLists.txt +++ b/projects/rocprofiler-systems/CMakeLists.txt @@ -370,6 +370,11 @@ configure_file( ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCPROFSYS_VERSION} @ONLY) +configure_file( + ${PROJECT_SOURCE_DIR}/scripts/merge-multiprocess-output.sh + ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/bin/merge-multiprocess-output.sh + COPYONLY) + install( FILES ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/setup-env.sh ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/perfetto.cfg @@ -387,6 +392,12 @@ install( DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/${PROJECT_NAME} COMPONENT setup) +install( + PROGRAMS + ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/bin/merge-multiprocess-output.sh + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/bin/ + COMPONENT setup) + # ------------------------------------------------------------------------------# # # install diff 
--git a/projects/rocprofiler-systems/cmake/Templates/modulefile.in b/projects/rocprofiler-systems/cmake/Templates/modulefile.in index cf45f889d6..f1f09e40b9 100644 --- a/projects/rocprofiler-systems/cmake/Templates/modulefile.in +++ b/projects/rocprofiler-systems/cmake/Templates/modulefile.in @@ -11,6 +11,7 @@ set ROOT [file normalize [file dirname [file normalize ${ModulesCurrentModulefil setenv @PROJECT_NAME_UNDERSCORED@_ROOT "${ROOT}" prepend-path CMAKE_PREFIX_PATH "${ROOT}" prepend-path PATH "${ROOT}/bin" +prepend-path PATH "${ROOT}/@CMAKE_INSTALL_DATAROOTDIR@/@PROJECT_NAME@/bin" prepend-path LD_LIBRARY_PATH "${ROOT}/@CMAKE_INSTALL_LIBDIR@" prepend-path PYTHONPATH "${ROOT}/@CMAKE_INSTALL_PYTHONDIR@" setenv @PROJECT_NAME_UNDERSCORED@_DIR "${ROOT}/@CMAKE_INSTALL_DATAROOTDIR@/cmake/@PROJECT_NAME@" diff --git a/projects/rocprofiler-systems/cmake/Templates/setup-env.sh.in b/projects/rocprofiler-systems/cmake/Templates/setup-env.sh.in index 882c7838c0..4e086316c7 100644 --- a/projects/rocprofiler-systems/cmake/Templates/setup-env.sh.in +++ b/projects/rocprofiler-systems/cmake/Templates/setup-env.sh.in @@ -15,6 +15,7 @@ fi @PROJECT_NAME_UNDERSCORED@_ROOT=${BASEDIR} PATH=${BASEDIR}/bin:${PATH} +PATH=${BASEDIR}/@CMAKE_INSTALL_DATAROOTDIR@/@PROJECT_NAME@/bin:${PATH} LD_LIBRARY_PATH=${BASEDIR}/@CMAKE_INSTALL_LIBDIR@:${LD_LIBRARY_PATH} PYTHONPATH=${BASEDIR}/@CMAKE_INSTALL_PYTHONDIR@:${PYTHONPATH} CMAKE_PREFIX_PATH=${BASEDIR}:${CMAKE_PREFIX_PATH} diff --git a/projects/rocprofiler-systems/scripts/merge-multiprocess-output.sh b/projects/rocprofiler-systems/scripts/merge-multiprocess-output.sh new file mode 100755 index 0000000000..d257e45e7c --- /dev/null +++ b/projects/rocprofiler-systems/scripts/merge-multiprocess-output.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# Check if the folder path is provided +if [ -z "$1" ]; then + echo "Usage: $0 " + exit 1 +fi + +# Assign the folder path to a variable +FOLDER_PATH=$1 + +# Check if the folder exists +if [ ! 
-d "$FOLDER_PATH" ]; then + echo "Error: Folder '$FOLDER_PATH' does not exist." + exit 1 +fi + +# Check if there are more than one .proto files +PROTO_FILES=("$FOLDER_PATH"/*.proto) +if [ ${#PROTO_FILES[@]} -le 1 ]; then + exit 0 +fi + +echo "Merging multiprocess files ..." +# Check if all .proto files have been fully written or wait +TIMEOUT=60 # Timeout in seconds +for file in "${PROTO_FILES[@]}"; do + SECONDS=0 + while lsof "$file" > /dev/null 2>&1; do + if [ $SECONDS -ge $TIMEOUT ]; then + echo "Timeout reached while waiting for $file to be released." + break + fi + echo "Waiting for $file to be released..." + sleep 1 + done +done + +# Output file name +OUTPUT_FILE="merged.proto" + +# Merge all .proto files into one file +cat "$FOLDER_PATH"/*.proto > "$FOLDER_PATH"/"$OUTPUT_FILE" + +echo "All multiprocess .proto files in '$FOLDER_PATH' have been merged into '$OUTPUT_FILE'." diff --git a/projects/rocprofiler-systems/source/lib/core/perfetto.cpp b/projects/rocprofiler-systems/source/lib/core/perfetto.cpp index 427ca6e6f0..810cd71d3d 100644 --- a/projects/rocprofiler-systems/source/lib/core/perfetto.cpp +++ b/projects/rocprofiler-systems/source/lib/core/perfetto.cpp @@ -263,6 +263,25 @@ post_process(tim::manager* _timemory_manager, bool& _perfetto_output_error) _timemory_manager->add_file_output("protobuf", "perfetto", _filename); } ofs.close(); + + if(dmp::rank() == 0) + { + const char* file_path = _filename.c_str(); + auto folder_path = [](std::string_view _v) { + return tim::filepath::dirname(std::string(_v)); + }; + // Execute the merge script + std::string command = + "merge-multiprocess-output.sh '" + folder_path(file_path) + "'"; + int result = system(command.c_str()); + if(result != 0) + { + ROCPROFSYS_VERBOSE(0, + "Failed to execute merge-multiprocess-output.sh with " + "folder path: %s\n", + folder_path(file_path).c_str()); + } + } } else if(dmp::rank() == 0) { From 7eca1ca69d670bd13c5d038f547dbc4ae33f9099 Mon Sep 17 00:00:00 2001 From: David Galiffi 
Date: Wed, 18 Dec 2024 17:42:31 -0500 Subject: [PATCH 12/12] Update VERSION to 1.0.0 Bumping major version now that rocprofiler-sdk has been integrated. [ROCm/rocprofiler-systems commit: 33b043b11cdbe9f2dd191b9e953b845ce9874ec5] --- projects/rocprofiler-systems/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/rocprofiler-systems/VERSION b/projects/rocprofiler-systems/VERSION index 0ea3a944b3..3eefcb9dd5 100644 --- a/projects/rocprofiler-systems/VERSION +++ b/projects/rocprofiler-systems/VERSION @@ -1 +1 @@ -0.2.0 +1.0.0