From 83f9ed86962b2aece952bede7433aa3393b37720 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 8 Mar 2023 00:19:29 -0600 Subject: [PATCH] Python 3.11 support + update RedHat CPack (#254) * Fixes for Python 3.11 * Add python 3.11 to scripts - also tweak to to{upper,lower} bash functions * Fix PAPI RPM packaging in RedHat - fix error from #!/usr/bin/python in papi_hl_output_writer.py - requires either python2 or python3 instead of python * cpack updates - only generate STGZ for RedHat - support `--generators` arg in build-release.sh - support 7z, zip, and other zip generators - fix build-release.sh with `--mpi` - support setting CONDA_ROOT * Support rhel/fedora/centos in omnitrace-install.py * RedHat status badge * Fix support for Python 3.11 + tweak ubuntu ci - Remove installing clang and mpich in Ubuntu CI container - Fallback on conda-forge for Python 3.11 - Enable entrypoint-rhel.sh for RHEL CI - Pull latest container by default * Update ElfUtils and PAPI builds - quieter build output - disable-nls for ElfUtils - use -s flag for make * Development Guide Docs --- .github/workflows/cpack.yml | 23 +- README.md | 1 + cmake/ElfUtils.cmake | 8 +- cmake/PAPI.cmake | 27 +- cmake/Templates/omnitrace-install.py.in | 14 +- docker/Dockerfile.opensuse | 5 +- docker/Dockerfile.opensuse.ci | 6 +- docker/Dockerfile.rhel | 7 +- docker/Dockerfile.rhel.ci | 6 +- docker/Dockerfile.ubuntu | 5 +- docker/Dockerfile.ubuntu.ci | 8 +- docker/build-docker-ci.sh | 19 +- docker/build-docker-release.sh | 2 +- docker/build-docker.sh | 19 +- docker/entrypoint-rhel.sh | 11 +- docker/test-docker-release.sh | 4 +- scripts/build-release.sh | 111 +++++++-- scripts/run-ci.sh | 4 +- scripts/test-install.sh | 2 +- source/docs/development.md | 311 ++++++++++++++++++++++++ source/docs/index.md | 1 + source/python/libpyomnitrace.cpp | 55 ++++- 22 files changed, 570 insertions(+), 79 deletions(-) create mode 100644 source/docs/development.md diff --git a/.github/workflows/cpack.yml 
b/.github/workflows/cpack.yml index 94423260da..319a0e7043 100644 --- a/.github/workflows/cpack.yml +++ b/.github/workflows/cpack.yml @@ -161,6 +161,21 @@ jobs: run: | echo "CI_SCRIPT_ARGS=--rocm +python" >> $GITHUB_ENV + - name: Configure Ubuntu Generators + if: ${{ matrix.os-distro == 'ubuntu' }} + run: | + echo "CI_GENERATOR_ARGS=--generators STGZ DEB" >> $GITHUB_ENV + + - name: Configure OpenSUSE Generators + if: ${{ matrix.os-distro == 'opensuse' }} + run: | + echo "CI_GENERATOR_ARGS=--generators STGZ RPM" >> $GITHUB_ENV + + - name: Configure RedHat Generators + if: ${{ matrix.os-distro == 'rhel' }} + run: | + echo "CI_GENERATOR_ARGS=--generators STGZ" >> $GITHUB_ENV + - name: Build Base Container timeout-minutes: 30 run: | @@ -172,7 +187,7 @@ jobs: timeout-minutes: 150 run: | pushd docker - ./build-docker-release.sh --distro ${{ matrix.os-distro }} --versions ${{ matrix.os-version }} --rocm-versions ${{ matrix.rocm-version }} -- ${CI_SCRIPT_ARGS} + ./build-docker-release.sh --distro ${{ matrix.os-distro }} --versions ${{ matrix.os-version }} --rocm-versions ${{ matrix.rocm-version }} -- ${CI_SCRIPT_ARGS} ${CI_GENERATOR_ARGS} popd - name: List Files @@ -199,7 +214,7 @@ jobs: - name: RPM Artifacts timeout-minutes: 10 - if: ${{ matrix.os-distro != 'ubuntu' }} + if: ${{ matrix.os-distro == 'opensuse' }} uses: actions/upload-artifact@v3 with: name: omnitrace-rpm-${{ matrix.os-distro }}-${{ matrix.os-version }}-rocm-${{ matrix.rocm-version }}-installer @@ -252,7 +267,7 @@ jobs: - name: Test RPM Install timeout-minutes: 20 - if: ${{ matrix.os-distro != 'ubuntu' }} + if: ${{ matrix.os-distro == 'opensuse' }} run: | set -v for i in omnitrace-*.rpm @@ -262,7 +277,7 @@ jobs: - name: Upload RPM Release Assets uses: softprops/action-gh-release@v1 - if: matrix.os-distro != 'ubuntu' && startsWith(github.ref, 'refs/tags/') && github.repository == 'AMDResearch/omnitrace' + if: matrix.os-distro == 'opensuse' && startsWith(github.ref, 'refs/tags/') && github.repository == 
'AMDResearch/omnitrace' with: fail_on_unmatched_files: True files: | diff --git a/README.md b/README.md index ac117a16cf..c7660a91d3 100755 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![Ubuntu 20.04 with GCC, ROCm, and MPI](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-focal.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-focal.yml) [![Ubuntu 22.04 (GCC, Python, ROCm)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-jammy.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/ubuntu-jammy.yml) [![OpenSUSE 15.x with GCC](https://github.com/AMDResearch/omnitrace/actions/workflows/opensuse.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/opensuse.yml) +[![RedHat Linux (GCC, Python, ROCm)](https://github.com/AMDResearch/omnitrace/actions/workflows/redhat.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/redhat.yml) [![Installer Packaging (CPack)](https://github.com/AMDResearch/omnitrace/actions/workflows/cpack.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/cpack.yml) [![Documentation](https://github.com/AMDResearch/omnitrace/actions/workflows/docs.yml/badge.svg)](https://github.com/AMDResearch/omnitrace/actions/workflows/docs.yml) diff --git a/cmake/ElfUtils.cmake b/cmake/ElfUtils.cmake index ec17e542bd..0b5308b7a8 100644 --- a/cmake/ElfUtils.cmake +++ b/cmake/ElfUtils.cmake @@ -109,16 +109,16 @@ externalproject_add( CFLAGS=-fPIC\ -O3\ -Wno-error=null-dereference CXX=${ElfUtils_CXX_COMPILER} CXXFLAGS=-fPIC\ -O3\ -Wno-error=null-dereference [=[LDFLAGS=-Wl,-rpath='$$ORIGIN']=] /configure --enable-install-elfh - --prefix=${_eu_root} --disable-libdebuginfod --disable-debuginfod - --enable-thread-safety - BUILD_COMMAND ${MAKE_COMMAND} install + --prefix=${_eu_root} --disable-libdebuginfod --disable-debuginfod --disable-nls + --enable-thread-safety --enable-silent-rules + BUILD_COMMAND ${MAKE_COMMAND} 
install -s BUILD_BYPRODUCTS "${_eu_build_byproducts}" INSTALL_COMMAND "") # target for re-executing the installation add_custom_target( omnitrace-elfutils-install - COMMAND ${MAKE_COMMAND} install + COMMAND ${MAKE_COMMAND} install -s WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/external/elfutils/src/ElfUtils-External COMMENT "Installing ElfUtils...") diff --git a/cmake/PAPI.cmake b/cmake/PAPI.cmake index c24a166d3a..af8a207e6f 100644 --- a/cmake/PAPI.cmake +++ b/cmake/PAPI.cmake @@ -206,13 +206,13 @@ externalproject_add( PATCH_COMMAND ${CMAKE_COMMAND} -E env CC=${PAPI_C_COMPILER} CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation LIBS=-lrt LDFLAGS=-lrt - ${OMNITRACE_PAPI_EXTRA_ENV} /configure + ${OMNITRACE_PAPI_EXTRA_ENV} /configure --quiet --prefix=${OMNITRACE_PAPI_INSTALL_DIR} --with-static-lib=yes --with-shared-lib=no --with-perf-events --with-tests=no --with-components=${_OMNITRACE_PAPI_COMPONENTS} CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation - ${OMNITRACE_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} static install + ${OMNITRACE_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} static install -s BUILD_COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation - ${OMNITRACE_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} utils install-utils + ${OMNITRACE_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} utils install-utils -s INSTALL_COMMAND "" BUILD_BYPRODUCTS "${_OMNITRACE_PAPI_BUILD_BYPRODUCTS}") @@ -220,9 +220,9 @@ externalproject_add( add_custom_target( omnitrace-papi-install COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation - ${OMNITRACE_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} static install + ${OMNITRACE_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} static install -s COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation - ${OMNITRACE_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} utils install-utils + ${OMNITRACE_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} utils install-utils -s WORKING_DIRECTORY ${OMNITRACE_PAPI_SOURCE_DIR}/src COMMENT "Installing 
PAPI...") @@ -288,7 +288,24 @@ foreach( papi_native_avail papi_version papi_xml_event_info) + string(REPLACE "_" "-" _UTIL_EXE_INSTALL_NAME "omnitrace-${_UTIL_EXE}") + + # RPM installer on RedHat/RockyLinux throws error that #!/usr/bin/python should either + # be #!/usr/bin/python2 or #!/usr/bin/python3 + if(_UTIL_EXE STREQUAL "papi_hl_output_writer.py") + file( + READ + "${PROJECT_BINARY_DIR}/external/papi/source/src/high-level/scripts/${_UTIL_EXE}" + _HL_OUTPUT_WRITER) + string(REPLACE "#!/usr/bin/python\n" "#!/usr/bin/python3\n" _HL_OUTPUT_WRITER + "${_HL_OUTPUT_WRITER}") + file(MAKE_DIRECTORY "${OMNITRACE_PAPI_INSTALL_DIR}/bin") + file(WRITE "${OMNITRACE_PAPI_INSTALL_DIR}/bin/${_UTIL_EXE}3" + "${_HL_OUTPUT_WRITER}") + set(_UTIL_EXE "${_UTIL_EXE}3") + endif() + install( PROGRAMS ${OMNITRACE_PAPI_INSTALL_DIR}/bin/${_UTIL_EXE} DESTINATION ${CMAKE_INSTALL_BINDIR} diff --git a/cmake/Templates/omnitrace-install.py.in b/cmake/Templates/omnitrace-install.py.in index 9bed2498bd..5a149575ef 100755 --- a/cmake/Templates/omnitrace-install.py.in +++ b/cmake/Templates/omnitrace-install.py.in @@ -55,7 +55,6 @@ def get_rocm_version(rocm_hint): def get_os_info(os_distrib, os_version): - _os_info = {} with open("/etc/os-release", "r") as f: for line in [_v.strip() for _v in f.readlines()]: @@ -67,10 +66,18 @@ def get_os_info(os_distrib, os_version): os_distrib = "ubuntu" elif "suse" in _os_info["ID_LIKE"]: os_distrib = "opensuse" + elif "rhel" in _os_info["ID_LIKE"]: + os_distrib = "rhel" + elif "fedora" in _os_info["ID_LIKE"]: + os_distrib = "rhel" + elif "centos" in _os_info["ID_LIKE"]: + os_distrib = "rhel" else: raise RuntimeError( "Unknown ID_LIKE value in /etc/os-release: {}".format(_os_info["ID_LIKE"]) ) + elif os_distrib == "fedora" or os_distrib == "centos": + os_distrib = "rhel" if os_version is None: @@ -84,7 +91,6 @@ def get_os_info(os_distrib, os_version): def print_log(*args, **kwargs): - sys.stdout.flush() sys.stderr.flush() sys.stderr.write("### ") @@ -94,14 
+100,12 @@ def print_log(*args, **kwargs): def run(*args, **kwargs): - print_log("Executing: {}\n".format(" ".join(*args))) sp.run(*args, **kwargs, check=True) sys.stderr.write("\n") if __name__ == "__main__": - parser = argparse.ArgumentParser() parser.add_argument( @@ -131,7 +135,7 @@ if __name__ == "__main__": help="Target OS distribution", type=str, default=None, - choices=("auto", "ubuntu", "opensuse"), + choices=("auto", "ubuntu", "opensuse", "rhel", "centos", "fedora"), ) parser.add_argument( "-v", "--os-version", help="Target OS version", type=str, default=None diff --git a/docker/Dockerfile.opensuse b/docker/Dockerfile.opensuse index 9bc9c29eab..871cf5da44 100644 --- a/docker/Dockerfile.opensuse +++ b/docker/Dockerfile.opensuse @@ -31,14 +31,15 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ zypper clean --all; \ fi -ARG PYTHON_VERSIONS="6 7 8 9 10" +ARG PYTHON_VERSIONS="6 7 8 9 10 11" RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ bash miniconda.sh -b -p /opt/conda && \ export PATH="/opt/conda/bin:${PATH}" && \ conda config --set always_yes yes --set changeps1 no && \ conda update -c defaults -n base conda && \ - for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults python=3.${i} pip; done && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip; done && \ + for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \ conda clean -a -y && \ conda init diff --git a/docker/Dockerfile.opensuse.ci b/docker/Dockerfile.opensuse.ci index 68a4f98267..0e6de38cf5 100644 --- a/docker/Dockerfile.opensuse.ci +++ b/docker/Dockerfile.opensuse.ci @@ -33,16 +33,16 @@ RUN cd /tmp/dyninst && \ shopt -s dotglob extglob && \ rm -rf * -ARG PYTHON_VERSIONS="6 7 8 9 10" +ARG PYTHON_VERSIONS="6 7 8 9 10 11" RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 
&& \ bash miniconda.sh -b -p /opt/conda && \ export PATH="/opt/conda/bin:${PATH}" && \ conda config --set always_yes yes --set changeps1 no && \ conda update -c defaults -n base conda && \ - for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults python=3.${i} pip numpy; done && \ - conda clean -a -y && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip numpy; done && \ for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \ + conda clean -a -y && \ cd /tmp && \ shopt -s dotglob extglob && \ rm -rf * diff --git a/docker/Dockerfile.rhel b/docker/Dockerfile.rhel index 8f4bf66373..a42fd426b4 100644 --- a/docker/Dockerfile.rhel +++ b/docker/Dockerfile.rhel @@ -34,14 +34,15 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ yum clean all; \ fi -ARG PYTHON_VERSIONS="6 7 8 9 10" +ARG PYTHON_VERSIONS="6 7 8 9 10 11" RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ bash miniconda.sh -b -p /opt/conda && \ export PATH="/opt/conda/bin:${PATH}" && \ conda config --set always_yes yes --set changeps1 no && \ conda update -c defaults -n base conda && \ - for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults python=3.${i} pip; done && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip; done && \ + for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \ conda clean -a -y && \ conda init @@ -50,3 +51,5 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then ln -sf /opt/rocm-${ROCM_VERSION}* /o WORKDIR /home ENV LC_ALL C.UTF-8 SHELL [ "/bin/bash", "--login", "-c" ] +COPY ./entrypoint-rhel.sh /docker-entrypoint.sh +ENTRYPOINT [ "/docker-entrypoint.sh" ] diff --git a/docker/Dockerfile.rhel.ci b/docker/Dockerfile.rhel.ci index fcf348a97e..67b91a017a 100644 --- 
a/docker/Dockerfile.rhel.ci +++ b/docker/Dockerfile.rhel.ci @@ -33,16 +33,16 @@ RUN cd /tmp/dyninst && \ shopt -s dotglob extglob && \ rm -rf * -ARG PYTHON_VERSIONS="6 7 8 9 10" +ARG PYTHON_VERSIONS="6 7 8 9 10 11" RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ bash miniconda.sh -b -p /opt/conda && \ export PATH="/opt/conda/bin:${PATH}" && \ conda config --set always_yes yes --set changeps1 no && \ conda update -c defaults -n base conda && \ - for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults python=3.${i} pip numpy; done && \ - conda clean -a -y && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip numpy; done && \ for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \ + conda clean -a -y && \ cd /tmp && \ shopt -s dotglob extglob && \ rm -rf * diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index f61d3d27b3..9d0412b5d3 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -18,7 +18,7 @@ ARG EXTRA_PACKAGES="" ARG ROCM_REPO_VERSION="debian" ARG ROCM_VERSION="0.0" ARG ROCM_REPO_DIST="ubuntu" -ARG PYTHON_VERSIONS="6 7 8 9 10" +ARG PYTHON_VERSIONS="6 7 8 9 10 11" ENV PATH ${HOME}/.local/bin:${PATH} RUN apt-get update && \ @@ -48,7 +48,8 @@ RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - export PATH="/opt/conda/bin:${PATH}" && \ conda config --set always_yes yes --set changeps1 no && \ conda update -c defaults -n base conda && \ - for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults python=3.${i} pip; done && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip; done && \ + for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \ conda clean -a -y && \ conda init diff --git 
a/docker/Dockerfile.ubuntu.ci b/docker/Dockerfile.ubuntu.ci index 0446bec1f5..6395d4fe42 100644 --- a/docker/Dockerfile.ubuntu.ci +++ b/docker/Dockerfile.ubuntu.ci @@ -16,7 +16,7 @@ ARG EXTRA_PACKAGES="" ARG ELFUTILS_DOWNLOAD_VERSION="0.186" ARG BOOST_DOWNLOAD_VERSION="1.79.0" ARG NJOBS="12" -ARG PYTHON_VERSIONS="6 7 8 9 10" +ARG PYTHON_VERSIONS="6 7 8 9 10 11" ENV PATH /usr/local/bin:${PATH} ENV LIBRARY_PATH /usr/local/lib:/usr/local/lib64:${LIBRARY_PATH} @@ -27,7 +27,7 @@ COPY ./dyninst-source /tmp/dyninst RUN apt-get update && \ apt-get dist-upgrade -y && \ - apt-get install -y autoconf autotools-dev bash-completion build-essential bzip2 clang cmake curl environment-modules git-core gnupg2 gzip libiberty-dev libmpich-dev libpapi-dev libpfm4-dev libtool locales lsb-release m4 mpich python3-pip unzip wget zip zlib1g-dev && \ + apt-get install -y autoconf autotools-dev bash-completion build-essential bzip2 cmake curl environment-modules git-core gnupg2 gzip libiberty-dev libpapi-dev libpfm4-dev libtool locales lsb-release m4 python3-pip unzip wget zip zlib1g-dev && \ python3 -m pip install 'cmake==3.18.4' && \ apt-get autoclean && \ locale -a && \ @@ -40,9 +40,9 @@ RUN apt-get update && \ export PATH="/opt/conda/bin:${PATH}" && \ conda config --set always_yes yes --set changeps1 no && \ conda update -c defaults -n base conda && \ - for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults python=3.${i} pip numpy; done && \ - conda clean -a -y && \ + for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip numpy; done && \ for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \ + conda clean -a -y && \ cd /tmp && \ shopt -s dotglob extglob && \ rm -rf * diff --git a/docker/build-docker-ci.sh b/docker/build-docker-ci.sh index fd1f0e1908..92bb339253 100755 --- a/docker/build-docker-ci.sh +++ b/docker/build-docker-ci.sh @@ -8,7 +8,9 @@ set -e : 
${NJOBS=$(nproc)} : ${ELFUTILS_VERSION:=0.186} : ${BOOST_VERSION:=1.79.0} +: ${PYTHON_VERSIONS:="6 7 8 9 10 11"} : ${PUSH:=0} +: ${PULL:=--pull} verbose-run() { @@ -18,12 +20,12 @@ verbose-run() tolower() { - echo "$@" | awk -F '\|~\|' '{print tolower($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print tolower($1)}'; } toupper() { - echo "$@" | awk -F '\|~\|' '{print toupper($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print toupper($1)}'; } usage() @@ -32,11 +34,13 @@ usage() echo "Options:" print_option "help -h" "" "This message" print_option "push" "" "Push the container to DockerHub when completed" + print_option "no-pull" "" "Do not pull down most recent base container" echo "" print_default_option() { printf " --%-20s %-24s %s (default: %s)\n" "${1}" "${2}" "${3}" "$(tolower ${4})"; } print_default_option distro "[ubuntu|opensuse|rhel]" "OS distribution" "${DISTRO}" print_default_option versions "[VERSION] [VERSION...]" "Ubuntu, OpenSUSE, or RHEL release" "${VERSIONS}" + print_default_option python-versions "[VERSION] [VERSION...]" "Python 3 minor releases" "${PYTHON_VERSIONS}" print_default_option "jobs -j" "[N]" "parallel build jobs" "${NJOBS}" print_default_option elfutils-version "[0.183..0.186]" "ElfUtils version" "${ELFUTILS_VERSION}" print_default_option boost-version "[1.67.0..1.79.0]" "Boost version" "${BOOST_VERSION}" @@ -75,6 +79,11 @@ do VERSIONS=${1} last() { VERSIONS="${VERSIONS} ${1}"; } ;; + "--python-versions") + shift + PYTHON_VERSIONS=${1} + last() { PYTHON_VERSIONS="${PYTHON_VERSIONS} ${1}"; } + ;; --jobs|-j) shift NJOBS=${1} @@ -99,6 +108,10 @@ do PUSH=1 reset-last ;; + "--no-pull") + PULL="" + reset-last + ;; --*) reset-last last ${1} @@ -137,11 +150,13 @@ fi for VERSION in ${VERSIONS} do verbose-run docker build . 
\ + ${PULL} \ -f ${DOCKER_FILE} \ --tag ${USER}/omnitrace:ci-base-${DISTRO}-${VERSION} \ --build-arg DISTRO=${DISTRO_IMAGE} \ --build-arg VERSION=${VERSION} \ --build-arg NJOBS=${NJOBS} \ + --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" \ --build-arg ELFUTILS_DOWNLOAD_VERSION=${ELFUTILS_VERSION} \ --build-arg BOOST_DOWNLOAD_VERSION=${BOOST_VERSION} done diff --git a/docker/build-docker-release.sh b/docker/build-docker-release.sh index 4b779645d4..3d99412f33 100755 --- a/docker/build-docker-release.sh +++ b/docker/build-docker-release.sh @@ -101,7 +101,7 @@ reset-last : ${VERSIONS:=20.04 18.04} : ${ROCM_VERSIONS:=5.0 4.5 4.3} : ${MPI:=0} -: ${PYTHON_VERSIONS:="6 7 8 9 10"} +: ${PYTHON_VERSIONS:="6 7 8 9 10 11"} : ${RETRY:=3} n=0 diff --git a/docker/build-docker.sh b/docker/build-docker.sh index 33ace904a8..84f98eb848 100755 --- a/docker/build-docker.sh +++ b/docker/build-docker.sh @@ -4,21 +4,22 @@ : ${ROCM_VERSIONS:="5.0"} : ${DISTRO:=ubuntu} : ${VERSIONS:=20.04} -: ${PYTHON_VERSIONS:="6 7 8 9 10"} +: ${PYTHON_VERSIONS:="6 7 8 9 10 11"} : ${BUILD_CI:=""} : ${PUSH:=0} +: ${PULL:=--pull} : ${RETRY:=3} set -e tolower() { - echo "$@" | awk -F '\|~\|' '{print tolower($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print tolower($1)}'; } toupper() { - echo "$@" | awk -F '\|~\|' '{print toupper($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print toupper($1)}'; } usage() @@ -26,6 +27,7 @@ usage() print_option() { printf " --%-20s %-24s %s\n" "${1}" "${2}" "${3}"; } echo "Options:" print_option "help -h" "" "This message" + print_option "no-pull" "" "Do not pull down most recent base container" echo "" print_default_option() { printf " --%-20s %-24s %s (default: %s)\n" "${1}" "${2}" "${3}" "$(tolower ${4})"; } @@ -118,6 +120,11 @@ do ;; --push) PUSH=1 + reset-last + ;; + --no-pull) + PULL="" + reset-last ;; --retry|-r) shift @@ -184,7 +191,7 @@ do *) ;; esac - verbose-build docker build . 
-f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg ROCM_REPO_VERSION=${ROCM_REPO_VERSION} --build-arg ROCM_REPO_DIST=${ROCM_REPO_DIST} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" + verbose-build docker build . ${PULL} -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg ROCM_REPO_VERSION=${ROCM_REPO_VERSION} --build-arg ROCM_REPO_DIST=${ROCM_REPO_DIST} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" elif [ "${DISTRO}" = "rhel" ]; then if [ -z "${VERSION_MINOR}" ]; then send-error "Please provide a major and minor version of the OS. Supported: >= 8.7, <= 9.1" @@ -216,7 +223,7 @@ do # use Rocky Linux as a base image for RHEL builds DISTRO_BASE_IMAGE=rockylinux - verbose-build docker build . -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO_BASE_IMAGE} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg AMDGPU_RPM=${ROCM_RPM} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" + verbose-build docker build . ${PULL} -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO_BASE_IMAGE} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg AMDGPU_RPM=${ROCM_RPM} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" elif [ "${DISTRO}" = "opensuse" ]; then case "${VERSION}" in 15.*) @@ -253,7 +260,7 @@ do ;; esac PERL_REPO="SLE_${VERSION_MAJOR}_SP${VERSION_MINOR}" - verbose-build docker build . -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO_IMAGE} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg AMDGPU_RPM=${ROCM_RPM} --build-arg PERL_REPO=${PERL_REPO} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" + verbose-build docker build . 
${PULL} -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO_IMAGE} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg AMDGPU_RPM=${ROCM_RPM} --build-arg PERL_REPO=${PERL_REPO} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" fi if [ "${PUSH}" -ne 0 ]; then docker push ${CONTAINER} diff --git a/docker/entrypoint-rhel.sh b/docker/entrypoint-rhel.sh index c337ee8425..f3968fbece 100755 --- a/docker/entrypoint-rhel.sh +++ b/docker/entrypoint-rhel.sh @@ -1,10 +1,13 @@ -#!/bin/bash +#!/bin/bash -l -source /etc/profile.d/modules.sh -module load mpi +if [ -f /etc/profile.d/modules.sh ]; then + source /etc/profile.d/modules.sh + module load mpi &> /dev/null +fi if [ -z "${1}" ]; then - exec bash + : ${SHELL:=/bin/bash} + exec ${SHELL} else set -e eval $@ diff --git a/docker/test-docker-release.sh b/docker/test-docker-release.sh index c65d1b3786..c57f68f5ae 100755 --- a/docker/test-docker-release.sh +++ b/docker/test-docker-release.sh @@ -13,12 +13,12 @@ set -e tolower() { - echo "$@" | awk -F '\|~\|' '{print tolower($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print tolower($1)}'; } toupper() { - echo "$@" | awk -F '\|~\|' '{print toupper($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print toupper($1)}'; } usage() diff --git a/scripts/build-release.sh b/scripts/build-release.sh index 28905db0c7..5f789e76b1 100755 --- a/scripts/build-release.sh +++ b/scripts/build-release.sh @@ -1,5 +1,6 @@ #!/bin/bash -e +: ${VERBOSE:=0} : ${EXTRA_ARGS:=""} : ${BUILD_DIR:=build-release} : ${VERSION:=0.0.4} @@ -14,7 +15,7 @@ : ${MAX_THREADS:=2048} : ${PERFETTO_TOOLS:="ON"} : ${HIDDEN_VIZ:="ON"} -: ${PYTHON_VERSIONS:="6 7 8 9 10"} +: ${PYTHON_VERSIONS:="6 7 8 9 10 11"} : ${GENERATORS:="STGZ DEB RPM"} : ${MPI_IMPL:="openmpi"} : ${CLEAN:=0} @@ -24,6 +25,7 @@ : ${WITH_ROCM:=0} : ${WITH_ROCM_MPI:=0} : ${IS_DOCKER:=0} +: ${CONDA_ROOT:=/opt/conda} if [ -z "${DISTRO}" ]; then if [ -f /etc/os-release ]; then @@ -35,17 +37,17 @@ fi tolower() { - echo "$@" | awk 
-F '\|~\|' '{print tolower($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print tolower($1)}'; } toupper() { - echo "$@" | awk -F '\|~\|' '{print toupper($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print toupper($1)}'; } usage() { - print_option() { printf " --%-10s %-24s %s\n" "${1}" "${2}" "${3}"; } + print_option() { printf " --%-10s %-36s %s\n" "${1}" "${2}" "${3}"; } echo "Options:" python_info="(Use '+nopython' to build w/o python, use '+python' to python build with python)" print_option core "[+nopython] [+python]" "Core ${python_info}" @@ -55,7 +57,7 @@ usage() print_option mpi-impl "[openmpi|mpich]" "MPI implementation" echo "" - print_default_option() { printf " --%-20s %-14s %s (default: %s)\n" "${1}" "${2}" "${3}" "$(tolower ${4})"; } + print_default_option() { printf " --%-20s %-26s %s (default: %s)\n" "${1}" "${2}" "${3}" "$(tolower ${4})"; } print_default_option lto "[on|off]" "Enable LTO" "${LTO}" print_default_option strip "[on|off]" "Strip libraries" "${STRIP}" print_default_option perfetto-tools "[on|off]" "Install perfetto tools" "${PERFETTO_TOOLS}" @@ -64,8 +66,23 @@ usage() print_default_option hidden-visibility "[on|off]" "Build with hidden visibility" "${HIDDEN_VIZ}" print_default_option max-threads "N" "Max number of threads supported" "${MAX_THREADS}" print_default_option parallel "N" "Number of parallel build jobs" "${NJOBS}" + print_default_option generators "[STGZ][DEB][RPM][+others]" "CPack generators" "${GENERATORS}" } +send-error() +{ + usage + echo -e "\nError: ${@}" + exit 1 +} + +reset-last() +{ + last() { send-error "Unsupported argument :: ${1}"; } +} + +reset-last + while [[ $# -gt 0 ]] do ARG=${1} @@ -75,10 +92,12 @@ do case "${ARG}" in --clean) CLEAN=1 + reset-last continue ;; --fresh) FRESH=1 + reset-last continue ;; esac @@ -103,56 +122,72 @@ do ;; --core) WITH_CORE=${VAL} + reset-last ;; --mpi) WITH_MPI=${VAL} + reset-last ;; --rocm) WITH_ROCM=${VAL} + reset-last ;; --rocm-mpi) WITH_ROCM_MPI=${VAL} + reset-last ;; --mpi-impl) 
MPI_IMPL=${1} shift + reset-last ;; --lto) LTO=$(toupper ${1}) shift + reset-last ;; --static-libgcc) LIBGCC=$(toupper ${1}) shift + reset-last ;; --static-libstdcxx) LIBSTDCXX=$(toupper ${1}) shift + reset-last ;; --strip) STRIP=$(toupper ${1}) shift + reset-last ;; --hidden-visibility) HIDDEN_VIZ=$(toupper ${1}) shift + reset-last ;; --perfetto-tools) PERFETTO_TOOLS=$(toupper ${1}) shift + reset-last ;; --max-threads) MAX_THREADS=${1} shift + reset-last ;; --parallel) NJOBS=${1} shift + reset-last + ;; + --generators) + GENERATORS=$(toupper ${1}) + shift + last() { GENERATORS="${GENERATORS} $(toupper ${1})"; } ;; *) - echo -e "Error! Unknown option : ${ARG}" - usage - exit 1 + last ${ARG} ${1} ;; esac done @@ -210,7 +245,9 @@ build-and-package-base() fi pushd ${BUILD_DIR}/${DIR} verbose-run cat CPackConfig.cmake - verbose-run cat cmake_install.cmake + if [ "${VERBOSE}" -gt 0 ]; then + verbose-run cat cmake_install.cmake + fi popd verbose-run cmake --build ${BUILD_DIR}/${DIR} --target all --parallel ${NJOBS} verbose-run cmake --build ${BUILD_DIR}/${DIR} --target install --parallel ${NJOBS} @@ -233,12 +270,54 @@ build-and-package-base() SEP="_" DEST="deb" ;; - DEB | RPM) + RPM) verbose-run cpack -G RPM -D CPACK_PACKAGING_INSTALL_PREFIX=/opt/omnitrace EXT="rpm" SEP="-" DEST="rpm" ;; + 7Z | 7ZIP) + verbose-run cpack -G 7Z + EXT="7z" + SEP="-" + DEST="zip" + ;; + TBZ2) + verbose-run cpack -G TBZ2 + EXT="tar.bz2" + SEP="-" + DEST="zip" + ;; + TGZ) + verbose-run cpack -G TGZ + EXT="tar.gz" + SEP="-" + DEST="zip" + ;; + TXZ) + verbose-run cpack -G TXZ + EXT="tar.xz" + SEP="-" + DEST="zip" + ;; + TZ) + verbose-run cpack -G TZ + EXT="tar.Z" + SEP="-" + DEST="zip" + ;; + TZST) + verbose-run cpack -G TZST + EXT="tar.zst" + SEP="-" + DEST="zip" + ;; + ZIP) + verbose-run cpack -G ZIP + EXT="zip" + SEP="-" + DEST="zip" + ;; *) echo "Unsupported cpack generator: ${i}" continue @@ -284,14 +363,16 @@ build-and-package() fi } -if [ -d /opt/conda/bin ]; then - export 
PATH=${PATH}:/opt/conda/bin +if [ -d ${CONDA_ROOT}/bin ]; then + export PATH=${PATH}:${CONDA_ROOT}/bin source activate fi if [ "${IS_DOCKER}" -ne 0 ]; then git config --global --add safe.directory ${PWD}; fi -build-and-package ${WITH_CORE} ${DISTRO}-core -DOMNITRACE_USE_HIP=OFF -build-and-package ${WITH_MPI} ${DISTRO}-${MPI_IMPL} -DOMNITRACE_USE_HIP=ON -build-and-package ${WITH_ROCM} ${DISTRO}-rocm-${ROCM_VERSION} -DOMNITRACE_USE_HIP=ON +verbose-run echo "Build omnitrace installers with generators: ${GENERATORS}" + +build-and-package ${WITH_CORE} ${DISTRO}-core -DOMNITRACE_USE_HIP=OFF -DOMNITRACE_USE_MPI=OFF +build-and-package ${WITH_MPI} ${DISTRO}-${MPI_IMPL} -DOMNITRACE_USE_HIP=OFF -DOMNITRACE_USE_MPI=ON +build-and-package ${WITH_ROCM} ${DISTRO}-rocm-${ROCM_VERSION} -DOMNITRACE_USE_HIP=ON -DOMNITRACE_USE_MPI=OFF build-and-package ${WITH_ROCM_MPI} ${DISTRO}-rocm-${ROCM_VERSION}-${MPI_IMPL} -DOMNITRACE_USE_HIP=ON -DOMNITRACE_USE_MPI=ON diff --git a/scripts/run-ci.sh b/scripts/run-ci.sh index 9ac5c72561..f66fa66697 100755 --- a/scripts/run-ci.sh +++ b/scripts/run-ci.sh @@ -5,12 +5,12 @@ cd $(dirname ${SCRIPT_DIR}) tolower() { - echo "$@" | awk -F '\|~\|' '{print tolower($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print tolower($1)}'; } toupper() { - echo "$@" | awk -F '\|~\|' '{print toupper($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print toupper($1)}'; } : ${CMAKE_BUILD_PARALLEL_LEVEL:=$(nproc)} diff --git a/scripts/test-install.sh b/scripts/test-install.sh index 19052dc7af..f0859a8ca6 100755 --- a/scripts/test-install.sh +++ b/scripts/test-install.sh @@ -21,7 +21,7 @@ verbose-run() toupper() { - echo "$@" | awk -F '\|~\|' '{print toupper($1)}'; + echo "$@" | awk -F '\\|~\\|' '{print toupper($1)}'; } get-bool() diff --git a/source/docs/development.md b/source/docs/development.md new file mode 100644 index 0000000000..3720045f67 --- /dev/null +++ b/source/docs/development.md @@ -0,0 +1,311 @@ +# Development Guide + +## Miscellaneous Info + +- [CDash Testing 
Dashboard](https://my.cdash.org/index.php?project=Omnitrace) + - requires login to view + +## Executables + +### omnitrace-avail: [source/bin/omnitrace-avail](https://github.com/AMDResearch/omnitrace/tree/main/source/bin/omnitrace-avail) + +The main of `omnitrace-avail` has three important sections: + +1. Printing components +2. Printing options +3. Printing hardware counters + +### omnitrace-sample: [source/bin/omnitrace-sample](https://github.com/AMDResearch/omnitrace/tree/main/source/bin/omnitrace-sample) + +General design: + +- Requires a command-line format of `omnitrace-sample <options> -- <command> <command-args>` +- Translates command line options into environment variables +- Adds `libomnitrace-dl.so` to `LD_PRELOAD` +- Application is launched via `execvpe` with `<command> <command-args>` and modified environment + +### omnitrace-causal: [source/bin/omnitrace-causal](https://github.com/AMDResearch/omnitrace/tree/main/source/bin/omnitrace-causal) + +Nearly identical design to [omnitrace-sample](#omnitrace-sample-sourcebinomnitrace-sample) when +there is exactly one causal profiling configuration variant (this enables debugging). 
+ +When more than one causal profiling configuration variant is produced from command-line options, +for each variant: + +- `omnitrace-causal` calls `fork()` +- child process launches `<command> <command-args>` via `execvpe` with a modified environment for that variant +- parent process waits for child process to finish + +### omnitrace: [source/bin/omnitrace](https://github.com/AMDResearch/omnitrace/tree/main/source/bin/omnitrace) + +- Requires a command-line format of `omnitrace <options> -- <command> <command-args>` +- User specifies in options whether they want to do runtime instrumentation, binary rewrite, or attach to process +- Either opens the instrumentation target (binary rewrite), launches the target and stops it before it starts executing main (runtime), or + attaches to running executable and pauses it +- Finds all functions in target(s) +- Finds `libomnitrace-dl` and finds the functions +- Iterates over all the functions and instruments them as long as they satisfy the defined criteria (minimum number of instructions, etc.) + - See the `module_function` class +- Most of the workflow is the same up to this point, but once the instrumentation is complete, it diverges + - For a binary rewrite: outputs new instrumented binary and exits + - For runtime instrumentation or attaching to a process: instructs the application to resume executing and then waits for the application to exit + +### omnitrace-critical-trace: [source/bin/omnitrace-critical-trace](https://github.com/AMDResearch/omnitrace/tree/main/source/bin/omnitrace-critical-trace) + +Post-processing tool for critical-trace data output by omnitrace. + +## Libraries + +### Common Library: [source/lib/common](https://github.com/AMDResearch/omnitrace/tree/main/source/lib/common) + +General header-only functionality used in multiple executables and/or libraries. Not installed or exported outside of the build tree.
+ +### Core Library: [source/lib/core](https://github.com/AMDResearch/omnitrace/tree/main/source/lib/core) + +Static PIC library with functionality that does not depend on any components. Not installed or exported outside of the build tree. + +### Binary Library: [source/lib/binary](https://github.com/AMDResearch/omnitrace/tree/main/source/lib/binary) + +Static PIC library with functionality for reading/analyzing binary info. Mostly used by the causal profiling sections +of [libomnitrace](#libomnitrace-sourcelibomnitrace). Not installed or exported outside of the build tree. + +### libomnitrace: [source/lib/omnitrace](https://github.com/AMDResearch/omnitrace/tree/main/source/lib/omnitrace) + +This is the main library encapsulating all the capabilities. + +### libomnitrace-dl: [source/lib/omnitrace-dl](https://github.com/AMDResearch/omnitrace/tree/main/source/lib/omnitrace-dl) + +Lightweight, front-end library for [libomnitrace](#libomnitrace-sourcelibomnitrace) which serves 3 primary purposes: + +1. Dramatically speeds up instrumentation time vs. using [libomnitrace](#libomnitrace-sourcelibomnitrace) directly since Dyninst must parse the entire library in order to find instrumentation functions ([libomnitrace](#libomnitrace-sourcelibomnitrace) is dlopen'ed when the instrumentation functions get called) +2. Prevents re-entry if [libomnitrace](#libomnitrace-sourcelibomnitrace) calls an instrumented function internally +3. Coordinates communication between [libomnitrace-user](#libomnitrace-user-sourcelibomnitrace-user) and [libomnitrace](#libomnitrace-sourcelibomnitrace) + +### libomnitrace-user: [source/lib/omnitrace-user](https://github.com/AMDResearch/omnitrace/tree/main/source/lib/omnitrace-user) + +Provides a set of functions and types for the users to add to their code, e.g. disabling data collection globally or on a specific thread, +user-defined regions, etc.
If [libomnitrace-dl](#libomnitrace-dl-sourcelibomnitrace-dl) is not loaded, the user API is effectively no-op +function calls. + +## Concepts + +### Component + +Most measurements and capabilities are encapsulated into a "component" with the following definitions: + +- Measurement: recording of some data relevant to performance, e.g. current call-stack, hardware counter values, current memory usage, timestamp +- Capability: handles the implementation or orchestration of some feature which is used to collect measurements, e.g. a component which handles setting up function wrappers around various functions such as `pthread_create`, `MPI_Init`, etc. + +Components are designed to hold no data at all or only the data for both an instantaneous measurement and a phase measurement. + +Components which store data typically implement a static `record()` function (for getting a record of the measurement), +`start()` + `stop()` member functions for calculating a phase measurement, and a `sample()` member function for storing an +instantaneous measurement. In reality, there are several more "standard" functions but these are the most often used ones. + +Components which do not store data may also have `start()`, `stop()`, and `sample()` functions but for components which +implement function wrappers, they typically provide a call operator or `audit(...)` functions which are invoked with the +wrappee function's arguments before the wrappee gets called and with the return value after the wrappee gets called.
+ +***The goal of this design is to provide relatively small and reusable lightweight objects for recording measurements +and/or implementing capabilities.*** + +#### Wall-Clock Component Example + +A component for computing the elapsed wall-clock time looks like this: + +```cpp +struct wall_clock +{ + using value_type = int64_t; + + static value_type record() noexcept + { + return std::chrono::steady_clock::now().time_since_epoch().count(); + } + + void sample() noexcept + { + value = record(); + } + + void start() noexcept + { + value = record(); + } + + void stop() noexcept + { + auto _start_value = value; + value = record(); + accum += (value - _start_value); + } + +private: + int64_t value = 0; + int64_t accum = 0; +}; +``` + +#### Function Wrapper Component Example + +A component which implements wrappers around `fork()` and `exit(int)` (and stores no data) may look like this: + +```cpp +struct function_wrapper +{ + pid_t operator()(const gotcha_data&, pid_t (*real_fork)()) + { + // disable all collection before forking + categories::disable_categories(config::get_enabled_categories()); + + auto _pid_v = real_fork(); + + // only re-enable collection on parent process + if(_pid_v != 0) + categories::enable_categories(config::get_enabled_categories()); + + return _pid_v; + } + + void operator()(const gotcha_data&, void (*real_exit)(int), int _exit_code) + { + // catch the call to exit and finalize before truly exiting + omnitrace_finalize(); + + real_exit(_exit_code); + } +}; +``` + +#### Component Member Functions + +There are no real restrictions or requirements on the member functions a component needs to provide. +Unless the component is being directly used, invocation of component member functions via "component bundlers" +(provided via timemory) makes extensive use of template metaprogramming concepts to find the best match (if any) +for calling a component's member function.
This is a bit easier to demonstrate via example: + +```cpp +struct foo +{ + void sample() { puts("foo::sample()"); } +}; + +struct bar +{ + void sample(int) { puts("bar::sample(int)"); } +}; + +struct spam +{ + void start(int) { puts("spam::start()"); } + void stop() { puts("spam::stop()"); } +}; + +int main() +{ + auto _bundle = component_tuple<foo, bar, spam>{ "main" }; + + puts("A"); + _bundle.start(); + + puts("B"); + _bundle.sample(10); + + puts("C"); + _bundle.sample(); + + puts("D"); + _bundle.stop(); +} +``` + +In the above, this would be the message printed: + +```console +A +spam::start() +B +foo::sample() +bar::sample(int) +C +foo::sample() +D +spam::stop() +``` + +In section A, the bundle determined only the `spam` object had a `start` function. Since this is determined +via template metaprogramming instead of dynamic polymorphism, this effectively elides any code related to +the `foo` or `bar` objects. In section B, since an integer of `10` was passed to the bundle, +the bundle forwards that value onto `bar::sample(int)` after it invokes `foo::sample()` -- which +is invoked because it recognizes that a call to the `sample` member function is still possible without +the arguments. + +## Memory Model + +Collected data is generally stored in one of the following 3 places: + +1. Perfetto (i.e. data is handed directly to perfetto) +2. Managed implicitly by timemory and accessed as needed +3. Thread-local data + +In general, only instrumentation for relatively simple data is directly passed to Perfetto and/or timemory during runtime. +For example, the callbacks from binary instrumentation, user API instrumentation, and roctracer directly invoke +calls to Perfetto and/or timemory's storage model. Otherwise, the data is stored by omnitrace in the thread-data model +which is more persistent than simply using `thread_local` static data (which is problematic because the data gets deleted +when a thread terminates).
+ +### Thread Identification + +Each CPU thread is assigned two integral identifiers. One identifier is simply an atomic increment every time a new thread is created +(called `internal_value`). +The other identifier tries to account for the fact that OmniTrace, Perfetto, ROCm, etc. start background threads +(called `sequent_value`). When a thread is created as a byproduct of OmniTrace, the index is offset by a large value. This serves +two purposes: (1) accessing the data for threads created by the user is closer in memory and (2) when log messages are printed, +the index more-or-less correlates to the order of thread creation to the user's knowledge. + +The `sequent_value` is typically the one used to access the thread-data. + +### Thread-Data Class + +Currently, most thread data is effectively stored in a static `std::array<std::unique_ptr<T>, OMNITRACE_MAX_THREADS>` instance. +`OMNITRACE_MAX_THREADS` is a value defined at compile-time and set to 2048 for release builds. During finalization, +omnitrace iterates over all the thread-data and then transforms that data into something that is passed to perfetto and/or timemory. +The downside of the current model is that if the user exceeds `OMNITRACE_MAX_THREADS`, omnitrace segfaults. To fix this issue, +a new model is being adopted which has all the benefits of this model but permits dynamic expansion. + +## Sampling Model + +The general structure for the sampling is within timemory (`source/timemory/sampling`). Currently, all sampling is done per-thread +via POSIX timers. Omnitrace supports using a realtime timer and a CPU-time timer. Both have adjustable frequencies, delays, and durations. +By default, only CPU-time sampling is enabled. Initial settings are inherited from the settings starting with `OMNITRACE_SAMPLING_`. +For each type of timer, there exist timer-specific settings that can be used to override the common/inherited settings for that timer +specifically.
For the CPU-time sampler, these settings start with `OMNITRACE_SAMPLING_CPUTIME` and `OMNITRACE_SAMPLING_REALTIME` for +the realtime sampler. For example, `OMNITRACE_SAMPLING_FREQ=500` initially sets the sampling frequency to 500 interrupts per second +(based on their clock). Setting `OMNITRACE_SAMPLING_REALTIME_FREQ=10` will lower the sampling frequency for the realtime sampler +to 10 interrupts per second of realtime. + +The omnitrace-specific implementation can be found in [source/lib/omnitrace/library/sampling.cpp](https://github.com/AMDResearch/omnitrace/blob/main/source/lib/omnitrace/library/sampling.cpp). +Within [sampling.cpp](https://github.com/AMDResearch/omnitrace/blob/main/source/lib/omnitrace/library/sampling.cpp), you will find a bundle of 3 sampling components: +`backtrace_timestamp`, `backtrace`, and `backtrace_metrics`. +The first component [backtrace_timestamp](https://github.com/AMDResearch/omnitrace/blob/main/source/lib/omnitrace/library/components/backtrace_timestamp.hpp) simply +records the wall-clock time of the sample. +The second component [backtrace](https://github.com/AMDResearch/omnitrace/blob/main/source/lib/omnitrace/library/components/backtrace.hpp) records the call-stack via libunwind. +The last component [backtrace_metrics](https://github.com/AMDResearch/omnitrace/blob/main/source/lib/omnitrace/library/components/backtrace_metrics.hpp) is responsible for recording the +metrics for that sample, e.g. peak RSS, HW counters, etc. These 3 components are bundled together in a tuple-like struct (e.g. `tuple<backtrace_timestamp, backtrace, backtrace_metrics>`) and +a buffer of at least 1024 instances of this tuple is mmap'ed per-thread. When this buffer is full, before taking the next sample, the sampler will hand the buffer +off to its allocator thread and mmap a new buffer. The allocator thread takes this data and either dynamically stores it in memory or writes it to a file depending on the value of `OMNITRACE_USE_TEMPORARY_FILES`.
+This schema avoids all allocations in the signal handler, allows the data to grow dynamically, avoids potentially slow I/O within the signal handler, and also enables the capability to avoid I/O altogether. +The maximum number of samplers handled by each allocator is governed by the `OMNITRACE_SAMPLING_ALLOCATOR_SIZE` setting (the default is 8) -- whenever an allocator has reached its limit, +a new internal thread is created to handle the new samplers. + +## Time-Window Constraint Model + +Recently with the introduction of tracing delay/duration/etc., the [constraint namespace](https://github.com/AMDResearch/omnitrace/blob/main/source/lib/core/constraint.hpp) +was introduced to improve the management of delays and/or duration limits of data collection. The `spec` class takes a clock identifier, a delay value, a duration value, and an +integer indicating how many times to repeat the delay + duration. Thus, it is possible to perform tasks such as periodically enabling tracing for brief periods +of time in between long periods without data collection during the application, e.g. `OMNITRACE_TRACE_PERIODS = realtime:10:1:5 process_cputime:10:2:20` would enable +five periods of no data collection for 10 seconds of realtime followed by 1 second of data collection + twenty periods of no data collection for 10 seconds +of process CPU time followed by 2 CPU-time seconds of data collection. + +Eventually, the goal is for all subsets of data collection which currently support more rudimentary models of time window constraints, such as process sampling and causal profiling, +to be migrated to this model.
diff --git a/source/docs/index.md b/source/docs/index.md index eaae8e1f2b..cab3d9ff7d 100644 --- a/source/docs/index.md +++ b/source/docs/index.md @@ -20,4 +20,5 @@ user_api python youtube + development ``` diff --git a/source/python/libpyomnitrace.cpp b/source/python/libpyomnitrace.cpp index 024ceefbc8..b3ccc7e0ab 100644 --- a/source/python/libpyomnitrace.cpp +++ b/source/python/libpyomnitrace.cpp @@ -53,6 +53,9 @@ #include #include +#define OMNITRACE_PYTHON_VERSION \ + ((10000 * PY_MAJOR_VERSION) + (100 * PY_MINOR_VERSION) + PY_MICRO_VERSION) + namespace pyomnitrace { namespace pyprofile @@ -260,10 +263,34 @@ get_config() return *_tl_instance; } // -int32_t -get_depth(PyFrameObject* frame) +int +get_frame_lineno(PyFrameObject* frame) { - return (frame->f_back) ? (get_depth(frame->f_back) + 1) : 0; +#if OMNITRACE_PYTHON_VERSION >= 31100 + return PyFrame_GetLineNumber(frame); +#else + return frame->f_lineno; +#endif +} +// +int +get_frame_lasti(PyFrameObject* frame) +{ +#if OMNITRACE_PYTHON_VERSION >= 31100 + return PyFrame_GetLasti(frame); +#else + return frame->f_lasti; +#endif +} +// +auto +get_frame_code(PyFrameObject* frame) +{ +#if OMNITRACE_PYTHON_VERSION >= 31100 + return PyFrame_GetCode(frame); +#else + return frame->f_code; +#endif } // void @@ -364,9 +391,9 @@ profiler_function(py::object pframe, const char* swhat, py::object arg) } // append the line number if(_config.include_line && _config.include_filename) - _funcname.append(TIMEMORY_JOIN("", ':', frame->f_lineno, ']')); + _funcname.append(TIMEMORY_JOIN("", ':', get_frame_lineno(frame), ']')); else if(_config.include_line) - _funcname.append(TIMEMORY_JOIN("", ':', frame->f_lineno)); + _funcname.append(TIMEMORY_JOIN("", ':', get_frame_lineno(frame))); else if(_config.include_filename) _funcname += "]"; return _funcname; @@ -386,7 +413,7 @@ profiler_function(py::object pframe, const char* swhat, py::object arg) auto& _only_funcs = _config.restrict_functions; auto& _incl_funcs = 
_config.include_functions; auto& _skip_funcs = _config.exclude_functions; - auto _func = py::cast(frame->f_code->co_name); + auto _func = py::cast(get_frame_code(frame)->co_name); if(!_only_funcs.empty()) { @@ -419,7 +446,7 @@ profiler_function(py::object pframe, const char* swhat, py::object arg) auto& _only_files = _config.restrict_filenames; auto& _incl_files = _config.include_filenames; auto& _skip_files = _config.exclude_filenames; - auto _full = py::cast(frame->f_code->co_filename); + auto _full = py::cast(get_frame_code(frame)->co_filename); auto _file = (_full.find('/') != std::string::npos) ? _full.substr(_full.find_last_of('/') + 1) : _full; @@ -470,14 +497,18 @@ profiler_function(py::object pframe, const char* swhat, py::object arg) // start function auto _profiler_call = [&]() { + int _lineno = 0; + int _lasti = 0; if(_annotate) { + _lineno = get_frame_lineno(frame); + _lasti = get_frame_lasti(frame); _config.annotations.at(0).value = const_cast(_full.c_str()); - _config.annotations.at(1).value = &frame->f_lineno; - _config.annotations.at(2).value = &frame->f_lasti; - _config.annotations.at(3).value = &frame->f_code->co_argcount; - _config.annotations.at(4).value = &frame->f_code->co_nlocals; - _config.annotations.at(5).value = &frame->f_code->co_stacksize; + _config.annotations.at(1).value = &_lineno; + _config.annotations.at(2).value = &_lasti; + _config.annotations.at(3).value = &get_frame_code(frame)->co_argcount; + _config.annotations.at(4).value = &get_frame_code(frame)->co_nlocals; + _config.annotations.at(5).value = &get_frame_code(frame)->co_stacksize; } _config.records.emplace_back([&_label_ref, _annotate]() {