From adb6e94de504821ba06ab6e25c5b22f8265d7349 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 26 Jan 2022 15:02:53 -0600 Subject: [PATCH] MPI gotcha updates (#20) * MPI gotcha updates * Release script updates - build for ubuntu focal and bionic - use OpenMPI for OMNITRACE_USE_MPI_HEADERS * build-release.sh updates [ROCm/rocprofiler-systems commit: a546949ff4ba93996255c16bdf5ca3c08d5fa0ed] --- .../rocprofiler-systems/docker/Dockerfile | 9 ++- .../docker/build-docker-release.sh | 13 ++++- .../docker/build-docker.sh | 13 ++++- .../rocprofiler-systems/external/timemory | 2 +- .../include/library/components/mpi_gotcha.hpp | 7 ++- .../scripts/build-release.sh | 57 +++++++++---------- projects/rocprofiler-systems/src/library.cpp | 7 ++- .../src/library/components/mpi_gotcha.cpp | 28 ++++++--- .../src/library/config.cpp | 6 +- 9 files changed, 87 insertions(+), 55 deletions(-) diff --git a/projects/rocprofiler-systems/docker/Dockerfile b/projects/rocprofiler-systems/docker/Dockerfile index 99db7f3f5d..593e8ca7d6 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile +++ b/projects/rocprofiler-systems/docker/Dockerfile @@ -1,5 +1,7 @@ -FROM ubuntu:20.04 +ARG DISTRO +ARG VERSION +FROM ${DISTRO}:${VERSION} ENV HOME /root ENV LANG en_US.UTF-8 @@ -15,9 +17,12 @@ SHELL [ "/bin/bash", "-c" ] ARG EXTRA_PACKAGES="" ARG ROCM_REPO_VERSION="debian" +ENV PATH ${HOME}/.local/bin:${PATH} + RUN apt-get update && \ apt-get dist-upgrade -y && \ - apt-get install -y build-essential cmake libnuma-dev wget gnupg2 m4 bash-completion git-core autoconf libtool autotools-dev && \ + apt-get install -y build-essential cmake libnuma-dev wget gnupg2 m4 bash-completion git-core autoconf libtool autotools-dev python3-pip lsb-release libpapi-dev libpfm4-dev && \ + python3 -m pip install 'cmake==3.18.4' && \ wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \ echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${ROCM_REPO_VERSION}/ ubuntu main" | tee /etc/apt/sources.list.d/rocm.list && \ apt-get update && \ diff --git a/projects/rocprofiler-systems/docker/build-docker-release.sh b/projects/rocprofiler-systems/docker/build-docker-release.sh index 702d977871..d5c611cc6a 100755 --- a/projects/rocprofiler-systems/docker/build-docker-release.sh +++ b/projects/rocprofiler-systems/docker/build-docker-release.sh @@ -15,8 +15,15 @@ build-release() docker run -it --rm -v ${PWD}:/home/omnitrace --env ROCM_VERSION=${ROCM_VERSION} --env VERSION=${CODE_VERSION} ${CONTAINER} /home/omnitrace/scripts/build-release.sh } +: ${DISTRO:=ubuntu} +: ${VERSIONS:=20.04 18.04} + CODE_VERSION=$(cat VERSION) -build-release jrmadsen/omnitrace-base-rocm-4.5 4.5.0 ${CODE_VERSION} -build-release jrmadsen/omnitrace-base-rocm-4.3 4.3.0 ${CODE_VERSION} -build-release jrmadsen/omnitrace-base-rocm-4.3.1 4.3.1 ${CODE_VERSION} +for VERSION in ${VERSIONS} +do + TAG=${DISTRO}-${VERSION} + build-release jrmadsen/omnitrace-${TAG}-rocm-4.5 4.5.0 ${CODE_VERSION} + build-release jrmadsen/omnitrace-${TAG}-rocm-4.3 4.3.0 ${CODE_VERSION} + build-release jrmadsen/omnitrace-${TAG}-rocm-4.3.1 4.3.1 ${CODE_VERSION} +done diff --git a/projects/rocprofiler-systems/docker/build-docker.sh b/projects/rocprofiler-systems/docker/build-docker.sh index 8795ef2df0..236b260a3d 100755 --- a/projects/rocprofiler-systems/docker/build-docker.sh +++ b/projects/rocprofiler-systems/docker/build-docker.sh @@ -1,8 +1,17 @@ #!/usr/bin/env bash : ${ROCM_VERSIONS:="4.5 4.3 4.3.1"} +: ${DISTRO:=ubuntu} +: ${VERSIONS:=20.04 18.04} -for i in ${ROCM_VERSIONS} +set -e + +if [ ! -f Dockerfile ]; then cd docker; fi + +for VERSION in ${VERSIONS} do - docker build . --tag jrmadsen/omnitrace-base-rocm-${i} --build-arg ROCM_REPO_VERSION=${i} + for i in ${ROCM_VERSIONS} + do + docker build . --tag jrmadsen/omnitrace-${DISTRO}-${VERSION}-rocm-${i} --build-arg DISTRO=${DISTRO} --build-arg VERSION=${VERSION} --build-arg ROCM_REPO_VERSION=${i} + done done diff --git a/projects/rocprofiler-systems/external/timemory b/projects/rocprofiler-systems/external/timemory index 335abea0c5..e459158b4e 160000 --- a/projects/rocprofiler-systems/external/timemory +++ b/projects/rocprofiler-systems/external/timemory @@ -1 +1 @@ -Subproject commit 335abea0c51e498cf419716839690cd0ccb1aeac +Subproject commit e459158b4eea5576c5bc21a57e444a983af68713 diff --git a/projects/rocprofiler-systems/include/library/components/mpi_gotcha.hpp b/projects/rocprofiler-systems/include/library/components/mpi_gotcha.hpp index 971c6968ff..f0bff42ef5 100644 --- a/projects/rocprofiler-systems/include/library/components/mpi_gotcha.hpp +++ b/projects/rocprofiler-systems/include/library/components/mpi_gotcha.hpp @@ -65,9 +65,10 @@ struct mpi_gotcha : comp::base void audit(const gotcha_data_t& _data, audit::outgoing, int _retval); private: - void* m_comm = nullptr; - int* m_rank = nullptr; - int* m_size = nullptr; + int* m_rank_ptr = nullptr; + int* m_size_ptr = nullptr; + int m_rank = 0; + int m_size = 1; }; using mpi_gotcha_t = comp::gotcha<5, tim::component_tuple, api::omnitrace>; diff --git a/projects/rocprofiler-systems/scripts/build-release.sh b/projects/rocprofiler-systems/scripts/build-release.sh index efb8d2edff..67eab888dc 100755 --- a/projects/rocprofiler-systems/scripts/build-release.sh +++ b/projects/rocprofiler-systems/scripts/build-release.sh @@ -2,17 +2,19 @@ : ${EXTRA_ARGS:=""} : ${EXTRA_TAGS:=""} -: ${VERSION:=0.0.3} -: ${ROCM_VERSION:=4.3.0} +: ${VERSION:=0.0.4} +: ${ROCM_VERSION:=4.5.0} : ${NJOBS:=8} +DISTRO=$(lsb_release -i | awk '{print $NF}')-$(lsb_release -r | awk '{print $NF}') + STANDARD_ARGS="-DCPACK_GENERATOR=STGZ -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=OFF -DOMNITRACE_MAX_THREADS=2048 -DOMNITRACE_BUILD_TESTING=OFF -DTIMEMORY_USE_LIBUNWIND=ON -DTIMEMORY_BUILD_LIBUNWIND=ON -DTIMEMORY_BUILD_PORTABLE=ON" STANDARD_ARGS="${STANDARD_ARGS} -DOMNITRACE_BUILD_DYNINST=ON $(echo -DDYNINST_BUILD_{TBB,BOOST,ELFUTILS,LIBIBERTY}=ON)" if [ -n "${EXTRA_ARGS}" ]; then STANDARD_ARGS="${STANDARD_ARGS} ${EXTRA_ARGS}" fi -PACKAGE_BASE_TAG=omnitrace-${VERSION}-Linux +PACKAGE_BASE_TAG=omnitrace-${VERSION}-${DISTRO} if [ -n "${EXTRA_TAGS}" ]; then PACKAGE_BASE_TAG="${PACKAGE_BASE_TAG}-${EXTRA_TAGS}" fi @@ -21,48 +23,45 @@ SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) cd $(dirname ${SCRIPT_DIR}) echo -e "Working directory: $(pwd)" -umask 000 +umask 0000 if [ ! -f build-release/${PACKAGE_BASE_TAG}.sh ]; then - cmake -B build-release/core ${STANDARD_ARGS} -DCMAKE_INSTALL_PREFIX=build-release/core/install-release -DDYNINST_USE_OpenMP=OFF -DOMNITRACE_USE_MPI_HEADERS=OFF -DOMNITRACE_USE_ROCTRACER=OFF . - cmake --build build-release/core --target package --parallel ${NJOBS} - cp build-release/core/omnitrace-${VERSION}-Linux.sh build-release/${PACKAGE_BASE_TAG}.sh + cmake -B build-release/${DISTRO}-core ${STANDARD_ARGS} -DCMAKE_INSTALL_PREFIX=build-release/${DISTRO}-core/install-release -DDYNINST_USE_OpenMP=OFF -DOMNITRACE_USE_MPI_HEADERS=OFF -DOMNITRACE_USE_HIP=OFF . + cmake --build build-release/${DISTRO}-core --target package --parallel ${NJOBS} + cp build-release/${DISTRO}-core/omnitrace-${VERSION}-Linux.sh build-release/${PACKAGE_BASE_TAG}.sh fi -apt-get install -y libmpich-dev mpich +apt-get install -y libopenmpi-dev openmpi-bin -STANDARD_ARGS="${STANDARD_ARGS} -DOMNITRACE_USE_ROCTRACER=ON -DOMNITRACE_USE_MPI_HEADERS=ON -DDYNINST_USE_OpenMP=ON" +STANDARD_ARGS="${STANDARD_ARGS} -DOMNITRACE_USE_HIP=ON -DOMNITRACE_USE_MPI_HEADERS=ON -DDYNINST_USE_OpenMP=ON" if [ ! -f build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}.sh ]; then - cmake -B build-release/rocm-${ROCM_VERSION} -DCMAKE_INSTALL_PREFIX=build-release/rocm-${ROCM_VERSION}/install-release ${STANDARD_ARGS} . - cmake --build build-release/rocm-${ROCM_VERSION} --target package --parallel ${NJOBS} - cp build-release/rocm-${ROCM_VERSION}/omnitrace-${VERSION}-Linux.sh build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}.sh + cmake -B build-release/${DISTRO}-rocm-${ROCM_VERSION} -DCMAKE_INSTALL_PREFIX=build-release/${DISTRO}-rocm-${ROCM_VERSION}/install-release ${STANDARD_ARGS} . + cmake --build build-release/${DISTRO}-rocm-${ROCM_VERSION} --target package --parallel ${NJOBS} + cp build-release/${DISTRO}-rocm-${ROCM_VERSION}/omnitrace-${VERSION}-Linux.sh build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}.sh fi -apt-get install -y libpapi-dev libpfm4-dev - STANDARD_ARGS="${STANDARD_ARGS} -DTIMEMORY_USE_PAPI=ON" if [ ! -f build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}-PAPI.sh ]; then - cmake -B build-release/rocm-${ROCM_VERSION}-papi -DCMAKE_INSTALL_PREFIX=build-release/rocm-${ROCM_VERSION}-papi/install-release ${STANDARD_ARGS} . - cmake --build build-release/rocm-${ROCM_VERSION}-papi --target package --parallel ${NJOBS} - cp build-release/rocm-${ROCM_VERSION}-papi/omnitrace-${VERSION}-Linux.sh build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}-PAPI.sh + cmake -B build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi -DCMAKE_INSTALL_PREFIX=build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi/install-release ${STANDARD_ARGS} . + cmake --build build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi --target package --parallel ${NJOBS} + cp build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi/omnitrace-${VERSION}-Linux.sh build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}-PAPI.sh fi STANDARD_ARGS="${STANDARD_ARGS} -DOMNITRACE_USE_MPI=ON" + +if [ ! -f build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}-PAPI-OpenMPI.sh ]; then + cmake -B build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi-openmpi -DCMAKE_INSTALL_PREFIX=build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi-openmpi/install-release ${STANDARD_ARGS} . + cmake --build build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi-openmpi --target package --parallel ${NJOBS} + cp build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi-openmpi/omnitrace-${VERSION}-Linux.sh build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}-PAPI-OpenMPI.sh +fi + +apt-get purge -y libopenmpi-dev openmpi-bin apt-get install -y libmpich-dev mpich if [ ! -f build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}-PAPI-MPICH.sh ]; then - cmake -B build-release/rocm-${ROCM_VERSION}-papi-mpich -DCMAKE_INSTALL_PREFIX=build-release/rocm-${ROCM_VERSION}-papi-mpich/install-release ${STANDARD_ARGS} . - cmake --build build-release/rocm-${ROCM_VERSION}-papi-mpich --target package --parallel ${NJOBS} - cp build-release/rocm-${ROCM_VERSION}-papi-mpich/omnitrace-${VERSION}-Linux.sh build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}-PAPI-MPICH.sh -fi - -apt-get purge -y libmpich-dev mpich -apt-get install -y libopenmpi-dev openmpi-bin - -if [ ! -f build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}-PAPI-OpenMPI.sh ]; then - cmake -B build-release/rocm-${ROCM_VERSION}-papi-openmpi -DCMAKE_INSTALL_PREFIX=build-release/rocm-${ROCM_VERSION}-papi-openmpi/install-release ${STANDARD_ARGS} . - cmake --build build-release/rocm-${ROCM_VERSION}-papi-openmpi --target package --parallel ${NJOBS} - cp build-release/rocm-${ROCM_VERSION}-papi-openmpi/omnitrace-${VERSION}-Linux.sh build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}-PAPI-OpenMPI.sh + cmake -B build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi-mpich -DCMAKE_INSTALL_PREFIX=build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi-mpich/install-release ${STANDARD_ARGS} . + cmake --build build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi-mpich --target package --parallel ${NJOBS} + cp build-release/${DISTRO}-rocm-${ROCM_VERSION}-papi-mpich/omnitrace-${VERSION}-Linux.sh build-release/${PACKAGE_BASE_TAG}-ROCm-${ROCM_VERSION}-PAPI-MPICH.sh fi diff --git a/projects/rocprofiler-systems/src/library.cpp b/projects/rocprofiler-systems/src/library.cpp index 3d364652f3..34b0ab37e8 100644 --- a/projects/rocprofiler-systems/src/library.cpp +++ b/projects/rocprofiler-systems/src/library.cpp @@ -387,9 +387,10 @@ omnitrace_init_tooling() return (_v.find("OMNITRACE_") == 0); }; - tim::print_env(std::cerr, [_is_omnitrace_option](const std::string& _v) { - return _is_omnitrace_option(_v, std::set{}); - }); + if(tim::settings::verbose() > 0) + tim::print_env(std::cerr, [_is_omnitrace_option](const std::string& _v) { + return _is_omnitrace_option(_v, std::set{}); + }); print_config_settings(std::cerr, _is_omnitrace_option); } diff --git a/projects/rocprofiler-systems/src/library/components/mpi_gotcha.cpp b/projects/rocprofiler-systems/src/library/components/mpi_gotcha.cpp index bf55fd0a40..a61458fd0b 100644 --- a/projects/rocprofiler-systems/src/library/components/mpi_gotcha.cpp +++ b/projects/rocprofiler-systems/src/library/components/mpi_gotcha.cpp @@ -133,18 +133,17 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming) } void -mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, comm_t _comm, int* _val) +mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, comm_t, int* _val) { OMNITRACE_CONDITIONAL_BASIC_PRINT(get_debug_env(), "[%s] %s()\n", __FUNCTION__, _data.tool_id.c_str()); - m_comm = &_comm; if(_data.tool_id == "MPI_Comm_rank") { - m_rank = _val; + m_rank_ptr = _val; } else if(_data.tool_id == "MPI_Comm_size") { - m_size = _val; + m_size_ptr = _val; } else { @@ -171,6 +170,9 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval) { OMNITRACE_CONDITIONAL_BASIC_PRINT( get_debug_env(), "[%s] Activating MPI wrappers...\n", __FUNCTION__); + + // use env vars OMNITRACE_MPIP_PERMIT_LIST and OMNITRACE_MPIP_REJECT_LIST + // to control the gotcha bindings at runtime comp::configure_mpip, api::omnitrace>(); mpip_index = @@ -183,10 +185,13 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval) { if(_data.tool_id == "MPI_Comm_rank") { - if(m_rank) + if(m_rank_ptr) { - tim::mpi::set_rank(*m_rank, *static_cast(m_comm)); - tim::settings::default_process_suffix() = *m_rank; + m_rank = std::max(*m_rank_ptr, m_rank); + OMNITRACE_CONDITIONAL_BASIC_PRINT(tim::settings::verbose() > 0, + "MPI rank: %i\n", m_rank); + tim::mpi::set_rank(m_rank); + tim::settings::default_process_suffix() = m_rank; get_perfetto_output_filename().clear(); (void) get_perfetto_output_filename(); } @@ -198,8 +203,13 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval) } else if(_data.tool_id == "MPI_Comm_size") { - if(m_size) - tim::mpi::set_size(*m_size, *static_cast(m_comm)); + if(m_size_ptr) + { + m_size = std::max(*m_size_ptr, m_size); + OMNITRACE_CONDITIONAL_BASIC_PRINT(tim::settings::verbose() > 0, + "MPI size: %i\n", m_size); + tim::mpi::set_size(m_size); + } else { OMNITRACE_PRINT("[%s] %s() returned %i :: nullptr to size\n", diff --git a/projects/rocprofiler-systems/src/library/config.cpp b/projects/rocprofiler-systems/src/library/config.cpp index a317e5a2a3..8bcc0d4ef3 100644 --- a/projects/rocprofiler-systems/src/library/config.cpp +++ b/projects/rocprofiler-systems/src/library/config.cpp @@ -274,12 +274,12 @@ configure_settings() void print_config_settings( - std::ostream& _os, + std::ostream& _ros, std::function&)>&& _filter) { OMNITRACE_CONDITIONAL_BASIC_PRINT(true, "configuration:\n"); - auto _flags = _os.flags(); + std::stringstream _os{}; bool _md = tim::get_env("OMNITRACE_SETTINGS_DESC_MARKDOWN", false); @@ -370,7 +370,7 @@ print_config_settings( } _os << _spacer.str() << "\n"; - _os.setf(_flags); + _ros << _os.str() << std::flush; } std::string&