From 4fcd8cc78d46efd863c58cc3342e3771708b88d1 Mon Sep 17 00:00:00 2001 From: Sajina PK Date: Wed, 4 Jun 2025 18:06:18 -0400 Subject: [PATCH] Enable MPI tracing for Fortran (#185) - Move the MPI gotcha functionality from Timemory to the repo. - Add the PMPI Fortran MPI functions to the existing mpi gotcha handle. --- cmake/Packages.cmake | 12 +- examples/transpose/CMakeLists.txt | 2 +- .../rocprof-sys-instrument.cpp | 6 +- source/lib/core/CMakeLists.txt | 1 + source/lib/core/config.cpp | 6 +- source/lib/core/debug.hpp | 4 +- source/lib/core/mpi.hpp | 725 ++++++++++++++++ source/lib/core/perfetto.cpp | 2 +- source/lib/core/timemory.hpp | 3 +- .../library/components/CMakeLists.txt | 1 + .../library/components/mpi_gotcha.cpp | 85 +- .../library/components/mpi_gotcha.hpp | 5 +- .../rocprof-sys/library/components/mpip.hpp | 779 ++++++++++++++++++ 13 files changed, 1576 insertions(+), 55 deletions(-) create mode 100644 source/lib/core/mpi.hpp create mode 100644 source/lib/rocprof-sys/library/components/mpip.hpp diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake index 596d2d1329..c911da78ae 100644 --- a/cmake/Packages.cmake +++ b/cmake/Packages.cmake @@ -210,13 +210,12 @@ set(_ROCPROFSYS_MPI_HEADERS_ALLOW_MPICH ${MPI_HEADERS_ALLOW_MPICH}) if(ROCPROFSYS_USE_MPI) find_package(MPI ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) target_link_libraries(rocprofiler-systems-mpi INTERFACE MPI::MPI_C MPI::MPI_CXX) - rocprofiler_systems_target_compile_definitions( - rocprofiler-systems-mpi INTERFACE TIMEMORY_USE_MPI=1 ROCPROFSYS_USE_MPI) + rocprofiler_systems_target_compile_definitions(rocprofiler-systems-mpi + INTERFACE ROCPROFSYS_USE_MPI) elseif(ROCPROFSYS_USE_MPI_HEADERS) find_package(MPI-Headers ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) - rocprofiler_systems_target_compile_definitions( - rocprofiler-systems-mpi INTERFACE TIMEMORY_USE_MPI_HEADERS=1 - ROCPROFSYS_USE_MPI_HEADERS) + rocprofiler_systems_target_compile_definitions(rocprofiler-systems-mpi + INTERFACE ROCPROFSYS_USE_MPI_HEADERS) target_link_libraries(rocprofiler-systems-mpi INTERFACE MPI::MPI_HEADERS) endif() @@ -543,9 +542,6 @@ set(TIMEMORY_QUIET_CONFIG CACHE BOOL "Make timemory configuration quieter") # timemory feature settings -set(TIMEMORY_USE_MPI - ${ROCPROFSYS_USE_MPI} - CACHE BOOL "Enable MPI support in timemory" FORCE) set(TIMEMORY_USE_GOTCHA ON CACHE BOOL "Enable GOTCHA support in timemory") diff --git a/examples/transpose/CMakeLists.txt b/examples/transpose/CMakeLists.txt index 001571f587..bc7c721fc7 100644 --- a/examples/transpose/CMakeLists.txt +++ b/examples/transpose/CMakeLists.txt @@ -41,7 +41,7 @@ if((NOT CMAKE_CXX_COMPILER_IS_HIPCC OR (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang return() endif() -option(TRANSPOSE_USE_MPI "Enable MPI support in transpose exe" ${TIMEMORY_USE_MPI}) +option(TRANSPOSE_USE_MPI "Enable MPI support in transpose exe" ${ROCPROFSYS_USE_MPI}) find_package(Threads REQUIRED) if(TRANSPOSE_USE_MPI) diff --git a/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp b/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp index 365fdc7b5b..77a11cedbc 100644 --- a/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp +++ b/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp @@ -1711,8 +1711,10 @@ main(int argc, char** argv) { "rocprofsys_user_stop_thread_trace" }); #if ROCPROFSYS_USE_MPI > 0 || ROCPROFSYS_USE_MPI_HEADERS > 0 // if any of the below MPI functions are found, enable MPI support - for(const auto* itr : { "MPI_Init", "MPI_Init_thread", "MPI_Finalize", - "MPI_Comm_rank", "MPI_Comm_size" }) + for(const auto* itr : + { "MPI_Init", "MPI_Init_thread", "MPI_Finalize", "MPI_Comm_rank", "MPI_Comm_size", + "MPI_INIT", "mpi_init", "mpi_init_", "mpi_init__", "MPI_INIT_THREAD", + "mpi_init_thread", "mpi_init_thread_", "mpi_init_thread__" }) { if(find_function(app_image, itr) != nullptr) { diff --git a/source/lib/core/CMakeLists.txt b/source/lib/core/CMakeLists.txt index 184229642c..ee14f6baec 100644 --- a/source/lib/core/CMakeLists.txt +++ b/source/lib/core/CMakeLists.txt @@ -31,6 +31,7 @@ set(core_headers ${CMAKE_CURRENT_LIST_DIR}/exception.hpp ${CMAKE_CURRENT_LIST_DIR}/gpu.hpp ${CMAKE_CURRENT_LIST_DIR}/locking.hpp + ${CMAKE_CURRENT_LIST_DIR}/mpi.hpp ${CMAKE_CURRENT_LIST_DIR}/mproc.hpp ${CMAKE_CURRENT_LIST_DIR}/perf.hpp ${CMAKE_CURRENT_LIST_DIR}/perfetto.hpp diff --git a/source/lib/core/config.cpp b/source/lib/core/config.cpp index 3bb205bdf1..95b735462a 100644 --- a/source/lib/core/config.cpp +++ b/source/lib/core/config.cpp @@ -1046,7 +1046,7 @@ configure_settings(bool _init) settings::use_output_suffix() = _config->get("ROCPROFSYS_USE_PID"); if(settings::use_output_suffix()) settings::default_process_suffix() = process::get_id(); -#if !defined(TIMEMORY_USE_MPI) && defined(TIMEMORY_USE_MPI_HEADERS) +#if !defined(ROCPROFSYS_USE_MPI) && defined(ROCPROFSYS_USE_MPI_HEADERS) if(tim::dmp::is_initialized()) settings::default_process_suffix() = tim::dmp::rank(); #endif @@ -1367,7 +1367,7 @@ configure_disabled_settings(const std::shared_ptr& _config) _config->find(itr)->second->set_hidden(true); #endif -#if !defined(TIMEMORY_USE_MPI) || TIMEMORY_USE_MPI == 0 +#if !defined(ROCPROFSYS_USE_MPI) || ROCPROFSYS_USE_MPI == 0 _config->disable("ROCPROFSYS_PERFETTO_COMBINE_TRACES"); _config->disable("ROCPROFSYS_COLLAPSE_PROCESSES"); _config->find("ROCPROFSYS_PERFETTO_COMBINE_TRACES")->second->set_hidden(true); @@ -1991,7 +1991,7 @@ get_perfetto_buffer_size() bool get_perfetto_combined_traces() { -#if defined(TIMEMORY_USE_MPI) && TIMEMORY_USE_MPI > 0 +#if defined(ROCPROFSYS_USE_MPI) && ROCPROFSYS_USE_MPI > 0 static auto _v = get_config()->find("ROCPROFSYS_PERFETTO_COMBINE_TRACES"); return static_cast&>(*_v->second).get(); #else diff --git a/source/lib/core/debug.hpp b/source/lib/core/debug.hpp index c82b33069e..b933cf33f8 100644 --- a/source/lib/core/debug.hpp +++ b/source/lib/core/debug.hpp @@ -159,9 +159,9 @@ as_hex(void*, size_t); #endif #if !defined(ROCPROFSYS_DEBUG_PROCESS_IDENTIFIER) -# if defined(TIMEMORY_USE_MPI) +# if defined(ROCPROFSYS_USE_MPI) # define ROCPROFSYS_DEBUG_PROCESS_IDENTIFIER static_cast(::tim::dmp::rank()) -# elif defined(TIMEMORY_USE_MPI_HEADERS) +# elif defined(ROCPROFSYS_USE_MPI_HEADERS) # define ROCPROFSYS_DEBUG_PROCESS_IDENTIFIER \ (::tim::dmp::is_initialized()) ? static_cast(::tim::dmp::rank()) \ : static_cast(::tim::process::get_id()) diff --git a/source/lib/core/mpi.hpp b/source/lib/core/mpi.hpp new file mode 100644 index 0000000000..9814586553 --- /dev/null +++ b/source/lib/core/mpi.hpp @@ -0,0 +1,725 @@ +// MIT License +// +// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +/* + * Defines mpi functions and dummy functions when compiled without MPI + * + */ + +#pragma once + +#include "debug.hpp" +#include + +#include +#include + +#include +#include +#include + +#if !defined(ROCPROFSYS_USE_MPI) && defined(ROCPROFSYS_USE_MPI_HEADERS) && \ + !defined(OMPI_SKIP_MPICXX) +# define ROCPROFSYS_UNDEFINE_OMPI_SKIP_MPICXX 1 +# define OMPI_SKIP_MPICXX 1 +#endif + +#if defined(ROCPROFSYS_USE_MPI) || defined(ROCPROFSYS_USE_MPI_HEADERS) +# include +#endif + +#if defined(MPICH) && MPICH > 0 +# define ROCPROFSYS_MPI_MPICH 1 +#elif defined(OMPI_MAJOR_VERSION) && defined(OMPI_MINOR_VERSION) && \ + defined(OMPI_PATCH_VERSION) +# define ROCPROFSYS_MPI_OPENMPI 1 +#endif + +namespace rocprofsys +{ +namespace mpi +{ +//--------------------------------------------------------------------------------------// + +#if !defined(ROCPROFSYS_USE_MPI) +struct dummy_data_type +{ + enum type + { + int_t, + float_t, + double_t + }; +}; +#endif + +//--------------------------------------------------------------------------------------// + +#if !defined(ROCPROFSYS_USE_MPI) && !defined(MPI_INT) +# define MPI_INT ::rocprofsys::mpi::dummy_data_type::int_t +#endif + +#if !defined(ROCPROFSYS_USE_MPI) && !defined(MPI_FLOAT) +# define MPI_FLOAT ::rocprofsys::mpi::dummy_data_type::float_t +#endif + +#if !defined(ROCPROFSYS_USE_MPI) && !defined(MPI_DOUBLE) +# define MPI_DOUBLE ::rocprofsys::mpi::dummy_data_type::double_t +#endif + +//--------------------------------------------------------------------------------------// + +#if defined(ROCPROFSYS_USE_MPI) || defined(ROCPROFSYS_USE_MPI_HEADERS) +# if defined(MPICH) && (MPICH > 0) +static constexpr bool is_mpich = true; +# else +static constexpr bool is_mpich = false; +# endif +# if defined(OPEN_MPI) && (OPEN_MPI > 0) +static constexpr bool is_openmpi = true; +# else +static constexpr bool is_openmpi = false; +# endif +#endif + +//--------------------------------------------------------------------------------------// + +#if defined(ROCPROFSYS_USE_MPI) || defined(ROCPROFSYS_USE_MPI_HEADERS) + +using comm_t = MPI_Comm; +using info_t = MPI_Info; +using data_type_t = MPI_Datatype; +using status_t = MPI_Status; + +# if !defined(ROCPROFSYS_USE_MPI) && defined(ROCPROFSYS_USE_MPI_HEADERS) && \ + defined(OPEN_MPI) && (OPEN_MPI > 0) +static const comm_t comm_world_v = nullptr; +static const comm_t comm_self_v = nullptr; +static const info_t info_null_v = nullptr; +# else +static const comm_t comm_world_v = MPI_COMM_WORLD; +static const comm_t comm_self_v = MPI_COMM_SELF; +static const info_t info_null_v = MPI_INFO_NULL; +# endif +static const int success_v = MPI_SUCCESS; +static const int comm_type_shared_v = MPI_COMM_TYPE_SHARED; + +namespace threading +{ +enum : int +{ + /// Only one thread will execute. + single = MPI_THREAD_SINGLE, + /// Only main thread will do MPI calls. The process may be multi-threaded, but only + /// the main thread will make MPI calls (all MPI calls are funneled to the main + /// thread) + funneled = MPI_THREAD_FUNNELED, + /// Only one thread at the time do MPI calls. The process may be multi-threaded, and + /// multiple threads may make MPI calls, but only one at a time: MPI calls are not + /// made concurrently from two distinct threads (all MPI calls are serialized). + serialized = MPI_THREAD_SERIALIZED, + /// Multiple thread may do MPI calls with no restrictions. + multiple = MPI_THREAD_MULTIPLE +}; +} // namespace threading + +#else // dummy MPI types + +using comm_t = int32_t; +using info_t = int32_t; +using data_type_t = int32_t; +using status_t = int32_t; +static const comm_t comm_world_v = 0; +static const comm_t comm_self_v = 0; +static const info_t info_null_v = 0; +static const int success_v = 0; +static const int comm_type_shared_v = 0; + +namespace threading +{ +enum : int +{ + /// Only one thread will execute. + single = 0, + /// Only main thread will do MPI calls. The process may be multi-threaded, but only + /// the main thread will make MPI calls (all MPI calls are funneled to the main + /// thread) + funneled = 1, + /// Only one thread at the time do MPI calls. The process may be multi-threaded, and + /// multiple threads may make MPI calls, but only one at a time: MPI calls are not + /// made concurrently from two distinct threads (all MPI calls are serialized). + serialized = 2, + /// Multiple thread may do MPI calls with no restrictions. + multiple = 3 +}; +} // namespace threading + +#endif + +//--------------------------------------------------------------------------------------// + +namespace threading +{ +inline auto +get_id() +{ + return ::tim::threading::get_id(); +} +} // namespace threading + +template +using communicator_map_t = std::unordered_map; + +inline int32_t rank(comm_t = comm_world_v); +inline int32_t size(comm_t = comm_world_v); +inline void set_rank(int32_t, comm_t = comm_world_v); +inline void set_size(int32_t, comm_t = comm_world_v); + +//--------------------------------------------------------------------------------------// +// Currently ROCPROFSYS_MPI_THREAD is just a placeholder for future +// implementation. + +inline bool& +use_mpi_thread() +{ + static bool _instance = tim::get_env("ROCPROFSYS_MPI_THREAD", true); + return _instance; +} + +//--------------------------------------------------------------------------------------// + +inline std::string& +use_mpi_thread_type() +{ + static std::string _instance = + tim::get_env("ROCPROFSYS_MPI_THREAD_TYPE", ""); + return _instance; +} + +//--------------------------------------------------------------------------------------// + +inline bool& +fail_on_error() +{ + static bool _instance = tim::get_env("ROCPROFSYS_MPI_FAIL_ON_ERROR", false); + return _instance; +} + +//--------------------------------------------------------------------------------------// + +inline bool& +quiet() +{ + static bool _instance = tim::get_env("ROCPROFSYS_MPI_QUIET", false); + return _instance; +} + +//--------------------------------------------------------------------------------------// + +#if !defined(ROCPROFSYS_MPI_ERROR_FUNCTION) +# define ROCPROFSYS_MPI_ERROR_FUNCTION(FUNC, ...) # FUNC +#endif + +#if !defined(ROCPROFSYS_MPI_ERROR_CHECK) +# define ROCPROFSYS_MPI_ERROR_CHECK(...) \ + ::rocprofsys::mpi::check_error(ROCPROFSYS_MPI_ERROR_FUNCTION(__VA_ARGS__, ""), \ + __VA_ARGS__) +#endif + +//--------------------------------------------------------------------------------------// + +inline bool +check_error(const char* _func, int err_code, comm_t _comm = mpi::comm_world_v) +{ +#if defined(ROCPROFSYS_USE_MPI) + bool _success = (err_code == MPI_SUCCESS); + if(!_success && !mpi::quiet()) + { + int len = 0; + char msg[1024]; + PMPI_Error_string(err_code, msg, &len); + msg[std::min(len, 1023)] = '\0'; + int _rank = rank(); + fprintf(stderr, "[rank=%i][pid=%i][tid=%i][%s]> Error code (%i): %s\n", _rank, + (int) process::get_id(), (int) threading::get_id(), _func, err_code, msg); + } + if(!_success && fail_on_error()) PMPI_Abort(_comm, err_code); + return (err_code == MPI_SUCCESS); +#else + tim::consume_parameters(_func, err_code, _comm); + return false; +#endif +} + +//--------------------------------------------------------------------------------------// + +inline void +barrier(comm_t comm = comm_world_v); + +inline bool +is_supported() +{ +#if defined(ROCPROFSYS_USE_MPI) + return true; +#else + return false; +#endif +} + +//--------------------------------------------------------------------------------------// + +inline bool& +is_finalized() +{ +#if defined(ROCPROFSYS_USE_MPI) + int32_t _fini = 0; + PMPI_Finalized(&_fini); + static bool _instance = static_cast(_fini); + if(!_instance) _instance = static_cast(_fini); +#else + static bool _instance = true; +#endif + return _instance; +} + +//--------------------------------------------------------------------------------------// + +template +inline std::function& +is_initialized_callback() +{ + static std::function _v = []() -> bool { + int32_t _init = 0; +#if defined(ROCPROFSYS_USE_MPI) + if(!is_finalized()) PMPI_Initialized(&_init); +#endif + return (_init != 0) ? true : false; + }; + return _v; +} + +//--------------------------------------------------------------------------------------// + +inline bool +is_initialized() +{ + return is_initialized_callback()(); +} + +//--------------------------------------------------------------------------------------// + +inline void +initialize(int& argc, char**& argv) +{ +#if defined(ROCPROFSYS_USE_MPI) + if(!is_initialized()) + { + using namespace threading; + bool _success_v = false; + if(use_mpi_thread()) + { + auto _init = [&argc, &argv](int itr, const std::string& _type) { + int _actual = -1; + auto ret = MPI_Init_thread(&argc, &argv, itr, &_actual); + if(_actual != itr) + { + fprintf(stderr, "Warning! MPI_Init_thread does not support: %s\n", + _type.c_str()); + } + return ROCPROFSYS_MPI_ERROR_CHECK(ret); + }; + + // ROCPROFSYS_MPI_ERROR_CHECK(MPI_Init(&argc, &argv)); + // int _provided = 0; + // MPI_Query_thread(&_provided); + + auto _mpi_type = use_mpi_thread_type(); + if(_mpi_type == "single") + { + _success_v = _init(single, _mpi_type); + } + else if(_mpi_type == "serialized") + { + _success_v = _init(serialized, _mpi_type); + } + else if(_mpi_type == "funneled") + { + _success_v = _init(funneled, _mpi_type); + } + else if(_mpi_type == "multiple") + { + _success_v = _init(multiple, _mpi_type); + } + else + { + _success_v = _init(multiple, "multiple"); + } + } + + if(!_success_v) ROCPROFSYS_MPI_ERROR_CHECK(MPI_Init(&argc, &argv)); + } +#else + tim::consume_parameters(argc, argv); +#endif +} + +//--------------------------------------------------------------------------------------// + +inline void +initialize(int* argc, char*** argv) +{ + initialize(*argc, *argv); +} + +//--------------------------------------------------------------------------------------// + +inline void +finalize() +{ +#if defined(ROCPROFSYS_USE_MPI) + if(is_initialized()) + { + // barrier(); + MPI_Finalize(); + is_finalized() = true; + // finalized + } +#endif +} + +//--------------------------------------------------------------------------------------// + +#if defined(ROCPROFSYS_USE_MPI) + +int32_t +rank(comm_t comm) +{ + int32_t _rank = 0; + if(is_initialized()) + { + // this is used to guard against the queries that might happen after an + // application calls MPI_Finalize() directly + static communicator_map_t* _instance = new communicator_map_t(); + if(_instance->find(comm) == _instance->end()) + { + PMPI_Comm_rank(comm, &_rank); + (*_instance)[comm] = _rank; + } + else + { + _rank = (*_instance)[comm]; + } + } + return std::max(_rank, (int32_t) 0); +} + +int32_t +size(comm_t comm) +{ + int32_t _size = 1; + if(is_initialized()) + { + // this is used to guard against the queries that might happen after an + // application calls MPI_Finalize() directly + static communicator_map_t* _instance = new communicator_map_t(); + if(_instance->find(comm) == _instance->end()) + { + PMPI_Comm_size(comm, &_size); + (*_instance)[comm] = _size; + } + else + { + _size = (*_instance)[comm]; + } + } + return std::max(_size, (int32_t) 1); +} + +void set_rank(int32_t, comm_t) {} +void set_size(int32_t, comm_t) {} + +#else + +struct comm_data +{ + using entry_t = std::array; + + static int32_t rank(comm_t _comm) { return std::max(m_data()[_comm][0], 0); } + static int32_t size(comm_t _comm) { return std::max(m_data()[_comm][1], 1); } + + friend void set_rank(int32_t, comm_t); + friend void set_size(int32_t, comm_t); + +private: + static std::map& m_data() + { + static std::map _v = { { 0, entry_t{ 0, 1 } } }; + return _v; + } +}; + +int32_t +rank(comm_t comm) +{ + return comm_data::rank(comm); +} + +int32_t +size(comm_t comm) +{ + return comm_data::size(comm); +} + +void +set_rank(int32_t _rank, comm_t comm) +{ + comm_data::m_data()[comm][0] = _rank; +} + +void +set_size(int32_t _size, comm_t comm) +{ + comm_data::m_data()[comm][1] = _size; +} + +#endif + +//--------------------------------------------------------------------------------------// + +inline void +barrier(comm_t comm) +{ +#if defined(ROCPROFSYS_USE_MPI) + if(is_initialized()) PMPI_Barrier(comm); +#else + tim::consume_parameters(comm); +#endif +} + +//--------------------------------------------------------------------------------------// + +inline void +comm_split(comm_t comm, int split_size, int rank, comm_t* local_comm) +{ +#if defined(ROCPROFSYS_USE_MPI) + if(is_initialized()) + ROCPROFSYS_MPI_ERROR_CHECK(PMPI_Comm_split(comm, split_size, rank, local_comm)); +#else + tim::consume_parameters(comm, split_size, rank, local_comm); +#endif +} + +//--------------------------------------------------------------------------------------// + +inline void +comm_split_type(comm_t comm, int split_size, int key, info_t info, comm_t* local_comm) +{ +#if defined(ROCPROFSYS_USE_MPI) + if(is_initialized()) + { + ROCPROFSYS_MPI_ERROR_CHECK( + PMPI_Comm_split_type(comm, split_size, key, info, local_comm)); + } +#else + tim::consume_parameters(comm, split_size, key, info, local_comm); +#endif +} + +//--------------------------------------------------------------------------------------// +/// returns the communicator for the node +inline comm_t +get_node_comm() +{ + if(!is_initialized()) return comm_world_v; + auto _get_node_comm = []() { + comm_t local_comm; + comm_split_type(mpi::comm_world_v, mpi::comm_type_shared_v, 0, mpi::info_null_v, + &local_comm); + return local_comm; + }; + static comm_t _instance = _get_node_comm(); + return _instance; +} + +//--------------------------------------------------------------------------------------// +/// returns the number of ranks on a node +inline int32_t +get_num_ranks_per_node() +{ + if(!is_initialized()) return 1; + return size(get_node_comm()); +} + +//--------------------------------------------------------------------------------------// + +inline int32_t +get_num_nodes() +{ + if(!is_initialized()) return 1; + auto _world_size = size(comm_world_v); + auto _ncomm_size = get_num_ranks_per_node(); + return (_world_size >= _ncomm_size) ? (_world_size / _ncomm_size) : 1; +} + +//--------------------------------------------------------------------------------------// + +inline int32_t +get_node_index() +{ + if(!is_initialized()) return 0; + return rank() / get_num_ranks_per_node(); +} + +//--------------------------------------------------------------------------------------// + +inline void +send(const std::string& str, int dest, int tag, comm_t comm = mpi::comm_world_v) +{ +#if defined(ROCPROFSYS_USE_MPI) + using ulli_t = unsigned long long; + ulli_t len = str.size(); + ROCPROFSYS_MPI_ERROR_CHECK( + PMPI_Send(&len, 1, MPI_UNSIGNED_LONG_LONG, dest, tag, comm)); + if(len != 0) + { + ulli_t _cmax = std::numeric_limits::max(); + if(len <= _cmax) + { + ROCPROFSYS_MPI_ERROR_CHECK( + PMPI_Send(const_cast(str.data()), len, MPI_CHAR, dest, tag, comm)); + } + else + { + auto _len = str.length() / sizeof(long); + auto _rem = str.length() % sizeof(long); + auto _str = str; + if(_rem > 0) + { + _str.resize(_str.length() + _rem, '\0'); + _len += 1; + } + ROCPROFSYS_MPI_ERROR_CHECK(PMPI_Send(const_cast(_str.data()), _len, + MPI_LONG, dest, tag, comm)); + } + } +#else + tim::consume_parameters(str, dest, tag, comm); +#endif +} + +//--------------------------------------------------------------------------------------// + +inline void +recv(std::string& str, int src, int tag, comm_t comm = mpi::comm_world_v) +{ +#if defined(ROCPROFSYS_USE_MPI) + using ulli_t = unsigned long long; + ulli_t len = 0; + MPI_Status s; + ROCPROFSYS_MPI_ERROR_CHECK( + PMPI_Recv(&len, 1, MPI_UNSIGNED_LONG_LONG, src, tag, comm, &s)); + if(len != 0) + { + ulli_t _cmax = std::numeric_limits::max(); + if(len <= _cmax) + { + std::vector tmp(len); + ROCPROFSYS_MPI_ERROR_CHECK( + PMPI_Recv(tmp.data(), len, MPI_CHAR, src, tag, comm, &s)); + str.assign(tmp.begin(), tmp.end()); + } + else + { + auto _len = len / sizeof(long); + auto _rem = len % sizeof(long); + if(_rem > 0) _len += 1; + std::vector tmp(_len); + ROCPROFSYS_MPI_ERROR_CHECK( + PMPI_Recv(tmp.data(), _len, MPI_LONG, src, tag, comm, &s)); + std::vector chars = {}; + auto _ratio = sizeof(long) / sizeof(char); + chars.reserve(_len * _ratio); + for(auto& itr : tmp) + { + for(size_t i = 0; i < _ratio; ++i) + { + chars.emplace_back(itr >> (i * sizeof(void*))); + if(chars.size() == len) break; + } + } + str.assign(chars.begin(), chars.end()); + } + } + else + { + str.clear(); + } +#else + tim::consume_parameters(str, src, tag, comm); +#endif +} + +//--------------------------------------------------------------------------------------// + +inline void +gather(const void* sendbuf, int sendcount, data_type_t sendtype, void* recvbuf, + int recvcount, data_type_t recvtype, int root, comm_t comm = mpi::comm_world_v) +{ +#if defined(ROCPROFSYS_USE_MPI) + if(is_initialized()) + { + ROCPROFSYS_MPI_ERROR_CHECK(PMPI_Gather(sendbuf, sendcount, sendtype, recvbuf, + recvcount, recvtype, root, comm)); + } +#else + tim::consume_parameters(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, + root, comm); +#endif +} + +//--------------------------------------------------------------------------------------// + +inline void +comm_spawn_multiple(int count, char** commands, char*** argv, const int* maxprocs, + const info_t* info, int root, comm_t comm, comm_t* intercomm, + int* errcodes) +{ +#if defined(ROCPROFSYS_USE_MPI) + if(is_initialized()) + { + ROCPROFSYS_MPI_ERROR_CHECK(PMPI_Comm_spawn_multiple( + count, commands, argv, maxprocs, info, root, comm, intercomm, errcodes)); + } +#else + tim::consume_parameters(count, commands, argv, maxprocs, info, root, comm, intercomm, + errcodes); +#endif +} + +//--------------------------------------------------------------------------------------// + +} // namespace mpi +} // namespace rocprofsys + +#if defined(ROCPROFSYS_UNDEFINE_OMPI_SKIP_MPICXX) && ROCPROFSYS_UNDEFINE_OMPI_SKIP_MPICXX +# undef OMPI_SKIP_MPICXX +#endif diff --git a/source/lib/core/perfetto.cpp b/source/lib/core/perfetto.cpp index 41b0c71a45..50f366721e 100644 --- a/source/lib/core/perfetto.cpp +++ b/source/lib/core/perfetto.cpp @@ -209,7 +209,7 @@ post_process(tim::manager* _timemory_manager, bool& _perfetto_output_error) }; auto trace_data = char_vec_t{}; -#if defined(TIMEMORY_USE_MPI) && TIMEMORY_USE_MPI > 0 +#if defined(ROCPROFSYS_USE_MPI) && ROCPROFSYS_USE_MPI > 0 if(get_perfetto_combined_traces()) { using perfetto_mpi_get_t = tim::operation::finalize::mpi_get; diff --git a/source/lib/core/timemory.hpp b/source/lib/core/timemory.hpp index 92363bdb22..0d35fe0d36 100644 --- a/source/lib/core/timemory.hpp +++ b/source/lib/core/timemory.hpp @@ -27,11 +27,9 @@ #include "defines.hpp" #include -#include #include #include #include -#include #include #include #include @@ -40,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/source/lib/rocprof-sys/library/components/CMakeLists.txt b/source/lib/rocprof-sys/library/components/CMakeLists.txt index 3ccd1f9818..977d302569 100644 --- a/source/lib/rocprof-sys/library/components/CMakeLists.txt +++ b/source/lib/rocprof-sys/library/components/CMakeLists.txt @@ -26,6 +26,7 @@ set(component_headers ${CMAKE_CURRENT_LIST_DIR}/ensure_storage.hpp ${CMAKE_CURRENT_LIST_DIR}/exit_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/fork_gotcha.hpp + ${CMAKE_CURRENT_LIST_DIR}/mpip.hpp ${CMAKE_CURRENT_LIST_DIR}/mpi_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/numa_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/vaapi_gotcha.hpp diff --git a/source/lib/rocprof-sys/library/components/mpi_gotcha.cpp b/source/lib/rocprof-sys/library/components/mpi_gotcha.cpp index 94d694cede..bd65c608d2 100644 --- a/source/lib/rocprof-sys/library/components/mpi_gotcha.cpp +++ b/source/lib/rocprof-sys/library/components/mpi_gotcha.cpp @@ -25,11 +25,12 @@ #include "core/components/fwd.hpp" #include "core/config.hpp" #include "core/debug.hpp" +#include "core/mpi.hpp" #include "core/mproc.hpp" #include "library/components/category_region.hpp" #include "library/components/comm_data.hpp" +#include "mpip.hpp" -#include #include #include #include @@ -46,8 +47,7 @@ namespace component { namespace { -using mpip_bundle_t = - tim::component_tuple, comp::comm_data>; +using mpip_bundle_t = tim::component_tuple, comm_data>; struct comm_rank_data { @@ -102,7 +102,7 @@ auto mpi_comm_records = std::map{}; using tim::auto_lock_t; using tim::type_mutex; -#if defined(TIMEMORY_USE_MPI) +#if defined(ROCPROFSYS_USE_MPI) int rocprofsys_mpi_copy(MPI_Comm, int, void*, void*, void*, int*) { @@ -117,7 +117,7 @@ rocprofsys_mpi_fini(MPI_Comm, int, void*, void*) if(!_blocked.empty()) tim::signals::block_signals(_blocked, tim::signals::sigmask_scope::process); if(mpip_index != std::numeric_limits::max()) - comp::deactivate_mpip(mpip_index); + deactivate_mpip(mpip_index); if(is_root_process()) rocprofsys_finalize_hidden(); return MPI_SUCCESS; } @@ -127,7 +127,7 @@ rocprofsys_mpi_fini(MPI_Comm, int, void*, void*) void rocprofsys_mpi_set_attr() { -#if defined(TIMEMORY_USE_MPI) +#if defined(ROCPROFSYS_USE_MPI) auto _blocked = get_sampling_signals(); if(!_blocked.empty()) tim::signals::block_signals(_blocked, tim::signals::sigmask_scope::process); @@ -162,17 +162,28 @@ mpi_gotcha::configure() mpi_gotcha_t::get_initializer() = []() { mpi_gotcha_t::template configure<0, int, int*, char***>("MPI_Init"); - mpi_gotcha_t::template configure<1, int, int*, char***, int, int*>( + mpi_gotcha_t::template configure<1, int, int*, char***>("PMPI_Init"); + mpi_gotcha_t::template configure<2, int, int*, char***, int, int*>( "MPI_Init_thread"); - mpi_gotcha_t::template configure<2, int>("MPI_Finalize"); + mpi_gotcha_t::template configure<3, int, int*, char***, int, int*>( + "PMPI_Init_thread"); + mpi_gotcha_t::template configure<4, int>("MPI_Finalize"); + mpi_gotcha_t::template configure<5, int>("PMPI_Finalize"); reject_bindings.emplace("MPI_Init"); + reject_bindings.emplace("PMPI_Init"); reject_bindings.emplace("MPI_Init_thread"); + reject_bindings.emplace("PMPI_Init_thread"); reject_bindings.emplace("MPI_Finalize"); + reject_bindings.emplace("PMPI_Finalize"); #if defined(ROCPROFSYS_USE_MPI_HEADERS) && ROCPROFSYS_USE_MPI_HEADERS > 0 - mpi_gotcha_t::template configure<3, int, comm_t, int*>("MPI_Comm_rank"); - mpi_gotcha_t::template configure<4, int, comm_t, int*>("MPI_Comm_size"); + mpi_gotcha_t::template configure<6, int, comm_t, int*>("MPI_Comm_rank"); + mpi_gotcha_t::template configure<7, int, comm_t, int*>("PMPI_Comm_rank"); + mpi_gotcha_t::template configure<8, int, comm_t, int*>("MPI_Comm_size"); + mpi_gotcha_t::template configure<9, int, comm_t, int*>("PMPI_Comm_size"); reject_bindings.emplace("MPI_Comm_rank"); + reject_bindings.emplace("PMPI_Comm_rank"); reject_bindings.emplace("MPI_Comm_size"); + reject_bindings.emplace("PMPI_Comm_size"); #endif }; } @@ -207,13 +218,13 @@ mpi_gotcha::update() auto _rank = _rank_data.rank; auto _size = _rank_data.size; - tim::mpi::set_rank(_rank); - tim::mpi::set_size(_size); - tim::settings::default_process_suffix() = _rank; + rocprofsys::mpi::set_rank(_rank); + rocprofsys::mpi::set_size(_size); + rocprofsys::settings::default_process_suffix() = _rank; ROCPROFSYS_BASIC_VERBOSE(0, "[pid=%i] MPI rank: %i (%i), MPI size: %i (%i)\n", - process::get_id(), tim::mpi::rank(), _rank, - tim::mpi::size(), _size); + process::get_id(), rocprofsys::mpi::rank(), _rank, + rocprofsys::mpi::size(), _size); last_comm_record = _rank_data; config::get_use_pid() = true; return true; @@ -236,9 +247,9 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, int*, char***) ROCPROFSYS_BASIC_DEBUG_F("%s(int*, char***)\n", _data.tool_id.c_str()); rocprofsys_push_trace_hidden(_data.tool_id.c_str()); -#if !defined(TIMEMORY_USE_MPI) && defined(TIMEMORY_USE_MPI_HEADERS) - tim::mpi::is_initialized_callback() = []() { return true; }; - tim::mpi::is_finalized() = false; +#if !defined(ROCPROFSYS_USE_MPI) && defined(ROCPROFSYS_USE_MPI_HEADERS) + rocprofsys::mpi::is_initialized_callback() = []() { return true; }; + rocprofsys::mpi::is_finalized() = false; #endif } @@ -248,9 +259,9 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, int*, char***, in ROCPROFSYS_BASIC_DEBUG_F("%s(int*, char***, int, int*)\n", _data.tool_id.c_str()); rocprofsys_push_trace_hidden(_data.tool_id.c_str()); -#if !defined(TIMEMORY_USE_MPI) && defined(TIMEMORY_USE_MPI_HEADERS) - tim::mpi::is_initialized_callback() = []() { return true; }; - tim::mpi::is_finalized() = false; +#if !defined(ROCPROFSYS_USE_MPI) && defined(ROCPROFSYS_USE_MPI_HEADERS) + rocprofsys::mpi::is_initialized_callback() = []() { return true; }; + rocprofsys::mpi::is_finalized() = false; #endif } @@ -264,11 +275,11 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming) tim::signals::block_signals(_blocked, tim::signals::sigmask_scope::process); if(mpip_index != std::numeric_limits::max()) - comp::deactivate_mpip(mpip_index); + deactivate_mpip(mpip_index); -#if !defined(TIMEMORY_USE_MPI) && defined(TIMEMORY_USE_MPI_HEADERS) - tim::mpi::is_initialized_callback() = []() { return false; }; - tim::mpi::is_finalized() = true; +#if !defined(ROCPROFSYS_USE_MPI) && defined(ROCPROFSYS_USE_MPI_HEADERS) + rocprofsys::mpi::is_initialized_callback() = []() { return false; }; + rocprofsys::mpi::is_finalized() = true; #else if(is_root_process() && rocprofsys::get_state() < rocprofsys::State::Finalized) rocprofsys_finalize_hidden(); @@ -278,15 +289,17 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming) void mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, comm_t _comm, int* _val) { - ROCPROFSYS_BASIC_DEBUG_F("%s()\n", _data.tool_id.c_str()); + ROCPROFSYS_BASIC_DEBUG_F("%s(comm_t _comm, int* _val)\n", _data.tool_id.c_str()); rocprofsys_push_trace_hidden(_data.tool_id.c_str()); - if(_data.tool_id == "MPI_Comm_rank") + if(_data.tool_id.find("MPI_Comm_rank") == 0 || + _data.tool_id.find("PMPI_Comm_rank") == 0) { m_comm_val = (uintptr_t) _comm; // NOLINT m_rank_ptr = _val; } - else if(_data.tool_id == "MPI_Comm_size") + else if(_data.tool_id.find("MPI_Comm_size") == 0 || + _data.tool_id.find("PMPI_Comm_size") == 0) { m_comm_val = (uintptr_t) _comm; // NOLINT m_size_ptr = _val; @@ -305,7 +318,8 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval) if(!settings::use_output_suffix()) settings::use_output_suffix() = true; - if(_retval == tim::mpi::success_v && _data.tool_id.find("MPI_Init") == 0) + if(_retval == rocprofsys::mpi::success_v && + (_data.tool_id.find("MPI_Init") == 0 || _data.tool_id.find("PMPI_Init") == 0)) { rocprofsys_mpi_set_attr(); // rocprof-sys will set this environement variable to true in binary rewrite mode @@ -319,9 +333,9 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval) // use env vars ROCPROFSYS_MPIP_PERMIT_LIST and ROCPROFSYS_MPIP_REJECT_LIST // to control the gotcha bindings at runtime - comp::configure_mpip(permit_bindings, - reject_bindings); - mpip_index = comp::activate_mpip(); + configure_mpip(permit_bindings, + reject_bindings); + mpip_index = activate_mpip(); } auto_lock_t _lk{ type_mutex() }; @@ -339,7 +353,9 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval) } } } - else if(_retval == tim::mpi::success_v && _data.tool_id.find("MPI_Comm_") == 0) + else if(_retval == rocprofsys::mpi::success_v && + (_data.tool_id.find("MPI_Comm_") == 0 || + _data.tool_id.find("PMPI_Comm_") == 0)) { auto_lock_t _lk{ type_mutex() }; if(m_comm_val != null_comm()) @@ -356,7 +372,8 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval) : std::max(m_size, _get_rank() + 1); }; - if(_data.tool_id == "MPI_Comm_rank" || _data.tool_id == "MPI_Comm_size") + if(_data.tool_id == "MPI_Comm_rank" || _data.tool_id == "MPI_Comm_size" || + _data.tool_id == "PMPI_Comm_rank" || _data.tool_id == "PMPI_Comm_size") { _comm_entry.rank = m_rank = std::max(_comm_entry.rank, _get_rank()); _comm_entry.size = m_size = std::max(_comm_entry.size, _get_size()); diff --git a/source/lib/rocprof-sys/library/components/mpi_gotcha.hpp b/source/lib/rocprof-sys/library/components/mpi_gotcha.hpp index cd0bac0ee0..b4147292f7 100644 --- a/source/lib/rocprof-sys/library/components/mpi_gotcha.hpp +++ b/source/lib/rocprof-sys/library/components/mpi_gotcha.hpp @@ -24,6 +24,7 @@ #include "core/common.hpp" #include "core/defines.hpp" +#include "core/mpi.hpp" #include "core/timemory.hpp" #include @@ -35,7 +36,7 @@ namespace component // this is used to wrap MPI_Init and MPI_Init_thread struct mpi_gotcha : comp::base { - using comm_t = tim::mpi::comm_t; + using comm_t = rocprofsys::mpi::comm_t; using gotcha_data_t = comp::gotcha_data; ROCPROFSYS_DEFAULT_OBJECT(mpi_gotcha) @@ -81,5 +82,5 @@ private: } // namespace component using mpi_gotcha_t = - comp::gotcha<5, tim::component_tuple, project::rocprofsys>; + comp::gotcha<10, tim::component_tuple, project::rocprofsys>; } // namespace rocprofsys diff --git a/source/lib/rocprof-sys/library/components/mpip.hpp b/source/lib/rocprof-sys/library/components/mpip.hpp new file mode 100644 index 0000000000..1c5e359914 --- /dev/null +++ b/source/lib/rocprof-sys/library/components/mpip.hpp @@ -0,0 +1,779 @@ +// MIT License +// +// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "core/timemory.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if !defined(ROCPROFSYS_USE_MPI) && defined(ROCPROFSYS_USE_MPI_HEADERS) && \ + !defined(OMPI_SKIP_MPICXX) +# define ROCPROFSYS_UNDEFINE_OMPI_SKIP_MPICXX 1 +# define OMPI_SKIP_MPICXX 1 +#endif + +#if defined(ROCPROFSYS_USE_MPI) || defined(ROCPROFSYS_USE_MPI_HEADERS) +# include +#endif + +#if !defined(NUM_ROCPROFSYS_MPIP_WRAPPERS) +# define NUM_ROCPROFSYS_MPIP_WRAPPERS 500 +#endif + +namespace rocprofsys +{ +namespace component +{ +// +//--------------------------------------------------------------------------------------// +// +template +TIMEMORY_VISIBILITY("default") +TIMEMORY_NOINLINE void configure_mpip(const std::set& permit = {}, + const std::set& reject = {}); +// +//--------------------------------------------------------------------------------------// +// +template +TIMEMORY_VISIBILITY("default") +TIMEMORY_NOINLINE uint64_t activate_mpip(); +// +//--------------------------------------------------------------------------------------// +// +template +TIMEMORY_VISIBILITY("default") +TIMEMORY_NOINLINE uint64_t deactivate_mpip(uint64_t); +// +//--------------------------------------------------------------------------------------// +// +template +struct mpip_handle : base, void> +{ + static constexpr size_t mpip_wrapper_count = NUM_ROCPROFSYS_MPIP_WRAPPERS; + + using value_type = void; + using this_type = mpip_handle; + using base_type = base; + + using mpi_toolset_t = Toolset; + using mpip_gotcha_t = tim::component::gotcha; + using mpip_tuple_t = tim::component_tuple; + using toolset_ptr_t = std::shared_ptr; + + static std::string label() { return "mpip_handle"; } + static std::string description() { return "Handle for activating MPI wrappers"; } + + void get() {} + + void start() + { + if(get_tool_count()++ == 0) + { + get_tool_instance() = std::make_shared("rocprofsys_mpip"); + get_tool_instance()->start(); + } + } + + void stop() + { + auto idx = --get_tool_count(); + if(get_tool_instance().get()) + { + get_tool_instance()->stop(); + if(idx == 0) get_tool_instance().reset(); + } + } + + int get_count() { return get_tool_count().load(); } + +private: + struct persistent_data + { + std::atomic m_configured; + std::atomic m_count; + toolset_ptr_t m_tool; + }; + + static persistent_data& get_persistent_data() + { + static persistent_data _instance; + return _instance; + } + + static std::atomic& get_configured() + { + return get_persistent_data().m_configured; + } + + static toolset_ptr_t& get_tool_instance() { return get_persistent_data().m_tool; } + + static std::atomic& get_tool_count() + { + return get_persistent_data().m_count; + } +}; +// +//======================================================================================// +// +} // namespace component +} // namespace rocprofsys +// +//======================================================================================// +// +#include +// +//======================================================================================// +// +/// \fn uint64_t rocprofsys::component::activate_mpip() +/// \brief The thread that first activates mpip will be the thread that turns it off. +/// Function returns the number of new mpip handles +/// +template +uint64_t +rocprofsys::component::activate_mpip() +{ + using handle_t = rocprofsys::component::mpip_handle; + + static std::shared_ptr _handle; + + if(!_handle.get()) + { + _handle = std::make_shared(); + _handle->start(); + + auto cleanup_functor = [=]() { + if(_handle) + { + _handle->stop(); + _handle.reset(); + } + }; + + static std::string _label = []() { + std::stringstream ss; + ss << "rocprofsys-mpip-" << demangle() << "-" << demangle(); + return ss.str(); + }(); + ROCPROFSYS_BASIC_DEBUG_F("Adding cleanup for %s", _label.c_str()); + tim::manager::instance()->add_cleanup(_label, cleanup_functor); + return 1; + } + return 0; +} +// +//======================================================================================// +// +/// \fn uint64_t rocprofsys::component::deactivate_mpip(uint64_t id) +/// \brief The thread that created the initial mpip handle will turn off. Returns +/// the number of handles active +/// +template +uint64_t +rocprofsys::component::deactivate_mpip(uint64_t id) +{ + if(id > 0) + { + static std::string _label = []() { + std::stringstream ss; + ss << "rocprofsys-mpip-" << demangle() << "-" << demangle(); + return ss.str(); + }(); + ROCPROFSYS_BASIC_DEBUG_F("Removing cleanup for %s", _label.c_str()); + tim::manager::instance()->cleanup(_label); + return 0; + } + return 1; +} +// +//======================================================================================// +// +#if !defined(TIMEMORY_USE_GOTCHA) || \ + (!defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS)) +// +template +void +rocprofsys::component::configure_mpip(const std::set&, + const std::set&) +{} +// +#else +// +template +void +rocprofsys::component::configure_mpip(const std::set& permit, + const std::set& reject) +{ + static constexpr size_t mpip_wrapper_count = NUM_ROCPROFSYS_MPIP_WRAPPERS; + static bool is_initialized = false; + + using mpip_gotcha_t = tim::component::gotcha; + + if(!is_initialized) + { + // generate the gotcha wrappers + mpip_gotcha_t::get_initializer() = []() { + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 0, MPI_Accumulate); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 1, MPI_Add_error_class); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 2, MPI_Add_error_code); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 3, MPI_Add_error_string); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 4, MPI_Aint_add); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 5, MPI_Aint_diff); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 6, MPI_Allgather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 7, MPI_Allgatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 8, MPI_Alloc_mem); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 9, MPI_Allreduce); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 10, MPI_Alltoall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 11, MPI_Alltoallv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 12, MPI_Alltoallw); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 13, MPI_Barrier); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 14, MPI_Bcast); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 15, MPI_Bsend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 16, MPI_Bsend_init); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 17, MPI_Buffer_attach); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 18, MPI_Buffer_detach); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 19, MPI_Cancel); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 20, MPI_Cart_coords); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 21, MPI_Cart_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 22, MPI_Cart_get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 23, MPI_Cart_map); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 24, MPI_Cart_rank); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 25, MPI_Cart_shift); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 26, MPI_Cart_sub); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 27, MPI_Cartdim_get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 28, MPI_Close_port); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 29, MPI_Comm_accept); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 30, MPI_Comm_call_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 31, MPI_Comm_compare); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 32, MPI_Comm_connect); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 33, MPI_Comm_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 34, MPI_Comm_create_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 35, MPI_Comm_create_group); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 36, MPI_Comm_create_keyval); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 37, MPI_Comm_delete_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 38, MPI_Comm_disconnect); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 39, MPI_Comm_dup); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 40, MPI_Comm_dup_with_info); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 41, MPI_Comm_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 42, MPI_Comm_free_keyval); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 43, MPI_Comm_get_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 44, MPI_Comm_get_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 45, MPI_Comm_get_info); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 46, MPI_Comm_get_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 47, MPI_Comm_get_parent); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 48, MPI_Comm_group); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 49, MPI_Comm_idup); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 50, MPI_Comm_join); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 51, MPI_Comm_rank); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 52, MPI_Comm_remote_group); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 53, MPI_Comm_remote_size); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 54, MPI_Comm_set_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 55, MPI_Comm_set_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 56, MPI_Comm_set_info); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 57, MPI_Comm_set_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 58, MPI_Comm_size); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 59, MPI_Comm_spawn); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 60, MPI_Comm_spawn_multiple); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 61, MPI_Comm_split); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 62, MPI_Comm_split_type); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 63, MPI_Comm_test_inter); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 64, MPI_Compare_and_swap); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 65, MPI_Dims_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 66, MPI_Dist_graph_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 67, MPI_Dist_graph_create_adjacent); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 68, MPI_Dist_graph_neighbors); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 69, MPI_Dist_graph_neighbors_count); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 70, MPI_Error_class); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 71, MPI_Error_string); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 72, MPI_Exscan); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 73, MPI_Fetch_and_op); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 74, MPI_File_call_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 75, MPI_File_create_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 76, MPI_File_get_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 77, MPI_File_set_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 78, MPI_Free_mem); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 79, MPI_Gather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 80, MPI_Gatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 81, MPI_Get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 82, MPI_Get_accumulate); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 83, MPI_Get_address); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 84, MPI_Get_count); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 85, MPI_Get_elements); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 86, MPI_Get_elements_x); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 87, MPI_Get_library_version); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 88, MPI_Get_processor_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 89, MPI_Get_version); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 90, MPI_Graph_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 91, MPI_Graph_get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 92, MPI_Graph_map); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 93, MPI_Graph_neighbors); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 94, MPI_Graph_neighbors_count); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 95, MPI_Graphdims_get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 96, MPI_Grequest_complete); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 97, MPI_Grequest_start); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 98, MPI_Group_compare); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 99, MPI_Group_difference); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 100, MPI_Group_excl); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 101, MPI_Group_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 102, MPI_Group_incl); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 103, MPI_Group_intersection); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 104, MPI_Group_range_excl); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 105, MPI_Group_range_incl); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 106, MPI_Group_rank); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 107, MPI_Group_size); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 108, MPI_Group_translate_ranks); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 109, MPI_Group_union); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 110, MPI_Iallgather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 111, MPI_Iallgatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 112, MPI_Iallreduce); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 113, MPI_Ialltoall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 114, MPI_Ialltoallv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 115, MPI_Ialltoallw); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 116, MPI_Ibarrier); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 117, MPI_Ibcast); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 118, MPI_Ibsend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 119, MPI_Iexscan); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 120, MPI_Igather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 121, MPI_Igatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 122, MPI_Improbe); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 123, MPI_Imrecv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 124, MPI_Ineighbor_allgather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 125, MPI_Ineighbor_allgatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 126, MPI_Ineighbor_alltoall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 127, MPI_Ineighbor_alltoallv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 128, MPI_Ineighbor_alltoallw); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 129, MPI_Info_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 130, MPI_Info_delete); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 131, MPI_Info_dup); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 132, MPI_Info_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 133, MPI_Info_get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 134, MPI_Info_get_nkeys); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 135, MPI_Info_get_nthkey); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 136, MPI_Info_get_valuelen); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 137, MPI_Info_set); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 138, MPI_Intercomm_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 139, MPI_Intercomm_merge); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 140, MPI_Iprobe); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 141, MPI_Irecv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 142, MPI_Ireduce); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 143, MPI_Ireduce_scatter); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 144, MPI_Ireduce_scatter_block); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 145, MPI_Irsend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 146, MPI_Is_thread_main); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 147, MPI_Iscan); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 148, MPI_Iscatter); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 149, MPI_Iscatterv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 150, MPI_Isend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 151, MPI_Issend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 152, MPI_Lookup_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 153, MPI_Mprobe); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 154, MPI_Mrecv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 155, MPI_Neighbor_allgather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 156, MPI_Neighbor_allgatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 157, MPI_Neighbor_alltoall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 158, MPI_Neighbor_alltoallv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 159, MPI_Neighbor_alltoallw); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 160, MPI_Op_commutative); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 161, MPI_Op_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 162, MPI_Op_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 163, MPI_Open_port); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 164, MPI_Pack); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 165, MPI_Pack_external); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 166, MPI_Pack_external_size); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 167, MPI_Pack_size); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 168, MPI_Probe); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 169, MPI_Publish_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 170, MPI_Put); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 171, MPI_Query_thread); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 172, MPI_Raccumulate); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 173, MPI_Recv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 174, MPI_Recv_init); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 175, MPI_Reduce); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 176, MPI_Reduce_local); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 177, MPI_Reduce_scatter); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 178, MPI_Reduce_scatter_block); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 179, MPI_Request_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 180, MPI_Request_get_status); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 181, MPI_Rget); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 182, MPI_Rget_accumulate); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 183, MPI_Rput); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 184, MPI_Rsend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 185, MPI_Rsend_init); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 186, MPI_Scan); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 187, MPI_Scatter); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 188, MPI_Scatterv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 189, MPI_Send); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 190, MPI_Send_init); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 191, MPI_Sendrecv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 192, MPI_Sendrecv_replace); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 193, MPI_Ssend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 194, MPI_Ssend_init); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 195, MPI_Start); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 196, MPI_Startall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 197, MPI_Status_f2c); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 198, MPI_Status_set_cancelled); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 199, MPI_Status_set_elements); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 200, MPI_Status_set_elements_x); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 201, MPI_Topo_test); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 202, MPI_Unpack); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 203, MPI_Unpack_external); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 204, MPI_Unpublish_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 205, MPI_Wait); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 206, MPI_Waitall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 207, MPI_Waitany); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 208, MPI_Waitsome); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 209, MPI_Win_allocate); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 210, MPI_Win_allocate_shared); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 211, MPI_Win_attach); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 212, MPI_Win_call_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 213, MPI_Win_complete); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 214, MPI_Win_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 215, MPI_Win_create_dynamic); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 216, MPI_Win_create_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 217, MPI_Win_create_keyval); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 218, MPI_Win_delete_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 219, MPI_Win_detach); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 220, MPI_Win_fence); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 221, MPI_Win_flush); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 222, MPI_Win_flush_all); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 223, MPI_Win_flush_local); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 224, MPI_Win_flush_local_all); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 225, MPI_Win_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 226, MPI_Win_free_keyval); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 227, MPI_Win_get_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 228, MPI_Win_get_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 229, MPI_Win_get_group); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 230, MPI_Win_get_info); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 231, MPI_Win_get_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 232, MPI_Win_lock); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 233, MPI_Win_lock_all); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 234, MPI_Win_post); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 235, MPI_Win_set_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 236, MPI_Win_set_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 237, MPI_Win_set_info); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 238, MPI_Win_set_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 239, MPI_Win_shared_query); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 240, MPI_Win_start); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 241, MPI_Win_sync); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 242, MPI_Win_test); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 243, MPI_Win_unlock); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 244, MPI_Win_unlock_all); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 245, MPI_Win_wait); + + // MPI profiling interface wrappers + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 250, PMPI_Accumulate); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 251, PMPI_Add_error_class); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 252, PMPI_Add_error_code); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 253, PMPI_Add_error_string); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 254, PMPI_Aint_add); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 255, PMPI_Aint_diff); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 256, PMPI_Allgather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 257, PMPI_Allgatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 258, PMPI_Alloc_mem); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 259, PMPI_Allreduce); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 260, PMPI_Alltoall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 261, PMPI_Alltoallv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 262, PMPI_Alltoallw); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 263, PMPI_Barrier); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 264, PMPI_Bcast); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 265, PMPI_Bsend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 266, PMPI_Bsend_init); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 267, PMPI_Buffer_attach); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 268, PMPI_Buffer_detach); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 269, PMPI_Cancel); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 270, PMPI_Cart_coords); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 271, PMPI_Cart_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 272, PMPI_Cart_get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 273, PMPI_Cart_map); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 274, PMPI_Cart_rank); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 275, PMPI_Cart_shift); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 276, PMPI_Cart_sub); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 277, PMPI_Cartdim_get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 278, PMPI_Close_port); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 279, PMPI_Comm_accept); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 280, PMPI_Comm_call_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 281, PMPI_Comm_compare); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 282, PMPI_Comm_connect); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 283, PMPI_Comm_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 284, PMPI_Comm_create_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 285, PMPI_Comm_create_group); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 286, PMPI_Comm_create_keyval); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 287, PMPI_Comm_delete_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 288, PMPI_Comm_disconnect); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 289, PMPI_Comm_dup); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 290, PMPI_Comm_dup_with_info); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 291, PMPI_Comm_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 292, PMPI_Comm_free_keyval); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 293, PMPI_Comm_get_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 294, PMPI_Comm_get_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 295, PMPI_Comm_get_info); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 296, PMPI_Comm_get_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 297, PMPI_Comm_get_parent); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 298, PMPI_Comm_group); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 299, PMPI_Comm_idup); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 300, PMPI_Comm_join); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 301, PMPI_Comm_rank); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 302, PMPI_Comm_remote_group); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 303, PMPI_Comm_remote_size); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 304, PMPI_Comm_set_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 305, PMPI_Comm_set_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 306, PMPI_Comm_set_info); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 307, PMPI_Comm_set_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 308, PMPI_Comm_size); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 309, PMPI_Comm_spawn); + // TIMEMORY_C_GOTCHA(mpip_gotcha_t, 310, PMPI_Comm_spawn_multiple); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 311, PMPI_Comm_split); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 312, PMPI_Comm_split_type); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 313, PMPI_Comm_test_inter); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 314, PMPI_Compare_and_swap); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 315, PMPI_Dims_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 316, PMPI_Dist_graph_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 317, PMPI_Dist_graph_create_adjacent); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 318, PMPI_Dist_graph_neighbors); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 319, PMPI_Dist_graph_neighbors_count); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 320, PMPI_Error_class); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 321, PMPI_Error_string); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 322, PMPI_Exscan); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 323, PMPI_Fetch_and_op); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 324, PMPI_File_call_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 325, PMPI_File_create_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 326, PMPI_File_get_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 327, PMPI_File_set_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 328, PMPI_Free_mem); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 329, PMPI_Gather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 330, PMPI_Gatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 331, PMPI_Get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 332, PMPI_Get_accumulate); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 333, PMPI_Get_address); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 334, PMPI_Get_count); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 335, PMPI_Get_elements); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 336, PMPI_Get_elements_x); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 337, PMPI_Get_library_version); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 338, PMPI_Get_processor_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 339, PMPI_Get_version); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 340, PMPI_Graph_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 341, PMPI_Graph_get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 342, PMPI_Graph_map); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 343, PMPI_Graph_neighbors); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 344, PMPI_Graph_neighbors_count); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 345, PMPI_Graphdims_get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 346, PMPI_Grequest_complete); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 347, PMPI_Grequest_start); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 348, PMPI_Group_compare); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 349, PMPI_Group_difference); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 350, PMPI_Group_excl); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 351, PMPI_Group_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 352, PMPI_Group_incl); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 353, PMPI_Group_intersection); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 354, PMPI_Group_range_excl); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 355, PMPI_Group_range_incl); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 356, PMPI_Group_rank); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 357, PMPI_Group_size); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 358, PMPI_Group_translate_ranks); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 359, PMPI_Group_union); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 360, PMPI_Iallgather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 361, PMPI_Iallgatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 362, PMPI_Iallreduce); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 363, PMPI_Ialltoall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 364, PMPI_Ialltoallv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 365, PMPI_Ialltoallw); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 366, PMPI_Ibarrier); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 367, PMPI_Ibcast); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 368, PMPI_Ibsend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 369, PMPI_Iexscan); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 370, PMPI_Igather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 371, PMPI_Igatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 372, PMPI_Improbe); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 373, PMPI_Imrecv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 374, PMPI_Ineighbor_allgather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 375, PMPI_Ineighbor_allgatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 376, PMPI_Ineighbor_alltoall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 377, PMPI_Ineighbor_alltoallv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 378, PMPI_Ineighbor_alltoallw); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 379, PMPI_Info_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 380, PMPI_Info_delete); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 381, PMPI_Info_dup); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 382, PMPI_Info_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 383, PMPI_Info_get); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 384, PMPI_Info_get_nkeys); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 385, PMPI_Info_get_nthkey); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 386, PMPI_Info_get_valuelen); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 387, PMPI_Info_set); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 388, PMPI_Intercomm_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 389, PMPI_Intercomm_merge); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 390, PMPI_Iprobe); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 391, PMPI_Irecv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 392, PMPI_Ireduce); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 393, PMPI_Ireduce_scatter); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 394, PMPI_Ireduce_scatter_block); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 395, PMPI_Irsend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 396, PMPI_Is_thread_main); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 397, PMPI_Iscan); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 398, PMPI_Iscatter); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 399, PMPI_Iscatterv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 400, PMPI_Isend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 401, PMPI_Issend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 402, PMPI_Lookup_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 403, PMPI_Mprobe); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 404, PMPI_Mrecv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 405, PMPI_Neighbor_allgather); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 406, PMPI_Neighbor_allgatherv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 407, PMPI_Neighbor_alltoall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 408, PMPI_Neighbor_alltoallv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 409, PMPI_Neighbor_alltoallw); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 410, PMPI_Op_commutative); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 411, PMPI_Op_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 412, PMPI_Op_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 413, PMPI_Open_port); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 414, PMPI_Pack); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 415, PMPI_Pack_external); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 416, PMPI_Pack_external_size); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 417, PMPI_Pack_size); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 418, PMPI_Probe); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 419, PMPI_Publish_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 420, PMPI_Put); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 421, PMPI_Query_thread); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 422, PMPI_Raccumulate); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 423, PMPI_Recv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 424, PMPI_Recv_init); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 425, PMPI_Reduce); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 426, PMPI_Reduce_local); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 427, PMPI_Reduce_scatter); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 428, PMPI_Reduce_scatter_block); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 429, PMPI_Request_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 430, PMPI_Request_get_status); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 431, PMPI_Rget); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 432, PMPI_Rget_accumulate); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 433, PMPI_Rput); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 434, PMPI_Rsend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 435, PMPI_Rsend_init); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 436, PMPI_Scan); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 437, PMPI_Scatter); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 438, PMPI_Scatterv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 439, PMPI_Send); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 440, PMPI_Send_init); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 441, PMPI_Sendrecv); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 442, PMPI_Sendrecv_replace); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 443, PMPI_Ssend); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 444, PMPI_Ssend_init); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 445, PMPI_Start); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 446, PMPI_Startall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 447, PMPI_Status_f2c); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 448, PMPI_Status_set_cancelled); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 449, PMPI_Status_set_elements); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 450, PMPI_Status_set_elements_x); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 451, PMPI_Topo_test); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 452, PMPI_Unpack); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 453, PMPI_Unpack_external); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 454, PMPI_Unpublish_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 455, PMPI_Wait); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 456, PMPI_Waitall); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 457, PMPI_Waitany); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 458, PMPI_Waitsome); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 459, PMPI_Win_allocate); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 460, PMPI_Win_allocate_shared); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 461, PMPI_Win_attach); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 462, PMPI_Win_call_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 463, PMPI_Win_complete); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 464, PMPI_Win_create); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 465, PMPI_Win_create_dynamic); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 466, PMPI_Win_create_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 467, PMPI_Win_create_keyval); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 468, PMPI_Win_delete_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 469, PMPI_Win_detach); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 470, PMPI_Win_fence); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 471, PMPI_Win_flush); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 472, PMPI_Win_flush_all); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 473, PMPI_Win_flush_local); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 474, PMPI_Win_flush_local_all); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 475, PMPI_Win_free); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 476, PMPI_Win_free_keyval); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 477, PMPI_Win_get_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 478, PMPI_Win_get_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 479, PMPI_Win_get_group); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 480, PMPI_Win_get_info); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 481, PMPI_Win_get_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 482, PMPI_Win_lock); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 483, PMPI_Win_lock_all); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 484, PMPI_Win_post); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 485, PMPI_Win_set_attr); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 486, PMPI_Win_set_errhandler); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 487, PMPI_Win_set_info); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 488, PMPI_Win_set_name); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 489, PMPI_Win_shared_query); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 490, PMPI_Win_start); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 491, PMPI_Win_sync); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 492, PMPI_Win_test); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 493, PMPI_Win_unlock); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 494, PMPI_Win_unlock_all); + TIMEMORY_C_GOTCHA(mpip_gotcha_t, 495, PMPI_Win_wait); + }; + + // provide environment variable for suppressing wrappers + mpip_gotcha_t::get_reject_list() = [reject]() { + auto _reject = reject; + // check environment + auto reject_list = tim::get_env( + TIMEMORY_SETTINGS_PREFIX "ROCPROFSYS_MPIP_REJECT_LIST", ""); + // add environment setting + for(const auto& itr : tim::delimit(reject_list)) + _reject.insert(itr); + return _reject; + }; + + // provide environment variable for selecting wrappers + mpip_gotcha_t::get_permit_list() = [permit]() { + auto _permit = permit; + // check environment + auto permit_list = tim::get_env( + TIMEMORY_SETTINGS_PREFIX "ROCPROFSYS_MPIP_PERMIT_LIST", ""); + // add environment setting + for(const auto& itr : tim::delimit(permit_list)) + _permit.insert(itr); + return _permit; + }; + + is_initialized = true; + } +} +// +#endif +// +//======================================================================================// +// + +#if defined(ROCPROFSYS_UNDEFINE_OMPI_SKIP_MPICXX) && ROCPROFSYS_UNDEFINE_OMPI_SKIP_MPICXX +# undef OMPI_SKIP_MPICXX +#endif