diff --git a/projects/rocprofiler-systems/CHANGELOG.md b/projects/rocprofiler-systems/CHANGELOG.md
index 8832b5bf21..24d2a55379 100644
--- a/projects/rocprofiler-systems/CHANGELOG.md
+++ b/projects/rocprofiler-systems/CHANGELOG.md
@@ -8,6 +8,7 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs.
 
 ### Added
 
+- Support for UCX (Unified Communication X) API tracing.
 - Documentation for `--trace-legacy` / `-L` CLI flag for direct tracing mode.
 - Added dependency to `spdlog` library.
 - Added environment variable `ROCPROFSYS_LOG_LEVEL` which control level of logging.
diff --git a/projects/rocprofiler-systems/cmake/Modules/FindUCX.cmake b/projects/rocprofiler-systems/cmake/Modules/FindUCX.cmake
new file mode 100644
index 0000000000..212857a763
--- /dev/null
+++ b/projects/rocprofiler-systems/cmake/Modules/FindUCX.cmake
@@ -0,0 +1,73 @@
+# ------------------------------------------------------------------------------#
+#
+# Finds headers for UCX (Unified Communication X)
+#
+# UCX is a high-performance communication framework used as a transport layer
+# for MPI and other communication libraries. This module locates the UCX include
+# directory (probing for ucp/api/ucp.h) for tracing and interception purposes.
+#
+# Cache variables:
+#   UCX_HEADERS_INCLUDE_DIR           - include directory selected for UCX
+#   UCX_HEADERS_INCLUDE_DIR_INTERNAL  - in-tree fallback include directory
+#
+# Imported target (created when UCX_FOUND is true):
+#   roc::ucx-headers                  - INTERFACE target exposing the include directory
+#
+# ------------------------------------------------------------------------------#
+
+include(FindPackageHandleStandardArgs)
+
+# ----------------------------------------------------------------------------------------#
+
+# Fallback location used when the search below does not yield an existing directory
+set(UCX_HEADERS_INCLUDE_DIR_INTERNAL
+    "${PROJECT_SOURCE_DIR}/source/lib/rocprof-sys/library/tpls/ucx"
+    CACHE PATH
+    "Path to internal UCX headers"
+)
+
+# ----------------------------------------------------------------------------------------#
+# Find UCX headers (the probe file is <include-dir>/ucp/api/ucp.h)
+find_path(
+    UCX_HEADERS_INCLUDE_DIR
+    NAMES ucp/api/ucp.h
+    PATHS /usr/include /usr/local/include /opt/ucx/include
+)
+
+if(NOT EXISTS "${UCX_HEADERS_INCLUDE_DIR}")
+    rocprofiler_systems_message(
+        AUTHOR_WARNING
+        "UCX headers do not exist! Setting UCX_HEADERS_INCLUDE_DIR to internal directory: ${UCX_HEADERS_INCLUDE_DIR_INTERNAL}"
+    )
+    # FORCE is needed here: find_path() has already created the cache entry (it stores
+    # <VAR>-NOTFOUND on failure), and a plain cache set() never replaces an existing entry
+    set(UCX_HEADERS_INCLUDE_DIR
+        "${UCX_HEADERS_INCLUDE_DIR_INTERNAL}"
+        CACHE PATH
+        "Path to UCX headers"
+        FORCE
+    )
+else()
+    rocprofiler_systems_message(STATUS "UCX headers found: ${UCX_HEADERS_INCLUDE_DIR}")
+endif()
+
+mark_as_advanced(UCX_HEADERS_INCLUDE_DIR UCX_HEADERS_INCLUDE_DIR_INTERNAL)
+
+# ----------------------------------------------------------------------------------------#
+
+find_package_handle_standard_args(UCX REQUIRED_VARS UCX_HEADERS_INCLUDE_DIR)
+
+# ------------------------------------------------------------------------------#
+
+# The TARGET check keeps a repeated find_package(UCX) from re-creating the imported
+# target, which add_library() would reject as a duplicate
+if(UCX_FOUND AND NOT TARGET roc::ucx-headers)
+    add_library(roc::ucx-headers INTERFACE IMPORTED)
+    target_include_directories(
+        roc::ucx-headers
+        SYSTEM
+        INTERFACE "${UCX_HEADERS_INCLUDE_DIR}"
+    )
+endif()
+
+# ------------------------------------------------------------------------------#
diff --git a/projects/rocprofiler-systems/cmake/Packages.cmake b/projects/rocprofiler-systems/cmake/Packages.cmake index e1fca7bdb2..0916e9cce8 100644 --- a/projects/rocprofiler-systems/cmake/Packages.cmake +++ b/projects/rocprofiler-systems/cmake/Packages.cmake @@ -42,6 +42,9 @@
rocprofiler_systems_add_interface_library(rocprofiler-systems-mpi rocprofiler_systems_add_interface_library(rocprofiler-systems-libva "Provides VA-API headers" ) +rocprofiler_systems_add_interface_library(rocprofiler-systems-ucx + "Provides UCX headers" +) rocprofiler_systems_add_interface_library(rocprofiler-systems-bfd "Provides Binary File Descriptor (BFD)" ) @@ -968,6 +971,9 @@ target_include_directories( INTERFACE ${LIBVA_HEADERS_INCLUDE_DIR} ) +find_package(UCX ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) +target_include_directories(rocprofiler-systems-ucx INTERFACE ${UCX_HEADERS_INCLUDE_DIR}) + # ----------------------------------------------------------------------------------------# # # PTL (Parallel Tasking Library) submodule diff --git a/projects/rocprofiler-systems/docker/Dockerfile.opensuse b/projects/rocprofiler-systems/docker/Dockerfile.opensuse index f25e02c4e9..b41b7d200c 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.opensuse +++ b/projects/rocprofiler-systems/docker/Dockerfile.opensuse @@ -26,7 +26,7 @@ RUN zypper --non-interactive update -y && \ zypper --non-interactive install -y chrpath cmake curl dpkg-devel \ gcc-c++ gcc-fortran git gmock gtest iproute2 libdrm-devel libnuma-devel \ ninja nlohmann_json-devel openmpi3-devel python3-pip rpm-build \ - sqlite3-devel wget && \ + sqlite3-devel wget libucp-devel libuct-devel && \ python3 -m pip install 'cmake==3.21' ARG ROCM_VERSION=0.0 diff --git a/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci b/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci index f02d5a0742..14e35e75ac 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci +++ b/projects/rocprofiler-systems/docker/Dockerfile.opensuse.ci @@ -30,7 +30,7 @@ RUN zypper --non-interactive update -y && \ zypper --non-interactive install -y chrpath cmake curl dpkg-devel \ gcc-c++ gcc-fortran git gmock gtest iproute2 ninja nlohmann_json-devel \ openmpi3-devel papi-devel python3-devel python3-pip 
rpm-build \ - sqlite3-devel vim wget && \ + sqlite3-devel vim wget libucp-devel libuct-devel && \ zypper --non-interactive clean --all && \ python3 -m pip install 'cmake==3.21' perfetto diff --git a/projects/rocprofiler-systems/docker/Dockerfile.rhel b/projects/rocprofiler-systems/docker/Dockerfile.rhel index 95fcbdad8d..83f9712b97 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.rhel +++ b/projects/rocprofiler-systems/docker/Dockerfile.rhel @@ -17,7 +17,7 @@ RUN yum groupinstall -y "Development Tools" && \ yum install -y epel-release && crb enable && \ yum install -y --allowerasing chrpath cmake curl dpkg-devel gmock-devel gtest-devel \ iproute json-devel libdrm-devel ninja-build numactl-devel openmpi-devel \ - papi-devel python3-pip sqlite-devel texinfo wget which zlib-devel && \ + papi-devel python3-pip sqlite-devel texinfo wget which zlib-devel ucx-devel && \ yum clean all && \ python3 -m pip install 'cmake==3.21' && \ python3 -m pip install 'perfetto' diff --git a/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci b/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci index 0d6116e88e..24f828a4ed 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci +++ b/projects/rocprofiler-systems/docker/Dockerfile.rhel.ci @@ -21,7 +21,7 @@ RUN yum groupinstall -y "Development Tools" && \ yum install -y epel-release && crb enable && \ yum install -y --allowerasing chrpath cmake curl dpkg-devel gmock-devel gtest-devel \ iproute json-devel ninja-build numactl-devel openmpi-devel papi-devel \ - python3-devel python3-pip sqlite-devel texinfo wget which vim zlib-devel && \ + python3-devel python3-pip sqlite-devel texinfo wget which vim zlib-devel ucx-devel && \ yum clean all && \ python3 -m pip install 'cmake==3.21' perfetto diff --git a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu index 4243a33b3d..b5561e0a31 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu +++ 
b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu @@ -29,7 +29,8 @@ RUN apt-get update && \ build-essential chrpath cmake curl flex gettext git-core gnupg2 iproute2 \ libgmock-dev libgtest-dev libnuma1 libopenmpi-dev libpapi-dev libpfm4-dev \ librpm-dev libsqlite3-dev libtool libudev1 lsb-release m4 ninja-build \ - nlohmann-json3-dev python3-pip rpm texinfo wget && \ + nlohmann-json3-dev python3-pip rpm texinfo wget \ + libucx-dev ucx-utils && \ OS_VERSION=$(grep '^VERSION_ID=' /etc/os-release | cut -d'=' -f2 | tr -d '"') && \ OS_ID=$(grep '^ID=' /etc/os-release | cut -d'=' -f2 | tr -d '"') && \ if [ "${OS_ID}" == "ubuntu" ] && [ "${OS_VERSION}" == "22.04" ]; then \ diff --git a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci index 4ddc13076d..be9e27a7fd 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci +++ b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu.ci @@ -27,7 +27,8 @@ RUN apt-get update && \ bzip2 chrpath cmake curl environment-modules flex gettext git-core gnupg2 \ gzip iproute2 libgmock-dev libgtest-dev libiberty-dev libpapi-dev libpfm4-dev \ libsqlite3-dev libtool locales lsb-release m4 ninja-build nlohmann-json3-dev \ - python3-pip software-properties-common texinfo unzip wget vim zip zlib1g-dev && \ + python3-pip software-properties-common texinfo unzip wget vim zip zlib1g-dev \ + libucx-dev ucx-utils && \ apt-get autoclean RUN OS_VERSION=$(grep '^VERSION_ID=' /etc/os-release | cut -d'=' -f2 | tr -d '"') && \ diff --git a/projects/rocprofiler-systems/source/lib/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/CMakeLists.txt index 35cfbc91c8..54af395632 100644 --- a/projects/rocprofiler-systems/source/lib/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/CMakeLists.txt @@ -47,6 +47,7 @@ target_link_libraries( $ $ $ + $ $ $ $ diff --git a/projects/rocprofiler-systems/source/lib/core/categories.hpp 
b/projects/rocprofiler-systems/source/lib/core/categories.hpp index 260bdc9c68..05ad8906d5 100644 --- a/projects/rocprofiler-systems/source/lib/core/categories.hpp +++ b/projects/rocprofiler-systems/source/lib/core/categories.hpp @@ -127,8 +127,9 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl, ROCPROFSYS_CATEGORY_ROCM_RCCL, " ROCPROFSYS_DEFINE_CATEGORY(category, pthread, ROCPROFSYS_CATEGORY_PTHREAD, "pthread", "POSIX threading functions") ROCPROFSYS_DEFINE_CATEGORY(category, kokkos, ROCPROFSYS_CATEGORY_KOKKOS, "kokkos", "KokkosTools regions") ROCPROFSYS_DEFINE_CATEGORY(category, mpi, ROCPROFSYS_CATEGORY_MPI, "mpi", "MPI regions") +ROCPROFSYS_DEFINE_CATEGORY(category, ucx, ROCPROFSYS_CATEGORY_UCX, "ucx", "UCX regions") ROCPROFSYS_DEFINE_CATEGORY(category, process_sampling, ROCPROFSYS_CATEGORY_PROCESS_SAMPLING, "process_sampling", "Process-level data") -ROCPROFSYS_DEFINE_CATEGORY(category, comm_data, ROCPROFSYS_CATEGORY_COMM_DATA, "comm_data", "MPI/RCCL counters for tracking amount of data sent or received") +ROCPROFSYS_DEFINE_CATEGORY(category, comm_data, ROCPROFSYS_CATEGORY_COMM_DATA, "comm_data", "MPI/RCCL/UCX counters for tracking amount of data sent or received") ROCPROFSYS_DEFINE_CATEGORY(category, causal, ROCPROFSYS_CATEGORY_CAUSAL, "causal", "Causal profiling data") ROCPROFSYS_DEFINE_CATEGORY(category, cpu_freq, ROCPROFSYS_CATEGORY_CPU_FREQ, "cpu_frequency", "CPU frequency (collected in background thread)") ROCPROFSYS_DEFINE_CATEGORY(category, process_page, ROCPROFSYS_CATEGORY_PROCESS_PAGE, "process_physical_memory", "Physical memory usage (RSS) in process in MB (collected in background thread)") @@ -207,6 +208,7 @@ using name = perfetto_category; ROCPROFSYS_PERFETTO_CATEGORY(category::pthread), \ ROCPROFSYS_PERFETTO_CATEGORY(category::kokkos), \ ROCPROFSYS_PERFETTO_CATEGORY(category::mpi), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::ucx), \ ROCPROFSYS_PERFETTO_CATEGORY(category::sampling), \ ROCPROFSYS_PERFETTO_CATEGORY(category::process_sampling), \ 
ROCPROFSYS_PERFETTO_CATEGORY(category::comm_data), \ diff --git a/projects/rocprofiler-systems/source/lib/core/config.cpp b/projects/rocprofiler-systems/source/lib/core/config.cpp index b4a1cbe6e0..87ae1dfa51 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.cpp +++ b/projects/rocprofiler-systems/source/lib/core/config.cpp @@ -379,6 +379,10 @@ configure_settings(bool _init) "Enable support for MPI functions", true, "mpi", "backend", "parallelism"); + ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_UCX", + "Enable support for UCX functions", true, "ucx", "backend", + "parallelism"); + ROCPROFSYS_CONFIG_SETTING( bool, "ROCPROFSYS_USE_RCCLP", "Enable support for ROCm Communication Collectives Library (RCCL) Performance", @@ -1943,6 +1947,13 @@ get_use_mpip() return static_cast&>(*_v->second).get(); } +bool& +get_use_ucx() +{ + static auto _v = get_config()->find("ROCPROFSYS_USE_UCX"); + return static_cast&>(*_v->second).get(); +} + bool get_use_kokkosp() { diff --git a/projects/rocprofiler-systems/source/lib/core/config.hpp b/projects/rocprofiler-systems/source/lib/core/config.hpp index 49982e72d6..49235faf6f 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.hpp +++ b/projects/rocprofiler-systems/source/lib/core/config.hpp @@ -225,6 +225,9 @@ get_use_pid(); bool& get_use_mpip(); +bool& +get_use_ucx(); + bool get_use_kokkosp(); diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h b/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h index f768ed3362..7640efdf1d 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h @@ -80,6 +80,7 @@ extern "C" ROCPROFSYS_CATEGORY_PTHREAD, ROCPROFSYS_CATEGORY_KOKKOS, ROCPROFSYS_CATEGORY_MPI, + ROCPROFSYS_CATEGORY_UCX, ROCPROFSYS_CATEGORY_PROCESS_SAMPLING, 
ROCPROFSYS_CATEGORY_COMM_DATA, ROCPROFSYS_CATEGORY_CAUSAL,
diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp
index 9d84cf46f8..921e7f4390 100644
--- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp
+++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp
@@ -55,6 +55,7 @@
 #include "library/components/mpi_gotcha.hpp"
 #include "library/components/numa_gotcha.hpp"
 #include "library/components/pthread_gotcha.hpp"
+#include "library/components/ucx_gotcha.hpp"
 #include "library/components/vaapi_gotcha.hpp"
 #include "library/coverage.hpp"
 #include "library/process_sampler.hpp"
@@ -609,6 +610,13 @@ rocprofsys_init_tooling_hidden(void)
     // start these gotchas once settings have been initialized
     if(get_init_bundle()) get_init_bundle()->start();
 
+    // UCX wrappers are gated by the ROCPROFSYS_USE_UCX setting
+    if(get_use_ucx())
+    {
+        LOG_DEBUG("Setting up UCX traces...");
+        component::ucx_gotcha::start();
+    }
+
     if(get_use_vaapi_tracing())
     {
         LOG_DEBUG("Setting up VA-API traces...");
@@ -900,6 +908,13 @@ rocprofsys_finalize_hidden(void)
     fini_bundle_t _finalization{};
     _finalization.start();
 
+    // same ROCPROFSYS_USE_UCX gate as the start-up path
+    if(get_use_ucx())
+    {
+        LOG_DEBUG("Shutting down UCX tracing...");
+        component::ucx_gotcha::shutdown();
+    }
+
     if(get_use_vaapi_tracing())
     {
         LOG_DEBUG("Shutting down VA-API tracing...");
diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/CMakeLists.txt index a1a30d73f1..842b7ecdbb 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/CMakeLists.txt @@ -11,6 +11,7 @@ set(component_sources ${CMAKE_CURRENT_LIST_DIR}/kill_gotcha.cpp ${CMAKE_CURRENT_LIST_DIR}/mpi_gotcha.cpp ${CMAKE_CURRENT_LIST_DIR}/numa_gotcha.cpp + ${CMAKE_CURRENT_LIST_DIR}/ucx_gotcha.cpp
${CMAKE_CURRENT_LIST_DIR}/vaapi_gotcha.cpp ${CMAKE_CURRENT_LIST_DIR}/pthread_gotcha.cpp ${CMAKE_CURRENT_LIST_DIR}/pthread_create_gotcha.cpp @@ -32,6 +33,7 @@ set(component_headers ${CMAKE_CURRENT_LIST_DIR}/mpip.hpp ${CMAKE_CURRENT_LIST_DIR}/mpi_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/numa_gotcha.hpp + ${CMAKE_CURRENT_LIST_DIR}/ucx_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/vaapi_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/pthread_gotcha.hpp ${CMAKE_CURRENT_LIST_DIR}/pthread_create_gotcha.hpp diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp index 01f0c132ad..e1668f539e 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp @@ -85,6 +85,7 @@ metadata_initialize_comm_data_categories() trace_cache::get_metadata_registry().add_string( trait::name::value); trace_cache::get_metadata_registry().add_string(trait::name::value); + trace_cache::get_metadata_registry().add_string(trait::name::value); _is_initialized = true; } @@ -128,6 +129,16 @@ metadata_initialize_comm_data_pmc() trait::name::description, LONG_DESCRIPTION, COMPONENT, MSG, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); #endif + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, DEVICE_ID, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + comm_data::ucx_send::label, "Tracks UCX communication data sizes", + trait::name::description, LONG_DESCRIPTION, COMPONENT, MSG, + rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, DEVICE_ID, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + comm_data::ucx_recv::label, "Tracks UCX communication data sizes", + trait::name::description, LONG_DESCRIPTION, COMPONENT, MSG, + rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 
0 }); } template @@ -172,6 +183,8 @@ comm_data::start() metadata_initialize_track(); metadata_initialize_track(); #endif + metadata_initialize_track(); + metadata_initialize_track(); } } @@ -195,7 +208,7 @@ comm_data::configure() _once = true; comm_data_tracker_t::label() = "comm_data"; - comm_data_tracker_t::description() = "Tracks MPI/RCCL communication data sizes"; + comm_data_tracker_t::description() = "Tracks MPI/RCCL/UCX communication data sizes"; comm_data_tracker_t::display_unit() = "MB"; comm_data_tracker_t::unit() = units::megabyte; @@ -471,6 +484,298 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen } #endif +// UCX communication tracking implementations + +// ucp_tag_send_nbx: (void* ep, const void* buffer, size_t count, uint64_t tag, const +// void* param) +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, const void*, + size_t count, uint64_t tag, const void*) +{ + if(count == 0) return; + + if(get_use_perfetto()) write_perfetto_counter_track(count); + + { + cache_comm_data_events(0, count); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, count); + add(JOIN('/', _name, JOIN('=', "tag", tag)), count); + } +} + +// ucp_tag_recv_nbx: (void* worker, void* buffer, size_t count, uint64_t tag, uint64_t +// tag_mask, const void* param) +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, void*, size_t count, + uint64_t tag, uint64_t tag_mask, const void*) +{ + if(count == 0) return; + + if(get_use_perfetto()) write_perfetto_counter_track(count); + + { + cache_comm_data_events(0, count); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, count); + add(JOIN('/', _name, JOIN('=', "tag", tag)), count); + add(JOIN('/', _name, JOIN('=', "tag", tag), JOIN('=', "tag_mask", tag_mask)), + count); + } +} + +// ucp_put_nbx: (void* 
ep, const void* buffer, size_t count, uint64_t remote_addr, void* +// rkey, const void* param) +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, const void*, + size_t count, uint64_t remote_addr, void*, const void*) +{ + if(count == 0) return; + + if(get_use_perfetto()) write_perfetto_counter_track(count); + + { + cache_comm_data_events(0, count); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, count); + add(JOIN('/', _name, JOIN('=', "remote_addr", remote_addr)), count); + } +} + +// ucp_get_nbx: (void* ep, void* buffer, size_t count, uint64_t remote_addr, void* rkey, +// const void* param) +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, void*, size_t count, + uint64_t remote_addr, void*, const void*) +{ + if(count == 0) return; + + if(get_use_perfetto()) write_perfetto_counter_track(count); + + { + cache_comm_data_events(0, count); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, count); + add(JOIN('/', _name, JOIN('=', "remote_addr", remote_addr)), count); + } +} + +// ucp_am_send_nbx: (void* ep, unsigned id, const void* header, size_t header_length, +// const void* buffer, size_t count, const void* param) +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, unsigned id, + const void*, size_t header_length, const void*, size_t count, + const void*) +{ + if(count == 0 && header_length == 0) return; + + size_t total_size = header_length + count; + if(get_use_perfetto()) write_perfetto_counter_track(total_size); + + { + cache_comm_data_events(0, total_size); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, total_size); + add(JOIN('/', _name, JOIN('=', "am_id", id)), total_size); + } +} + +// ucp_stream_send_nbx: (void* ep, const void* buffer, size_t 
count, const void* param) +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, const void*, + size_t count, const void*) +{ + if(count == 0) return; + + if(get_use_perfetto()) write_perfetto_counter_track(count); + + { + cache_comm_data_events(0, count); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, count); + } +} + +// ucp_stream_recv_nbx: (void* ep, void* buffer, size_t count, size_t* length, const void* +// param) +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, void*, size_t count, + size_t*, const void*) +{ + if(count == 0) return; + + if(get_use_perfetto()) write_perfetto_counter_track(count); + + { + cache_comm_data_events(0, count); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, count); + } +} + +// Legacy: ucp_tag_send_nb/nbx - send with tag matching (for old-style wrappers) +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, size_t count, void*, + void*, void*) +{ + if(count == 0) return; + + if(get_use_perfetto()) write_perfetto_counter_track(count); + + { + cache_comm_data_events(0, count); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, count); + } +} + +// Legacy: ucp_tag_recv_nb/nbx - receive with tag matching (for old-style wrappers) +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, size_t count, void*, + void*, void*, void*, void*) +{ + if(count == 0) return; + + if(get_use_perfetto()) write_perfetto_counter_track(count); + + { + cache_comm_data_events(0, count); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, count); + } +} + +// ucp_put/get operations - RMA +void +comm_data::audit(const gotcha_data& _data, audit::incoming, 
void*, size_t length, + uint64_t, void*, void*) +{ + if(length == 0) return; + + bool is_put = _data.tool_id.find("ucp_put") != std::string::npos; + + if(get_use_perfetto()) + { + if(is_put) + write_perfetto_counter_track(length); + else + write_perfetto_counter_track(length); + } + + { + if(is_put) + cache_comm_data_events(0, length); + else + cache_comm_data_events(0, length); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, length); + } +} + +// ucp_am_send_nb/nbx - active message send +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, unsigned, void*, + size_t header_length, void*, size_t length, unsigned, void*) +{ + size_t total_length = header_length + length; + if(total_length == 0) return; + + if(get_use_perfetto()) write_perfetto_counter_track(total_length); + + { + cache_comm_data_events(0, total_length); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, total_length); + } +} + +// ucp_stream_send/recv operations +void +comm_data::audit(const gotcha_data& _data, audit::incoming, void*, void*, size_t count, + void*, unsigned, void*) +{ + if(count == 0) return; + + bool is_send = _data.tool_id.find("send") != std::string::npos; + + if(get_use_perfetto()) + { + if(is_send) + write_perfetto_counter_track(count); + else + write_perfetto_counter_track(count); + } + + { + if(is_send) + cache_comm_data_events(0, count); + else + cache_comm_data_events(0, count); + } + + if(rocprofsys::get_use_timemory()) + { + auto _name = std::string_view{ _data.tool_id }; + tracker_t _t{ _name }; + add(_t, count); + } +} + #if defined(ROCPROFSYS_USE_RCCL) // Kept for reference, but now gathered throught the SDK callbacks. 
diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.hpp index 066b3dfd92..5ec770da3b 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.hpp @@ -77,6 +77,18 @@ struct comm_data : base static constexpr auto label = "MPI Comm Send"; }; + struct ucx_recv + { + static constexpr auto value = "comm_data"; + static constexpr auto label = "UCX Comm Recv"; + }; + + struct ucx_send + { + static constexpr auto value = "comm_data"; + static constexpr auto label = "UCX Comm Send"; + }; + ROCPROFSYS_DEFAULT_OBJECT(comm_data) static void preinit(); @@ -135,6 +147,61 @@ struct comm_data : base MPI_Datatype recvtype, MPI_Comm); #endif + // UCX communication tracking + // ucp_tag_send_nbx - send with tag matching (5 params: ep, buffer, count, tag, param) + static void audit(const gotcha_data& _data, audit::incoming, void*, const void*, + size_t count, uint64_t tag, const void*); + + // ucp_tag_recv_nbx - receive with tag matching (6 params: worker, buffer, count, tag, + // tag_mask, param) + static void audit(const gotcha_data& _data, audit::incoming, void*, void*, + size_t count, uint64_t tag, uint64_t tag_mask, const void*); + + // ucp_put_nbx - RMA put operation (6 params: ep, buffer, count, remote_addr, rkey, + // param) + static void audit(const gotcha_data& _data, audit::incoming, void*, const void*, + size_t count, uint64_t remote_addr, void* rkey, const void*); + + // ucp_get_nbx - RMA get operation (6 params: ep, buffer, count, remote_addr, rkey, + // param) + static void audit(const gotcha_data& _data, audit::incoming, void*, void*, + size_t count, uint64_t remote_addr, void* rkey, const void*); + + // ucp_am_send_nbx - active message send (7 params: ep, id, header, header_length, + // buffer, count, param) + static 
void audit(const gotcha_data& _data, audit::incoming, void*, unsigned id, + const void* header, size_t header_length, const void* buffer, + size_t count, const void*); + + // ucp_stream_send_nbx - stream send (4 params: ep, buffer, count, param) + static void audit(const gotcha_data& _data, audit::incoming, void*, const void*, + size_t count, const void*); + + // ucp_stream_recv_nbx - stream receive (5 params: ep, buffer, count, length, param) + static void audit(const gotcha_data& _data, audit::incoming, void*, void*, + size_t count, size_t* length, const void*); + + // Legacy UCX functions (kept for compatibility) + // ucp_tag_send_nb/nbx - send with tag matching + static void audit(const gotcha_data& _data, audit::incoming, void*, size_t count, + void*, void*, void*); + + // ucp_tag_recv_nb/nbx - receive with tag matching + static void audit(const gotcha_data& _data, audit::incoming, void*, size_t count, + void*, void*, void*, void*, void*); + + // ucp_put/get operations - RMA (legacy) + static void audit(const gotcha_data& _data, audit::incoming, void*, size_t length, + uint64_t, void*, void*); + + // ucp_am_send_nb/nbx - active message send (legacy) + static void audit(const gotcha_data& _data, audit::incoming, void*, unsigned, void*, + size_t, void*, size_t, unsigned, void*); + + // ucp_stream_send/recv operations (legacy) + static void audit(const gotcha_data& _data, audit::incoming, void*, void*, + size_t count, void*, unsigned, void*); + private: static auto& add(tracker_t& _t, data_type value) { diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/ucx_gotcha.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/ucx_gotcha.cpp new file mode 100644 index 0000000000..d9203929f1 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/ucx_gotcha.cpp @@ -0,0 +1,374 @@ +// MIT License +// +// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#include "library/components/ucx_gotcha.hpp"
+#include "core/common.hpp"
+#include "core/config.hpp"
+#include "core/state.hpp"
+#include "core/timemory.hpp"
+#include "library/components/category_region.hpp"
+#include "library/runtime.hpp"
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace rocprofsys
+{
+namespace component
+{
+namespace
+{
+// Function-local static holding the bundle that start() drives.
+auto&
+get_ucx_gotcha()
+{
+    static auto _v = tim::lightweight_tuple{};
+    return _v;
+}
+}  // namespace
+
+// Registers the wrapper signature for every intercepted UCX symbol.
+//
+// Each registration is configure<slot, return type, argument types...>(symbol).
+// The declared argument list must have the same arity as the real UCX prototype:
+// the wrapper only forwards the arguments it declares, so a list that is too short
+// drops trailing arguments (callbacks, flags, result pointers) and one that is too
+// long audits and forwards garbage. Handles and callbacks are declared as void*,
+// ucp_datatype_t / ucp_tag_t as uint64_t and ucs_status_t as int.
+void
+ucx_gotcha::configure()
+{
+    // don't emit warnings for missing UCX functions unless debug or verbosity >= 3
+    if(get_verbose_env() < 3 && !get_debug_env())
+    {
+        for(size_t i = 0; i < ucx_gotcha_t::capacity(); ++i)
+        {
+            auto* itr = ucx_gotcha_t::at(i);
+            if(itr) itr->verbose = -1;
+        }
+    }
+
+    ucx_gotcha_t::get_initializer() = []() {
+        // Active Message
+        // ucp_am_send_nb(ep, id, buffer, count, datatype, cb, flags)
+        ucx_gotcha_t::configure<0, void*, void*, uint16_t, const void*, size_t, uint64_t,
+                                void*, unsigned>("ucp_am_send_nb");
+        ucx_gotcha_t::configure<1, void*, void*, unsigned, const void*, size_t,
+                                const void*, size_t, const void*>("ucp_am_send_nbx");
+        // ucp_am_recv_data_nbx(worker, data_desc, buffer, count, param)
+        ucx_gotcha_t::configure<2, void*, void*, void*, void*, size_t, const void*>(
+            "ucp_am_recv_data_nbx");
+        ucx_gotcha_t::configure<3, void, void*, void*>("ucp_am_data_release");
+
+        // Atomic operations (the blocking variants return a ucs_status_t)
+        ucx_gotcha_t::configure<4, int, void*, uint32_t, uint64_t, void*>(
+            "ucp_atomic_add32");
+        ucx_gotcha_t::configure<5, int, void*, uint64_t, uint64_t, void*>(
+            "ucp_atomic_add64");
+        // ucp_atomic_cswap{32,64}(ep, compare, swap, remote_addr, rkey, result)
+        ucx_gotcha_t::configure<6, int, void*, uint32_t, uint32_t, uint64_t, void*,
+                                void*>("ucp_atomic_cswap32");
+        ucx_gotcha_t::configure<7, int, void*, uint64_t, uint64_t, uint64_t, void*,
+                                void*>("ucp_atomic_cswap64");
+        ucx_gotcha_t::configure<8, int, void*, uint32_t, uint64_t, void*, void*>(
+            "ucp_atomic_fadd32");
+        ucx_gotcha_t::configure<9, int, void*, uint64_t, uint64_t, void*, void*>(
+            "ucp_atomic_fadd64");
+        ucx_gotcha_t::configure<10, int, void*, uint32_t, uint64_t, void*, void*>(
+            "ucp_atomic_swap32");
+        ucx_gotcha_t::configure<11, int, void*, uint64_t, uint64_t, void*, void*>(
+            "ucp_atomic_swap64");
+        ucx_gotcha_t::configure<12, int, void*, int, uint64_t, const void*, size_t,
+                                void*>("ucp_atomic_post");
+        // ucp_atomic_fetch_nb(ep, opcode, value, result, op_size, remote_addr, rkey, cb)
+        ucx_gotcha_t::configure<13, void*, void*, int, uint64_t, void*, size_t, uint64_t,
+                                void*, void*>("ucp_atomic_fetch_nb");
+        ucx_gotcha_t::configure<14, void*, void*, unsigned, void*, void*, size_t,
+                                uint64_t, void*>("ucp_atomic_op_nbx");
+
+        // Cleanup and config
+        ucx_gotcha_t::configure<15, void, void*>("ucp_cleanup");
+        // ucp_config_modify(config, name, value)
+        ucx_gotcha_t::configure<16, int, void*, const char*, const char*>(
+            "ucp_config_modify");
+        ucx_gotcha_t::configure<17, int, const char*, const char*, void**>(
+            "ucp_config_read");
+        ucx_gotcha_t::configure<18, void, void*>("ucp_config_release");
+
+        // Connection management
+        // ucp_disconnect_nb(ep)
+        ucx_gotcha_t::configure<19, void*, void*>("ucp_disconnect_nb");
+
+        // Datatype
+        // ucp_dt_create_generic(ops, context, datatype_p)
+        ucx_gotcha_t::configure<20, int, const void*, void*, void*>(
+            "ucp_dt_create_generic");
+        ucx_gotcha_t::configure<21, void, void*>("ucp_dt_destroy");
+
+        // Endpoint
+        ucx_gotcha_t::configure<22, int, void*, const void*, void**>("ucp_ep_create");
+        ucx_gotcha_t::configure<23, void, void*>("ucp_ep_destroy");
+        ucx_gotcha_t::configure<24, void*, void*, const void*>("ucp_ep_modify_nb");
+        ucx_gotcha_t::configure<25, void*, void*, const void*>("ucp_ep_close_nbx");
+        ucx_gotcha_t::configure<26, int, void*>("ucp_ep_flush");
+        ucx_gotcha_t::configure<27, void*, void*, unsigned, void*>("ucp_ep_flush_nb");
+        ucx_gotcha_t::configure<28, void*, void*, const void*>("ucp_ep_flush_nbx");
+
+        // Listener
+        ucx_gotcha_t::configure<29, int, void*, const void*, void**>(
+            "ucp_listener_create");
+        ucx_gotcha_t::configure<30, void, void*>("ucp_listener_destroy");
+        ucx_gotcha_t::configure<31, int, void*, void*>("ucp_listener_query");
+        ucx_gotcha_t::configure<32, int, void*, void*>("ucp_listener_reject");
+
+        // Memory
+        // ucp_mem_advise(context, memh, params)
+        ucx_gotcha_t::configure<33, int, void*, void*, void*>("ucp_mem_advise");
+        ucx_gotcha_t::configure<34, int, void*, const void*, void**>("ucp_mem_map");
+        ucx_gotcha_t::configure<35, int, void*, void*>("ucp_mem_unmap");
+        ucx_gotcha_t::configure<36, int, void*, void*>("ucp_mem_query");
+
+        // Put/Get operations
+        ucx_gotcha_t::configure<37, int, void*, const void*, size_t, uint64_t, void*>(
+            "ucp_put");
+        ucx_gotcha_t::configure<38, int, void*, void*, size_t, uint64_t, void*>(
+            "ucp_get");
+        ucx_gotcha_t::configure<39, int, void*, const void*, size_t, uint64_t, void*>(
+            "ucp_put_nbi");
+        ucx_gotcha_t::configure<40, int, void*, void*, size_t, uint64_t, void*>(
+            "ucp_get_nbi");
+        ucx_gotcha_t::configure<41, void*, void*, const void*, size_t, uint64_t, void*,
+                                void*>("ucp_put_nb");
+        ucx_gotcha_t::configure<42, void*, void*, void*, size_t, uint64_t, void*, void*>(
+            "ucp_get_nb");
+        ucx_gotcha_t::configure<43, void*, void*, const void*, size_t, uint64_t, void*,
+                                const void*>("ucp_put_nbx");
+        ucx_gotcha_t::configure<44, void*, void*, void*, size_t, uint64_t, void*,
+                                const void*>("ucp_get_nbx");
+
+        // Request
+        ucx_gotcha_t::configure<45, void*, void*>("ucp_request_alloc");
+        ucx_gotcha_t::configure<46, void, void*, void*>("ucp_request_cancel");
+        ucx_gotcha_t::configure<47, int, void*>("ucp_request_is_completed");
+
+        // Remote key
+        ucx_gotcha_t::configure<48, void, void*>("ucp_rkey_buffer_release");
+        ucx_gotcha_t::configure<49, void, void*>("ucp_rkey_destroy");
+        ucx_gotcha_t::configure<50, int, void*, void*, void**, size_t*>("ucp_rkey_pack");
+        ucx_gotcha_t::configure<51, int, void*, void*, void**>("ucp_rkey_ptr");
+
+        // Stream
+        ucx_gotcha_t::configure<52, void, void*, void*>("ucp_stream_data_release");
+        // ucp_stream_recv_data_nb(ep, length)
+        ucx_gotcha_t::configure<53, void*, void*, size_t*>("ucp_stream_recv_data_nb");
+        // ucp_stream_send_nb(ep, buffer, count, datatype, cb, flags)
+        ucx_gotcha_t::configure<54, void*, void*, const void*, size_t, uint64_t, void*,
+                                unsigned>("ucp_stream_send_nb");
+        // ucp_stream_recv_nb(ep, buffer, count, datatype, cb, length, flags)
+        ucx_gotcha_t::configure<55, void*, void*, void*, size_t, uint64_t, void*,
+                                size_t*, unsigned>("ucp_stream_recv_nb");
+        ucx_gotcha_t::configure<56, void*, void*, const void*, size_t, const void*>(
+            "ucp_stream_send_nbx");
+        ucx_gotcha_t::configure<57, void*, void*, void*, size_t, size_t*, const void*>(
+            "ucp_stream_recv_nbx");
+        // ucp_stream_worker_poll(worker, poll_eps, max_eps, flags) -> ssize_t
+        ucx_gotcha_t::configure<58, ssize_t, void*, void*, size_t, unsigned>(
+            "ucp_stream_worker_poll");
+
+        // Tag matching
+        ucx_gotcha_t::configure<59, void*, void*, void*, void*, size_t, void*, void*>(
+            "ucp_tag_msg_recv_nb");
+        ucx_gotcha_t::configure<60, void*, void*, void*, void*, size_t, const void*>(
+            "ucp_tag_msg_recv_nbx");
+        // ucp_tag_send_nbr(ep, buffer, count, datatype, tag, req) -> ucs_status_t
+        ucx_gotcha_t::configure<61, int, void*, const void*, size_t, uint64_t, uint64_t,
+                                void*>("ucp_tag_send_nbr");
+        // ucp_tag_recv_nbr(worker, buffer, count, datatype, tag, tag_mask, req)
+        ucx_gotcha_t::configure<62, int, void*, void*, size_t, uint64_t, uint64_t,
+                                uint64_t, void*>("ucp_tag_recv_nbr");
+        // ucp_tag_send_nb(ep, buffer, count, datatype, tag, cb)
+        ucx_gotcha_t::configure<63, void*, void*, const void*, size_t, uint64_t,
+                                uint64_t, void*>("ucp_tag_send_nb");
+        // ucp_tag_recv_nb(worker, buffer, count, datatype, tag, tag_mask, cb)
+        ucx_gotcha_t::configure<64, void*, void*, void*, size_t, uint64_t, uint64_t,
+                                uint64_t, void*>("ucp_tag_recv_nb");
+        ucx_gotcha_t::configure<65, void*, void*, const void*, size_t, uint64_t,
+                                const void*>("ucp_tag_send_nbx");
+        ucx_gotcha_t::configure<66, void*, void*, void*, size_t, uint64_t, uint64_t,
+                                const void*>("ucp_tag_recv_nbx");
+        // ucp_tag_send_sync_nb(ep, buffer, count, datatype, tag, cb)
+        ucx_gotcha_t::configure<67, void*, void*, const void*, size_t, uint64_t,
+                                uint64_t, void*>("ucp_tag_send_sync_nb");
+        ucx_gotcha_t::configure<68, void*, void*, const void*, size_t, uint64_t,
+                                const void*>("ucp_tag_send_sync_nbx");
+
+        // Worker
+        ucx_gotcha_t::configure<69, int, void*, const void*, void**>("ucp_worker_create");
+        ucx_gotcha_t::configure<70, void, void*>("ucp_worker_destroy");
+        ucx_gotcha_t::configure<71, int, void*, void**, size_t*>(
+            "ucp_worker_get_address");
+        ucx_gotcha_t::configure<72, int, void*, int*>("ucp_worker_get_efd");
+        ucx_gotcha_t::configure<73, int, void*>("ucp_worker_arm");
+        ucx_gotcha_t::configure<74, int, void*>("ucp_worker_fence");
+        ucx_gotcha_t::configure<75, int, void*>("ucp_worker_wait");
+        ucx_gotcha_t::configure<76, int, void*>("ucp_worker_signal");
+        // ucp_worker_wait_mem(worker, address)
+        // NOTE(review): UCX appears to declare this as returning void; the return
+        // type is left as int here so the outgoing audit path is unchanged - confirm.
+        ucx_gotcha_t::configure<77, int, void*, void*>("ucp_worker_wait_mem");
+        ucx_gotcha_t::configure<78, int, void*>("ucp_worker_flush");
+        ucx_gotcha_t::configure<79, void*, void*, unsigned, void*>("ucp_worker_flush_nb");
+        // ucp_worker_flush_nbx(worker, param)
+        ucx_gotcha_t::configure<80, void*, void*, const void*>("ucp_worker_flush_nbx");
+        ucx_gotcha_t::configure<81, int, void*, unsigned, void*, void*, void*>(
+            "ucp_worker_set_am_handler");
+        ucx_gotcha_t::configure<82, int, void*, const void*>(
+            "ucp_worker_set_am_recv_handler");
+        ucx_gotcha_t::configure<83, unsigned, void*>("ucp_worker_progress");
+
+        // UCT Active Message (low-level transport)
+        // NOTE(review): the UCT fast-path calls are presumably inline functions in
+        // uct.h, in which case there is no symbol to wrap - confirm.
+        // uct_ep_am_bcopy(ep, id, pack_cb, arg, flags)
+        ucx_gotcha_t::configure<84, ssize_t, void*, unsigned, void*, void*, unsigned>(
+            "uct_ep_am_bcopy");
+        // uct_ep_am_zcopy(ep, id, header, header_length, iov, iovcnt, flags, comp)
+        ucx_gotcha_t::configure<85, ssize_t, void*, unsigned, const void*, unsigned,
+                                const void*, size_t, unsigned, void*>("uct_ep_am_zcopy");
+        ucx_gotcha_t::configure<86, ssize_t, void*, unsigned, uint64_t, const void*,
+                                unsigned>("uct_ep_am_short");
+        ucx_gotcha_t::configure<87, unsigned, void*>("uct_iface_progress");
+        ucx_gotcha_t::configure<88, int, void*, unsigned, void*, void*, unsigned>(
+            "uct_iface_set_am_handler");
+
+        // Legacy UCX function variants that might be used on older systems
+        ucx_gotcha_t::configure<89, void*, void*, const void*, size_t, void*>(
+            "ucp_tag_send");
+        ucx_gotcha_t::configure<90, void*, void*, void*, size_t, void*, void*>(
+            "ucp_tag_recv");
+        ucx_gotcha_t::configure<91, void*, void*, const void*, size_t, int, int, void*>(
+            "ucp_send");
+        ucx_gotcha_t::configure<92, void*, void*, void*, size_t, int, int, void*>(
+            "ucp_recv");
+    };
+}
+
+// Disables the UCX wrappers.
+void
+ucx_gotcha::shutdown()
+{
+    ucx_gotcha_t::disable();
+}
+
+// Configures and starts the bundle unless it is already running.
+void
+ucx_gotcha::start()
+{
+    if(!get_ucx_gotcha().get()->get_is_running())
+    {
+        configure();
+        get_ucx_gotcha().start();
+    }
+}
+
+// Intentionally a no-op; the wrappers are disabled in shutdown().
+void
+ucx_gotcha::stop()
+{}
+
+// Generic audit functions now handled by template in header
+
+// Specific audit functions for tag operations
+// (ep, buffer, count, tag, param), e.g. ucp_tag_send_nbx
+void
+ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, const void* arg2,
+                  size_t arg3, uint64_t arg4, const void* arg5)
+{
+    category_region::start(std::string_view{ _data.tool_id }, "ep", arg1,
+                           "buffer", arg2, "count", arg3, "tag", arg4,
+                           "param", arg5);
+
+    // Also trigger communication data tracking
+    comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5);
+}
+
+// (worker, buffer, count, tag, tag_mask, param), e.g. ucp_tag_recv_nbx
+void
+ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, void* arg2,
+                  size_t arg3, uint64_t arg4, uint64_t arg5, const void* arg6)
+{
+    category_region::start(std::string_view{ _data.tool_id }, "worker",
+                           arg1, "buffer", arg2, "count", arg3, "tag",
+                           arg4, "tag_mask", arg5, "param", arg6);
+
+    // Also trigger communication data tracking
+    comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5, arg6);
+}
+
+// RMA operations
+// (ep, const buffer, count, remote_addr, rkey, param), e.g. ucp_put_nbx
+void
+ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, const void* arg2,
+                  size_t arg3, uint64_t arg4, void* arg5, const void* arg6)
+{
+    category_region::start(std::string_view{ _data.tool_id }, "ep", arg1,
+                           "buffer", arg2, "count", arg3, "remote_addr",
+                           arg4, "rkey", arg5, "param", arg6);
+
+    // Also trigger communication data tracking
+    comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5, arg6);
+}
+
+// (ep, mutable buffer, count, remote_addr, rkey, param), e.g. ucp_get_nbx
+void
+ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, void* arg2,
+                  size_t arg3, uint64_t arg4, void* arg5, const void* arg6)
+{
+    category_region::start(std::string_view{ _data.tool_id }, "ep", arg1,
+                           "buffer", arg2, "count", arg3, "remote_addr",
+                           arg4, "rkey", arg5, "param", arg6);
+
+    // Also trigger communication data tracking
+    comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5, arg6);
+}
+
+// Active message send
+// (ep, id, header, header_length, buffer, count, param), e.g. ucp_am_send_nbx
+void
+ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, unsigned arg2,
+                  const void* arg3, size_t arg4, const void* arg5, size_t arg6,
+                  const void* arg7)
+{
+    category_region::start(
+        std::string_view{ _data.tool_id }, "ep", arg1, "id", arg2, "header", arg3,
+        "header_length", arg4, "buffer", arg5, "count", arg6, "param", arg7);
+
+    // Also trigger communication data tracking
+    comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+}
+
+// Stream operations
+// (ep, buffer, count, param), e.g. ucp_stream_send_nbx
+void
+ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, const void* arg2,
+                  size_t arg3, const void* arg4)
+{
+    category_region::start(std::string_view{ _data.tool_id }, "ep", arg1,
+                           "buffer", arg2, "count", arg3, "param", arg4);
+
+    // Also trigger communication data tracking
+    comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4);
+}
+
+// (ep, buffer, count, length, param), e.g. ucp_stream_recv_nbx; "length" is
+// recorded as the size_t* that was passed, not the value it points to
+void
+ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, void* arg2,
+                  size_t arg3, size_t* arg4, const void* arg5)
+{
+    category_region::start(std::string_view{ _data.tool_id }, "ep", arg1,
+                           "buffer", arg2, "count", arg3, "length", arg4,
+                           "param", arg5);
+
+    // Also trigger communication data tracking
+    comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5);
+}
+
+// Closes the region and records a pointer return value
+void
+ucx_gotcha::audit(const gotcha_data& _data, audit::outgoing, void* ret)
+{
+    category_region::stop(std::string_view{ _data.tool_id }, "return",
+                          ret);
+}
+
+// Closes the region and records an integer return value
+void
+ucx_gotcha::audit(const gotcha_data& _data, audit::outgoing, int ret)
+{
+    category_region::stop(std::string_view{ _data.tool_id }, "return",
+                          ret);
+}
+
+}  // namespace component
+}  // namespace rocprofsys
+
+TIMEMORY_STORAGE_INITIALIZER(rocprofsys::component::ucx_gotcha)
diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/ucx_gotcha.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/ucx_gotcha.hpp
new file mode 100644
index 0000000000..5473c2b058
--- /dev/null
+++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/ucx_gotcha.hpp
@@ -0,0 +1,114 @@
+// MIT License
+//
+// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#pragma once
+
+#include "core/common.hpp"
+#include "core/defines.hpp"
+#include "core/timemory.hpp"
+#include "library/components/comm_data.hpp"
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace rocprofsys
+{
+namespace component
+{
+struct ucx_gotcha : tim::component::base
+{
+    static constexpr size_t gotcha_capacity = 100;  // configure() uses slots 0-92
+
+    using gotcha_data = tim::component::gotcha_data;
+
+    ROCPROFSYS_DEFAULT_OBJECT(ucx_gotcha)
+
+    // string id for component
+    static std::string label() { return "ucx_gotcha"; }
+
+    // configure() registers one wrapper per UCX symbol; shutdown() disables them
+    static void configure();
+    static void shutdown();
+
+    static void start();
+    static void stop();
+
+    // Fallback for calls lacking a dedicated overload: starts the region, logs no args
+    template
+    static void audit(const gotcha_data& _data, audit::incoming, Args...)
+    {
+        category_region::start(std::string_view{ _data.tool_id });
+    }
+
+public:
+    // Specific audit functions for tag operations (with uint64_t tags)
+    // ucp_tag_send_nbx: (void* ep, const void* buffer, size_t count, uint64_t tag, const
+    // void* param)
+    static void audit(const gotcha_data&, audit::incoming, void*, const void*, size_t,
+                      uint64_t, const void*);
+    // ucp_tag_recv_nbx: (void* worker, void* buffer, size_t count, uint64_t tag, uint64_t
+    // tag_mask, const void* param)
+    static void audit(const gotcha_data&, audit::incoming, void*, void*, size_t, uint64_t,
+                      uint64_t, const void*);
+
+    // RMA operations
+    // ucp_put_nbx: (void* ep, const void* buffer, size_t count, uint64_t remote_addr,
+    // void* rkey, const void* param)
+    static void audit(const gotcha_data&, audit::incoming, void*, const void*, size_t,
+                      uint64_t, void*, const void*);
+    // ucp_get_nbx: (void* ep, void* buffer, size_t count, uint64_t remote_addr, void*
+    // rkey, const void* param)
+    static void audit(const gotcha_data&, audit::incoming, void*, void*, size_t, uint64_t,
+                      void*, const void*);
+
+    // Active message send
+    // ucp_am_send_nbx: (void* ep, unsigned id, const void* header, size_t header_length,
+    // const void* buffer, size_t count, const void* param)
+    static void audit(const gotcha_data&, audit::incoming, void*, unsigned, const void*,
+                      size_t, const void*, size_t, const void*);
+
+    // Stream operations
+    // ucp_stream_send_nbx: (void* ep, const void* buffer, size_t count, const void*
+    // param)
+    static void audit(const gotcha_data&, audit::incoming, void*, const void*, size_t,
+                      const void*);
+    // ucp_stream_recv_nbx: (void* ep, void* buffer, size_t count, size_t* length, const
+    // void* param)
+    static void audit(const gotcha_data&, audit::incoming, void*, void*, size_t, size_t*,
+                      const void*);
+
+    // Outgoing audit: stops the region and records the return value (pointer or int)
+    static void audit(const gotcha_data&, audit::outgoing, void*);
+    static void audit(const gotcha_data&, audit::outgoing, int);
+};
+}  // namespace component
+
+using ucx_bundle_t =
+    tim::component_bundle;
+using ucx_gotcha_t = tim::component::gotcha;
+}  // namespace rocprofsys
diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/tpls/ucx/ucp/api/ucp.h b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/tpls/ucx/ucp/api/ucp.h
new file mode 100644
index 0000000000..2615ef792e
--- /dev/null
+++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/tpls/ucx/ucp/api/ucp.h
@@ -0,0 +1,4130 @@
+/*
+ * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2020. ALL RIGHTS RESERVED.
+ * Copyright (C) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED.
+ * Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED.
+ * Copyright (C) Los Alamos National Security, LLC. 2018 ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCP_H_
+#define UCP_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+BEGIN_C_DECLS
+
+/**
+ * @defgroup UCP_API Unified Communication Protocol (UCP) API
+ * @{
+ * This section describes UCP API.
+ * @}
+ */
+
+/**
+ * @defgroup UCP_CONTEXT UCP Application Context
+ * @ingroup UCP_API
+ * @{
+ * Application context is a primary concept of UCP design which
+ * provides an isolation mechanism, allowing resources associated
+ * with the context to separate or share network communication context
+ * across multiple instances of applications.
+ *
+ * This section provides a detailed description of this concept and
+ * routines associated with it.
+ * + * @} + */ + +/** + * @defgroup UCP_WORKER UCP Worker + * @ingroup UCP_API + * @{ + * UCP Worker routines + * @} + */ + +/** + * @defgroup UCP_MEM UCP Memory routines + * @ingroup UCP_API + * @{ + * UCP Memory routines + * @} + */ + +/** + * @defgroup UCP_WAKEUP UCP Wake-up routines + * @ingroup UCP_API + * @{ + * UCP Wake-up routines + * @} + */ + +/** + * @defgroup UCP_ENDPOINT UCP Endpoint + * @ingroup UCP_API + * @{ + * UCP Endpoint routines + * @} + */ + +/** + * @defgroup UCP_COMM UCP Communication routines + * @ingroup UCP_API + * @{ + * UCP Communication routines + * @} + */ + +/** + * @defgroup UCP_CONFIG UCP Configuration + * @ingroup UCP_API + * @{ + * This section describes routines for configuration + * of the UCP network layer + * @} + */ + +/** + * @defgroup UCP_DATATYPE UCP Data type routines + * @ingroup UCP_API + * @{ + * UCP Data type routines + * @} + */ + +/** + * @ingroup UCP_CONTEXT + * @brief UCP context parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_params_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_params_field +{ + UCP_PARAM_FIELD_FEATURES = UCS_BIT(0), /**< features */ + UCP_PARAM_FIELD_REQUEST_SIZE = UCS_BIT(1), /**< request_size */ + UCP_PARAM_FIELD_REQUEST_INIT = UCS_BIT(2), /**< request_init */ + UCP_PARAM_FIELD_REQUEST_CLEANUP = UCS_BIT(3), /**< request_cleanup */ + UCP_PARAM_FIELD_TAG_SENDER_MASK = UCS_BIT(4), /**< tag_sender_mask */ + UCP_PARAM_FIELD_MT_WORKERS_SHARED = UCS_BIT(5), /**< mt_workers_shared */ + UCP_PARAM_FIELD_ESTIMATED_NUM_EPS = UCS_BIT(6), /**< estimated_num_eps */ + UCP_PARAM_FIELD_ESTIMATED_NUM_PPN = UCS_BIT(7), /**< estimated_num_ppn */ + UCP_PARAM_FIELD_NAME = UCS_BIT(8), /**< name */ + UCP_PARAM_FIELD_NODE_LOCAL_ID = UCS_BIT(9) +}; + +/** + * @ingroup UCP_CONTEXT + * @brief UCP configuration features + * + * The enumeration list describes the features supported by UCP. 
An + * application can request the features using @ref ucp_params_t "UCP parameters" + * during @ref ucp_init "UCP initialization" process. + */ +enum ucp_feature +{ + /** Request tag matching support */ + UCP_FEATURE_TAG = UCS_BIT(0), + + /** Request remote memory access support */ + UCP_FEATURE_RMA = UCS_BIT(1), + + /** Request 32-bit atomic operations support */ + UCP_FEATURE_AMO32 = UCS_BIT(2), + + /** Request 64-bit atomic operations support */ + UCP_FEATURE_AMO64 = UCS_BIT(3), + + /** Request interrupt notification support */ + UCP_FEATURE_WAKEUP = UCS_BIT(4), + + /** Request stream support */ + UCP_FEATURE_STREAM = UCS_BIT(5), + + /** Request Active Message support */ + UCP_FEATURE_AM = UCS_BIT(6), + + /** + * Request support mapping a peer's memory handle that was created by + * @ref ucp_mem_map and packed by @ref ucp_memh_pack with the flag + * @ref UCP_MEMH_PACK_FLAG_EXPORT and use it for local operations + */ + UCP_FEATURE_EXPORTED_MEMH = UCS_BIT(7), + + /** Request device-based communication features */ + UCP_FEATURE_DEVICE = UCS_BIT(8) +}; + +/** + * @ingroup UCP_WORKER + * @brief UCP worker parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_worker_params_t are + * present. It is used to enable backward compatibility support. 
+ */ +enum ucp_worker_params_field +{ + UCP_WORKER_PARAM_FIELD_THREAD_MODE = UCS_BIT(0), /**< UCP thread mode */ + UCP_WORKER_PARAM_FIELD_CPU_MASK = UCS_BIT(1), /**< Worker's CPU bitmap */ + UCP_WORKER_PARAM_FIELD_EVENTS = UCS_BIT(2), /**< Worker's events bitmap */ + UCP_WORKER_PARAM_FIELD_USER_DATA = UCS_BIT(3), /**< User data */ + UCP_WORKER_PARAM_FIELD_EVENT_FD = UCS_BIT(4), /**< External event file + descriptor */ + UCP_WORKER_PARAM_FIELD_FLAGS = UCS_BIT(5), /**< Worker flags */ + UCP_WORKER_PARAM_FIELD_NAME = UCS_BIT(6), /**< Worker name */ + UCP_WORKER_PARAM_FIELD_AM_ALIGNMENT = UCS_BIT(7), /**< Alignment of active + messages on the receiver */ + UCP_WORKER_PARAM_FIELD_CLIENT_ID = UCS_BIT(8) /**< Client id */ +}; + +/** + * @ingroup UCP_WORKER + * @brief UCP worker flags + * + * This enumeration allows specifying flags for @ref ucp_worker_params_t.flags, + * which is used as parameter for @ref ucp_worker_create. + */ +typedef enum +{ + UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK = UCS_BIT(0) /**< Do not print warnings + about request leaks */ +} ucp_worker_flags_t; + +/** + * @ingroup UCP_WORKER + * @brief UCP listener parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_listener_params_t + * are present. It is used to enable backward compatibility support. + */ +enum ucp_listener_params_field +{ + /** + * Sock address and length. + */ + UCP_LISTENER_PARAM_FIELD_SOCK_ADDR = UCS_BIT(0), + + /** + * User's callback and argument for handling the creation of an endpoint. + * */ + UCP_LISTENER_PARAM_FIELD_ACCEPT_HANDLER = UCS_BIT(1), + + /**< User's callback and argument for handling the incoming connection + * request. */ + UCP_LISTENER_PARAM_FIELD_CONN_HANDLER = UCS_BIT(2) +}; + +/** + * @ingroup UCP_WORKER + * @brief UCP worker address flags. 
+ * + * The enumeration list describes possible UCP worker address flags, indicating + * what needs to be included to the worker address returned by + * @ref ucp_worker_query "ucp_worker_query()" routine. + */ +typedef enum +{ + + /**< Pack addresses of network devices only. Using such shortened addresses + * for the remote node peers will reduce the amount of wireup data being + * exchanged during connection establishment phase. */ + UCP_WORKER_ADDRESS_FLAG_NET_ONLY = UCS_BIT(0) +} ucp_worker_address_flags_t; + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP endpoint parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_ep_params_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_ep_params_field +{ + UCP_EP_PARAM_FIELD_REMOTE_ADDRESS = UCS_BIT(0), /**< Address of remote + peer */ + UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE = + UCS_BIT(1), /**< Error handling mode. + @ref ucp_err_handling_mode_t */ + UCP_EP_PARAM_FIELD_ERR_HANDLER = UCS_BIT(2), /**< Handler to process + transport level errors */ + UCP_EP_PARAM_FIELD_USER_DATA = UCS_BIT(3), /**< User data pointer */ + UCP_EP_PARAM_FIELD_SOCK_ADDR = UCS_BIT(4), /**< Socket address field */ + UCP_EP_PARAM_FIELD_FLAGS = UCS_BIT(5), /**< Endpoint flags */ + /**< Connection request field */ + UCP_EP_PARAM_FIELD_CONN_REQUEST = UCS_BIT(6), + UCP_EP_PARAM_FIELD_NAME = UCS_BIT(7), /**< Endpoint name */ + UCP_EP_PARAM_FIELD_LOCAL_SOCK_ADDR = UCS_BIT(8) /**< Local socket Address */ +}; + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP endpoint parameters flags. + * + * The enumeration list describes the endpoint's parameters flags supported by + * @ref ucp_ep_create() function. + */ +enum ucp_ep_params_flags_field +{ + UCP_EP_PARAMS_FLAGS_CLIENT_SERVER = UCS_BIT(0), /**< Using a client-server + connection establishment + mechanism. 
+ @ref ucs_sock_addr_t + sockaddr field + must be provided and + contain the address + of the remote peer */ + UCP_EP_PARAMS_FLAGS_NO_LOOPBACK = UCS_BIT(1), /**< Avoid connecting the + endpoint to itself when + connecting the endpoint + to the same worker it + was created on. + Affects protocols which + send to a particular + remote endpoint, for + example stream */ + UCP_EP_PARAMS_FLAGS_SEND_CLIENT_ID = UCS_BIT(2) /**< Send client id + when connecting to remote + socket address as part of the + connection request payload. + On the remote side value + can be obtained from + @ref ucp_conn_request_h using + @ref ucp_conn_request_query */ +}; + +/** + * @ingroup UCP_ENDPOINT + * @brief Close UCP endpoint modes. + * + * The enumeration is used to specify the behavior of @ref ucp_ep_close_nbx. + */ +typedef enum +{ + UCP_EP_CLOSE_FLAG_FORCE = UCS_BIT(0) /**< @ref ucp_ep_close_nbx releases + the endpoint without any + confirmation from the peer. All + outstanding requests will be + completed with + @ref UCS_ERR_CANCELED error. + @note This mode may cause + transport level errors on remote + side, so it requires set + @ref UCP_ERR_HANDLING_MODE_PEER + for all endpoints created on + both (local and remote) sides to + avoid undefined behavior. If this + flag is not set then + @ref ucp_ep_close_nbx schedules + flushes on all outstanding + operations. + @note this flag is incompatible + with @ref UCP_OP_ATTR_FLAG_NO_IMM_CMPL, + since it forces immediate completion. + */ +} ucp_ep_close_flags_t; + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP performance fields and flags + * + * The enumeration allows specifying which fields in @ref ucp_ep_evaluate_perf_param_t are + * present and operation flags are used. It is used to enable backward + * compatibility support. 
+ */ +typedef enum ucp_ep_perf_param_field +{ + /** Enables @ref ucp_ep_evaluate_perf_param_t::message_size */ + UCP_EP_PERF_PARAM_FIELD_MESSAGE_SIZE = UCS_BIT(0) +} ucp_ep_perf_param_field_t; + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP performance fields and flags + * + * The enumeration allows specifying which fields in @ref ucp_ep_evaluate_perf_attr_t are + * present and operation flags are used. It is used to enable backward + * compatibility support. + */ +typedef enum ucp_ep_perf_attr_field +{ + /** Enables @ref ucp_ep_evaluate_perf_attr_t::estimated_time */ + UCP_EP_PERF_ATTR_FIELD_ESTIMATED_TIME = UCS_BIT(0) +} ucp_ep_perf_attr_field_t; + +/** + * @ingroup UCP_MEM + * @brief UCP memory mapping parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_mem_map_params_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_mem_map_params_field +{ + /** + * Address of the memory that will be used in the @ref ucp_mem_map routine. + */ + UCP_MEM_MAP_PARAM_FIELD_ADDRESS = UCS_BIT(0), + + /** + * The size of memory that will be allocated or registered in the + * @ref ucp_mem_map routine. + */ + UCP_MEM_MAP_PARAM_FIELD_LENGTH = UCS_BIT(1), + + /** Allocation flags. */ + UCP_MEM_MAP_PARAM_FIELD_FLAGS = UCS_BIT(2), + + /** Memory protection mode. */ + UCP_MEM_MAP_PARAM_FIELD_PROT = UCS_BIT(3), + + /** Memory type. */ + UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE = UCS_BIT(4), + + /** Exported memory handle buffer. */ + UCP_MEM_MAP_PARAM_FIELD_EXPORTED_MEMH_BUFFER = UCS_BIT(5) +}; + +/** + * @ingroup UCP_MEM + * @brief UCP memory advice parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_mem_advise_params_t are + * present. It is used to enable backward compatibility support. 
+ */ +enum ucp_mem_advise_params_field +{ + UCP_MEM_ADVISE_PARAM_FIELD_ADDRESS = UCS_BIT(0), /**< Address of the memory */ + UCP_MEM_ADVISE_PARAM_FIELD_LENGTH = UCS_BIT(1), /**< The size of memory */ + UCP_MEM_ADVISE_PARAM_FIELD_ADVICE = UCS_BIT(2) /**< Advice on memory usage */ +}; + +/** + * @ingroup UCP_CONTEXT + * @brief UCP library attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_lib_attr_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_lib_attr_field +{ + /**< UCP library maximum supported thread level flag */ + UCP_LIB_ATTR_FIELD_MAX_THREAD_LEVEL = UCS_BIT(0) +}; + +/** + * @ingroup UCP_CONTEXT + * @brief UCP context attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_context_attr_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_context_attr_field +{ + UCP_ATTR_FIELD_REQUEST_SIZE = UCS_BIT(0), /**< UCP request size */ + UCP_ATTR_FIELD_THREAD_MODE = UCS_BIT(1), /**< UCP context thread flag */ + UCP_ATTR_FIELD_MEMORY_TYPES = UCS_BIT(2), /**< UCP supported memory types */ + UCP_ATTR_FIELD_NAME = UCS_BIT(3), /**< UCP context name */ + UCP_ATTR_FIELD_DEVICE_COUNTER_SIZE = UCS_BIT(4) /**< UCP Device API counter size */ +}; + +/** + * @ingroup UCP_WORKER + * @brief UCP worker attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_worker_attr_t are + * present. It is used to enable backward compatibility support. 
+ */ +enum ucp_worker_attr_field +{ + UCP_WORKER_ATTR_FIELD_THREAD_MODE = UCS_BIT(0), /**< UCP thread mode */ + UCP_WORKER_ATTR_FIELD_ADDRESS = UCS_BIT(1), /**< UCP address */ + UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS = UCS_BIT(2), /**< UCP address flags */ + UCP_WORKER_ATTR_FIELD_MAX_AM_HEADER = UCS_BIT(3), /**< Maximum header size + used by UCP AM API */ + UCP_WORKER_ATTR_FIELD_NAME = UCS_BIT(4), /**< UCP worker name */ + UCP_WORKER_ATTR_FIELD_MAX_INFO_STRING = UCS_BIT(5) /**< Maximum size of + info string */ +}; + +/** + * @ingroup UCP_WORKER + * @brief UCP worker address attributes field mask. + * + * The enumeration allows specifying which fields in + * @ref ucp_worker_address_attr_t are present. It is used to enable backward + * compatibility support. + */ +enum ucp_worker_address_attr_field +{ + UCP_WORKER_ADDRESS_ATTR_FIELD_UID = UCS_BIT(0) /**< Unique id of the worker */ +}; + +/** + * @ingroup UCP_WORKER + * @brief UCP listener attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_listener_attr_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_listener_attr_field +{ + UCP_LISTENER_ATTR_FIELD_SOCKADDR = UCS_BIT(0) /**< Sockaddr used for listening */ +}; + +/** + * @ingroup UCP_WORKER + * @brief UCP listener's connection request attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_conn_request_attr_t + * are present. It is used to enable backward compatibility support. + */ +enum ucp_conn_request_attr_field +{ + UCP_CONN_REQUEST_ATTR_FIELD_CLIENT_ADDR = UCS_BIT(0), /**< Client's address */ + UCP_CONN_REQUEST_ATTR_FIELD_CLIENT_ID = UCS_BIT(1) /**< Remote client id */ +}; + +/** + * @ingroup UCP_DATATYPE + * @brief UCP data type classification + * + * The enumeration list describes the datatypes supported by UCP. 
+ */ +enum ucp_dt_type +{ + UCP_DATATYPE_CONTIG = 0, /**< Contiguous datatype */ + UCP_DATATYPE_STRIDED = 1, /**< Strided datatype */ + UCP_DATATYPE_IOV = 2, /**< Scatter-gather list with multiple pointers */ + UCP_DATATYPE_GENERIC = 7, /**< Generic datatype with + user-defined pack/unpack routines */ + UCP_DATATYPE_SHIFT = 3, /**< Number of bits defining + the datatype classification */ + UCP_DATATYPE_CLASS_MASK = UCS_MASK(UCP_DATATYPE_SHIFT) /**< Data-type class + mask */ +}; + +/** + * @ingroup UCP_DATATYPE + * @brief UCP datatype attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_datatype_attr_t + * are present and which datatype attributes are queried. + */ +enum ucp_datatype_attr_field +{ + /** @ref ucp_datatype_attr_t::packed_size field is queried. */ + UCP_DATATYPE_ATTR_FIELD_PACKED_SIZE = UCS_BIT(0), + + /** @ref ucp_datatype_attr_t::buffer field is set. */ + UCP_DATATYPE_ATTR_FIELD_BUFFER = UCS_BIT(1), + + /** @ref ucp_datatype_attr_t::count field is set. */ + UCP_DATATYPE_ATTR_FIELD_COUNT = UCS_BIT(2) +}; + +/** + * @ingroup UCP_MEM + * @brief UCP memory mapping flags. + * + * The enumeration list describes the memory mapping flags supported by @ref + * ucp_mem_map() function. + */ +enum +{ + /** + * Complete the mapping faster, possibly by not populating the pages in the + * mapping up-front, and mapping them later when they are accessed by + * communication routines. + */ + UCP_MEM_MAP_NONBLOCK = UCS_BIT(0), + + /** + * Identify requirement for allocation, if passed address is not a + * null-pointer, then it will be used as a hint or direct address for + * allocation. + */ + UCP_MEM_MAP_ALLOCATE = UCS_BIT(1), + + /** + * Don't interpret address as a hint: place the mapping at exactly that + * address. The address must be a multiple of the page size. 
+ */ + UCP_MEM_MAP_FIXED = UCS_BIT(2), + + /** + * Register the memory region so its remote access key would likely be + * equal to remote access keys received from other peers, when compared with + * @ref ucp_rkey_compare. This flag is a hint. When remote access keys + * received from different peers are compared equal, they can be used + * interchangeably, avoiding the need to keep all of them in memory. + */ + UCP_MEM_MAP_SYMMETRIC_RKEY = UCS_BIT(3), + + /** + * Enforce pinning of the memory pages in the mapping and populate them up-front. + * This flag is mutually exclusive with UCP_MEM_MAP_NONBLOCK. + */ + UCP_MEM_MAP_LOCK = UCS_BIT(4) +}; + +/** + * @ingroup UCP_MEM + * @brief UCP memory mapping protection mode. + * + * The enumeration list describes the memory mapping protections supported by the @ref + * ucp_mem_map() function. + */ +enum +{ + UCP_MEM_MAP_PROT_LOCAL_READ = UCS_BIT(0), /**< Enable local read access. */ + UCP_MEM_MAP_PROT_LOCAL_WRITE = UCS_BIT(1), /**< Enable local write access. */ + UCP_MEM_MAP_PROT_REMOTE_READ = UCS_BIT(8), /**< Enable remote read access. */ + UCP_MEM_MAP_PROT_REMOTE_WRITE = UCS_BIT(9) /**< Enable remote write access. */ +}; + +/** + * @ingroup UCP_WORKER + * @brief Flags for a UCP Active Message callback. + * + * Flags that indicate how to handle UCP Active Messages. + */ +enum ucp_am_cb_flags +{ + /** + * Indicates that the entire message will be handled in one callback. + */ + UCP_AM_FLAG_WHOLE_MSG = UCS_BIT(0), + + /** + * Guarantees that the specified @ref ucp_am_recv_callback_t callback, + * will always be called with @ref UCP_AM_RECV_ATTR_FLAG_DATA flag set, + * so the data will be accessible outside the callback, until + * @ref ucp_am_data_release is called. + */ + UCP_AM_FLAG_PERSISTENT_DATA = UCS_BIT(1) +}; + +/** + * @ingroup UCP_WORKER + * @brief Flags for sending a UCP Active Message. + * + * Flags dictate the behavior of @ref ucp_am_send_nb and @ref ucp_am_send_nbx + * routines. 
+ */ +enum ucp_send_am_flags +{ + /** + * Force relevant reply endpoint to be passed to the data callback on the receiver. + */ + UCP_AM_SEND_FLAG_REPLY = UCS_BIT(0), + + /** + * Force UCP to use only eager protocol for AM sends. + */ + UCP_AM_SEND_FLAG_EAGER = UCS_BIT(1), + + /** + * Force UCP to use only rendezvous protocol for AM sends. + */ + UCP_AM_SEND_FLAG_RNDV = UCS_BIT(2), + + /** + * The flag indicates that the header should be copied to an internal buffer + * in case it's needed after the send function returns. If this flag is + * specified, the header can be released immediately after the send + * function returns, even if the non-blocking send request is not completed. + */ + UCP_AM_SEND_FLAG_COPY_HEADER = UCS_BIT(3), + + /** + * Backward compatibility. + */ + UCP_AM_SEND_REPLY = UCP_AM_SEND_FLAG_REPLY +}; + +/** + * @ingroup UCP_ENDPOINT + * @brief Descriptor flags for Active Message callback. + * + * If flags is set to UCP_CB_PARAM_FLAG_DATA in + * a callback, then data was allocated, so if UCS_INPROGRESS is + * returned from the callback, the data parameter will persist + * and the user has to call @ref ucp_am_data_release when data is + * no longer needed. + */ +enum ucp_cb_param_flags +{ + UCP_CB_PARAM_FLAG_DATA = UCS_BIT(0) +}; + +/** + * @ingroup UCP_COMM + * @brief Atomic operation requested for ucp_atomic_op_nbx + * + * This enumeration defines which atomic memory operation should be + * performed by the @ref ucp_atomic_op_nbx routine. + */ +typedef enum +{ + UCP_ATOMIC_OP_ADD, /**< Atomic add */ + UCP_ATOMIC_OP_SWAP, /**< Atomic swap */ + UCP_ATOMIC_OP_CSWAP, /**< Atomic conditional swap */ + UCP_ATOMIC_OP_AND, /**< Atomic and */ + UCP_ATOMIC_OP_OR, /**< Atomic or */ + UCP_ATOMIC_OP_XOR, /**< Atomic xor */ + UCP_ATOMIC_OP_LAST +} ucp_atomic_op_t; + +/** + * @ingroup UCP_COMM + * @brief Flags to define behavior of @ref ucp_stream_recv_nb function + * + * This enumeration defines behavior of @ref ucp_stream_recv_nb function.
+ */ +typedef enum +{ + UCP_STREAM_RECV_FLAG_WAITALL = UCS_BIT(0) /**< This flag requests that + the operation will not be + completed until all + requested data is received + and placed in the user + buffer. */ +} ucp_stream_recv_flags_t; + +/** + * @ingroup UCP_COMM + * @brief UCP operation fields and flags + * + * The enumeration allows specifying which fields in @ref ucp_request_param_t are + * present and operation flags are used. It is used to enable backward + * compatibility support. + */ +typedef enum +{ + UCP_OP_ATTR_FIELD_REQUEST = UCS_BIT(0), /**< request field */ + UCP_OP_ATTR_FIELD_CALLBACK = UCS_BIT(1), /**< cb field */ + UCP_OP_ATTR_FIELD_USER_DATA = UCS_BIT(2), /**< user_data field */ + UCP_OP_ATTR_FIELD_DATATYPE = UCS_BIT(3), /**< datatype field */ + UCP_OP_ATTR_FIELD_FLAGS = UCS_BIT(4), /**< operation-specific flags */ + UCP_OP_ATTR_FIELD_REPLY_BUFFER = UCS_BIT(5), /**< reply_buffer field */ + UCP_OP_ATTR_FIELD_MEMORY_TYPE = UCS_BIT(6), /**< memory type field */ + UCP_OP_ATTR_FIELD_RECV_INFO = UCS_BIT(7), /**< recv_info field */ + UCP_OP_ATTR_FIELD_MEMH = UCS_BIT(8), /**< memory handle field */ + + UCP_OP_ATTR_FLAG_NO_IMM_CMPL = UCS_BIT(16), /**< Deny immediate completion, + i.e NULL cannot be returned. + If a completion callback is + provided, it can be called + before the function + returns. */ + UCP_OP_ATTR_FLAG_FAST_CMPL = UCS_BIT(17), /**< expedite local completion, + even if it delays remote + data delivery. Note for + implementer: this option + can disable zero copy + and/or rendezvous protocols + which require + synchronization with the + remote peer before releasing + the local send buffer */ + UCP_OP_ATTR_FLAG_FORCE_IMM_CMPL = UCS_BIT(18), /**< force immediate complete + operation, fail if the + operation cannot be + completed immediately */ + UCP_OP_ATTR_FLAG_MULTI_SEND = + UCS_BIT(19) /**< optimize for bandwidth of + multiple in-flight operations, + rather than for the latency + of a single operation. 
+ This flag and UCP_OP_ATTR_FLAG_FAST_CMPL + are mutually exclusive. */ +} ucp_op_attr_t; + +/** + * @ingroup UCP_COMM + * @brief UCP request query attributes + * + * The enumeration allows specifying which fields in @ref ucp_request_attr_t are + * present. It is used to enable backward compatibility support. + */ +typedef enum +{ + UCP_REQUEST_ATTR_FIELD_INFO_STRING = UCS_BIT(0), + UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE = UCS_BIT(1), + UCP_REQUEST_ATTR_FIELD_STATUS = UCS_BIT(2), + UCP_REQUEST_ATTR_FIELD_MEM_TYPE = UCS_BIT(3) +} ucp_req_attr_field; + +/** + * @ingroup UCP_COMM + * @brief UCP AM receive data parameter fields and flags + * + * The enumeration allows specifying which fields in @ref ucp_am_recv_param_t + * are present and receive operation flags are used. It is used to enable + * backward compatibility support. + */ +typedef enum +{ + UCP_AM_RECV_ATTR_FIELD_REPLY_EP = UCS_BIT(0), /**< reply_ep field */ + + /** + * Indicates that the data provided in @ref ucp_am_recv_callback_t callback + * can be held by the user. If UCS_INPROGRESS is returned from the callback, + * the data parameter will persist and the user has to call + * @ref ucp_am_data_release when data is no longer needed. This flag is + * mutually exclusive with @a UCP_AM_RECV_ATTR_FLAG_RNDV. + */ + UCP_AM_RECV_ATTR_FLAG_DATA = UCS_BIT(16), + + /** + * Indicates that the arriving data was sent using rendezvous protocol. + * In this case @a data parameter of the @ref ucp_am_recv_callback_t points + * to the internal UCP descriptor, which can be used for obtaining the actual + * data by calling @ref ucp_am_recv_data_nbx routine. This flag is mutually + * exclusive with @a UCP_AM_RECV_ATTR_FLAG_DATA. + */ + UCP_AM_RECV_ATTR_FLAG_RNDV = UCS_BIT(17) +} ucp_am_recv_attr_t; + +/** + * @ingroup UCP_COMM + * @brief UCP AM receive data parameters fields and flags + * + * The enumeration allows specifying which fields in @ref ucp_am_handler_param_t + * are present. 
It is used to enable backward compatibility support. + */ +enum ucp_am_handler_param_field +{ + /** + * Indicates that @ref ucp_am_handler_param_t.id field is valid. + */ + UCP_AM_HANDLER_PARAM_FIELD_ID = UCS_BIT(0), + /** + * Indicates that @ref ucp_am_handler_param_t.flags field is valid. + */ + UCP_AM_HANDLER_PARAM_FIELD_FLAGS = UCS_BIT(1), + /** + * Indicates that @ref ucp_am_handler_param_t.cb field is valid. + */ + UCP_AM_HANDLER_PARAM_FIELD_CB = UCS_BIT(2), + /** + * Indicates that @ref ucp_am_handler_param_t.arg field is valid. + */ + UCP_AM_HANDLER_PARAM_FIELD_ARG = UCS_BIT(3) +}; + +/** + * @ingroup UCP_DATATYPE + * @brief Generate an identifier for contiguous data type. + * + * This macro creates an identifier for contiguous datatype that is defined by + * the size of the basic element. + * + * @param [in] _elem_size Size of the basic element of the type. + * + * @return Data-type identifier. + * + * @note In case of partial receive, the buffer will be filled with integral + * count of elements. + */ +#define ucp_dt_make_contig(_elem_size) \ + (((ucp_datatype_t) (_elem_size) << UCP_DATATYPE_SHIFT) | UCP_DATATYPE_CONTIG) + +/** + * @ingroup UCP_DATATYPE + * @brief Generate an identifier for Scatter-gather IOV data type. + * + * This macro creates an identifier for datatype of scatter-gather list + * with multiple pointers + * + * @return Data-type identifier. + * + * @note In the event of partial receive, @ref ucp_dt_iov_t::buffer can be + * filled with any number of bytes according to its + * @ref ucp_dt_iov_t::length. + */ +#define ucp_dt_make_iov() ((ucp_datatype_t) UCP_DATATYPE_IOV) + +/** + * @ingroup UCP_DATATYPE + * @brief Structure for scatter-gather I/O. + * + * This structure is used to specify a list of buffers which can be used + * within a single data transfer function call. This list should remain valid + * until the data transfer request is completed. 
+ * + * @note If @a length is zero, the memory pointed to by @a buffer + * will not be accessed. Otherwise, @a buffer must point to valid memory. + */ +typedef struct ucp_dt_iov +{ + void* buffer; /**< Pointer to a data buffer */ + size_t length; /**< Length of the @a buffer in bytes */ +} ucp_dt_iov_t; + +/** + * @ingroup UCP_DATATYPE + * @brief UCP generic data type descriptor + * + * This structure provides a generic datatype descriptor that + * is used for definition of application-defined datatypes. + * + * Typically, the descriptor is used for an integration with datatype + * engines implemented within MPI and SHMEM implementations. + * + * @note In case of partial receive, any amount of received data is acceptable + * which matches buffer size. + */ +typedef struct ucp_generic_dt_ops +{ + /** + * @ingroup UCP_DATATYPE + * @brief Start a packing request. + * + * The pointer refers to application defined start-to-pack routine. It will + * be called from the @ref ucp_tag_send_nb routine. + * + * @param [in] context User-defined context. + * @param [in] buffer Buffer to pack. + * @param [in] count Number of elements to pack into the buffer. + * + * @return A custom state that is passed to the following + * @ref ucp_generic_dt_ops::pack "pack()" routine. + */ + void* (*start_pack)(void* context, const void* buffer, size_t count); + + /** + * @ingroup UCP_DATATYPE + * @brief Start an unpacking request. + * + * The pointer refers to application defined start-to-unpack routine. It will + * be called from the @ref ucp_tag_recv_nb routine. + * + * @param [in] context User-defined context. + * @param [in] buffer Buffer to unpack to. + * @param [in] count Number of elements to unpack in the buffer. + * + * @return A custom state that is passed later to the following + * @ref ucp_generic_dt_ops::unpack "unpack()" routine. + */ + void* (*start_unpack)(void* context, void* buffer, size_t count); + + /** + * @ingroup UCP_DATATYPE + * @brief Get the total size of packed data.
+ * + * The pointer refers to user defined routine that returns the size of data + * in a packed format. + * + * @param [in] state State as returned by + * @ref ucp_generic_dt_ops::start_pack + * "start_pack()" routine. + * + * @return The size of the data in a packed form. + */ + size_t (*packed_size)(void* state); + + /** + * @ingroup UCP_DATATYPE + * @brief Pack data. + * + * The pointer refers to application defined pack routine. + * + * @param [in] state State as returned by + * @ref ucp_generic_dt_ops::start_pack + * "start_pack()" routine. + * @param [in] offset Virtual offset in the output stream. + * @param [in] dest Destination buffer to pack the data. + * @param [in] max_length Maximum length to pack. + * + * @return The size of the data that was written to the destination buffer. + * Must be less than or equal to @e max_length. + */ + size_t (*pack)(void* state, size_t offset, void* dest, size_t max_length); + + /** + * @ingroup UCP_DATATYPE + * @brief Unpack data. + * + * The pointer refers to application defined unpack routine. + * + * @param [in] state State as returned by + * @ref ucp_generic_dt_ops::start_unpack + * "start_unpack()" routine. + * @param [in] offset Virtual offset in the input stream. + * @param [in] src Source to unpack the data from. + * @param [in] length Length to unpack. + * + * @return UCS_OK or an error if unpacking failed. + */ + ucs_status_t (*unpack)(void* state, size_t offset, const void* src, size_t length); + + /** + * @ingroup UCP_DATATYPE + * @brief Finish packing/unpacking. + * + * The pointer refers to application defined finish routine. + * + * @param [in] state State as returned by + * @ref ucp_generic_dt_ops::start_pack + * "start_pack()" + * and + * @ref ucp_generic_dt_ops::start_unpack + * "start_unpack()" + * routines. + */ + void (*finish)(void* state); +} ucp_generic_dt_ops_t; + +/** + * @ingroup UCP_DATATYPE + * @brief UCP datatype attributes + * + * This structure provides attributes of a UCP datatype. 
+ */ +typedef struct ucp_datatype_attr +{ + /** + * Mask of valid fields in this structure, using bits from @ref + * ucp_datatype_attr_field. Fields not specified in this mask will be + * ignored. Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Packed size of the given datatype. (output parameter) + */ + size_t packed_size; + + /** + * Pointer to a data buffer of the associated data type. + * This field is optional. + * If @ref UCP_DATATYPE_ATTR_FIELD_BUFFER is not set in @ref field_mask, + * this field defaults to @e NULL. + */ + const void* buffer; + + /** + * Number of elements in @a buffer. + * This value is optional. + * If @ref UCP_DATATYPE_ATTR_FIELD_COUNT is not set in @ref field_mask, the + * value of this field defaults to 1. + */ + size_t count; +} ucp_datatype_attr_t; + +/** + * @ingroup UCP_CONFIG + * @brief Tuning parameters for UCP library. + * + * The structure defines the parameters that are used for + * UCP library tuning during UCP library @ref ucp_init "initialization". + * + * @note UCP library implementation uses the @ref ucp_feature "features" + * parameter to optimize the library functionality and minimize memory + * footprint. For example, if the application does not require send/receive + * semantics UCP library may avoid allocation of expensive resources associated with + * send/receive queues. + */ +typedef struct ucp_params +{ + /** + * Mask of valid fields in this structure, using bits from @ref ucp_params_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * UCP @ref ucp_feature "features" that are used for library + * initialization. It is recommended for applications only to request + * the features that are required for an optimal functionality. + * This field must be specified.
+ */ + uint64_t features; + + /** + * The size of a reserved space in non-blocking requests. Typically + * applications use this space for caching own structures in order to avoid + * costly memory allocations, pointer dereferences, and cache misses. + * For example, MPI implementation can use this memory for caching MPI + * descriptors. + * This field defaults to 0 if not specified. + */ + size_t request_size; + + /** + * Pointer to a routine that is used for the request initialization. + * This function will be called only on the very first time a request memory + * is initialized, and may not be called again if a request is reused. + * If a request should be reset before the next reuse, it can be done before + * calling @ref ucp_request_free. + * + * @e NULL can be used if no such function is required, which is also the + * default if this field is not specified by @ref field_mask. + */ + ucp_request_init_callback_t request_init; + + /** + * Pointer to a routine that is responsible for final cleanup of the memory + * associated with the request. This routine may not be called every time a + * request is released. For some implementations, the cleanup call may be + * delayed and only invoked at @ref ucp_worker_destroy. + * + * @e NULL can be used if no such function is required, which is also the + * default if this field is not specified by @ref field_mask. + */ + ucp_request_cleanup_callback_t request_cleanup; + + /** + * Mask which specifies particular bits of the tag which can uniquely + * identify the sender (UCP endpoint) in tagged operations. + * This field defaults to 0 if not specified. + */ + uint64_t tag_sender_mask; + + /** + * This flag indicates if this context is shared by multiple workers + * from different threads. If so, this context needs thread safety + * support; otherwise, the context does not need to provide thread + * safety.
+ * For example, if the context is used by a single worker, and that + * worker is shared by multiple threads, this context does not need + * thread safety; if the context is used by worker 1 and worker 2, + * and worker 1 is used by thread 1 and worker 2 is used by thread 2, + * then this context needs thread safety. + * Note that actual thread mode may be different from mode passed + * to @ref ucp_init. To get actual thread mode use + * @ref ucp_context_query. + */ + int mt_workers_shared; + + /** + * An optimization hint of how many endpoints will be created on this context. + * For example, when used from MPI or SHMEM libraries, this number will specify + * the number of ranks (or processing elements) in the job. + * Does not affect semantics, but only transport selection criteria and the + * resulting performance. + * The value can be also set by UCX_NUM_EPS environment variable. In such case + * it will override the number of endpoints set by @e estimated_num_eps. + */ + size_t estimated_num_eps; + + /** + * An optimization hint for a single node. For example, when used from MPI or + * OpenSHMEM libraries, this number will specify the number of Processes Per + * Node (PPN) in the job. Does not affect semantics, only transport selection + * criteria and the resulting performance. + * The value can be also set by the UCX_NUM_PPN environment variable, which + * will override the value set by @e estimated_num_ppn. + */ + size_t estimated_num_ppn; + + /** + * Tracing and analysis tools can identify the context using this name. + * To retrieve the context's name, use @ref ucp_context_query, as the name + * you supply may be changed by UCX under some circumstances, e.g. a name + * conflict. This field is only assigned if you set + * @ref UCP_PARAM_FIELD_NAME in the field mask. If not, then a default + * unique name will be created for you. + */ + const char* name; + + /** + * An optimization hint for a single node.
For example, when used from MPI or + * OpenSHMEM libraries, this number will specify the local identifier on + * a single node in the job. Does not affect semantics, only transport + * selection criteria and the resulting performance. + * The value can be also set by the UCX_LOCAL_NODE_ID environment variable, + * which will override the id set by @e node_local_id. + */ + size_t node_local_id; +} ucp_params_t; + +/** + * @ingroup UCP_CONTEXT + * @brief Lib attributes. + * + * The structure defines the attributes that characterize the Library. + */ +typedef struct ucp_lib_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_lib_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Maximum level of thread support of the library, which is permanent + * throughout the lifetime of the library. Accordingly, the user can call + * @ref ucp_worker_create with appropriate + * @ref ucp_worker_params_t.thread_mode. + * For supported thread levels please see @ref ucs_thread_mode_t. + */ + ucs_thread_mode_t max_thread_level; +} ucp_lib_attr_t; + +/** + * @ingroup UCP_CONTEXT + * @brief Context attributes. + * + * The structure defines the attributes that characterize + * the particular context. + */ +typedef struct ucp_context_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_context_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Size of UCP non-blocking request. When pre-allocated request is used + * (e.g. in @ref ucp_tag_recv_nbr) it should have enough space to fit + * UCP request data, which is defined by this value. + */ + size_t request_size; + + /** + * Thread safe level of the context. For supported thread levels please + * see @ref ucs_thread_mode_t.
+ */ + ucs_thread_mode_t thread_mode; + + /** + * Mask of which memory types are supported, for supported memory types + * please see @ref ucs_memory_type_t. + */ + uint64_t memory_types; + + /** + * Tracing and analysis tools can use name to identify this UCX context. + */ + char name[UCP_ENTITY_NAME_MAX]; + + /** + * Size usable to allocate a counter memory for UCP Device API usage. + */ + size_t device_counter_size; +} ucp_context_attr_t; + +/** + * @ingroup UCP_WORKER + * @brief UCP worker attributes. + * + * The structure defines the attributes which characterize + * the particular worker. + */ +typedef struct ucp_worker_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_worker_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Thread safe level of the worker. + */ + ucs_thread_mode_t thread_mode; + + /** + * Flags indicating requested details of the worker address. + * If @ref UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS bit is set in the field_mask, + * this value should be set as well. Possible flags are specified + * in @ref ucp_worker_address_flags_t. @note This is an input attribute. + */ + uint32_t address_flags; + + /** + * Worker address, which can be passed to remote instances of the UCP library + * in order to connect to this worker. The memory for the address handle is + * allocated by @ref ucp_worker_query "ucp_worker_query()" routine, and + * must be released by using @ref ucp_worker_release_address + * "ucp_worker_release_address()" routine. + */ + ucp_address_t* address; + + /** + * Size of worker address in bytes. + */ + size_t address_length; + + /** + * Maximum allowed header size for @ref ucp_am_send_nbx routine. + */ + size_t max_am_header; + + /** + * Tracing and analysis tools can identify the worker using this name. 
+ */ + char name[UCP_ENTITY_NAME_MAX]; + + /** + * Maximum debug string size that can be filled with @ref ucp_request_query. + */ + size_t max_debug_string; +} ucp_worker_attr_t; + +/** + * @ingroup UCP_MEM + * @brief Tuning parameters for the comparison function @ref ucp_rkey_compare + * + * The structure defines the parameters that can be used for UCP library remote + * keys comparison using @ref ucp_rkey_compare routine. + * + */ +typedef struct ucp_rkey_compare_params +{ + /** + * Mask of valid fields in this structure, must currently be zero. Fields + * not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; +} ucp_rkey_compare_params_t; + +/** + * @ingroup UCP_WORKER + * @brief Tuning parameters for the UCP worker. + * + * The structure defines the parameters that are used for the + * UCP worker tuning during the UCP worker @ref ucp_worker_create "creation". + */ +typedef struct ucp_worker_params +{ + /** + * Mask of valid fields in this structure, using bits from @ref + * ucp_worker_params_field. Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * The parameter thread_mode suggests the thread safety mode which worker + * and the associated resources should be created with. This is an + * optional parameter. The default value is UCS_THREAD_MODE_SINGLE and + * it is used when the value of the parameter is not set. When this + * parameter along with its corresponding bit in the + * field_mask - UCP_WORKER_PARAM_FIELD_THREAD_MODE is set, the + * @ref ucp_worker_create attempts to create worker with this thread mode. + * The thread mode with which worker is created can differ from the + * suggested mode. The actual thread mode of the worker should be obtained + * using the query interface @ref ucp_worker_query. 
+ */ + ucs_thread_mode_t thread_mode; + + /** + * Mask of which CPUs worker resources should preferably be allocated on. + * This value is optional. + * If it's not set (along with its corresponding bit in the field_mask - + * UCP_WORKER_PARAM_FIELD_CPU_MASK), resources are allocated according to + * system's default policy. + */ + ucs_cpu_set_t cpu_mask; + + /** + * Mask of events (@ref ucp_wakeup_event_t) which are expected on wakeup. + * This value is optional. + * If it's not set (along with its corresponding bit in the field_mask - + * UCP_WORKER_PARAM_FIELD_EVENTS), all types of events will trigger on + * wakeup. + */ + unsigned events; + + /** + * User data associated with the current worker. + * This value is optional. + * If it's not set (along with its corresponding bit in the field_mask - + * UCP_WORKER_PARAM_FIELD_USER_DATA), it will default to NULL. + */ + void* user_data; + + /** + * External event file descriptor. + * This value is optional. + * If @ref UCP_WORKER_PARAM_FIELD_EVENT_FD is set in the field_mask, events + * on the worker will be reported on the provided event file descriptor. In + * this case, calling @ref ucp_worker_get_efd will result in an error. + * The provided file descriptor must be capable of aggregating notifications + * for arbitrary events, for example @c epoll(7) on Linux systems. + * @ref user_data will be used as the event user-data on systems which + * support it. For example, on Linux, it will be placed in + * @c epoll_data_t::ptr, when returned from @c epoll_wait(2). + * + * Otherwise, events will be reported to the event file descriptor returned + * from @ref ucp_worker_get_efd(). + */ + int event_fd; + + /** + * Worker flags. + * This value is optional. + * If @ref UCP_WORKER_PARAM_FIELD_FLAGS is not set in the field_mask, the + * value of this field will default to 0. + */ + uint64_t flags; + + /** + * Tracing and analysis tools can identify the worker using this name. 
To + * retrieve the worker's name, use @ref ucp_worker_query, as the name you + * supply may be changed by UCX under some circumstances, e.g. a name + * conflict. This field is only assigned if you set + * @ref UCP_WORKER_PARAM_FIELD_NAME in the field mask. If not, then a + * default unique name will be created for you. + */ + const char* name; + + /** + * Minimal address alignment of the active message data pointer as passed + * in argument @a data to the active message handler, defined as + * @a ucp_am_recv_callback_t. + */ + size_t am_alignment; + + /** + * Client id that is sent as part of the connection request payload + * when connecting to a remote socket address. On the remote side, + * this value can be obtained from @ref ucp_conn_request_h + * using @ref ucp_conn_request_query. + */ + uint64_t client_id; +} ucp_worker_params_t; + +/** + * @ingroup UCP_WORKER + * @brief UCP worker address attributes. + * + * The structure defines the attributes of the particular worker address. + */ +typedef struct ucp_worker_address_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_worker_address_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Unique id of the worker this address belongs to. + */ + uint64_t worker_uid; +} ucp_worker_address_attr_t; + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP endpoint performance evaluation request attributes. + * + * The structure defines the attributes which characterize + * the request for performance estimation of a particular endpoint. + */ +typedef struct +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_ep_perf_param_field_t. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Message size to use for determining performance. 
+ * This field must be initialized by the caller. + */ + size_t message_size; +} ucp_ep_evaluate_perf_param_t; + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP endpoint performance evaluation result attributes. + * + * The structure defines the attributes which characterize + * the result of performance estimation of a particular endpoint. + */ +typedef struct +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_ep_perf_attr_field_t. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Estimated time (in seconds) required to send a message of a given size + * on this endpoint. + * This field is set by the @ref ucp_ep_evaluate_perf function. + */ + double estimated_time; +} ucp_ep_evaluate_perf_attr_t; + +/** + * @ingroup UCP_WORKER + * @brief UCP listener attributes. + * + * The structure defines the attributes which characterize + * the particular listener. + */ +typedef struct ucp_listener_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_listener_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Sockaddr on which this listener is listening for incoming connection + * requests. + */ + struct sockaddr_storage sockaddr; +} ucp_listener_attr_t; + +/** + * @ingroup UCP_WORKER + * @brief UCP listener's connection request attributes. + * + * The structure defines the attributes that characterize + * the particular connection request received on the server side. + */ +typedef struct ucp_conn_request_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_conn_request_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. 
+ */ + uint64_t field_mask; + + /** + * The address of the remote client that sent the connection request to the + * server. + */ + struct sockaddr_storage client_address; + + /** + * Remote client id if remote endpoint's flag + * @ref UCP_EP_PARAMS_FLAGS_SEND_CLIENT_ID is set. + */ + uint64_t client_id; +} ucp_conn_request_attr_t; + +/** + * @ingroup UCP_WORKER + * @brief Parameters for a UCP listener object. + * + * This structure defines parameters for @ref ucp_listener_create, which is used to + * listen for incoming client/server connections. + */ +typedef struct ucp_listener_params +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_listener_params_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * An address in the form of a sockaddr. + * This field is mandatory for filling (along with its corresponding bit + * in the field_mask - @ref UCP_LISTENER_PARAM_FIELD_SOCK_ADDR). + * The @ref ucp_listener_create routine will return with an error if sockaddr + * is not specified. + */ + ucs_sock_addr_t sockaddr; + + /** + * Handler to endpoint creation in a client-server connection flow. + * In order for the callback inside this handler to be invoked, the + * UCP_LISTENER_PARAM_FIELD_ACCEPT_HANDLER needs to be set in the + * field_mask. + */ + ucp_listener_accept_handler_t accept_handler; + + /** + * Handler of an incoming connection request in a client-server connection + * flow. In order for the callback inside this handler to be invoked, the + * @ref UCP_LISTENER_PARAM_FIELD_CONN_HANDLER needs to be set in the + * field_mask. + * @note User is expected to call ucp_ep_create with set + * @ref UCP_EP_PARAM_FIELD_CONN_REQUEST flag to + * @ref ucp_ep_params_t::field_mask and + * @ref ucp_ep_params_t::conn_request in order to be able to receive + * communications. 
+ */ + ucp_listener_conn_handler_t conn_handler; +} ucp_listener_params_t; + +/** + * @ingroup UCP_ENDPOINT + * @brief Output parameter of @ref ucp_stream_worker_poll function. + * + * The structure defines the endpoint and its user data. + */ +typedef struct ucp_stream_poll_ep +{ + /** + * Endpoint handle. + */ + ucp_ep_h ep; + + /** + * User data associated with an endpoint passed in + * @ref ucp_ep_params_t::user_data. + */ + void* user_data; + + /** + * Reserved for future use. + */ + unsigned flags; + + /** + * Reserved for future use. + */ + uint8_t reserved[16]; +} ucp_stream_poll_ep_t; + +/** + * @ingroup UCP_MEM + * @brief Tuning parameters for the UCP memory mapping. + * + * The structure defines the parameters that are used for the + * UCP memory mapping tuning during the @ref ucp_mem_map "ucp_mem_map" routine. + */ +typedef struct ucp_mem_map_params +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_mem_map_params_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * If the address is not NULL, the routine maps (registers) the memory segment + * pointed to by this address. + * If the pointer is NULL, the library allocates mapped (registered) memory + * segment and returns its address in this argument. + * Therefore, this value is optional. + * If it's not set (along with its corresponding bit in the field_mask - + * @ref UCP_MEM_MAP_PARAM_FIELD_ADDRESS), the ucp_mem_map routine will consider + * address as set to NULL and will allocate memory. + */ + void* address; + + /** + * Length (in bytes) to allocate or map (register). + * This field is mandatory for filling (along with its corresponding bit + * in the field_mask - @ref UCP_MEM_MAP_PARAM_FIELD_LENGTH). + * The @ref ucp_mem_map routine will return with an error if the length isn't + * specified. + */ + size_t length; + + /** + * Allocation flags, e.g. 
@ref UCP_MEM_MAP_NONBLOCK. + * This value is optional. + * If it's not set (along with its corresponding bit in the field_mask - + * @ref UCP_MEM_MAP_PARAM_FIELD_FLAGS), the @ref ucp_mem_map routine will + * consider the flags as set to zero. + */ + unsigned flags; + + /** + * Memory protection mode, e.g. @ref UCP_MEM_MAP_PROT_LOCAL_READ. + * This value is optional. + * If it's not set, the @ref ucp_mem_map routine will consider + * the protection mode as set to UCP_MEM_MAP_PROT_LOCAL_READ|UCP_MEM_MAP_PROT_LOCAL_WRITE| + * UCP_MEM_MAP_PROT_REMOTE_READ|UCP_MEM_MAP_PROT_REMOTE_WRITE. + */ + unsigned prot; + + /** + * Memory type (for possible memory types see @ref ucs_memory_type_t). + * It is an optimization hint to avoid memory type detection for map buffer. + * The meaning of this field depends on the operation type. + * + * - Memory allocation: (@ref UCP_MEM_MAP_ALLOCATE flag is set) This field + * specifies the type of memory to allocate. If it's not set (along with its + * corresponding bit in the field_mask - @ref UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE), + * @ref UCS_MEMORY_TYPE_HOST will be assumed by default. + * + * - Memory registration: This field specifies the type of memory which is + * pointed to by @ref ucp_mem_map_params.address. If it's not set (along with its + * corresponding bit in the field_mask - @ref UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE), + * or set to @ref UCS_MEMORY_TYPE_UNKNOWN, the memory type will be detected + * internally. + */ + ucs_memory_type_t memory_type; + + /** + * Exported memory handle buffer as returned by @ref ucp_mem_map + * function for a memory handle created and packed by @ref ucp_memh_pack + * with @ref UCP_MEMH_PACK_FLAG_EXPORT flag. + * If this field is specified for @ref ucp_mem_map function, a resulting + * memory handle will be a mapping of peer memory instead of local + * memory.
+ * If the field is not set (along with its corresponding bit in the + * field_mask - @ref UCP_MEM_MAP_PARAM_FIELD_EXPORTED_MEMH_BUFFER), the + * @ref ucp_mem_map routine will consider the memory handle buffer to be + * set to NULL by default. + */ + const void* exported_memh_buffer; +} ucp_mem_map_params_t; + +/** + * @ingroup UCP_CONTEXT + * @brief UCP receive information descriptor + * + * The UCP receive information descriptor is allocated by application and filled + * in with the information about the received message by @ref ucp_tag_probe_nb + * or @ref ucp_tag_recv_request_test routines or + * @ref ucp_tag_recv_callback_t callback argument. + */ +struct ucp_tag_recv_info +{ + /** Sender tag */ + ucp_tag_t sender_tag; + /** The size of the received data */ + size_t length; +}; + +/** + * @ingroup UCP_CONTEXT + * @brief Operation parameters passed to @ref ucp_tag_send_nbx, + * @ref ucp_tag_send_sync_nbx, @ref ucp_tag_recv_nbx, @ref ucp_put_nbx, + * @ref ucp_get_nbx, @ref ucp_am_send_nbx and @ref ucp_am_recv_data_nbx. + * + * The structure @ref ucp_request_param_t is used to specify datatype of + * operation, provide user request in case the external request is used, + * set completion callback and custom user data passed to this callback. + * + * Example: implementation of function to send contiguous buffer to ep and + * invoke callback function at operation completion. If the + * operation completed immediately (status == UCS_OK) then + * callback is not called. 
+ * + * @code{.c} + * ucs_status_ptr_t send_data(ucp_ep_h ep, void *buffer, size_t length, + * ucp_tag_t tag, void *request) + * { + * ucp_request_param_t param = { + * .op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK | + * UCP_OP_ATTR_FIELD_REQUEST, + * .request = request, + * .cb.send = custom_send_callback_f, + * .user_data = pointer_to_user_context_passed_to_cb + * }; + * + * ucs_status_ptr_t status; + * + * status = ucp_tag_send_nbx(ep, buffer, length, tag, &param); + * if (UCS_PTR_IS_ERR(status)) { + * handle_error(status); + * } else if (status == UCS_OK) { + * // operation is completed + * } + * + * return status; + * } + * @endcode + */ +typedef struct +{ + /** + * Mask of valid fields in this structure and operation flags, using + * bits from @ref ucp_op_attr_t. Fields not specified in this mask will be + * ignored. Provides ABI compatibility with respect to adding new fields. + */ + uint32_t op_attr_mask; + + /* Operation specific flags. */ + uint32_t flags; + + /** + * Request handle allocated by the user. There should + * be at least UCP request size bytes of available + * space before the @a request. The size of the UCP request + * can be obtained by @ref ucp_context_query function. + */ + void* request; + + /** + * Callback function that is invoked whenever the + * send or receive operation is completed. + */ + union + { + ucp_send_nbx_callback_t send; + ucp_tag_recv_nbx_callback_t recv; + ucp_stream_recv_nbx_callback_t recv_stream; + ucp_am_recv_data_nbx_callback_t recv_am; + } cb; + + /** + * Datatype descriptor for the elements in the buffer. In case the + * op_attr_mask & UCP_OP_ATTR_FIELD_DATATYPE bit is not set, then use + * default datatype ucp_dt_make_contig(1) + */ + ucp_datatype_t datatype; + + /** + * Pointer to user data passed to callback function. + */ + void* user_data; + + /** + * Reply buffer. Can be used for storing operation result, for example by + * @ref ucp_atomic_op_nbx. + */ + void* reply_buffer; + + /** + * Memory type of the buffer.
see @ref ucs_memory_type_t for possible memory types. + * An optimization hint to avoid memory type detection for request buffer. + * If this value is not set (along with its corresponding bit in the op_attr_mask - + * @ref UCP_OP_ATTR_FIELD_MEMORY_TYPE), then use default @ref UCS_MEMORY_TYPE_UNKNOWN + * which means the memory type will be detected internally. + */ + ucs_memory_type_t memory_type; + + /** + * Pointer to the information where received data details are stored + * in case of an immediate completion of receive operation. The user has to + * provide a pointer to valid memory/variable which will be updated on function + * return. + */ + union + { + size_t* length; /* Length of received message in bytes. + Relevant for non-tagged receive + operations. */ + ucp_tag_recv_info_t* tag_info; /* Information about received message. + Relevant for @a ucp_tag_recv_nbx + function. */ + } recv_info; + + /** + * Memory handle for pre-registered buffer. + * If the handle is provided, protocols that require registered memory can + * skip the registration step. As a result, the communication request + * overhead can be reduced and the request can be completed faster. + * The memory handle should be obtained by calling @ref ucp_mem_map. + */ + ucp_mem_h memh; + +} ucp_request_param_t; + +/** + * @ingroup UCP_COMM + * @brief Attributes of a particular request. + */ +typedef struct +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_req_attr_field. Fields not specified in this mask will + * be ignored. Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Pointer to allocated string of size @ref debug_string_size that will be filled + * with debug information about transports and protocols that were selected + * to complete the request. + */ + char* debug_string; + + /** + * Size of the @ref debug_string. String will be filled up to this size. 
+ * Maximum possible size debug string can be obtained by querying the worker + * via @ref ucp_worker_query. + */ + size_t debug_string_size; + + /** + * Status of the request. The same as @ref ucp_request_check_status. + */ + ucs_status_t status; + + /** + * Detected memory type of the buffer passed to the operation. + */ + ucs_memory_type_t mem_type; +} ucp_request_attr_t; + +/** + * @ingroup UCP_WORKER + * @brief Active Message handler parameters passed to + * @ref ucp_worker_set_am_recv_handler routine. + */ +typedef struct ucp_am_handler_param +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_am_handler_param_field. Fields not specified in this mask will + * be ignored. Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Active Message id. + * @warning Value must be between 0 and UINT16_MAX. + */ + unsigned id; + + /** + * Handler flags as defined by @ref ucp_am_cb_flags. + */ + uint32_t flags; + + /** + * Active Message callback. To clear the already set callback, this value + * should be set to NULL. + */ + ucp_am_recv_callback_t cb; + + /** + * Active Message argument, which will be passed in to every invocation of + * @ref ucp_am_recv_callback_t function as the @a arg argument. + */ + void* arg; +} ucp_am_handler_param_t; + +/** + * @ingroup UCP_WORKER + * @brief Operation parameters provided in @ref ucp_am_recv_callback_t callback. + */ +struct ucp_am_recv_param +{ + /** + * Mask of valid fields in this structure and receive operation flags, using + * bits from @ref ucp_am_recv_attr_t. Fields not specified in this mask will + * be ignored. Provides ABI compatibility with respect to adding new fields. + */ + uint64_t recv_attr; + + /** + * Endpoint, which can be used for the reply to this message. + */ + ucp_ep_h reply_ep; +}; + +/** + * @ingroup UCP_CONTEXT + * @brief Get attributes of the UCP library. 
+ * + * This routine fetches information about the UCP library attributes. + * + * @param [out] attr Filled with attributes of the UCP library. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_lib_query(ucp_lib_attr_t* attr); + +/** + * @ingroup UCP_CONFIG + * @brief Read UCP configuration descriptor + * + * The routine fetches the information about UCP library configuration from + * the run-time environment. Then, the fetched descriptor is used for + * UCP library @ref ucp_init "initialization". The application can print out the + * descriptor using @ref ucp_config_print "print" routine. In addition + * the application is responsible for @ref ucp_config_release "releasing" the + * descriptor back to the UCP library. + * + * @param [in] env_prefix If non-NULL, the routine searches for the + * environment variables that start with + * @e \<env_prefix\>_UCX_ prefix. + * Otherwise, the routine searches for the + * environment variables that start with + * @e UCX_ prefix. + * @param [in] filename If non-NULL, read configuration from the file + * defined by @e filename. If the file does not + * exist, it will be ignored and no error reported + * to the application. + * @param [out] config_p Pointer to configuration descriptor as defined by + * @ref ucp_config_t "ucp_config_t". + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_config_read(const char* env_prefix, const char* filename, ucp_config_t** config_p); + +/** + * @ingroup UCP_CONFIG + * @brief Release configuration descriptor + * + * The routine releases the configuration descriptor that was allocated through + * @ref ucp_config_read "ucp_config_read()" routine. + * + * @param [in] config Configuration descriptor as defined by + * @ref ucp_config_t "ucp_config_t". + */ +void +ucp_config_release(ucp_config_t* config); + +/** + * @ingroup UCP_CONFIG + * @brief Modify context configuration.
+ * + * The routine changes one configuration setting stored in @ref ucp_config_t + * "configuration" descriptor. + * + * @param [in] config Configuration to modify. + * @param [in] name Configuration variable name. + * @param [in] value Value to set. + * + * @return Error code. + */ +ucs_status_t +ucp_config_modify(ucp_config_t* config, const char* name, const char* value); + +/** + * @ingroup UCP_CONFIG + * @brief Print configuration information + * + * The routine prints the configuration information that is stored in + * @ref ucp_config_t "configuration" descriptor. + * + * @todo Expose ucs_config_print_flags_t + * + * @param [in] config @ref ucp_config_t "Configuration descriptor" + * to print. + * @param [in] stream Output stream to print the configuration to. + * @param [in] title Configuration title to print. + * @param [in] print_flags Flags that control various printing options. + */ +void +ucp_config_print(const ucp_config_t* config, FILE* stream, const char* title, + ucs_config_print_flags_t print_flags); + +/** + * @ingroup UCP_CONTEXT + * @brief Get UCP library version. + * + * This routine returns the UCP library version. + * + * @param [out] major_version Filled with library major version. + * @param [out] minor_version Filled with library minor version. + * @param [out] release_number Filled with library release number. + */ +void +ucp_get_version(unsigned* major_version, unsigned* minor_version, + unsigned* release_number); + +/** + * @ingroup UCP_CONTEXT + * @brief Get UCP library version as a string. + * + * This routine returns the UCP library version as a string which consists of: + * "major.minor.release". + */ +const char* +ucp_get_version_string(void); + +/** @cond PRIVATE_INTERFACE */ +/** + * @ingroup UCP_CONTEXT + * @brief UCP context initialization with particular API version. + * + * This is an internal routine used to check compatibility with a particular + * API version. @ref ucp_init should be used to create UCP context. 
+ */ +ucs_status_t +ucp_init_version(unsigned api_major_version, unsigned api_minor_version, + const ucp_params_t* params, const ucp_config_t* config, + ucp_context_h* context_p); +/** @endcond */ + +/** + * @ingroup UCP_CONTEXT + * @brief UCP context initialization. + * + * This routine creates and initializes a @ref ucp_context_h + * "UCP application context". + * + * @warning This routine must be called before any other UCP function + * call in the application. + * + * This routine checks API version compatibility, then discovers the available + * network interfaces, and initializes the network resources required for + * discovering the network and memory related devices. + * This routine is responsible for initializing all information required for + * a particular application scope, for example, MPI application, OpenSHMEM + * application, etc. + * + * @note + * @li Higher level protocols can add additional communication isolation, as + * MPI does with its communicator object. A single communication context may + * be used to support multiple MPI communicators. + * @li The context can be used to isolate the communication that corresponds to + * different protocols. For example, if MPI and OpenSHMEM are using UCP to + * isolate the MPI communication from the OpenSHMEM communication, users should + * use a different application context for each of the communication libraries. + * + * @param [in] config UCP configuration descriptor allocated through + * @ref ucp_config_read "ucp_config_read()" routine. + * @param [in] params User defined @ref ucp_params_t configurations for the + * @ref ucp_context_h "UCP application context". + * @param [out] context_p Initialized @ref ucp_context_h + * "UCP application context".
+ * + * @return Error code as defined by @ref ucs_status_t + */ +static inline ucs_status_t +ucp_init(const ucp_params_t* params, const ucp_config_t* config, ucp_context_h* context_p) +{ + return ucp_init_version(UCP_API_MAJOR, UCP_API_MINOR, params, config, context_p); +} + +/** + * @ingroup UCP_CONTEXT + * @brief Release UCP application context. + * + * This routine finalizes and releases the resources associated with a + * @ref ucp_context_h "UCP application context". + * + * @warning An application cannot call any UCP routine + * once the UCP application context is released. + * + * The cleanup process releases and shuts down all resources associated with + * the application context. After calling this routine, calling any UCP + * routine without calling @ref ucp_init "UCP initialization routine" is invalid. + * + * @param [in] context_p Handle to @ref ucp_context_h + * "UCP application context". + */ +void +ucp_cleanup(ucp_context_h context_p); + +/** + * @ingroup UCP_CONTEXT + * @brief Get attributes specific to a particular context. + * + * This routine fetches information about the context. + * + * @param [in] context_p Handle to @ref ucp_context_h + * "UCP application context". + * + * @param [out] attr Filled with attributes of @p context_p context. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_context_query(ucp_context_h context_p, ucp_context_attr_t* attr); + +/** + * @ingroup UCP_MEM + * @brief Compare two remote keys + * + * This routine compares two remote keys. They must belong to the same worker. + * + * It sets the @a result argument to < 0 if rkey1 is lower than rkey2, 0 if they + * are equal or > 0 if rkey1 is greater than rkey2. The result value can be used + * for sorting remote keys.
+ * + * @param [in] worker Worker object both rkeys are referring to + * @param [in] rkey1 First rkey to compare + * @param [in] rkey2 Second rkey to compare + * @param [in] params Additional parameters to the comparison + * @param [out] result Result of the comparison + * + * @return UCS_OK - @a result contains the comparison result + * @return UCS_ERR_INVALID_PARAM - The routine arguments are invalid + * @return Other - Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_rkey_compare(ucp_worker_h worker, ucp_rkey_h rkey1, ucp_rkey_h rkey2, + const ucp_rkey_compare_params_t* params, int* result); + +/** + * @ingroup UCP_CONTEXT + * @brief Print context information. + * + * This routine prints information about the context configuration, including + * memory domains, transport resources, and other useful information associated + * with the context. + * + * @param [in] context Print this context object's configuration. + * @param [in] stream Output stream on which to print the information. + */ +void +ucp_context_print_info(const ucp_context_h context, FILE* stream); + +/** + * @ingroup UCP_WORKER + * @brief Create a worker object. + * + * This routine allocates and initializes a @ref ucp_worker_h "worker" object. + * Each worker is associated with one and only one @ref ucp_context_h + * "application" context. At the same time, an application context can create + * multiple @ref ucp_worker_h "workers" in order to enable concurrent access to + * communication resources. For example, an application can allocate a dedicated + * worker for each application thread, where every worker can be progressed + * independently of others. + * + * @note The worker object is allocated within context of the calling thread + * + * @param [in] context Handle to @ref ucp_context_h + * "UCP application context". + * @param [in] params User defined @ref ucp_worker_params_t configurations for the + * @ref ucp_worker_h "UCP worker".
+ * @param [out] worker_p A pointer to the worker object allocated by the + * UCP library + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_worker_create(ucp_context_h context, const ucp_worker_params_t* params, + ucp_worker_h* worker_p); + +/** + * @ingroup UCP_WORKER + * @brief Destroy a worker object. + * + * This routine releases the resources associated with a + * @ref ucp_worker_h "UCP worker". + * + * @warning Once the UCP worker is destroyed, the worker handle cannot be used with any + * UCP routine. + * + * The destroy process releases and shuts down all resources associated with + * the @ref ucp_worker_h "worker". + * + * @param [in] worker Worker object to destroy. + */ +void +ucp_worker_destroy(ucp_worker_h worker); + +/** + * @ingroup UCP_WORKER + * @brief Get attributes specific to a particular worker. + * + * This routine fetches information about the worker. + * + * @param [in] worker Worker object to query. + * @param [out] attr Filled with attributes of worker. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_worker_query(ucp_worker_h worker, ucp_worker_attr_t* attr); + +/** + * @ingroup UCP_WORKER + * @brief Print information about the worker. + * + * This routine prints information about the protocols being used, thresholds, + * UCT transport methods, and other useful information associated with the worker. + * + * @param [in] worker Worker object to print information for. + * @param [in] stream Output stream to print the information to. + */ +void +ucp_worker_print_info(ucp_worker_h worker, FILE* stream); + +/** + * @ingroup UCP_WORKER + * @brief Release an address of the worker object. + * + * This routine releases an @ref ucp_address_t "address handle" associated with + * the @ref ucp_worker_h "worker" object. + * + * @warning Once the address is released, the address handle cannot be used with any + * UCP routine.
+ * + * @param [in] worker Worker object that is associated with the + * address object. + * @param [in] address Address to release; the address object has to + * be allocated using @ref ucp_worker_query + * "ucp_worker_query()" routine. + * + * @todo We should consider to change it to return int so we can catch the + * errors when worker != address + */ +void +ucp_worker_release_address(ucp_worker_h worker, ucp_address_t* address); + +/** + * @ingroup UCP_WORKER + * @brief Get attributes of a particular worker address. + * + * This routine fetches information about the worker address. The address can be + * that of either a local or a remote worker. + * + * @param [in] address Worker address to query. + * @param [out] attr Filled with attributes of the worker address. + * + * @return Error code as defined by @ref ucs_status_t. + */ +ucs_status_t +ucp_worker_address_query(ucp_address_t* address, ucp_worker_address_attr_t* attr); + +/** + * @ingroup UCP_WORKER + * @brief Progress all communications on a specific worker. + * + * This routine explicitly progresses all communication operations on a worker. + * + * @note + * @li Typically, request wait and test routines call @ref + * ucp_worker_progress "this routine" to progress any outstanding operations. + * @li Transport layers, implementing asynchronous progress using threads, + * require callbacks and other user code to be thread safe. + * @li The state of communication can be advanced (progressed) by blocking + * routines. Nevertheless, the non-blocking routines cannot be used for + * communication progress. + * + * @param [in] worker Worker to progress. + * + * @return Non-zero if any communication was progressed, zero otherwise. + */ +unsigned +ucp_worker_progress(ucp_worker_h worker); + +/** + * @ingroup UCP_WORKER + * @brief Poll for endpoints that are ready to consume streaming data. + * + * This non-blocking routine returns endpoints on a worker which are ready + * to consume streaming data.
The ready endpoints are placed in @a poll_eps + * array, and the function return value indicates how many are there. + * + * @param [in] worker Worker to poll. + * @param [out] poll_eps Pointer to array of endpoints, should be + * allocated by user. + * @param [in] max_eps Maximum number of endpoints that should be filled + * in @a poll_eps. + * @param [in] flags Reserved for future use. + * + * @return Negative value indicates an error according to @ref ucs_status_t. + * On success, non-negative value (less or equal @a max_eps) indicates + * actual number of endpoints filled in @a poll_eps array. + * + */ +ssize_t +ucp_stream_worker_poll(ucp_worker_h worker, ucp_stream_poll_ep_t* poll_eps, + size_t max_eps, unsigned flags); + +/** + * @ingroup UCP_WAKEUP + * @brief Obtain an event file descriptor for event notification. + * + * This routine returns a valid file descriptor for polling functions. + * The file descriptor will get signaled when an event occurs, as part of the + * wake-up mechanism. Signaling means a call to poll() or select() with this + * file descriptor will return at this point, with this descriptor marked as the + * reason (or one of the reasons) the function has returned. The user does not + * need to release the obtained file descriptor. + * + * The wake-up mechanism exists to allow for the user process to register for + * notifications on events of the underlying interfaces, and wait until such + * occur. This is an alternative to repeated polling for request completion. + * The goal is to allow for waiting while consuming minimal resources from the + * system. This is recommended for cases where traffic is infrequent, and + * latency can be traded for lower resource consumption while waiting for it. + * + * There are two alternative ways to use the wakeup mechanism: the first is the + * file descriptor obtained per worker (this function) and the second is the + * @ref ucp_worker_wait function for waiting on the next event internally. 
+ * + * @note UCP @ref ucp_feature "features" have to be triggered + * with @ref UCP_FEATURE_WAKEUP to select proper transport + * + * @param [in] worker Worker of notified events. + * @param [out] fd File descriptor. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_worker_get_efd(ucp_worker_h worker, int* fd); + +/** + * @ingroup UCP_WAKEUP + * @brief Wait for an event of the worker. + * + * This routine waits (blocking) until an event has happened, as part of the + * wake-up mechanism. + * + * This function is guaranteed to return only if new communication events occur + * on the @a worker. Therefore one must drain all existing events before waiting + * on the file descriptor. This can be achieved by calling + * @ref ucp_worker_progress repeatedly until it returns 0. + * + * There are two alternative ways to use the wakeup mechanism. The first is by + * polling on a per-worker file descriptor obtained from @ref ucp_worker_get_efd. + * The second is by using this function to perform an internal wait for the next + * event associated with the specified worker. + * + * @note During the blocking call the wake-up mechanism relies on other means of + * notification and may not progress some of the requests as it would when + * calling @ref ucp_worker_progress (which is not invoked in that duration). + * + * @note UCP @ref ucp_feature "features" have to be triggered + * with @ref UCP_FEATURE_WAKEUP to select proper transport + * + * @param [in] worker Worker to wait for events on. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_worker_wait(ucp_worker_h worker); + +/** + * @ingroup UCP_WAKEUP + * @brief Wait for memory update on the address + * + * This routine waits for a memory update at the local memory @a address. This + * is a blocking routine. The routine returns when the memory address is + * updated ("write") or an event occurs in the system. 
+ * + * This function is guaranteed to return only if new communication events occur + * on the worker or @a address is modified. Therefore one must drain all existing + * events before waiting on the file descriptor. This can be achieved by calling + * @ref ucp_worker_progress repeatedly until it returns 0. + * + * @note This routine can be used by an application that executes busy-waiting + * loop checking for a memory update. Instead of continuous busy-waiting on an + * address the application can use @a ucp_worker_wait_mem, which may suspend + * execution until the memory is updated. The goal of the routine is to provide + * an opportunity for energy savings for architectures that support this + * functionality. + * + * @param [in] worker Worker to wait for updates on. + * @param [in] address Local memory address + */ +void +ucp_worker_wait_mem(ucp_worker_h worker, void* address); + +/** + * @ingroup UCP_WAKEUP + * @brief Turn on event notification for the next event. + * + * This routine needs to be called before waiting on each notification on this + * worker, so will typically be called once the processing of the previous event + * is over, as part of the wake-up mechanism. + * + * The worker must be armed before waiting on an event (must be re-armed after + * it has been signaled for reuse) with @ref ucp_worker_arm. + * The events triggering a signal of the file descriptor from + * @ref ucp_worker_get_efd depend on the interfaces used by the worker and + * defined in the transport layer, and typically represent a request completion + * or newly available resources. It can also be triggered by calling + * @ref ucp_worker_signal . + * + * The file descriptor is guaranteed to become signaled only if new communication + * events occur on the @a worker. Therefore one must drain all existing events + * before waiting on the file descriptor. This can be achieved by calling + * @ref ucp_worker_progress repeatedly until it returns 0. 
+ * + * @code {.c} + * void application_initialization() { + * // should be called once in application init flow and before + * // process_communication() is used + * ... + * status = ucp_worker_get_efd(worker, &fd); + * ... + * } + * + * void process_communication() { + * // should be called every time need to wait for some condition such as + * // ucp request completion in sleep mode. + * + * for (;;) { + * // check for stop condition as long as progress is made + * if (check_for_events()) { + * break; + * } else if (ucp_worker_progress(worker)) { + * continue; // some progress happened but condition not met + * } + * + * // arm the worker and clean-up fd + * status = ucp_worker_arm(worker); + * if (UCS_OK == status) { + * poll(&fds, nfds, timeout); // wait for events (sleep mode) + * } else if (UCS_ERR_BUSY == status) { + * continue; // could not arm, need to progress more + * } else { + * abort(); + * } + * } + * } + * @endcode + * + * @note UCP @ref ucp_feature "features" have to be triggered + * with @ref UCP_FEATURE_WAKEUP to select proper transport + * + * @param [in] worker Worker of notified events. + * + * @return ::UCS_OK The operation completed successfully. File descriptor + * will be signaled by new events. + * @return ::UCS_ERR_BUSY There are unprocessed events which prevent the + * file descriptor from being armed. These events should + * be removed by calling @ref ucp_worker_progress(). + * The operation is not completed. File descriptor + * will not be signaled by new events. + * @return @ref ucs_status_t "Other" different error codes in case of issues. + */ +ucs_status_t +ucp_worker_arm(ucp_worker_h worker); + +/** + * @ingroup UCP_WAKEUP + * @brief Cause an event of the worker. + * + * This routine signals that the event has happened, as part of the wake-up + * mechanism. 
This function causes a blocking call to @ref ucp_worker_wait or + * waiting on a file descriptor from @ref ucp_worker_get_efd to return, even + * if no event from the underlying interfaces has taken place. + * + * @note It's safe to use this routine from any thread, even if UCX is compiled + * without multi-threading support and/or initialized with any value of + * @ref ucp_params_t::mt_workers_shared and + * @ref ucp_worker_params_t::thread_mode parameters + * + * @param [in] worker Worker to signal. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_worker_signal(ucp_worker_h worker); + +/** + * @ingroup UCP_WORKER + * @brief Create a listener to accept connections on. Connection requests on + * the listener will arrive at a local address specified by the user. + * + * This routine creates a new listener object that is bound to a specific + * local address. + * The listener will listen to incoming connection requests. + * After receiving a request from the remote peer, an endpoint to this peer + * will be created - either right away or by calling @ref ucp_ep_create, + * as specified by the callback type in @ref ucp_listener_params_t. + * The user's callback will be invoked once the endpoint is created. + * + * @param [in] worker Worker object to create the listener on. + * @param [in] params User defined @ref ucp_listener_params_t + * configurations for the @ref ucp_listener_h. + * @param [out] listener_p A handle to the created listener, can be released + * by calling @ref ucp_listener_destroy + * + * @return Error code as defined by @ref ucs_status_t + * + * @note @ref ucp_listener_params_t::conn_handler or + * @ref ucp_listener_params_t::accept_handler must be provided to be + * able to handle incoming connections.
+ */ +ucs_status_t +ucp_listener_create(ucp_worker_h worker, const ucp_listener_params_t* params, + ucp_listener_h* listener_p); + +/** + * @ingroup UCP_WORKER + * @brief Stop accepting connections on a local address of the worker object. + * + * This routine unbinds the worker from the given handle and stops + * listening for incoming connection requests on it. + * + * @param [in] listener A handle to the listener to stop listening on. + */ +void +ucp_listener_destroy(ucp_listener_h listener); + +/** + * @ingroup UCP_WORKER + * @brief Get attributes specific to a particular listener. + * + * This routine fetches information about the listener. + * + * @param [in] listener listener object to query. + * @param [out] attr Filled with attributes of the listener. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_listener_query(ucp_listener_h listener, ucp_listener_attr_t* attr); + +/** + * @ingroup UCP_WORKER + * @brief Get attributes specific to a particular connection request received + * on the server side. + * + * This routine fetches information about the connection request. + * + * @param [in] conn_request connection request object to query. + * @param [out] attr Filled with attributes of the connection request. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_conn_request_query(ucp_conn_request_h conn_request, ucp_conn_request_attr_t* attr); + +/** + * @ingroup UCP_COMM + * @brief Get information about ucp_request. + * + * @param [in] request Non-blocking request to query. + * @param [out] attr Filled with attributes of the request. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_request_query(void* request, ucp_request_attr_t* attr); + +/** + * @ingroup UCP_ENDPOINT + * @brief Create and connect an endpoint. 
+ * + * This routine creates and connects an @ref ucp_ep_h "endpoint" on a @ref + * ucp_worker_h "local worker" for a destination @ref ucp_address_t "address" + * that identifies the remote @ref ucp_worker_h "worker". This function is + * non-blocking, and communications may begin immediately after it returns. If + * the connection process is not completed, communications may be delayed. + * The created @ref ucp_ep_h "endpoint" is associated with one and only one + * @ref ucp_worker_h "worker". + * + * @param [in] worker Handle to the worker; the endpoint + * is associated with the worker. + * @param [in] params User defined @ref ucp_ep_params_t configurations + * for the @ref ucp_ep_h "UCP endpoint". + * @param [out] ep_p A handle to the created endpoint. + * + * @return Error code as defined by @ref ucs_status_t + * + * @note One of the following fields has to be specified: + * - ucp_ep_params_t::address + * - ucp_ep_params_t::sockaddr + * - ucp_ep_params_t::conn_request + + * @note By default, ucp_ep_create() will connect an endpoint to itself if + * the endpoint is destined to the same @a worker on which it was created, + * i.e. @a params.address belongs to @a worker. This behavior can be changed by + * passing the @ref UCP_EP_PARAMS_FLAGS_NO_LOOPBACK flag in @a params.flags. + * In that case, the endpoint will be connected to the *next* endpoint created + * in the same way on the same @a worker. + */ +ucs_status_t +ucp_ep_create(ucp_worker_h worker, const ucp_ep_params_t* params, ucp_ep_h* ep_p); + +/** + * @ingroup UCP_ENDPOINT + * + * @brief Non-blocking @ref ucp_ep_h "endpoint" closure. + * + * @param [in] ep Handle to the endpoint to close. + * @param [in] param Operation parameters, see @ref ucp_request_param_t. + * This operation supports specific flags, which can be + * passed in @a param by @ref ucp_request_param_t.flags. + * The exact set of flags is defined + * by @ref ucp_ep_close_flags_t. + * + * @return NULL - The endpoint is closed successfully. 
+ * @return UCS_PTR_IS_ERR(_ptr) - The closure failed and an error code indicates + * the transport level status. However, resources + * are released and the @a endpoint can no longer + * be used. + * @return otherwise - The closure process is started, and can be + * completed at any point in time. A request + * handle is returned to the application in order + * to track progress of the endpoint closure. + */ +ucs_status_ptr_t +ucp_ep_close_nbx(ucp_ep_h ep, const ucp_request_param_t* param); + +/** + * @ingroup UCP_WORKER + * + * @brief Reject an incoming connection request. + * + * Reject the incoming connection request and release associated resources. If + * the remote initiator endpoint has set an @ref ucp_ep_params_t::err_handler, + * it will be invoked with status @ref UCS_ERR_REJECTED. + * + * @param [in] listener Handle to the listener on which the connection + * request was received. + * @param [in] conn_request Handle to the connection request to reject. + * + * @return Error code as defined by @ref ucs_status_t + * + */ +ucs_status_t +ucp_listener_reject(ucp_listener_h listener, ucp_conn_request_h conn_request); + +/** + * @ingroup UCP_ENDPOINT + * @brief Print endpoint information. + * + * This routine prints information about the endpoint transport methods, their + * thresholds, and other useful information associated with the endpoint. + * + * @param [in] ep Endpoint object whose configuration to print. + * @param [in] stream Output stream to print the information to. + */ +void +ucp_ep_print_info(ucp_ep_h ep, FILE* stream); + +/** + * @ingroup UCP_ENDPOINT + * + * @brief Non-blocking flush of outstanding AMO and RMA operations on the + * @ref ucp_ep_h "endpoint". + * + * This routine flushes all outstanding AMO and RMA communications on the + * @ref ucp_ep_h "endpoint". All the AMO and RMA operations issued on the + * @a ep prior to this call are completed both at the origin and at the target + * @ref ucp_ep_h "endpoint" when this call returns. 
+ * + * @param [in] ep UCP endpoint. + * @param [in] param Operation parameters, see @ref ucp_request_param_t. + * + * @return NULL - The flush operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. + * @return otherwise - Flush operation was scheduled and can be + * completed at any point in time. The request + * handle is returned to the application in + * order to track progress. + * + * + * The following example demonstrates how blocking flush can be implemented + * using non-blocking flush: + * @code {.c} + * ucs_status_t blocking_ep_flush(ucp_ep_h ep, ucp_worker_h worker) + * { + * ucp_request_param_t param; + * void *request; + * + * param.op_attr_mask = 0; + * request = ucp_ep_flush_nbx(ep, &param); + * if (request == NULL) { + * return UCS_OK; + * } else if (UCS_PTR_IS_ERR(request)) { + * return UCS_PTR_STATUS(request); + * } else { + * ucs_status_t status; + * do { + * ucp_worker_progress(worker); + * status = ucp_request_check_status(request); + * } while (status == UCS_INPROGRESS); + * ucp_request_free(request); + * return status; + * } + * } + * @endcode + */ +ucs_status_ptr_t +ucp_ep_flush_nbx(ucp_ep_h ep, const ucp_request_param_t* param); + +/** + * @ingroup UCP_ENDPOINT + * @brief Estimate performance characteristics of a specific endpoint. + * + * This routine fetches information about the endpoint. + * + * @param [in] ep Endpoint to query. + * @param [in] param Filled by the user with request params. + * @param [out] attr Filled with performance estimation of the given operation + * on the endpoint. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_ep_evaluate_perf(ucp_ep_h ep, const ucp_ep_evaluate_perf_param_t* param, + ucp_ep_evaluate_perf_attr_t* attr); + +/** + * @ingroup UCP_MEM + * @brief Map or allocate memory for zero-copy operations.
+ * + * This routine maps and/or allocates a user-specified memory segment with @ref + * ucp_context_h "UCP application context" and the network resources associated + * with it. If the application specifies NULL as an address for the memory + * segment, the routine allocates a mapped memory segment and returns its + * address in the @a address_p argument. The network stack associated with an + * application context can typically send and receive data from the mapped + * memory without CPU intervention; some devices and associated network stacks + * require the memory to be mapped to send and receive data. The @ref ucp_mem_h + * "memory handle" includes all information required to access the memory + * locally using UCP routines, while @ref ucp_rkey_h + * "remote registration handle" provides the information that is necessary for + * remote memory access. + * + * @note + * Another well-known term for the "map" operation that is typically + * used in the context of networking is memory "registration" or "pinning". The + * UCP library registers the memory with the available hardware so it can be + * accessed directly by the hardware. + * + * Memory mapping assumptions: + * @li A given memory segment can be mapped by several different communication + * stacks, if these are compatible. + * @li The @a memh_p handle returned may be used with any sub-region of the + * mapped memory. + * @li If a large segment is registered, and then segmented for subsequent use + * by a user, then the user is responsible for segmentation and subsequent + * management. + * + * + * + * + * + * + * + *
Matrix of behavior: + * + * | parameter/flag | @ref UCP_MEM_MAP_NONBLOCK "NONBLOCK" | @ref UCP_MEM_MAP_ALLOCATE "ALLOCATE" | @ref UCP_MEM_MAP_FIXED "FIXED" | @ref ucp_mem_map_params.address "address" | @b result |
+ * | :---: | :---: | :---: | :---: | :---: | :---: |
+ * | @b value | 0/1 - the value only affects the register/map phase | 0 | 0 | 0 | @ref anch_err "error" if length > 0 |
+ * | @b value | 0/1 | 1 | 0 | 0 | @ref anch_alloc_reg "alloc+register" |
+ * | @b value | 0/1 | 0 | 1 | 0 | @ref anch_err "error" |
+ * | @b value | 0/1 | 0 | 0 | defined | @ref anch_reg "register" |
+ * | @b value | 0/1 | 1 | 1 | 0 | @ref anch_err "error" |
+ * | @b value | 0/1 | 1 | 0 | defined | @ref anch_alloc_hint_reg "alloc+register,hint" |
+ * | @b value | 0/1 | 0 | 1 | defined | @ref anch_err "error" |
+ * | @b value | 0/1 | 1 | 1 | defined | @ref anch_alloc_fixed_reg "alloc+register,fixed" | + *
+ * + * @note + * @li \anchor anch_reg @b register means that the memory will be registered in + * corresponding transports for RMA/AMO operations. This case intends that + * the memory was allocated by user before. + * @li \anchor anch_alloc_reg @b alloc+register means that the memory will be allocated + * in the memory provided by the system and registered in corresponding + * transports for RMA/AMO operations. + * @li \anchor anch_alloc_hint_reg alloc+register,hint means that + * the memory will be allocated with using @ref ucp_mem_map_params.address + * as a hint and registered in corresponding transports for RMA/AMO operations. + * @li \anchor anch_alloc_fixed_reg alloc+register,fixed means that the memory + * will be allocated and registered in corresponding transports for RMA/AMO + * operations. + * @li \anchor anch_err @b error is an erroneous combination of the parameters. + * + * @param [in] context Application @ref ucp_context_h "context" to map + * (register) and allocate the memory on. + * @param [in] params User defined @ref ucp_mem_map_params_t configurations + * for the @ref ucp_mem_h "UCP memory handle". + * @param [out] memh_p UCP @ref ucp_mem_h "handle" for the allocated + * segment. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_mem_map(ucp_context_h context, const ucp_mem_map_params_t* params, ucp_mem_h* memh_p); + +/** + * @ingroup UCP_MEM + * @brief Unmap memory segment + * + * This routine unmaps a user specified memory segment, that was previously + * mapped using the @ref ucp_mem_map "ucp_mem_map()" routine. The unmap + * routine will also release the resources associated with the memory + * @ref ucp_mem_h "handle". When the function returns, the @ref ucp_mem_h + * and associated @ref ucp_rkey_h "remote key" will be invalid and cannot be + * used with any UCP routine. 
+ * + * @note + * Another well-known term for the "unmap" operation that is typically + * used in the context of networking is memory "de-registration". The UCP + * library de-registers the memory from the available hardware so it can be returned + * to the operating system. + * + * Error cases: + * @li Once memory is unmapped a network access to the region may cause a + * failure. + * + * @param [in] context Application @ref ucp_context_h "context" which was + * used to allocate/map the memory. + * @param [in] memh @ref ucp_mem_h "Handle" to memory region. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_mem_unmap(ucp_context_h context, ucp_mem_h memh); + +/** + * @ingroup UCP_MEM + * @brief query mapped memory segment + * + * This routine returns address and length of memory segment mapped with + * @ref ucp_mem_map "ucp_mem_map()" routine. + * + * @param [in] memh @ref ucp_mem_h "Handle" to memory region. + * @param [out] attr Filled with attributes of the @ref ucp_mem_h + * "UCP memory handle". + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_mem_query(const ucp_mem_h memh, ucp_mem_attr_t* attr); + +/** + * @ingroup UCP_MEM + * @brief Print memory mapping information. + * + * This routine maps memory and prints information about the created memory handle: + * including the mapped memory length, the allocation method, and other useful + * information associated with the memory handle. + * + * @param [in] mem_spec Size and optional type of the memory to map. + * The format of the string is: "SIZE[,TYPE]". + * For example: + * - "32768" : allocate 32 kilobytes of host memory. + * - "1m,cuda" : allocate 1 megabyte of cuda memory. + * @param [in] context The context on which the memory is mapped. + * @param [in] stream Output stream on which to print the information.
+ */ +void +ucp_mem_print_info(const char* mem_spec, ucp_context_h context, FILE* stream); + +/** + * @ingroup UCP_MEM + * @brief list of UCP memory use advice. + * + * The enumeration list describes memory advice supported by @ref + * ucp_mem_advise() function. + */ +typedef enum ucp_mem_advice +{ + UCP_MADV_NORMAL = 0, /**< No special treatment */ + UCP_MADV_WILLNEED /**< can be used on the memory mapped with + @ref UCP_MEM_MAP_NONBLOCK to speed up memory + mapping and to avoid page faults when + the memory is accessed for the first time. */ +} ucp_mem_advice_t; + +/** + * @ingroup UCP_MEM + * @brief Tuning parameters for the UCP memory advice. + * + * This structure defines the parameters that are used for the + * UCP memory advice tuning during the @ref ucp_mem_advise "ucp_mem_advise" + * routine. + */ +typedef struct ucp_mem_advise_params +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_mem_advise_params_field. All fields are mandatory. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Memory base address. + */ + void* address; + + /** + * Length (in bytes) to allocate or map (register). + */ + size_t length; + + /** + * Memory use advice @ref ucp_mem_advice + */ + ucp_mem_advice_t advice; +} ucp_mem_advise_params_t; + +/** + * @ingroup UCP_MEM + * @brief give advice about the use of memory + * + * This routine advises the UCP about how to handle memory range beginning at + * address and size of length bytes. This call does not influence the semantics + * of the application, but may influence its performance. The UCP may ignore + * the advice. + * + * @param [in] context Application @ref ucp_context_h "context" which was + * used to allocate/map the memory. + * @param [in] memh @ref ucp_mem_h "Handle" to memory region. + * @param [in] params Memory base address and length. 
The advice field + * is used to pass memory use advice as defined in + * the @ref ucp_mem_advice list + * The memory range must belong to the @a memh + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_mem_advise(ucp_context_h context, ucp_mem_h memh, ucp_mem_advise_params_t* params); + +/** + * @ingroup UCP_MEM + * @brief UCP memory handle packing parameters field mask. + * + * The enumeration allows specifying which fields in + * @ref ucp_memh_pack_params_t are present. It is used to enable backward + * compatibility support. + */ +enum ucp_memh_pack_params_field +{ + /** + * Memory handle packing field that will be used in the @ref ucp_memh_pack + * routine. + */ + UCP_MEMH_PACK_PARAM_FIELD_FLAGS = UCS_BIT(0) +}; + +/** + * @ingroup UCP_MEM + * @brief UCP memory handle flags. + * + * The enumeration list describes the memory handle packing flags supported by + * @ref ucp_memh_pack() function. + */ +enum ucp_memh_pack_flags +{ + /** + * Pack a memory handle to be exported and used by peers for their local + * operations on a memory buffer allocated from same or another virtual + * memory space, but physically registered on the same network device. + * A peer should call @ref ucp_mem_map with the + * flag @ref UCP_MEM_MAP_PARAM_FIELD_EXPORTED_MEMH_BUFFER in order to + * import and use a memory handle buffer obtained from @ref ucp_memh_pack. + */ + UCP_MEMH_PACK_FLAG_EXPORT = UCS_BIT(0) +}; + +/** + * @ingroup UCP_MEM + * @brief Memory handle pack parameters passed to @ref ucp_memh_pack. + * + * This structure defines the parameters that are used for packing the + * UCP memory handle during the @ref ucp_memh_pack "ucp_memh_pack" + * routine. + */ +typedef struct ucp_memh_pack_params +{ + /** + * Mask of valid fields in this structure. Fields not specified in this + * mask will be ignored. Provides ABI compatibility with respect to adding + * new fields. 
+ */ + uint64_t field_mask; + + /** + * Flags to control packing of a memory handle. + */ + uint64_t flags; +} ucp_memh_pack_params_t; + +/** + * @ingroup UCP_MEM + * @brief Pack a memory handle into a buffer allocated by the library. + * + * This routine allocates a memory buffer and packs a memory handle into the + * buffer. A packed memory key is an opaque object that provides + * the information that is necessary for a peer. + * This routine packs the memory handle in a portable format such that the + * object can be unpacked on any platform supported by the UCP library, e.g. + * if the memory handle was packed as a remote memory key (RKEY), it should be + * unpacked by @ref ucp_ep_rkey_unpack "ucp_ep_rkey_unpack()". + * In order to release the memory buffer allocated by this routine, + * the application is responsible for calling the @ref ucp_memh_buffer_release + * "ucp_memh_buffer_release()" routine. + * + * + * @note + * @li RKEYs for InfiniBand and Cray Aries networks typically include + * the InfiniBand and Aries keys. + * @li In order to enable remote direct memory access to the memory associated + * with the memory handle the application is responsible for sharing the RKEY with + * the peers that will initiate the access. + * + * @param [in] memh @ref ucp_mem_h "Handle" to memory region. + * @param [in] params Memory handle packing parameters, as defined by + * @ref ucp_memh_pack_params_t. + * @param [out] buffer_p Memory buffer allocated by the library. + * The buffer contains the packed memory handle. + * @param [out] buffer_size_p Size (in bytes) of the buffer which contains + * packed memory handle. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_memh_pack(ucp_mem_h memh, const ucp_memh_pack_params_t* params, void** buffer_p, + size_t* buffer_size_p); + +/** + * @ingroup UCP_MEM + * @brief Memory handle release parameters passed to + * @ref ucp_memh_buffer_release.
+ * + * This structure defines the parameters that are used for releasing the + * UCP memory handle buffer during the @ref ucp_memh_buffer_release + * "ucp_memh_buffer_release" routine. + */ +typedef struct ucp_memh_buffer_release_params +{ + /** + * Mask of valid fields in this structure. All fields are mandatory. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; +} ucp_memh_buffer_release_params_t; + +/** + * @ingroup UCP_MEM + * @brief Release packed memory handle buffer. + * + * This routine releases the buffer that was allocated using @ref ucp_memh_pack + * "ucp_memh_pack()". + * + * @warning + * @li Once memory is released, an access to the memory may cause undefined + * behavior. + * @li If the input memory address was not allocated using + * @ref ucp_memh_pack "ucp_memh_pack()" routine, the behavior of this routine + * is undefined. + * + * @param [in] buffer Buffer to release. + * @param [in] params Memory handle buffer release parameters, as defined by + * @ref ucp_memh_buffer_release_params_t. + */ +void +ucp_memh_buffer_release(void* buffer, const ucp_memh_buffer_release_params_t* params); + +/** + * @ingroup UCP_MEM + * @brief Create remote access key from packed buffer. + * + * This routine unpacks the remote key (RKEY) object into the local memory + * such that it can be accessed and used by UCP routines. The RKEY object has + * to be packed using the @ref ucp_rkey_pack "ucp_rkey_pack()" routine. + * Application code should not make any changes to the content of the RKEY + * buffer. + * + * @note The application is responsible for releasing the RKEY object when + * it is no longer needed, by calling the @ref ucp_rkey_destroy + * "ucp_rkey_destroy()" routine. + * @note The remote key object can be used for communications only on the + * endpoint on which it was unpacked. + * + * @param [in] ep Endpoint to access using the remote key. + * @param [in] rkey_buffer Packed rkey. 
+ * @param [out] rkey_p Remote key handle. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_ep_rkey_unpack(ucp_ep_h ep, const void* rkey_buffer, ucp_rkey_h* rkey_p); + +/** + * @ingroup UCP_MEM + * @brief Get a local pointer to remote memory. + * + * This routine returns a local pointer to the remote memory described + * by the rkey. + * + * @note This routine can return a valid pointer only for the endpoints + * that are reachable via shared memory. + * + * @param [in] rkey A remote key handle. + * @param [in] raddr A remote memory address within the memory area + * described by the rkey. + * @param [out] addr_p A pointer that can be used for direct + * access to the remote memory. + * + * @return Error code as defined by @ref ucs_status_t if the remote memory + * cannot be accessed directly or the remote memory address is not valid. + */ +ucs_status_t +ucp_rkey_ptr(ucp_rkey_h rkey, uint64_t raddr, void** addr_p); + +/** + * @ingroup UCP_MEM + * @brief Destroy the remote key + * + * This routine destroys the RKEY object and the memory that was allocated + * using the @ref ucp_ep_rkey_unpack "ucp_ep_rkey_unpack()" routine. This + * routine also releases any resources that are associated with the RKEY + * object. + * + * @warning + * @li Once the RKEY object is released an access to the memory will cause an + * undefined failure. + * @li If the RKEY object was not created using + * @ref ucp_ep_rkey_unpack "ucp_ep_rkey_unpack()" routine the behavior of this + * routine is undefined. + * @li The RKEY object must be destroyed after all outstanding operations which + * are using it are flushed, and before the endpoint on which it was unpacked + * is destroyed. + * + * @param [in] rkey Remote key to destroy. + */ +void +ucp_rkey_destroy(ucp_rkey_h rkey); + +/** + * @ingroup UCP_WORKER + * @brief Add user defined callback for Active Message. 
+ * + * This routine installs a user defined callback to handle incoming Active + * Messages with a specific id. This callback is called whenever an Active + * Message that was sent from the remote peer by @ref ucp_am_send_nbx is + * received on this worker. + * + * @warning Handlers set by this function are not compatible with + @ref ucp_am_send_nb routine. + * + * @param [in] worker UCP worker on which to set the Active Message + * handler. + * @param [in] param Active Message handler parameters, as defined by + * @ref ucp_am_handler_param_t. + * + * @return error code if the worker does not support Active Messages or + * requested callback flags. + */ +ucs_status_t +ucp_worker_set_am_recv_handler(ucp_worker_h worker, const ucp_am_handler_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Send Active Message. + * + * This routine sends an Active Message to an ep. If the operation completes + * immediately, then the routine returns NULL and the callback function is + * ignored, even if specified. Otherwise, if no error is reported and a callback + * is requested (i.e. the UCP_OP_ATTR_FIELD_CALLBACK flag is set in the + * op_attr_mask field of @a param), then the UCP library will schedule + * invocation of the callback routine @a param->cb.send upon completion of the + * operation. + * + * @note If UCP_OP_ATTR_FLAG_NO_IMM_CMPL flag is set in the op_attr_mask field + * of @a param, then the operation will return a request handle, even if + * it completes immediately. + * @note This operation supports specific flags, which can be passed + * in @a param by @ref ucp_request_param_t.flags. The exact set of flags + * is defined by @ref ucp_send_am_flags. + * + * @param [in] ep UCP endpoint where the Active Message will be run. + * @param [in] id Active Message id. Specifies which registered + * callback to run. + * @param [in] header User defined Active Message header. NULL value is + * allowed if no header needed. 
In this case + * @a header_length must be set to 0. + * By default the header must be valid until + * the active message send operation completes. + * If the flag @ref UCP_AM_SEND_FLAG_COPY_HEADER + * is specified, the header is only required to be + * valid until this function call returns. + * @param [in] header_length Active message header length in bytes. + * @param [in] buffer Pointer to the data to be sent to the target node + * of the Active Message. + * @param [in] count Number of elements to send. + * @param [in] param Operation parameters, see @ref ucp_request_param_t. + * + * @note Sending only header without actual data is allowed and is recommended + * for transferring a latency-critical amount of data. + * @note The maximum allowed header size can be obtained by querying worker + * attributes by the @ref ucp_worker_query routine. + * + * + * @return NULL - Active Message was sent immediately. + * @return UCS_PTR_IS_ERR(_ptr) - Error sending Active Message. + * @return otherwise - Operation was scheduled for send and can be + * completed at any point in time. The request + * handle is returned to the application in order + * to track progress of the message. If user + * request was not provided in @a param->request, + * the application is responsible for releasing + * the handle using @ref ucp_request_free routine. + */ +ucs_status_ptr_t +ucp_am_send_nbx(ucp_ep_h ep, unsigned id, const void* header, size_t header_length, + const void* buffer, size_t count, const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Receive Active Message as defined by provided data descriptor. + * + * This routine receives a message that is described by the data descriptor + * @a data_desc, local address @a buffer, size @a count and @a param + * parameters on the @a worker. The routine is non-blocking and therefore + * returns immediately. The receive operation is considered completed when the + * message is delivered to the @a buffer. 
If the receive operation cannot be + * started the routine returns an error. + * + * @note This routine can be performed on any valid data descriptor delivered in + * @ref ucp_am_recv_callback_t. + * Data descriptor is considered to be valid if: + * - It is a rendezvous request (@a UCP_AM_RECV_ATTR_FLAG_RNDV is set in + * @ref ucp_am_recv_param_t.recv_attr) or + * - It is a persistent data pointer (@a UCP_AM_RECV_ATTR_FLAG_DATA is set + * in @ref ucp_am_recv_param_t.recv_attr). In this case receive + * operation may be needed to unpack data to device memory (for example + * GPU device) or some specific datatype. + * @note After this call UCP takes ownership of @a data_desc descriptor, so + * there is no need to release it even if the operation fails. + * The routine returns a request handle instead, which can be used for + * tracking operation progress. + * + * @param [in] worker Worker that is used for the receive operation. + * @param [in] data_desc Data descriptor, provided in + @ref ucp_am_recv_callback_t routine. + * @param [in] buffer Pointer to the buffer to receive the data. + * @param [in] count Number of elements to receive into @a buffer. + * @param [in] param Operation parameters, see @ref ucp_request_param_t. + * + * @return NULL - The receive operation was completed + * immediately. In this case, if + * @a param->recv_info.length is specified in the + * @a param, the value to which it points is updated + * with the size of the received message. + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Receive operation was scheduled and can be + * completed at any point in time. The request + * handle is returned to the application in order + * to track operation progress. If user + * request was not provided in @a param->request, + * the application is responsible for releasing + * the handle using @ref ucp_request_free routine. 
+ */ +ucs_status_ptr_t +ucp_am_recv_data_nbx(ucp_worker_h worker, void* data_desc, void* buffer, size_t count, + const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Releases Active Message data. + * + * This routine releases data that persisted through an Active Message + * callback because that callback returned UCS_INPROGRESS. + * + * @param [in] worker Worker which received the Active Message. + * @param [in] data Pointer to data that was passed into + * the Active Message callback as the data + * parameter. + */ +void +ucp_am_data_release(ucp_worker_h worker, void* data); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking stream send operation. + * + * This routine sends data that is described by the local address @a buffer, + * size @a count object to the destination endpoint @a ep. The routine is + * non-blocking and therefore returns immediately, however the actual send + * operation may be delayed. The send operation is considered completed when + * it is safe to reuse the source @e buffer. If the send operation is + * completed immediately the routine returns UCS_OK. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send. + * @param [in] param Operation parameters, see @ref ucp_request_param_t. + * + * @return NULL - The send operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed at any point in time. The request + * handle is returned to the application in + * order to track progress of the message. 
+ */ +ucs_status_ptr_t +ucp_stream_send_nbx(ucp_ep_h ep, const void* buffer, size_t count, + const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-send operation + * + * This routine sends a message that is described by the local address @a + * buffer, size @a count object to the destination endpoint @a ep. Each + * message is associated with a @a tag value that is used for message + * matching on the @ref ucp_tag_recv_nb or @ref ucp_tag_recv_nbx "receiver". + * The routine is non-blocking and therefore returns immediately, however the + * actual send operation may be delayed. The send operation is considered + * completed when it is safe to reuse the source @e buffer. If the send + * operation is completed immediately the routine returns UCS_OK and the + * call-back function is @b not invoked. If the operation is @b not completed + * immediately and no error is reported then the UCP library will schedule to + * invoke the call-back whenever the send operation is completed. In other + * words, the completion of a message can be signaled by the return code or + * the call-back. + * Immediate completion signals can be fine-tuned via the + * @ref ucp_request_param_t.op_attr_mask field in the + * @ref ucp_request_param_t structure. The values of this field + * are a bit-wise OR of the @ref ucp_op_attr_t enumeration. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send + * @param [in] tag Message tag. + * @param [in] param Operation parameters, see @ref ucp_request_param_t + * + * @return UCS_OK - The send operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed.
+ * @return otherwise - Operation was scheduled for send and can be + * completed in any point in time. The request handle + * is returned to the application in order to track + * progress of the message. + */ +ucs_status_ptr_t +ucp_tag_send_nbx(ucp_ep_h ep, const void* buffer, size_t count, ucp_tag_t tag, + const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking synchronous tagged-send operation. + * + * Same as @ref ucp_tag_send_nbx, except the request completes only after there + * is a remote tag match on the message (which does not always mean the remote + * receive has been completed). This function never completes "in-place", and + * always returns a request handle. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * @note Returns @ref UCS_ERR_UNSUPPORTED if @ref UCP_ERR_HANDLING_MODE_PEER is + * enabled. This is a temporary implementation-related constraint that + * will be addressed in future releases. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send + * @param [in] tag Message tag. + * @param [in] param Operation parameters, see @ref ucp_request_param_t + * + * @return UCS_OK - The send operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed in any point in time. The request handle + * is returned to the application in order to track + * progress of the message. + */ +ucs_status_ptr_t +ucp_tag_send_sync_nbx(ucp_ep_h ep, const void* buffer, size_t count, ucp_tag_t tag, + const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking stream receive operation of structured data into a + * user-supplied buffer. 
+ * + * This routine receives data that is described by the local address @a buffer, + * size @a count object on the endpoint @a ep. The routine is non-blocking + * and therefore returns immediately. The receive operation is considered + * complete when the message is delivered to the buffer. If the receive + * operation cannot be started, then the routine returns an error. + * + * @param [in] ep UCP endpoint that is used for the receive operation. + * @param [in] buffer Pointer to the buffer that will receive the data. + * @param [in] count Number of elements to receive into @a buffer. + * @param [out] length Size of the received data in bytes. The value is + * valid only if return code is NULL. + * @param [in] param Operation parameters, see @ref ucp_request_param_t. + * This operation supports specific flags, which can be + * passed in @a param by @ref ucp_request_param_t.flags. + * The exact set of flags is defined by + * @ref ucp_stream_recv_flags_t. + * + * @return NULL - The receive operation was completed + * immediately. In this case the value pointed by + * @a length is updated by the size of received + * data. Note @a param->recv_info is not relevant + * for this function. + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. A request + * handle is returned to the application in order + * to track progress of the operation. + * + * @note The amount of data received, in bytes, is always an integral multiple + * of the @a datatype size. + */ +ucs_status_ptr_t +ucp_stream_recv_nbx(ucp_ep_h ep, void* buffer, size_t count, size_t* length, + const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking stream receive operation of unstructured data into + * a UCP-supplied buffer. + * + * This routine receives any available data from endpoint @a ep. + * Unlike @ref ucp_stream_recv_nb, the returned data is unstructured and is + * treated as an array of bytes. 
If data is immediately available, + * UCS_STATUS_PTR(_ptr) is returned as a pointer to the data, and @a length + * is set to the size of the returned data buffer. The routine is non-blocking + * and therefore returns immediately. + * + * @param [in] ep UCP endpoint that is used for the receive + * operation. + * @param [out] length Length of received data. + * + * @return NULL - No received data available on the @a ep. + * @return UCS_PTR_IS_ERR(_ptr) - the receive operation failed and + * UCS_PTR_STATUS(_ptr) indicates an error. + * @return otherwise - The pointer to the data UCS_STATUS_PTR(_ptr) + * is returned to the application. After the data + * is processed, the application is responsible + * for releasing the data buffer by calling the + * @ref ucp_stream_data_release routine. + * + * @note This function returns packed data (equivalent to ucp_dt_make_contig(1)). + * @note This function returns a pointer to a UCP-supplied buffer, whereas + * @ref ucp_stream_recv_nb places the data into a user-provided buffer. + * In some cases, receiving data directly into a UCP-supplied buffer can + * be more optimal, for example by processing the incoming data in-place + * and thus avoiding extra memory copy operations. + */ +ucs_status_ptr_t +ucp_stream_recv_data_nb(ucp_ep_h ep, size_t* length); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-receive operation. + * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, and @a info object on the @a worker. The tag + * value of the receive message has to match the @a tag and @a tag_mask values, + * where the @a tag_mask indicates what bits of the tag have to be matched. The + * routine is a non-blocking and therefore returns immediately. The receive + * operation is considered completed when the message is delivered to the @a + * buffer. 
In order to notify the application about completion of the receive + * operation the UCP library will invoke the call-back @a cb when the received + * message is in the receive buffer and ready for application access. If the + * receive operation cannot be started, then the routine returns an error. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data. + * @param [in] count Number of elements to receive + * @param [in] tag Message tag to expect. + * @param [in] tag_mask Bit mask that indicates the bits that are used for + * the matching of the incoming tag + * against the expected tag. + * @param [in] param Operation parameters, see @ref ucp_request_param_t + * + * @return NULL - The receive operation was completed + * immediately. In this case, if + * @a param->recv_info.tag_info is specified in the + * @a param, the value to which it points is updated + * with the information about the received message. + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. The request + * handle is returned to the application in order + * to track progress of the operation. The + * application is responsible for releasing the + * handle using @ref ucp_request_free + * "ucp_request_free()" routine. + */ +ucs_status_ptr_t +ucp_tag_recv_nbx(ucp_worker_h worker, void* buffer, size_t count, ucp_tag_t tag, + ucp_tag_t tag_mask, const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking probe and return a message. + * + * This routine probes (checks) if a messages described by the @a tag and + * @a tag_mask was received (fully or partially) on the @a worker. The tag + * value of the received message has to match the @a tag and @a tag_mask + * values, where the @a tag_mask indicates what bits of the tag have to be + * matched. 
The function returns immediately and if the message is matched it + * returns a handle for the message. + * + * @param [in] worker UCP worker that is used for the probe operation. + * @param [in] tag Message tag to probe for. + * @param [in] tag_mask Bit mask that indicates the bits that are used for + * the matching of the incoming tag + * against the expected tag. + * @param [in] remove The flag indicates if the matched message has to + * be removed from UCP library. + * If true (1), the message handle is removed from + * the UCP library and the application is responsible + * to call @ref ucp_tag_msg_recv_nb + * "ucp_tag_msg_recv_nb()" in order to receive the data + * and release the resources associated with the + * message handle. + * If false (0), the return value is merely an indication + * to whether a matching message is present, and it cannot + * be used in any other way, and in particular it cannot + * be passed to @ref ucp_tag_msg_recv_nb(). + * @param [out] info If the matching message is found the descriptor is + * filled with the details about the message. + * + * @return NULL - No match found. + * @return Message handle (not NULL) - If message is matched the message handle + * is returned. + * + * @note This function does not advance the communication state of the network. + * If this routine is used in busy-poll mode, need to make sure + * @ref ucp_worker_progress() is called periodically to extract messages + * from the transport. + */ +ucp_tag_message_h +ucp_tag_probe_nb(ucp_worker_h worker, ucp_tag_t tag, ucp_tag_t tag_mask, int remove, + ucp_tag_recv_info_t* info); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking receive operation for a probed message. + * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, and @a message handle on the @a worker. + * The @a message handle can be obtained by calling the @ref + * ucp_tag_probe_nb "ucp_tag_probe_nb()" routine. 
The @ref ucp_tag_msg_recv_nbx + * "ucp_tag_msg_recv_nbx()" routine is non-blocking and therefore returns + * immediately. The receive operation is considered completed when the message + * is delivered to the @a buffer. In order to notify the application about + * completion of the receive operation the UCP library will invoke the + * call-back @a cb when the received message is in the receive buffer and ready + * for application access. If the receive operation cannot be started, then the + * routine returns an error. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer that will receive the data. + * @param [in] count Number of elements to receive + * @param [in] message Message handle. + * @param [in] param Operation parameters, see @ref ucp_request_param_t + * + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. The request + * handle is returned to the application in order + * to track progress of the operation. The + * application is responsible for releasing the + * handle using @ref ucp_request_free + * "ucp_request_free()" routine. + */ +ucs_status_ptr_t +ucp_tag_msg_recv_nbx(ucp_worker_h worker, void* buffer, size_t count, + ucp_tag_message_h message, const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking remote memory put operation. + * + * This routine initiates a storage of contiguous block of data that is + * described by the local address @a buffer in the remote contiguous memory + * region described by @a remote_addr address and the + * @ref ucp_rkey_h "memory handle" rkey. The routine returns immediately and + * @b does @b not guarantee re-usability of the source address @e buffer. If + * the operation is completed immediately the routine return UCS_OK, otherwise + * UCS_INPROGRESS or an error is returned to user. 
If the put operation + * completes immediately, the routine returns UCS_OK and the call-back routine + * @a param.cb.send is @b not invoked. If the operation is @b not completed + * immediately and no error is reported, then the UCP library will schedule + * invocation of the call-back routine @a param.cb.send upon completion of + * the put operation. In other words, the completion of a put operation can be + * signaled by the return code or execution of the call-back. + * Immediate completion signals can be fine-tuned via the + * @ref ucp_request_param_t.op_attr_mask field in the + * @ref ucp_request_param_t structure. The values of this field + * are a bit-wise OR of the @ref ucp_op_attr_t enumeration. + * + * @note The completion of a put operation signals the local @e buffer can be + * reused. The completion of the operation on the remote address requires use + * of @ref ucp_worker_flush_nbx "ucp_worker_flush_nbx()" or + * @ref ucp_ep_flush_nbx "ucp_ep_flush_nbx()", after completion of which the + * data in @e remote_addr is guaranteed to be available. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local source address. + * @param [in] count Number of elements of type + * @ref ucp_request_param_t.datatype to put. If + * @ref ucp_request_param_t.datatype is not specified, + * the type defaults to ucp_dt_make_contig(1), which + * corresponds to byte elements. + * @param [in] remote_addr Pointer to the destination remote memory address + * to write to. + * @param [in] rkey Remote memory key associated with the + * remote memory address. + * @param [in] param Operation parameters, see @ref ucp_request_param_t + * + * @return UCS_OK - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation. 
The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + * + * @note Only the datatype ucp_dt_make_contig(1) is supported + * for @a param->datatype, see @ref ucp_dt_make_contig. + */ +ucs_status_ptr_t +ucp_put_nbx(ucp_ep_h ep, const void* buffer, size_t count, uint64_t remote_addr, + ucp_rkey_h rkey, const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking remote memory get operation. + * + * This routine initiates a load of a contiguous block of data that is + * described by the remote memory address @a remote_addr and the @ref ucp_rkey_h + * "memory handle" @a rkey in the local contiguous memory region described + * by @a buffer address. The routine returns immediately and @b does @b not + * guarantee that remote data is loaded and stored under the local address @e + * buffer. If the operation is completed immediately the routine return UCS_OK, + * otherwise UCS_INPROGRESS or an error is returned to user. If the get + * operation completes immediately, the routine returns UCS_OK and the + * call-back routine @a param.cb.send is @b not invoked. If the operation is + * @b not completed immediately and no error is reported, then the UCP library + * will schedule invocation of the call-back routine @a param.cb.send upon + * completion of the get operation. In other words, the completion of a get + * operation can be signaled by the return code or execution of the call-back. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" + * in order to guarantee re-usability of the source address @e buffer. 
+ * @note The completion of a get operation signals the local @e buffer holds the + * expected data and that both local @e buffer and remote @e remote_addr are + * safe to be reused, unlike with @ref ucp_put_nbx "ucp_put_nbx" where the use + * of @ref ucp_worker_flush_nbx "ucp_worker_flush_nbx()" or + * @ref ucp_ep_flush_nbx "ucp_ep_flush_nbx()" is required before the remote data + * is available. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local destination address. + * @param [in] count Number of elements of type + * @ref ucp_request_param_t.datatype to get. If + * @ref ucp_request_param_t.datatype is not specified, + * the type defaults to ucp_dt_make_contig(1), which + * corresponds to byte elements. + * @param [in] remote_addr Pointer to the source remote memory address + * to read from. + * @param [in] rkey Remote memory key associated with the + * remote memory address. + * @param [in] param Operation parameters, see @ref ucp_request_param_t. + * + * @return UCS_OK - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + * + * @note Only the datatype ucp_dt_make_contig(1) is supported + * for @a param->datatype, see @ref ucp_dt_make_contig. + */ +ucs_status_ptr_t +ucp_get_nbx(ucp_ep_h ep, void* buffer, size_t count, uint64_t remote_addr, + ucp_rkey_h rkey, const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Post an atomic memory operation. + * + * This routine will post an atomic operation to remote memory.
+ * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The routine is non-blocking and therefore returns immediately. + * However, the actual atomic operation may be delayed. In order to enable + * fetching semantics for atomic operations, the user has to specify + * @a param.reply_buffer. Please see @ref atomic_ops "table" below for more + * details. + * + * @note The user should not modify any part of the @a buffer (or also + * @a param->reply_buffer for fetch operations), until the operation + * completes. + * @note Only ucp_dt_make_contig(4) and ucp_dt_make_contig(8) are supported + * in @a param->datatype, see @ref ucp_dt_make_contig. Also, currently + * atomic operations can handle one element only. Thus, @a count + * argument must be set to 1. + * + * + *
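<table> <caption id="atomic_ops">Atomic Operations Semantic</caption>
<tr> <th>Atomic Operation</th> <th>Pseudo code</th> + * <th>X</th> <th>Y</th> <th>Z</th> + * <th>Result</th> </tr> + *
<tr> <td>@ref UCP_ATOMIC_OP_ADD</td> <td>Result=Y; Y+=X</td> + * <td>buffer</td> <td>remote_addr</td> <td>-</td> + * <td>param.reply_buffer(optional)</td> </tr> + *
<tr> <td>@ref UCP_ATOMIC_OP_SWAP</td> <td>Result=Y; Y=X</td> + * <td>buffer</td> <td>remote_addr</td> <td>-</td> + * <td>param.reply_buffer</td> </tr> + *
<tr> <td>@ref UCP_ATOMIC_OP_CSWAP</td> + * <td>Result=Y; if (X==Y) then Y=Z</td> <td>buffer</td> + * <td>remote_addr</td> <td>param.reply_buffer</td> + * <td>param.reply_buffer</td> </tr> + *
<tr> <td>@ref UCP_ATOMIC_OP_AND</td> <td>Result=Y; Y&=X</td> + * <td>buffer</td> <td>remote_addr</td> <td>-</td> + * <td>param.reply_buffer(optional)</td> </tr> + *
<tr> <td>@ref UCP_ATOMIC_OP_OR</td> <td>Result=Y; Y|=X</td> + * <td>buffer</td> <td>remote_addr</td> <td>-</td> + * <td>param.reply_buffer(optional)</td> </tr> + *
<tr> <td>@ref UCP_ATOMIC_OP_XOR</td> <td>Result=Y; Y^=X</td> + * <td>buffer</td> <td>remote_addr</td> <td>-</td> + * <td>param.reply_buffer(optional)</td> </tr> + * </table>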
+ * + * @param [in] ep UCP endpoint. + * @param [in] opcode One of @ref ucp_atomic_op_t. + * @param [in] buffer Address of operand for the atomic operation. See + * @ref atomic_ops "Atomic Operations Semantic table" + * for exact usage by different atomic operations. + * @param [in] count Number of elements in @a buffer and @a result. The + * size of each element is specified by + * @ref ucp_request_param_t.datatype + * @param [in] remote_addr Remote address to operate on. + * @param [in] rkey Remote key handle for the remote memory address. + * @param [in] param Operation parameters, see @ref ucp_request_param_t. + * + * @return NULL - The operation completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at some time in the future. The + * request handle is returned to the application + * in order to track progress of the operation. + */ +ucs_status_ptr_t +ucp_atomic_op_nbx(ucp_ep_h ep, ucp_atomic_op_t opcode, const void* buffer, size_t count, + uint64_t remote_addr, ucp_rkey_h rkey, + const ucp_request_param_t* param); + +/** + * @ingroup UCP_COMM + * @brief Check the status of non-blocking request. + * + * This routine checks the state of the request and returns its current status. + * Any value different from UCS_INPROGRESS means that request is in a completed + * state. + * + * @param [in] request Non-blocking request to check. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_request_check_status(void* request); + +/** + * @ingroup UCP_COMM + * @brief Check the status and currently available state of non-blocking request + * returned from @ref ucp_tag_recv_nb routine. + * + * This routine checks the state and returns current status of the request + * returned from @ref ucp_tag_recv_nb routine or the user allocated request + * for @ref ucp_tag_recv_nbr. 
Any value different from UCS_INPROGRESS means + * that the request is in a completed state. + * + * @param [in] request Non-blocking request to check. + * @param [out] info It is filled with the details about the message + * available at the moment of calling. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_tag_recv_request_test(void* request, ucp_tag_recv_info_t* info); + +/** + * @ingroup UCP_COMM + * @brief Check the status and currently available state of non-blocking request + * returned from @ref ucp_stream_recv_nb routine. + * + * This routine checks the state and returns current status of the request + * returned from @ref ucp_stream_recv_nb routine. Any value different from + * UCS_INPROGRESS means that the request is in a completed state. + * + * @param [in] request Non-blocking request to check. + * @param [out] length_p The size of the received data in bytes. This value + * is only valid if the status is UCS_OK. If valid, it + * is always an integral multiple of the datatype size + * associated with the request. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_stream_recv_request_test(void* request, size_t* length_p); + +/** + * @ingroup UCP_COMM + * @brief Cancel an outstanding communications request. + * + * @param [in] worker UCP worker. + * @param [in] request Non-blocking request to cancel. + * + * This routine tries to cancels an outstanding communication request. After + * calling this routine, the @a request will be in completed or canceled (but + * not both) state regardless of the status of the target endpoint associated + * with the communication request. 
If the request is completed successfully, + * the @ref ucp_send_callback_t "send" or @ref ucp_tag_recv_callback_t + * "receive" completion callbacks (based on the type of the request) will be + * called with the @a status argument of the callback set to UCS_OK, and in a + * case it is canceled the @a status argument is set to UCS_ERR_CANCELED. It is + * important to note that in order to release the request back to the library + * the application is responsible for calling @ref ucp_request_free + * "ucp_request_free()". + */ +void +ucp_request_cancel(ucp_worker_h worker, void* request); + +/** + * @ingroup UCP_COMM + * @brief Release UCP data buffer returned by @ref ucp_stream_recv_data_nb. + * + * @param [in] ep Endpoint @a data received from. + * @param [in] data Data pointer to release, which was returned from + * @ref ucp_stream_recv_data_nb. + * + * This routine releases internal UCP data buffer returned by + * @ref ucp_stream_recv_data_nb when @a data is processed, the application can't + * use this buffer after calling this function. + */ +void +ucp_stream_data_release(ucp_ep_h ep, void* data); + +/** + * @ingroup UCP_COMM + * @brief Release a communications request. + * + * @param [in] request Non-blocking request to release. + * + * This routine releases the non-blocking request back to the library, regardless + * of its current state. Communications operations associated with this request + * will make progress internally, however no further notifications or callbacks + * will be invoked for this request. + */ +void +ucp_request_free(void* request); + +/** + * @ingroup UCP_DATATYPE + * @brief Create a generic datatype. + * + * This routine create a generic datatype object. + * The generic datatype is described by the @a ops @ref ucp_generic_dt_ops_t + * "object" which provides a table of routines defining the operations for + * generic datatype manipulation. 
Typically, generic datatypes are used for + * integration with datatype engines provided with MPI implementations (MPICH, + * Open MPI, etc). + * The application is responsible for releasing the @a datatype_p object using + * @ref ucp_dt_destroy "ucp_dt_destroy()" routine. + * + * @param [in] ops Generic datatype function table as defined by + * @ref ucp_generic_dt_ops_t . + * @param [in] context Application defined context passed to this + * routine. The context is passed as a parameter + * to the routines in the @a ops table. + * @param [out] datatype_p A pointer to datatype object. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_dt_create_generic(const ucp_generic_dt_ops_t* ops, void* context, + ucp_datatype_t* datatype_p); + +/** + * @ingroup UCP_DATATYPE + * @brief Destroy a datatype and release its resources. + * + * This routine destroys the @a datatype object and + * releases any resources that are associated with the object. + * The @a datatype object must be allocated using @ref ucp_dt_create_generic + * "ucp_dt_create_generic()" routine. + * + * @warning + * @li Once the @a datatype object is released an access to this object may + * cause an undefined failure. + * + * @param [in] datatype Datatype object to destroy. + */ +void +ucp_dt_destroy(ucp_datatype_t datatype); + +/** + * @ingroup UCP_DATATYPE + * @brief Query attributes of a datatype. + * + * This routine fetches information about the attributes of a datatype. + * When @ref UCP_DATATYPE_ATTR_FIELD_PACKED_SIZE is set in @a field_mask of @a attr, + * the field @a packed_size is set to the packed size (bytes) of the datatype. + * + * @param [in] datatype Datatype object to query. + * @param [inout] attr Filled with attributes of the datatype. 
+ * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_dt_query(ucp_datatype_t datatype, ucp_datatype_attr_t* attr); + +/** + * @ingroup UCP_WORKER + * + * @brief Assures ordering between non-blocking operations + * + * This routine ensures ordering of non-blocking communication operations on + * the @ref ucp_worker_h "UCP worker". Communication operations issued on a + * particular endpoint created on the @a worker prior to this call are + * guaranteed to be completed before any communication operations issued on the + * same endpoint after this call. + * + * @note The primary difference between @ref ucp_worker_fence "ucp_worker_fence()" + * and the @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" is the fact the fence + * routine does not guarantee completion of the operations on the call return but + * only ensures the order between communication operations. The + * @ref ucp_worker_flush_nb "flush" operation on return guarantees that all + * operations are completed and corresponding memory regions were updated. + * + * @param [in] worker UCP worker. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_worker_fence(ucp_worker_h worker); + +/** + * @ingroup UCP_WORKER + * + * @brief Flush outstanding AMO and RMA operations on the @ref ucp_worker_h + * "worker" + * + * This routine flushes all outstanding AMO and RMA communications on the + * @ref ucp_worker_h "worker". All the AMO and RMA operations issued on the + * @a worker prior to this call are completed both at the origin and at the + * target when this call returns. + * + * @note For description of the differences between @ref ucp_worker_flush_nb + * "flush" and @ref ucp_worker_fence "fence" operations please see + * @ref ucp_worker_fence "ucp_worker_fence()" + * + * @param [in] worker UCP worker. + * @param [in] param Operation parameters, see @ref ucp_request_param_t + * + * @return NULL - The flush operation was completed immediately. 
+ * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. + * @return otherwise - Flush operation was scheduled and can be + * completed in any point in time. The request + * handle is returned to the application in order + * to track progress. + */ +ucs_status_ptr_t +ucp_worker_flush_nbx(ucp_worker_h worker, const ucp_request_param_t* param); + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP endpoint attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_ep_attr_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_ep_attr_field +{ + UCP_EP_ATTR_FIELD_NAME = UCS_BIT(0), /**< UCP endpoint name */ + UCP_EP_ATTR_FIELD_LOCAL_SOCKADDR = UCS_BIT(1), /**< Sockaddr used by the endpoint */ + UCP_EP_ATTR_FIELD_REMOTE_SOCKADDR = + UCS_BIT(2), /**< Sockaddr the endpoint is connected to */ + UCP_EP_ATTR_FIELD_TRANSPORTS = + UCS_BIT(3), /**< Transport and device used by endpoint */ + UCP_EP_ATTR_FIELD_USER_DATA = + UCS_BIT(4) /**< User data associated with the endpoint */ +}; + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP endpoint attributes. + * + * The structure defines the attributes that characterize the particular + * endpoint. + */ +typedef struct ucp_ep_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_ep_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Endpoint name. Tracing and analysis tools can identify the endpoint using + * this name. + */ + char name[UCP_ENTITY_NAME_MAX]; + + /** + * Local socket address for this endpoint. Valid only for endpoints created + * by connecting to a socket address. + * If this field is specified for an endpoint not connected to a socket address, + * UCS_ERR_NOT_CONNECTED will be returned. + */ + struct sockaddr_storage local_sockaddr; + + /** + * Remote socket address this endpoint is connected to. 
Valid only for endpoints + * created by connecting to a socket address. + * If this field is specified for an endpoint not connected to a socket address, + * UCS_ERR_NOT_CONNECTED will be returned. + */ + struct sockaddr_storage remote_sockaddr; + + /** + * Structure defining an array containing transport and device names used + * by this endpoint. The caller is responsible for allocation and + * deallocation of this array. + */ + ucp_transports_t transports; + + /** + * User data associated with an endpoint passed in + * @ref ucp_ep_params_t::user_data. + */ + void* user_data; +} ucp_ep_attr_t; + +/** + * @ingroup UCP_ENDPOINT + * @brief Get attributes of a given endpoint. + * + * This routine fetches information about the endpoint. + * + * @param [in] ep Endpoint object to query. + * @param [out] attr Filled with attributes of the endpoint. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_ep_query(ucp_ep_h ep, ucp_ep_attr_t* attr); + +/** + * @example ucp_hello_world.c + * UCP hello world client / server example utility. + * + * @example ucp_client_server.c + * UCP client / server example using different APIs (tag, stream, am) utility. + */ + +END_C_DECLS + +#endif diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/tpls/ucx/ucp/api/ucp_compat.h b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/tpls/ucx/ucp/api/ucp_compat.h new file mode 100644 index 0000000000..665e8bcdc4 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/tpls/ucx/ucp/api/ucp_compat.h @@ -0,0 +1,1439 @@ +/* + * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_COMPAT_H_ +#define UCP_COMPAT_H_ + +#include +#include + +BEGIN_C_DECLS + +/** @file ucp_compat.h */ + +/** + * @ingroup UCP_WORKER + * @deprecated Replaced by @ref ucp_listener_conn_handler_t. 
+ */ +typedef struct ucp_listener_accept_handler +{ + ucp_listener_accept_callback_t cb; /**< Endpoint creation callback */ + void* arg; /**< User defined argument for the + callback */ +} ucp_listener_accept_handler_t; + +/** + * @ingroup UCP_COMM + * @deprecated Replaced by @ref ucp_request_test. + */ +int +ucp_request_is_completed(void* request); + +/** + * @ingroup UCP_ENDPOINT + * @deprecated Replaced by @ref ucp_request_free. + */ +void +ucp_request_release(void* request); + +/** + * @ingroup UCP_ENDPOINT + * @deprecated Replaced by @ref ucp_ep_close_nb. + */ +void +ucp_ep_destroy(ucp_ep_h ep); + +/** + * @ingroup UCP_ENDPOINT + * @deprecated Replaced by @ref ucp_ep_close_nb. + */ +ucs_status_ptr_t +ucp_disconnect_nb(ucp_ep_h ep); + +/** + * @ingroup UCP_COMM + * @deprecated User should allocate requests using standard methods such as + * malloc() or alloca(). + * + * @param [in] worker UCP worker. + * + * @return This function is not implemented and always returns NULL. + */ +void* +ucp_request_alloc(ucp_worker_h worker); + +/** + * @ingroup UCP_ENDPOINT + * @deprecated Replaced by @ref ucp_tag_recv_request_test and + * @ref ucp_request_check_status depends on use case. + * + * @note Please use @ref ucp_request_check_status for cases that only need to + * check the completion status of an outstanding request. + * @ref ucp_request_check_status can be used for any type of request. + * @ref ucp_tag_recv_request_test should only be used for requests + * returned by @ref ucp_tag_recv_nb (or request allocated by user for + * @ref ucp_tag_recv_nbr) for which additional information + * (returned via the @a info pointer) is needed. + */ +ucs_status_t +ucp_request_test(void* request, ucp_tag_recv_info_t* info); + +/** + * @ingroup UCP_MEM + * @deprecated Replaced by @ref ucp_memh_pack "ucp_memh_pack()". + * @brief Pack memory region remote access key. + * + * This routine allocates a memory buffer and packs a remote access key (RKEY) + * object into it. 
RKEY is an opaque object that provides the information that is + * necessary for remote memory access. + * This routine packs the RKEY object in a portable format such that the + * object can be @ref ucp_ep_rkey_unpack "unpacked" on any platform supported by the + * UCP library. In order to release the memory buffer allocated by this routine, + * the application is responsible for calling the @ref ucp_rkey_buffer_release + * "ucp_rkey_buffer_release()" routine. + * + * + * @note + * @li RKEYs for InfiniBand and Cray Aries networks typically include + * the InfiniBand and Aries key. + * @li In order to enable remote direct memory access to the memory associated + * with the memory handle, the application is responsible for sharing the RKEY with + * the peers that will initiate the access. + * + * @param [in] context Application @ref ucp_context_h "context" which was + * used to allocate/map the memory. + * @param [in] memh @ref ucp_mem_h "Handle" to the memory region. + * @param [out] rkey_buffer_p Memory buffer allocated by the library. + * The buffer contains the packed RKEY. + * @param [out] size_p Size (in bytes) of the packed RKEY. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_rkey_pack(ucp_context_h context, ucp_mem_h memh, void** rkey_buffer_p, + size_t* size_p); + +/** + * @ingroup UCP_MEM + * @deprecated Replaced by @ref ucp_memh_buffer_release + * "ucp_memh_buffer_release()". + * @brief Release packed remote key buffer. + * + * This routine releases the buffer that was allocated using @ref ucp_rkey_pack + * "ucp_rkey_pack()". + * + * @warning + * @li Once memory is released, an access to the memory may cause undefined + * behavior. + * @li If the input memory address was not allocated using + * @ref ucp_rkey_pack "ucp_rkey_pack()" routine, the behavior of this routine + * is undefined. + * + * @param [in] rkey_buffer Buffer to release. 
+ */ +void +ucp_rkey_buffer_release(void* rkey_buffer); + +/** + * @ingroup UCP_ENDPOINT + * @deprecated Replaced by @ref ucp_ep_flush_nb. + */ +ucs_status_t +ucp_ep_flush(ucp_ep_h ep); + +/** + * @ingroup UCP_WORKER + * + * @brief Flush outstanding AMO and RMA operations on the @ref ucp_worker_h + * "worker" + * @deprecated Replaced by @ref ucp_worker_flush_nb. The following example + * implements the same functionality using @ref ucp_worker_flush_nb : + * @code + * ucs_status_t worker_flush(ucp_worker_h worker) + * { + * void *request = ucp_worker_flush_nb(worker); + * if (request == NULL) { + * return UCS_OK; + * } else if (UCS_PTR_IS_ERR(request)) { + * return UCS_PTR_STATUS(request); + * } else { + * ucs_status_t status; + * do { + * ucp_worker_progress(worker); + * status = ucp_request_check_status(request); + * } while (status == UCS_INPROGRESS); + * ucp_request_release(request); + * return status; + * } + * } + * @endcode + * + * + * This routine flushes all outstanding AMO and RMA communications on the + * @ref ucp_worker_h "worker". All the AMO and RMA operations issued on the + * @a worker prior to this call are completed both at the origin and at the + * target when this call returns. + * + * @note For description of the differences between @ref ucp_worker_flush + * "flush" and @ref ucp_worker_fence "fence" operations please see + * @ref ucp_worker_fence "ucp_worker_fence()" + * + * @param [in] worker UCP worker. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_worker_flush(ucp_worker_h worker); + +/** + * @ingroup UCP_COMM + * @brief Blocking remote memory put operation. + * @deprecated Replaced by @ref ucp_put_nb. 
The following example implements + * the same functionality using @ref ucp_put_nb : + * @code + * void empty_callback(void *request, ucs_status_t status) + * { + * } + * + * ucs_status_t put(ucp_ep_h ep, const void *buffer, size_t length, + * uint64_t remote_addr, ucp_rkey_h rkey) + * { + * void *request = ucp_put_nb(ep, buffer, length, remote_addr, rkey, + * empty_callback); + * if (request == NULL) { + * return UCS_OK; + * } else if (UCS_PTR_IS_ERR(request)) { + * return UCS_PTR_STATUS(request); + * } else { + * ucs_status_t status; + * do { + * ucp_worker_progress(worker); + * status = ucp_request_check_status(request); + * } while (status == UCS_INPROGRESS); + * ucp_request_release(request); + * return status; + * } + * } + * @endcode + * + * This routine stores contiguous block of data that is described by the + * local address @a buffer in the remote contiguous memory region described by + * @a remote_addr address and the @ref ucp_rkey_h "memory handle" @a rkey. The + * routine returns when it is safe to reuse the source address @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local source address. + * @param [in] length Length of the data (in bytes) stored under the + * source address. + * @param [in] remote_addr Pointer to the destination remote address + * to write to. + * @param [in] rkey Remote memory key associated with the + * remote address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_put(ucp_ep_h ep, const void* buffer, size_t length, uint64_t remote_addr, + ucp_rkey_h rkey); + +/** + * @ingroup UCP_COMM + * @brief Blocking remote memory get operation. + * @deprecated Replaced by @ref ucp_get_nb. @see ucp_put. + * + * This routine loads contiguous block of data that is described by the remote + * address @a remote_addr and the @ref ucp_rkey_h "memory handle" @a rkey in + * the local contiguous memory region described by @a buffer address. 
The + * routine returns when remote data is loaded and stored under the local address + * @e buffer. + * + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local destination address. + * @param [in] length Length of the data (in bytes) to load into the + * destination address. + * @param [in] remote_addr Pointer to the source remote address + * to read from. + * @param [in] rkey Remote memory key associated with the + * remote address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_get(ucp_ep_h ep, void* buffer, size_t length, uint64_t remote_addr, ucp_rkey_h rkey); + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic add operation for 32 bit integers + * @deprecated Replaced by @ref ucp_atomic_post with opcode UCP_ATOMIC_POST_OP_ADD. + * @see ucp_put. + * + * This routine performs an add operation on a 32 bit integer value atomically. + * The remote integer value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a add value is the value that is used for the add operation. + * When the operation completes the sum of the original remote value and the + * operand value (@a add) is stored in remote memory. + * The call to the routine returns immediately, independent of operation + * completion. + * + * @note The remote address must be aligned to 32 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] add Value to add. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. 
+ * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_atomic_add32(ucp_ep_h ep, uint32_t add, uint64_t remote_addr, ucp_rkey_h rkey); + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic add operation for 64 bit integers + * @deprecated Replaced by @ref ucp_atomic_post with opcode UCP_ATOMIC_POST_OP_ADD. + * @see ucp_put. + * + * This routine performs an add operation on a 64 bit integer value atomically. + * The remote integer value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a add value is the value that is used for the add operation. + * When the operation completes the sum of the original remote value and the + * operand value (@a add) is stored in remote memory. + * The call to the routine returns immediately, independent of operation + * completion. + * + * @note The remote address must be aligned to 64 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] add Value to add. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_atomic_add64(ucp_ep_h ep, uint64_t add, uint64_t remote_addr, ucp_rkey_h rkey); + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic fetch and add operation for 32 bit integers + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_FADD. + * @see ucp_put. + * + * This routine performs an add operation on a 32 bit integer value atomically. + * The remote integer value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a add value is the value that is used for the add operation. 
+ * When the operation completes, the original remote value is stored in the + * local memory @a result, and the sum of the original remote value and the + * operand value is stored in remote memory. + * The call to the routine returns when the operation is completed and the + * @a result value is updated. + * + * @note The remote address must be aligned to 32 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] add Value to add. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_atomic_fadd32(ucp_ep_h ep, uint32_t add, uint64_t remote_addr, ucp_rkey_h rkey, + uint32_t* result); + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic fetch and add operation for 64 bit integers + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_FADD. + * @see ucp_put. + * + * This routine performs an add operation on a 64 bit integer value atomically. + * The remote integer value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a add value is the value that is used for the add operation. + * When the operation completes, the original remote value is stored in the + * local memory @a result, and the sum of the original remote value and the + * operand value is stored in remote memory. + * The call to the routine returns when the operation is completed and the + * @a result value is updated. + * + * @note The remote address must be aligned to 64 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] add Value to add. 
+ * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_atomic_fadd64(ucp_ep_h ep, uint64_t add, uint64_t remote_addr, ucp_rkey_h rkey, + uint64_t* result); + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic swap operation for 32 bit values + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_SWAP. + * @see ucp_put. + * + * This routine swaps a 32 bit value between local and remote memory. + * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a swap value is the value that is used for the swap operation. + * When the operation completes, the remote value is stored in the + * local memory @a result, and the operand value (@a swap) is stored in remote + * memory. The call to the routine returns when the operation is completed and + * the @a result value is updated. + * + * @note The remote address must be aligned to 32 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] swap Value to swap. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. 
+ * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_atomic_swap32(ucp_ep_h ep, uint32_t swap, uint64_t remote_addr, ucp_rkey_h rkey, + uint32_t* result); + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic swap operation for 64 bit values + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_SWAP. + * @see ucp_put. + * + * This routine swaps a 64 bit value between local and remote memory. + * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a swap value is the value that is used for the swap operation. + * When the operation completes, the remote value is stored in the + * local memory @a result, and the operand value (@a swap) is stored in remote + * memory. The call to the routine returns when the operation is completed and + * the @a result value is updated. + * + * @note The remote address must be aligned to 64 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] swap Value to swap. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_atomic_swap64(ucp_ep_h ep, uint64_t swap, uint64_t remote_addr, ucp_rkey_h rkey, + uint64_t* result); + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic conditional swap (cswap) operation for 32 bit values. + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_CSWAP. + * @see ucp_put. 
+ * + * This routine conditionally swaps a 32 bit value between local and remote + * memory. The swap occurs only if the condition value (@a compare) is equal + * to the remote value, otherwise the remote memory is not modified. The + * remote value is described by the combination of the remote memory address @p + * remote_addr and the @ref ucp_rkey_h "remote memory handle" @a rkey. The @p + * swap value is the value that is used to update the remote memory if the + * condition is true. The call to the routine returns when the operation is + * completed and the @a result value is updated. + * + * @note The remote address must be aligned to 32 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] compare Value to compare to. + * @param [in] swap Value to swap. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_atomic_cswap32(ucp_ep_h ep, uint32_t compare, uint32_t swap, uint64_t remote_addr, + ucp_rkey_h rkey, uint32_t* result); + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic conditional swap (cswap) operation for 64 bit values. + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_CSWAP. + * @see ucp_put. + * + * This routine conditionally swaps a 64 bit value between local and remote + * memory. The swap occurs only if the condition value (@a compare) is equal + * to the remote value, otherwise the remote memory is not modified. The + * remote value is described by the combination of the remote memory address @p + * remote_addr and the @ref ucp_rkey_h "remote memory handle" @a rkey. 
The @p + * swap value is the value that is used to update the remote memory if the + * condition is true. The call to the routine returns when the operation is + * completed and the @a result value is updated. + * + * @note The remote address must be aligned to 64 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] compare Value to compare to. + * @param [in] swap Value to swap. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_atomic_cswap64(ucp_ep_h ep, uint64_t compare, uint64_t swap, uint64_t remote_addr, + ucp_rkey_h rkey, uint64_t* result); + +/** + * @ingroup UCP_ENDPOINT + * @brief Modify endpoint parameters. + * + * @deprecated Use @ref ucp_listener_conn_handler_t instead of @ref + * ucp_listener_accept_handler_t, if you have other use case please + * submit an issue on https://github.com/openucx/ucx or report to + * ucx-group@elist.ornl.gov + * + * This routine modifies @ref ucp_ep_h "endpoint" created by @ref ucp_ep_create + * or @ref ucp_listener_accept_callback_t. For example, this API can be used + * to setup custom parameters like @ref ucp_ep_params_t::user_data or + * @ref ucp_ep_params_t::err_handler to endpoint created by + * @ref ucp_listener_accept_callback_t. + * + * @param [in] ep A handle to the endpoint. + * @param [in] params User defined @ref ucp_ep_params_t configurations + * for the @ref ucp_ep_h "UCP endpoint". + * + * @return NULL - The endpoint is modified successfully. + * @return UCS_PTR_IS_ERR(_ptr) - The reconfiguration failed and an error code + * indicates the status. However, the @a endpoint + * is not modified and can be used further. 
+ * @return otherwise - The reconfiguration process is started, and can be + * completed at any point in time. A request handle + * is returned to the application in order to track + * progress of the endpoint modification. + * The application is responsible for releasing the + * handle using the @ref ucp_request_free routine. + * + * @note See the documentation of @ref ucp_ep_params_t for details, only some of + * the parameters can be modified. + */ +ucs_status_ptr_t +ucp_ep_modify_nb(ucp_ep_h ep, const ucp_ep_params_t* params); + +/** + * @ingroup UCP_WORKER + * @brief Get the address of the worker object. + * + * @deprecated Use @ref ucp_worker_query with the flag + * @ref UCP_WORKER_ATTR_FIELD_ADDRESS in order to obtain the worker + * address. + * + * This routine returns the address of the worker object. This address can be + * passed to remote instances of the UCP library in order to connect to this + * worker. The memory for the address handle is allocated by this function, and + * must be released by using @ref ucp_worker_release_address + * "ucp_worker_release_address()" routine. + * + * @param [in] worker Worker object whose address to return. + * @param [out] address_p A pointer to the worker address. + * @param [out] address_length_p The size in bytes of the address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_worker_get_address(ucp_worker_h worker, ucp_address_t** address_p, + size_t* address_length_p); + +/** + * @ingroup UCP_ENDPOINT + * + * @brief Non-blocking @ref ucp_ep_h "endpoint" closure. + * + * @deprecated Use @ref ucp_ep_close_nbx instead. + * + * This routine releases the @ref ucp_ep_h "endpoint". The endpoint closure + * process depends on the selected @a mode. + * + * @param [in] ep Handle to the endpoint to close. + * @param [in] mode One from @ref ucp_ep_close_mode value. + * + * @return UCS_OK - The endpoint is closed successfully. 
+ * @return UCS_PTR_IS_ERR(_ptr) - The closure failed and an error code indicates + * the transport level status. However, resources + * are released and the @a endpoint can no longer + * be used. + * @return otherwise - The closure process is started, and can be + * completed at any point in time. A request handle + * is returned to the application in order to track + * progress of the endpoint closure. The application + * is responsible for releasing the handle using the + * @ref ucp_request_free routine. + * + * @note @ref ucp_ep_close_nb replaces deprecated @ref ucp_disconnect_nb and + * @ref ucp_ep_destroy + */ +ucs_status_ptr_t +ucp_ep_close_nb(ucp_ep_h ep, unsigned mode); + +/** + * @ingroup UCP_ENDPOINT + * + * @brief Non-blocking flush of outstanding AMO and RMA operations on the + * @ref ucp_ep_h "endpoint". + * + * @deprecated Use @ref ucp_ep_flush_nbx instead. + * + * This routine flushes all outstanding AMO and RMA communications on the + * @ref ucp_ep_h "endpoint". All the AMO and RMA operations issued on the + * @a ep prior to this call are completed both at the origin and at the target + * @ref ucp_ep_h "endpoint" when this call returns. + * + * @param [in] ep UCP endpoint. + * @param [in] flags Flags for flush operation. Reserved for future use. + * @param [in] cb Callback which will be called when the flush operation + * completes. + * + * @return NULL - The flush operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. + * @return otherwise - Flush operation was scheduled and can be completed + * in any point in time. The request handle is returned + * to the application in order to track progress. The + * application is responsible for releasing the handle + * using @ref ucp_request_free "ucp_request_free()" + * routine. + */ +ucs_status_ptr_t +ucp_ep_flush_nb(ucp_ep_h ep, unsigned flags, ucp_send_callback_t cb); + +/** + * @ingroup UCP_WORKER + * @brief Add user defined callback for Active Message. 
+ * + * @deprecated Use @ref ucp_worker_set_am_recv_handler instead. + * + * This routine installs a user defined callback to handle incoming Active + * Messages with a specific id. This callback is called whenever an Active + * Message that was sent from the remote peer by @ref ucp_am_send_nb is + * received on this worker. + * + * @param [in] worker UCP worker on which to set the Active Message + * handler. + * @param [in] id Active Message id. + * @param [in] cb Active Message callback. NULL to clear. + * @param [in] arg Active Message argument, which will be passed + * in to every invocation of the callback as the + * arg argument. + * @param [in] flags Dictates how an Active Message is handled on the + * remote endpoint. Currently only + * UCP_AM_FLAG_WHOLE_MSG is supported, which + * indicates the callback will not be invoked + * until all data has arrived. + * + * @return error code if the worker does not support Active Messages or + * requested callback flags. + */ +ucs_status_t +ucp_worker_set_am_handler(ucp_worker_h worker, uint16_t id, ucp_am_callback_t cb, + void* arg, uint32_t flags); + +/** + * @ingroup UCP_COMM + * @brief Send Active Message. + * + * @deprecated Use @ref ucp_am_send_nbx instead. + * + * This routine sends an Active Message to an ep. It does not support + * CUDA memory. + * + * @param [in] ep UCP endpoint where the Active Message will be run. + * @param [in] id Active Message id. Specifies which registered + * callback to run. + * @param [in] buffer Pointer to the data to be sent to the target node + * of the Active Message. + * @param [in] count Number of elements to send. + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] cb Callback that is invoked upon completion of the + * data transfer if it is not completed immediately. + * @param [in] flags Operation flags as defined by @ref ucp_send_am_flags. + * + * @return NULL Active Message was sent immediately. 
+ * @return UCS_PTR_IS_ERR(_ptr) Error sending Active Message. + * @return otherwise Pointer to request, and Active Message is known + * to be completed after cb is run. + */ +ucs_status_ptr_t +ucp_am_send_nb(ucp_ep_h ep, uint16_t id, const void* buffer, size_t count, + ucp_datatype_t datatype, ucp_send_callback_t cb, unsigned flags); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking stream send operation. + * + * @deprecated Use @ref ucp_stream_send_nbx instead. + * + * This routine sends data that is described by the local address @a buffer, + * size @a count, and @a datatype object to the destination endpoint @a ep. + * The routine is non-blocking and therefore returns immediately, however + * the actual send operation may be delayed. The send operation is considered + * completed when it is safe to reuse the source @e buffer. If the send + * operation is completed immediately the routine returns UCS_OK and the + * callback function @a cb is @b not invoked. If the operation is + * @b not completed immediately and no error reported, then the UCP library will + * schedule invocation of the callback @a cb upon completion of the send + * operation. In other words, the completion of the operation will be signaled + * either by the return code or by the callback. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send. + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] cb Callback function that is invoked whenever the + * send operation is completed. It is important to note + * that the callback is only invoked in the event that + * the operation cannot be completed in place. + * @param [in] flags Reserved for future use. 
+ * + * @return NULL - The send operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed in any point in time. The request handle + * is returned to the application in order to track + * progress of the message. The application is + * responsible for releasing the handle using + * @ref ucp_request_free routine. + */ +ucs_status_ptr_t +ucp_stream_send_nb(ucp_ep_h ep, const void* buffer, size_t count, ucp_datatype_t datatype, + ucp_send_callback_t cb, unsigned flags); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking stream receive operation of structured data into a + * user-supplied buffer. + * + * @deprecated Use @ref ucp_stream_recv_nbx instead. + * + * This routine receives data that is described by the local address @a buffer, + * size @a count, and @a datatype object on the endpoint @a ep. The routine is + * non-blocking and therefore returns immediately. The receive operation is + * considered complete when the message is delivered to the buffer. If data is + * not immediately available, the operation will be scheduled for receive and + * a request handle will be returned. In order to notify the application about + * completion of a scheduled receive operation, the UCP library will invoke + * the call-back @a cb when data is in the receive buffer and ready for + * application access. If the receive operation cannot be started, the routine + * returns an error. + * + * @param [in] ep UCP endpoint that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data. + * @param [in] count Number of elements to receive into @a buffer. + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] cb Callback function that is invoked whenever the + * receive operation is completed and the data is ready + * in the receive @a buffer. 
It is important to note + * that the call-back is only invoked in a case when + * the operation cannot be completed immediately. + * @param [out] length Size of the received data in bytes. The value is + * valid only if return code is UCS_OK. + * @note The amount of data received, in bytes, is always an + * integral multiple of the @a datatype size. + * @param [in] flags Flags defined in @ref ucp_stream_recv_flags_t. + * + * @return NULL - The receive operation was completed + * immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. A request + * handle is returned to the application in order + * to track progress of the operation. + * The application is responsible for releasing + * the handle by calling the + * @ref ucp_request_free routine. + */ +ucs_status_ptr_t +ucp_stream_recv_nb(ucp_ep_h ep, void* buffer, size_t count, ucp_datatype_t datatype, + ucp_stream_recv_callback_t cb, size_t* length, unsigned flags); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-send operations + * + * @deprecated Use @ref ucp_tag_send_nbx instead. + * + * This routine sends a message that is described by the local address @a + * buffer, size @a count, and @a datatype object to the destination endpoint + * @a ep. Each message is associated with a @a tag value that is used for + * message matching on the @ref ucp_tag_recv_nb "receiver". The routine is + * non-blocking and therefore returns immediately, however the actual send + * operation may be delayed. The send operation is considered completed when + * it is safe to reuse the source @e buffer. If the send operation is + * completed immediately the routine returns UCS_OK and the call-back function + * @a cb is @b not invoked. If the operation is @b not completed immediately + * and no error reported then the UCP library will schedule to invoke the + * call-back @a cb whenever the send operation will be completed. 
In other + * words, the completion of a message can be signaled by the return code or + * the call-back. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag. + * @param [in] cb Callback function that is invoked whenever the + * send operation is completed. It is important to note + * that the call-back is only invoked in a case when + * the operation cannot be completed in place. + * + * @return NULL - The send operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed in any point in time. The request handle + * is returned to the application in order to track + * progress of the message. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t +ucp_tag_send_nb(ucp_ep_h ep, const void* buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, ucp_send_callback_t cb); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-send operations with user provided request + * + * @deprecated Use @ref ucp_tag_send_nbx with the flag + * @ref UCP_OP_ATTR_FIELD_REQUEST instead. + * + * This routine provides a convenient and efficient way to implement a + * blocking send pattern. It also completes requests faster than + * @ref ucp_tag_send_nb() because: + * @li it always uses eager protocol to send data up to the + * rendezvous threshold. + * @li its rendezvous threshold is higher than the one used by + * the @ref ucp_tag_send_nb(). 
The threshold is controlled by + * the @b UCX_SEND_NBR_RNDV_THRESH environment variable. + * @li its request handling is simpler. There is no callback and no need + * to allocate and free requests. In fact, the request can be allocated by + * the caller on the stack. + * + * This routine sends a message that is described by the local address @a + * buffer, size @a count, and @a datatype object to the destination endpoint + * @a ep. Each message is associated with a @a tag value that is used for + * message matching on the @ref ucp_tag_recv_nbr "receiver". + * + * The routine is non-blocking and therefore returns immediately, however + * the actual send operation may be delayed. The send operation is considered + * completed when it is safe to reuse the source @e buffer. If the send + * operation is completed immediately the routine returns UCS_OK. + * + * If the operation is @b not completed immediately and no error is reported + * then the UCP library will fill a user provided @a req and + * return UCS_INPROGRESS status. In order to monitor completion of the + * operation @ref ucp_request_check_status() should be used. + * + * Following pseudo code implements a blocking send function: + * @code + * MPI_send(...) + * { + * char *request; + * ucs_status_t status; + * + * // allocate request on the stack + * // ucp_context_query() was used to get ucp_request_size + * request = alloca(ucp_request_size); + * + * // note: make sure that there is enough memory before the + * // request handle + * status = ucp_tag_send_nbr(ep, ..., request + ucp_request_size); + * if (status != UCS_INPROGRESS) { + * return status; + * } + * + * do { + * ucp_worker_progress(worker); + * status = ucp_request_check_status(request + ucp_request_size); + * } while (status == UCS_INPROGRESS); + * + * return status; + * } + * @endcode + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes.
+ * + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag. + * @param [in] req Request handle allocated by the user. There should + * be at least UCP request size bytes of available + * space before the @a req. The size of UCP request + * can be obtained by @ref ucp_context_query function. + * + * @return UCS_OK - The send operation was completed immediately. + * @return UCS_INPROGRESS - The send was not completed and is in progress. + * @ref ucp_request_check_status() should be used to + * monitor @a req status. + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_tag_send_nbr(ucp_ep_h ep, const void* buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, void* req); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking synchronous tagged-send operation. + * + * @deprecated Use @ref ucp_tag_send_sync_nbx instead. + * + * Same as @ref ucp_tag_send_nb, except the request completes only after there + * is a remote tag match on the message (which does not always mean the remote + * receive has been completed). This function never completes "in-place", and + * always returns a request handle. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * @note Returns @ref UCS_ERR_UNSUPPORTED if @ref UCP_ERR_HANDLING_MODE_PEER is + * enabled. This is a temporary implementation-related constraint that + * will be addressed in future releases. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag. 
+ * @param [in] cb Callback function that is invoked whenever the + * send operation is completed. + * + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed in any point in time. The request handle + * is returned to the application in order to track + * progress of the message. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t +ucp_tag_send_sync_nb(ucp_ep_h ep, const void* buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, ucp_send_callback_t cb); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-receive operation. + * + * @deprecated Use @ref ucp_tag_recv_nbx instead. + * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, and @a datatype object on the @a worker. The tag + * value of the receive message has to match the @a tag and @a tag_mask values, + * where the @a tag_mask indicates which bits of the tag have to be matched. The + * routine is non-blocking and therefore returns immediately. The receive + * operation is considered completed when the message is delivered to the @a + * buffer. In order to notify the application about completion of the receive + * operation the UCP library will invoke the call-back @a cb when the received + * message is in the receive buffer and ready for application access. If the + * receive operation cannot be started the routine returns an error. + * + * @note This routine cannot return UCS_OK. It always returns a request + * handle or an error. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data. + * @param [in] count Number of elements to receive + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag to expect.
+ * @param [in] tag_mask Bit mask that indicates the bits that are used for + * the matching of the incoming tag + * against the expected tag. + * @param [in] cb Callback function that is invoked whenever the + * receive operation is completed and the data is ready + * in the receive @a buffer. + * + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. The request + * handle is returned to the application in order + * to track progress of the operation. The + * application is responsible for releasing the + * handle using @ref ucp_request_free + * "ucp_request_free()" routine. + */ +ucs_status_ptr_t +ucp_tag_recv_nb(ucp_worker_h worker, void* buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, ucp_tag_t tag_mask, ucp_tag_recv_callback_t cb); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-receive operation. + * + * @deprecated Use @ref ucp_tag_recv_nbx with the flag + * @ref UCP_OP_ATTR_FIELD_REQUEST instead. + * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, and @a datatype object on the @a worker. The tag + * value of the receive message has to match the @a tag and @a tag_mask values, + * where the @a tag_mask indicates which bits of the tag have to be matched. The + * routine is non-blocking and therefore returns immediately. The receive + * operation is considered completed when the message is delivered to the @a + * buffer. In order to monitor completion of the operation + * @ref ucp_request_check_status or @ref ucp_tag_recv_request_test should be + * used. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data. + * @param [in] count Number of elements to receive + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag to expect. 
+ * @param [in] tag_mask Bit mask that indicates the bits that are used for + * the matching of the incoming tag + * against the expected tag. + * @param [in] req Request handle allocated by the user. There should + * be at least UCP request size bytes of available + * space before the @a req. The size of UCP request + * can be obtained by @ref ucp_context_query function. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_tag_recv_nbr(ucp_worker_h worker, void* buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, ucp_tag_t tag_mask, void* req); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking receive operation for a probed message. + * + * @deprecated Use @ref ucp_tag_recv_nbx instead. + * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, @a message handle, and @a datatype object on the @a + * worker. The @a message handle can be obtained by calling the @ref + * ucp_tag_probe_nb "ucp_tag_probe_nb()" routine. The @ref ucp_tag_msg_recv_nb + * "ucp_tag_msg_recv_nb()" routine is non-blocking and therefore returns + * immediately. The receive operation is considered completed when the message + * is delivered to the @a buffer. In order to notify the application about + * completion of the receive operation the UCP library will invoke the + * call-back @a cb when the received message is in the receive buffer and ready + * for application access. If the receive operation cannot be started the + * routine returns an error. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer that will receive the data. + * @param [in] count Number of elements to receive + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] message Message handle. 
+ * @param [in] cb Callback function that is invoked whenever the + * receive operation is completed and the data is ready + * in the receive @a buffer. + * + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. The request + * handle is returned to the application in order + * to track progress of the operation. The + * application is responsible for releasing the + * handle using @ref ucp_request_free + * "ucp_request_free()" routine. + */ +ucs_status_ptr_t +ucp_tag_msg_recv_nb(ucp_worker_h worker, void* buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_message_h message, + ucp_tag_recv_callback_t cb); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking implicit remote memory put operation. + * + * @deprecated Use @ref ucp_put_nbx without passing the flag + * @ref UCP_OP_ATTR_FIELD_CALLBACK instead. If a request pointer + * is returned, release it immediately by @ref ucp_request_free. + * + * This routine initiates storage of a contiguous block of data that is + * described by the local address @a buffer in the remote contiguous memory + * region described by @a remote_addr address and the @ref ucp_rkey_h "memory + * handle" @a rkey. The routine returns immediately and @b does @b not + * guarantee re-usability of the source address @e buffer. If the operation is + * completed immediately the routine returns UCS_OK, otherwise UCS_INPROGRESS + * or an error is returned to the user. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" + * in order to guarantee re-usability of the source address @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local source address. + * @param [in] length Length of the data (in bytes) stored under the + * source address. + * @param [in] remote_addr Pointer to the destination remote memory address + * to write to.
+ * @param [in] rkey Remote memory key associated with the + * remote memory address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_put_nbi(ucp_ep_h ep, const void* buffer, size_t length, uint64_t remote_addr, + ucp_rkey_h rkey); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking remote memory put operation. + * + * @deprecated Use @ref ucp_put_nbx instead. + * + * This routine initiates storage of a contiguous block of data that is + * described by the local address @a buffer in the remote contiguous memory + * region described by @a remote_addr address and the @ref ucp_rkey_h "memory + * handle" @a rkey. The routine returns immediately and @b does @b not + * guarantee re-usability of the source address @e buffer. If the operation is + * completed immediately the routine returns UCS_OK, otherwise UCS_INPROGRESS + * or an error is returned to the user. If the put operation completes immediately, + * the routine returns UCS_OK and the call-back routine @a cb is @b not + * invoked. If the operation is @b not completed immediately and no error is + * reported, then the UCP library will schedule invocation of the call-back + * routine @a cb upon completion of the put operation. In other words, the + * completion of a put operation can be signaled by the return code or + * execution of the call-back. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" + * in order to guarantee re-usability of the source address @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local source address. + * @param [in] length Length of the data (in bytes) stored under the + * source address. + * @param [in] remote_addr Pointer to the destination remote memory address + * to write to. + * @param [in] rkey Remote memory key associated with the + * remote memory address.
+ * @param [in] cb Call-back function that is invoked whenever the + * put operation is completed and the local buffer + * can be modified. Does not guarantee remote + * completion. + * + * @return NULL - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t +ucp_put_nb(ucp_ep_h ep, const void* buffer, size_t length, uint64_t remote_addr, + ucp_rkey_h rkey, ucp_send_callback_t cb); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking implicit remote memory get operation. + * + * @deprecated Use @ref ucp_get_nbx without passing the flag + * @ref UCP_OP_ATTR_FIELD_CALLBACK instead. If a request pointer + * is returned, release it immediately by @ref ucp_request_free. + * + * This routine initiates a load of a contiguous block of data that is described + * by the remote memory address @a remote_addr and the @ref ucp_rkey_h "memory handle" + * @a rkey in the local contiguous memory region described by @a buffer + * address. The routine returns immediately and @b does @b not guarantee that + * remote data is loaded and stored under the local address @e buffer. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" in order + * to guarantee that remote data is loaded and stored under the local address + * @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local destination address. + * @param [in] length Length of the data (in bytes) stored under the + * destination address. + * @param [in] remote_addr Pointer to the source remote memory address + * to read from.
+ * @param [in] rkey Remote memory key associated with the + * remote memory address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_get_nbi(ucp_ep_h ep, void* buffer, size_t length, uint64_t remote_addr, + ucp_rkey_h rkey); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking remote memory get operation. + * + * @deprecated Use @ref ucp_get_nbx instead. + * + * This routine initiates a load of a contiguous block of data that is + * described by the remote memory address @a remote_addr and the @ref ucp_rkey_h + * "memory handle" @a rkey in the local contiguous memory region described + * by @a buffer address. The routine returns immediately and @b does @b not + * guarantee that remote data is loaded and stored under the local address @e + * buffer. If the operation is completed immediately the routine returns UCS_OK, + * otherwise UCS_INPROGRESS or an error is returned to the user. If the get + * operation completes immediately, the routine returns UCS_OK and the + * call-back routine @a cb is @b not invoked. If the operation is @b not + * completed immediately and no error is reported, then the UCP library will + * schedule invocation of the call-back routine @a cb upon completion of the + * get operation. In other words, the completion of a get operation can be + * signaled by the return code or execution of the call-back. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" + * in order to guarantee that remote data is loaded and stored under the + * local address @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local destination address. + * @param [in] length Length of the data (in bytes) stored under the + * destination address. + * @param [in] remote_addr Pointer to the source remote memory address + * to read from. + * @param [in] rkey Remote memory key associated with the + * remote memory address.
+ * @param [in] cb Call-back function that is invoked whenever the + * get operation is completed and the data is + * visible to the local process. + * + * @return NULL - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t +ucp_get_nb(ucp_ep_h ep, void* buffer, size_t length, uint64_t remote_addr, + ucp_rkey_h rkey, ucp_send_callback_t cb); + +/** + * @ingroup UCP_COMM + * @brief Atomic operation requested for ucp_atomic_post + * + * @deprecated Use @ref ucp_atomic_op_nbx and @ref ucp_atomic_op_t instead. + * + * This enumeration defines which atomic memory operation should be + * performed by the ucp_atomic_post family of functions. All of these are + * non-fetching atomics and will not result in a request handle. + */ +typedef enum +{ + UCP_ATOMIC_POST_OP_ADD, /**< Atomic add */ + UCP_ATOMIC_POST_OP_AND, /**< Atomic and */ + UCP_ATOMIC_POST_OP_OR, /**< Atomic or */ + UCP_ATOMIC_POST_OP_XOR, /**< Atomic xor */ + UCP_ATOMIC_POST_OP_LAST +} ucp_atomic_post_op_t; + +/** + * @ingroup UCP_COMM + * @brief Post an atomic memory operation. + * + * @deprecated Use @ref ucp_atomic_op_nbx without the flag + * @ref UCP_OP_ATTR_FIELD_REPLY_BUFFER instead. + * + * This routine posts an atomic memory operation to a remote value. + * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. + * Return from the function does not guarantee completion. A user must + * call @ref ucp_ep_flush_nb or @ref ucp_worker_flush_nb to guarantee that the + * remote value has been updated. 
+ * + * @param [in] ep UCP endpoint. + * @param [in] opcode One of @ref ucp_atomic_post_op_t. + * @param [in] value Source operand for the atomic operation. + * @param [in] op_size Size of value in bytes + * @param [in] remote_addr Remote address to operate on. + * @param [in] rkey Remote key handle for the remote memory address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +ucp_atomic_post(ucp_ep_h ep, ucp_atomic_post_op_t opcode, uint64_t value, size_t op_size, + uint64_t remote_addr, ucp_rkey_h rkey); + +/** + * @ingroup UCP_COMM + * @brief Atomic operation requested for ucp_atomic_fetch + * + * @deprecated Use @ref ucp_atomic_op_nbx and @ref ucp_atomic_op_t instead. + * + * This enumeration defines which atomic memory operation should be performed + * by the ucp_atomic_fetch family of functions. All of these functions + * will fetch data from the remote node. + */ +typedef enum +{ + UCP_ATOMIC_FETCH_OP_FADD, /**< Atomic Fetch and add */ + UCP_ATOMIC_FETCH_OP_SWAP, /**< Atomic swap */ + UCP_ATOMIC_FETCH_OP_CSWAP, /**< Atomic conditional swap */ + UCP_ATOMIC_FETCH_OP_FAND, /**< Atomic Fetch and and */ + UCP_ATOMIC_FETCH_OP_FOR, /**< Atomic Fetch and or */ + UCP_ATOMIC_FETCH_OP_FXOR, /**< Atomic Fetch and xor */ + UCP_ATOMIC_FETCH_OP_LAST +} ucp_atomic_fetch_op_t; + +/** + * @ingroup UCP_COMM + * @brief Post an atomic fetch operation. + * + * @deprecated Use @ref ucp_atomic_op_nbx with the flag + * @ref UCP_OP_ATTR_FIELD_REPLY_BUFFER instead. + * + * This routine will post an atomic fetch operation to remote memory. + * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. + * The routine is non-blocking and therefore returns immediately. However the + * actual atomic operation may be delayed. The atomic operation is not considered complete + * until the values in remote and local memory are completed. 
If the atomic operation + * completes immediately, the routine returns UCS_OK and the call-back routine + * @a cb is @b not invoked. If the operation is @b not completed immediately and no + * error is reported, then the UCP library will schedule invocation of the call-back + * routine @a cb upon completion of the atomic operation. In other words, the completion + * of an atomic operation can be signaled by the return code or execution of the + * call-back. + * + * @note The user should not modify any part of the @a result after this + * operation is called, until the operation completes. + * + * @param [in] ep UCP endpoint. + * @param [in] opcode One of @ref ucp_atomic_fetch_op_t. + * @param [in] value Source operand for atomic operation. In the case of CSWAP + * this is the conditional for the swap. For SWAP this is + * the value to be placed in remote memory. + * @param [inout] result Local memory address to store resulting fetch to. + * In the case of CSWAP the value in result will be + * swapped into the @a remote_addr if the condition + * is true. + * @param [in] op_size Size of value in bytes and pointer type for result + * @param [in] remote_addr Remote address to operate on. + * @param [in] rkey Remote key handle for the remote memory address. + * @param [in] cb Call-back function that is invoked whenever the + * send operation is completed. It is important to note + * that the call-back function is only invoked in a case when + * the operation cannot be completed in place. + * + * @return NULL - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. 
+ */ +ucs_status_ptr_t +ucp_atomic_fetch_nb(ucp_ep_h ep, ucp_atomic_fetch_op_t opcode, uint64_t value, + void* result, size_t op_size, uint64_t remote_addr, ucp_rkey_h rkey, + ucp_send_callback_t cb); + +/** + * @ingroup UCP_WORKER + * + * @brief Flush outstanding AMO and RMA operations on the @ref ucp_worker_h + * "worker" + * + * @deprecated Use @ref ucp_worker_flush_nbx instead. + * + * This routine flushes all outstanding AMO and RMA communications on the + * @ref ucp_worker_h "worker". All the AMO and RMA operations issued on the + * @a worker prior to this call are completed both at the origin and at the + * target when this call returns. + * + * @note For description of the differences between @ref ucp_worker_flush_nb + * "flush" and @ref ucp_worker_fence "fence" operations please see + * @ref ucp_worker_fence "ucp_worker_fence()" + * + * @param [in] worker UCP worker. + * @param [in] flags Flags for flush operation. Reserved for future use. + * @param [in] cb Callback which will be called when the flush operation + * completes. + * + * @return NULL - The flush operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. + * @return otherwise - Flush operation was scheduled and can be completed + * in any point in time. The request handle is returned + * to the application in order to track progress. The + * application is responsible for releasing the handle + * using @ref ucp_request_free "ucp_request_free()" + * routine. + */ +ucs_status_ptr_t +ucp_worker_flush_nb(ucp_worker_h worker, unsigned flags, ucp_send_callback_t cb); + +/** + * @ingroup UCP_ENDPOINT + * @brief Close UCP endpoint modes. + * + * @deprecated Use @ref ucp_ep_close_nbx and @ref ucp_ep_close_flags_t instead. + * + * The enumeration is used to specify the behavior of @ref ucp_ep_close_nb. + */ +enum ucp_ep_close_mode +{ + UCP_EP_CLOSE_MODE_FORCE = 0, /**< @ref ucp_ep_close_nb releases + the endpoint without any + confirmation from the peer. 
All + outstanding requests will be + completed with + @ref UCS_ERR_CANCELED error. + @note This mode may cause + transport level errors on remote + side, so it requires set + @ref UCP_ERR_HANDLING_MODE_PEER + for all endpoints created on + both (local and remote) sides to + avoid undefined behavior. */ + UCP_EP_CLOSE_MODE_FLUSH = 1 /**< @ref ucp_ep_close_nb schedules + flushes on all outstanding + operations. */ +}; + +END_C_DECLS + +#endif diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/tpls/ucx/uct/api/uct.h b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/tpls/ucx/uct/api/uct.h new file mode 100644 index 0000000000..3c1b853120 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/tpls/ucx/uct/api/uct.h @@ -0,0 +1,3804 @@ +/** + * @file uct.h + * @date 2014-2020 + * @copyright NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. + * @copyright Oak Ridge National Laboratory. All rights received. + * @copyright Advanced Micro Devices, Inc. All rights received. + * @brief Unified Communication Transport + */ + +#ifndef UCT_H_ +#define UCT_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +BEGIN_C_DECLS + +/** @file uct.h */ + +/** + * @defgroup UCT_API Unified Communication Transport (UCT) API + * @{ + * This section describes UCT API. + * @} + */ + +/** + * @defgroup UCT_RESOURCE UCT Communication Resource + * @ingroup UCT_API + * @{ + * This section describes a concept of the Communication Resource and routines + * associated with the concept. + * @} + */ + +/** + * @defgroup UCT_CONTEXT UCT Communication Context + * @ingroup UCT_API + * @{ + * + * UCT context abstracts all the resources required for network communication. + * It is designed to enable either share or isolate resources for multiple + * programming models used by an application. 
+ * + * This section provides a detailed description of this concept and + * routines associated with it. + * + * @} + */ + +/** + * @defgroup UCT_MD UCT Memory Domain + * @ingroup UCT_API + * @{ + * The Memory Domain abstracts resources required for network communication, + * which typically includes memory, transport mechanisms, compute and + * network resources. It is an isolation mechanism that can be employed + * by the applications for isolating resources between multiple programming models. + * The attributes of the Memory Domain are defined by the structure @ref uct_md_attr(). + * The communication and memory operations are defined in the context of Memory Domain. + * + * @} + */ + +/** + * @defgroup UCT_AM UCT Active messages + * @ingroup UCT_API + * @{ + * Defines active message functions. + * @} + */ + +/** + * @defgroup UCT_RMA UCT Remote memory access operations + * @ingroup UCT_API + * @{ + * Defines remote memory access operations. + * @} + */ + +/** + * @defgroup UCT_AMO UCT Atomic operations + * @ingroup UCT_API + * @{ + * Defines atomic operations. + * @} + */ + +/** + * @defgroup UCT_TAG UCT Tag matching operations + * @ingroup UCT_API + * @{ + * Defines tag matching operations. + * @} + */ + +/** + * @defgroup UCT_CLIENT_SERVER UCT client-server operations + * @ingroup UCT_API + * @{ + * Defines client-server operations. + * The client-server API allows the connection establishment between an active + * side - a client, and its peer - the passive side - a server. + * The connection can be established through a UCT transport that supports + * listening and connecting via IP address and port (listening can also be on INADDR_ANY). + * + * The following is a general overview of the operations on the server side: + * + * Connecting: + * @ref uct_cm_open + * Open a connection manager. + * @ref uct_listener_create + * Create a listener on the CM and start listening on a given IP,port / INADDR_ANY. 
+ * @ref uct_cm_listener_conn_request_callback_t + * This callback is invoked by the UCT transport to handle an incoming connection + * request from a client. + * Accept or reject the client's connection request. + * @ref uct_ep_create + * Connect to the client by creating an endpoint if the request is accepted. + * The server creates a new endpoint for every connection request that it accepts. + * @ref uct_cm_ep_server_conn_notify_callback_t + * This callback is invoked by the UCT transport to handle the connection + * notification from the client. + * @note The private data which the server should send to the client can be + * either provided directly to @ref uct_ep_create, or filled by + * @ref uct_cm_ep_priv_data_pack_callback_t provided to + * @ref uct_ep_create. + * @note In order to reject a connection request, can either call + * @ref uct_listener_reject or return failure status as defined by + * @ref ucs_status_t from @ref uct_cm_ep_priv_data_pack_callback_t. + * + * Disconnecting: + * @ref uct_ep_disconnect + * Disconnect the server's endpoint from the client. + * Can be called when initiating a disconnect or when receiving a disconnect + * notification from the remote side. + * @ref uct_ep_disconnect_cb_t + * This callback is invoked by the UCT transport when the client side calls + * uct_ep_disconnect as well. + * @ref uct_ep_destroy + * Destroy the endpoint connected to the remote peer. + * If this function is called before the endpoint was disconnected, the + * @ref uct_ep_disconnect_cb_t will not be invoked. + * + * Destroying the server's resources: + * @ref uct_listener_destroy + * Destroy the listener object. + * @ref uct_cm_close + * Close the connection manager. + * + * The following is a general overview of the operations on the client side: + * + * Connecting: + * @ref uct_cm_open + * Open a connection manager. + * @ref uct_ep_create + * Create an endpoint for establishing a connection to the server. 
+ * @ref uct_cm_ep_resolve_callback_t + * This callback is invoked on the client side of the connection manager, + * after the remote server address was resolved to the local device to be + * used for connection establishment. + * @ref uct_ep_connect + * This function should be called on the client side, in order to send + * private data and resume connection establishment, following an + * address-resolved notification via @ref uct_cm_ep_resolve_callback_t. + * @ref uct_cm_ep_client_connect_callback_t + * This callback is invoked by the UCT transport to handle a connection response + * from the server. + * After invoking this callback, the UCT transport will finalize the client's + * connection to the server. + * @ref uct_cm_client_ep_conn_notify + * After the client's connection establishment is completed, the client + * should call this function in which it sends a notification message to + * the server stating that it (the client) is connected. + * The notification message that is sent depends on the transport's + * implementation. + * + * Disconnecting: + * @ref uct_ep_disconnect + * Disconnect the client's endpoint from the server. + * Can be called when initiating a disconnect or when receiving a disconnect + * notification from the remote side. + * @ref uct_ep_disconnect_cb_t + * This callback is invoked by the UCT transport when the server side calls + * uct_ep_disconnect as well. + * @ref uct_ep_destroy + * Destroy the endpoint connected to the remote peer. + * + * Destroying the client's resources: + * @ref uct_cm_close + * Close the connection manager. + * + * @} + */ + +/** + * @ingroup UCT_RESOURCE + * @brief Memory domain resource descriptor. + * + * This structure describes a memory domain resource. 
+ */ +typedef struct uct_md_resource_desc +{ + char md_name[UCT_MD_NAME_MAX]; /**< Memory domain name */ +} uct_md_resource_desc_t; + +/** + * @ingroup UCT_RESOURCE + * @brief UCT component attributes field mask + * + * The enumeration allows specifying which fields in @ref uct_component_attr_t + * are present. It is used for backward compatibility support. + */ +enum uct_component_attr_field +{ + UCT_COMPONENT_ATTR_FIELD_NAME = UCS_BIT(0), /**< Component name */ + UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT = UCS_BIT(1), /**< MD resource count */ + UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES = UCS_BIT(2), /**< MD resources array */ + UCT_COMPONENT_ATTR_FIELD_FLAGS = UCS_BIT(3) /**< Capability flags */ +}; + +/** + * @ingroup UCT_RESOURCE + * @brief UCT component attributes + * + * This structure defines the attributes for UCT component. It is used for + * @ref uct_component_query + */ +typedef struct uct_component_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_component_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** Component name */ + char name[UCT_COMPONENT_NAME_MAX]; + + /** Number of memory-domain resources */ + unsigned md_resource_count; + + /** + * Array of memory domain resources. When used, it should be initialized + * prior to calling @ref uct_component_query with a pointer to an array, + * which is large enough to hold all memory domain resource entries. After + * the call, this array will be filled with information about existing + * memory domain resources. + * In order to allocate this array, you can call @ref uct_component_query + * twice: The first time would only obtain the amount of entries required, + * by specifying @ref UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT in + * field_mask. 
Then the array could be allocated with the returned number of + * entries, and passed to a second call to @ref uct_component_query, this + * time setting field_mask to @ref UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES. + */ + uct_md_resource_desc_t* md_resources; + + /** + * Flags as defined by UCT_COMPONENT_FLAG_xx. + */ + uint64_t flags; +} uct_component_attr_t; + +/** + * @ingroup UCT_RESOURCE + * @brief Capability flags of @ref uct_component_h. + * + * The enumeration defines bit mask of @ref uct_component_h capabilities in + * @ref uct_component_attr_t::flags which is set by @ref uct_component_query. + */ +enum +{ + /** + * If set, the component supports @ref uct_cm_h functionality. + * See @ref uct_cm_open for details. + */ + UCT_COMPONENT_FLAG_CM = UCS_BIT(0), + + /** + * If set, the component supports direct access to remote memory using a + * local pointer returned from @ref uct_rkey_ptr function. + */ + UCT_COMPONENT_FLAG_RKEY_PTR = UCS_BIT(1) +}; + +/** + * @ingroup UCT_RESOURCE + * @brief List of UCX device types. + */ +typedef enum +{ + UCT_DEVICE_TYPE_NET, /**< Network devices */ + UCT_DEVICE_TYPE_SHM, /**< Shared memory devices */ + UCT_DEVICE_TYPE_ACC, /**< Acceleration devices */ + UCT_DEVICE_TYPE_SELF, /**< Loop-back device */ + UCT_DEVICE_TYPE_LAST +} uct_device_type_t; + +/** + * @ingroup UCT_RESOURCE + * @brief Communication resource descriptor. + * + * Resource descriptor is an object representing the network resource. + * Resource descriptor could represent a stand-alone communication resource + * such as an HCA port, network interface, or multiple resources such as + * multiple network interfaces or communication ports. It could also represent + * virtual communication resources that are defined over a single physical + * network interface. 
+ */ +typedef struct uct_tl_resource_desc +{ + char tl_name[UCT_TL_NAME_MAX]; /**< Transport name */ + char dev_name[UCT_DEVICE_NAME_MAX]; /**< Hardware device name */ + uct_device_type_t dev_type; /**< The device represented by this resource + (e.g. UCT_DEVICE_TYPE_NET for a network interface) */ + ucs_sys_device_t sys_device; /**< The identifier associated with the device + bus_id as captured in ucs_sys_bus_id_t struct */ +} uct_tl_resource_desc_t; + +#define UCT_TL_RESOURCE_DESC_FMT "%s/%s" +#define UCT_TL_RESOURCE_DESC_ARG(_resource) (_resource)->tl_name, (_resource)->dev_name + +/** + * @brief Atomic operation requested for uct_ep_atomic32_post, uct_ep_atomic64_post, + * uct_ep_atomic32_fetch and uct_ep_atomic64_fetch. + * + * This enumeration defines which atomic memory operation should be + * performed by the uct_ep_atomic family of functions. + */ +typedef enum uct_atomic_op +{ + UCT_ATOMIC_OP_ADD, /**< Atomic add */ + UCT_ATOMIC_OP_AND, /**< Atomic and */ + UCT_ATOMIC_OP_OR, /**< Atomic or */ + UCT_ATOMIC_OP_XOR, /**< Atomic xor */ + UCT_ATOMIC_OP_SWAP, /**< Atomic swap */ + UCT_ATOMIC_OP_CSWAP, /**< Atomic compare-and-swap */ + UCT_ATOMIC_OP_LAST +} uct_atomic_op_t; + +/** + * @defgroup UCT_RESOURCE_IFACE_CAP UCT interface operations and capabilities + * @ingroup UCT_RESOURCE + * + * @brief List of capabilities supported by UCX API + * + * The definition list presents a full list of operations and capabilities + * exposed by UCX API. 
+ * @{ + */ +/* Active message capabilities */ +#define UCT_IFACE_FLAG_AM_SHORT UCS_BIT(0) /**< Short active message */ +#define UCT_IFACE_FLAG_AM_BCOPY UCS_BIT(1) /**< Buffered active message */ +#define UCT_IFACE_FLAG_AM_ZCOPY UCS_BIT(2) /**< Zero-copy active message */ + +#define UCT_IFACE_FLAG_PENDING UCS_BIT(3) /**< Pending operations */ + +/* PUT capabilities */ +#define UCT_IFACE_FLAG_PUT_SHORT UCS_BIT(4) /**< Short put */ +#define UCT_IFACE_FLAG_PUT_BCOPY UCS_BIT(5) /**< Buffered put */ +#define UCT_IFACE_FLAG_PUT_ZCOPY UCS_BIT(6) /**< Zero-copy put */ + +/* GET capabilities */ +#define UCT_IFACE_FLAG_GET_SHORT UCS_BIT(8) /**< Short get */ +#define UCT_IFACE_FLAG_GET_BCOPY UCS_BIT(9) /**< Buffered get */ +#define UCT_IFACE_FLAG_GET_ZCOPY UCS_BIT(10) /**< Zero-copy get */ + +/* Atomic operations domain */ +#define UCT_IFACE_FLAG_ATOMIC_CPU \ + UCS_BIT(30) /**< Atomic communications are consistent \ + with respect to CPU operations. */ +#define UCT_IFACE_FLAG_ATOMIC_DEVICE \ + UCS_BIT(31) /**< Atomic communications are consistent \ + only with respect to other atomics \ + on the same device. 
*/ + +/* Error handling capabilities */ +#define UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF \ + UCS_BIT(32) /**< Invalid buffer for short operation */ +#define UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF \ + UCS_BIT(33) /**< Invalid buffer for buffered operation */ +#define UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF \ + UCS_BIT(34) /**< Invalid buffer for zero copy operation */ +#define UCT_IFACE_FLAG_ERRHANDLE_AM_ID UCS_BIT(35) /**< Invalid AM id on remote */ +#define UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM UCS_BIT(36) /**< Remote memory access */ +#define UCT_IFACE_FLAG_ERRHANDLE_BCOPY_LEN \ + UCS_BIT(37) /**< Invalid length for buffered operation */ +#define UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE \ + UCS_BIT(38) /**< Remote peer failures/outage */ + +#define UCT_IFACE_FLAG_EP_CHECK UCS_BIT(39) /**< Endpoint check */ + +/* Connection establishment */ +#define UCT_IFACE_FLAG_CONNECT_TO_IFACE \ + UCS_BIT(40) /**< Supports connecting to interface */ +#define UCT_IFACE_FLAG_CONNECT_TO_EP \ + UCS_BIT(41) /**< Supports connecting to specific endpoint */ +#define UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR \ + UCS_BIT(42) /**< Supports connecting to sockaddr */ + +/* Special transport flags */ +#define UCT_IFACE_FLAG_AM_DUP \ + UCS_BIT(43) /**< Active messages may be received with duplicates \ + This happens if the transport does not keep enough \ + information to detect retransmissions */ + +/* Callback invocation */ +#define UCT_IFACE_FLAG_CB_SYNC \ + UCS_BIT(44) /**< Interface supports setting a callback \ + which is invoked only from the calling context of \ + uct_worker_progress() */ +#define UCT_IFACE_FLAG_CB_ASYNC \ + UCS_BIT(45) /**< Interface supports setting a callback \ + which will be invoked within a reasonable amount of \ + time if uct_worker_progress() is not being called. \ + The callback can be invoked from any progress context \ + and it may also be invoked when uct_worker_progress() \ + is called. 
*/ + +/* Keepalive */ +#define UCT_IFACE_FLAG_EP_KEEPALIVE \ + UCS_BIT(46) /**< Transport endpoint has built-in keepalive feature, \ + which guarantees the error callback on the transport \ + interface will be called if the communication \ + channel with remote peer is broken, even if there \ + are no outstanding send operations */ + +/* Tag matching operations */ +#define UCT_IFACE_FLAG_TAG_EAGER_SHORT \ + UCS_BIT(50) /**< Hardware tag matching short eager support */ +#define UCT_IFACE_FLAG_TAG_EAGER_BCOPY \ + UCS_BIT(51) /**< Hardware tag matching bcopy eager support */ +#define UCT_IFACE_FLAG_TAG_EAGER_ZCOPY \ + UCS_BIT(52) /**< Hardware tag matching zcopy eager support */ +#define UCT_IFACE_FLAG_TAG_RNDV_ZCOPY \ + UCS_BIT(53) /**< Hardware tag matching rendezvous zcopy support */ + +/* Interface capability */ +#define UCT_IFACE_FLAG_INTER_NODE UCS_BIT(54) /**< Interface is inter-node capable */ +#define UCT_IFACE_FLAG_DEVICE_EP UCS_BIT(55) /**< Interface supports device endpoint */ +/** + * @} + */ + +/** + * @defgroup UCT_RESOURCE_IFACE_EVENT_CAP UCT interface for asynchronous event + * capabilities + * @ingroup UCT_RESOURCE + * + * @brief List of capabilities supported by UCT iface event API + * + * The definition list presents a full list of operations and capabilities + * supported by UCT iface event. 
+ * @{ + */ +/* Event types */ +#define UCT_IFACE_FLAG_EVENT_SEND_COMP \ + UCS_BIT(0) /**< Event notification of send completion is \ + supported */ +#define UCT_IFACE_FLAG_EVENT_RECV \ + UCS_BIT(1) /**< Event notification of tag and active message \ + receive is supported */ +#define UCT_IFACE_FLAG_EVENT_RECV_SIG \ + UCS_BIT(2) /**< Event notification of signaled tag and active \ + message is supported */ + /* Event notification mechanisms */ +#define UCT_IFACE_FLAG_EVENT_FD \ + UCS_BIT(3) /**< Event notification through File Descriptor \ + is supported */ +#define UCT_IFACE_FLAG_EVENT_ASYNC_CB \ + UCS_BIT(4) /**< Event notification through asynchronous \ + callback invocation is supported */ +/** + * @} + */ + +/** + * @ingroup UCT_CONTEXT + * @brief Memory allocation methods. + */ +typedef enum +{ + UCT_ALLOC_METHOD_THP, /**< Allocate from OS using libc allocator with + Transparent Huge Pages enabled*/ + UCT_ALLOC_METHOD_MD, /**< Allocate using memory domain */ + UCT_ALLOC_METHOD_HEAP, /**< Allocate from heap using libc allocator */ + UCT_ALLOC_METHOD_MMAP, /**< Allocate from OS using mmap() syscall */ + UCT_ALLOC_METHOD_HUGE, /**< Allocate huge pages */ + UCT_ALLOC_METHOD_LAST, + UCT_ALLOC_METHOD_DEFAULT = UCT_ALLOC_METHOD_LAST /**< Use default method */ +} uct_alloc_method_t; + +/** + * @ingroup UCT_RESOURCE + * @brief Asynchronous event types. + * + * @note The UCT_EVENT_RECV and UCT_EVENT_RECV_SIG event types are used to + * indicate receive-side completions for both tag matching and active + * messages. If the interface supports signaled receives + * (@ref UCT_IFACE_FLAG_EVENT_RECV_SIG), then for the messages sent with + * UCT_SEND_FLAG_SIGNALED flag, UCT_EVENT_RECV_SIG should be triggered + * on the receiver. Otherwise, UCT_EVENT_RECV should be triggered. 
+ */ +enum uct_iface_event_types +{ + UCT_EVENT_SEND_COMP = UCS_BIT(0), /**< Send completion event */ + UCT_EVENT_RECV = UCS_BIT(1), /**< Tag or active message received */ + UCT_EVENT_RECV_SIG = UCS_BIT(2) /**< Signaled tag or active message + received */ +}; + +/** + * @ingroup UCT_RESOURCE + * @brief Flush modifiers. + */ +enum uct_flush_flags +{ + UCT_FLUSH_FLAG_LOCAL = 0, /**< Guarantees that the data + transfer is completed but the + target buffer may not be + updated yet. */ + UCT_FLUSH_FLAG_CANCEL = UCS_BIT(0), /**< The library will make a best + effort attempt to cancel all + uncompleted operations. + However, there is a chance that + some operations will not be + canceled in which case the user + will need to handle their + completions through + the relevant callbacks. + After @ref uct_ep_flush + with this flag is completed, + the endpoint will be set to + error state, and it becomes + unusable for send operations + and should be destroyed. */ + UCT_FLUSH_FLAG_REMOTE = UCS_BIT(1) /**< Guarantees that all previous + UCP memory update operations + (put, atomics, etc.) are + completed, the target memory + of these operations was updated, + and the updated memory is + globally visible for all + processing elements in the + system. */ +}; + +/** + * @ingroup UCT_RESOURCE + * @brief UCT progress types + */ +enum uct_progress_types +{ + UCT_PROGRESS_SEND = UCS_BIT(0), /**< Progress send operations */ + UCT_PROGRESS_RECV = UCS_BIT(1), /**< Progress receive operations */ + UCT_PROGRESS_THREAD_SAFE = UCS_BIT(7) /**< Enable/disable progress while + another thread may be calling + @ref ucp_worker_progress(). */ +}; + +/** + * @ingroup UCT_AM + * @brief Flags for active message send operation. + */ +enum uct_msg_flags +{ + UCT_SEND_FLAG_SIGNALED = UCS_BIT(0), /**< Trigger @ref UCT_EVENT_RECV_SIG + event on remote side. Make best + effort attempt to avoid + triggering @ref UCT_EVENT_RECV + event. Ignored if not supported + by interface.
*/ + UCT_SEND_FLAG_PEER_CHECK = UCS_BIT(1) /**< Forces checking connectivity to + a peer. If the connection is + not alive, an error callback + will be invoked. If the flag is + not set, there is no guarantee + that a connectivity error could + be detected. */ +}; + +/** + * @ingroup UCT_RESOURCE + * @brief Callback flags. + * + * List of flags for a callback. + */ +enum uct_cb_flags +{ + UCT_CB_FLAG_RESERVED = UCS_BIT(1), /**< Reserved for future use. */ + UCT_CB_FLAG_ASYNC = UCS_BIT(2) /**< Callback is allowed to be called + from any thread in the process, and + therefore should be thread-safe. For + example, it may be called from a + transport async progress thread. To + guarantee async invocation, the + interface must have the @ref + UCT_IFACE_FLAG_CB_ASYNC flag set. If + async callback is requested on an + interface which only supports sync + callback (i.e., only the @ref + UCT_IFACE_FLAG_CB_SYNC flag is set), + the callback will be invoked only + from the context that called @ref + uct_iface_progress. */ +}; + +/** + * @ingroup UCT_RESOURCE + * @brief Mode in which to open the interface. + */ +enum uct_iface_open_mode +{ + /** Interface is opened on a specific device */ + UCT_IFACE_OPEN_MODE_DEVICE = UCS_BIT(0), + + /** Interface is opened on a specific address on the server side. This mode + will be deprecated in the near future for a better API. */ + UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER = UCS_BIT(1), + + /** Interface is opened on a specific address on the client side. This mode + will be deprecated in the near future for a better API. */ + UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT = UCS_BIT(2) +}; + +/** + * @ingroup UCT_RESOURCE + * @brief UCT interface created by @ref uct_iface_open parameters field mask. + * + * The enumeration allows specifying which fields in @ref uct_iface_params_t are + * present, for backward compatibility support.
+ */ +enum uct_iface_params_field +{ + /** Enables @ref uct_iface_params_t::cpu_mask */ + UCT_IFACE_PARAM_FIELD_CPU_MASK = UCS_BIT(0), + + /** Enables @ref uct_iface_params_t::open_mode */ + UCT_IFACE_PARAM_FIELD_OPEN_MODE = UCS_BIT(1), + + /** Enables @ref uct_iface_params_t_mode_device + * "uct_iface_params_t::mode::device" */ + UCT_IFACE_PARAM_FIELD_DEVICE = UCS_BIT(2), + + /** Enables @ref uct_iface_params_t_mode_sockaddr + * "uct_iface_params_t::mode::sockaddr" */ + UCT_IFACE_PARAM_FIELD_SOCKADDR = UCS_BIT(3), + + /** Enables @ref uct_iface_params_t::stats_root */ + UCT_IFACE_PARAM_FIELD_STATS_ROOT = UCS_BIT(4), + + /** Enables @ref uct_iface_params_t::rx_headroom */ + UCT_IFACE_PARAM_FIELD_RX_HEADROOM = UCS_BIT(5), + + /** Enables @ref uct_iface_params_t::err_handler_arg */ + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_ARG = UCS_BIT(6), + + /** Enables @ref uct_iface_params_t::err_handler */ + UCT_IFACE_PARAM_FIELD_ERR_HANDLER = UCS_BIT(7), + + /** Enables @ref uct_iface_params_t::err_handler_flags */ + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS = UCS_BIT(8), + + /** Enables @ref uct_iface_params_t::eager_arg */ + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_ARG = UCS_BIT(9), + + /** Enables @ref uct_iface_params_t::eager_cb */ + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_CB = UCS_BIT(10), + + /** Enables @ref uct_iface_params_t::rndv_arg */ + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_ARG = UCS_BIT(11), + + /** Enables @ref uct_iface_params_t::rndv_cb */ + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_CB = UCS_BIT(12), + + /** Enables @ref uct_iface_params_t::async_event_arg */ + UCT_IFACE_PARAM_FIELD_ASYNC_EVENT_ARG = UCS_BIT(13), + + /** Enables @ref uct_iface_params_t::async_event_cb */ + UCT_IFACE_PARAM_FIELD_ASYNC_EVENT_CB = UCS_BIT(14), + + /** Enables @ref uct_iface_params_t::keepalive_interval */ + UCT_IFACE_PARAM_FIELD_KEEPALIVE_INTERVAL = UCS_BIT(15), + + /** Enables @ref uct_iface_params_t::am_alignment */ + UCT_IFACE_PARAM_FIELD_AM_ALIGNMENT = UCS_BIT(16), + + /** Enables @ref 
uct_iface_params_t::am_align_offset */ + UCT_IFACE_PARAM_FIELD_AM_ALIGN_OFFSET = UCS_BIT(17), + + /** Enables @ref uct_iface_params_t::features */ + UCT_IFACE_PARAM_FIELD_FEATURES = UCS_BIT(18) +}; + +/** + * @ingroup UCT_MD + * @brief Socket address accessibility type. + */ +typedef enum +{ + UCT_SOCKADDR_ACC_LOCAL, /**< Check if local address exists. + Address should belong to a local + network interface */ + UCT_SOCKADDR_ACC_REMOTE /**< Check if remote address can be reached. + Address is routable from one of the + local network interfaces */ +} uct_sockaddr_accessibility_t; + +/** + * @ingroup UCT_MD + * @brief Memory domain capability flags. + */ +enum +{ + /** + * MD supports memory allocation + */ + UCT_MD_FLAG_ALLOC = UCS_BIT(0), + + /** + * MD supports memory registration + */ + UCT_MD_FLAG_REG = UCS_BIT(1), + + /** + * The transport needs a valid local memory handle for zero-copy operations + */ + UCT_MD_FLAG_NEED_MEMH = UCS_BIT(2), + + /** + * The transport needs a valid remote memory key for remote memory + * operations + */ + UCT_MD_FLAG_NEED_RKEY = UCS_BIT(3), + + /** + * MD supports memory advice + */ + UCT_MD_FLAG_ADVISE = UCS_BIT(4), + + /** + * MD supports memory allocation with fixed address + */ + UCT_MD_FLAG_FIXED = UCS_BIT(5), + + /** + * MD supports direct access to remote memory via a pointer that is + * returned by @ref uct_rkey_ptr. + * @note This flag is deprecated and replaced by + * @a UCT_COMPONENT_FLAG_RKEY_PTR. + */ + UCT_MD_FLAG_RKEY_PTR = UCS_BIT(6), + + /** + * MD support for client-server connection establishment via sockaddr + */ + UCT_MD_FLAG_SOCKADDR = UCS_BIT(7), + + /** + * MD supports memory invalidation. 
+ * @note This flag is equivalent to the combination of + * UCT_MD_FLAG_INVALIDATE_RMA and UCT_MD_FLAG_INVALIDATE_AMO for + * uct_md_attr_v2_t.flags + */ + UCT_MD_FLAG_INVALIDATE = UCS_BIT(8), + + /** + * MD supports exporting memory keys with another process using the same + * device or attaching to an exported memory key. + */ + UCT_MD_FLAG_EXPORTED_MKEY = UCS_BIT(9), + + /** + * MD supports registering a dmabuf file descriptor. + */ + UCT_MD_FLAG_REG_DMABUF = UCS_BIT(10), + + /** + * The enum must not be extended. Any additional flags must be defined in + * API v2 uct_md_flags_v2_t. + */ + UCT_MD_FLAG_LAST = UCS_BIT(11) +}; + +/** + * @ingroup UCT_MD + * @brief Memory allocation/registration flags. + */ +enum uct_md_mem_flags +{ + /** + * Hint to perform non-blocking allocation/registration: page mapping may + * be deferred until it is accessed by the CPU or a transport. + */ + UCT_MD_MEM_FLAG_NONBLOCK = UCS_BIT(0), + + /** + * Place the mapping at exactly defined address. + */ + UCT_MD_MEM_FLAG_FIXED = UCS_BIT(1), + + /** + * Registered memory should be locked. May incur extra cost for + * registration, but memory access is usually faster. + */ + UCT_MD_MEM_FLAG_LOCK = UCS_BIT(2), + + /** + * Hide errors on memory registration and allocation. If this flag is set, + * no error messages will be printed. + */ + UCT_MD_MEM_FLAG_HIDE_ERRORS = UCS_BIT(3), + + /* Memory access flags */ + /** + * Enable remote put access. + */ + UCT_MD_MEM_ACCESS_REMOTE_PUT = UCS_BIT(5), + + /** + * Enable remote get access. + */ + UCT_MD_MEM_ACCESS_REMOTE_GET = UCS_BIT(6), + + /** + * Enable remote atomic access. + */ + UCT_MD_MEM_ACCESS_REMOTE_ATOMIC = UCS_BIT(7), + + /** + * Enable local read access. + */ + UCT_MD_MEM_ACCESS_LOCAL_READ = UCS_BIT(8), + + /** + * Enable local write access. 
+ */ + UCT_MD_MEM_ACCESS_LOCAL_WRITE = UCS_BIT(9), + + /** + * Register the memory region so its remote access key would likely be + * equal to remote access keys received from other peers, when compared + * with @a uct_rkey_compare. This flag is a hint. When remote access keys + * received from different peers are compared equal, they can be used + * interchangeably, avoiding the need to keep all of them in memory. + */ + UCT_MD_MEM_SYMMETRIC_RKEY = UCS_BIT(10), + + /** + * Register global VA to access all process virtual address space. + */ + UCT_MD_MEM_GVA = UCS_BIT(11), + + /** + * Enable local and remote access for all operations. + */ + UCT_MD_MEM_ACCESS_ALL = + (UCT_MD_MEM_ACCESS_REMOTE_PUT | UCT_MD_MEM_ACCESS_REMOTE_GET | + UCT_MD_MEM_ACCESS_REMOTE_ATOMIC | UCT_MD_MEM_ACCESS_LOCAL_READ | + UCT_MD_MEM_ACCESS_LOCAL_WRITE), + + /** + * Enable local and remote access for put and get operations. + */ + UCT_MD_MEM_ACCESS_RMA = (UCT_MD_MEM_ACCESS_REMOTE_PUT | UCT_MD_MEM_ACCESS_REMOTE_GET | + UCT_MD_MEM_ACCESS_LOCAL_READ | UCT_MD_MEM_ACCESS_LOCAL_WRITE) +}; + +/** + * @ingroup UCT_MD + * @brief list of UCT memory use advice + */ +typedef enum +{ + UCT_MADV_NORMAL = 0, /**< No special treatment */ + UCT_MADV_WILLNEED /**< can be used on the memory mapped with + @ref UCT_MD_MEM_FLAG_NONBLOCK to speed up + memory mapping and to avoid page faults when + the memory is accessed for the first time. */ +} uct_mem_advice_t; + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief UCT connection manager attributes field mask. + * + * The enumeration allows specifying which fields in @ref uct_cm_attr_t are + * present, for backward compatibility support. + */ +enum uct_cm_attr_field +{ + /** Enables @ref uct_cm_attr::max_conn_priv */ + UCT_CM_ATTR_FIELD_MAX_CONN_PRIV = UCS_BIT(0) +}; + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief UCT listener attributes field mask. 
+ * + * The enumeration allows specifying which fields in @ref uct_listener_attr_t are + * present, for backward compatibility support. + */ +enum uct_listener_attr_field +{ + /** Enables @ref uct_listener_attr::sockaddr */ + UCT_LISTENER_ATTR_FIELD_SOCKADDR = UCS_BIT(0) +}; + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief UCT listener created by @ref uct_listener_create parameters field mask. + * + * The enumeration allows specifying which fields in @ref uct_listener_params_t + * are present, for backward compatibility support. + */ +enum uct_listener_params_field +{ + /** Enables @ref uct_listener_params::backlog */ + UCT_LISTENER_PARAM_FIELD_BACKLOG = UCS_BIT(0), + + /** Enables @ref uct_listener_params::conn_request_cb */ + UCT_LISTENER_PARAM_FIELD_CONN_REQUEST_CB = UCS_BIT(1), + + /** Enables @ref uct_listener_params::user_data */ + UCT_LISTENER_PARAM_FIELD_USER_DATA = UCS_BIT(2) +}; + +/** + * @ingroup UCT_RESOURCE + * @brief UCT endpoint created by @ref uct_ep_create parameters field mask. + * + * The enumeration allows specifying which fields in @ref uct_ep_params_t are + * present, for backward compatibility support. 
+ */ +enum uct_ep_params_field +{ + /** Enables @ref uct_ep_params::iface */ + UCT_EP_PARAM_FIELD_IFACE = UCS_BIT(0), + + /** Enables @ref uct_ep_params::user_data */ + UCT_EP_PARAM_FIELD_USER_DATA = UCS_BIT(1), + + /** Enables @ref uct_ep_params::dev_addr */ + UCT_EP_PARAM_FIELD_DEV_ADDR = UCS_BIT(2), + + /** Enables @ref uct_ep_params::iface_addr */ + UCT_EP_PARAM_FIELD_IFACE_ADDR = UCS_BIT(3), + + /** Enables @ref uct_ep_params::sockaddr */ + UCT_EP_PARAM_FIELD_SOCKADDR = UCS_BIT(4), + + /** Enables @ref uct_ep_params::sockaddr_cb_flags */ + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS = UCS_BIT(5), + + /** Enables @ref uct_ep_params::sockaddr_pack_cb */ + UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB = UCS_BIT(6), + + /** Enables @ref uct_ep_params::cm */ + UCT_EP_PARAM_FIELD_CM = UCS_BIT(7), + + /** Enables @ref uct_ep_params::conn_request */ + UCT_EP_PARAM_FIELD_CONN_REQUEST = UCS_BIT(8), + + /** Enables @ref uct_ep_params::sockaddr_cb_client */ + UCT_EP_PARAM_FIELD_SOCKADDR_CONNECT_CB_CLIENT = UCS_BIT(9), + + /** Enables @ref uct_ep_params::sockaddr_cb_server */ + UCT_EP_PARAM_FIELD_SOCKADDR_NOTIFY_CB_SERVER = UCS_BIT(10), + + /** Enables @ref uct_ep_params::disconnect_cb */ + UCT_EP_PARAM_FIELD_SOCKADDR_DISCONNECT_CB = UCS_BIT(11), + + /** Enables @ref uct_ep_params::path_index */ + UCT_EP_PARAM_FIELD_PATH_INDEX = UCS_BIT(12), + + /** Enables @ref uct_ep_params::cm_resolve_cb */ + UCT_EP_PARAM_FIELD_CM_RESOLVE_CB = UCS_BIT(13), + + /** Enables @ref uct_ep_params::private_data */ + UCT_EP_PARAM_FIELD_PRIV_DATA = UCS_BIT(14), + + /** Enables @ref uct_ep_params::private_data_length */ + UCT_EP_PARAM_FIELD_PRIV_DATA_LENGTH = UCS_BIT(15), + + /** Enables @ref uct_ep_params::local_sockaddr */ + UCT_EP_PARAM_FIELD_LOCAL_SOCKADDR = UCS_BIT(16), + + /** Enables @ref uct_ep_params::dev_addr_length */ + UCT_EP_PARAM_FIELD_DEV_ADDR_LENGTH = UCS_BIT(17), + + /** Enables @ref uct_ep_params::iface_addr_length */ + UCT_EP_PARAM_FIELD_IFACE_ADDR_LENGTH = UCS_BIT(18) +}; + +/** + * @ingroup 
UCT_CLIENT_SERVER + * @brief UCT endpoint connected by @ref uct_ep_connect parameters field mask. + * + * The enumeration allows specifying which fields in + * @ref uct_ep_connect_params_t are present, for backward compatibility support. + */ +enum uct_ep_connect_params_field +{ + /** Enables @ref uct_ep_connect_params::private_data */ + UCT_EP_CONNECT_PARAM_FIELD_PRIVATE_DATA = UCS_BIT(0), + + /** Enables @ref uct_ep_connect_params::private_data_length */ + UCT_EP_CONNECT_PARAM_FIELD_PRIVATE_DATA_LENGTH = UCS_BIT(1) +}; + +/** + * @ingroup UCT_RESOURCE + * @brief UCT interface configuration features + * + * The enumeration list describes the features supported by UCT. An + * application can request the features using @ref uct_iface_params "UCT parameters" + * during @ref uct_iface_open "UCT iface initialization" process. + */ +enum uct_iface_feature +{ + /** Request Active Message support */ + UCT_IFACE_FEATURE_AM = UCS_BIT(0), + + /** Request PUT support */ + UCT_IFACE_FEATURE_PUT = UCS_BIT(1), + + /** Request GET support */ + UCT_IFACE_FEATURE_GET = UCS_BIT(2), + + /** Request 32-bit atomic operations support */ + UCT_IFACE_FEATURE_AMO32 = UCS_BIT(3), + + /** Request 64-bit atomic operations support */ + UCT_IFACE_FEATURE_AMO64 = UCS_BIT(4), + + /** Request tag matching offload support */ + UCT_IFACE_FEATURE_TAG = UCS_BIT(5), + + /** Request remote flush support */ + UCT_IFACE_FEATURE_FLUSH_REMOTE = UCS_BIT(6), + + /** Used to determine the number of features */ + UCT_IFACE_FEATURE_LAST = UCS_BIT(7) +}; + +/* + * @ingroup UCT_RESOURCE + * @brief Process Per Node (PPN) bandwidth specification: f(ppn) = dedicated + shared / + * ppn + * + * This structure specifies a function which is used as basis for bandwidth + * estimation of various UCT operations. This information can be used to select + * the best performing combination of UCT operations. 
+ */ +typedef struct uct_ppn_bandwidth +{ + double dedicated; /**< Dedicated bandwidth, bytes/second */ + double shared; /**< Shared bandwidth, bytes/second */ +} uct_ppn_bandwidth_t; + +/** + * @ingroup UCT_RESOURCE + * @brief Interface attributes: capabilities and limitations. + */ +struct uct_iface_attr +{ + struct + { + struct + { + size_t max_short; /**< Maximal size for put_short */ + size_t max_bcopy; /**< Maximal size for put_bcopy */ + size_t min_zcopy; /**< Minimal size for put_zcopy (total + of @ref uct_iov_t::length of the + @a iov parameter) */ + size_t max_zcopy; /**< Maximal size for put_zcopy (total + of @ref uct_iov_t::length of the + @a iov parameter) */ + size_t opt_zcopy_align; /**< Optimal alignment for zero-copy + buffer address */ + size_t align_mtu; /**< MTU used for alignment */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref ::uct_ep_put_zcopy + @anchor uct_iface_attr_cap_put_max_iov */ + } put; /**< Attributes for PUT operations */ + + struct + { + size_t max_short; /**< Maximal size for get_short */ + size_t max_bcopy; /**< Maximal size for get_bcopy */ + size_t min_zcopy; /**< Minimal size for get_zcopy (total + of @ref uct_iov_t::length of the + @a iov parameter) */ + size_t max_zcopy; /**< Maximal size for get_zcopy (total + of @ref uct_iov_t::length of the + @a iov parameter) */ + size_t opt_zcopy_align; /**< Optimal alignment for zero-copy + buffer address */ + size_t align_mtu; /**< MTU used for alignment */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref uct_ep_get_zcopy + @anchor uct_iface_attr_cap_get_max_iov */ + } get; /**< Attributes for GET operations */ + + struct + { + size_t max_short; /**< Total maximum size (incl. the header) + @anchor uct_iface_attr_cap_am_max_short */ + size_t max_bcopy; /**< Total maximum size (incl. the header) */ + size_t min_zcopy; /**< Minimal size for am_zcopy (incl. 
the + header and total of @ref uct_iov_t::length + of the @a iov parameter) */ + size_t max_zcopy; /**< Total max. size (incl. the header + and total of @ref uct_iov_t::length + of the @a iov parameter) */ + size_t opt_zcopy_align; /**< Optimal alignment for zero-copy + buffer address */ + size_t align_mtu; /**< MTU used for alignment */ + size_t max_hdr; /**< Max. header size for zcopy */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref ::uct_ep_am_zcopy + @anchor uct_iface_attr_cap_am_max_iov */ + } am; /**< Attributes for AM operations */ + + struct + { + struct + { + size_t min_recv; /**< Minimal allowed length of posted receive buffer */ + size_t max_zcopy; /**< Maximal allowed data length in + @ref uct_iface_tag_recv_zcopy */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref uct_iface_tag_recv_zcopy + @anchor uct_iface_attr_cap_tag_recv_iov */ + size_t max_outstanding; /**< Maximal number of simultaneous + receive operations */ + } recv; + + struct + { + size_t max_short; /**< Maximal allowed data length in + @ref uct_ep_tag_eager_short */ + size_t max_bcopy; /**< Maximal allowed data length in + @ref uct_ep_tag_eager_bcopy */ + size_t max_zcopy; /**< Maximal allowed data length in + @ref uct_ep_tag_eager_zcopy */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref uct_ep_tag_eager_zcopy */ + } eager; /**< Attributes related to eager protocol */ + + struct + { + size_t max_zcopy; /**< Maximal allowed data length in + @ref uct_ep_tag_rndv_zcopy */ + size_t max_hdr; /**< Maximal allowed header length in + @ref uct_ep_tag_rndv_zcopy and + @ref uct_ep_tag_rndv_request */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref uct_ep_tag_rndv_zcopy */ + } rndv; /**< Attributes related to rendezvous protocol */ + } tag; /**< Attributes for TAG operations */ + + struct + { + uint64_t op_flags; /**< Attributes for atomic-post operations */ + uint64_t fop_flags; /**< Attributes for atomic-fetch operations */ + } atomic32, atomic64; 
/**< Attributes for atomic operations */ + + uint64_t flags; /**< Flags from @ref UCT_RESOURCE_IFACE_CAP */ + uint64_t event_flags; /**< Flags from @ref UCT_RESOURCE_IFACE_EVENT_CAP */ + } cap; /**< Interface capabilities */ + + size_t device_addr_len; /**< Size of device address */ + size_t iface_addr_len; /**< Size of interface address */ + size_t ep_addr_len; /**< Size of endpoint address */ + size_t max_conn_priv; /**< Max size of the iface's private data. + Used for connection + establishment with sockaddr */ + struct sockaddr_storage listen_sockaddr; /**< Sockaddr on which this iface + is listening. */ + /* + * The following fields define expected performance of the communication + * interface; this would usually be a combination of device and system + * characteristics and determined at run time. + */ + double overhead; /**< Message overhead, seconds */ + uct_ppn_bandwidth_t bandwidth; /**< Bandwidth model */ + ucs_linear_func_t latency; /**< Latency as function of number of + active endpoints */ + uint8_t priority; /**< Priority of device */ + size_t max_num_eps; /**< Maximum number of endpoints */ + unsigned dev_num_paths; /**< How many network paths can be + utilized on the device used by + this interface for optimal + performance. Endpoints that connect + to the same remote address but use + different paths can potentially + achieve higher total bandwidth + compared to using only a single + endpoint. */ +}; + +/** + * @ingroup UCT_RESOURCE + * @brief Parameters used for interface creation. + * + * This structure should be allocated by the user and should be passed to + * @ref uct_iface_open. User has to initialize all fields of this structure. + */ +struct uct_iface_params +{ + /** Mask of valid fields in this structure, using bits from + * @ref uct_iface_params_field. Fields not specified in this mask will be + * ignored. */ + uint64_t field_mask; + /** Mask of CPUs to use for resources */ + ucs_cpu_set_t cpu_mask; + /** Interface open mode bitmap.
@ref uct_iface_open_mode */ + uint64_t open_mode; + /** Mode-specific parameters */ + union + { + /** @anchor uct_iface_params_t_mode_device + * The fields in this structure (tl_name and dev_name) need to be set only when + * the @ref UCT_IFACE_OPEN_MODE_DEVICE bit is set in @ref + * uct_iface_params_t.open_mode. This will make @ref uct_iface_open + * open the interface on the specified device. + */ + struct + { + const char* tl_name; /**< Transport name */ + const char* dev_name; /**< Device Name */ + } device; + /** @anchor uct_iface_params_t_mode_sockaddr + * These callbacks and address are only relevant for client-server + * connection establishment with sockaddr and are needed on the server side. + * The callbacks and address need to be set when the @ref + * UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER bit is set in @ref + * uct_iface_params_t.open_mode. This will make @ref uct_iface_open + * open the interface on the specified address as a server. */ + struct + { + ucs_sock_addr_t listen_sockaddr; + /** Argument for connection request callback */ + void* conn_request_arg; + /** Callback for an incoming connection request on the server */ + uct_sockaddr_conn_request_callback_t conn_request_cb; + /** Callback flags to indicate where the callback can be invoked from. + * @ref uct_cb_flags */ + uint32_t cb_flags; + } sockaddr; + } mode; + + /** Root in the statistics tree. Can be NULL. If non NULL, it will be + a root of @a uct_iface object in the statistics tree. */ + ucs_stats_node_t* stats_root; + /** How many bytes to reserve before the receive segment.*/ + size_t rx_headroom; + + /** Custom argument of @a err_handler. */ + void* err_handler_arg; + /** The callback to handle transport level error.*/ + uct_error_handler_t err_handler; + /** Callback flags to indicate where the @a err_handler callback can be + * invoked from.
@ref uct_cb_flags */ + uint32_t err_handler_flags; + + /** These callbacks are only relevant for HW Tag Matching */ + void* eager_arg; + /** Callback for tag matching unexpected eager messages */ + uct_tag_unexp_eager_cb_t eager_cb; + void* rndv_arg; + /** Callback for tag matching unexpected rndv messages */ + uct_tag_unexp_rndv_cb_t rndv_cb; + + void* async_event_arg; + /** Callback for asynchronous event handling. The callback will be + * invoked from UCT transport when there are new events to be + * read by user if the iface has @ref UCT_IFACE_FLAG_EVENT_ASYNC_CB + * capability */ + uct_async_event_cb_t async_event_cb; + + /* Time period between keepalive rounds */ + ucs_time_t keepalive_interval; + + /** + * Desired alignment for Active Messages on the receiver. Note that only + * data received in the UCT descriptor can be aligned (i.e. + * @a UCT_CB_PARAM_FLAG_DESC flag is provided in the Active Message + * handler callback). The provided value must be power of 2. The default + * value is 1. + */ + size_t am_alignment; + + /** + * Offset in the Active Message receive buffer, which should be aligned to + * the @a am_alignment boundary. Note this parameter has no effect without + * setting @a am_alignment parameter. The provided value must be less than + * the given @a am_alignment value. The default value is 0. + * + * +-+ pointer to @a data in @ref uct_am_callback_t + * | + * | + alignment boundary + * | | + * v v + * +-------------------+ + * | align | | + * | offset | | + * +-------------------+ + */ + size_t am_align_offset; + + /** + * UCT @ref uct_iface_feature "features" that are used for interface + * initialization. + */ + uint64_t features; +}; + +/** + * @ingroup UCT_RESOURCE + * @brief Parameters for creating a UCT endpoint by @ref uct_ep_create + */ +struct uct_ep_params +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_ep_params_field. Fields not specified by this mask will be + * ignored. 
+ */ + uint64_t field_mask; + + /** + * Interface to create the endpoint on. + * Either @a iface or @a cm field must be initialized but not both. + */ + uct_iface_h iface; + + /** + * User data associated with the endpoint. + */ + void* user_data; + + /** + * The device address to connect to on the remote peer. This must be defined + * together with @ref uct_ep_params_t::iface_addr to create an endpoint + * connected to a remote interface. + */ + const uct_device_addr_t* dev_addr; + + /** + * This specifies the remote address to use when creating an endpoint that + * is connected to a remote interface. + * @note This requires @ref UCT_IFACE_FLAG_CONNECT_TO_IFACE capability. + */ + const uct_iface_addr_t* iface_addr; + + /** + * The sockaddr to connect to on the remote peer. If set, @ref uct_ep_create + * will create an endpoint for a connection to the remote peer, specified by + * its socket address. + * @note The interface in this routine requires the + * @ref UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR capability. + */ + const ucs_sock_addr_t* sockaddr; + + /** + * @ref uct_cb_flags to indicate @ref uct_ep_params_t::sockaddr_pack_cb, + * @ref uct_ep_params_t::sockaddr_cb_client, + * @ref uct_ep_params_t::sockaddr_cb_server, + * @ref uct_ep_params_t::disconnect_cb and + * @ref uct_ep_params_t::cm_resolve_cb behavior. + * If none of these are set, this field will be ignored. + */ + uint32_t sockaddr_cb_flags; + + /** + * Callback that will be used for filling the user's private data to be + * delivered to the remote peer by the callback on the server or client side. + * This field is only valid if @ref uct_ep_params_t::sockaddr is set. + * @note It is never guaranteed that the callback will be called. If, for + * example, the endpoint goes into error state before issuing the connection + * request, the callback will not be invoked. + * @note Can not be set together with @ref uct_ep_params_t::private_data or + * @ref uct_ep_params_t::cm_resolve_cb.
+ */ + uct_cm_ep_priv_data_pack_callback_t sockaddr_pack_cb; + + /** + * The connection manager object as created by @ref uct_cm_open. + * Either @a cm or @a iface field must be initialized but not both. + */ + uct_cm_h cm; + + /** + * Connection request that was passed to + * @ref uct_cm_listener_conn_request_args_t::conn_request. + * @note After a call to @ref uct_ep_create, @a params.conn_request is + * consumed and should not be used anymore, even if the call returns + * with an error. + */ + uct_conn_request_h conn_request; + + /** + * Callback that will be invoked when the endpoint on the client side + * is being connected to the server by a connection manager @ref uct_cm_h . + */ + uct_cm_ep_client_connect_callback_t sockaddr_cb_client; + + /** + * Callback that will be invoked when the endpoint on the server side + * is being connected to a client by a connection manager @ref uct_cm_h . + */ + uct_cm_ep_server_conn_notify_callback_t sockaddr_cb_server; + + /** + * Callback that will be invoked when the endpoint is disconnected. + */ + uct_ep_disconnect_cb_t disconnect_cb; + + /** + * Index of the path which the endpoint should use, must be in the range + * 0..(@ref uct_iface_attr_t.dev_num_paths - 1). + */ + unsigned path_index; + + /** + * This callback is invoked when the remote server address provided in field + * @ref uct_ep_params_t::sockaddr is resolved to the local device to be used + * for connection establishment. + * @note This field is mutually exclusive with + * @ref uct_ep_params::sockaddr_pack_cb. + */ + uct_cm_ep_resolve_callback_t cm_resolve_cb; + + /** + * Private data to be passed from server to client. Can be used only along + * with @ref uct_ep_params::conn_request. + * @note This field is mutually exclusive with + * @ref uct_ep_params::sockaddr_pack_cb. + */ + const void* private_data; + + /** + * Length of @ref uct_ep_params::private_data, the maximal allowed value is + * indicated by the @ref uct_cm_attr::max_conn_priv. 
+ */ + size_t private_data_length; + + /** + * The sockaddr to bind locally. If set, @ref uct_ep_create + * will create an endpoint binding to this local sockaddr. + * @note The interface in this routine requires the + * @ref UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR capability. + */ + const ucs_sock_addr_t* local_sockaddr; + + /** + * Device address length. If not provided, the transport will assume a + * default minimum length according to the address buffer contents. + */ + size_t dev_addr_length; + + /** + * Iface address length. If not provided, the transport will assume a + * default minimum length according to the address buffer contents. + */ + size_t iface_addr_length; +}; + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Parameters for connecting a UCT endpoint by @ref uct_ep_connect. + */ +struct uct_ep_connect_params +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_ep_connect_params_field. Fields not specified by this mask + * will be ignored. + */ + uint64_t field_mask; + + /** + * User's private data to be passed from client to server. + */ + const void* private_data; + + /** + * Length of @ref uct_ep_connect_params::private_data, the maximal allowed + * value is indicated by the @ref uct_cm_attr::max_conn_priv. + */ + size_t private_data_length; +}; + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Connection manager attributes, capabilities and limitations. + */ +struct uct_cm_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_cm_attr_field. Fields not specified by this mask + * will be ignored. + */ + uint64_t field_mask; + + /** + * Max size of the connection manager's private data used for connection + * establishment with sockaddr. + */ + size_t max_conn_priv; +}; + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief UCT listener attributes, capabilities and limitations. 
+ */ +struct uct_listener_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_listener_attr_field. Fields not specified by this mask + * will be ignored. + */ + uint64_t field_mask; + + /** + * Sockaddr on which this listener is listening. + */ + struct sockaddr_storage sockaddr; +}; + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Parameters for creating a listener object @ref uct_listener_h by + * @ref uct_listener_create + */ +struct uct_listener_params +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_listener_params_field. Fields not specified by this mask + * will be ignored. + */ + uint64_t field_mask; + + /** + * Backlog of incoming connection requests. If specified, must be a positive value. + * If not specified, each CM component will use its maximal allowed value, + * based on the system's setting. + */ + int backlog; + + /** + * Callback function for handling incoming connection requests. + */ + uct_cm_listener_conn_request_callback_t conn_request_cb; + + /** + * User data associated with the listener. + */ + void* user_data; +}; + +/** + * @ingroup UCT_MD + * @brief Memory domain attributes. + * + * This structure defines the attributes of a Memory Domain which includes + * maximum memory that can be allocated, credentials required for accessing the memory, + * CPU mask indicating the proximity of CPUs, and bitmaps indicating the types + * of memory (CPU/CUDA/ROCM) that can be detected, allocated and accessed. 
+ */ +struct uct_md_attr +{ + struct + { + uint64_t max_alloc; /**< Maximal allocation size */ + size_t max_reg; /**< Maximal registration size */ + uint64_t flags; /**< UCT_MD_FLAG_xx */ + uint64_t reg_mem_types; /**< Bitmap of memory types that Memory Domain can be + registered with */ + uint64_t detect_mem_types; /**< Bitmap of memory types that Memory Domain can + detect if address belongs to it */ + uint64_t alloc_mem_types; /**< Bitmap of memory types that Memory Domain can + allocate memory on */ + uint64_t access_mem_types; /**< Memory types that Memory Domain can access */ + } cap; + + ucs_linear_func_t reg_cost; /**< Memory registration cost estimation + (time,seconds) as a linear function + of the buffer size. */ + + char component_name[UCT_COMPONENT_NAME_MAX]; /**< Component name */ + size_t rkey_packed_size; /**< Size of buffer needed for packed rkey */ + ucs_cpu_set_t local_cpus; /**< Mask of CPUs near the resource */ +}; + +/** + * @ingroup UCT_MD + * @brief UCT MD memory attributes field mask + * + * The enumeration allows specifying which fields in @ref uct_md_mem_attr_t + * are present. + */ +typedef enum uct_md_mem_attr_field +{ + /** Indicate if memory type is populated. E.g. CPU/GPU */ + UCT_MD_MEM_ATTR_FIELD_MEM_TYPE = UCS_BIT(0), + + /** + * Indicate if details of system device backing the pointer are populated. + * For example: GPU device, NUMA domain, etc. + */ + UCT_MD_MEM_ATTR_FIELD_SYS_DEV = UCS_BIT(1), + + /** Request base address of the allocation to which the buffer belongs. */ + UCT_MD_MEM_ATTR_FIELD_BASE_ADDRESS = UCS_BIT(2), + + /** Request the whole length of the allocation to which the buffer belongs. */ + UCT_MD_MEM_ATTR_FIELD_ALLOC_LENGTH = UCS_BIT(3), + + /** + * Request a cross-device dmabuf file descriptor that represents a memory + * region, and can be used to register the region with another memory + * domain. 
+ */ + UCT_MD_MEM_ATTR_FIELD_DMABUF_FD = UCS_BIT(4), + + /** + * Request the offset of the provided virtual address relative to the + * beginning of its backing dmabuf region. + */ + UCT_MD_MEM_ATTR_FIELD_DMABUF_OFFSET = UCS_BIT(5) +} uct_md_mem_attr_field_t; + +/** + * @ingroup UCT_MD + * @brief Memory pointer attributes. + * + * This structure defines the attributes of a memory pointer which may + * include the memory type of the pointer, and the system device that backs + * the pointer depending on the bit fields populated in field_mask. + */ +typedef struct uct_md_mem_attr +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_md_mem_attr_field_t. + */ + uint64_t field_mask; + + /** + * The type of memory. E.g. CPU/GPU memory or some other valid type. + * If the md does not support sys_dev query, then UCS_MEMORY_TYPE_UNKNOWN + * is returned. + */ + ucs_memory_type_t mem_type; + + /** + * Index of the system device on which the buffer resides. eg: NUMA/GPU + * If the md does not support sys_dev query, then UCS_SYS_DEVICE_ID_UNKNOWN + * is returned. + */ + ucs_sys_device_t sys_dev; + + /** + * Base address of the allocation to which the provided buffer belongs. + * If the md does not support base address query, then the pointer passed to + * uct_md_mem_query is returned as is. + */ + void* base_address; + + /** + * Length of the whole allocation to which the provided buffer belongs. + * If the md does not support querying allocation length, then the length passed + * to uct_md_mem_query is returned as is. + */ + size_t alloc_length; + + /** + * Dmabuf file descriptor to expose memory regions across devices. Refer + * (https://01.org/linuxgraphics/gfx-docs/drm/driver-api/dma-buf.html). + * If the md does not support querying the fd object associated with the + * region, then dmabuf_fd is set to UCT_DMABUF_FD_INVALID by + * uct_md_mem_query().
It is the responsibility of the user to close the + * returned fd using close (2) when it's no longer needed. + */ + int dmabuf_fd; + + /** + * Offset of the given address from the start of the memory region + * (identified by dmabuf_fd) backing the memory region being queried. + */ + size_t dmabuf_offset; +} uct_md_mem_attr_t; + +/** + * @ingroup UCT_MD + * @brief Query attributes of a given pointer + * + * Return attributes such as memory type, base address, allocation length, + * and system device for the given pointer of specific length. + * + * @param [in] md Memory domain to run the query on. This function + * returns an error if the md does not recognize the + * pointer. + * @param [in] address The address of the pointer. Must be non-NULL + * else UCS_ERR_INVALID_PARAM error is returned. + * @param [in] length Length of the memory region to examine. + * Must be nonzero else UCS_ERR_INVALID_PARAM error + * is returned. + * @param [inout] mem_attr If successful, filled with ptr attributes. + * + * @return UCS_OK if at least one attribute is successfully queried otherwise + * an error code as defined by @ref ucs_status_t is returned. + */ +ucs_status_t +uct_md_mem_query(uct_md_h md, const void* address, size_t length, + uct_md_mem_attr_t* mem_attr); + +/** + * @ingroup UCT_MD + * @brief Describes a memory allocated by UCT. + * + * This structure describes the memory block which includes the address, size, and + * Memory Domain used for allocation. This structure is passed to interface + * and the memory is allocated by memory allocation functions @ref uct_mem_alloc. 
+ */ +typedef struct uct_allocated_memory +{ + void* address; /**< Address of allocated memory */ + size_t length; /**< Real size of allocated memory */ + uct_alloc_method_t method; /**< Method used to allocate the memory */ + ucs_memory_type_t mem_type; /**< type of allocated memory */ + uct_md_h md; /**< if method==MD: MD used to allocate the memory */ + uct_mem_h memh; /**< if method==MD: MD memory handle */ + ucs_sys_device_t sys_device; /**< System device for allocated memory */ +} uct_allocated_memory_t; + +/** + * @ingroup UCT_MD + * @brief Remote key with its type + * + * This structure describes the credentials (typically key) and information + * required to access the remote memory by the communication interfaces. + */ +typedef struct uct_rkey_bundle +{ + uct_rkey_t rkey; /**< Remote key descriptor, passed to RMA functions */ + void* handle; /**< Handle, used internally for releasing the key */ + void* type; /**< Remote key type */ +} uct_rkey_bundle_t; + +/** + * @ingroup UCT_RESOURCE + * @brief Completion handle. + * + * This structure should be allocated by the user and can be passed to communication + * primitives. The user must initialize all fields of the structure. + * If the operation returns UCS_INPROGRESS, this structure will be in use by the + * transport until the operation completes. When the operation completes, "count" + * field is decremented by 1, and whenever it reaches 0 - the callback is called. + * + * Notes: + * - The same structure can be passed multiple times to communication functions + * without the need to wait for completion. + * - If the number of operations is smaller than the initial value of the counter, + * the callback will not be called at all, so it may be left undefined. + * - status field is required to track the first time the error occurred, and + * report it via a callback when count reaches 0. 
+ */ +struct uct_completion +{ + uct_completion_callback_t func; /**< User callback function */ + int count; /**< Completion counter */ + ucs_status_t status; /**< Completion status, this field must + be initialized with UCS_OK before + first operation is started. */ +}; + +/** + * @ingroup UCT_RESOURCE + * @brief Pending request. + * + * This structure should be passed to @ref uct_ep_pending_add() and is used to signal + * new available resources back to user. + */ +struct uct_pending_req +{ + uct_pending_callback_t func; /**< User callback function */ + char priv[UCT_PENDING_REQ_PRIV_LEN]; /**< Used internally by UCT */ +}; + +/** + * @ingroup UCT_TAG + * @brief Posted tag context. + * + * Tag context is an object which tracks a tag posted to the transport. It + * contains callbacks for matching events on this tag. + */ +struct uct_tag_context +{ + /** + * Tag is consumed by the transport and should not be matched in software. + * + * @param [in] self Pointer to relevant context structure, which was + * initially passed to @ref uct_iface_tag_recv_zcopy. + */ + void (*tag_consumed_cb)(uct_tag_context_t* self); + + /** + * Tag processing is completed by the transport. + * + * @param [in] self Pointer to relevant context structure, which was + * initially passed to @ref uct_iface_tag_recv_zcopy. + * @param [in] stag Tag from sender. + * @param [in] imm Immediate data from sender. For rendezvous, it's always 0. + * @param [in] length Completed length. + * @param [in] inline_data If non-null, points to a temporary buffer which contains + the received data. In this case the received data was not + placed directly in the receive buffer. This callback + routine is responsible for copying out the inline data, otherwise it is released. + * @param [in] status Completion status: + * (a) UCS_OK - Success, data placed in provided buffer. + * (b) UCS_ERR_TRUNCATED - Sender's length exceeds posted + buffer, no data is copied. + * (c) UCS_ERR_CANCELED - Canceled by user.
+ */ + void (*completed_cb)(uct_tag_context_t* self, uct_tag_t stag, uint64_t imm, + size_t length, void* inline_data, ucs_status_t status); + + /** + * Tag was matched by a rendezvous request, which should be completed by + * the protocol layer. + * + * @param [in] self Pointer to relevant context structure, which was + * initially passed to @ref uct_iface_tag_recv_zcopy. + * @param [in] stag Tag from sender. + * @param [in] header User defined header. + * @param [in] header_length User defined header length in bytes. + * @param [in] status Completion status. + * @param [in] flags Flags defined by UCT_TAG_RECV_CB_xx. + */ + void (*rndv_cb)(uct_tag_context_t* self, uct_tag_t stag, const void* header, + unsigned header_length, ucs_status_t status, unsigned flags); + + /** A placeholder for the private data used by the transport */ + char priv[UCT_TAG_PRIV_LEN]; +}; + +/** + * @ingroup UCT_RESOURCE + * @brief flags of @ref uct_tag_context. + */ +enum +{ + /* If set, header points to inline data, otherwise it is user buffer. */ + UCT_TAG_RECV_CB_INLINE_DATA = UCS_BIT(0) +}; + +extern const char* uct_alloc_method_names[]; +extern const char* uct_device_type_names[]; + +/** + * @ingroup UCT_RESOURCE + * @brief Query for list of components. + * + * Obtain the list of transport components available on the current system. + * + * @param [out] components_p Filled with a pointer to an array of component + * handles. + * @param [out] num_components_p Filled with the number of elements in the array. + * + * @return UCS_OK if successful, or UCS_ERR_NO_MEMORY if failed to allocate the + * array of component handles. + */ +ucs_status_t +uct_query_components(uct_component_h** components_p, unsigned* num_components_p); + +/** + * @ingroup UCT_RESOURCE + * @brief Release the list of components returned from @ref uct_query_components. + * + * This routine releases the memory associated with the list of components + * allocated by @ref uct_query_components. 
+ * + * @param [in] components Array of component handles to release. + */ +void +uct_release_component_list(uct_component_h* components); + +/** + * @ingroup UCT_RESOURCE + * @brief Get component attributes + * + * Query various attributes of a component. + * + * @param [in] component Component handle to query attributes for. The + * handle can be obtained from @ref uct_query_components. + * @param [inout] component_attr Filled with component attributes. + * + * @return UCS_OK if successful, or nonzero error code in case of failure. + */ +ucs_status_t +uct_component_query(uct_component_h component, uct_component_attr_t* component_attr); + +/** + * @ingroup UCT_RESOURCE + * @brief Open a memory domain. + * + * Open a specific memory domain. All communications and memory operations + * are performed in the context of a specific memory domain. Therefore it + * must be created before communication resources. + * + * @param [in] component Component on which to open the memory domain, + * as returned from @ref uct_query_components. + * @param [in] md_name Memory domain name, as returned from @ref + * uct_component_query. + * @param [in] config MD configuration options. Should be obtained + * from uct_md_config_read() function, or point to + * MD-specific structure which extends uct_md_config_t. + * @param [out] md_p Filled with a handle to the memory domain. + * + * @return Error code. + */ +ucs_status_t +uct_md_open(uct_component_h component, const char* md_name, const uct_md_config_t* config, + uct_md_h* md_p); + +/** + * @ingroup UCT_RESOURCE + * @brief Close a memory domain. + * + * @param [in] md Memory domain to close. + */ +void +uct_md_close(uct_md_h md); + +/** + * @ingroup UCT_RESOURCE + * @brief Query for transport resources. + * + * This routine queries the @ref uct_md_h "memory domain" for communication + * resources that are available for it. + * + * @param [in] md Handle to memory domain. 
+ * @param [out] resources_p Filled with a pointer to an array of resource + * descriptors. + * @param [out] num_resources_p Filled with the number of resources in the array. + * + * @return Error code. + */ +ucs_status_t +uct_md_query_tl_resources(uct_md_h md, uct_tl_resource_desc_t** resources_p, + unsigned* num_resources_p); + +/** + * @ingroup UCT_RESOURCE + * @brief Release the list of resources returned from @ref uct_md_query_tl_resources. + * + * This routine releases the memory associated with the list of resources + * allocated by @ref uct_md_query_tl_resources. + * + * @param [in] resources Array of resource descriptors to release. + */ +void +uct_release_tl_resource_list(uct_tl_resource_desc_t* resources); + +/** + * @ingroup UCT_CONTEXT + * @brief Create a worker object. + * + * The worker represents a progress engine. Multiple progress engines can be + * created in an application, for example to be used by multiple threads. + * Transports can allocate separate communication resources for every worker, + * so that every worker can be progressed independently of others. + * + * @param [in] async Context for async event handlers. Must not be NULL. + * @param [in] thread_mode Thread access mode to the worker and all interfaces + * and endpoints associated with it. + * @param [out] worker_p Filled with a pointer to the worker object. + */ +ucs_status_t +uct_worker_create(ucs_async_context_t* async, ucs_thread_mode_t thread_mode, + uct_worker_h* worker_p); + +/** + * @ingroup UCT_CONTEXT + * @brief Destroy a worker object. + * + * @param [in] worker Worker object to destroy. + */ +void +uct_worker_destroy(uct_worker_h worker); + +/** + * @ingroup UCT_CONTEXT + * @brief Add a slow path callback function to a worker progress. + * + * If *id_p is equal to UCS_CALLBACKQ_ID_NULL, this function will add a callback + * which will be invoked every time progress is made on the worker. 
*id_p will + * be updated with an id which refers to this callback and can be used in + * @ref uct_worker_progress_unregister_safe to remove it from the progress path. + * + * @param [in] worker Handle to the worker whose progress should invoke + * the callback. + * @param [in] func Pointer to the callback function. + * @param [in] arg Argument for the callback function. + * @param [in] flags Callback flags, see @ref ucs_callbackq_flags. + * @param [inout] id_p Points to a location to store a callback identifier. + * If *id_p is equal to UCS_CALLBACKQ_ID_NULL, a + * callback will be added and *id_p will be replaced + * with a callback identifier which can be subsequently + * used to remove the callback. Otherwise, no callback + * will be added and *id_p will be left unchanged. + * + * @note This function is thread safe. + */ +void +uct_worker_progress_register_safe(uct_worker_h worker, ucs_callback_t func, void* arg, + unsigned flags, uct_worker_cb_id_t* id_p); + +/** + * @ingroup UCT_CONTEXT + * @brief Remove a slow path callback function from worker's progress. + * + * If *id_p is not equal to UCS_CALLBACKQ_ID_NULL, remove a callback which was + * previously added by @ref uct_worker_progress_register_safe. *id_p will be reset + * to UCS_CALLBACKQ_ID_NULL. + * + * @param [in] worker Handle to the worker whose progress should invoke + * the callback. + * @param [inout] id_p Points to a callback identifier which indicates + * the callback to remove. If *id_p is not equal to + * UCS_CALLBACKQ_ID_NULL, the callback will be removed + * and *id_p will be reset to UCS_CALLBACKQ_ID_NULL. + * If *id_p is equal to UCS_CALLBACKQ_ID_NULL, no + * operation will be performed and *id_p will be + * left unchanged. + * + * @note This function is thread safe. + */ +void +uct_worker_progress_unregister_safe(uct_worker_h worker, uct_worker_cb_id_t* id_p); + +/** + * @ingroup UCT_RESOURCE + * @brief Read transport-specific interface configuration. 
+ * + * @param [in] md Memory domain on which the transport's interface + * was registered. + * @param [in] tl_name Transport name. If @e md supports + * @ref UCT_MD_FLAG_SOCKADDR, the transport name + * is allowed to be NULL. In this case, the configuration + * returned from this routine should be passed to + * @ref uct_iface_open with + * @ref UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER or + * @ref UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT set in + * @ref uct_iface_params_t.open_mode. + * In addition, if tl_name is not NULL, the configuration + * returned from this routine should be passed to + * @ref uct_iface_open with @ref UCT_IFACE_OPEN_MODE_DEVICE + * set in @ref uct_iface_params_t.open_mode. + * @param [in] env_prefix If non-NULL, search for environment variables + * starting with this UCT__. Otherwise, search + * for environment variables starting with just UCT_. + * @param [in] filename If non-NULL, read configuration from this file. If + * the file does not exist, it will be ignored. + * @param [out] config_p Filled with a pointer to configuration. + * + * @return Error code. + */ +ucs_status_t +uct_md_iface_config_read(uct_md_h md, const char* tl_name, const char* env_prefix, + const char* filename, uct_iface_config_t** config_p); + +/** + * @ingroup UCT_RESOURCE + * @brief Release configuration memory returned from uct_md_iface_config_read(), + * uct_md_config_read(), or from uct_cm_config_read(). + * + * @param [in] config Configuration to release. + */ +void +uct_config_release(void* config); + +/** + * @ingroup UCT_CONTEXT + * @brief Get value by name from interface configuration (@ref uct_iface_config_t), + * memory domain configuration (@ref uct_md_config_t) + * or connection manager configuration (@ref uct_cm_config_t). + * + * @param [in] config Configuration to get from. + * @param [in] name Configuration variable name. + * @param [out] value Pointer to get value. Should be allocated/freed by + * caller. 
+ * @param [in] max Available memory space at @a value pointer. + * + * @return UCS_OK if found, otherwise UCS_ERR_INVALID_PARAM or UCS_ERR_NO_ELEM + * if error. + */ +ucs_status_t +uct_config_get(void* config, const char* name, char* value, size_t max); + +/** + * @ingroup UCT_CONTEXT + * @brief Modify interface configuration (@ref uct_iface_config_t), + * memory domain configuration (@ref uct_md_config_t) + * or connection manager configuration (@ref uct_cm_config_t). + * + * @param [in] config Configuration to modify. + * @param [in] name Configuration variable name. + * @param [in] value Value to set. + * + * @return Error code. + */ +ucs_status_t +uct_config_modify(void* config, const char* name, const char* value); + +/** + * @ingroup UCT_RESOURCE + * @brief Open a communication interface. + * + * @param [in] md Memory domain to create the interface on. + * @param [in] worker Handle to worker which will be used to progress + * communications on this interface. + * @param [in] params User defined @ref uct_iface_params_t parameters. + * @param [in] config Interface configuration options. Should be obtained + * from uct_md_iface_config_read() function, or point to + * transport-specific structure which extends + * uct_iface_config_t. + * @param [out] iface_p Filled with a handle to opened communication interface. + * + * @return Error code. + */ +ucs_status_t +uct_iface_open(uct_md_h md, uct_worker_h worker, const uct_iface_params_t* params, + const uct_iface_config_t* config, uct_iface_h* iface_p); + +/** + * @ingroup UCT_RESOURCE + * @brief Close and destroy an interface. + * + * @param [in] iface Interface to close. + */ +void +uct_iface_close(uct_iface_h iface); + +/** + * @ingroup UCT_RESOURCE + * @brief Get interface attributes. + * + * @param [in] iface Interface to query. + * @param [out] iface_attr Filled with interface attributes. 
+ */ +ucs_status_t +uct_iface_query(uct_iface_h iface, uct_iface_attr_t* iface_attr); + +/** + * @ingroup UCT_RESOURCE + * @brief Get address of the device the interface is using. + * + * Get underlying device address of the interface. All interfaces using the same + * device would return the same address. + * + * @param [in] iface Interface to query. + * @param [out] addr Filled with device address. The size of the buffer + * provided must be at least @ref + * uct_iface_attr_t::device_addr_len. + */ +ucs_status_t +uct_iface_get_device_address(uct_iface_h iface, uct_device_addr_t* addr); + +/** + * @ingroup UCT_RESOURCE + * @brief Get interface address. + * + * requires @ref UCT_IFACE_FLAG_CONNECT_TO_IFACE. + * + * @param [in] iface Interface to query. + * @param [out] addr Filled with interface address. The size of the buffer + * provided must be at least @ref + * uct_iface_attr_t::iface_addr_len. + */ +ucs_status_t +uct_iface_get_address(uct_iface_h iface, uct_iface_addr_t* addr); + +/** + * @ingroup UCT_RESOURCE + * @brief Check if remote iface address is reachable. + * + * This function checks if a remote address can be reached from a local interface. + * If the function returns true, it does not necessarily mean a connection and/or + * data transfer would succeed, since the reachability check is a local operation + * it does not detect issues such as network mis-configuration or lack of connectivity. + * + * @param [in] iface Interface to check reachability from. + * @param [in] dev_addr Device address to check reachability to. It is NULL + * if iface_attr.dev_addr_len == 0, and must be non-NULL + * otherwise. + * @param [in] iface_addr Interface address to check reachability to. It is + * NULL if iface_attr.iface_addr_len == 0, and must + * be non-NULL otherwise. + * + * @return Nonzero if reachable, 0 if not. 
+ */ +int +uct_iface_is_reachable(const uct_iface_h iface, const uct_device_addr_t* dev_addr, + const uct_iface_addr_t* iface_addr); + +/** + * @ingroup UCT_RESOURCE + * @brief Check if the destination endpoint is alive with respect to the UCT library. + * + * This function checks if the destination endpoint is alive with respect to the + * UCT library. If the status of @a ep is known, either @ref UCS_OK or an error + * is returned immediately. Otherwise, @ref UCS_INPROGRESS is returned, + * indicating that synchronization on the status is needed. In this case, the + * status will be propagated by the @a comp callback. + * + * @param [in] ep Endpoint to check + * @param [in] flags Flags that define level of check + * (currently unsupported - set to 0). + * @param [in] comp Handler to process status of @a ep + * + * @return Error code. + */ +ucs_status_t +uct_ep_check(const uct_ep_h ep, unsigned flags, uct_completion_t* comp); + +/** + * @ingroup UCT_RESOURCE + * @brief Obtain a notification file descriptor for polling. + * + * Only interfaces that support at least one of the UCT_IFACE_FLAG_EVENT* flags + * will implement this function. + * + * @param [in] iface Interface to get the notification descriptor. + * @param [out] fd_p Location to write the notification file descriptor. + * + * @return Error code. + */ +ucs_status_t +uct_iface_event_fd_get(uct_iface_h iface, int* fd_p); + +/** + * @ingroup UCT_RESOURCE + * @brief Turn on event notification for the next event. + * + * This routine needs to be called before waiting on each notification on this + * interface, so will typically be called once the processing of the previous + * event is over. + * + * @param [in] iface Interface to arm. + * @param [in] events Events to wakeup on. See @ref uct_iface_event_types + * + * @return ::UCS_OK The operation completed successfully. File descriptor + * will be signaled by new events.
+ * @return ::UCS_ERR_BUSY There are unprocessed events which prevent the + * file descriptor from being armed. + * The operation is not completed. File descriptor + * will not be signaled by new events. + * @return @ref ucs_status_t "Other" different error codes in case of issues. + */ +ucs_status_t +uct_iface_event_arm(uct_iface_h iface, unsigned events); + +/** + * @ingroup UCT_RESOURCE + * @brief Allocate memory which can be used for zero-copy communications. + * + * Allocate a region of memory which can be used for zero-copy data transfer or + * remote access on a particular transport interface. + * + * @param [in] iface Interface to allocate memory on. + * @param [in] length Size of memory region to allocate. + * @param [in] flags Memory allocation flags, see @ref uct_md_mem_flags. + * @param [in] name Allocation name, for debug purposes. + * @param [out] mem Descriptor of allocated memory. + * + * @return UCS_OK if allocation was successful, error code otherwise. + */ +ucs_status_t +uct_iface_mem_alloc(uct_iface_h iface, size_t length, unsigned flags, const char* name, + uct_allocated_memory_t* mem); + +/** + * @ingroup UCT_RESOURCE + * @brief Release memory allocated with @ref uct_iface_mem_alloc(). + * + * @param [in] mem Descriptor of memory to release. + */ +void +uct_iface_mem_free(const uct_allocated_memory_t* mem); + +/** + * @ingroup UCT_AM + * @brief Set active message handler for the interface. + * + * Only one handler can be set of each active message ID, and setting a handler + * replaces the previous value. If cb == NULL, the current handler is removed. + * + * + * @param [in] iface Interface to set the active message handler for. + * @param [in] id Active message id. Must be 0..UCT_AM_ID_MAX-1. + * @param [in] cb Active message callback. NULL to clear. + * @param [in] arg Active message argument. 
+ * @param [in] flags Required @ref uct_cb_flags "callback flags" + * + * @return error code if the interface does not support active messages or + * requested callback flags + */ +ucs_status_t +uct_iface_set_am_handler(uct_iface_h iface, uint8_t id, uct_am_callback_t cb, void* arg, + uint32_t flags); + +/** + * @ingroup UCT_AM + * @brief Set active message tracer for the interface. + * + * Sets a function which dumps active message debug information to a buffer, + * which is printed every time an active message is sent or received, when + * data tracing is on. Without the tracer, only transport-level information is + * printed. + * + * @param [in] iface Interface to set the active message tracer for. + * @param [in] tracer Active message tracer. NULL to clear. + * @param [in] arg Tracer custom argument. + */ +ucs_status_t +uct_iface_set_am_tracer(uct_iface_h iface, uct_am_tracer_t tracer, void* arg); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Accept connection request. + * + * @param [in] iface Transport interface which generated connection + * request @a conn_request. + * @param [in] conn_request Connection establishment request passed as parameter + * of @ref uct_sockaddr_conn_request_callback_t. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +uct_iface_accept(uct_iface_h iface, uct_conn_request_h conn_request); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Reject connection request. Will invoke an error handler @ref + * uct_error_handler_t on the remote transport interface, if set. + * + * @param [in] iface Interface which generated connection establishment + * request @a conn_request. + * @param [in] conn_request Connection establishment request passed as parameter + * of @ref uct_sockaddr_conn_request_callback_t. 
+ * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +uct_iface_reject(uct_iface_h iface, uct_conn_request_h conn_request); + +/** + * @ingroup UCT_RESOURCE + * @brief Create new endpoint. + * + * Create a UCT endpoint in one of the available modes: + * -# Unconnected endpoint: If no any address is present in @ref uct_ep_params, + * this creates an unconnected endpoint. To establish a connection to a + * remote endpoint, @ref uct_ep_connect_to_ep will need to be called. Use of + * this mode requires @ref uct_ep_params_t::iface has the + * @ref UCT_IFACE_FLAG_CONNECT_TO_EP capability flag. It may be obtained by + * @ref uct_iface_query. + * -# Connect to a remote interface: If @ref uct_ep_params_t::dev_addr and + * @ref uct_ep_params_t::iface_addr are set, this will establish an endpoint + * that is connected to a remote interface. This requires that + * @ref uct_ep_params_t::iface has the @ref UCT_IFACE_FLAG_CONNECT_TO_IFACE + * capability flag. It may be obtained by @ref uct_iface_query. + * -# Connect to a remote socket address: If @ref uct_ep_params_t::sockaddr is + * set, this will create an endpoint that is connected to a remote socket. + * This requires that either @ref uct_ep_params::cm, or + * @ref uct_ep_params::iface will be set. In the latter case, the interface + * has to support @ref UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR flag, which can be + * checked by calling @ref uct_iface_query. + * @param [in] params User defined @ref uct_ep_params_t configuration for the + * @a ep_p. + * @param [out] ep_p Filled with handle to the new endpoint. + * + * @return UCS_OK The endpoint is created successfully. This does not + * guarantee that the endpoint has been connected to + * the destination defined in @a params; in case of failure, + * the error will be reported to the interface error + * handler callback provided to @ref uct_iface_open + * via @ref uct_iface_params_t.err_handler. 
+ * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +uct_ep_create(const uct_ep_params_t* params, uct_ep_h* ep_p); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Connect a client side endpoint after it is bound to a local network + * device, i.e. @ref uct_ep_params_t::cm_resolve_cb was invoked. + * + * This non-blocking routine establishes connection of the client side endpoint + * and sends private data to the peer. + * + * @param [in] ep Endpoint to connect. + * @param [in] params Parameters as defined in @ref uct_ep_connect_params_t. + * + * @return UCS_OK Operation has been initiated successfully. + * Other error codes as defined by @ref ucs_status_t. + */ +ucs_status_t +uct_ep_connect(uct_ep_h ep, const uct_ep_connect_params_t* params); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Initiate a disconnection of an endpoint connected to a + * sockaddr by a connection manager @ref uct_cm_h. + * + * This non-blocking routine will send a disconnect notification on the endpoint, + * so that @ref uct_ep_disconnect_cb_t will be called on the remote peer. + * The remote side should also call this routine when handling the initiator's + * disconnect. + * After a call to this function, the given endpoint may not be used for + * communications anymore. + * The @ref uct_ep_flush / @ref uct_iface_flush routines will guarantee that the + * disconnect notification is delivered to the remote peer. + * @ref uct_ep_destroy should be called on this endpoint after invoking this + * routine and @ref uct_ep_params::disconnect_cb was called. + * + * @param [in] ep Endpoint to disconnect. + * @param [in] flags Reserved for future use. + * + * @return UCS_OK Operation has completed successfully. + * UCS_ERR_BUSY The @a ep is not connected yet (either + * @ref uct_cm_ep_client_connect_callback_t or + * @ref uct_cm_ep_server_conn_notify_callback_t + * was not invoked). 
+ * UCS_INPROGRESS The disconnect request has been initiated, but + * the remote peer has not yet responded to this + * request, and consequently the registered + * callback @ref uct_ep_disconnect_cb_t has not + * been invoked to handle the request. + * UCS_ERR_NOT_CONNECTED The @a ep is disconnected locally and remotely. + * Other error codes as defined by @ref ucs_status_t . + */ +ucs_status_t +uct_ep_disconnect(uct_ep_h ep, unsigned flags); + +/** + * @ingroup UCT_RESOURCE + * @brief Destroy an endpoint. + * + * @param [in] ep Endpoint to destroy. + */ +void +uct_ep_destroy(uct_ep_h ep); + +/** + * @ingroup UCT_RESOURCE + * @brief Get endpoint address. + * + * @param [in] ep Endpoint to query. + * @param [out] addr Filled with endpoint address. The size of the buffer + * provided must be at least @ref uct_iface_attr_t::ep_addr_len. + */ +ucs_status_t +uct_ep_get_address(uct_ep_h ep, uct_ep_addr_t* addr); + +/** + * @ingroup UCT_RESOURCE + * @brief Connect endpoint to a remote endpoint. + * + * requires @ref UCT_IFACE_FLAG_CONNECT_TO_EP capability. + * + * @param [in] ep Endpoint to connect. + * @param [in] dev_addr Remote device address. + * @param [in] ep_addr Remote endpoint address. + */ +ucs_status_t +uct_ep_connect_to_ep(uct_ep_h ep, const uct_device_addr_t* dev_addr, + const uct_ep_addr_t* ep_addr); + +/** + * @ingroup UCT_MD + * @brief Query for memory domain attributes. + * + * @param [in] md Memory domain to query. + * @param [out] md_attr Filled with memory domain attributes. + */ +ucs_status_t +uct_md_query(uct_md_h md, uct_md_attr_t* md_attr); + +/** + * @ingroup UCT_MD + * @brief UCT allocation parameters specification field mask + * + * The enumeration allows specifying which fields in @ref uct_mem_alloc_params_t + * are present. 
+ */ +typedef enum +{ + /** Enables @ref uct_mem_alloc_params_t::flags */ + UCT_MEM_ALLOC_PARAM_FIELD_FLAGS = UCS_BIT(0), + + /** Enables @ref uct_mem_alloc_params_t::address */ + UCT_MEM_ALLOC_PARAM_FIELD_ADDRESS = UCS_BIT(1), + + /** Enables @ref uct_mem_alloc_params_t::mem_type */ + UCT_MEM_ALLOC_PARAM_FIELD_MEM_TYPE = UCS_BIT(2), + + /** Enables @ref uct_mem_alloc_params_t::mds */ + UCT_MEM_ALLOC_PARAM_FIELD_MDS = UCS_BIT(3), + + /** Enables @ref uct_mem_alloc_params_t::name */ + UCT_MEM_ALLOC_PARAM_FIELD_NAME = UCS_BIT(4), + + /** Enables @ref uct_mem_alloc_params_t::sys_device */ + UCT_MEM_ALLOC_PARAM_FIELD_SYS_DEVICE = UCS_BIT(5) +} uct_mem_alloc_params_field_t; + +/** + * @ingroup UCT_MD + * @brief Parameters for allocating memory using @ref uct_mem_alloc + */ +typedef struct +{ + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_mem_alloc_params_field_t. Fields not specified in this mask will + * be ignored. + */ + uint64_t field_mask; + + /** + * Memory allocation flags, see @ref uct_md_mem_flags + * If UCT_MEM_ALLOC_PARAM_FIELD_FLAGS is not specified in field_mask, then + * (UCT_MD_MEM_ACCESS_LOCAL_READ | UCT_MD_MEM_ACCESS_LOCAL_WRITE) is used by + * default. + */ + unsigned flags; + + /** + * If @a address is NULL, the underlying allocation routine will + * choose the address at which to create the mapping. If @a address + * is non-NULL and UCT_MD_MEM_FLAG_FIXED is not set, the address + * will be interpreted as a hint as to where to establish the mapping. If + * @a address is non-NULL and UCT_MD_MEM_FLAG_FIXED is set, then the + * specified address is interpreted as a requirement. In this case, if the + * mapping to the exact address cannot be made, the allocation request + * fails. + */ + void* address; + + /** + * Type of memory to be allocated. + */ + ucs_memory_type_t mem_type; + + struct + { + /** + * Array of memory domains to attempt to allocate + * the memory with, for MD allocation method. 
+ */ + const uct_md_h* mds; + + /** + * Length of 'mds' array. May be empty, in such case + * 'mds' may be NULL, and MD allocation method will + * be skipped. + */ + unsigned count; + } mds; + + /** + * Name of the allocated region, used to track memory + * usage for debugging and profiling. + * If UCT_MEM_ALLOC_PARAM_FIELD_NAME is not specified in field_mask, then + * "anonymous-uct_mem_alloc" is used by default. + */ + const char* name; + + /** + * System device on which memory is to be allocated, or + * UCS_SYS_DEVICE_ID_UNKNOWN to allow allocating on any device. + */ + ucs_sys_device_t sys_device; +} uct_mem_alloc_params_t; + +/** + * @ingroup UCT_MD + * @brief Give advice about the use of memory + * + * This routine advises the UCT about how to handle memory range beginning at + * address and size of length bytes. This call does not influence the semantics + * of the application, but may influence its performance. The advice may be + * ignored. + * + * @param [in] md Memory domain memory was allocated or registered on. + * @param [in] memh Memory handle, as returned from @ref uct_mem_alloc + * @param [in] addr Memory base address. Memory range must belong to the + * @a memh + * @param [in] length Length of memory to advise. Must be >0. + * @param [in] advice Memory use advice as defined in the + * @ref uct_mem_advice_t list + */ +ucs_status_t +uct_md_mem_advise(uct_md_h md, uct_mem_h memh, void* addr, size_t length, + uct_mem_advice_t advice); + +/** + * @ingroup UCT_MD + * @brief Register memory for zero-copy sends and remote access. + * + * Register memory on the memory domain. In order to use this function, MD + * must support @ref UCT_MD_FLAG_REG flag. + * + * @param [in] md Memory domain to register memory on. + * @param [in] address Memory to register. + * @param [in] length Size of memory to register. Must be >0. + * @param [in] flags Memory allocation flags, see @ref uct_md_mem_flags. + * @param [out] memh_p Filled with handle for allocated region. 
+ */ +ucs_status_t +uct_md_mem_reg(uct_md_h md, void* address, size_t length, unsigned flags, + uct_mem_h* memh_p); + +/** + * @ingroup UCT_MD + * @brief Undo the operation of @ref uct_md_mem_reg(). + * + * @param [in] md Memory domain which was used to register the memory. + * @param [in] memh Local access key to memory region. + */ +ucs_status_t +uct_md_mem_dereg(uct_md_h md, uct_mem_h memh); + +/** + * @ingroup UCT_MD + * @brief Detect memory type + * + * @param [in] md Memory domain to detect memory type + * @param [in] addr Memory address to detect. + * @param [in] length Size of memory + * @param [out] mem_type_p Filled with memory type of the address range if + function succeeds + * @return UCS_OK If memory type is successfully detected + * UCS_ERR_INVALID_ADDR If failed to detect memory type + */ +ucs_status_t +uct_md_detect_memory_type(uct_md_h md, const void* addr, size_t length, + ucs_memory_type_t* mem_type_p); + +/** + * @ingroup UCT_MD + * @brief Allocate memory for zero-copy communications and remote access. + * + * Allocate potentially registered memory. + * + * @param [in] length The minimal size to allocate. The actual size may + * be larger, for example because of alignment + * restrictions. Must be >0. + * @param [in] methods Array of memory allocation methods to attempt. + * Each of the provided allocation methods will be + * tried in array order, to perform the allocation, + * until one succeeds. Whenever the MD method is + * encountered, each of the provided MDs will be + * tried in array order, to allocate the memory, + * until one succeeds, or they are exhausted. In + * this case the next allocation method from the + * initial list will be attempted. + * @param [in] num_methods Length of 'methods' array. + * @param [in] params Memory allocation characteristics, see + * @ref uct_mem_alloc_params_t. + * @param [out] mem In case of success, filled with information about + * the allocated memory. 
@ref uct_allocated_memory_t + */ +ucs_status_t +uct_mem_alloc(size_t length, const uct_alloc_method_t* methods, unsigned num_methods, + const uct_mem_alloc_params_t* params, uct_allocated_memory_t* mem); + +/** + * @ingroup UCT_MD + * @brief Release allocated memory. + * + * Release the memory allocated by @ref uct_mem_alloc. + * + * @param [in] mem Description of allocated memory, as returned from + * @ref uct_mem_alloc. + */ +ucs_status_t +uct_mem_free(const uct_allocated_memory_t* mem); + +/** + * @ingroup UCT_MD + * @brief Read the configuration for a memory domain. + * + * @param [in] component Read the configuration of this component. + * @param [in] env_prefix If non-NULL, search for environment variables + * starting with UCT_PREFIX_ (PREFIX = this string). Otherwise, search + * for environment variables starting with just UCT_. + * @param [in] filename If non-NULL, read configuration from this file. If + * the file does not exist, it will be ignored. + * @param [out] config_p Filled with a pointer to the configuration. + * + * @return Error code. + */ +ucs_status_t +uct_md_config_read(uct_component_h component, const char* env_prefix, + const char* filename, uct_md_config_t** config_p); + +/** + * @ingroup UCT_MD + * @brief Check if remote sock address is accessible from the memory domain. + * + * This function checks if a remote sock address can be accessed from a local + * memory domain. Accessibility can be checked in local or remote mode. + * + * @param [in] md Memory domain to check accessibility from. + * This memory domain must support the @ref + * UCT_MD_FLAG_SOCKADDR flag. + * @param [in] sockaddr Socket address to check accessibility to. + * @param [in] mode Mode for checking accessibility, as defined in @ref + * uct_sockaddr_accessibility_t. + * Indicates if accessibility is tested on the server side - + * for binding to the given sockaddr, or on the + * client side - for connecting to the given remote + * peer's sockaddr.
+ * + * @return Nonzero if accessible, 0 if inaccessible. + */ +int +uct_md_is_sockaddr_accessible(uct_md_h md, const ucs_sock_addr_t* sockaddr, + uct_sockaddr_accessibility_t mode); + +/** + * @ingroup UCT_MD + * + * @brief Pack a remote key. + * + * @param [in] md Handle to memory domain. + * @param [in] memh Local key, whose remote key should be packed. + * @param [out] rkey_buffer Filled with packed remote key. + * + * @return Error code. + */ +ucs_status_t +uct_md_mkey_pack(uct_md_h md, uct_mem_h memh, void* rkey_buffer); + +/** + * @ingroup UCT_MD + * + * @brief Unpack a remote key. + * + * @param [in] component Component on which to unpack the remote key. + * @param [in] rkey_buffer Packed remote key buffer. + * @param [out] rkey_ob Filled with the unpacked remote key and its type. + * + * @note The remote key must be unpacked with the same component that was used + * to pack it. For example, if a remote device address on the remote + * memory domain which was used to pack the key is reachable by a + * transport on a local component, then that component is eligible to + * unpack the key. + * If the remote key buffer cannot be unpacked with the given component, + * UCS_ERR_INVALID_PARAM will be returned. + * + * @return Error code. + */ +ucs_status_t +uct_rkey_unpack(uct_component_h component, const void* rkey_buffer, + uct_rkey_bundle_t* rkey_ob); + +/** + * @ingroup UCT_MD + * + * @brief Get a local pointer to remote memory. + * + * This routine returns a local pointer to the remote memory + * described by the rkey bundle. The @a component must support + * @ref UCT_COMPONENT_FLAG_RKEY_PTR flag. + * + * @param [in] component Component on which to obtain the pointer to the + * remote key. + * @param [in] rkey_ob A remote key bundle as returned by + * the @ref uct_rkey_unpack function. + * @param [in] remote_addr A remote address within the memory area described + * by the rkey_ob. 
+ * @param [out] addr_p A pointer that can be used for direct access to + * the remote memory. + * + * @note The component used to obtain a local pointer to the remote memory must + * be the same component that was used to pack the remote key. See notes + * section for @ref uct_rkey_unpack. + * + * @return Error code if the remote memory cannot be accessed directly or + * the remote address is not valid. + */ +ucs_status_t +uct_rkey_ptr(uct_component_h component, uct_rkey_bundle_t* rkey_ob, uint64_t remote_addr, + void** addr_p); + +/** + * @ingroup UCT_MD + * + * @brief Release a remote key. + * + * @param [in] component Component which was used to unpack the remote key. + * @param [in] rkey_ob Remote key to release. + */ +ucs_status_t +uct_rkey_release(uct_component_h component, const uct_rkey_bundle_t* rkey_ob); + +/** + * @ingroup UCT_CONTEXT + * @brief Explicit progress for UCT worker. + * + * This routine explicitly progresses any outstanding communication operations + * and active message requests. + * + * @note @li In the current implementation, users @b MUST call this routine + * to receive the active message requests. + * + * @param [in] worker Handle to worker. + * + * @return Nonzero if any communication was progressed, zero otherwise. + */ +UCT_INLINE_API unsigned +uct_worker_progress(uct_worker_h worker) +{ + return ucs_callbackq_dispatch(&worker->progress_q); +} + +/** + * @ingroup UCT_RESOURCE + * @brief Flush outstanding communication operations on an interface. + * + * Flushes all outstanding communications issued on the interface prior to + * this call. The operations are completed at the origin or at the target + * as well. The exact completion semantic depends on @a flags parameter. + * + * @note Currently only one completion type is supported. It guarantees that + * the data transfer is completed but the target buffer may not be updated yet. + * + * @param [in] iface Interface to flush communications from. 
+ * @param [in] flags Flags that control completion semantic (currently only + * @ref UCT_FLUSH_FLAG_LOCAL is supported). + * @param [inout] comp Completion handle as defined by @ref uct_completion_t. + * Can be NULL, which means that the call will return the + * current state of the interface and no completion will + * be generated in case of outstanding communications. + * If it is not NULL completion counter is decremented + * by 1 when the call completes. Completion callback is + * called when the counter reaches 0. + * + * + * @return UCS_OK - No outstanding communications left. + * UCS_INPROGRESS - Some communication operations are still in progress. + * If non-NULL 'comp' is provided, it will be updated + * upon completion of these operations. + */ +UCT_INLINE_API ucs_status_t +uct_iface_flush(uct_iface_h iface, unsigned flags, uct_completion_t* comp) +{ + return iface->ops.iface_flush(iface, flags, comp); +} + +/** + * @ingroup UCT_RESOURCE + * @brief Ensures ordering of outstanding communications on the interface. + * Operations issued on the interface prior to this call are guaranteed to + * be completed before any subsequent communication operations to the same + * interface which follow the call to fence. + * + * @param [in] iface Interface to issue communications from. + * @param [in] flags Flags that control ordering semantic (currently + * unsupported - set to 0). + * @return UCS_OK - Ordering is inserted. + */ +UCT_INLINE_API ucs_status_t +uct_iface_fence(uct_iface_h iface, unsigned flags) +{ + return iface->ops.iface_fence(iface, flags); +} + +/** + * @ingroup UCT_AM + * @brief Release AM descriptor + * + * Release active message descriptor @a desc, which was passed to + * @ref uct_am_callback_t "the active message callback", and owned by the callee. + * + * @param [in] desc Descriptor to release. 
+ */ +UCT_INLINE_API void +uct_iface_release_desc(void* desc) +{ + uct_recv_desc_t* release_desc = uct_recv_desc(desc); + release_desc->cb(release_desc, desc); +} + +/** + * @ingroup UCT_RMA + * @brief Inline wrapper that forwards to the endpoint interface's ep_put_short operation. + */ +UCT_INLINE_API ucs_status_t +uct_ep_put_short(uct_ep_h ep, const void* buffer, unsigned length, uint64_t remote_addr, + uct_rkey_t rkey) +{ + return ep->iface->ops.ep_put_short(ep, buffer, length, remote_addr, rkey); +} + +/** + * @ingroup UCT_RMA + * @brief Inline wrapper that forwards to the endpoint interface's ep_put_bcopy operation. + */ +UCT_INLINE_API ssize_t +uct_ep_put_bcopy(uct_ep_h ep, uct_pack_callback_t pack_cb, void* arg, + uint64_t remote_addr, uct_rkey_t rkey) +{ + return ep->iface->ops.ep_put_bcopy(ep, pack_cb, arg, remote_addr, rkey); +} + +/** + * @ingroup UCT_RMA + * @brief Write data to remote memory while avoiding local memory copy + * + * The input data in @a iov array of @ref ::uct_iov_t structures sent to remote + * address ("gather output"). Buffers in @a iov are processed in array order. + * This means that the function complete iov[0] before proceeding to + * iov[1], and so on. + * + * + * @param [in] ep Destination endpoint handle. + * @param [in] iov Points to an array of @ref ::uct_iov_t structures. + * The @a iov pointer must be a valid address of an array + * of @ref ::uct_iov_t structures. A particular structure + * pointer must be a valid address. A NULL terminated + * array is not required. + * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures + * array. If @a iovcnt is zero, the data is considered empty. + * @a iovcnt is limited by @ref uct_iface_attr_cap_put_max_iov + * "uct_iface_attr::cap::put::max_iov". + * @param [in] remote_addr Remote address to place the @a iov data. + * @param [in] rkey Remote key descriptor provided by @ref ::uct_rkey_unpack + * @param [in] comp Completion handle as defined by @ref ::uct_completion_t. + * + * @return UCS_INPROGRESS Some communication operations are still in progress.
+ * If non-NULL @a comp is provided, it will be updated + * upon completion of these operations. + * + */ +UCT_INLINE_API ucs_status_t +uct_ep_put_zcopy(uct_ep_h ep, const uct_iov_t* iov, size_t iovcnt, uint64_t remote_addr, + uct_rkey_t rkey, uct_completion_t* comp) +{ + return ep->iface->ops.ep_put_zcopy(ep, iov, iovcnt, remote_addr, rkey, comp); +} + +/** + * @ingroup UCT_RMA + * @brief Inline wrapper that forwards to the endpoint interface's ep_get_short operation. + */ +UCT_INLINE_API ucs_status_t +uct_ep_get_short(uct_ep_h ep, void* buffer, unsigned length, uint64_t remote_addr, + uct_rkey_t rkey) +{ + return ep->iface->ops.ep_get_short(ep, buffer, length, remote_addr, rkey); +} + +/** + * @ingroup UCT_RMA + * @brief Inline wrapper that forwards to the endpoint interface's ep_get_bcopy operation. + */ +UCT_INLINE_API ucs_status_t +uct_ep_get_bcopy(uct_ep_h ep, uct_unpack_callback_t unpack_cb, void* arg, size_t length, + uint64_t remote_addr, uct_rkey_t rkey, uct_completion_t* comp) +{ + return ep->iface->ops.ep_get_bcopy(ep, unpack_cb, arg, length, remote_addr, rkey, + comp); +} + +/** + * @ingroup UCT_RMA + * @brief Read data from remote memory while avoiding local memory copy + * + * The output data in @a iov array of @ref ::uct_iov_t structures received from + * remote address ("scatter input"). Buffers in @a iov are processed in array order. + * This means that the function complete iov[0] before proceeding to + * iov[1], and so on. + * + * + * @param [in] ep Destination endpoint handle. + * @param [in] iov Points to an array of @ref ::uct_iov_t structures. + * The @a iov pointer must be a valid address of an array + * of @ref ::uct_iov_t structures. A particular structure + * pointer must be a valid address. A NULL terminated + * array is not required. + * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures + * array. If @a iovcnt is zero, the data is considered empty. + * @a iovcnt is limited by @ref uct_iface_attr_cap_get_max_iov + * "uct_iface_attr::cap::get::max_iov". + * @param [in] remote_addr Remote address of the data placed to the @a iov.
+ * @param [in] rkey Remote key descriptor provided by @ref ::uct_rkey_unpack + * @param [in] comp Completion handle as defined by @ref ::uct_completion_t. + * + * @return UCS_INPROGRESS Some communication operations are still in progress. + * If non-NULL @a comp is provided, it will be updated + * upon completion of these operations. + * + */ +UCT_INLINE_API ucs_status_t +uct_ep_get_zcopy(uct_ep_h ep, const uct_iov_t* iov, size_t iovcnt, uint64_t remote_addr, + uct_rkey_t rkey, uct_completion_t* comp) +{ + return ep->iface->ops.ep_get_zcopy(ep, iov, iovcnt, remote_addr, rkey, comp); +} + +/** + * @ingroup UCT_AM + * @brief Inline wrapper that forwards to the endpoint interface's ep_am_short operation. + */ +UCT_INLINE_API ucs_status_t +uct_ep_am_short(uct_ep_h ep, uint8_t id, uint64_t header, const void* payload, + unsigned length) +{ + return ep->iface->ops.ep_am_short(ep, id, header, payload, length); +} + +/** + * @ingroup UCT_AM + * @brief Short io-vector send operation. + * + * This routine sends a message using @ref uct_short_protocol_desc "short" protocol. + * The input data in @a iov array of @ref ::uct_iov_t structures is sent to remote + * side to contiguous buffer keeping the order of the data in the array. + * + * @param [in] ep Destination endpoint handle. + * @param [in] id Active message id. Must be in range 0..UCT_AM_ID_MAX-1. + * @param [in] iov Points to an array of @ref ::uct_iov_t structures. + * The @a iov pointer must be a valid address of an array + * of @ref ::uct_iov_t structures. A particular structure + * pointer must be a valid address. A NULL terminated + * array is not required. @a stride and @a count fields in + * @ref ::uct_iov_t structure are ignored in current + * implementation. The total size of the data buffers in + * the array is limited by + * @ref uct_iface_attr_cap_am_max_short + * "uct_iface_attr::cap::am::max_short". + * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures + * array. If @a iovcnt is zero, the data is considered empty.
+ * @a iovcnt is limited by @ref uct_iface_attr_cap_am_max_iov + * "uct_iface_attr::cap::am::max_iov". + * + * @return UCS_OK Operation completed successfully. + * @return UCS_ERR_NO_RESOURCE Could not start the operation due to lack of + * send resources. + * @return otherwise Error code. + */ +UCT_INLINE_API ucs_status_t +uct_ep_am_short_iov(uct_ep_h ep, uint8_t id, const uct_iov_t* iov, size_t iovcnt) +{ + return ep->iface->ops.ep_am_short_iov(ep, id, iov, iovcnt); +} + +/** + * @ingroup UCT_AM + * @brief + */ +UCT_INLINE_API ssize_t +uct_ep_am_bcopy(uct_ep_h ep, uint8_t id, uct_pack_callback_t pack_cb, void* arg, + unsigned flags) +{ + return ep->iface->ops.ep_am_bcopy(ep, id, pack_cb, arg, flags); +} + +/** + * @ingroup UCT_AM + * @brief Send active message while avoiding local memory copy + * + * The input data in @a iov array of @ref ::uct_iov_t structures sent to remote + * side ("gather output"). Buffers in @a iov are processed in array order. + * This means that the function complete iov[0] before proceeding to + * iov[1], and so on. + * + * + * @param [in] ep Destination endpoint handle. + * @param [in] id Active message id. Must be in range 0..UCT_AM_ID_MAX-1. + * @param [in] header Active message header. + * @param [in] header_length Active message header length in bytes. + * @param [in] iov Points to an array of @ref ::uct_iov_t structures. + * The @a iov pointer must be a valid address of an array + * of @ref ::uct_iov_t structures. A particular structure + * pointer must be a valid address. A NULL terminated + * array is not required. + * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures + * array. If @a iovcnt is zero, the data is considered empty. + * @a iovcnt is limited by @ref uct_iface_attr_cap_am_max_iov + * "uct_iface_attr::cap::am::max_iov". + * @param [in] flags Active message flags, see @ref uct_msg_flags. + * @param [in] comp Completion handle as defined by @ref ::uct_completion_t. 
+ * + * @return UCS_OK Operation completed successfully. + * @return UCS_INPROGRESS Some communication operations are still in progress. + * If non-NULL @a comp is provided, it will be updated + * upon completion of these operations. + * @return UCS_ERR_NO_RESOURCE Could not start the operation due to lack of send + * resources. + * + * @note If the operation returns @a UCS_INPROGRESS, the memory buffers + * pointed to by @a iov array must not be modified until the operation + * is completed by @a comp. @a header can be released or changed. + */ +UCT_INLINE_API ucs_status_t +uct_ep_am_zcopy(uct_ep_h ep, uint8_t id, const void* header, unsigned header_length, + const uct_iov_t* iov, size_t iovcnt, unsigned flags, + uct_completion_t* comp) +{ + return ep->iface->ops.ep_am_zcopy(ep, id, header, header_length, iov, iovcnt, flags, + comp); +} + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t +uct_ep_atomic_cswap64(uct_ep_h ep, uint64_t compare, uint64_t swap, uint64_t remote_addr, + uct_rkey_t rkey, uint64_t* result, uct_completion_t* comp) +{ + return ep->iface->ops.ep_atomic_cswap64(ep, compare, swap, remote_addr, rkey, result, + comp); +} + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t +uct_ep_atomic_cswap32(uct_ep_h ep, uint32_t compare, uint32_t swap, uint64_t remote_addr, + uct_rkey_t rkey, uint32_t* result, uct_completion_t* comp) +{ + return ep->iface->ops.ep_atomic_cswap32(ep, compare, swap, remote_addr, rkey, result, + comp); +} + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t +uct_ep_atomic32_post(uct_ep_h ep, uct_atomic_op_t opcode, uint32_t value, + uint64_t remote_addr, uct_rkey_t rkey) +{ + return ep->iface->ops.ep_atomic32_post(ep, opcode, value, remote_addr, rkey); +} + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t +uct_ep_atomic64_post(uct_ep_h ep, uct_atomic_op_t opcode, uint64_t value, + uint64_t remote_addr, uct_rkey_t rkey) +{ + return 
ep->iface->ops.ep_atomic64_post(ep, opcode, value, remote_addr, rkey); +} + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t +uct_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode, uint32_t value, + uint32_t* result, uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t* comp) +{ + return ep->iface->ops.ep_atomic32_fetch(ep, opcode, value, result, remote_addr, rkey, + comp); +} + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t +uct_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode, uint64_t value, + uint64_t* result, uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t* comp) +{ + return ep->iface->ops.ep_atomic64_fetch(ep, opcode, value, result, remote_addr, rkey, + comp); +} + +/** + * @ingroup UCT_RESOURCE + * @brief Add a pending request to an endpoint. + * + * Add a pending request to the endpoint pending queue. The request will be + * dispatched when the endpoint could potentially have additional send resources. + * + * @param [in] ep Endpoint to add the pending request to. + * @param [in] req Pending request, which would be dispatched when more + * resources become available. The user is expected to initialize + * the "func" field. + * After being passed to the function, the request is owned by UCT, + * until the callback is called and returns UCS_OK. + * @param [in] flags Flags that control pending request processing (see @ref + * uct_cb_flags) + * + * @return UCS_OK - request added to pending queue + * UCS_ERR_BUSY - request was not added to pending queue, because send + * resources are available now. The user is advised to + * retry. + */ +UCT_INLINE_API ucs_status_t +uct_ep_pending_add(uct_ep_h ep, uct_pending_req_t* req, unsigned flags) +{ + return ep->iface->ops.ep_pending_add(ep, req, flags); +} + +/** + * @ingroup UCT_RESOURCE + * @brief Remove all pending requests from an endpoint. + * + * Remove pending requests from the given endpoint and pass them to the provided + * callback function. 
The callback return value is ignored. + * + * @param [in] ep Endpoint to remove pending requests from. + * @param [in] cb Callback to pass the removed requests to. + * @param [in] arg Argument to pass to the @a cb callback. + */ +UCT_INLINE_API void +uct_ep_pending_purge(uct_ep_h ep, uct_pending_purge_callback_t cb, void* arg) +{ + ep->iface->ops.ep_pending_purge(ep, cb, arg); +} + +/** + * @ingroup UCT_RESOURCE + * @brief Flush outstanding communication operations on an endpoint. + * + * Flushes all outstanding communications issued on the endpoint prior to + * this call. The operations are completed at the origin or at the target + * as well. The exact completion semantic depends on @a flags parameter. + * + * @param [in] ep Endpoint to flush communications from. + * @param [in] flags Flags @ref uct_flush_flags that control completion + * semantic. + * @param [inout] comp Completion handle as defined by @ref uct_completion_t. + * Can be NULL, which means that the call will return the + * current state of the endpoint and no completion will + * be generated in case of outstanding communications. + * If it is not NULL completion counter is decremented + * by 1 when the call completes. Completion callback is + * called when the counter reaches 0. + * + * @return UCS_OK - No outstanding communications left. + * UCS_ERR_NO_RESOURCE - Flush operation could not be initiated. A subsequent + * call to @ref uct_ep_pending_add would add a pending + * operation, which provides an opportunity to retry + * the flush. + * UCS_INPROGRESS - Some communication operations are still in progress. + * If non-NULL 'comp' is provided, it will be updated + * upon completion of these operations. + */ +UCT_INLINE_API ucs_status_t +uct_ep_flush(uct_ep_h ep, unsigned flags, uct_completion_t* comp) +{ + return ep->iface->ops.ep_flush(ep, flags, comp); +} + +/** + * @ingroup UCT_RESOURCE + * @brief Ensures ordering of outstanding communications on the endpoint. 
+ * Operations issued on the endpoint prior to this call are guaranteed to + * be completed before any subsequent communication operations to the same + * endpoint which follow the call to fence. + * + * @param [in] ep Endpoint to issue communications from. + * @param [in] flags Flags that control ordering semantic (currently + * unsupported - set to 0). + * @return UCS_OK - Ordering is inserted. + */ +UCT_INLINE_API ucs_status_t +uct_ep_fence(uct_ep_h ep, unsigned flags) +{ + return ep->iface->ops.ep_fence(ep, flags); +} + +/** + * @ingroup UCT_TAG + * @brief Short eager tagged-send operation. + * + * This routine sends a message using @ref uct_short_protocol_desc "short" + * eager protocol. Eager protocol means that the whole data is sent to the peer + * immediately without any preceding notification. + * The data is provided as buffer and its length,and must not be larger than the + * corresponding @a max_short value in @ref uct_iface_attr. + * The immediate value delivered to the receiver is implicitly equal to 0. + * If it's required to pass nonzero imm value, @ref uct_ep_tag_eager_bcopy + * should be used. + * + * @param [in] ep Destination endpoint handle. + * @param [in] tag Tag to use for the eager message. + * @param [in] data Data to send. + * @param [in] length Data length. + * + * @return UCS_OK - operation completed successfully. + * @return UCS_ERR_NO_RESOURCE - could not start the operation due to lack of + * send resources. + */ +UCT_INLINE_API ucs_status_t +uct_ep_tag_eager_short(uct_ep_h ep, uct_tag_t tag, const void* data, size_t length) +{ + return ep->iface->ops.ep_tag_eager_short(ep, tag, data, length); +} + +/** + * @ingroup UCT_TAG + * @brief Bcopy eager tagged-send operation. + * + * This routine sends a message using @ref uct_bcopy_protocol_desc "bcopy" + * eager protocol. Eager protocol means that the whole data is sent to the peer + * immediately without any preceding notification. 
+ * Custom data callback is used to copy the data to the network buffers. + * + * @note The resulted data length must not be larger than the corresponding + * @a max_bcopy value in @ref uct_iface_attr. + * + * @param [in] ep Destination endpoint handle. + * @param [in] tag Tag to use for the eager message. + * @param [in] imm Immediate value which will be available to the + * receiver. + * @param [in] pack_cb User callback to pack the data. + * @param [in] arg Custom argument to @a pack_cb. + * @param [in] flags Tag message flags, see @ref uct_msg_flags. + * + * @return >=0 - The size of the data packed by @a pack_cb. + * @return otherwise - Error code. + */ +UCT_INLINE_API ssize_t +uct_ep_tag_eager_bcopy(uct_ep_h ep, uct_tag_t tag, uint64_t imm, + uct_pack_callback_t pack_cb, void* arg, unsigned flags) +{ + return ep->iface->ops.ep_tag_eager_bcopy(ep, tag, imm, pack_cb, arg, flags); +} + +/** + * @ingroup UCT_TAG + * @brief Zcopy eager tagged-send operation. + * + * This routine sends a message using @ref uct_zcopy_protocol_desc "zcopy" + * eager protocol. Eager protocol means that the whole data is sent to the peer + * immediately without any preceding notification. + * The input data (which has to be previously registered) in @a iov array of + * @ref uct_iov_t structures sent to remote side ("gather output"). Buffers in + * @a iov are processed in array order, so the function complete @a iov[0] + * before proceeding to @a iov[1], and so on. + * + * @note The resulted data length must not be larger than the corresponding + * @a max_zcopy value in @ref uct_iface_attr. + * + * @param [in] ep Destination endpoint handle. + * @param [in] tag Tag to use for the eager message. + * @param [in] imm Immediate value which will be available to the + * receiver. + * @param [in] iov Points to an array of @ref uct_iov_t structures. + * A particular structure pointer must be a valid address. + * A NULL terminated array is not required. 
+ * @param [in] iovcnt Size of the @a iov array. If @a iovcnt is zero, the + * data is considered empty. Note that @a iovcnt is + * limited by the corresponding @a max_iov value in + * @ref uct_iface_attr. + * @param [in] flags Tag message flags, see @ref uct_msg_flags. + * @param [in] comp Completion callback which will be called when the data + * is reliably received by the peer, and the buffer + * can be reused or invalidated. + * + * @return UCS_OK - operation completed successfully. + * @return UCS_ERR_NO_RESOURCE - could not start the operation due to lack of + * send resources. + * @return UCS_INPROGRESS - operation started, and @a comp will be used to + * notify when it's completed. + */ +UCT_INLINE_API ucs_status_t +uct_ep_tag_eager_zcopy(uct_ep_h ep, uct_tag_t tag, uint64_t imm, const uct_iov_t* iov, + size_t iovcnt, unsigned flags, uct_completion_t* comp) +{ + return ep->iface->ops.ep_tag_eager_zcopy(ep, tag, imm, iov, iovcnt, flags, comp); +} + +/** + * @ingroup UCT_TAG + * @brief Rendezvous tagged-send operation. + * + * This routine sends a message using rendezvous protocol. Rendezvous protocol + * means that only a small notification is sent at first, and the data itself + * is transferred later (when there is a match) to avoid extra memory copy. + * + * @note The header will be available to the receiver in case of unexpected + * rendezvous operation only, i.e. the peer has not posted tag for this + * message yet (by means of @ref uct_iface_tag_recv_zcopy), when it is + * arrived. + * + * @param [in] ep Destination endpoint handle. + * @param [in] tag Tag to use for the eager message. + * @param [in] header User defined header. + * @param [in] header_length User defined header length in bytes. Note that + * it is limited by the corresponding @a max_hdr + * value in @ref uct_iface_attr. + * @param [in] iov Points to an array of @ref uct_iov_t structures. + * A particular structure pointer must be valid + * address. 
A NULL terminated array is not required. + * @param [in] iovcnt Size of the @a iov array. If @a iovcnt is zero, + * the data is considered empty. Note that @a iovcnt + * is limited by the corresponding @a max_iov value + * in @ref uct_iface_attr. + * @param [in] flags Tag message flags, see @ref uct_msg_flags. + * @param [in] comp Completion callback which will be called when the + * data is reliably received by the peer, and the + * buffer can be reused or invalidated. + * + * @return >=0 - The operation is in progress and the return value is a + * handle which can be used to cancel the outstanding + * rendezvous operation. + * @return otherwise - Error code. + */ +UCT_INLINE_API ucs_status_ptr_t +uct_ep_tag_rndv_zcopy(uct_ep_h ep, uct_tag_t tag, const void* header, + unsigned header_length, const uct_iov_t* iov, size_t iovcnt, + unsigned flags, uct_completion_t* comp) +{ + return ep->iface->ops.ep_tag_rndv_zcopy(ep, tag, header, header_length, iov, iovcnt, + flags, comp); +} + +/** + * @ingroup UCT_TAG + * @brief Cancel outstanding rendezvous operation. + * + * This routine signals the underlying transport to disregard the outstanding + * operation without calling the completion callback provided in + * @ref uct_ep_tag_rndv_zcopy. + * + * @note The operation handle should be valid at the time the routine is + * invoked. I.e. it should be a handle of the real operation which is not + * completed yet. + * + * @param [in] ep Destination endpoint handle. + * @param [in] op Rendezvous operation handle, as returned from + * @ref uct_ep_tag_rndv_zcopy. + * + * @return UCS_OK - The operation has been canceled. + */ +UCT_INLINE_API ucs_status_t +uct_ep_tag_rndv_cancel(uct_ep_h ep, void* op) +{ + return ep->iface->ops.ep_tag_rndv_cancel(ep, op); +} + +/** + * @ingroup UCT_TAG + * @brief Send software rendezvous request. + * + * This routine sends a rendezvous request only, which indicates that the data + * transfer should be completed in software.
+ * + * @param [in] ep Destination endpoint handle. + * @param [in] tag Tag to use for matching. + * @param [in] header User defined header + * @param [in] header_length User defined header length in bytes. Note that it + * is limited by the corresponding @a max_hdr value + * in @ref uct_iface_attr. + * @param [in] flags Tag message flags, see @ref uct_msg_flags. + * + * @return UCS_OK - operation completed successfully. + * @return UCS_ERR_NO_RESOURCE - could not start the operation due to lack of + * send resources. + */ +UCT_INLINE_API ucs_status_t +uct_ep_tag_rndv_request(uct_ep_h ep, uct_tag_t tag, const void* header, + unsigned header_length, unsigned flags) +{ + return ep->iface->ops.ep_tag_rndv_request(ep, tag, header, header_length, flags); +} + +/** + * @ingroup UCT_TAG + * @brief Post a tag to a transport interface. + * + * This routine posts a tag to be matched on a transport interface. When a + * message with the corresponding tag arrives it is stored in the user buffer + * (described by @a iov and @a iovcnt) directly. The operation completion is + * reported using callbacks on the @a ctx structure. + * + * @param [in] iface Interface to post the tag on. + * @param [in] tag Tag to expect. + * @param [in] tag_mask Mask which specifies what bits of the tag to + * compare. + * @param [in] iov Points to an array of @ref ::uct_iov_t structures. + * The @a iov pointer must be a valid address of an array + * of @ref ::uct_iov_t structures. A particular structure + * pointer must be a valid address. A NULL terminated + * array is not required. + * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures + * array. If @a iovcnt is zero, the data is considered empty. + * @a iovcnt is limited by @ref uct_iface_attr_cap_tag_recv_iov + * "uct_iface_attr::cap::tag::max_iov". + * @param [inout] ctx Context associated with this particular tag, "priv" field + * in this structure is used to track the state internally. 
+ * + * @return UCS_OK - The tag is posted to the transport. + * @return UCS_ERR_NO_RESOURCE - Could not start the operation due to lack of + * resources. + * @return UCS_ERR_EXCEEDS_LIMIT - No more room for tags in the transport. + */ +UCT_INLINE_API ucs_status_t +uct_iface_tag_recv_zcopy(uct_iface_h iface, uct_tag_t tag, uct_tag_t tag_mask, + const uct_iov_t* iov, size_t iovcnt, uct_tag_context_t* ctx) +{ + return iface->ops.iface_tag_recv_zcopy(iface, tag, tag_mask, iov, iovcnt, ctx); +} + +/** + * @ingroup UCT_TAG + * @brief Cancel a posted tag. + * + * This routine cancels a tag, which was previously posted by + * @ref uct_iface_tag_recv_zcopy. The tag would be either matched or canceled, + * in a bounded time, regardless of the peer actions. The original completion + * callback of the tag would be called with the status if @a force is not set. + * + * @param [in] iface Interface to cancel the tag on. + * @param [in] ctx Tag context which was used for posting the tag. If + * force is 0, @a ctx->completed_cb will be called with + * either UCS_OK which means the tag was matched and data + * received despite the cancel request, or + * UCS_ERR_CANCELED which means the tag was successfully + * canceled before it was matched. + * @param [in] force Whether to report completions to @a ctx->completed_cb. + * If nonzero, the cancel is assumed to be successful, + * and the callback is not called. + * + * @return UCS_OK - The tag is canceled in the transport. + */ +UCT_INLINE_API ucs_status_t +uct_iface_tag_recv_cancel(uct_iface_h iface, uct_tag_context_t* ctx, int force) +{ + return iface->ops.iface_tag_recv_cancel(iface, ctx, force); +} + +/** + * @ingroup UCT_RESOURCE + * @brief Enable synchronous progress for the interface + * + * Notify the transport that it should actively progress communications during + * @ref uct_worker_progress(). + * + * When the interface is created, its progress is initially disabled. 
+ * + * @param [in] iface The interface to enable progress. + * @param [in] flags The type of progress to enable as defined by + * @ref uct_progress_types + * + * @note This function is not thread safe with respect to + * @ref ucp_worker_progress(), unless the flag + * @ref UCT_PROGRESS_THREAD_SAFE is specified. + * + */ +UCT_INLINE_API void +uct_iface_progress_enable(uct_iface_h iface, unsigned flags) +{ + iface->ops.iface_progress_enable(iface, flags); +} + +/** + * @ingroup UCT_RESOURCE + * @brief Disable synchronous progress for the interface + * + * Notify the transport that it should not progress its communications during + * @ref uct_worker_progress(). Thus the latency of other transports may be + * improved. + * + * By default, progress is disabled when the interface is created. + * + * @param [in] iface The interface to disable progress. + * @param [in] flags The type of progress to disable as defined by + * @ref uct_progress_types. + * + * @note This function is not thread safe with respect to + * @ref ucp_worker_progress(), unless the flag + * @ref UCT_PROGRESS_THREAD_SAFE is specified. + * + */ +UCT_INLINE_API void +uct_iface_progress_disable(uct_iface_h iface, unsigned flags) +{ + iface->ops.iface_progress_disable(iface, flags); +} + +/** + * @ingroup UCT_RESOURCE + * @brief Perform a progress on an interface. + */ +UCT_INLINE_API unsigned +uct_iface_progress(uct_iface_h iface) +{ + return iface->ops.iface_progress(iface); +} + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Open a connection manager. + * + * Open a connection manager. All client server connection + * establishment operations are performed in the context of a specific + * connection manager. + * @note This is an alternative API for + * @ref uct_iface_open_mode::UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER and + * @ref uct_iface_open_mode::UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT . 
+ * + * @param [in] component Component on which to open the connection manager, + * as returned from @ref uct_query_components. + * @param [in] worker Worker on which to open the connection manager. + * @param [in] config CM configuration options. Either obtained + * from @ref uct_cm_config_read() function, or pointer + * to CM-specific structure that extends + * @ref uct_cm_config_t. + * @param [out] cm_p Filled with a handle to the connection manager. + * + * @return Error code. + */ +ucs_status_t +uct_cm_open(uct_component_h component, uct_worker_h worker, const uct_cm_config_t* config, + uct_cm_h* cm_p); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Close a connection manager. + * + * @param [in] cm Connection manager to close. + */ +void +uct_cm_close(uct_cm_h cm); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Get connection manager attributes. + * + * This routine queries the @ref uct_cm_h "cm" for its attributes + * @ref uct_cm_attr_t. + * + * @param [in] cm Connection manager to query. + * @param [out] cm_attr Filled with connection manager attributes. + */ +ucs_status_t +uct_cm_query(uct_cm_h cm, uct_cm_attr_t* cm_attr); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Read the configuration for a connection manager. + * + * @param [in] component Read the configuration of the connection manager + * on this component. + * @param [in] env_prefix If non-NULL, search for environment variables + * starting with UCT_{env_prefix}_. Otherwise, search + * for environment variables starting with just UCT_. + * @param [in] filename If non-NULL, read configuration from this file. If + * the file does not exist, or exists but cannot be + * opened or read, it will be ignored. + * @param [out] config_p Filled with a pointer to the configuration. + * + * @return Error code.
+ */ +ucs_status_t +uct_cm_config_read(uct_component_h component, const char* env_prefix, + const char* filename, uct_cm_config_t** config_p); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Notify the server about client-side connection establishment. + * + * This routine should be called on the client side after the client completed + * establishing its connection to the server. The routine will send a + * notification message to the server indicating that the client is connected. + * + * @param [in] ep The connected endpoint on the client side. + * + * @return Error code. + */ +ucs_status_t +uct_cm_client_ep_conn_notify(uct_ep_h ep); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Create a new transport listener object. + * + * This routine creates a new listener on the given CM which will start + * listening on a given sockaddr. + * + * @param [in] cm Connection manager on which to open the listener. + * This cm should not be closed as long as there are + * open listeners on it. + * @param [in] saddr The socket address to listen on. + * @param [in] socklen The saddr length. + * @param [in] params User defined @ref uct_listener_params_t + * configurations for the @a listener_p. + * @param [out] listener_p Filled with handle to the new listener. + * + * @return Error code. + */ +ucs_status_t +uct_listener_create(uct_cm_h cm, const struct sockaddr* saddr, socklen_t socklen, + const uct_listener_params_t* params, uct_listener_h* listener_p); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Destroy a transport listener. + * + * @param [in] listener Listener to destroy. + */ +void +uct_listener_destroy(uct_listener_h listener); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Reject a connection request. + * + * This routine can be invoked on the server side. It rejects a connection request + * from the client. + * + * @param [in] listener Listener which will reject the connection request. 
+ * @param [in] conn_request Connection establishment request passed as parameter + * of @ref uct_cm_listener_conn_request_callback_t in + * @ref uct_cm_listener_conn_request_args_t::conn_request. + * + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +uct_listener_reject(uct_listener_h listener, uct_conn_request_h conn_request); + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Get attributes specific to a particular listener. + * + * This routine queries the @ref uct_listener_h "listener" for its attributes + * @ref uct_listener_attr_t. + * + * @param [in] listener Listener object to query. + * @param [out] listener_attr Filled with attributes of the listener. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t +uct_listener_query(uct_listener_h listener, uct_listener_attr_t* listener_attr); + +/** + * @ingroup UCT_RESOURCE + * @brief Update status of UCT completion handle. + * + * @param comp [in] Completion handle to update. + * @param status [in] Status to update @a comp handle. + */ +static UCS_F_ALWAYS_INLINE void +uct_completion_update_status(uct_completion_t* comp, ucs_status_t status) +{ + if(ucs_unlikely(status != UCS_OK) && (comp->status == UCS_OK)) + { + /* store first failure status */ + comp->status = status; + } +} + +/** + * @example uct_hello_world.c + * UCT hello world client / server example utility. 
+ */ + +END_C_DECLS + +#endif diff --git a/projects/rocprofiler-systems/tests/CMakeLists.txt b/projects/rocprofiler-systems/tests/CMakeLists.txt index f6a18e580a..642f9d3179 100644 --- a/projects/rocprofiler-systems/tests/CMakeLists.txt +++ b/projects/rocprofiler-systems/tests/CMakeLists.txt @@ -35,6 +35,7 @@ include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-pthread-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-rocm-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-user-api-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-mpi-tests.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-ucx-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-kokkos-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-openmp-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-code-coverage-tests.cmake) @@ -62,9 +63,12 @@ include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-thread-limit-tests.cmake) # # -------------------------------------------------------------------------------------- # +# Delete temp files created by rocprofiler-sys tests in /tmp that are owned by the current user. Always returns success. add_test( NAME rocprofsys-cleanup-tmp-files - COMMAND sh -c "rm -f /tmp/buffered_storage*.bin /tmp/metadata*.json" + COMMAND + sh -c + "find /tmp -maxdepth 1 -user $(whoami) \\( -name 'buffered_storage*.bin' -o -name 'metadata*.json' \\) -delete 2>/dev/null || true" WORKING_DIRECTORY ${PROJECT_BINARY_DIR} ) diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-ucx-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-ucx-tests.cmake new file mode 100644 index 0000000000..1bad820ee3 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocprof-sys-ucx-tests.cmake @@ -0,0 +1,264 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# -------------------------------------------------------------------------------------- # +# +# UCX tests - MPI examples with UCX transport +# +# -------------------------------------------------------------------------------------- # + +# UCX tests require MPI examples since UCX is MPI's transport layer +if(NOT ROCPROFSYS_USE_MPI AND NOT ROCPROFSYS_USE_MPI_HEADERS) + return() +endif() + +# Detect MPI implementation by checking include paths +set(_DETECTED_MPI_IMPL "unknown") +if("${MPI_C_COMPILER_INCLUDE_DIRS};${MPI_C_HEADER_DIR}" MATCHES "openmpi") + set(_DETECTED_MPI_IMPL "openmpi") +elseif("${MPI_C_COMPILER_INCLUDE_DIRS};${MPI_C_HEADER_DIR}" MATCHES "mpich") + set(_DETECTED_MPI_IMPL "mpich") +endif() + +# Only proceed if OpenMPI is detected +if(NOT "${_DETECTED_MPI_IMPL}" STREQUAL "openmpi") + message( + STATUS + "Skipping UCX tests - requires OpenMPI (detected: ${_DETECTED_MPI_IMPL}). 
UCX tests use OpenMPI-specific environment variables (OMPI_MCA_*)." + ) + return() +endif() + +# Force OpenMPI to use UCX transport via environment variables +set(_ucxp_mpi_environment + "OMPI_MCA_pml=ucx" # Use UCX point-to-point messaging layer + "OMPI_MCA_osc=ucx" # Use UCX one-sided communications + "OMPI_MCA_pml_ucx_tls=tcp,self" # Force TCP and self (not sysv/posix/cma which bypass UCX functions) + "OMPI_MCA_pml_ucx_devices=any" # Accept any device (not just InfiniBand/Mellanox) + "OMPI_MCA_btl=^vader,sm" # Disable shared memory BTLs to force communication through UCX + "UCX_TLS=tcp,self" # Tell UCX to use TCP for inter-process, self for intra-process + "OMPI_MCA_pml_base_verbose=100" # Show which PML is selected + "UCX_LOG_LEVEL=info" # Enable UCX logging to show transport usage +) + +# Base environment for UCX tests +set(_ucx_base_environment + "${_base_environment}" + "ROCPROFSYS_USE_UCX=ON" + "ROCPROFSYS_DEBUG=OFF" + "ROCPROFSYS_VERBOSE=2" + "ROCPROFSYS_DL_VERBOSE=2" + "${_ucxp_mpi_environment}" +) + +# First test: UCX availability check using mpi-example (basic test) +# This test checks if UCX is available. If not, subsequent UCX tests will be marked as skipped. 
# UCX availability probe: registers "ucx-availability-check" against the
# mpi-example target on two MPI ranks and matches the rewrite-run output
# against UCX-related patterns.
# NOTE(review): SKIP_REWRITE is passed together with REWRITE_ARGS and
# REWRITE_RUN_* settings; if SKIP_REWRITE disables the rewrite stages in
# rocprofiler_systems_add_test(), those settings have no effect -- confirm.
# NOTE(review): the four "UCX unavailable" alternatives are listed in both the
# FAIL and the SKIP regex; which outcome wins depends on how the helper applies
# them -- confirm that "skipped" is the intended result.
rocprofiler_systems_add_test(
    SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE SKIP_SYS_RUN
    NAME "ucx-availability-check"
    TARGET mpi-example
    MPI ON
    NUM_PROCS 2
    LABELS "ucx;availability"
    REWRITE_ARGS -e -v 2 --label file line return args --min-instructions 0
    ENVIRONMENT "${_ucx_base_environment};ROCPROFSYS_VERBOSE=1"
    REWRITE_RUN_PASS_REGEX "UCX.*configured|ucp_|uct_|UCX transport|pml.*ucx"
    REWRITE_RUN_FAIL_REGEX
        "PML ucx cannot be selected|UCX is not available|No UCX support found|Failed to select|ROCPROFSYS_ABORT_FAIL_REGEX"
    REWRITE_RUN_SKIP_REGEX
        "PML ucx cannot be selected|UCX is not available|No UCX support found|Failed to select"
)

# Environment used by the UCX tracing tests below: the base environment plus
# UCX tracing, debug/verbose output and perfetto settings.
# NOTE(review): the last entry expands `_ucxp_mpi_environment` (with a "p"),
# which is defined outside this section -- confirm the spelling is intended.
set(_ucx_environment
    "${_base_environment}"
    "ROCPROFSYS_USE_UCX=ON"
    "ROCPROFSYS_DEBUG=ON"
    "ROCPROFSYS_VERBOSE=3"
    "ROCPROFSYS_DL_VERBOSE=3"
    "ROCPROFSYS_PERFETTO_BACKEND=inprocess"
    "ROCPROFSYS_PERFETTO_FILL_POLICY=ring_buffer"
    "ROCPROFSYS_USE_PID=OFF"
    "ROCPROFSYS_MPI_INIT=OFF"
    "${_ucxp_mpi_environment}"
)

# Extra-verbose variant of the environment above, meant for troubleshooting CI
# issues. No test in this section references it.
set(_ucx_debug_environment
    "${_ucx_environment}"
    "UCX_LOG_LEVEL=debug" # UCX debug-level logging
    "OMPI_MCA_mpi_show_mca_params=all" # show all MCA parameters
)

# Rewrite arguments shared by every test below except ucx-mpip-integration.
set(_ucx_default_rewrite_args
    -e
    -v
    2
    --label
    file
    line
    --min-instructions
    0
)

# Environment shared by the perfetto test and the per-example tests. It appends
# a second ROCPROFSYS_VERBOSE entry (=1) after the =3 already present in
# _ucx_environment, and enables legacy tracing with combined perfetto traces.
set(_ucx_legacy_trace_environment
    "${_ucx_environment};ROCPROFSYS_VERBOSE=1;ROCPROFSYS_TRACE_LEGACY=ON;ROCPROFSYS_PERFETTO_COMBINE_TRACES=ON"
)

# Perfetto trace test: the rewrite run must report a successful execution of
# the merge-output script, and the sys-run output must mention UCX activity.
rocprofiler_systems_add_test(
    SKIP_RUNTIME
    NAME "ucx-perfetto"
    TARGET mpi-example
    MPI ON
    NUM_PROCS 2
    LABELS "ucx;perfetto"
    REWRITE_ARGS ${_ucx_default_rewrite_args}
    ENVIRONMENT "${_ucx_legacy_trace_environment}"
    REWRITE_RUN_PASS_REGEX "Successfully executed: .+rocprof-sys-merge-output.sh.*"
    REWRITE_RUN_FAIL_REGEX "Script not found|Failed to execute|ROCPROFSYS_ABORT_FAIL_REGEX"
    SYS_RUN_PASS_REGEX "ucp_tag_send|ucp_tag_recv|UCX.*configured|Using UCX|pml.*ucx"
)

# Validates the merged perfetto trace of the test above: the "UCX Comm Recv"
# and "UCX Comm Send" counter tracks must be present.
rocprofiler_systems_add_validation_test(
    NAME ucx-perfetto-sys-run
    PERFETTO_METRIC "ucx"
    PERFETTO_FILE "merged.proto"
    LABELS "ucx;perfetto"
    ARGS --counter-names "UCX Comm Recv" "UCX Comm Send" -p
)

# One tracing test plus one trace-validation test per MPI example binary
# (targets mpi-all2all, mpi-allgather, ...).
foreach(
    _UCX_EXAMPLE
    IN
    ITEMS all2all allgather allreduce scatter-gather send-recv
)
    rocprofiler_systems_add_test(
        SKIP_BASELINE SKIP_RUNTIME SKIP_SAMPLING
        NAME "ucx-${_UCX_EXAMPLE}"
        TARGET mpi-${_UCX_EXAMPLE}
        MPI ON
        NUM_PROCS 2
        LABELS "ucx"
        REWRITE_ARGS ${_ucx_default_rewrite_args}
        RUN_ARGS 30
        ENVIRONMENT "${_ucx_legacy_trace_environment}"
        REWRITE_RUN_PASS_REGEX
            "UCX.*trace|ucp_.*trace|Category.*ucx|UCX function.*called"
        SYS_RUN_PASS_REGEX
            "ucp_tag_send|ucp_tag_recv|write_perfetto_counter_track.*ucx"
    )

    # Checks the merged trace of this example for the UCX send/receive
    # counter tracks.
    rocprofiler_systems_add_validation_test(
        NAME ucx-${_UCX_EXAMPLE}-sys-run
        PERFETTO_METRIC "ucx"
        PERFETTO_FILE "merged.proto"
        LABELS "ucx"
        ARGS --counter-names "UCX Comm Recv" "UCX Comm Send" -p
    )
endforeach()

# UCX together with MPIP: same environment with ROCPROFSYS_USE_MPIP=ON; the
# pass regex expects UCX and MPI markers in the rewrite-run output. This is the
# only test here that adds "args" to the --label list.
rocprofiler_systems_add_test(
    SKIP_RUNTIME
    NAME "ucx-mpip-integration"
    TARGET mpi-all2all
    MPI ON
    NUM_PROCS 2
    LABELS "ucx;mpip"
    REWRITE_ARGS -e -v 2 --label file line args --min-instructions 0
    ENVIRONMENT "${_ucx_environment};ROCPROFSYS_USE_MPIP=ON"
    RUN_ARGS 30
    REWRITE_RUN_PASS_REGEX
        "UCX.*trace.*MPI.*trace|ucp_.*MPI_|Category.*ucx.*Category.*mpi"
)

# mpi-bcast once per value; the value is forwarded to the binary as its run
# argument (presumably the message size -- confirm against the example).
foreach(_MSG_SIZE IN ITEMS 1024 4096 16384)
    rocprofiler_systems_add_test(
        SKIP_BASELINE SKIP_RUNTIME
        NAME "ucx-bcast-${_MSG_SIZE}"
        TARGET mpi-bcast
        MPI ON
        NUM_PROCS 2
        LABELS "ucx;bcast"
        REWRITE_ARGS ${_ucx_default_rewrite_args}
        ENVIRONMENT "${_ucx_environment}"
        RUN_ARGS ${_MSG_SIZE}
        REWRITE_RUN_PASS_REGEX "UCX.*trace|ucp_.*send|ucp_.*recv|Category.*ucx"
    )
endforeach()

# Active-message test: mpi-allreduce with OMPI_MCA_btl overridden; passes when
# active-message calls show up in the rewrite-run output.
# NOTE(review): the "^vader,tcp,openib,uct" value presumably excludes those
# Open MPI BTL components -- confirm against the Open MPI documentation.
rocprofiler_systems_add_test(
    SKIP_BASELINE SKIP_RUNTIME
    NAME "ucx-active-messages"
    TARGET mpi-allreduce
    MPI ON
    NUM_PROCS 2
    LABELS "ucx;am"
    REWRITE_ARGS ${_ucx_default_rewrite_args}
    ENVIRONMENT "${_ucx_environment};OMPI_MCA_btl=^vader,tcp,openib,uct"
    RUN_ARGS 64
    REWRITE_RUN_PASS_REGEX "ucp_am_send|ucp_am_recv|uct_ep_am|Active.*Message"
)