From bf19d70a2901eec5f54da33bbf57634b1ec0fe81 Mon Sep 17 00:00:00 2001 From: Yiltan Date: Wed, 5 Nov 2025 16:12:44 -0500 Subject: [PATCH] Added ibv_wrapper which opens library using dlopen (#309) [ROCm/rocshmem commit: 110f9c8793c78ef3792310c514272870144f1fdf] --- projects/rocshmem/cmake/FindIBVerbs.cmake | 57 ----- projects/rocshmem/src/gda/CMakeLists.txt | 4 +- projects/rocshmem/src/gda/backend_gda.cpp | 58 ++--- projects/rocshmem/src/gda/backend_gda.hpp | 1 + projects/rocshmem/src/gda/ibv_wrapper.cpp | 207 ++++++++++++++++++ projects/rocshmem/src/gda/ibv_wrapper.hpp | 128 +++++++++++ .../src/gda/ionic/backend_gda_ionic.cpp | 2 +- projects/rocshmem/src/gda/queue_pair.cpp | 8 +- projects/rocshmem/src/gda/queue_pair.hpp | 2 + projects/rocshmem/src/gda/topology.cpp | 17 +- 10 files changed, 382 insertions(+), 102 deletions(-) delete mode 100644 projects/rocshmem/cmake/FindIBVerbs.cmake create mode 100644 projects/rocshmem/src/gda/ibv_wrapper.cpp create mode 100644 projects/rocshmem/src/gda/ibv_wrapper.hpp diff --git a/projects/rocshmem/cmake/FindIBVerbs.cmake b/projects/rocshmem/cmake/FindIBVerbs.cmake deleted file mode 100644 index a9b7a37911..0000000000 --- a/projects/rocshmem/cmake/FindIBVerbs.cmake +++ /dev/null @@ -1,57 +0,0 @@ -############################################################################### -# Copyright (c) Advanced Micro Devices, Inc. All rights reserved. 
-# -# SPDX-License-Identifier: MIT -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. 
-############################################################################### - -find_package(PkgConfig QUIET) -if (PkgConfig_FOUND) -if (IBVerbs_ROOT ) - set(ENV{PKG_CONFIG_PATH} "${IBVerbs_ROOT}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") -endif() -pkg_check_modules(PC_IBVerbs QUIET libibverbs) -endif() - -find_path(IBVerbs_INCLUDE_DIR infiniband/verbs.h - HINTS ${PC_IBVerbs_INCLUDEDIR} ${PC_IBVerbs_INCLUDE_DIRS} - PATH_SUFFIXES include -) - -find_library(IBVerbs_LIBRARY - NAMES ibverbs libibverbs - HINTS ${PC_IBVerbs_LIBDIR} ${PC_IBVerbs_LIBRARY_DIRS} - PATH_SUFFIXES lib lib64 -) - -find_package_handle_standard_args(IBVerbs DEFAULT_MSG - IBVerbs_LIBRARY - IBVerbs_INCLUDE_DIR -) -mark_as_advanced(IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR) - -if (IBVerbs_FOUND) -add_library(IBVerbs::verbs UNKNOWN IMPORTED) -set_target_properties(IBVerbs::verbs PROPERTIES - IMPORTED_LOCATION "${IBVerbs_LIBRARY}" - INTERFACE_COMPILE_OPTIONS "${PC_IBVerbs_CFLAGS_OTHER}" - INTERFACE_INCLUDE_DIRECTORIES "${IBVerbs_INCLUDE_DIR}" -) -endif() diff --git a/projects/rocshmem/src/gda/CMakeLists.txt b/projects/rocshmem/src/gda/CMakeLists.txt index 983a90cb0b..59e1978128 100644 --- a/projects/rocshmem/src/gda/CMakeLists.txt +++ b/projects/rocshmem/src/gda/CMakeLists.txt @@ -32,18 +32,16 @@ target_sources( context_gda_device_coll.cpp context_gda_host.cpp backend_gda.cpp + ibv_wrapper.cpp gda_team.cpp queue_pair.cpp endian.cpp topology.cpp ) -find_package(IBVerbs REQUIRED) - target_link_libraries( ${PROJECT_NAME} PUBLIC - IBVerbs::verbs numa ) diff --git a/projects/rocshmem/src/gda/backend_gda.cpp b/projects/rocshmem/src/gda/backend_gda.cpp index 7d9bfd657e..a456c24f3a 100644 --- a/projects/rocshmem/src/gda/backend_gda.cpp +++ b/projects/rocshmem/src/gda/backend_gda.cpp @@ -643,29 +643,29 @@ void GDABackend::cleanup_ibv() { } } else { for (int i = 0; i < qps.size(); i++) { - err = ibv_destroy_qp(qps[i]); + err = ibv.destroy_qp(qps[i]); CHECK_ZERO(err, "ibv_destroy_qp"); - err = ibv_destroy_cq(cqs[i]); + 
err = ibv.destroy_cq(cqs[i]); CHECK_ZERO(err, "ibv_destroy_cqs"); } if (gda_provider == GDAProvider::IONIC) { - err = ibv_dealloc_pd(pd_uxdma[0]); + err = ibv.dealloc_pd(pd_uxdma[0]); CHECK_ZERO(err, "ibv_dealloc_pd (uxdma[0])"); - err = ibv_dealloc_pd(pd_uxdma[1]); + err = ibv.dealloc_pd(pd_uxdma[1]); CHECK_ZERO(err, "ibv_dealloc_pd (uxdma[1])"); } - err = ibv_dealloc_pd(pd_parent); + err = ibv.dealloc_pd(pd_parent); CHECK_ZERO(err, "ibv_dealloc_pd (pd_parent)"); } - err = ibv_dealloc_pd(pd_orig); + err = ibv.dealloc_pd(pd_orig); CHECK_ZERO(err, "ibv_dealloc_pd (pd_orig)"); - err = ibv_close_device(context); + err = ibv.close_device(context); CHECK_ZERO(err, "ibv_close_device"); } @@ -723,12 +723,12 @@ void GDABackend::open_dv_libs() { } void GDABackend::close_dv_libs() { - if (ionicdv_handle_ != nullptr) - dlclose(ionicdv_handle_); - if (bnxtdv_handle_ != nullptr) dlclose(bnxtdv_handle_); + if (ionicdv_handle_ != nullptr) + dlclose(ionicdv_handle_); + if (mlx5dv_handle_ != nullptr) dlclose(mlx5dv_handle_); @@ -756,7 +756,7 @@ void GDABackend::setup_heap_memory_rkey() { auto *base_heap = heap.get_local_heap_base(); int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_ATOMIC; - heap_mr = ibv_reg_mr(pd_orig, base_heap, heap.get_size(), access); + heap_mr = ibv.reg_mr(pd_orig, base_heap, heap.get_size(), access); CHECK_NNULL(heap_mr, "ibv_reg_mr"); const size_t rkeys_size = sizeof(uint32_t) * num_pes; @@ -784,7 +784,7 @@ void GDABackend::setup_heap_memory_rkey() { } void GDABackend::cleanup_heap_memory_rkey() { - int ret = ibv_dereg_mr(heap_mr); + int ret = ibv.dereg_mr(heap_mr); CHECK_ZERO(ret, "ibv_dereg_mr"); CHECK_HIP(hipHostFree(heap_rkey)); @@ -831,14 +831,14 @@ void GDABackend::open_ib_device() { int num_devices = 0; int err; - device_list = ibv_get_device_list(&num_devices); + device_list = ibv.get_device_list(&num_devices); CHECK_NNULL(device_list, "ibv_get_device_list"); device = device_list[0]; //TODO 
default to HIP selected device? if (requested_dev) { for (int i = 0; i < num_devices; i++) { - const char *select_device = ibv_get_device_name(device_list[i]); + const char *select_device = ibv.get_device_name(device_list[i]); CHECK_NNULL(select_device, "ibv_get_device_name"); if (strstr(select_device, requested_dev)) { @@ -848,14 +848,14 @@ void GDABackend::open_ib_device() { } } - context = ibv_open_device(device); + context = ibv.open_device(device); CHECK_NNULL(context, "ib open device"); dump_ibv_context(context); dump_ibv_device(context->device); validate_ib_device(); - pd_orig = ibv_alloc_pd(context); + pd_orig = ibv.alloc_pd(context); CHECK_NNULL(pd_orig, "ib allocate pd"); dump_ibv_pd(pd_orig); @@ -863,14 +863,14 @@ void GDABackend::open_ib_device() { create_parent_domain(); } - err = ibv_query_port(context, port, &portinfo); + err = ibv.query_port(context, port, &portinfo); CHECK_ZERO(err, "ibv_query_port"); dump_ibv_port_attr(&portinfo); /* Must init after querying port */ select_gid_index(); - ibv_free_device_list(device_list); + ibv.free_device_list(device_list); } void GDABackend::validate_ib_device() { @@ -881,12 +881,12 @@ void GDABackend::validate_ib_device() { err = gethostname(hostname, sizeof(hostname)); CHECK_ZERO(err, "gethostname"); - nicname = ibv_get_device_name(device); + nicname = ibv.get_device_name(device); CHECK_NNULL(nicname, "ibv_get_device_name"); debug_str = "[" + std::string(hostname) + ", " + std::string(nicname) + "]"; - err = ibv_query_device(context, &device_attr); + err = ibv.query_device(context, &device_attr); CHECK_ZERO(err, "ibv_query_device"); if (gda_provider == GDAProvider::BNXT) { @@ -935,7 +935,7 @@ void GDABackend::modify_qps_reset_to_init() { if (gda_provider == GDAProvider::BNXT) { err = bnxt_re_dv.modify_qp(qps[i], &attr, attr_mask, 0, 0); } else { - err = ibv_modify_qp(qps[i], &attr, attr_mask); + err = ibv.modify_qp(qps[i], &attr, attr_mask); } CHECK_ZERO(err, "modify_qp (INIT)"); } @@ -987,7 +987,7 @@ void 
GDABackend::modify_qps_init_to_rtr() { if (gda_provider == GDAProvider::BNXT) { err = bnxt_re_dv.modify_qp(qps[i], &attr, attr_mask, 0, 0); } else { - err = ibv_modify_qp(qps[i], &attr, attr_mask); + err = ibv.modify_qp(qps[i], &attr, attr_mask); } CHECK_ZERO(err, "modify_qp (RTR)"); } @@ -1023,7 +1023,7 @@ void GDABackend::modify_qps_rtr_to_rts() { if (gda_provider == GDAProvider::BNXT) { err = bnxt_re_dv.modify_qp(qps[i], &attr, attr_mask, 0, 0); } else { - err = ibv_modify_qp(qps[i], &attr, attr_mask); + err = ibv.modify_qp(qps[i], &attr, attr_mask); } CHECK_ZERO(err, "modify_qp (RTS)"); } @@ -1144,7 +1144,7 @@ void GDABackend::create_parent_domain() { pattr.alloc = GDABackend::pd_alloc_host; } - pd_parent = ibv_alloc_parent_domain(context, &pattr); + pd_parent = ibv.alloc_parent_domain(context, &pattr); CHECK_NNULL(pd_parent, "ibv_alloc_parent_domain"); dump_ibv_pd(pd_parent); @@ -1171,10 +1171,10 @@ void GDABackend::create_cqs(int cqe) { cq_attr.parent_domain = pd_uxdma[i & 1]; } - cq_ex = ibv_create_cq_ex(context, &cq_attr); + cq_ex = ibv.create_cq_ex(context, &cq_attr); CHECK_NNULL(cq_ex, "ibv_create_cq_ex"); - cqs[i] = ibv_cq_ex_to_cq(cq_ex); + cqs[i] = ibv.cq_ex_to_cq(cq_ex); CHECK_NNULL(cqs[i], "ibv_cq_ex_to_cq"); } } @@ -1218,7 +1218,7 @@ void GDABackend::create_qps(int sq_length) { attr.send_cq = cqs[i]; attr.recv_cq = cqs[i]; - qps[i] = ibv_create_qp_ex(context, &attr); + qps[i] = ibv.create_qp_ex(context, &attr); CHECK_NNULL(qps[i], "ibv_create_qp_ex"); } } @@ -1240,7 +1240,7 @@ void GDABackend::select_gid_index() { gid_entries = (struct ibv_gid_entry*) calloc(gid_tbl_len, sizeof(struct ibv_gid_entry)); - gid_tbl_entries = ibv_query_gid_table(context, gid_entries, gid_tbl_len, 0); + gid_tbl_entries = ibv.query_gid_table(context, gid_entries, gid_tbl_len, 0); if (gid_tbl_entries < 0) { fprintf(stderr, "[Warning] ibv_query_gid_table failed. 
No available GIDs\n"); free(gid_entries); @@ -1257,7 +1257,7 @@ void GDABackend::select_gid_index() { current_gid = gid_entries[i].gid; - err = ibv_query_gid(context, port, i, &current_gid); + err = ibv.query_gid(context, port, i, &current_gid); CHECK_ZERO(err, "ibv_query_gid"); /* We don't want local GIDs */ diff --git a/projects/rocshmem/src/gda/backend_gda.hpp b/projects/rocshmem/src/gda/backend_gda.hpp index f408457121..f2bc96a74e 100644 --- a/projects/rocshmem/src/gda/backend_gda.hpp +++ b/projects/rocshmem/src/gda/backend_gda.hpp @@ -36,6 +36,7 @@ #include "queue_pair.hpp" #include "bootstrap/bootstrap.hpp" #include "debug_gda.hpp" +#include "ibv_wrapper.hpp" #include "gda/ionic/provider_gda_ionic.hpp" #include "gda/bnxt/provider_gda_bnxt.hpp" #include "gda/mlx5/provider_gda_mlx5.hpp" diff --git a/projects/rocshmem/src/gda/ibv_wrapper.cpp b/projects/rocshmem/src/gda/ibv_wrapper.cpp new file mode 100644 index 0000000000..85d12fc030 --- /dev/null +++ b/projects/rocshmem/src/gda/ibv_wrapper.cpp @@ -0,0 +1,207 @@ +/****************************************************************************** + * Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#include "ibv_wrapper.hpp" +#include "util.hpp" + +#include "rocshmem/rocshmem.hpp" +#include <dlfcn.h> + +namespace rocshmem { + +IBVWrapper ibv; + +IBVWrapper::IBVWrapper() { + int err; + + ibv_handle = dlopen("libibverbs.so", RTLD_NOW); + + if (!ibv_handle) { + // Try hard-coded PATH + ibv_handle = dlopen("/usr/lib/x86_64-linux-gnu/libibverbs.so", RTLD_NOW); + + if (!ibv_handle) { + DPRINTF("Could not open libibverbs. Returning\n"); + exit(1); + } + } + + err = init_function_table(); + if (err != ROCSHMEM_SUCCESS) { + DPRINTF("Could not construct InfiniBand Verbs function table \n"); + exit(1); + } +} + +IBVWrapper::~IBVWrapper() { + if (ibv_handle != nullptr) { + dlclose(ibv_handle); + } +} + +int IBVWrapper::init_function_table() { + DLSYM_HELPER(ibv, ibv_, ibv_handle, get_device_list); + DLSYM_HELPER(ibv, ibv_, ibv_handle, free_device_list); + DLSYM_HELPER(ibv, ibv_, ibv_handle, open_device); + DLSYM_HELPER(ibv, ibv_, ibv_handle, close_device); + DLSYM_HELPER(ibv, ibv_, ibv_handle, get_device_name); + DLSYM_HELPER(ibv, ibv_, ibv_handle, query_device); + DLSYM_HELPER(ibv, ibv_, ibv_handle, query_port); + DLSYM_HELPER(ibv, ibv_, ibv_handle, query_gid); + DLSYM_HELPER(ibv, _ibv_, ibv_handle, query_gid_table); // This is not a typo + DLSYM_HELPER(ibv, ibv_, ibv_handle, alloc_pd); + DLSYM_HELPER(ibv, ibv_, ibv_handle, dealloc_pd); + DLSYM_HELPER(ibv, ibv_, ibv_handle, reg_mr); + DLSYM_HELPER(ibv, ibv_, 
ibv_handle, reg_mr_iova2); + DLSYM_HELPER(ibv, ibv_, ibv_handle, dereg_mr); + DLSYM_HELPER(ibv, ibv_, ibv_handle, destroy_cq); + DLSYM_HELPER(ibv, ibv_, ibv_handle, create_qp); + DLSYM_HELPER(ibv, ibv_, ibv_handle, modify_qp); + DLSYM_HELPER(ibv, ibv_, ibv_handle, destroy_qp); + return ROCSHMEM_SUCCESS; +} + +struct ibv_device** IBVWrapper::get_device_list(int *num_devices) { + return ibv.get_device_list(num_devices); +} + +void IBVWrapper::free_device_list(struct ibv_device **list) { + ibv.free_device_list(list); +} + +struct ibv_context* IBVWrapper::open_device(struct ibv_device *device) { + return ibv.open_device(device); + +} + +int IBVWrapper::close_device(struct ibv_context *context) { + return ibv.close_device(context); +} + +const char* IBVWrapper::get_device_name(struct ibv_device *device) { + return ibv.get_device_name(device); +} + +int IBVWrapper::query_device(struct ibv_context *context, struct ibv_device_attr *device_attr) { + return ibv.query_device(context, device_attr); +} + +int IBVWrapper::query_port(struct ibv_context* context, uint8_t port_num, + struct ibv_port_attr* port_attr) { + // Passthrough function for ibv_query_port macro in verbs.h + struct verbs_context *vctx = verbs_get_ctx_op(context, query_port); + + if (!vctx) { + int rc; + + memset(port_attr, 0, sizeof(*port_attr)); + + rc = ibv.query_port(context, port_num, port_attr); + return rc; + } + + return vctx->query_port(context, port_num, port_attr, sizeof(*port_attr)); +} + +ssize_t IBVWrapper::query_gid_table(struct ibv_context *context, + struct ibv_gid_entry *entries, + size_t max_entries, uint32_t flags) { + // Passthrough function for ibv_query_gid_table macro in verbs.h + return ibv.query_gid_table(context, entries, max_entries, flags, sizeof(*entries)); +} + +int IBVWrapper::query_gid(struct ibv_context *context, uint8_t port_num, int index, + union ibv_gid *gid) { + return ibv.query_gid(context, port_num, index, gid); +} + +struct ibv_pd* IBVWrapper::alloc_pd(struct 
ibv_context *context) { + return ibv.alloc_pd(context); +} + +struct ibv_pd * IBVWrapper::alloc_parent_domain(struct ibv_context *context, + struct ibv_parent_domain_init_attr *attr) { + // Passthrough function for ibv_alloc_parent_domain macro in verbs.h + return ibv_alloc_parent_domain(context, attr); +} + +int IBVWrapper::dealloc_pd(struct ibv_pd *pd) { + return ibv.dealloc_pd(pd); +} + +struct ibv_mr* IBVWrapper::reg_mr(struct ibv_pd* pd, void* addr, size_t length, int access) { + // Passthrough function for ibv_reg_mr macro in verbs.h + int is_access_const = __builtin_constant_p(((int)(access) & IBV_ACCESS_OPTIONAL_RANGE) == 0); + + if (is_access_const && (access & IBV_ACCESS_OPTIONAL_RANGE) == 0) + return ibv.reg_mr(pd, addr, length, (int)access); + else + return ibv.reg_mr_iova2(pd, addr, length, (uintptr_t)addr, access); +} + +int IBVWrapper::dereg_mr(struct ibv_mr *mr) { + return ibv.dereg_mr(mr); +} + +struct ibv_cq_ex *IBVWrapper::create_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr) { + // Passthrough function for ibv_create_cq_ex macro in verbs.h + return ibv_create_cq_ex(context, cq_attr); +} + +struct ibv_cq* IBVWrapper::cq_ex_to_cq(struct ibv_cq_ex *cq) { + // Passthrough function for ibv_cq_ex_to_cq in verbs.h + return ibv_cq_ex_to_cq(cq); +} + +int IBVWrapper::destroy_cq(struct ibv_cq *cq) { + return ibv.destroy_cq(cq); +} + +struct ibv_qp * IBVWrapper::create_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_init_attr_ex) { + // Passthrough function for ibv_create_qp_ex macro in verbs.h + struct verbs_context *vctx; + uint32_t mask = qp_init_attr_ex->comp_mask; + + if (mask == IBV_QP_INIT_ATTR_PD) + return ibv.create_qp(qp_init_attr_ex->pd, (struct ibv_qp_init_attr *)qp_init_attr_ex); + + vctx = verbs_get_ctx_op(context, create_qp_ex); + if (!vctx) { + errno = EOPNOTSUPP; + return NULL; + } + return vctx->create_qp_ex(context, qp_init_attr_ex); +} + +int IBVWrapper::modify_qp(struct ibv_qp *qp, 
struct ibv_qp_attr *attr, int attr_mask) { + return ibv.modify_qp(qp, attr, attr_mask); +} + +int IBVWrapper::destroy_qp(struct ibv_qp *qp) { + return ibv.destroy_qp(qp); +} + +} // namespace rocshmem diff --git a/projects/rocshmem/src/gda/ibv_wrapper.hpp b/projects/rocshmem/src/gda/ibv_wrapper.hpp new file mode 100644 index 0000000000..bea680fed0 --- /dev/null +++ b/projects/rocshmem/src/gda/ibv_wrapper.hpp @@ -0,0 +1,128 @@ +/****************************************************************************** + * Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *****************************************************************************/ + +#ifndef LIBRARY_SRC_GDA_IBV_WRAPPER_HPP_ +#define LIBRARY_SRC_GDA_IBV_WRAPPER_HPP_ + +#include <infiniband/verbs.h> + +namespace rocshmem { + +class IBVWrapper; + +extern IBVWrapper ibv; + +class IBVWrapper { + public: + explicit IBVWrapper(); + virtual ~IBVWrapper(); + + struct ibv_device** get_device_list(int *num_devices); + void free_device_list(struct ibv_device **list); + + struct ibv_context* open_device(struct ibv_device *device); + int close_device(struct ibv_context *context); + + const char* get_device_name(struct ibv_device *device); + int query_device(struct ibv_context *context, struct ibv_device_attr *device_attr); + int query_port(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr); + ssize_t query_gid_table(struct ibv_context *context, struct ibv_gid_entry *entries, + size_t max_entries, uint32_t flags); + int query_gid(struct ibv_context *context, uint8_t port_num, int index, union ibv_gid *gid); + + struct ibv_pd* alloc_pd(struct ibv_context *context); + struct ibv_pd* alloc_parent_domain(struct ibv_context *context, + struct ibv_parent_domain_init_attr *attr); + int dealloc_pd(struct ibv_pd *pd); + + struct ibv_mr* reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access); + int dereg_mr(struct ibv_mr *mr); + + struct ibv_cq_ex* create_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr); + struct ibv_cq* cq_ex_to_cq(struct ibv_cq_ex *cq); + int destroy_cq(struct ibv_cq *cq); + + struct ibv_qp* create_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_init_attr); + int modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask); + int destroy_qp(struct ibv_qp *qp); + + private: + struct ibv_funcs_t { + struct ibv_device** (*get_device_list)(int *num_devices); + void (*free_device_list)(struct ibv_device **list); + + struct ibv_context* (*open_device)(struct ibv_device *device); + int 
(*close_device)(struct ibv_context *context); + + const char* (*get_device_name)(struct ibv_device *device); + int (*query_device)(struct ibv_context *context, struct ibv_device_attr *device_attr); + int (*query_port)(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr); + ssize_t (*query_gid_table)(struct ibv_context *context, + struct ibv_gid_entry *entries, size_t max_entries, + uint32_t flags, size_t entry_size); + + int (*query_gid)(struct ibv_context *context, uint8_t port_num, int index, union ibv_gid *gid); + + struct ibv_pd* (*alloc_pd)(struct ibv_context *context); + struct ibv_pd* (*alloc_parent_domain)(struct ibv_context *context, + struct ibv_parent_domain_init_attr *attr); + int (*dealloc_pd)(struct ibv_pd *pd); + + struct ibv_mr* (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, int access); + struct ibv_mr* (*reg_mr_iova2)(struct ibv_pd *pd, void *addr, size_t length, + uint64_t iova, unsigned int access); + int (*dereg_mr)(struct ibv_mr *mr); + + struct ibv_cq_ex* (*create_cq_ex)(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr); + int (*destroy_cq)(struct ibv_cq *cq); + + struct ibv_qp* (*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr); + int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask); + int (*destroy_qp)(struct ibv_qp *qp); + }; + + /** + * @brief External handle to the ibv function table + */ + struct ibv_funcs_t ibv; + + /** + * @brief handle used for the dlopen of the InfiniBand Verbs library + */ + void *ibv_handle = nullptr; + + /** + * @brief initialize function table + */ + int init_function_table(); +}; + +} // namespace rocshmem + +#endif /* LIBRARY_SRC_GDA_IBV_WRAPPER_HPP_ */ diff --git a/projects/rocshmem/src/gda/ionic/backend_gda_ionic.cpp b/projects/rocshmem/src/gda/ionic/backend_gda_ionic.cpp index ef62590f45..2547449b94 100644 --- a/projects/rocshmem/src/gda/ionic/backend_gda_ionic.cpp +++ 
b/projects/rocshmem/src/gda/ionic/backend_gda_ionic.cpp @@ -81,7 +81,7 @@ void GDABackend::ionic_setup_parent_domain(struct ibv_parent_domain_init_attr* p ionic_dv.pd_set_rqcmb(pd_parent, false, false, false); for (int uxdma_i = 0; uxdma_i < 2; ++uxdma_i) { - pd_uxdma[uxdma_i] = ibv_alloc_parent_domain(context, pattr); + pd_uxdma[uxdma_i] = ibv.alloc_parent_domain(context, pattr); CHECK_NNULL(pd_uxdma[uxdma_i], "ibv_alloc_parent_domain (uxdma)"); ionic_dv.pd_set_sqcmb(pd_uxdma[uxdma_i], false, false, false); diff --git a/projects/rocshmem/src/gda/queue_pair.cpp b/projects/rocshmem/src/gda/queue_pair.cpp index 729db6490b..3ac890f81a 100644 --- a/projects/rocshmem/src/gda/queue_pair.cpp +++ b/projects/rocshmem/src/gda/queue_pair.cpp @@ -46,10 +46,10 @@ QueuePair::QueuePair(struct ibv_pd* pd, int gda_provider) { CHECK_HIP(hipMemset(nonfetching_atomic, 0, 8)); CHECK_HIP(hipMemset(fetching_atomic, 0, 8 * FETCHING_ATOMIC_CNT)); - mr_nonfetching_atomic = ibv_reg_mr(pd, nonfetching_atomic, 8, access); + mr_nonfetching_atomic = ibv.reg_mr(pd, nonfetching_atomic, 8, access); CHECK_NNULL(mr_nonfetching_atomic, "ibv_reg_mr"); - mr_fetching_atomic = ibv_reg_mr(pd, fetching_atomic, 8 * FETCHING_ATOMIC_CNT, access); + mr_fetching_atomic = ibv.reg_mr(pd, fetching_atomic, 8 * FETCHING_ATOMIC_CNT, access); CHECK_NNULL(mr_fetching_atomic, "ibv_reg_mr"); if (gda_provider == GDAProvider::MLX5) { @@ -102,10 +102,10 @@ QueuePair::QueuePair(struct ibv_pd* pd, int gda_provider) { QueuePair::~QueuePair() { int err; - err = ibv_dereg_mr(mr_nonfetching_atomic); + err = ibv.dereg_mr(mr_nonfetching_atomic); CHECK_ZERO(err, "ibv_dereg_mr (nonfetching_atomic)"); - err = ibv_dereg_mr(mr_fetching_atomic); + err = ibv.dereg_mr(mr_fetching_atomic); CHECK_ZERO(err, "ibv_dereg_mr (fetching_atomic)"); allocator.deallocate((void*)nonfetching_atomic); diff --git a/projects/rocshmem/src/gda/queue_pair.hpp b/projects/rocshmem/src/gda/queue_pair.hpp index cfb7cfc6ba..e415666389 100644 --- 
a/projects/rocshmem/src/gda/queue_pair.hpp +++ b/projects/rocshmem/src/gda/queue_pair.hpp @@ -39,6 +39,8 @@ #include "constants.hpp" #include "util.hpp" +#include "ibv_wrapper.hpp" + #include "gda/ionic/provider_gda_ionic.hpp" #include "gda/mlx5/provider_gda_mlx5.hpp" #include "gda/bnxt/provider_gda_bnxt.hpp" diff --git a/projects/rocshmem/src/gda/topology.cpp b/projects/rocshmem/src/gda/topology.cpp index dc0ce7aa8f..f0b6069c0e 100644 --- a/projects/rocshmem/src/gda/topology.cpp +++ b/projects/rocshmem/src/gda/topology.cpp @@ -23,6 +23,7 @@ *****************************************************************************/ #include "topology.hpp" +#include "ibv_wrapper.hpp" using namespace rocshmem; @@ -288,7 +289,7 @@ namespace rocshmem int const& gidIndex, int& version) { - char const* deviceName = ibv_get_device_name(context->device); + char const* deviceName = ibv.get_device_name(context->device); char gidRoceVerStr[16] = {}; char roceTypePath[PATH_MAX] = {}; sprintf(roceTypePath, "/sys/class/infiniband/%s/ports/%d/gid_attrs/types/%d", @@ -347,7 +348,7 @@ namespace rocshmem int gidIndex = -1; for (int i = 0; i < gidTblLen; ++i) { - IBV_CALL(ibv_query_gid, context, portNum, i, &gid); + IBV_CALL(ibv.query_gid, context, portNum, i, &gid); if (!IsConfiguredGid(gid)) continue; int gidCurrRoceVersion; if(GetRoceVersionNumber(context, portNum, i, gidCurrRoceVersion) != ROCSHMEM_SUCCESS) continue; @@ -385,7 +386,7 @@ namespace rocshmem // Query the number of IBV devices int numIbvDevices = 0; - ibv_device** deviceList = ibv_get_device_list(&numIbvDevices); + ibv_device** deviceList = ibv.get_device_list(&numIbvDevices); CHECK_NNULL(deviceList, "ibv_get_device_list"); if (numIbvDevices > 0) { @@ -396,15 +397,15 @@ namespace rocshmem ibvDevice.name = deviceList[i]->name; ibvDevice.hasActivePort = false; { - struct ibv_context *context = ibv_open_device(ibvDevice.devicePtr); + struct ibv_context *context = ibv.open_device(ibvDevice.devicePtr); if (context) { struct 
ibv_device_attr deviceAttr; - if (!ibv_query_device(context, &deviceAttr)) { + if (!ibv.query_device(context, &deviceAttr)) { int activePort; ibvDevice.gidIndex = -1; for (int port = 1; port <= deviceAttr.phys_port_cnt; ++port) { struct ibv_port_attr portAttr; - if (ibv_query_port(context, port, &portAttr)) continue; + if (ibv.query_port(context, port, &portAttr)) continue; if (portAttr.state == IBV_PORT_ACTIVE) { activePort = port; ibvDevice.hasActivePort = true; @@ -421,7 +422,7 @@ namespace rocshmem } } } - ibv_close_device(context); + ibv.close_device(context); } } ibvDevice.busId = ""; @@ -458,7 +459,7 @@ namespace rocshmem fprintf(stderr, "[Error] No visible InfiniBand devices found.\n"); exit(1); } - ibv_free_device_list(deviceList); + ibv.free_device_list(deviceList); isInitialized = true; } return ibvDeviceList;