diff --git a/scripts/build_configs/all_backends b/scripts/build_configs/all_backends new file mode 100755 index 0000000000..6c49e98e54 --- /dev/null +++ b/scripts/build_configs/all_backends @@ -0,0 +1,52 @@ +#!/bin/bash +############################################################################### +# Copyright (c) Advanced Micro Devices, Inc. All rights reserved. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +############################################################################### + +set -e + +src_path=$(dirname "$(realpath $0)")/../../ + +cmake \ + -DBUILD_CODE_COVERAGE=${CODE_COV:-OFF} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE:-Release} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-~/rocshmem} \ + -DCMAKE_VERBOSE_MAKEFILE=OFF \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DBUILD_FUNCTIONAL_TESTS=ON \ + -DBUILD_UNIT_TESTS=ON \ + -DDEBUG=OFF \ + -DPROFILE=OFF \ + -DUSE_GDA=ON \ + -DGDA_MLX5=ON \ + -DGDA_BNXT=ON \ + -DGDA_IONIC=OFF \ + -DUSE_RO=ON \ + -DUSE_IPC=ON \ + -DUSE_THREADS=OFF \ + -DUSE_WF_COAL=OFF \ + -DUSE_HDP_FLUSH=OFF \ + -DUSE_HDP_FLUSH_HOST_SIDE=OFF \ + $* $src_path +cmake --build . --parallel 8 +cmake --install . diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e427ec1efe..4b2f237d56 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -66,9 +66,11 @@ target_compile_options(${PROJECT_NAME} PUBLIC ${ROCSHMEM_COMPILE_FLAGS}) ############################################################################### if (USE_GDA) add_subdirectory(gda) -elseif (USE_RO) +endif() +if (USE_RO) add_subdirectory(reverse_offload) -elseif (USE_IPC) +endif() +if (USE_IPC) add_subdirectory(ipc) endif() add_subdirectory(host) diff --git a/src/backend_bc.cpp b/src/backend_bc.cpp index 95ff10d976..9b17ed3dc8 100644 --- a/src/backend_bc.cpp +++ b/src/backend_bc.cpp @@ -29,9 +29,11 @@ #if defined(USE_GDA) #include "gda/backend_gda.hpp" -#elif defined(USE_RO) +#endif +#if defined(USE_RO) #include "reverse_offload/backend_ro.hpp" -#elif defined(USE_IPC) +#endif +#if defined(USE_IPC) #include "ipc/backend_ipc.hpp" #endif @@ -250,7 +252,20 @@ void Backend::reset_stats() { } __device__ bool Backend::create_ctx(int64_t option, rocshmem_ctx_t* ctx) { -#if defined(USE_GDA) +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) + switch(this->type) { + case BackendType::GDA_BACKEND: + return static_cast(this)->create_ctx(option, ctx); + break; + case BackendType::RO_BACKEND: + return static_cast(this)->create_ctx(option, ctx); + break; + case BackendType::IPC_BACKEND: + default: + return static_cast(this)->create_ctx(option, ctx); + break; + } +#elif defined(USE_GDA) return static_cast(this)->create_ctx(option, ctx); #elif defined(USE_RO) return static_cast(this)->create_ctx(option, ctx); @@ -260,7 +275,20 @@ __device__ bool Backend::create_ctx(int64_t option, rocshmem_ctx_t* ctx) { } __device__ void Backend::destroy_ctx(rocshmem_ctx_t* ctx) { -#if defined(USE_GDA) +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) + switch(this->type) { + case BackendType::GDA_BACKEND: + static_cast(this)->destroy_ctx(ctx); + break; + case BackendType::RO_BACKEND: + static_cast(this)->destroy_ctx(ctx); + break; + case BackendType::IPC_BACKEND: + default: + static_cast(this)->destroy_ctx(ctx); + break; + } +#elif defined(USE_GDA) static_cast(this)->destroy_ctx(ctx); #elif defined(USE_RO) static_cast(this)->destroy_ctx(ctx); diff --git a/src/backend_bc.hpp b/src/backend_bc.hpp index 240277caaa..bfa04c4861 100644 --- a/src/backend_bc.hpp +++ b/src/backend_bc.hpp @@ -63,6 +63,8 @@ class TeamInfo; */ class Backend { public: + friend Context; + /** * @brief Constructor. * @@ -286,7 +288,7 @@ class Backend { * rely on the normal inheritance mechanism to tailor behavior for * derived backend types. */ - BackendType type{BackendType::RO_BACKEND}; + BackendType type; /** * @brief Dumps derived class statistics. diff --git a/src/backend_type.hpp b/src/backend_type.hpp index 203aaeecc3..fc6b7d111c 100644 --- a/src/backend_type.hpp +++ b/src/backend_type.hpp @@ -56,7 +56,21 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND }; /** * @brief Device static dispatch method call. */ -#if defined(USE_GDA) +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) +#define DISPATCH(Func) \ + switch(this->btype) { \ + case BackendType::GDA_BACKEND: \ + static_cast(this)->Func; \ + break; \ + case BackendType::RO_BACKEND: \ + static_cast(this)->Func; \ + break; \ + case BackendType::IPC_BACKEND: \ + default: \ + static_cast(this)->Func; \ + break; \ + } +#elif defined(USE_GDA) #define DISPATCH(Func) \ static_cast(this)->Func; #elif defined(USE_RO) @@ -70,7 +84,19 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND }; /** * @brief Device static dispatch method call with a return value. */ -#if defined(USE_GDA) +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) +#define DISPATCH_RET(Func) \ + if (this->btype == BackendType::GDA_BACKEND) { \ + auto ret1 = static_cast(this)->Func; \ + return ret1; \ + } else if(this->btype == BackendType::RO_BACKEND) { \ + auto ret2 = static_cast(this)->Func; \ + return ret2; \ + } else { \ + auto ret3 = static_cast(this)->Func; \ + return ret3; \ + } +#elif defined(USE_GDA) #define DISPATCH_RET(Func) \ auto ret_val = static_cast(this)->Func; \ return ret_val; @@ -87,7 +113,23 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND }; /** * @brief Device static dispatch method call with a return type of pointer. */ -#if defined(USE_GDA) +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) +#define DISPATCH_RET_PTR(Func) \ + void *ret_val{nullptr}; \ + switch(this->btype) { \ + case BackendType::GDA_BACKEND: \ + ret_val = static_cast(this)->Func; \ + break; \ + case BackendType::RO_BACKEND: \ + ret_val = static_cast(this)->Func; \ + break; \ + case BackendType::IPC_BACKEND: \ + default: \ + ret_val = static_cast(this)->Func; \ + break; \ + } \ + return ret_val; +#elif defined(USE_GDA) #define DISPATCH_RET_PTR(Func) \ void *ret_val{nullptr}; \ ret_val = static_cast(this)->Func; \ @@ -111,7 +153,21 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND }; * MPI_THREAD_MULTIPLE (for RMA and AMO operations) and the ordering and * threading semantics of collectives in OpenSHMEM match those of MPI. */ -#if defined(USE_GDA) +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) +#define HOST_DISPATCH(Func) \ + switch(this->btype) { \ + case BackendType::GDA_BACKEND: \ + static_cast(this)->Func; \ + break; \ + case BackendType::RO_BACKEND: \ + static_cast(this)->Func; \ + break; \ + case BackendType::IPC_BACKEND: \ + default: \ + static_cast(this)->Func; \ + break; \ + } +#elif defined(USE_GDA) #define HOST_DISPATCH(Func) static_cast(this)->Func; #elif defined(USE_RO) #define HOST_DISPATCH(Func) static_cast(this)->Func; @@ -126,7 +182,19 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND }; * MPI_THREAD_MULTIPLE (for RMA and AMO operations) and the ordering and * threading semantics of collectives in OpenSHMEM match those of MPI. */ -#if defined(USE_GDA) +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) +#define HOST_DISPATCH_RET(Func) \ + if (this->btype == BackendType::GDA_BACKEND) { \ + auto ret1 = static_cast(this)->Func; \ + return ret1; \ + } else if (this->btype == BackendType::RO_BACKEND) { \ + auto ret2 = static_cast(this)->Func; \ + return ret2; \ + } else { \ + auto ret3 = static_cast(this)->Func; \ + return ret3; \ + } +#elif defined(USE_GDA) #define HOST_DISPATCH_RET(Func) \ auto ret_val = static_cast(this)->Func; \ return ret_val; @@ -143,7 +211,23 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND }; /** * @brief Host static dispatch method call with a return type of pointer. */ -#if defined(USE_GDA) +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) +#define HOST_DISPATCH_RET_PTR(Func) \ + void *ret_val{nullptr}; \ + switch(this->btype) { \ + case BackendType::GDA_BACKEND: \ + ret_val = static_cast(this)->Func; \ + break; \ + case BackendType::RO_BACKEND: \ + ret_val = static_cast(this)->Func; \ + break; \ + case BackendType::IPC_BACKEND: \ + default: \ + ret_val = static_cast(this)->Func; \ + break; \ + } \ + return ret_val; +#elif defined(USE_GDA) #define HOST_DISPATCH_RET_PTR(Func) \ void *ret_val{nullptr}; \ ret_val = static_cast(this)->Func; \ diff --git a/src/context.hpp b/src/context.hpp index 143893b430..72163bdbdd 100644 --- a/src/context.hpp +++ b/src/context.hpp @@ -471,6 +471,11 @@ class Context { */ int my_pe{-1}; + /** + * @brief Duplicated local copy of backend's type + */ + BackendType btype; + /** * @brief Stats common to all types of device contexts. */ diff --git a/src/context_device.cpp b/src/context_device.cpp index 459d33b3b9..14382b2f7b 100644 --- a/src/context_device.cpp +++ b/src/context_device.cpp @@ -32,7 +32,8 @@ namespace rocshmem { __device__ Context::Context(Backend* handle, bool shareable) : num_pes(handle->getNumPEs()), my_pe(handle->getMyPE()), - fence_(shareable) { + fence_(shareable), + btype(handle->type) { /* * Device-side context constructor is a work-group collective, so make * sure all the members have their default values before returning. diff --git a/src/context_host.cpp b/src/context_host.cpp index e6b3304ebd..2f4f49455d 100644 --- a/src/context_host.cpp +++ b/src/context_host.cpp @@ -31,7 +31,8 @@ namespace rocshmem { __host__ Context::Context(Backend* handle, bool shareable) : num_pes(handle->getNumPEs()), my_pe(handle->getMyPE()), - fence_(shareable) { + fence_(shareable), + btype(handle->type) { } __host__ Context::~Context() { diff --git a/src/context_incl.hpp b/src/context_incl.hpp index efc0fa65f2..cbe4d3ab0e 100644 --- a/src/context_incl.hpp +++ b/src/context_incl.hpp @@ -31,14 +31,14 @@ #if defined(USE_GDA) #include "gda/context_gda_device.hpp" #include "gda/context_gda_host.hpp" -#elif defined(USE_RO) +#endif +#if defined(USE_RO) #include "reverse_offload/context_ro_device.hpp" #include "reverse_offload/context_ro_host.hpp" -#elif defined(USE_IPC) +#endif +#if defined(USE_IPC) #include "ipc/context_ipc_device.hpp" #include "ipc/context_ipc_host.hpp" -#else -#error "Select one backend among USE_RO, USE_IPC, USE_GDA" #endif #endif // LIBRARY_SRC_CONTEXT_INCL_HPP_ diff --git a/src/context_tmpl_device.hpp b/src/context_tmpl_device.hpp index ecaaf6c909..404c2533fb 100644 --- a/src/context_tmpl_device.hpp +++ b/src/context_tmpl_device.hpp @@ -29,9 +29,11 @@ #include "backend_type.hpp" #if defined(USE_GDA) #include "gda/context_gda_device.hpp" -#elif defined(USE_RO) +#endif +#if defined(USE_RO) #include "reverse_offload/context_ro_device.hpp" -#elif defined(USE_IPC) +#endif +#if defined(USE_IPC) #include "ipc/context_ipc_device.hpp" #endif diff --git a/src/context_tmpl_host.hpp b/src/context_tmpl_host.hpp index 36ba8a552b..bb4a8ed38f 100644 --- a/src/context_tmpl_host.hpp +++ b/src/context_tmpl_host.hpp @@ -29,9 +29,11 @@ #include "backend_type.hpp" #if defined(USE_GDA) #include "gda/context_gda_host.hpp" -#elif defined(USE_RO) +#endif +#if defined(USE_RO) #include "reverse_offload/context_ro_host.hpp" -#elif defined(USE_IPC) +#endif +#if defined(USE_IPC) #include "ipc/context_ipc_host.hpp" #endif @@ -249,7 +251,7 @@ __host__ size_t Context::wait_until_any(T *ivars, size_t nelems, int cmp, T val) { ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ANY); - return HOST_DISPATCH(wait_until_any(ivars, nelems, status, cmp, val)); + HOST_DISPATCH_RET(wait_until_any(ivars, nelems, status, cmp, val)); } template diff --git a/src/envvar.cpp b/src/envvar.cpp index bdaa53fbe9..436ffa2d74 100644 --- a/src/envvar.cpp +++ b/src/envvar.cpp @@ -46,6 +46,7 @@ namespace envvar { const var max_wavefront_buffers("MAX_WF_BUFFERS", "", 1024); const var requested_dev("USE_IB_HCA", ""); const var sq_size("SQ_SIZE", "", 1024); + const var backend("BACKEND", ""); } // inline namespace _base namespace bootstrap { diff --git a/src/envvar.hpp b/src/envvar.hpp index 14b587ace3..b80d84e7a9 100644 --- a/src/envvar.hpp +++ b/src/envvar.hpp @@ -404,6 +404,7 @@ namespace envvar { extern const var debug_level; extern const var heap_size; extern const var max_num_teams; + extern const var backend; /** * @brief Maximum number of contexts for the application diff --git a/src/gda/backend_gda.cpp b/src/gda/backend_gda.cpp index 6565fd7b66..60c96eaf45 100644 --- a/src/gda/backend_gda.cpp +++ b/src/gda/backend_gda.cpp @@ -50,7 +50,7 @@ rocshmem_team_t get_external_team(GDATeam *team) { return reinterpret_cast(team); } -int get_ls_non_zero_bit(char *bitmask, int mask_length) { +static int get_ls_non_zero_bit(char *bitmask, int mask_length) { int position{-1}; for (int bit_i = 0; bit_i < mask_length; bit_i++) { int byte_i = bit_i / CHAR_BIT; @@ -543,6 +543,39 @@ int GDABackend::mlx5_dv_dl_init () { return ROCSHMEM_SUCCESS; } +/* Currently we only check whether we can dlopen a Direct Verbs library. +** We might need to extend this logic to check whether we have interfaces that +** can use those DV libraries +*/ +int GDABackend::backend_can_run() { + void *handle{nullptr}; + + /* Try opening bnxt DV libraries */ + handle = dlopen("libbnxt_re.so", RTLD_NOW); + if (handle) { + dlclose(handle); + return ROCSHMEM_SUCCESS; + } else { + /* Try hard-coded PATH */ + handle = dlopen("/usr/local/lib/libbnxt_re.so", RTLD_NOW); + if (handle) { + dlclose(handle); + return ROCSHMEM_SUCCESS; + } + } + + /* Try opening mlx5 DV libraries */ + handle = dlopen("libmlx5.so", RTLD_NOW); + if (handle) { + dlclose(handle); + return ROCSHMEM_SUCCESS; + } + + /* ToDo: opening ionic DV libraries */ + + return ROCSHMEM_ERROR; +} + void GDABackend::setup_ibv() { autodetect_dv_libs(); diff --git a/src/gda/backend_gda.hpp b/src/gda/backend_gda.hpp index 74cba5974d..65f07edc55 100644 --- a/src/gda/backend_gda.hpp +++ b/src/gda/backend_gda.hpp @@ -146,6 +146,17 @@ class GDABackend : public Backend { */ virtual ~GDABackend(); + /** + * @brief Verify whether GDA Backend could run + * + * @return ROSCHMEM_SUCCESS if GDA Backend can most likely be used + * ROCSHMEM_ERROR otherwise + */ + static int backend_can_run(void); + + /** + * @brief + */ __device__ bool create_ctx(int64_t options, rocshmem_ctx_t *ctx); /** diff --git a/src/gda/context_gda_tmpl_device.hpp b/src/gda/context_gda_tmpl_device.hpp index 9c3790506c..d119dd5f6c 100644 --- a/src/gda/context_gda_tmpl_device.hpp +++ b/src/gda/context_gda_tmpl_device.hpp @@ -286,7 +286,7 @@ __device__ T GDAContext::amo_fetch_cas(void *dst, T value, T cond, int pe) { // Collectives TODO: loosely adapted from IPC, needs review template -__device__ void compute_reduce(T *src, T *dst, int size, int wg_id, int wg_size) { +__device__ void gda_compute_reduce(T *src, T *dst, int size, int wg_id, int wg_size) { for (int i = wg_id; i < size; i += wg_size) { OpWrap::Calc(src, dst, i); } @@ -339,7 +339,7 @@ __device__ void GDAContext::internal_direct_allreduce( __syncthreads(); T *ptr = &pWrk[i * nelems]; - compute_reduce(ptr, dst, nelems, wg_id, wg_size); + gda_compute_reduce(ptr, dst, nelems, wg_id, wg_size); threadfence_system(); } } @@ -457,8 +457,8 @@ __device__ void GDAContext::internal_ring_allreduce( wait_until(&pSync[iter], ROCSHMEM_CMP_EQ, wait_val); } __syncthreads(); - compute_reduce(&pWrk[off_recv], &dst[off_seg + off_recv], - chunk_size, wg_id, wg_size); + gda_compute_reduce(&pWrk[off_recv], &dst[off_seg + off_recv], + chunk_size, wg_id, wg_size); } // Loop 2 in the example above diff --git a/src/ipc/backend_ipc.cpp b/src/ipc/backend_ipc.cpp index 9314a79ddf..d61fb7e96b 100644 --- a/src/ipc/backend_ipc.cpp +++ b/src/ipc/backend_ipc.cpp @@ -49,7 +49,7 @@ rocshmem_team_t get_external_team(IPCTeam *team) { return reinterpret_cast(team); } -int get_ls_non_zero_bit(char *bitmask, int mask_length) { +static int get_ls_non_zero_bit(char *bitmask, int mask_length) { int position = -1; for (int bit_i = 0; bit_i < mask_length; bit_i++) { diff --git a/src/ipc/context_ipc_tmpl_device.hpp b/src/ipc/context_ipc_tmpl_device.hpp index d3825a1f2f..f4a76a9b7d 100644 --- a/src/ipc/context_ipc_tmpl_device.hpp +++ b/src/ipc/context_ipc_tmpl_device.hpp @@ -160,7 +160,7 @@ __device__ T IPCContext::amo_fetch_cas(void *dest, T value, T cond, int pe) { // Collectives template -__device__ void compute_reduce(T *src, T *dst, int size, int wg_id, int wg_size) { +__device__ void ipc_compute_reduce(T *src, T *dst, int size, int wg_id, int wg_size) { for (int i = wg_id; i < size; i += wg_size) { OpWrap::Calc(src, dst, i); } @@ -213,7 +213,7 @@ __device__ void IPCContext::internal_direct_allreduce( __syncthreads(); T *ptr = &pWrk[i * nelems]; - compute_reduce(ptr, dst, nelems, wg_id, wg_size); + ipc_compute_reduce(ptr, dst, nelems, wg_id, wg_size); threadfence_system(); } } @@ -331,7 +331,7 @@ __device__ void IPCContext::internal_ring_allreduce( wait_until(&pSync[iter], ROCSHMEM_CMP_EQ, wait_val); } __syncthreads(); - compute_reduce(&pWrk[off_recv], &dst[off_seg + off_recv], + ipc_compute_reduce(&pWrk[off_recv], &dst[off_seg + off_recv], chunk_size, wg_id, wg_size); } diff --git a/src/reverse_offload/context_ro_device.hpp b/src/reverse_offload/context_ro_device.hpp index 3fc7b31ae9..68c295aa72 100644 --- a/src/reverse_offload/context_ro_device.hpp +++ b/src/reverse_offload/context_ro_device.hpp @@ -43,8 +43,6 @@ class ROContext : public Context { public: __host__ ROContext(Backend *b, size_t block_id = 0, bool default_ctx = false); - __device__ void threadfence_system(); - __device__ void ctx_destroy(); __device__ void putmem(void *dest, const void *source, size_t nelems, int pe); diff --git a/src/rocshmem.cpp b/src/rocshmem.cpp index eb3d3cb485..7912dc9758 100644 --- a/src/rocshmem.cpp +++ b/src/rocshmem.cpp @@ -39,14 +39,14 @@ #if defined(USE_GDA) #include "gda/backend_gda.hpp" #include "gda/context_gda_tmpl_host.hpp" -#elif defined(USE_RO) +#endif +#if defined(USE_RO) #include "reverse_offload/backend_ro.hpp" #include "reverse_offload/context_ro_tmpl_host.hpp" -#elif defined(USE_IPC) +#endif +#if defined(USE_IPC) #include "ipc/backend_ipc.hpp" #include "ipc/context_ipc_tmpl_host.hpp" -#else -#error "Select one backend among USE_RO, USE_IPC, USE_GDA" #endif #include "mpi_instance.hpp" #include "team.hpp" @@ -81,6 +81,39 @@ rocshmem_ctx_t ROCSHMEM_HOST_CTX_DEFAULT; * Begin Host Code **/ +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) +static BackendType select_backend_type() { + BackendType type; + + /* Check whether the user explicitely requests a particular backend type */ + std::string envstr = envvar::backend; + std::transform(envstr.begin(), envstr.end(), envstr.begin(), ::tolower); + if (!envstr.empty()) { + DPRINTF("Found environment variable ROCSHMEM_BACKEND, value is %s\n", envstr.c_str()); + if (envstr.find("gda") != std::string::npos) { + return BackendType::GDA_BACKEND; + } + if (envstr.find("ro") != std::string::npos) { + return BackendType::RO_BACKEND; + } + if (envstr.find("ipc") != std::string::npos) { + return BackendType::IPC_BACKEND; + } + } + + if (GDABackend::backend_can_run() == ROCSHMEM_SUCCESS) { + DPRINTF("GDABackend::backend_can_run returned success\n"); + return BackendType::GDA_BACKEND; + } + if (MPIInstance::mpilib_dl_init() == ROCSHMEM_SUCCESS) { + DPRINTF("MPIInstance could dl_init MPI library\n"); + return BackendType::RO_BACKEND; + } + + return BackendType::IPC_BACKEND; +} +#endif + [[maybe_unused]] __host__ void inline library_init(MPI_Comm comm) { assert(!backend); int count = 0; @@ -97,7 +130,26 @@ rocshmem_ctx_t ROCSHMEM_HOST_CTX_DEFAULT; ret = MPIInstance::mpilib_dl_init(); mpi_instance = new MPIInstance(comm); -#if defined(USE_GDA) +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) + BackendType type = select_backend_type(); + switch (type) { + case BackendType::GDA_BACKEND: + DPRINTF("Initializing GDA backend using MPI\n"); + CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend))); + backend = new (backend) GDABackend(comm); + break; + case BackendType::RO_BACKEND: + DPRINTF("Initializing RO backend using MPI\n"); + CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend))); + backend = new (backend) ROBackend(comm); + break; + case BackendType::IPC_BACKEND: + DPRINTF("Initializing IPC backend using MPI\n"); + CHECK_HIP(hipHostMalloc(&backend, sizeof(IPCBackend))); + backend = new (backend) IPCBackend(comm); + break; + } +#elif defined(USE_GDA) CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend))); backend = new (backend) GDABackend(comm); #elif defined(USE_RO) @@ -189,12 +241,41 @@ rocshmem_ctx_t ROCSHMEM_HOST_CTX_DEFAULT; rocm_init(); -#if defined(USE_GDA) +#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) + BackendType type = select_backend_type(); + switch (type) { + case BackendType::GDA_BACKEND: + DPRINTF("Initializing GDA backend with TCP bootstrapping\n"); + CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend))); + backend = new (backend) GDABackend(bootstrap); + break; + case BackendType::RO_BACKEND: + /* Not sure whether this is a valid configuration. Will leave it in for now */ + DPRINTF("Initializing RO backend with TCP bootstrapping\n"); + mpi_instance = new MPIInstance(MPI_COMM_WORLD); + CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend))); + backend = new (backend) ROBackend(MPI_COMM_WORLD); + break; + case BackendType::IPC_BACKEND: + DPRINTF("Initializing IPC backend with TCP bootstrapping\n"); + CHECK_HIP(hipHostMalloc(&backend, sizeof(IPCBackend))); + backend = new (backend) IPCBackend(bootstrap); + break; + } +#elif defined(USE_GDA) CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend))); backend = new (backend) GDABackend(bootstrap); #elif defined(USE_RO) - printf("RO Backend requires MPI library to be initialized, even when using uniqueId initializations!\n"); - abort(); + /* Not sure whether this is a valid configuration. Will leave it in for now */ + int ret; + ret = MPIInstance::mpilib_dl_init(); + if (ret != MPI_SUCCESS) { + printf("RO Backend requires MPI library to be initialized, even when using uniqueId initializations!\n"); + abort(); + } + mpi_instance = new MPIInstance(MPI_COMM_WORLD); + CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend))); + backend = new (backend) ROBackend(MPI_COMM_WORLD); #elif defined(USE_IPC) CHECK_HIP(hipHostMalloc(&backend, sizeof(IPCBackend))); backend = new (backend) IPCBackend(bootstrap); diff --git a/src/rocshmem_gpu.cpp b/src/rocshmem_gpu.cpp index 48623ae372..7cc9f2c466 100644 --- a/src/rocshmem_gpu.cpp +++ b/src/rocshmem_gpu.cpp @@ -53,15 +53,15 @@ #if defined(USE_GDA) #include "gda/context_gda_tmpl_device.hpp" -#elif defined(USE_RO) +#endif +#if defined(USE_RO) #include "reverse_offload/context_ro_tmpl_device.hpp" -#elif defined(USE_IPC) +#endif +#if defined(USE_IPC) # if defined(ENABLE_IPC_BITCODE) # include "ipc/backend_ipc.hpp" # endif #include "ipc/context_ipc_tmpl_device.hpp" -#else -#error "Select one backend among USE_RO, USE_IPC, USE_GDA" #endif /****************************************************************************** diff --git a/src/util.cpp b/src/util.cpp index 170fb7f811..840e84d841 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -26,6 +26,8 @@ #include #include +#include +#include #include #include "rocshmem/rocshmem_config.h" // NOLINT(build/include_subdir)