allow all three backends to co-exist in a single build (#270)
* add support for compiling all backends
also include the logic to select backends either based on user requests
or through some heuristics
* checkpoint for compiling all backends
* final checkpoint
all tests seem to pass when compiling all three backends simultaneasly
and forcing to use any of the three Backends.
* update PR to new envvar system
[ROCm/rocshmem commit: a1269e3db5]
이 커밋은 다음에 포함됨:
@@ -0,0 +1,52 @@
|
||||
#!/bin/bash
|
||||
###############################################################################
|
||||
# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
###############################################################################
|
||||
|
||||
set -e
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DBUILD_CODE_COVERAGE=${CODE_COV:-OFF} \
|
||||
-DCMAKE_BUILD_TYPE=${BUILD_TYPE:-Release} \
|
||||
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-~/rocshmem} \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
|
||||
-DBUILD_FUNCTIONAL_TESTS=ON \
|
||||
-DBUILD_UNIT_TESTS=ON \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GDA=ON \
|
||||
-DGDA_MLX5=ON \
|
||||
-DGDA_BNXT=ON \
|
||||
-DGDA_IONIC=OFF \
|
||||
-DUSE_RO=ON \
|
||||
-DUSE_IPC=ON \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
-DUSE_HDP_FLUSH=OFF \
|
||||
-DUSE_HDP_FLUSH_HOST_SIDE=OFF \
|
||||
$* $src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -66,9 +66,11 @@ target_compile_options(${PROJECT_NAME} PUBLIC ${ROCSHMEM_COMPILE_FLAGS})
|
||||
###############################################################################
|
||||
if (USE_GDA)
|
||||
add_subdirectory(gda)
|
||||
elseif (USE_RO)
|
||||
endif()
|
||||
if (USE_RO)
|
||||
add_subdirectory(reverse_offload)
|
||||
elseif (USE_IPC)
|
||||
endif()
|
||||
if (USE_IPC)
|
||||
add_subdirectory(ipc)
|
||||
endif()
|
||||
add_subdirectory(host)
|
||||
|
||||
@@ -29,9 +29,11 @@
|
||||
|
||||
#if defined(USE_GDA)
|
||||
#include "gda/backend_gda.hpp"
|
||||
#elif defined(USE_RO)
|
||||
#endif
|
||||
#if defined(USE_RO)
|
||||
#include "reverse_offload/backend_ro.hpp"
|
||||
#elif defined(USE_IPC)
|
||||
#endif
|
||||
#if defined(USE_IPC)
|
||||
#include "ipc/backend_ipc.hpp"
|
||||
#endif
|
||||
|
||||
@@ -250,7 +252,20 @@ void Backend::reset_stats() {
|
||||
}
|
||||
|
||||
__device__ bool Backend::create_ctx(int64_t option, rocshmem_ctx_t* ctx) {
|
||||
#if defined(USE_GDA)
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
switch(this->type) {
|
||||
case BackendType::GDA_BACKEND:
|
||||
return static_cast<GDABackend*>(this)->create_ctx(option, ctx);
|
||||
break;
|
||||
case BackendType::RO_BACKEND:
|
||||
return static_cast<ROBackend*>(this)->create_ctx(option, ctx);
|
||||
break;
|
||||
case BackendType::IPC_BACKEND:
|
||||
default:
|
||||
return static_cast<IPCBackend*>(this)->create_ctx(option, ctx);
|
||||
break;
|
||||
}
|
||||
#elif defined(USE_GDA)
|
||||
return static_cast<GDABackend*>(this)->create_ctx(option, ctx);
|
||||
#elif defined(USE_RO)
|
||||
return static_cast<ROBackend*>(this)->create_ctx(option, ctx);
|
||||
@@ -260,7 +275,20 @@ __device__ bool Backend::create_ctx(int64_t option, rocshmem_ctx_t* ctx) {
|
||||
}
|
||||
|
||||
__device__ void Backend::destroy_ctx(rocshmem_ctx_t* ctx) {
|
||||
#if defined(USE_GDA)
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
switch(this->type) {
|
||||
case BackendType::GDA_BACKEND:
|
||||
static_cast<GDABackend*>(this)->destroy_ctx(ctx);
|
||||
break;
|
||||
case BackendType::RO_BACKEND:
|
||||
static_cast<ROBackend*>(this)->destroy_ctx(ctx);
|
||||
break;
|
||||
case BackendType::IPC_BACKEND:
|
||||
default:
|
||||
static_cast<IPCBackend*>(this)->destroy_ctx(ctx);
|
||||
break;
|
||||
}
|
||||
#elif defined(USE_GDA)
|
||||
static_cast<GDABackend*>(this)->destroy_ctx(ctx);
|
||||
#elif defined(USE_RO)
|
||||
static_cast<ROBackend*>(this)->destroy_ctx(ctx);
|
||||
|
||||
@@ -63,6 +63,8 @@ class TeamInfo;
|
||||
*/
|
||||
class Backend {
|
||||
public:
|
||||
friend Context;
|
||||
|
||||
/**
|
||||
* @brief Constructor.
|
||||
*
|
||||
@@ -286,7 +288,7 @@ class Backend {
|
||||
* rely on the normal inheritance mechanism to tailor behavior for
|
||||
* derived backend types.
|
||||
*/
|
||||
BackendType type{BackendType::RO_BACKEND};
|
||||
BackendType type;
|
||||
|
||||
/**
|
||||
* @brief Dumps derived class statistics.
|
||||
|
||||
@@ -56,7 +56,21 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND };
|
||||
/**
|
||||
* @brief Device static dispatch method call.
|
||||
*/
|
||||
#if defined(USE_GDA)
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
#define DISPATCH(Func) \
|
||||
switch(this->btype) { \
|
||||
case BackendType::GDA_BACKEND: \
|
||||
static_cast<GDAContext *>(this)->Func; \
|
||||
break; \
|
||||
case BackendType::RO_BACKEND: \
|
||||
static_cast<ROContext *>(this)->Func; \
|
||||
break; \
|
||||
case BackendType::IPC_BACKEND: \
|
||||
default: \
|
||||
static_cast<IPCContext *>(this)->Func; \
|
||||
break; \
|
||||
}
|
||||
#elif defined(USE_GDA)
|
||||
#define DISPATCH(Func) \
|
||||
static_cast<GDAContext *>(this)->Func;
|
||||
#elif defined(USE_RO)
|
||||
@@ -70,7 +84,19 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND };
|
||||
/**
|
||||
* @brief Device static dispatch method call with a return value.
|
||||
*/
|
||||
#if defined(USE_GDA)
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
#define DISPATCH_RET(Func) \
|
||||
if (this->btype == BackendType::GDA_BACKEND) { \
|
||||
auto ret1 = static_cast<GDAContext *>(this)->Func; \
|
||||
return ret1; \
|
||||
} else if(this->btype == BackendType::RO_BACKEND) { \
|
||||
auto ret2 = static_cast<ROContext *>(this)->Func; \
|
||||
return ret2; \
|
||||
} else { \
|
||||
auto ret3 = static_cast<IPCContext *>(this)->Func; \
|
||||
return ret3; \
|
||||
}
|
||||
#elif defined(USE_GDA)
|
||||
#define DISPATCH_RET(Func) \
|
||||
auto ret_val = static_cast<GDAContext *>(this)->Func; \
|
||||
return ret_val;
|
||||
@@ -87,7 +113,23 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND };
|
||||
/**
|
||||
* @brief Device static dispatch method call with a return type of pointer.
|
||||
*/
|
||||
#if defined(USE_GDA)
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
#define DISPATCH_RET_PTR(Func) \
|
||||
void *ret_val{nullptr}; \
|
||||
switch(this->btype) { \
|
||||
case BackendType::GDA_BACKEND: \
|
||||
ret_val = static_cast<GDAContext *>(this)->Func; \
|
||||
break; \
|
||||
case BackendType::RO_BACKEND: \
|
||||
ret_val = static_cast<ROContext *>(this)->Func; \
|
||||
break; \
|
||||
case BackendType::IPC_BACKEND: \
|
||||
default: \
|
||||
ret_val = static_cast<IPCContext *>(this)->Func; \
|
||||
break; \
|
||||
} \
|
||||
return ret_val;
|
||||
#elif defined(USE_GDA)
|
||||
#define DISPATCH_RET_PTR(Func) \
|
||||
void *ret_val{nullptr}; \
|
||||
ret_val = static_cast<GDAContext *>(this)->Func; \
|
||||
@@ -111,7 +153,21 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND };
|
||||
* MPI_THREAD_MULTIPLE (for RMA and AMO operations) and the ordering and
|
||||
* threading semantics of collectives in OpenSHMEM match those of MPI.
|
||||
*/
|
||||
#if defined(USE_GDA)
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
#define HOST_DISPATCH(Func) \
|
||||
switch(this->btype) { \
|
||||
case BackendType::GDA_BACKEND: \
|
||||
static_cast<GDAHostContext *>(this)->Func; \
|
||||
break; \
|
||||
case BackendType::RO_BACKEND: \
|
||||
static_cast<ROHostContext *>(this)->Func; \
|
||||
break; \
|
||||
case BackendType::IPC_BACKEND: \
|
||||
default: \
|
||||
static_cast<IPCHostContext *>(this)->Func; \
|
||||
break; \
|
||||
}
|
||||
#elif defined(USE_GDA)
|
||||
#define HOST_DISPATCH(Func) static_cast<GDAHostContext *>(this)->Func;
|
||||
#elif defined(USE_RO)
|
||||
#define HOST_DISPATCH(Func) static_cast<ROHostContext *>(this)->Func;
|
||||
@@ -126,7 +182,19 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND };
|
||||
* MPI_THREAD_MULTIPLE (for RMA and AMO operations) and the ordering and
|
||||
* threading semantics of collectives in OpenSHMEM match those of MPI.
|
||||
*/
|
||||
#if defined(USE_GDA)
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
#define HOST_DISPATCH_RET(Func) \
|
||||
if (this->btype == BackendType::GDA_BACKEND) { \
|
||||
auto ret1 = static_cast<GDAHostContext *>(this)->Func; \
|
||||
return ret1; \
|
||||
} else if (this->btype == BackendType::RO_BACKEND) { \
|
||||
auto ret2 = static_cast<ROHostContext *>(this)->Func; \
|
||||
return ret2; \
|
||||
} else { \
|
||||
auto ret3 = static_cast<IPCHostContext *>(this)->Func; \
|
||||
return ret3; \
|
||||
}
|
||||
#elif defined(USE_GDA)
|
||||
#define HOST_DISPATCH_RET(Func) \
|
||||
auto ret_val = static_cast<GDAHostContext *>(this)->Func; \
|
||||
return ret_val;
|
||||
@@ -143,7 +211,23 @@ enum class BackendType { GDA_BACKEND, RO_BACKEND, IPC_BACKEND };
|
||||
/**
|
||||
* @brief Host static dispatch method call with a return type of pointer.
|
||||
*/
|
||||
#if defined(USE_GDA)
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
#define HOST_DISPATCH_RET_PTR(Func) \
|
||||
void *ret_val{nullptr}; \
|
||||
switch(this->btype) { \
|
||||
case BackendType::GDA_BACKEND: \
|
||||
ret_val = static_cast<GDAHostContext *>(this)->Func; \
|
||||
break; \
|
||||
case BackendType::RO_BACKEND: \
|
||||
ret_val = static_cast<ROHostContext *>(this)->Func; \
|
||||
break; \
|
||||
case BackendType::IPC_BACKEND: \
|
||||
default: \
|
||||
ret_val = static_cast<IPCHostContext *>(this)->Func; \
|
||||
break; \
|
||||
} \
|
||||
return ret_val;
|
||||
#elif defined(USE_GDA)
|
||||
#define HOST_DISPATCH_RET_PTR(Func) \
|
||||
void *ret_val{nullptr}; \
|
||||
ret_val = static_cast<GDAHostContext *>(this)->Func; \
|
||||
|
||||
@@ -471,6 +471,11 @@ class Context {
|
||||
*/
|
||||
int my_pe{-1};
|
||||
|
||||
/**
|
||||
* @brief Duplicated local copy of backend's type
|
||||
*/
|
||||
BackendType btype;
|
||||
|
||||
/**
|
||||
* @brief Stats common to all types of device contexts.
|
||||
*/
|
||||
|
||||
@@ -32,7 +32,8 @@ namespace rocshmem {
|
||||
__device__ Context::Context(Backend* handle, bool shareable)
|
||||
: num_pes(handle->getNumPEs()),
|
||||
my_pe(handle->getMyPE()),
|
||||
fence_(shareable) {
|
||||
fence_(shareable),
|
||||
btype(handle->type) {
|
||||
/*
|
||||
* Device-side context constructor is a work-group collective, so make
|
||||
* sure all the members have their default values before returning.
|
||||
|
||||
@@ -31,7 +31,8 @@ namespace rocshmem {
|
||||
__host__ Context::Context(Backend* handle, bool shareable)
|
||||
: num_pes(handle->getNumPEs()),
|
||||
my_pe(handle->getMyPE()),
|
||||
fence_(shareable) {
|
||||
fence_(shareable),
|
||||
btype(handle->type) {
|
||||
}
|
||||
|
||||
__host__ Context::~Context() {
|
||||
|
||||
@@ -31,14 +31,14 @@
|
||||
#if defined(USE_GDA)
|
||||
#include "gda/context_gda_device.hpp"
|
||||
#include "gda/context_gda_host.hpp"
|
||||
#elif defined(USE_RO)
|
||||
#endif
|
||||
#if defined(USE_RO)
|
||||
#include "reverse_offload/context_ro_device.hpp"
|
||||
#include "reverse_offload/context_ro_host.hpp"
|
||||
#elif defined(USE_IPC)
|
||||
#endif
|
||||
#if defined(USE_IPC)
|
||||
#include "ipc/context_ipc_device.hpp"
|
||||
#include "ipc/context_ipc_host.hpp"
|
||||
#else
|
||||
#error "Select one backend among USE_RO, USE_IPC, USE_GDA"
|
||||
#endif
|
||||
|
||||
#endif // LIBRARY_SRC_CONTEXT_INCL_HPP_
|
||||
|
||||
@@ -29,9 +29,11 @@
|
||||
#include "backend_type.hpp"
|
||||
#if defined(USE_GDA)
|
||||
#include "gda/context_gda_device.hpp"
|
||||
#elif defined(USE_RO)
|
||||
#endif
|
||||
#if defined(USE_RO)
|
||||
#include "reverse_offload/context_ro_device.hpp"
|
||||
#elif defined(USE_IPC)
|
||||
#endif
|
||||
#if defined(USE_IPC)
|
||||
#include "ipc/context_ipc_device.hpp"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -29,9 +29,11 @@
|
||||
#include "backend_type.hpp"
|
||||
#if defined(USE_GDA)
|
||||
#include "gda/context_gda_host.hpp"
|
||||
#elif defined(USE_RO)
|
||||
#endif
|
||||
#if defined(USE_RO)
|
||||
#include "reverse_offload/context_ro_host.hpp"
|
||||
#elif defined(USE_IPC)
|
||||
#endif
|
||||
#if defined(USE_IPC)
|
||||
#include "ipc/context_ipc_host.hpp"
|
||||
#endif
|
||||
|
||||
@@ -249,7 +251,7 @@ __host__ size_t Context::wait_until_any(T *ivars, size_t nelems,
|
||||
int cmp, T val) {
|
||||
ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ANY);
|
||||
|
||||
return HOST_DISPATCH(wait_until_any<T>(ivars, nelems, status, cmp, val));
|
||||
HOST_DISPATCH_RET(wait_until_any<T>(ivars, nelems, status, cmp, val));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
||||
@@ -46,6 +46,7 @@ namespace envvar {
|
||||
const var<size_t> max_wavefront_buffers("MAX_WF_BUFFERS", "", 1024);
|
||||
const var<std::string> requested_dev("USE_IB_HCA", "");
|
||||
const var<uint32_t> sq_size("SQ_SIZE", "", 1024);
|
||||
const var<std::string> backend("BACKEND", "");
|
||||
} // inline namespace _base
|
||||
|
||||
namespace bootstrap {
|
||||
|
||||
@@ -404,6 +404,7 @@ namespace envvar {
|
||||
extern const var<types::debug_level> debug_level;
|
||||
extern const var<size_t> heap_size;
|
||||
extern const var<size_t> max_num_teams;
|
||||
extern const var<std::string> backend;
|
||||
|
||||
/**
|
||||
* @brief Maximum number of contexts for the application
|
||||
|
||||
@@ -50,7 +50,7 @@ rocshmem_team_t get_external_team(GDATeam *team) {
|
||||
return reinterpret_cast<rocshmem_team_t>(team);
|
||||
}
|
||||
|
||||
int get_ls_non_zero_bit(char *bitmask, int mask_length) {
|
||||
static int get_ls_non_zero_bit(char *bitmask, int mask_length) {
|
||||
int position{-1};
|
||||
for (int bit_i = 0; bit_i < mask_length; bit_i++) {
|
||||
int byte_i = bit_i / CHAR_BIT;
|
||||
@@ -543,6 +543,39 @@ int GDABackend::mlx5_dv_dl_init () {
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/* Currently we only check whether we can dlopen a Direct Verbs library.
|
||||
** We might need to extend this logic to check whether we have interfaces that
|
||||
** can use those DV libraries
|
||||
*/
|
||||
int GDABackend::backend_can_run() {
|
||||
void *handle{nullptr};
|
||||
|
||||
/* Try opening bnxt DV libraries */
|
||||
handle = dlopen("libbnxt_re.so", RTLD_NOW);
|
||||
if (handle) {
|
||||
dlclose(handle);
|
||||
return ROCSHMEM_SUCCESS;
|
||||
} else {
|
||||
/* Try hard-coded PATH */
|
||||
handle = dlopen("/usr/local/lib/libbnxt_re.so", RTLD_NOW);
|
||||
if (handle) {
|
||||
dlclose(handle);
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
/* Try opening mlx5 DV libraries */
|
||||
handle = dlopen("libmlx5.so", RTLD_NOW);
|
||||
if (handle) {
|
||||
dlclose(handle);
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/* ToDo: opening ionic DV libraries */
|
||||
|
||||
return ROCSHMEM_ERROR;
|
||||
}
|
||||
|
||||
void GDABackend::setup_ibv() {
|
||||
autodetect_dv_libs();
|
||||
|
||||
|
||||
@@ -146,6 +146,17 @@ class GDABackend : public Backend {
|
||||
*/
|
||||
virtual ~GDABackend();
|
||||
|
||||
/**
|
||||
* @brief Verify whether GDA Backend could run
|
||||
*
|
||||
* @return ROSCHMEM_SUCCESS if GDA Backend can most likely be used
|
||||
* ROCSHMEM_ERROR otherwise
|
||||
*/
|
||||
static int backend_can_run(void);
|
||||
|
||||
/**
|
||||
* @brief
|
||||
*/
|
||||
__device__ bool create_ctx(int64_t options, rocshmem_ctx_t *ctx);
|
||||
|
||||
/**
|
||||
|
||||
@@ -286,7 +286,7 @@ __device__ T GDAContext::amo_fetch_cas(void *dst, T value, T cond, int pe) {
|
||||
|
||||
// Collectives TODO: loosely adapted from IPC, needs review
|
||||
template <typename T, ROCSHMEM_OP Op>
|
||||
__device__ void compute_reduce(T *src, T *dst, int size, int wg_id, int wg_size) {
|
||||
__device__ void gda_compute_reduce(T *src, T *dst, int size, int wg_id, int wg_size) {
|
||||
for (int i = wg_id; i < size; i += wg_size) {
|
||||
OpWrap<Op>::Calc(src, dst, i);
|
||||
}
|
||||
@@ -339,7 +339,7 @@ __device__ void GDAContext::internal_direct_allreduce(
|
||||
__syncthreads();
|
||||
|
||||
T *ptr = &pWrk[i * nelems];
|
||||
compute_reduce<T, Op>(ptr, dst, nelems, wg_id, wg_size);
|
||||
gda_compute_reduce<T, Op>(ptr, dst, nelems, wg_id, wg_size);
|
||||
threadfence_system();
|
||||
}
|
||||
}
|
||||
@@ -457,8 +457,8 @@ __device__ void GDAContext::internal_ring_allreduce(
|
||||
wait_until(&pSync[iter], ROCSHMEM_CMP_EQ, wait_val);
|
||||
}
|
||||
__syncthreads();
|
||||
compute_reduce<T, Op>(&pWrk[off_recv], &dst[off_seg + off_recv],
|
||||
chunk_size, wg_id, wg_size);
|
||||
gda_compute_reduce<T, Op>(&pWrk[off_recv], &dst[off_seg + off_recv],
|
||||
chunk_size, wg_id, wg_size);
|
||||
}
|
||||
|
||||
// Loop 2 in the example above
|
||||
|
||||
@@ -49,7 +49,7 @@ rocshmem_team_t get_external_team(IPCTeam *team) {
|
||||
return reinterpret_cast<rocshmem_team_t>(team);
|
||||
}
|
||||
|
||||
int get_ls_non_zero_bit(char *bitmask, int mask_length) {
|
||||
static int get_ls_non_zero_bit(char *bitmask, int mask_length) {
|
||||
int position = -1;
|
||||
|
||||
for (int bit_i = 0; bit_i < mask_length; bit_i++) {
|
||||
|
||||
@@ -160,7 +160,7 @@ __device__ T IPCContext::amo_fetch_cas(void *dest, T value, T cond, int pe) {
|
||||
|
||||
// Collectives
|
||||
template <typename T, ROCSHMEM_OP Op>
|
||||
__device__ void compute_reduce(T *src, T *dst, int size, int wg_id, int wg_size) {
|
||||
__device__ void ipc_compute_reduce(T *src, T *dst, int size, int wg_id, int wg_size) {
|
||||
for (int i = wg_id; i < size; i += wg_size) {
|
||||
OpWrap<Op>::Calc(src, dst, i);
|
||||
}
|
||||
@@ -213,7 +213,7 @@ __device__ void IPCContext::internal_direct_allreduce(
|
||||
__syncthreads();
|
||||
|
||||
T *ptr = &pWrk[i * nelems];
|
||||
compute_reduce<T, Op>(ptr, dst, nelems, wg_id, wg_size);
|
||||
ipc_compute_reduce<T, Op>(ptr, dst, nelems, wg_id, wg_size);
|
||||
threadfence_system();
|
||||
}
|
||||
}
|
||||
@@ -331,7 +331,7 @@ __device__ void IPCContext::internal_ring_allreduce(
|
||||
wait_until(&pSync[iter], ROCSHMEM_CMP_EQ, wait_val);
|
||||
}
|
||||
__syncthreads();
|
||||
compute_reduce<T, Op>(&pWrk[off_recv], &dst[off_seg + off_recv],
|
||||
ipc_compute_reduce<T, Op>(&pWrk[off_recv], &dst[off_seg + off_recv],
|
||||
chunk_size, wg_id, wg_size);
|
||||
}
|
||||
|
||||
|
||||
@@ -43,8 +43,6 @@ class ROContext : public Context {
|
||||
public:
|
||||
__host__ ROContext(Backend *b, size_t block_id = 0, bool default_ctx = false);
|
||||
|
||||
__device__ void threadfence_system();
|
||||
|
||||
__device__ void ctx_destroy();
|
||||
|
||||
__device__ void putmem(void *dest, const void *source, size_t nelems, int pe);
|
||||
|
||||
@@ -39,14 +39,14 @@
|
||||
#if defined(USE_GDA)
|
||||
#include "gda/backend_gda.hpp"
|
||||
#include "gda/context_gda_tmpl_host.hpp"
|
||||
#elif defined(USE_RO)
|
||||
#endif
|
||||
#if defined(USE_RO)
|
||||
#include "reverse_offload/backend_ro.hpp"
|
||||
#include "reverse_offload/context_ro_tmpl_host.hpp"
|
||||
#elif defined(USE_IPC)
|
||||
#endif
|
||||
#if defined(USE_IPC)
|
||||
#include "ipc/backend_ipc.hpp"
|
||||
#include "ipc/context_ipc_tmpl_host.hpp"
|
||||
#else
|
||||
#error "Select one backend among USE_RO, USE_IPC, USE_GDA"
|
||||
#endif
|
||||
#include "mpi_instance.hpp"
|
||||
#include "team.hpp"
|
||||
@@ -81,6 +81,39 @@ rocshmem_ctx_t ROCSHMEM_HOST_CTX_DEFAULT;
|
||||
* Begin Host Code
|
||||
**/
|
||||
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
static BackendType select_backend_type() {
|
||||
BackendType type;
|
||||
|
||||
/* Check whether the user explicitely requests a particular backend type */
|
||||
std::string envstr = envvar::backend;
|
||||
std::transform(envstr.begin(), envstr.end(), envstr.begin(), ::tolower);
|
||||
if (!envstr.empty()) {
|
||||
DPRINTF("Found environment variable ROCSHMEM_BACKEND, value is %s\n", envstr.c_str());
|
||||
if (envstr.find("gda") != std::string::npos) {
|
||||
return BackendType::GDA_BACKEND;
|
||||
}
|
||||
if (envstr.find("ro") != std::string::npos) {
|
||||
return BackendType::RO_BACKEND;
|
||||
}
|
||||
if (envstr.find("ipc") != std::string::npos) {
|
||||
return BackendType::IPC_BACKEND;
|
||||
}
|
||||
}
|
||||
|
||||
if (GDABackend::backend_can_run() == ROCSHMEM_SUCCESS) {
|
||||
DPRINTF("GDABackend::backend_can_run returned success\n");
|
||||
return BackendType::GDA_BACKEND;
|
||||
}
|
||||
if (MPIInstance::mpilib_dl_init() == ROCSHMEM_SUCCESS) {
|
||||
DPRINTF("MPIInstance could dl_init MPI library\n");
|
||||
return BackendType::RO_BACKEND;
|
||||
}
|
||||
|
||||
return BackendType::IPC_BACKEND;
|
||||
}
|
||||
#endif
|
||||
|
||||
[[maybe_unused]] __host__ void inline library_init(MPI_Comm comm) {
|
||||
assert(!backend);
|
||||
int count = 0;
|
||||
@@ -97,7 +130,26 @@ rocshmem_ctx_t ROCSHMEM_HOST_CTX_DEFAULT;
|
||||
ret = MPIInstance::mpilib_dl_init();
|
||||
mpi_instance = new MPIInstance(comm);
|
||||
|
||||
#if defined(USE_GDA)
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
BackendType type = select_backend_type();
|
||||
switch (type) {
|
||||
case BackendType::GDA_BACKEND:
|
||||
DPRINTF("Initializing GDA backend using MPI\n");
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend)));
|
||||
backend = new (backend) GDABackend(comm);
|
||||
break;
|
||||
case BackendType::RO_BACKEND:
|
||||
DPRINTF("Initializing RO backend using MPI\n");
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend)));
|
||||
backend = new (backend) ROBackend(comm);
|
||||
break;
|
||||
case BackendType::IPC_BACKEND:
|
||||
DPRINTF("Initializing IPC backend using MPI\n");
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(IPCBackend)));
|
||||
backend = new (backend) IPCBackend(comm);
|
||||
break;
|
||||
}
|
||||
#elif defined(USE_GDA)
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend)));
|
||||
backend = new (backend) GDABackend(comm);
|
||||
#elif defined(USE_RO)
|
||||
@@ -189,12 +241,41 @@ rocshmem_ctx_t ROCSHMEM_HOST_CTX_DEFAULT;
|
||||
|
||||
rocm_init();
|
||||
|
||||
#if defined(USE_GDA)
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
BackendType type = select_backend_type();
|
||||
switch (type) {
|
||||
case BackendType::GDA_BACKEND:
|
||||
DPRINTF("Initializing GDA backend with TCP bootstrapping\n");
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend)));
|
||||
backend = new (backend) GDABackend(bootstrap);
|
||||
break;
|
||||
case BackendType::RO_BACKEND:
|
||||
/* Not sure whether this is a valid configuration. Will leave it in for now */
|
||||
DPRINTF("Initializing RO backend with TCP bootstrapping\n");
|
||||
mpi_instance = new MPIInstance(MPI_COMM_WORLD);
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend)));
|
||||
backend = new (backend) ROBackend(MPI_COMM_WORLD);
|
||||
break;
|
||||
case BackendType::IPC_BACKEND:
|
||||
DPRINTF("Initializing IPC backend with TCP bootstrapping\n");
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(IPCBackend)));
|
||||
backend = new (backend) IPCBackend(bootstrap);
|
||||
break;
|
||||
}
|
||||
#elif defined(USE_GDA)
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend)));
|
||||
backend = new (backend) GDABackend(bootstrap);
|
||||
#elif defined(USE_RO)
|
||||
printf("RO Backend requires MPI library to be initialized, even when using uniqueId initializations!\n");
|
||||
abort();
|
||||
/* Not sure whether this is a valid configuration. Will leave it in for now */
|
||||
int ret;
|
||||
ret = MPIInstance::mpilib_dl_init();
|
||||
if (ret != MPI_SUCCESS) {
|
||||
printf("RO Backend requires MPI library to be initialized, even when using uniqueId initializations!\n");
|
||||
abort();
|
||||
}
|
||||
mpi_instance = new MPIInstance(MPI_COMM_WORLD);
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend)));
|
||||
backend = new (backend) ROBackend(MPI_COMM_WORLD);
|
||||
#elif defined(USE_IPC)
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(IPCBackend)));
|
||||
backend = new (backend) IPCBackend(bootstrap);
|
||||
|
||||
@@ -53,15 +53,15 @@
|
||||
|
||||
#if defined(USE_GDA)
|
||||
#include "gda/context_gda_tmpl_device.hpp"
|
||||
#elif defined(USE_RO)
|
||||
#endif
|
||||
#if defined(USE_RO)
|
||||
#include "reverse_offload/context_ro_tmpl_device.hpp"
|
||||
#elif defined(USE_IPC)
|
||||
#endif
|
||||
#if defined(USE_IPC)
|
||||
# if defined(ENABLE_IPC_BITCODE)
|
||||
# include "ipc/backend_ipc.hpp"
|
||||
# endif
|
||||
#include "ipc/context_ipc_tmpl_device.hpp"
|
||||
#else
|
||||
#error "Select one backend among USE_RO, USE_IPC, USE_GDA"
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cctype>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "rocshmem/rocshmem_config.h" // NOLINT(build/include_subdir)
|
||||
|
||||
새 이슈에서 참조
사용자 차단