Merge pull request #20 from avinashkethineedi/ipc_teams
IPC Teams
[ROCm/rocshmem commit: 0c86939a30]
Этот коммит содержится в:
@@ -29,4 +29,5 @@ target_sources(
|
||||
context_ipc_device.cpp
|
||||
context_ipc_host.cpp
|
||||
backend_ipc.cpp
|
||||
ipc_team.cpp
|
||||
)
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
*****************************************************************************/
|
||||
|
||||
#include "backend_ipc.hpp"
|
||||
#include "ipc_team.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
@@ -34,6 +35,24 @@ namespace rocshmem {
|
||||
|
||||
extern roc_shmem_ctx_t ROC_SHMEM_HOST_CTX_DEFAULT;
|
||||
|
||||
roc_shmem_team_t get_external_team(GPUIBTeam *team) {
|
||||
return reinterpret_cast<roc_shmem_team_t>(team);
|
||||
}
|
||||
|
||||
int get_ls_non_zero_bit(char *bitmask, int mask_length) {
|
||||
int position = -1;
|
||||
|
||||
for (int bit_i = 0; bit_i < mask_length; bit_i++) {
|
||||
int byte_i = bit_i / CHAR_BIT;
|
||||
if (bitmask[byte_i] & (1 << (bit_i % CHAR_BIT))) {
|
||||
position = bit_i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return position;
|
||||
}
|
||||
|
||||
IPCBackend::IPCBackend(MPI_Comm comm)
|
||||
: Backend() {
|
||||
type = BackendType::IPC_BACKEND;
|
||||
@@ -54,7 +73,6 @@ IPCBackend::IPCBackend(MPI_Comm comm)
|
||||
/* Initialize the host interface */
|
||||
host_interface =
|
||||
new HostInterface(hdp_proxy_.get(), thread_comm, &heap);
|
||||
//free host interface
|
||||
|
||||
default_host_ctx = std::make_unique<IPCHostContext>(this, 0);
|
||||
|
||||
@@ -66,12 +84,42 @@ IPCBackend::IPCBackend(MPI_Comm comm)
|
||||
|
||||
default_context_proxy_ = IPCDefaultContextProxyT(this);
|
||||
|
||||
setup_team_world();
|
||||
|
||||
roc_shmem_collective_init();
|
||||
|
||||
teams_init();
|
||||
|
||||
setup_ctxs();
|
||||
|
||||
}
|
||||
|
||||
IPCBackend::~IPCBackend() {
|
||||
ipc_net_free_runtime();
|
||||
/*
|
||||
* Validate that a handle was passed that is not a nullptr.
|
||||
*/
|
||||
auto *bp{ipc_backend_proxy.get()};
|
||||
assert(bp);
|
||||
|
||||
/*
|
||||
* Free the atomic_ret array.
|
||||
*/
|
||||
CHECK_HIP(hipFree(bp->atomic_ret->atomic_base_ptr));
|
||||
|
||||
// TODO(Avinash) Free g_ret
|
||||
|
||||
// delete host_interface;
|
||||
// host_interface = nullptr;
|
||||
|
||||
/**
|
||||
* Destroy teams infrastructure
|
||||
* and team world
|
||||
*/
|
||||
teams_destroy();
|
||||
auto *team_world{team_tracker.get_team_world()};
|
||||
team_world->~Team();
|
||||
CHECK_HIP(hipFree(team_world));
|
||||
|
||||
CHECK_HIP(hipFree(ctx_array));
|
||||
}
|
||||
|
||||
@@ -100,6 +148,31 @@ __device__ void IPCBackend::destroy_ctx(roc_shmem_ctx_t *ctx) {
|
||||
ctx_free_list.get()->push_back(static_cast<IPCContext *>(ctx->ctx_opaque));
|
||||
}
|
||||
|
||||
void IPCBackend::setup_team_world() {
|
||||
TeamInfo *team_info_wrt_parent, *team_info_wrt_world;
|
||||
|
||||
/**
|
||||
* Allocate device-side memory for team_world and construct a
|
||||
* IPC team in it.
|
||||
*/
|
||||
CHECK_HIP(hipMalloc(&team_info_wrt_parent, sizeof(TeamInfo)));
|
||||
CHECK_HIP(hipMalloc(&team_info_wrt_world, sizeof(TeamInfo)));
|
||||
|
||||
new (team_info_wrt_parent) TeamInfo(nullptr, 0, 1, num_pes);
|
||||
new (team_info_wrt_world) TeamInfo(nullptr, 0, 1, num_pes);
|
||||
|
||||
IPCTeam *team_world{nullptr};
|
||||
CHECK_HIP(hipMalloc(&team_world, sizeof(IPCTeam)));
|
||||
new (team_world) IPCTeam(this, team_info_wrt_parent, team_info_wrt_world,
|
||||
num_pes, my_pe, thread_comm, 0);
|
||||
team_tracker.set_team_world(team_world);
|
||||
|
||||
/**
|
||||
* Copy the address to ROC_SHMEM_TEAM_WORLD.
|
||||
*/
|
||||
ROC_SHMEM_TEAM_WORLD = reinterpret_cast<roc_shmem_team_t>(team_world);
|
||||
}
|
||||
|
||||
void IPCBackend::init_mpi_once(MPI_Comm comm) {
|
||||
int init_done{};
|
||||
NET_CHECK(MPI_Initialized(&init_done));
|
||||
@@ -119,15 +192,53 @@ void IPCBackend::init_mpi_once(MPI_Comm comm) {
|
||||
}
|
||||
|
||||
void IPCBackend::team_destroy(roc_shmem_team_t team) {
|
||||
assert(false);
|
||||
IPCTeam *team_obj = get_internal_ipc_team(team);
|
||||
|
||||
/* Mark the pool as available */
|
||||
int bit = team_obj->pool_index_;
|
||||
int byte_i = bit / CHAR_BIT;
|
||||
pool_bitmask_[byte_i] |= 1 << (bit % CHAR_BIT);
|
||||
|
||||
team_obj->~IPCTeam();
|
||||
CHECK_HIP(hipFree(team_obj));
|
||||
}
|
||||
|
||||
void IPCBackend::create_new_team(Team *parent_team,
|
||||
void IPCBackend::create_new_team([[maybe_unused]] Team *parent_team,
|
||||
TeamInfo *team_info_wrt_parent,
|
||||
TeamInfo *team_info_wrt_world, int num_pes,
|
||||
int my_pe_in_new_team, MPI_Comm team_comm,
|
||||
roc_shmem_team_t *new_team) {
|
||||
assert(false);
|
||||
/**
|
||||
* Read the bit mask and find out a common index into
|
||||
* the pool of available work arrays.
|
||||
*/
|
||||
NET_CHECK(MPI_Allreduce(pool_bitmask_, reduced_bitmask_, bitmask_size_,
|
||||
MPI_CHAR, MPI_BAND, team_comm));
|
||||
|
||||
/* Pick the least significant non-zero bit (logical layout) in the reduced
|
||||
* bitmask */
|
||||
auto max_num_teams{team_tracker.get_max_num_teams()};
|
||||
int common_index = get_ls_non_zero_bit(reduced_bitmask_, max_num_teams);
|
||||
if (common_index < 0) {
|
||||
/* No team available */
|
||||
abort();
|
||||
}
|
||||
|
||||
/* Mark the team as taken (by unsetting the bit in the pool bitmask) */
|
||||
int byte = common_index / CHAR_BIT;
|
||||
pool_bitmask_[byte] &= ~(1 << (common_index % CHAR_BIT));
|
||||
|
||||
/**
|
||||
* Allocate device-side memory for team_world and
|
||||
* construct a GPU_IB team in it
|
||||
*/
|
||||
GPUIBTeam *new_team_obj;
|
||||
CHECK_HIP(hipMalloc(&new_team_obj, sizeof(IPCTeam)));
|
||||
new (new_team_obj)
|
||||
IPCTeam(this, team_info_wrt_parent, team_info_wrt_world, num_pes,
|
||||
my_pe_in_new_team, team_comm, common_index);
|
||||
|
||||
*new_team = get_external_team(new_team_obj);
|
||||
}
|
||||
|
||||
void IPCBackend::ctx_create(int64_t options, void **ctx) {
|
||||
@@ -160,24 +271,122 @@ void IPCBackend::initIPC() {
|
||||
thread_comm);
|
||||
}
|
||||
|
||||
void IPCBackend::ipc_net_free_runtime() {
|
||||
/*
|
||||
* Validate that a handle was passed that is not a nullptr.
|
||||
*/
|
||||
auto *bp{ipc_backend_proxy.get()};
|
||||
assert(bp);
|
||||
|
||||
/*
|
||||
* Free the atomic_ret array.
|
||||
*/
|
||||
CHECK_HIP(hipFree(bp->atomic_ret->atomic_base_ptr));
|
||||
|
||||
// TODO(Avinash) Free g_ret
|
||||
}
|
||||
|
||||
void IPCBackend::global_exit(int status) {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
void IPCBackend::teams_destroy() {
|
||||
roc_shmem_free(barrier_pSync_pool);
|
||||
roc_shmem_free(reduce_pSync_pool);
|
||||
roc_shmem_free(bcast_pSync_pool);
|
||||
roc_shmem_free(alltoall_pSync_pool);
|
||||
roc_shmem_free(pWrk_pool);
|
||||
roc_shmem_free(pAta_pool);
|
||||
|
||||
free(pool_bitmask_);
|
||||
free(reduced_bitmask_);
|
||||
}
|
||||
|
||||
void IPCBackend::roc_shmem_collective_init() {
|
||||
/*
|
||||
* Allocate heap space for barrier_sync
|
||||
*/
|
||||
size_t one_sync_size_bytes{sizeof(*barrier_sync)};
|
||||
size_t sync_size_bytes{one_sync_size_bytes * ROC_SHMEM_BARRIER_SYNC_SIZE};
|
||||
heap.malloc(reinterpret_cast<void **>(&barrier_sync), sync_size_bytes);
|
||||
|
||||
/*
|
||||
* Initialize the barrier synchronization array with default values.
|
||||
*/
|
||||
for (int i = 0; i < num_pes; i++) {
|
||||
barrier_sync[i] = ROC_SHMEM_SYNC_VALUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure that all processing elements have done this before
|
||||
* continuing.
|
||||
*/
|
||||
NET_CHECK(MPI_Barrier(thread_comm));
|
||||
}
|
||||
|
||||
void IPCBackend::teams_init() {
|
||||
/**
|
||||
* Allocate pools for the teams sync and work arrary from the SHEAP.
|
||||
*/
|
||||
auto max_num_teams{team_tracker.get_max_num_teams()};
|
||||
barrier_pSync_pool = reinterpret_cast<long *>(roc_shmem_malloc(
|
||||
sizeof(long) * ROC_SHMEM_BARRIER_SYNC_SIZE * max_num_teams));
|
||||
reduce_pSync_pool = reinterpret_cast<long *>(roc_shmem_malloc(
|
||||
sizeof(long) * ROC_SHMEM_REDUCE_SYNC_SIZE * max_num_teams));
|
||||
bcast_pSync_pool = reinterpret_cast<long *>(roc_shmem_malloc(
|
||||
sizeof(long) * ROC_SHMEM_BCAST_SYNC_SIZE * max_num_teams));
|
||||
alltoall_pSync_pool = reinterpret_cast<long *>(roc_shmem_malloc(
|
||||
sizeof(long) * ROC_SHMEM_ALLTOALL_SYNC_SIZE * max_num_teams));
|
||||
|
||||
/* Accommodating for largest possible data type for pWrk */
|
||||
pWrk_pool = roc_shmem_malloc(
|
||||
sizeof(double) * ROC_SHMEM_REDUCE_MIN_WRKDATA_SIZE * max_num_teams);
|
||||
pAta_pool = roc_shmem_malloc(sizeof(double) * ROC_SHMEM_ATA_MAX_WRKDATA_SIZE *
|
||||
max_num_teams);
|
||||
|
||||
/**
|
||||
* Initialize the sync arrays in the pool with default values.
|
||||
*/
|
||||
long *barrier_pSync, *reduce_pSync, *bcast_pSync, *alltoall_pSync;
|
||||
for (int team_i = 0; team_i < max_num_teams; team_i++) {
|
||||
barrier_pSync = reinterpret_cast<long *>(
|
||||
&barrier_pSync_pool[team_i * ROC_SHMEM_BARRIER_SYNC_SIZE]);
|
||||
reduce_pSync = reinterpret_cast<long *>(
|
||||
&reduce_pSync_pool[team_i * ROC_SHMEM_REDUCE_SYNC_SIZE]);
|
||||
bcast_pSync = reinterpret_cast<long *>(
|
||||
&bcast_pSync_pool[team_i * ROC_SHMEM_BCAST_SYNC_SIZE]);
|
||||
alltoall_pSync = reinterpret_cast<long *>(
|
||||
&alltoall_pSync_pool[team_i * ROC_SHMEM_ALLTOALL_SYNC_SIZE]);
|
||||
|
||||
for (int i = 0; i < ROC_SHMEM_BARRIER_SYNC_SIZE; i++) {
|
||||
barrier_pSync[i] = ROC_SHMEM_SYNC_VALUE;
|
||||
}
|
||||
for (int i = 0; i < ROC_SHMEM_REDUCE_SYNC_SIZE; i++) {
|
||||
reduce_pSync[i] = ROC_SHMEM_SYNC_VALUE;
|
||||
}
|
||||
for (int i = 0; i < ROC_SHMEM_BCAST_SYNC_SIZE; i++) {
|
||||
bcast_pSync[i] = ROC_SHMEM_SYNC_VALUE;
|
||||
}
|
||||
for (int i = 0; i < ROC_SHMEM_ALLTOALL_SYNC_SIZE; i++) {
|
||||
alltoall_pSync[i] = ROC_SHMEM_SYNC_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize bit mask
|
||||
*
|
||||
* Logical:
|
||||
* MSB..........................................................................LSB
|
||||
* Physical: MSB...1st least significant 8 bits...LSB MSB...2nd least
|
||||
* signifant 8 bits...LSB
|
||||
*
|
||||
* Description shows only a 2-byte long mask but idea extends to any
|
||||
* arbitrary size.
|
||||
*/
|
||||
bitmask_size_ = (max_num_teams % CHAR_BIT) ? (max_num_teams / CHAR_BIT + 1)
|
||||
: (max_num_teams / CHAR_BIT);
|
||||
pool_bitmask_ = reinterpret_cast<char *>(malloc(bitmask_size_));
|
||||
reduced_bitmask_ = reinterpret_cast<char *>(malloc(bitmask_size_));
|
||||
|
||||
memset(pool_bitmask_, 0, bitmask_size_);
|
||||
memset(reduced_bitmask_, 0, bitmask_size_);
|
||||
/* Set all to available except the 0th one (reserved for TEAM_WORLD) */
|
||||
for (int bit_i = 1; bit_i < max_num_teams; bit_i++) {
|
||||
int byte_i = bit_i / CHAR_BIT;
|
||||
|
||||
pool_bitmask_[byte_i] |= 1 << (bit_i % CHAR_BIT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Make sure that all processing elements have done this before
|
||||
* continuing.
|
||||
*/
|
||||
NET_CHECK(MPI_Barrier(thread_comm));
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
@@ -86,21 +86,6 @@ class IPCBackend : public Backend {
|
||||
*/
|
||||
void setup_ctxs();
|
||||
|
||||
/**
|
||||
* @brief Free all resources associated with the backend.
|
||||
*
|
||||
* The memory allocated to the handle param is deallocated during this
|
||||
* method. The handle should be treated as a nullptr after the call.
|
||||
*
|
||||
* The destructor treats this method as a helper function to destroy
|
||||
* this object.
|
||||
*
|
||||
* @todo The method needs to be broken into smaller pieces and most
|
||||
* of these internal resources need to be moved into subclasses using
|
||||
* RAII.
|
||||
*/
|
||||
void ipc_net_free_runtime();
|
||||
|
||||
/**
|
||||
* @brief Abort the application.
|
||||
*
|
||||
@@ -136,6 +121,41 @@ class IPCBackend : public Backend {
|
||||
*/
|
||||
HostInterface *host_interface{nullptr};
|
||||
|
||||
/**
|
||||
* @brief Scratchpad for the internal barrier algorithms.
|
||||
*/
|
||||
int64_t *barrier_sync{nullptr};
|
||||
|
||||
/**
|
||||
* @brief Handle for raw memory for barrier sync
|
||||
*/
|
||||
long *barrier_pSync_pool{nullptr};
|
||||
|
||||
/**
|
||||
* @brief Handle for raw memory for reduce sync
|
||||
*/
|
||||
long *reduce_pSync_pool{nullptr};
|
||||
|
||||
/**
|
||||
* @brief Handle for raw memory for broadcast sync
|
||||
*/
|
||||
long *bcast_pSync_pool{nullptr};
|
||||
|
||||
/**
|
||||
* @brief Handle for raw memory for alltoall sync
|
||||
*/
|
||||
long *alltoall_pSync_pool{nullptr};
|
||||
|
||||
/**
|
||||
* @brief Handle for raw memory for work
|
||||
*/
|
||||
void *pWrk_pool{nullptr};
|
||||
|
||||
/**
|
||||
* @brief Handle for raw memory for alltoall
|
||||
*/
|
||||
void *pAta_pool{nullptr};
|
||||
|
||||
protected:
|
||||
/**
|
||||
* @copydoc Backend::dump_backend_stats()
|
||||
@@ -159,6 +179,30 @@ class IPCBackend : public Backend {
|
||||
*/
|
||||
std::unique_ptr<IPCHostContext> default_host_ctx{nullptr};
|
||||
|
||||
/**
|
||||
* @brief Allocate and initialize team world.
|
||||
*/
|
||||
void setup_team_world();
|
||||
|
||||
/**
|
||||
* @brief Initialize the resources required to support teams
|
||||
*/
|
||||
void teams_init();
|
||||
|
||||
/**
|
||||
* @brief Destruct the resources required to support teams
|
||||
*/
|
||||
void teams_destroy();
|
||||
|
||||
/**
|
||||
* @brief Allocate and initialize barrier operation addresses on
|
||||
* symmetric heap.
|
||||
*
|
||||
* When this method completes, the barrier_sync member will be available
|
||||
* for use.
|
||||
*/
|
||||
void roc_shmem_collective_init();
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Proxy for the default context
|
||||
@@ -182,6 +226,24 @@ class IPCBackend : public Backend {
|
||||
*/
|
||||
size_t maximum_num_contexts_{1024};
|
||||
|
||||
/**
|
||||
* @brief The bitmask representing the availability of teams in the pool
|
||||
*/
|
||||
char *pool_bitmask_{nullptr};
|
||||
|
||||
/**
|
||||
* @brief Bitmask to store the reduced result of bitmasks on pariticipating
|
||||
* PEs
|
||||
*
|
||||
* With no thread-safety for this bitmask, multithreaded creation of teams is
|
||||
* not supported.
|
||||
*/
|
||||
char *reduced_bitmask_{nullptr};
|
||||
|
||||
/**
|
||||
* @brief Size of the bitmask
|
||||
*/
|
||||
int bitmask_size_{-1};
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "ipc_team.hpp"
|
||||
|
||||
#include "../backend_type.hpp"
|
||||
#include "backend_ipc.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
IPCTeam::IPCTeam(Backend *backend, TeamInfo *team_info_parent,
|
||||
TeamInfo *team_info_world, int num_pes, int my_pe,
|
||||
MPI_Comm mpi_comm, int pool_index)
|
||||
: Team(backend, team_info_parent, team_info_world, num_pes, my_pe,
|
||||
mpi_comm) {
|
||||
type = BackendType::IPC_BACKEND;
|
||||
const IPCBackend *b = static_cast<const IPCBackend *>(backend);
|
||||
|
||||
pool_index_ = pool_index;
|
||||
|
||||
barrier_pSync =
|
||||
&(b->barrier_pSync_pool[pool_index * ROC_SHMEM_BARRIER_SYNC_SIZE]);
|
||||
reduce_pSync =
|
||||
&(b->reduce_pSync_pool[pool_index * ROC_SHMEM_REDUCE_SYNC_SIZE]);
|
||||
bcast_pSync = &(b->bcast_pSync_pool[pool_index * ROC_SHMEM_BCAST_SYNC_SIZE]);
|
||||
alltoall_pSync =
|
||||
&(b->alltoall_pSync_pool[pool_index * ROC_SHMEM_ALLTOALL_SYNC_SIZE]);
|
||||
|
||||
pWrk = reinterpret_cast<char *>(b->pWrk_pool) +
|
||||
ROC_SHMEM_REDUCE_MIN_WRKDATA_SIZE * sizeof(double) * pool_index;
|
||||
pAta = reinterpret_cast<char *>(b->pAta_pool) +
|
||||
ROC_SHMEM_ATA_MAX_WRKDATA_SIZE * sizeof(double) * pool_index;
|
||||
}
|
||||
|
||||
IPCTeam::~IPCTeam() {}
|
||||
|
||||
} // namespace rocshmem
|
||||
@@ -0,0 +1,50 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef LIBRARY_SRC_IPC_TEAM_HPP_
|
||||
#define LIBRARY_SRC_IPC_TEAM_HPP_
|
||||
|
||||
#include "../team.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
class IPCTeam : public Team {
|
||||
public:
|
||||
IPCTeam(Backend* handle, TeamInfo* team_info_wrt_parent,
|
||||
TeamInfo* team_info_wrt_world, int num_pes, int my_pe,
|
||||
MPI_Comm team_comm, int pool_index);
|
||||
|
||||
virtual ~IPCTeam();
|
||||
|
||||
long* barrier_pSync{nullptr};
|
||||
long* reduce_pSync{nullptr};
|
||||
long* bcast_pSync{nullptr};
|
||||
long* alltoall_pSync{nullptr};
|
||||
void* pWrk{nullptr};
|
||||
void* pAta{nullptr};
|
||||
|
||||
int pool_index_{-1};
|
||||
};
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // LIBRARY_SRC_IPC_TEAM_HPP_
|
||||
@@ -44,6 +44,10 @@ ROTeam* get_internal_ro_team(roc_shmem_team_t team) {
|
||||
return reinterpret_cast<ROTeam*>(team);
|
||||
}
|
||||
|
||||
IPCTeam* get_internal_ipc_team(roc_shmem_team_t team) {
|
||||
return reinterpret_cast<IPCTeam*>(team);
|
||||
}
|
||||
|
||||
__host__ __device__ int team_translate_pe(roc_shmem_team_t src_team, int src_pe,
|
||||
roc_shmem_team_t dst_team) {
|
||||
if (src_team == ROC_SHMEM_TEAM_INVALID ||
|
||||
|
||||
@@ -34,6 +34,7 @@ class Backend;
|
||||
class Team;
|
||||
class ROTeam;
|
||||
class GPUIBTeam;
|
||||
class IPCTeam;
|
||||
|
||||
class TeamInfo {
|
||||
public:
|
||||
@@ -162,6 +163,8 @@ GPUIBTeam* get_internal_gpu_ib_team(roc_shmem_team_t team);
|
||||
|
||||
ROTeam* get_internal_ro_team(roc_shmem_team_t team);
|
||||
|
||||
IPCTeam* get_internal_ipc_team(roc_shmem_team_t team);
|
||||
|
||||
__host__ __device__ int team_translate_pe(roc_shmem_team_t src_team, int src_pe,
|
||||
roc_shmem_team_t dst_team);
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user