Added Signalling Operations
Tento commit je obsažen v:
@@ -66,6 +66,11 @@ enum ROC_SHMEM_OP {
|
||||
ROC_SHMEM_REPLACE
|
||||
};
|
||||
|
||||
enum ROC_SHMEM_SIGNAL_OPS {
|
||||
ROC_SHMEM_SIGNAL_SET,
|
||||
ROC_SHMEM_SIGNAL_ADD,
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Types defined for roc_shmem_wait() operations.
|
||||
*/
|
||||
@@ -2627,6 +2632,70 @@ GET_NBI_API_EXT_GEN(wg, unsigned long, ulong) // NOLINT(runtime/int)
|
||||
GET_NBI_API_EXT_GEN(wg, unsigned long long, ulonglong) // NOLINT(runtime/int)
|
||||
///@}
|
||||
|
||||
|
||||
/*
|
||||
* ROC_SHMEM Signalling Operations
|
||||
*/
|
||||
#define PUTMEM_SIGNAL_DEC(SUFFIX) \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_putmem_signal##SUFFIX(void *dest, \
|
||||
const void *source, \
|
||||
size_t nelems, \
|
||||
uint64_t *sig_addr, \
|
||||
uint64_t signal, \
|
||||
int sig_op, int pe); \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_putmem_signal##SUFFIX(roc_shmem_ctx_t ctx, \
|
||||
void *dest, \
|
||||
const void *source, \
|
||||
size_t nelems, \
|
||||
uint64_t *sig_addr, \
|
||||
uint64_t signal, \
|
||||
int sig_op, int pe);
|
||||
|
||||
#define PUT_SIGNAL_TYPED_DEC(T, TNAME, SUFFIX) \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_put_signal##SUFFIX(roc_shmem_ctx_t ctx, \
|
||||
T *dest, \
|
||||
const T *source, \
|
||||
size_t nelems, \
|
||||
uint64_t *sig_addr, \
|
||||
uint64_t signal, \
|
||||
int sig_op, int pe); \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_##TNAME##_put_signal##SUFFIX(T *dest, \
|
||||
const T *source, \
|
||||
size_t nelems, \
|
||||
uint64_t *sig_addr, \
|
||||
uint64_t signal, \
|
||||
int sig_op, int pe);
|
||||
|
||||
#define PUT_SIGNAL_DEC(SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(float, float, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(double, double, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(char, char, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(signed char, schar, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(short, short, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(int, int, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(long, long, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(long long, longlong, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(unsigned char, uchar, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(unsigned short, ushort, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(unsigned int, uint, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(unsigned long, ulong, SUFFIX) \
|
||||
PUT_SIGNAL_TYPED_DEC(unsigned long long, ulonglong, SUFFIX)
|
||||
|
||||
#define SIGNALING_API_DEC(SUFFIX) \
|
||||
PUTMEM_SIGNAL_DEC(SUFFIX) \
|
||||
PUT_SIGNAL_DEC(SUFFIX)
|
||||
|
||||
SIGNALING_API_DEC()
|
||||
SIGNALING_API_DEC(_wg)
|
||||
SIGNALING_API_DEC(_wave)
|
||||
SIGNALING_API_DEC(_nbi)
|
||||
SIGNALING_API_DEC(_nbi_wg)
|
||||
SIGNALING_API_DEC(_nbi_wave)
|
||||
|
||||
__device__ ATTR_NO_INLINE uint64_t roc_shmem_signal_fetch(const uint64_t *sig_addr);
|
||||
__device__ ATTR_NO_INLINE uint64_t roc_shmem_signal_fetch_wg(const uint64_t *sig_addr);
|
||||
__device__ ATTR_NO_INLINE uint64_t roc_shmem_signal_fetch_wave(const uint64_t *sig_addr);
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // LIBRARY_INCLUDE_ROC_SHMEM_HPP
|
||||
|
||||
@@ -271,6 +271,33 @@ class Context {
|
||||
template <typename T>
|
||||
__device__ void get_nbi_wave(T* dest, const T* source, size_t nelems, int pe);
|
||||
|
||||
#define CONTEXT_PUTMEM_SIGNAL_DEC(SUFFIX) \
|
||||
__device__ void putmem_signal##SUFFIX(void *dest, const void *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
|
||||
CONTEXT_PUTMEM_SIGNAL_DEC()
|
||||
CONTEXT_PUTMEM_SIGNAL_DEC(_wg)
|
||||
CONTEXT_PUTMEM_SIGNAL_DEC(_wave)
|
||||
CONTEXT_PUTMEM_SIGNAL_DEC(_nbi)
|
||||
CONTEXT_PUTMEM_SIGNAL_DEC(_nbi_wg)
|
||||
CONTEXT_PUTMEM_SIGNAL_DEC(_nbi_wave)
|
||||
|
||||
#define CONTEXT_PUT_SIGNAL_DEC(SUFFIX) \
|
||||
template <typename T> \
|
||||
__device__ void put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
|
||||
CONTEXT_PUT_SIGNAL_DEC()
|
||||
CONTEXT_PUT_SIGNAL_DEC(_wg)
|
||||
CONTEXT_PUT_SIGNAL_DEC(_wave)
|
||||
CONTEXT_PUT_SIGNAL_DEC(_nbi)
|
||||
CONTEXT_PUT_SIGNAL_DEC(_nbi_wg)
|
||||
CONTEXT_PUT_SIGNAL_DEC(_nbi_wave)
|
||||
|
||||
__device__ uint64_t signal_fetch(const uint64_t *sig_addr);
|
||||
__device__ uint64_t signal_fetch_wg(const uint64_t *sig_addr);
|
||||
__device__ uint64_t signal_fetch_wave(const uint64_t *sig_addr);
|
||||
|
||||
/**************************************************************************
|
||||
****************************** HOST METHODS ******************************
|
||||
*************************************************************************/
|
||||
|
||||
@@ -248,4 +248,34 @@ __device__ void Context::getmem_nbi_wave(void* dest, const void* source,
|
||||
DISPATCH(getmem_nbi_wave(dest, source, size, pe));
|
||||
}
|
||||
|
||||
#define CONTEXT_PUTMEM_SIGNAL_DEF(SUFFIX, STATS_SUFFIX) \
|
||||
__device__ void Context::putmem_signal##SUFFIX(void *dest, const void *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, \
|
||||
int pe) { \
|
||||
if (nelems == 0) { \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
ctxStats.incStat(NUM_PUT_SIGNAL##STATS_SUFFIX); \
|
||||
\
|
||||
DISPATCH(putmem_signal##SUFFIX(dest, source, nelems, sig_addr, signal, sig_op, pe)); \
|
||||
}
|
||||
|
||||
CONTEXT_PUTMEM_SIGNAL_DEF(,)
|
||||
CONTEXT_PUTMEM_SIGNAL_DEF(_wg, _WG)
|
||||
CONTEXT_PUTMEM_SIGNAL_DEF(_wave, _WAVE)
|
||||
CONTEXT_PUTMEM_SIGNAL_DEF(_nbi, _NBI)
|
||||
CONTEXT_PUTMEM_SIGNAL_DEF(_nbi_wg, _NBI_WG)
|
||||
CONTEXT_PUTMEM_SIGNAL_DEF(_nbi_wave, _NBI_WAVE)
|
||||
|
||||
#define CONTEXT_SIGNAL_FETCH_DEF(SUFFIX) \
|
||||
__device__ uint64_t Context::signal_fetch##SUFFIX(const uint64_t *sig_addr) \
|
||||
{ \
|
||||
DISPATCH_RET(signal_fetch##SUFFIX(sig_addr)); \
|
||||
}
|
||||
|
||||
CONTEXT_SIGNAL_FETCH_DEF()
|
||||
CONTEXT_SIGNAL_FETCH_DEF(_wg)
|
||||
CONTEXT_SIGNAL_FETCH_DEF(_wave)
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
@@ -556,6 +556,27 @@ __device__ void Context::amo_cas(void *dst, T value, T cond, int pe) {
|
||||
DISPATCH(amo_cas(dst, value, cond, pe));
|
||||
}
|
||||
|
||||
#define CONTEXT_PUT_SIGNAL_DEF(SUFFIX, STATS_SUFFIX) \
|
||||
template <typename T> \
|
||||
__device__ void Context::put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, \
|
||||
int pe) { \
|
||||
if (nelems == 0) { \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
ctxStats.incStat(NUM_PUT_SIGNAL##STATS_SUFFIX); \
|
||||
\
|
||||
DISPATCH(put_signal##SUFFIX(dest, source, nelems, sig_addr, signal, sig_op, pe)); \
|
||||
}
|
||||
|
||||
CONTEXT_PUT_SIGNAL_DEF(,)
|
||||
CONTEXT_PUT_SIGNAL_DEF(_wg, _WG)
|
||||
CONTEXT_PUT_SIGNAL_DEF(_wave, _WAVE)
|
||||
CONTEXT_PUT_SIGNAL_DEF(_nbi, _NBI)
|
||||
CONTEXT_PUT_SIGNAL_DEF(_nbi_wg, _NBI_WG)
|
||||
CONTEXT_PUT_SIGNAL_DEF(_nbi_wave, _NBI_WAVE)
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // LIBRARY_SRC_CONTEXT_TMPL_DEVICE_HPP_
|
||||
|
||||
@@ -222,4 +222,109 @@ __device__ void IPCContext::internal_getmem_wave(void *dest,
|
||||
ipcImpl_.ipcFence();
|
||||
}
|
||||
|
||||
__device__ void IPCContext::putmem_signal(void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op,
|
||||
int pe) {
|
||||
putmem(dest, source, nelems, pe);
|
||||
fence();
|
||||
|
||||
switch (sig_op) {
|
||||
case ROC_SHMEM_SIGNAL_SET:
|
||||
amo_set<uint64_t>(static_cast<void*>(sig_addr), signal, pe);
|
||||
break;
|
||||
case ROC_SHMEM_SIGNAL_ADD:
|
||||
amo_add<uint64_t>(static_cast<void*>(dest), signal, pe);
|
||||
break;
|
||||
default:
|
||||
DPRINTF("[%s] Invalid sig_op value (%d)\n", __func__, sig_op);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void IPCContext::putmem_signal_wg(void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op,
|
||||
int pe) {
|
||||
putmem_wg(dest, source, nelems, pe);
|
||||
fence();
|
||||
|
||||
if (is_thread_zero_in_block()) {
|
||||
switch (sig_op) {
|
||||
case ROC_SHMEM_SIGNAL_SET:
|
||||
amo_set<uint64_t>(static_cast<void*>(sig_addr), signal, pe);
|
||||
break;
|
||||
case ROC_SHMEM_SIGNAL_ADD:
|
||||
amo_add<uint64_t>(static_cast<void*>(dest), signal, pe);
|
||||
break;
|
||||
default:
|
||||
DPRINTF("[%s] Invalid sig_op value (%d)\n", __func__, sig_op);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void IPCContext::putmem_signal_wave(void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op,
|
||||
int pe) {
|
||||
putmem_wave(dest, source, nelems, pe);
|
||||
fence();
|
||||
|
||||
if (is_thread_zero_in_wave()) {
|
||||
switch (sig_op) {
|
||||
case ROC_SHMEM_SIGNAL_SET:
|
||||
amo_set<uint64_t>(static_cast<void*>(sig_addr), signal, pe);
|
||||
break;
|
||||
case ROC_SHMEM_SIGNAL_ADD:
|
||||
amo_add<uint64_t>(static_cast<void*>(dest), signal, pe);
|
||||
break;
|
||||
default:
|
||||
DPRINTF("[%s] Invalid sig_op value (%d)\n", __func__, sig_op);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void IPCContext::putmem_signal_nbi(void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op,
|
||||
int pe) {
|
||||
putmem_signal(dest, source, nelems, sig_addr, signal, sig_op, pe);
|
||||
}
|
||||
|
||||
__device__ void IPCContext::putmem_signal_nbi_wg(void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op,
|
||||
int pe) {
|
||||
putmem_signal_wg(dest, source, nelems, sig_addr, signal, sig_op, pe);
|
||||
}
|
||||
|
||||
__device__ void IPCContext::putmem_signal_nbi_wave(void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op,
|
||||
int pe) {
|
||||
putmem_signal_wave(dest, source, nelems, sig_addr, signal, sig_op, pe);
|
||||
}
|
||||
|
||||
__device__ uint64_t IPCContext::signal_fetch(const uint64_t *sig_addr) {
|
||||
uint64_t *dst = const_cast<uint64_t*>(sig_addr);
|
||||
return amo_fetch_add<uint64_t>(static_cast<void*>(dst), 0, my_pe);
|
||||
}
|
||||
|
||||
__device__ uint64_t IPCContext::signal_fetch_wg(const uint64_t *sig_addr) {
|
||||
__shared__ uint64_t value;
|
||||
if (is_thread_zero_in_block()) {
|
||||
uint64_t *dst = const_cast<uint64_t*>(sig_addr);
|
||||
value = amo_fetch_add<uint64_t>(static_cast<void*>(dst), 0, my_pe);
|
||||
}
|
||||
__threadfence_block();
|
||||
return value;
|
||||
}
|
||||
|
||||
__device__ uint64_t IPCContext::signal_fetch_wave(const uint64_t *sig_addr) {
|
||||
uint64_t value;
|
||||
if (is_thread_zero_in_wave()) {
|
||||
uint64_t *dst = const_cast<uint64_t*>(sig_addr);
|
||||
value = amo_fetch_add<uint64_t>(static_cast<void*>(dst), 0, my_pe);
|
||||
}
|
||||
__threadfence_block();
|
||||
value = __shfl(value, 0);
|
||||
return value;
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
@@ -186,6 +186,27 @@ class IPCContext : public Context {
|
||||
template <typename T>
|
||||
__device__ void get_nbi_wave(T *dest, const T *source, size_t nelems, int pe);
|
||||
|
||||
#define IPC_CONTEXT_PUT_SIGNAL_DEC(SUFFIX) \
|
||||
template <typename T> \
|
||||
__device__ void put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, \
|
||||
int pe); \
|
||||
\
|
||||
__device__ void putmem_signal##SUFFIX(void *dest, const void *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, \
|
||||
int pe);
|
||||
|
||||
IPC_CONTEXT_PUT_SIGNAL_DEC()
|
||||
IPC_CONTEXT_PUT_SIGNAL_DEC(_wg)
|
||||
IPC_CONTEXT_PUT_SIGNAL_DEC(_wave)
|
||||
IPC_CONTEXT_PUT_SIGNAL_DEC(_nbi)
|
||||
IPC_CONTEXT_PUT_SIGNAL_DEC(_nbi_wg)
|
||||
IPC_CONTEXT_PUT_SIGNAL_DEC(_nbi_wave)
|
||||
|
||||
__device__ uint64_t signal_fetch(const uint64_t *sig_addr);
|
||||
__device__ uint64_t signal_fetch_wg(const uint64_t *sig_addr);
|
||||
__device__ uint64_t signal_fetch_wave(const uint64_t *sig_addr);
|
||||
|
||||
private:
|
||||
|
||||
//context class has IpcImpl object (ipcImpl_)
|
||||
|
||||
@@ -578,6 +578,25 @@ __device__ void IPCContext::get_nbi_wave(T *dest, const T *source,
|
||||
getmem_nbi_wave(dest, source, nelems * sizeof(T), pe);
|
||||
}
|
||||
|
||||
#define IPC_CONTEXT_PUT_SIGNAL_DEF(SUFFIX) \
|
||||
template <typename T> \
|
||||
__device__ void IPCContext::put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, \
|
||||
int pe) { \
|
||||
putmem_signal##SUFFIX(dest, source, nelems * sizeof(T), sig_addr, signal, sig_op, pe); \
|
||||
} \
|
||||
\
|
||||
template <typename T> \
|
||||
__device__ void IPCContext::put_signal_nbi##SUFFIX(T *dest, const T *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, \
|
||||
int pe) { \
|
||||
putmem_signal##SUFFIX(dest, source, nelems * sizeof(T), sig_addr, signal, sig_op, pe); \
|
||||
}
|
||||
|
||||
IPC_CONTEXT_PUT_SIGNAL_DEF()
|
||||
IPC_CONTEXT_PUT_SIGNAL_DEF(_wg)
|
||||
IPC_CONTEXT_PUT_SIGNAL_DEF(_wave)
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // LIBRARY_SRC_IPC_CONTEXT_TMPL_DEVICE_HPP_
|
||||
|
||||
@@ -235,6 +235,31 @@ __device__ void roc_shmem_atomic_xor(T *dest, T value, int pe) {
|
||||
roc_shmem_atomic_xor(ROC_SHMEM_CTX_DEFAULT, dest, value, pe);
|
||||
}
|
||||
|
||||
#define ROC_SHMEM_PUTMEM_SIGNAL_DEF(SUFFIX) \
|
||||
__device__ void roc_shmem_putmem_signal##SUFFIX(void *dest, const void *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, \
|
||||
int sig_op, int pe) { \
|
||||
roc_shmem_ctx_putmem_signal##SUFFIX(ROC_SHMEM_CTX_DEFAULT, \
|
||||
dest, source, nelems, \
|
||||
sig_addr, signal, sig_op, pe); \
|
||||
} \
|
||||
\
|
||||
template <typename T> \
|
||||
__device__ void roc_shmem_put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, \
|
||||
int sig_op, int pe) { \
|
||||
roc_shmem_ctx_put_signal##SUFFIX(ROC_SHMEM_CTX_DEFAULT, \
|
||||
dest, source, nelems, \
|
||||
sig_addr, signal, sig_op, pe); \
|
||||
}
|
||||
|
||||
ROC_SHMEM_PUTMEM_SIGNAL_DEF()
|
||||
ROC_SHMEM_PUTMEM_SIGNAL_DEF(_wg)
|
||||
ROC_SHMEM_PUTMEM_SIGNAL_DEF(_wave)
|
||||
ROC_SHMEM_PUTMEM_SIGNAL_DEF(_nbi)
|
||||
ROC_SHMEM_PUTMEM_SIGNAL_DEF(_nbi_wg)
|
||||
ROC_SHMEM_PUTMEM_SIGNAL_DEF(_nbi_wave)
|
||||
|
||||
/******************************************************************************
|
||||
************************* Private Context Interfaces *************************
|
||||
*****************************************************************************/
|
||||
@@ -846,6 +871,47 @@ __device__ void roc_shmem_get_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
get_internal_ctx(ctx)->get_nbi_wave(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
#define ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(SUFFIX) \
|
||||
__device__ void roc_shmem_ctx_putmem_signal##SUFFIX(roc_shmem_ctx_t ctx, \
|
||||
void *dest, const void *source, \
|
||||
size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, \
|
||||
int sig_op, \
|
||||
int pe) { \
|
||||
GPU_DPRINTF("Function: roc_shmem_ctx_putmem_signal##SUFFIX\n"); \
|
||||
\
|
||||
get_internal_ctx(ctx)->putmem_signal##SUFFIX(dest, source, nelems, \
|
||||
sig_addr, signal, sig_op, pe); \
|
||||
} \
|
||||
\
|
||||
template <typename T> \
|
||||
__device__ void roc_shmem_ctx_put_signal##SUFFIX(roc_shmem_ctx_t ctx, \
|
||||
T *dest, const T *source, \
|
||||
size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, \
|
||||
int sig_op, int pe) { \
|
||||
GPU_DPRINTF("Function: roc_shmem_ctx_put_signal##SUFFIX\n"); \
|
||||
\
|
||||
get_internal_ctx(ctx)->put_signal##SUFFIX(dest, source, nelems, \
|
||||
sig_addr, signal, sig_op, pe); \
|
||||
}
|
||||
|
||||
ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF()
|
||||
ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(_wg)
|
||||
ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(_wave)
|
||||
ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(_nbi)
|
||||
ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(_nbi_wg)
|
||||
ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(_nbi_wave)
|
||||
|
||||
#define ROC_SHMEM_SIGNAL_FETCH_DEF(SUFFIX) \
|
||||
__device__ uint64_t roc_shmem_signal_fetch##SUFFIX(const uint64_t *sig_addr) { \
|
||||
return get_internal_ctx(ROC_SHMEM_CTX_DEFAULT)->signal_fetch##SUFFIX(sig_addr); \
|
||||
}
|
||||
|
||||
ROC_SHMEM_SIGNAL_FETCH_DEF()
|
||||
ROC_SHMEM_SIGNAL_FETCH_DEF(_wg)
|
||||
ROC_SHMEM_SIGNAL_FETCH_DEF(_wave)
|
||||
|
||||
/******************************************************************************
|
||||
****************************** Teams Interface *******************************
|
||||
*****************************************************************************/
|
||||
@@ -1384,6 +1450,67 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
|
||||
return roc_shmem_test<T>(ivars, cmp, val); \
|
||||
}
|
||||
|
||||
#define RMA_SIGNAL_SUFFIX_DEC(SUFFIX) \
|
||||
template <typename T> \
|
||||
__device__ void roc_shmem__ctx_put_signal##SUFFIX(roc_shmem_ctx_t ctx, \
|
||||
T *dest, const T *source, \
|
||||
size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, \
|
||||
int sig_op, int pe); \
|
||||
\
|
||||
template <typename T> \
|
||||
__device__ void roc_shmem__put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, \
|
||||
int sig_op, int pe); \
|
||||
|
||||
#define RMA_SIGNAL_SUFFIX_DEF(T, TNAME, SUFFIX) \
|
||||
__device__ void roc_shmem_ctx_##TNAME##_put_signal##SUFFIX(roc_shmem_ctx_t ctx, \
|
||||
T *dest, const T *source, \
|
||||
size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, \
|
||||
int sig_op, int pe) { \
|
||||
roc_shmem_ctx_put_signal##SUFFIX<T>(ctx, dest, source, nelems, sig_addr, signal, sig_op, pe); \
|
||||
} \
|
||||
\
|
||||
__device__ void roc_shmem_##TNAME##_put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \
|
||||
uint64_t *sig_addr, uint64_t signal, \
|
||||
int sig_op, int pe) { \
|
||||
roc_shmem_put_signal##SUFFIX(dest, source, nelems, sig_addr, signal, sig_op, pe); \
|
||||
}
|
||||
|
||||
#define RMA_SIGNAL_GEN(SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEC(SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(float, float, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(double, double, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(char, char, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(signed char, schar, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(short, short, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(int, int, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(long, long, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(long long, longlong, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(unsigned char, uchar, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(unsigned short, ushort, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(unsigned int, uint, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(unsigned long, ulong, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(unsigned long long, ulonglong, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(int8_t, int8, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(int16_t, int16, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(int32_t, int32, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(int64_t, int64, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(uint8_t, uint8, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(uint16_t, uint16, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(uint32_t, uint32, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(uint64_t, uint64, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(size_t, size, SUFFIX) \
|
||||
RMA_SIGNAL_SUFFIX_DEF(ptrdiff_t, ptrdiff, SUFFIX)
|
||||
|
||||
RMA_SIGNAL_GEN(_wg)
|
||||
RMA_SIGNAL_GEN()
|
||||
RMA_SIGNAL_GEN(_wave)
|
||||
RMA_SIGNAL_GEN(_nbi)
|
||||
RMA_SIGNAL_GEN(_nbi_wg)
|
||||
RMA_SIGNAL_GEN(_nbi_wave)
|
||||
|
||||
/******************************************************************************
|
||||
************************* Macro Invocation Per Type **************************
|
||||
*****************************************************************************/
|
||||
|
||||
@@ -82,6 +82,12 @@ enum roc_shmem_stats {
|
||||
NUM_CREATE,
|
||||
NUM_ALLTOALL,
|
||||
NUM_FCOLLECT,
|
||||
NUM_PUT_SIGNAL,
|
||||
NUM_PUT_SIGNAL_WG,
|
||||
NUM_PUT_SIGNAL_WAVE,
|
||||
NUM_PUT_SIGNAL_NBI,
|
||||
NUM_PUT_SIGNAL_NBI_WG,
|
||||
NUM_PUT_SIGNAL_NBI_WAVE,
|
||||
NUM_STATS
|
||||
};
|
||||
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele