From d8f44e44362a35d161931102124fc950cd3e5d8f Mon Sep 17 00:00:00 2001 From: Yiltan Temucin Date: Wed, 20 Nov 2024 15:42:53 -0600 Subject: [PATCH] Added Signalling Operations --- include/roc_shmem/roc_shmem.hpp | 69 +++++++++++++++ src/context.hpp | 27 ++++++ src/context_device.cpp | 30 +++++++ src/context_tmpl_device.hpp | 21 +++++ src/ipc/context_ipc_device.cpp | 105 +++++++++++++++++++++++ src/ipc/context_ipc_device.hpp | 21 +++++ src/ipc/context_ipc_tmpl_device.hpp | 19 +++++ src/roc_shmem_gpu.cpp | 127 ++++++++++++++++++++++++++++ src/stats.hpp | 6 ++ 9 files changed, 425 insertions(+) diff --git a/include/roc_shmem/roc_shmem.hpp b/include/roc_shmem/roc_shmem.hpp index d5e90ab0db..1052d9497d 100644 --- a/include/roc_shmem/roc_shmem.hpp +++ b/include/roc_shmem/roc_shmem.hpp @@ -66,6 +66,11 @@ enum ROC_SHMEM_OP { ROC_SHMEM_REPLACE }; +enum ROC_SHMEM_SIGNAL_OPS { + ROC_SHMEM_SIGNAL_SET, + ROC_SHMEM_SIGNAL_ADD, +}; + /** * @brief Types defined for roc_shmem_wait() operations. */ @@ -2627,6 +2632,70 @@ GET_NBI_API_EXT_GEN(wg, unsigned long, ulong) // NOLINT(runtime/int) GET_NBI_API_EXT_GEN(wg, unsigned long long, ulonglong) // NOLINT(runtime/int) ///@} + +/* + * ROC_SHMEM Signalling Operations + */ +#define PUTMEM_SIGNAL_DEC(SUFFIX) \ + __device__ ATTR_NO_INLINE void roc_shmem_putmem_signal##SUFFIX(void *dest, \ + const void *source, \ + size_t nelems, \ + uint64_t *sig_addr, \ + uint64_t signal, \ + int sig_op, int pe); \ + __device__ ATTR_NO_INLINE void roc_shmem_ctx_putmem_signal##SUFFIX(roc_shmem_ctx_t ctx, \ + void *dest, \ + const void *source, \ + size_t nelems, \ + uint64_t *sig_addr, \ + uint64_t signal, \ + int sig_op, int pe); + +#define PUT_SIGNAL_TYPED_DEC(T, TNAME, SUFFIX) \ + __device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_put_signal##SUFFIX(roc_shmem_ctx_t ctx, \ + T *dest, \ + const T *source, \ + size_t nelems, \ + uint64_t *sig_addr, \ + uint64_t signal, \ + int sig_op, int pe); \ + __device__ ATTR_NO_INLINE void roc_shmem_##TNAME##_put_signal##SUFFIX(T *dest, \ + const T *source, \ + size_t nelems, \ + uint64_t *sig_addr, \ + uint64_t signal, \ + int sig_op, int pe); + +#define PUT_SIGNAL_DEC(SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(float, float, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(double, double, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(char, char, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(signed char, schar, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(short, short, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(int, int, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(long, long, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(long long, longlong, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(unsigned char, uchar, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(unsigned short, ushort, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(unsigned int, uint, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(unsigned long, ulong, SUFFIX) \ + PUT_SIGNAL_TYPED_DEC(unsigned long long, ulonglong, SUFFIX) + +#define SIGNALING_API_DEC(SUFFIX) \ + PUTMEM_SIGNAL_DEC(SUFFIX) \ + PUT_SIGNAL_DEC(SUFFIX) + +SIGNALING_API_DEC() +SIGNALING_API_DEC(_wg) +SIGNALING_API_DEC(_wave) +SIGNALING_API_DEC(_nbi) +SIGNALING_API_DEC(_nbi_wg) +SIGNALING_API_DEC(_nbi_wave) + +__device__ ATTR_NO_INLINE uint64_t roc_shmem_signal_fetch(const uint64_t *sig_addr); +__device__ ATTR_NO_INLINE uint64_t roc_shmem_signal_fetch_wg(const uint64_t *sig_addr); +__device__ ATTR_NO_INLINE uint64_t roc_shmem_signal_fetch_wave(const uint64_t *sig_addr); + } // namespace rocshmem #endif // LIBRARY_INCLUDE_ROC_SHMEM_HPP diff --git a/src/context.hpp b/src/context.hpp index 4516a5d0a1..50f9713c4b 100644 --- a/src/context.hpp +++ b/src/context.hpp @@ -271,6 +271,33 @@ class Context { template __device__ void get_nbi_wave(T* dest, const T* source, size_t nelems, int pe); +#define CONTEXT_PUTMEM_SIGNAL_DEC(SUFFIX) \ + __device__ void putmem_signal##SUFFIX(void *dest, const void *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + + CONTEXT_PUTMEM_SIGNAL_DEC() + CONTEXT_PUTMEM_SIGNAL_DEC(_wg) + CONTEXT_PUTMEM_SIGNAL_DEC(_wave) + CONTEXT_PUTMEM_SIGNAL_DEC(_nbi) + CONTEXT_PUTMEM_SIGNAL_DEC(_nbi_wg) + CONTEXT_PUTMEM_SIGNAL_DEC(_nbi_wave) + +#define CONTEXT_PUT_SIGNAL_DEC(SUFFIX) \ + template \ + __device__ void put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); + + CONTEXT_PUT_SIGNAL_DEC() + CONTEXT_PUT_SIGNAL_DEC(_wg) + CONTEXT_PUT_SIGNAL_DEC(_wave) + CONTEXT_PUT_SIGNAL_DEC(_nbi) + CONTEXT_PUT_SIGNAL_DEC(_nbi_wg) + CONTEXT_PUT_SIGNAL_DEC(_nbi_wave) + + __device__ uint64_t signal_fetch(const uint64_t *sig_addr); + __device__ uint64_t signal_fetch_wg(const uint64_t *sig_addr); + __device__ uint64_t signal_fetch_wave(const uint64_t *sig_addr); + /************************************************************************** ****************************** HOST METHODS ****************************** *************************************************************************/ diff --git a/src/context_device.cpp b/src/context_device.cpp index 33f468d72d..518ad06a5e 100644 --- a/src/context_device.cpp +++ b/src/context_device.cpp @@ -248,4 +248,34 @@ __device__ void Context::getmem_nbi_wave(void* dest, const void* source, DISPATCH(getmem_nbi_wave(dest, source, size, pe)); } +#define CONTEXT_PUTMEM_SIGNAL_DEF(SUFFIX, STATS_SUFFIX) \ + __device__ void Context::putmem_signal##SUFFIX(void *dest, const void *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, int sig_op, \ + int pe) { \ + if (nelems == 0) { \ + return; \ + } \ + \ + ctxStats.incStat(NUM_PUT_SIGNAL##STATS_SUFFIX); \ + \ + DISPATCH(putmem_signal##SUFFIX(dest, source, nelems, sig_addr, signal, sig_op, pe)); \ + } + +CONTEXT_PUTMEM_SIGNAL_DEF(,) +CONTEXT_PUTMEM_SIGNAL_DEF(_wg, _WG) +CONTEXT_PUTMEM_SIGNAL_DEF(_wave, _WAVE) +CONTEXT_PUTMEM_SIGNAL_DEF(_nbi, _NBI) +CONTEXT_PUTMEM_SIGNAL_DEF(_nbi_wg, _NBI_WG) +CONTEXT_PUTMEM_SIGNAL_DEF(_nbi_wave, _NBI_WAVE) + +#define CONTEXT_SIGNAL_FETCH_DEF(SUFFIX) \ +__device__ uint64_t Context::signal_fetch##SUFFIX(const uint64_t *sig_addr) \ +{ \ + DISPATCH_RET(signal_fetch##SUFFIX(sig_addr)); \ +} + +CONTEXT_SIGNAL_FETCH_DEF() +CONTEXT_SIGNAL_FETCH_DEF(_wg) +CONTEXT_SIGNAL_FETCH_DEF(_wave) + } // namespace rocshmem diff --git a/src/context_tmpl_device.hpp b/src/context_tmpl_device.hpp index b504fce229..532c64813a 100644 --- a/src/context_tmpl_device.hpp +++ b/src/context_tmpl_device.hpp @@ -556,6 +556,27 @@ __device__ void Context::amo_cas(void *dst, T value, T cond, int pe) { DISPATCH(amo_cas(dst, value, cond, pe)); } +#define CONTEXT_PUT_SIGNAL_DEF(SUFFIX, STATS_SUFFIX) \ + template \ + __device__ void Context::put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, int sig_op, \ + int pe) { \ + if (nelems == 0) { \ + return; \ + } \ + \ + ctxStats.incStat(NUM_PUT_SIGNAL##STATS_SUFFIX); \ + \ + DISPATCH(put_signal##SUFFIX(dest, source, nelems, sig_addr, signal, sig_op, pe)); \ + } + +CONTEXT_PUT_SIGNAL_DEF(,) +CONTEXT_PUT_SIGNAL_DEF(_wg, _WG) +CONTEXT_PUT_SIGNAL_DEF(_wave, _WAVE) +CONTEXT_PUT_SIGNAL_DEF(_nbi, _NBI) +CONTEXT_PUT_SIGNAL_DEF(_nbi_wg, _NBI_WG) +CONTEXT_PUT_SIGNAL_DEF(_nbi_wave, _NBI_WAVE) + } // namespace rocshmem #endif // LIBRARY_SRC_CONTEXT_TMPL_DEVICE_HPP_ diff --git a/src/ipc/context_ipc_device.cpp b/src/ipc/context_ipc_device.cpp index 47d45565ef..81d1475b9e 100644 --- a/src/ipc/context_ipc_device.cpp +++ b/src/ipc/context_ipc_device.cpp @@ -222,4 +222,109 @@ __device__ void IPCContext::internal_getmem_wave(void *dest, ipcImpl_.ipcFence(); } +__device__ void IPCContext::putmem_signal(void *dest, const void *source, size_t nelems, + uint64_t *sig_addr, uint64_t signal, int sig_op, + int pe) { + putmem(dest, source, nelems, pe); + fence(); + + switch (sig_op) { + case ROC_SHMEM_SIGNAL_SET: + amo_set(static_cast(sig_addr), signal, pe); + break; + case ROC_SHMEM_SIGNAL_ADD: + amo_add(static_cast(dest), signal, pe); + break; + default: + DPRINTF("[%s] Invalid sig_op value (%d)\n", __func__, sig_op); + break; + } +} + +__device__ void IPCContext::putmem_signal_wg(void *dest, const void *source, size_t nelems, + uint64_t *sig_addr, uint64_t signal, int sig_op, + int pe) { + putmem_wg(dest, source, nelems, pe); + fence(); + + if (is_thread_zero_in_block()) { + switch (sig_op) { + case ROC_SHMEM_SIGNAL_SET: + amo_set(static_cast(sig_addr), signal, pe); + break; + case ROC_SHMEM_SIGNAL_ADD: + amo_add(static_cast(dest), signal, pe); + break; + default: + DPRINTF("[%s] Invalid sig_op value (%d)\n", __func__, sig_op); + break; + } + } +} + +__device__ void IPCContext::putmem_signal_wave(void *dest, const void *source, size_t nelems, + uint64_t *sig_addr, uint64_t signal, int sig_op, + int pe) { + putmem_wave(dest, source, nelems, pe); + fence(); + + if (is_thread_zero_in_wave()) { + switch (sig_op) { + case ROC_SHMEM_SIGNAL_SET: + amo_set(static_cast(sig_addr), signal, pe); + break; + case ROC_SHMEM_SIGNAL_ADD: + amo_add(static_cast(dest), signal, pe); + break; + default: + DPRINTF("[%s] Invalid sig_op value (%d)\n", __func__, sig_op); + break; + } + } +} + +__device__ void IPCContext::putmem_signal_nbi(void *dest, const void *source, size_t nelems, + uint64_t *sig_addr, uint64_t signal, int sig_op, + int pe) { + putmem_signal(dest, source, nelems, sig_addr, signal, sig_op, pe); +} + +__device__ void IPCContext::putmem_signal_nbi_wg(void *dest, const void *source, size_t nelems, + uint64_t *sig_addr, uint64_t signal, int sig_op, + int pe) { + putmem_signal_wg(dest, source, nelems, sig_addr, signal, sig_op, pe); +} + +__device__ void IPCContext::putmem_signal_nbi_wave(void *dest, const void *source, size_t nelems, + uint64_t *sig_addr, uint64_t signal, int sig_op, + int pe) { + putmem_signal_wave(dest, source, nelems, sig_addr, signal, sig_op, pe); +} + +__device__ uint64_t IPCContext::signal_fetch(const uint64_t *sig_addr) { + uint64_t *dst = const_cast(sig_addr); + return amo_fetch_add(static_cast(dst), 0, my_pe); +} + +__device__ uint64_t IPCContext::signal_fetch_wg(const uint64_t *sig_addr) { + __shared__ uint64_t value; + if (is_thread_zero_in_block()) { + uint64_t *dst = const_cast(sig_addr); + value = amo_fetch_add(static_cast(dst), 0, my_pe); + } + __threadfence_block(); + return value; +} + +__device__ uint64_t IPCContext::signal_fetch_wave(const uint64_t *sig_addr) { + uint64_t value; + if (is_thread_zero_in_wave()) { + uint64_t *dst = const_cast(sig_addr); + value = amo_fetch_add(static_cast(dst), 0, my_pe); + } + __threadfence_block(); + value = __shfl(value, 0); + return value; +} + } // namespace rocshmem diff --git a/src/ipc/context_ipc_device.hpp b/src/ipc/context_ipc_device.hpp index 03b72efe89..2f3e899546 100644 --- a/src/ipc/context_ipc_device.hpp +++ b/src/ipc/context_ipc_device.hpp @@ -186,6 +186,27 @@ class IPCContext : public Context { template __device__ void get_nbi_wave(T *dest, const T *source, size_t nelems, int pe); +#define IPC_CONTEXT_PUT_SIGNAL_DEC(SUFFIX) \ + template \ + __device__ void put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, int sig_op, \ + int pe); \ + \ + __device__ void putmem_signal##SUFFIX(void *dest, const void *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, int sig_op, \ + int pe); + + IPC_CONTEXT_PUT_SIGNAL_DEC() + IPC_CONTEXT_PUT_SIGNAL_DEC(_wg) + IPC_CONTEXT_PUT_SIGNAL_DEC(_wave) + IPC_CONTEXT_PUT_SIGNAL_DEC(_nbi) + IPC_CONTEXT_PUT_SIGNAL_DEC(_nbi_wg) + IPC_CONTEXT_PUT_SIGNAL_DEC(_nbi_wave) + + __device__ uint64_t signal_fetch(const uint64_t *sig_addr); + __device__ uint64_t signal_fetch_wg(const uint64_t *sig_addr); + __device__ uint64_t signal_fetch_wave(const uint64_t *sig_addr); + private: //context class has IpcImpl object (ipcImpl_) diff --git a/src/ipc/context_ipc_tmpl_device.hpp b/src/ipc/context_ipc_tmpl_device.hpp index 4da49b6f7b..f239672243 100644 --- a/src/ipc/context_ipc_tmpl_device.hpp +++ b/src/ipc/context_ipc_tmpl_device.hpp @@ -578,6 +578,25 @@ __device__ void IPCContext::get_nbi_wave(T *dest, const T *source, getmem_nbi_wave(dest, source, nelems * sizeof(T), pe); } +#define IPC_CONTEXT_PUT_SIGNAL_DEF(SUFFIX) \ + template \ + __device__ void IPCContext::put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, int sig_op, \ + int pe) { \ + putmem_signal##SUFFIX(dest, source, nelems * sizeof(T), sig_addr, signal, sig_op, pe); \ + } \ + \ + template \ + __device__ void IPCContext::put_signal_nbi##SUFFIX(T *dest, const T *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, int sig_op, \ + int pe) { \ + putmem_signal##SUFFIX(dest, source, nelems * sizeof(T), sig_addr, signal, sig_op, pe); \ + } + +IPC_CONTEXT_PUT_SIGNAL_DEF() +IPC_CONTEXT_PUT_SIGNAL_DEF(_wg) +IPC_CONTEXT_PUT_SIGNAL_DEF(_wave) + } // namespace rocshmem #endif // LIBRARY_SRC_IPC_CONTEXT_TMPL_DEVICE_HPP_ diff --git a/src/roc_shmem_gpu.cpp b/src/roc_shmem_gpu.cpp index 2deff5d0b7..949f6619b1 100644 --- a/src/roc_shmem_gpu.cpp +++ b/src/roc_shmem_gpu.cpp @@ -235,6 +235,31 @@ __device__ void roc_shmem_atomic_xor(T *dest, T value, int pe) { roc_shmem_atomic_xor(ROC_SHMEM_CTX_DEFAULT, dest, value, pe); } +#define ROC_SHMEM_PUTMEM_SIGNAL_DEF(SUFFIX) \ + __device__ void roc_shmem_putmem_signal##SUFFIX(void *dest, const void *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, \ + int sig_op, int pe) { \ + roc_shmem_ctx_putmem_signal##SUFFIX(ROC_SHMEM_CTX_DEFAULT, \ + dest, source, nelems, \ + sig_addr, signal, sig_op, pe); \ + } \ + \ + template \ + __device__ void roc_shmem_put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, \ + int sig_op, int pe) { \ + roc_shmem_ctx_put_signal##SUFFIX(ROC_SHMEM_CTX_DEFAULT, \ + dest, source, nelems, \ + sig_addr, signal, sig_op, pe); \ + } + +ROC_SHMEM_PUTMEM_SIGNAL_DEF() +ROC_SHMEM_PUTMEM_SIGNAL_DEF(_wg) +ROC_SHMEM_PUTMEM_SIGNAL_DEF(_wave) +ROC_SHMEM_PUTMEM_SIGNAL_DEF(_nbi) +ROC_SHMEM_PUTMEM_SIGNAL_DEF(_nbi_wg) +ROC_SHMEM_PUTMEM_SIGNAL_DEF(_nbi_wave) + /****************************************************************************** ************************* Private Context Interfaces ************************* *****************************************************************************/ @@ -846,6 +871,47 @@ __device__ void roc_shmem_get_nbi_wave(roc_shmem_ctx_t ctx, T *dest, get_internal_ctx(ctx)->get_nbi_wave(dest, source, nelems, pe); } +#define ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(SUFFIX) \ + __device__ void roc_shmem_ctx_putmem_signal##SUFFIX(roc_shmem_ctx_t ctx, \ + void *dest, const void *source, \ + size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, \ + int sig_op, \ + int pe) { \ + GPU_DPRINTF("Function: roc_shmem_ctx_putmem_signal##SUFFIX\n"); \ + \ + get_internal_ctx(ctx)->putmem_signal##SUFFIX(dest, source, nelems, \ + sig_addr, signal, sig_op, pe); \ + } \ + \ + template \ + __device__ void roc_shmem_ctx_put_signal##SUFFIX(roc_shmem_ctx_t ctx, \ + T *dest, const T *source, \ + size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, \ + int sig_op, int pe) { \ + GPU_DPRINTF("Function: roc_shmem_ctx_put_signal##SUFFIX\n"); \ + \ + get_internal_ctx(ctx)->put_signal##SUFFIX(dest, source, nelems, \ + sig_addr, signal, sig_op, pe); \ + } + +ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF() +ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(_wg) +ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(_wave) +ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(_nbi) +ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(_nbi_wg) +ROC_SHMEM_CTX_PUTMEM_SIGNAL_DEF(_nbi_wave) + +#define ROC_SHMEM_SIGNAL_FETCH_DEF(SUFFIX) \ + __device__ uint64_t roc_shmem_signal_fetch##SUFFIX(const uint64_t *sig_addr) { \ + return get_internal_ctx(ROC_SHMEM_CTX_DEFAULT)->signal_fetch##SUFFIX(sig_addr); \ + } + +ROC_SHMEM_SIGNAL_FETCH_DEF() +ROC_SHMEM_SIGNAL_FETCH_DEF(_wg) +ROC_SHMEM_SIGNAL_FETCH_DEF(_wave) + /****************************************************************************** ****************************** Teams Interface ******************************* *****************************************************************************/ @@ -1384,6 +1450,67 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team, return roc_shmem_test(ivars, cmp, val); \ } +#define RMA_SIGNAL_SUFFIX_DEC(SUFFIX) \ + template \ + __device__ void roc_shmem__ctx_put_signal##SUFFIX(roc_shmem_ctx_t ctx, \ + T *dest, const T *source, \ + size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, \ + int sig_op, int pe); \ + \ + template \ + __device__ void roc_shmem__put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, \ + int sig_op, int pe); \ + +#define RMA_SIGNAL_SUFFIX_DEF(T, TNAME, SUFFIX) \ + __device__ void roc_shmem_ctx_##TNAME##_put_signal##SUFFIX(roc_shmem_ctx_t ctx, \ + T *dest, const T *source, \ + size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, \ + int sig_op, int pe) { \ + roc_shmem_ctx_put_signal##SUFFIX(ctx, dest, source, nelems, sig_addr, signal, sig_op, pe); \ + } \ + \ + __device__ void roc_shmem_##TNAME##_put_signal##SUFFIX(T *dest, const T *source, size_t nelems, \ + uint64_t *sig_addr, uint64_t signal, \ + int sig_op, int pe) { \ + roc_shmem_put_signal##SUFFIX(dest, source, nelems, sig_addr, signal, sig_op, pe); \ + } + +#define RMA_SIGNAL_GEN(SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEC(SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(float, float, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(double, double, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(char, char, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(signed char, schar, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(short, short, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(int, int, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(long, long, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(long long, longlong, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(unsigned char, uchar, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(unsigned short, ushort, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(unsigned int, uint, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(unsigned long, ulong, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(unsigned long long, ulonglong, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(int8_t, int8, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(int16_t, int16, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(int32_t, int32, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(int64_t, int64, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(uint8_t, uint8, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(uint16_t, uint16, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(uint32_t, uint32, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(uint64_t, uint64, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(size_t, size, SUFFIX) \ + RMA_SIGNAL_SUFFIX_DEF(ptrdiff_t, ptrdiff, SUFFIX) + +RMA_SIGNAL_GEN(_wg) +RMA_SIGNAL_GEN() +RMA_SIGNAL_GEN(_wave) +RMA_SIGNAL_GEN(_nbi) +RMA_SIGNAL_GEN(_nbi_wg) +RMA_SIGNAL_GEN(_nbi_wave) + /****************************************************************************** ************************* Macro Invocation Per Type ************************** *****************************************************************************/ diff --git a/src/stats.hpp b/src/stats.hpp index 704313f102..15cb16779c 100644 --- a/src/stats.hpp +++ b/src/stats.hpp @@ -82,6 +82,12 @@ enum roc_shmem_stats { NUM_CREATE, NUM_ALLTOALL, NUM_FCOLLECT, + NUM_PUT_SIGNAL, + NUM_PUT_SIGNAL_WG, + NUM_PUT_SIGNAL_WAVE, + NUM_PUT_SIGNAL_NBI, + NUM_PUT_SIGNAL_NBI_WG, + NUM_PUT_SIGNAL_NBI_WAVE, NUM_STATS };