converted roc_shmemx to roc_shmem
Este cometimento está contido em:
@@ -2086,39 +2086,39 @@ TEST_API_GEN(unsigned long long, ulonglong) // NOLINT(runtime/int)
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
* MACRO DECLARE SHMEMX_PUT APIs
|
||||
* MACRO DECLARE SHMEM_PUT APIs
|
||||
*/
|
||||
#define PUT_API_EXT_GEN(GRAN, T, TNAME) \
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_##TNAME##_put_##GRAN( \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_put_##GRAN( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe); \
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_##TNAME##_put_##GRAN( \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_##TNAME##_put_##GRAN( \
|
||||
T *dest, const T *source, size_t nelems, int pe);
|
||||
|
||||
/*
|
||||
* MACRO DECLARE SHMEMX_GET APIs
|
||||
* MACRO DECLARE SHMEM_GET APIs
|
||||
*/
|
||||
#define GET_API_EXT_GEN(GRAN, T, TNAME) \
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_##TNAME##_get_##GRAN( \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_get_##GRAN( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe); \
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_##TNAME##_get_##GRAN( \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_##TNAME##_get_##GRAN( \
|
||||
T *dest, const T *source, size_t nelems, int pe);
|
||||
|
||||
/*
|
||||
* MACRO DECLARE SHMEMX_PUT_NBI APIs
|
||||
* MACRO DECLARE SHMEM_PUT_NBI APIs
|
||||
*/
|
||||
#define PUT_NBI_API_EXT_GEN(GRAN, T, TNAME) \
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_##TNAME##_put_nbi_##GRAN( \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_put_nbi_##GRAN( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe); \
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_##TNAME##_put_nbi_##GRAN( \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_##TNAME##_put_nbi_##GRAN( \
|
||||
T *dest, const T *source, size_t nelems, int pe);
|
||||
|
||||
/*
|
||||
* MACRO DECLARE SHMEMX_GET_NBI APIs
|
||||
* MACRO DECLARE SHMEM_GET_NBI APIs
|
||||
*/
|
||||
#define GET_NBI_API_EXT_GEN(GRAN, T, TNAME) \
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_##TNAME##_get_nbi_##GRAN( \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_get_nbi_##GRAN( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe); \
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_##TNAME##_get_nbi_##GRAN( \
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_##TNAME##_get_nbi_##GRAN( \
|
||||
T *dest, const T *source, size_t nelems, int pe);
|
||||
|
||||
/**
|
||||
@@ -2140,12 +2140,12 @@ TEST_API_GEN(unsigned long long, ulonglong) // NOLINT(runtime/int)
|
||||
*
|
||||
* @return void.
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_putmem_wave(
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_putmem_wave(
|
||||
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_putmem_wave(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_putmem_wave(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
|
||||
/**
|
||||
* @brief Writes contiguous data of \p nelems bytes from \p source on the
|
||||
@@ -2166,14 +2166,14 @@ __device__ ATTR_NO_INLINE void roc_shmemx_putmem_wave(void *dest,
|
||||
*
|
||||
* @return void.
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_putmem_wg(roc_shmem_ctx_t ctx,
|
||||
void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_putmem_wg(roc_shmem_ctx_t ctx,
|
||||
void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_putmem_wg(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_putmem_wg(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
|
||||
/**
|
||||
* @brief Writes contiguous data of \p nelems elements from \p source on the
|
||||
@@ -2265,12 +2265,12 @@ PUT_API_EXT_GEN(wg, unsigned long long, ulonglong) // NOLINT(runtime/int)
|
||||
*
|
||||
* @return void.
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_getmem_wave(
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_getmem_wave(
|
||||
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_getmem_wave(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_getmem_wave(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
|
||||
/**
|
||||
* @brief Reads contiguous data of \p nelems bytes from \p source on \p pe
|
||||
@@ -2290,14 +2290,14 @@ __device__ ATTR_NO_INLINE void roc_shmemx_getmem_wave(void *dest,
|
||||
*
|
||||
* @return void.
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_getmem_wg(roc_shmem_ctx_t ctx,
|
||||
void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_getmem_wg(roc_shmem_ctx_t ctx,
|
||||
void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_getmem_wg(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_getmem_wg(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
|
||||
/**
|
||||
* @brief Reads contiguous data of \p nelems elements from \p source on \p pe
|
||||
@@ -2389,13 +2389,13 @@ GET_API_EXT_GEN(wg, unsigned long long, ulonglong) // NOLINT(runtime/int)
|
||||
*
|
||||
* @return void.
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_putmem_nbi_wave(
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_putmem_nbi_wave(
|
||||
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_putmem_nbi_wave(void *dest,
|
||||
const void *source,
|
||||
size_t nelems,
|
||||
int pe);
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_putmem_nbi_wave(void *dest,
|
||||
const void *source,
|
||||
size_t nelems,
|
||||
int pe);
|
||||
|
||||
/**
|
||||
* @brief Writes contiguous data of \p nelems elements from \p source on the
|
||||
@@ -2454,12 +2454,12 @@ PUT_NBI_API_EXT_GEN(wave, unsigned long long, ulonglong) // NOLINT
|
||||
*
|
||||
* @return void.
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_putmem_nbi_wg(
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_putmem_nbi_wg(
|
||||
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_putmem_nbi_wg(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_putmem_nbi_wg(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
|
||||
/**
|
||||
* @brief Writes contiguous data of \p nelems elements from \p source on the
|
||||
@@ -2518,13 +2518,13 @@ PUT_NBI_API_EXT_GEN(wg, unsigned long long, ulonglong) // NOLINT(runtime/int)
|
||||
*
|
||||
* @return void.
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_getmem_nbi_wave(
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_getmem_nbi_wave(
|
||||
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_getmem_nbi_wave(void *dest,
|
||||
const void *source,
|
||||
size_t nelems,
|
||||
int pe);
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_getmem_nbi_wave(void *dest,
|
||||
const void *source,
|
||||
size_t nelems,
|
||||
int pe);
|
||||
|
||||
/**
|
||||
* @brief Reads contiguous data of \p nelems elements from \p source on \p pe
|
||||
@@ -2583,12 +2583,12 @@ GET_NBI_API_EXT_GEN(wave, unsigned long long, ulonglong) // NOLINT
|
||||
*
|
||||
* @return void.
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_getmem_nbi_wg(
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_ctx_getmem_nbi_wg(
|
||||
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void roc_shmemx_getmem_nbi_wg(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
__device__ ATTR_NO_INLINE void roc_shmem_getmem_nbi_wg(void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe);
|
||||
|
||||
/**
|
||||
* @brief Reads contiguous data of \p nelems elements from \p source on \p pe
|
||||
|
||||
@@ -47,7 +47,7 @@ PrimitiveTest(int loop,
|
||||
|
||||
int block_id = hipBlockIdx_x;
|
||||
for(int i =0; i< loop; i++){
|
||||
roc_shmemx_ctx_putmem_nbi_wg(ctx, &r_buf[my_pe*size], &s_buf[block_id * size], size, block_id);
|
||||
roc_shmem_ctx_putmem_nbi_wg(ctx, &r_buf[my_pe*size], &s_buf[block_id * size], size, block_id);
|
||||
if(hipThreadIdx_x==0){
|
||||
//roc_shmem_ctx_quiet(ctx);
|
||||
//roc_shmem_ctx_threadfence_system(ctx);
|
||||
|
||||
+122
-122
@@ -716,132 +716,132 @@ __device__ void roc_shmem_atomic_xor(roc_shmem_ctx_t ctx, T *dest, T val,
|
||||
/**
|
||||
* SHMEM X RMA API for WG and Wave level
|
||||
*/
|
||||
__device__ void roc_shmemx_ctx_putmem_wave(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_ctx_putmem_wave\n");
|
||||
__device__ void roc_shmem_ctx_putmem_wave(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_ctx_putmem_wave\n");
|
||||
|
||||
get_internal_ctx(ctx)->putmem_wave(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_ctx_putmem_wg(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_ctx_putmem_wg\n");
|
||||
__device__ void roc_shmem_ctx_putmem_wg(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_ctx_putmem_wg\n");
|
||||
|
||||
get_internal_ctx(ctx)->putmem_wg(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_ctx_putmem_nbi_wave(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_ctx_putmem_nbi_wave\n");
|
||||
__device__ void roc_shmem_ctx_putmem_nbi_wave(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_ctx_putmem_nbi_wave\n");
|
||||
|
||||
get_internal_ctx(ctx)->putmem_nbi_wave(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_ctx_putmem_nbi_wg(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_ctx_putmem_nbi_wg\n");
|
||||
__device__ void roc_shmem_ctx_putmem_nbi_wg(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_ctx_putmem_nbi_wg\n");
|
||||
|
||||
get_internal_ctx(ctx)->putmem_nbi_wg(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_put_wave\n");
|
||||
__device__ void roc_shmem_put_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_put_wave\n");
|
||||
|
||||
get_internal_ctx(ctx)->put_wave(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
|
||||
size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_put_wg\n");
|
||||
__device__ void roc_shmem_put_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
|
||||
size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_put_wg\n");
|
||||
|
||||
get_internal_ctx(ctx)->put_wg(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_put_nbi_wave\n");
|
||||
__device__ void roc_shmem_put_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_put_nbi_wave\n");
|
||||
|
||||
get_internal_ctx(ctx)->put_nbi_wave(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_put_nbi_wg\n");
|
||||
__device__ void roc_shmem_put_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_put_nbi_wg\n");
|
||||
|
||||
get_internal_ctx(ctx)->put_nbi_wg(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_ctx_getmem_wg(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_ctx_getmem_wg\n");
|
||||
__device__ void roc_shmem_ctx_getmem_wg(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_ctx_getmem_wg\n");
|
||||
|
||||
get_internal_ctx(ctx)->getmem_wg(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_ctx_getmem_wave(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_ctx_getmem_wave\n");
|
||||
__device__ void roc_shmem_ctx_getmem_wave(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_ctx_getmem_wave\n");
|
||||
|
||||
get_internal_ctx(ctx)->getmem_wave(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
|
||||
size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_get_wg\n");
|
||||
__device__ void roc_shmem_get_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
|
||||
size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_get_wg\n");
|
||||
|
||||
get_internal_ctx(ctx)->get_wg(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_get_wave\n");
|
||||
__device__ void roc_shmem_get_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_get_wave\n");
|
||||
|
||||
get_internal_ctx(ctx)->get_wave(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_ctx_getmem_nbi_wg(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_ctx_getmem_nbi_wg\n");
|
||||
__device__ void roc_shmem_ctx_getmem_nbi_wg(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_ctx_getmem_nbi_wg\n");
|
||||
|
||||
get_internal_ctx(ctx)->getmem_nbi_wg(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_get_nbi_wg\n");
|
||||
__device__ void roc_shmem_get_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_get_nbi_wg\n");
|
||||
|
||||
get_internal_ctx(ctx)->get_nbi_wg(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_ctx_getmem_nbi_wave(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_ctx_getmem_nbi_wave\n");
|
||||
__device__ void roc_shmem_ctx_getmem_nbi_wave(roc_shmem_ctx_t ctx, void *dest,
|
||||
const void *source,
|
||||
size_t nelems, int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_ctx_getmem_nbi_wave\n");
|
||||
|
||||
get_internal_ctx(ctx)->getmem_nbi_wave(dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmemx_get_nbi_wave\n");
|
||||
__device__ void roc_shmem_get_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems,
|
||||
int pe) {
|
||||
GPU_DPRINTF("Function: roc_shmem_get_nbi_wave\n");
|
||||
|
||||
get_internal_ctx(ctx)->get_nbi_wave(dest, source, nelems, pe);
|
||||
}
|
||||
@@ -903,38 +903,38 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
|
||||
template __device__ void roc_shmem_wg_fcollect<T>( \
|
||||
roc_shmem_ctx_t ctx, roc_shmem_team_t team, T * dest, const T *source, \
|
||||
int nelem); \
|
||||
template __device__ void roc_shmemx_put_wave<T>( \
|
||||
template __device__ void roc_shmem_put_wave<T>( \
|
||||
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_put_wg<T>( \
|
||||
template __device__ void roc_shmem_put_wg<T>( \
|
||||
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_put_wave<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_put_wg<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_put_nbi_wave<T>( \
|
||||
template __device__ void roc_shmem_put_wave<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe); \
|
||||
template __device__ void roc_shmem_put_wg<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe); \
|
||||
template __device__ void roc_shmem_put_nbi_wave<T>( \
|
||||
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_put_nbi_wg<T>( \
|
||||
template __device__ void roc_shmem_put_nbi_wg<T>( \
|
||||
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_put_nbi_wave<T>( \
|
||||
template __device__ void roc_shmem_put_nbi_wave<T>( \
|
||||
T * dest, const T *source, size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_put_nbi_wg<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_get_wave<T>( \
|
||||
template __device__ void roc_shmem_put_nbi_wg<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe); \
|
||||
template __device__ void roc_shmem_get_wave<T>( \
|
||||
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_get_wg<T>( \
|
||||
template __device__ void roc_shmem_get_wg<T>( \
|
||||
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_get_wave<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_get_wg<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_get_nbi_wave<T>( \
|
||||
template __device__ void roc_shmem_get_wave<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe); \
|
||||
template __device__ void roc_shmem_get_wg<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe); \
|
||||
template __device__ void roc_shmem_get_nbi_wave<T>( \
|
||||
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_get_nbi_wg<T>( \
|
||||
template __device__ void roc_shmem_get_nbi_wg<T>( \
|
||||
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_get_nbi_wave<T>( \
|
||||
template __device__ void roc_shmem_get_nbi_wave<T>( \
|
||||
T * dest, const T *source, size_t nelems, int pe); \
|
||||
template __device__ void roc_shmemx_get_nbi_wg<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe);
|
||||
template __device__ void roc_shmem_get_nbi_wg<T>(T * dest, const T *source, \
|
||||
size_t nelems, int pe);
|
||||
|
||||
/**
|
||||
* Declare templates for the standard amo types
|
||||
@@ -1142,69 +1142,69 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
|
||||
__device__ T roc_shmem_##TNAME##_g(const T *source, int pe) { \
|
||||
return roc_shmem_g<T>(source, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_ctx_##TNAME##_put_wave( \
|
||||
__device__ void roc_shmem_ctx_##TNAME##_put_wave( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
|
||||
roc_shmemx_put_wave<T>(ctx, dest, source, nelems, pe); \
|
||||
roc_shmem_put_wave<T>(ctx, dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_ctx_##TNAME##_put_wg( \
|
||||
__device__ void roc_shmem_ctx_##TNAME##_put_wg( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
|
||||
roc_shmemx_put_wg<T>(ctx, dest, source, nelems, pe); \
|
||||
roc_shmem_put_wg<T>(ctx, dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_##TNAME##_put_wave(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmemx_put_wave<T>(dest, source, nelems, pe); \
|
||||
__device__ void roc_shmem_##TNAME##_put_wave(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmem_put_wave<T>(dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_##TNAME##_put_wg(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmemx_put_wg<T>(dest, source, nelems, pe); \
|
||||
__device__ void roc_shmem_##TNAME##_put_wg(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmem_put_wg<T>(dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_ctx_##TNAME##_put_nbi_wave( \
|
||||
__device__ void roc_shmem_ctx_##TNAME##_put_nbi_wave( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
|
||||
roc_shmemx_put_nbi_wave<T>(ctx, dest, source, nelems, pe); \
|
||||
roc_shmem_put_nbi_wave<T>(ctx, dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_ctx_##TNAME##_put_nbi_wg( \
|
||||
__device__ void roc_shmem_ctx_##TNAME##_put_nbi_wg( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
|
||||
roc_shmemx_put_nbi_wg<T>(ctx, dest, source, nelems, pe); \
|
||||
roc_shmem_put_nbi_wg<T>(ctx, dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_##TNAME##_put_nbi_wave(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmemx_put_nbi_wave<T>(dest, source, nelems, pe); \
|
||||
__device__ void roc_shmem_##TNAME##_put_nbi_wave(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmem_put_nbi_wave<T>(dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_##TNAME##_put_nbi_wg(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmemx_put_nbi_wg<T>(dest, source, nelems, pe); \
|
||||
__device__ void roc_shmem_##TNAME##_put_nbi_wg(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmem_put_nbi_wg<T>(dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_ctx_##TNAME##_get_wave( \
|
||||
__device__ void roc_shmem_ctx_##TNAME##_get_wave( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
|
||||
roc_shmemx_get_wave<T>(ctx, dest, source, nelems, pe); \
|
||||
roc_shmem_get_wave<T>(ctx, dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_ctx_##TNAME##_get_wg( \
|
||||
__device__ void roc_shmem_ctx_##TNAME##_get_wg( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
|
||||
roc_shmemx_get_wg<T>(ctx, dest, source, nelems, pe); \
|
||||
roc_shmem_get_wg<T>(ctx, dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_##TNAME##_get_wave(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmemx_get_wave<T>(dest, source, nelems, pe); \
|
||||
__device__ void roc_shmem_##TNAME##_get_wave(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmem_get_wave<T>(dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_##TNAME##_get_wg(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmemx_get_wg<T>(dest, source, nelems, pe); \
|
||||
__device__ void roc_shmem_##TNAME##_get_wg(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmem_get_wg<T>(dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_ctx_##TNAME##_get_nbi_wave( \
|
||||
__device__ void roc_shmem_ctx_##TNAME##_get_nbi_wave( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
|
||||
roc_shmemx_get_nbi_wave<T>(ctx, dest, source, nelems, pe); \
|
||||
roc_shmem_get_nbi_wave<T>(ctx, dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_ctx_##TNAME##_get_nbi_wg( \
|
||||
__device__ void roc_shmem_ctx_##TNAME##_get_nbi_wg( \
|
||||
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
|
||||
roc_shmemx_get_nbi_wg<T>(ctx, dest, source, nelems, pe); \
|
||||
roc_shmem_get_nbi_wg<T>(ctx, dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_##TNAME##_get_nbi_wave(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmemx_get_nbi_wave<T>(dest, source, nelems, pe); \
|
||||
__device__ void roc_shmem_##TNAME##_get_nbi_wave(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmem_get_nbi_wave<T>(dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmemx_##TNAME##_get_nbi_wg(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmemx_get_nbi_wg<T>(dest, source, nelems, pe); \
|
||||
__device__ void roc_shmem_##TNAME##_get_nbi_wg(T *dest, const T *source, \
|
||||
size_t nelems, int pe) { \
|
||||
roc_shmem_get_nbi_wg<T>(dest, source, nelems, pe); \
|
||||
} \
|
||||
__device__ void roc_shmem_ctx_##TNAME##_wg_broadcast( \
|
||||
roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, const T *source, \
|
||||
@@ -1380,8 +1380,8 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
|
||||
return roc_shmem_wait_until_some_vector<T>(ivars, nelems, indices, \
|
||||
status, cmp, vals); \
|
||||
} \
|
||||
__device__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val) { \
|
||||
return roc_shmem_test<T>(ivars, cmp, val); \
|
||||
__device__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val) { \
|
||||
return roc_shmem_test<T>(ivars, cmp, val); \
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
|
||||
+48
-48
@@ -489,11 +489,11 @@ __device__ void roc_shmem_wg_to_all(roc_shmem_ctx_t ctx, T *dest,
|
||||
*
|
||||
*/
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
__device__ void roc_shmem_put_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe);
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_wave(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_put_wave(T *dest, const T *source, size_t nelems,
|
||||
int pe);
|
||||
|
||||
/**
|
||||
@@ -517,11 +517,11 @@ __device__ void roc_shmemx_put_wave(T *dest, const T *source, size_t nelems,
|
||||
*
|
||||
*/
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
|
||||
__device__ void roc_shmem_put_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
|
||||
size_t nelems, int pe);
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_wg(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_put_wg(T *dest, const T *source, size_t nelems,
|
||||
int pe);
|
||||
|
||||
/**
|
||||
@@ -544,11 +544,11 @@ __device__ void roc_shmemx_put_wg(T *dest, const T *source, size_t nelems,
|
||||
*
|
||||
*/
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
__device__ void roc_shmem_get_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe);
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_wave(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_get_wave(T *dest, const T *source, size_t nelems,
|
||||
int pe);
|
||||
|
||||
/**
|
||||
@@ -571,11 +571,11 @@ __device__ void roc_shmemx_get_wave(T *dest, const T *source, size_t nelems,
|
||||
*
|
||||
*/
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
|
||||
__device__ void roc_shmem_get_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
|
||||
size_t nelems, int pe);
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_wg(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_get_wg(T *dest, const T *source, size_t nelems,
|
||||
int pe);
|
||||
|
||||
/**
|
||||
@@ -599,11 +599,11 @@ __device__ void roc_shmemx_get_wg(T *dest, const T *source, size_t nelems,
|
||||
*
|
||||
*/
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
__device__ void roc_shmem_put_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *src, size_t nelems, int pe);
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_nbi_wave(T *dest, const T *src, size_t nelems,
|
||||
__device__ void roc_shmem_put_nbi_wave(T *dest, const T *src, size_t nelems,
|
||||
int pe);
|
||||
|
||||
/**
|
||||
@@ -627,11 +627,11 @@ __device__ void roc_shmemx_put_nbi_wave(T *dest, const T *src, size_t nelems,
|
||||
*
|
||||
*/
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
|
||||
__device__ void roc_shmem_put_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *src, size_t nelems, int pe);
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_nbi_wg(T *dest, const T *src, size_t nelems,
|
||||
__device__ void roc_shmem_put_nbi_wg(T *dest, const T *src, size_t nelems,
|
||||
int pe);
|
||||
|
||||
/**
|
||||
@@ -655,11 +655,11 @@ __device__ void roc_shmemx_put_nbi_wg(T *dest, const T *src, size_t nelems,
|
||||
*
|
||||
*/
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
__device__ void roc_shmem_get_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe);
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_nbi_wave(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_get_nbi_wave(T *dest, const T *source, size_t nelems,
|
||||
int pe);
|
||||
|
||||
/**
|
||||
@@ -683,101 +683,101 @@ __device__ void roc_shmemx_get_nbi_wave(T *dest, const T *source, size_t nelems,
|
||||
*
|
||||
*/
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
|
||||
__device__ void roc_shmem_get_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
|
||||
const T *source, size_t nelems, int pe);
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_nbi_wg(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_get_nbi_wg(T *dest, const T *source, size_t nelems,
|
||||
int pe);
|
||||
|
||||
__device__ void roc_shmemx_putmem_wave(void *dest, const void *source,
|
||||
__device__ void roc_shmem_putmem_wave(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
roc_shmemx_ctx_putmem_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_ctx_putmem_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_putmem_wg(void *dest, const void *source,
|
||||
__device__ void roc_shmem_putmem_wg(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
roc_shmemx_ctx_putmem_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_ctx_putmem_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_wave(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_put_wave(T *dest, const T *source, size_t nelems,
|
||||
int pe) {
|
||||
roc_shmemx_put_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_put_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_wg(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_put_wg(T *dest, const T *source, size_t nelems,
|
||||
int pe) {
|
||||
roc_shmemx_put_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_put_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_getmem_wg(void *dest, const void *source,
|
||||
__device__ void roc_shmem_getmem_wg(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
roc_shmemx_ctx_getmem_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_ctx_getmem_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_wg(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_get_wg(T *dest, const T *source, size_t nelems,
|
||||
int pe) {
|
||||
roc_shmemx_get_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_get_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_getmem_wave(void *dest, const void *source,
|
||||
__device__ void roc_shmem_getmem_wave(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
roc_shmemx_ctx_getmem_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_ctx_getmem_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_wave(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_get_wave(T *dest, const T *source, size_t nelems,
|
||||
int pe) {
|
||||
roc_shmemx_get_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_get_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_putmem_nbi_wg(void *dest, const void *source,
|
||||
__device__ void roc_shmem_putmem_nbi_wg(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
roc_shmemx_ctx_putmem_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_ctx_putmem_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_nbi_wg(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_put_nbi_wg(T *dest, const T *source, size_t nelems,
|
||||
int pe) {
|
||||
roc_shmemx_put_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_put_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_putmem_nbi_wave(void *dest, const void *source,
|
||||
__device__ void roc_shmem_putmem_nbi_wave(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
roc_shmemx_ctx_putmem_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems,
|
||||
roc_shmem_ctx_putmem_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems,
|
||||
pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_put_nbi_wave(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_put_nbi_wave(T *dest, const T *source, size_t nelems,
|
||||
int pe) {
|
||||
roc_shmemx_put_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_put_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_getmem_nbi_wg(void *dest, const void *source,
|
||||
__device__ void roc_shmem_getmem_nbi_wg(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
roc_shmemx_ctx_getmem_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_ctx_getmem_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_nbi_wg(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_get_nbi_wg(T *dest, const T *source, size_t nelems,
|
||||
int pe) {
|
||||
roc_shmemx_get_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_get_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
__device__ void roc_shmemx_getmem_nbi_wave(void *dest, const void *source,
|
||||
__device__ void roc_shmem_getmem_nbi_wave(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
roc_shmemx_ctx_getmem_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems,
|
||||
roc_shmem_ctx_getmem_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems,
|
||||
pe);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmemx_get_nbi_wave(T *dest, const T *source, size_t nelems,
|
||||
__device__ void roc_shmem_get_nbi_wave(T *dest, const T *source, size_t nelems,
|
||||
int pe) {
|
||||
roc_shmemx_get_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
roc_shmem_get_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
@@ -54,16 +54,16 @@ __global__ void ExtendedPrimitiveTest(int loop, int skip, uint64_t *timer,
|
||||
|
||||
switch (type) {
|
||||
case WGGetTestType:
|
||||
roc_shmemx_ctx_getmem_wg(ctx, r_buf, s_buf, size, 1);
|
||||
roc_shmem_ctx_getmem_wg(ctx, r_buf, s_buf, size, 1);
|
||||
break;
|
||||
case WGGetNBITestType:
|
||||
roc_shmemx_ctx_getmem_nbi_wg(ctx, r_buf, s_buf, size, 1);
|
||||
roc_shmem_ctx_getmem_nbi_wg(ctx, r_buf, s_buf, size, 1);
|
||||
break;
|
||||
case WGPutTestType:
|
||||
roc_shmemx_ctx_putmem_wg(ctx, r_buf, s_buf, size, 1);
|
||||
roc_shmem_ctx_putmem_wg(ctx, r_buf, s_buf, size, 1);
|
||||
break;
|
||||
case WGPutNBITestType:
|
||||
roc_shmemx_ctx_putmem_nbi_wg(ctx, r_buf, s_buf, size, 1);
|
||||
roc_shmem_ctx_putmem_nbi_wg(ctx, r_buf, s_buf, size, 1);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
||||
@@ -56,16 +56,16 @@ __global__ void WaveLevelPrimitiveTest(int loop, int skip, uint64_t *timer,
|
||||
|
||||
switch (type) {
|
||||
case WAVEGetTestType:
|
||||
roc_shmemx_ctx_getmem_wave(ctx, r_buf, s_buf, size, 1);
|
||||
roc_shmem_ctx_getmem_wave(ctx, r_buf, s_buf, size, 1);
|
||||
break;
|
||||
case WAVEGetNBITestType:
|
||||
roc_shmemx_ctx_getmem_nbi_wave(ctx, r_buf, s_buf, size, 1);
|
||||
roc_shmem_ctx_getmem_nbi_wave(ctx, r_buf, s_buf, size, 1);
|
||||
break;
|
||||
case WAVEPutTestType:
|
||||
roc_shmemx_ctx_putmem_wave(ctx, r_buf, s_buf, size, 1);
|
||||
roc_shmem_ctx_putmem_wave(ctx, r_buf, s_buf, size, 1);
|
||||
break;
|
||||
case WAVEPutNBITestType:
|
||||
roc_shmemx_ctx_putmem_nbi_wave(ctx, r_buf, s_buf, size, 1);
|
||||
roc_shmem_ctx_putmem_nbi_wave(ctx, r_buf, s_buf, size, 1);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
||||
@@ -45,7 +45,7 @@ using namespace rocshmem;
|
||||
static int atoi_scaled(char *s);
|
||||
static void usage(char *pgm);
|
||||
|
||||
static double shmemx_wtime(void) {
|
||||
static double shmem_wtime(void) {
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
||||
@@ -154,14 +154,14 @@ int main(int argc, char **argv) {
|
||||
roc_shmem_barrier_all();
|
||||
|
||||
for (time_taken = 0.0, ps = i = 0; i < loops; i++) {
|
||||
start_time = shmemx_wtime();
|
||||
start_time = shmem_wtime();
|
||||
|
||||
roc_shmem_ctx_int_broadcast(ROC_SHMEM_CTX_DEFAULT, target, source, elements,
|
||||
0, 0, 0, npes, &pSync[ps]);
|
||||
|
||||
if (Serialize) roc_shmem_barrier_all();
|
||||
|
||||
time_taken += (shmemx_wtime() - start_time);
|
||||
time_taken += (shmem_wtime() - start_time);
|
||||
|
||||
if (ps_cnt > 1) {
|
||||
ps += ROC_SHMEM_BCAST_SYNC_SIZE;
|
||||
|
||||
@@ -83,7 +83,7 @@ static void usage(char *pgm) {
|
||||
pgm, NUM_ELEMENTS, DFLT_LOOPS);
|
||||
}
|
||||
|
||||
static inline double shmemx_wtime(void) {
|
||||
static inline double shmem_wtime(void) {
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, 0);
|
||||
return (double)((tv.tv_usec / 1000000.0) + tv.tv_sec);
|
||||
@@ -186,11 +186,11 @@ int main(int argc, char **argv) {
|
||||
roc_shmem_barrier_all();
|
||||
|
||||
for (i = 0; i < loops; i++) {
|
||||
start_time = shmemx_wtime();
|
||||
start_time = shmem_wtime();
|
||||
|
||||
roc_shmem_int_get(Target, Source, elements, target_pe);
|
||||
|
||||
time_taken += shmemx_wtime() - start_time;
|
||||
time_taken += shmem_wtime() - start_time;
|
||||
|
||||
if (me == 0) {
|
||||
if (Track && i > 0 && ((i % 200) == 0)) fprintf(stderr, ".%d", i);
|
||||
|
||||
@@ -83,7 +83,7 @@ static void usage(char *pgm) {
|
||||
pgm, NUM_ELEMENTS, DFLT_LOOPS);
|
||||
}
|
||||
|
||||
static inline double shmemx_wtime(void) {
|
||||
static inline double shmem_wtime(void) {
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, 0);
|
||||
return (double)((tv.tv_usec / 1000000.0) + tv.tv_sec);
|
||||
@@ -214,11 +214,11 @@ int main(int argc, char **argv) {
|
||||
roc_shmem_barrier_all();
|
||||
|
||||
for (i = 0; i < loops; i++) {
|
||||
start_time = shmemx_wtime();
|
||||
start_time = shmem_wtime();
|
||||
|
||||
roc_shmem_int_put(Target, Source, elements, target_PE);
|
||||
|
||||
time_taken += (shmemx_wtime() - start_time);
|
||||
time_taken += (shmem_wtime() - start_time);
|
||||
|
||||
if (me == 0) {
|
||||
if (Track && i > 0 && ((i % 200) == 0)) fprintf(stderr, ".%d", i);
|
||||
|
||||
@@ -44,7 +44,7 @@ using namespace rocshmem;
|
||||
|
||||
#define LOOPS 25000
|
||||
|
||||
static double shmemx_wtime(void) {
|
||||
static double shmem_wtime(void) {
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
||||
@@ -86,11 +86,11 @@ int main(int argc, char *argv[]) {
|
||||
roc_shmem_barrier_all();
|
||||
|
||||
neighbor = (my_pe + 1) % npes;
|
||||
start_time = shmemx_wtime();
|
||||
start_time = shmem_wtime();
|
||||
for (j = 0, elapsed = 0.0; j < loops; j++) {
|
||||
start_time = shmemx_wtime();
|
||||
start_time = shmem_wtime();
|
||||
lval = roc_shmem_int64_atomic_fetch_inc((int64_t *)&data[1], neighbor);
|
||||
elapsed += shmemx_wtime() - start_time;
|
||||
elapsed += shmem_wtime() - start_time;
|
||||
if (lval != (long)j) {
|
||||
fprintf(stderr, "[%d] Test: FAIL previous val %ld != %d Exit.\n", my_pe,
|
||||
lval, j);
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador