2
0

converted roc_shmemx to roc_shmem

Este cometimento está contido em:
Yiltan Temucin
2024-11-12 08:37:56 -06:00
ascendente 958575d8a4
cometimento c2b736ef3d
10 ficheiros modificados com 244 adições e 244 eliminações
+52 -52
Ver ficheiro
@@ -2086,39 +2086,39 @@ TEST_API_GEN(unsigned long long, ulonglong) // NOLINT(runtime/int)
*****************************************************************************/
/*
* MACRO DECLARE SHMEMX_PUT APIs
* MACRO DECLARE SHMEM_PUT APIs
*/
#define PUT_API_EXT_GEN(GRAN, T, TNAME) \
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_##TNAME##_put_##GRAN( \
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_put_##GRAN( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe); \
__device__ ATTR_NO_INLINE void roc_shmemx_##TNAME##_put_##GRAN( \
__device__ ATTR_NO_INLINE void roc_shmem_##TNAME##_put_##GRAN( \
T *dest, const T *source, size_t nelems, int pe);
/*
* MACRO DECLARE SHMEMX_GET APIs
* MACRO DECLARE SHMEM_GET APIs
*/
#define GET_API_EXT_GEN(GRAN, T, TNAME) \
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_##TNAME##_get_##GRAN( \
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_get_##GRAN( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe); \
__device__ ATTR_NO_INLINE void roc_shmemx_##TNAME##_get_##GRAN( \
__device__ ATTR_NO_INLINE void roc_shmem_##TNAME##_get_##GRAN( \
T *dest, const T *source, size_t nelems, int pe);
/*
* MACRO DECLARE SHMEMX_PUT_NBI APIs
* MACRO DECLARE SHMEM_PUT_NBI APIs
*/
#define PUT_NBI_API_EXT_GEN(GRAN, T, TNAME) \
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_##TNAME##_put_nbi_##GRAN( \
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_put_nbi_##GRAN( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe); \
__device__ ATTR_NO_INLINE void roc_shmemx_##TNAME##_put_nbi_##GRAN( \
__device__ ATTR_NO_INLINE void roc_shmem_##TNAME##_put_nbi_##GRAN( \
T *dest, const T *source, size_t nelems, int pe);
/*
* MACRO DECLARE SHMEMX_GET_NBI APIs
* MACRO DECLARE SHMEM_GET_NBI APIs
*/
#define GET_NBI_API_EXT_GEN(GRAN, T, TNAME) \
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_##TNAME##_get_nbi_##GRAN( \
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_get_nbi_##GRAN( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe); \
__device__ ATTR_NO_INLINE void roc_shmemx_##TNAME##_get_nbi_##GRAN( \
__device__ ATTR_NO_INLINE void roc_shmem_##TNAME##_get_nbi_##GRAN( \
T *dest, const T *source, size_t nelems, int pe);
/**
@@ -2140,12 +2140,12 @@ TEST_API_GEN(unsigned long long, ulonglong) // NOLINT(runtime/int)
*
* @return void.
*/
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_putmem_wave(
__device__ ATTR_NO_INLINE void roc_shmem_ctx_putmem_wave(
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmemx_putmem_wave(void *dest,
const void *source,
size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmem_putmem_wave(void *dest,
const void *source,
size_t nelems, int pe);
/**
* @brief Writes contiguous data of \p nelems bytes from \p source on the
@@ -2166,14 +2166,14 @@ __device__ ATTR_NO_INLINE void roc_shmemx_putmem_wave(void *dest,
*
* @return void.
*/
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_putmem_wg(roc_shmem_ctx_t ctx,
void *dest,
const void *source,
size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmem_ctx_putmem_wg(roc_shmem_ctx_t ctx,
void *dest,
const void *source,
size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmemx_putmem_wg(void *dest,
const void *source,
size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmem_putmem_wg(void *dest,
const void *source,
size_t nelems, int pe);
/**
* @brief Writes contiguous data of \p nelems elements from \p source on the
@@ -2265,12 +2265,12 @@ PUT_API_EXT_GEN(wg, unsigned long long, ulonglong) // NOLINT(runtime/int)
*
* @return void.
*/
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_getmem_wave(
__device__ ATTR_NO_INLINE void roc_shmem_ctx_getmem_wave(
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmemx_getmem_wave(void *dest,
const void *source,
size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmem_getmem_wave(void *dest,
const void *source,
size_t nelems, int pe);
/**
* @brief Reads contiguous data of \p nelems bytes from \p source on \p pe
@@ -2290,14 +2290,14 @@ __device__ ATTR_NO_INLINE void roc_shmemx_getmem_wave(void *dest,
*
* @return void.
*/
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_getmem_wg(roc_shmem_ctx_t ctx,
void *dest,
const void *source,
size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmem_ctx_getmem_wg(roc_shmem_ctx_t ctx,
void *dest,
const void *source,
size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmemx_getmem_wg(void *dest,
const void *source,
size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmem_getmem_wg(void *dest,
const void *source,
size_t nelems, int pe);
/**
* @brief Reads contiguous data of \p nelems elements from \p source on \p pe
@@ -2389,13 +2389,13 @@ GET_API_EXT_GEN(wg, unsigned long long, ulonglong) // NOLINT(runtime/int)
*
* @return void.
*/
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_putmem_nbi_wave(
__device__ ATTR_NO_INLINE void roc_shmem_ctx_putmem_nbi_wave(
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmemx_putmem_nbi_wave(void *dest,
const void *source,
size_t nelems,
int pe);
__device__ ATTR_NO_INLINE void roc_shmem_putmem_nbi_wave(void *dest,
const void *source,
size_t nelems,
int pe);
/**
* @brief Writes contiguous data of \p nelems elements from \p source on the
@@ -2454,12 +2454,12 @@ PUT_NBI_API_EXT_GEN(wave, unsigned long long, ulonglong) // NOLINT
*
* @return void.
*/
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_putmem_nbi_wg(
__device__ ATTR_NO_INLINE void roc_shmem_ctx_putmem_nbi_wg(
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmemx_putmem_nbi_wg(void *dest,
const void *source,
size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmem_putmem_nbi_wg(void *dest,
const void *source,
size_t nelems, int pe);
/**
* @brief Writes contiguous data of \p nelems elements from \p source on the
@@ -2518,13 +2518,13 @@ PUT_NBI_API_EXT_GEN(wg, unsigned long long, ulonglong) // NOLINT(runtime/int)
*
* @return void.
*/
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_getmem_nbi_wave(
__device__ ATTR_NO_INLINE void roc_shmem_ctx_getmem_nbi_wave(
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmemx_getmem_nbi_wave(void *dest,
const void *source,
size_t nelems,
int pe);
__device__ ATTR_NO_INLINE void roc_shmem_getmem_nbi_wave(void *dest,
const void *source,
size_t nelems,
int pe);
/**
* @brief Reads contiguous data of \p nelems elements from \p source on \p pe
@@ -2583,12 +2583,12 @@ GET_NBI_API_EXT_GEN(wave, unsigned long long, ulonglong) // NOLINT
*
* @return void.
*/
__device__ ATTR_NO_INLINE void roc_shmemx_ctx_getmem_nbi_wg(
__device__ ATTR_NO_INLINE void roc_shmem_ctx_getmem_nbi_wg(
roc_shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmemx_getmem_nbi_wg(void *dest,
const void *source,
size_t nelems, int pe);
__device__ ATTR_NO_INLINE void roc_shmem_getmem_nbi_wg(void *dest,
const void *source,
size_t nelems, int pe);
/**
* @brief Reads contiguous data of \p nelems elements from \p source on \p pe
+1 -1
Ver ficheiro
@@ -47,7 +47,7 @@ PrimitiveTest(int loop,
int block_id = hipBlockIdx_x;
for(int i =0; i< loop; i++){
roc_shmemx_ctx_putmem_nbi_wg(ctx, &r_buf[my_pe*size], &s_buf[block_id * size], size, block_id);
roc_shmem_ctx_putmem_nbi_wg(ctx, &r_buf[my_pe*size], &s_buf[block_id * size], size, block_id);
if(hipThreadIdx_x==0){
//roc_shmem_ctx_quiet(ctx);
//roc_shmem_ctx_threadfence_system(ctx);
+122 -122
Ver ficheiro
@@ -716,132 +716,132 @@ __device__ void roc_shmem_atomic_xor(roc_shmem_ctx_t ctx, T *dest, T val,
/**
* SHMEM RMA API for WG and Wave level (formerly the roc_shmemx_ extension API)
*/
__device__ void roc_shmemx_ctx_putmem_wave(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmemx_ctx_putmem_wave\n");
__device__ void roc_shmem_ctx_putmem_wave(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmem_ctx_putmem_wave\n");
get_internal_ctx(ctx)->putmem_wave(dest, source, nelems, pe);
}
__device__ void roc_shmemx_ctx_putmem_wg(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmemx_ctx_putmem_wg\n");
__device__ void roc_shmem_ctx_putmem_wg(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmem_ctx_putmem_wg\n");
get_internal_ctx(ctx)->putmem_wg(dest, source, nelems, pe);
}
__device__ void roc_shmemx_ctx_putmem_nbi_wave(roc_shmem_ctx_t ctx, void *dest,
const void *source,
size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmemx_ctx_putmem_nbi_wave\n");
__device__ void roc_shmem_ctx_putmem_nbi_wave(roc_shmem_ctx_t ctx, void *dest,
const void *source,
size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmem_ctx_putmem_nbi_wave\n");
get_internal_ctx(ctx)->putmem_nbi_wave(dest, source, nelems, pe);
}
__device__ void roc_shmemx_ctx_putmem_nbi_wg(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmemx_ctx_putmem_nbi_wg\n");
__device__ void roc_shmem_ctx_putmem_nbi_wg(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmem_ctx_putmem_nbi_wg\n");
get_internal_ctx(ctx)->putmem_nbi_wg(dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_put_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmemx_put_wave\n");
__device__ void roc_shmem_put_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmem_put_wave\n");
get_internal_ctx(ctx)->put_wave(dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_put_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmemx_put_wg\n");
__device__ void roc_shmem_put_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmem_put_wg\n");
get_internal_ctx(ctx)->put_wg(dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_put_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmemx_put_nbi_wave\n");
__device__ void roc_shmem_put_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmem_put_nbi_wave\n");
get_internal_ctx(ctx)->put_nbi_wave(dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_put_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmemx_put_nbi_wg\n");
__device__ void roc_shmem_put_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmem_put_nbi_wg\n");
get_internal_ctx(ctx)->put_nbi_wg(dest, source, nelems, pe);
}
__device__ void roc_shmemx_ctx_getmem_wg(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmemx_ctx_getmem_wg\n");
__device__ void roc_shmem_ctx_getmem_wg(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmem_ctx_getmem_wg\n");
get_internal_ctx(ctx)->getmem_wg(dest, source, nelems, pe);
}
__device__ void roc_shmemx_ctx_getmem_wave(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmemx_ctx_getmem_wave\n");
__device__ void roc_shmem_ctx_getmem_wave(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmem_ctx_getmem_wave\n");
get_internal_ctx(ctx)->getmem_wave(dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_get_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmemx_get_wg\n");
__device__ void roc_shmem_get_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmem_get_wg\n");
get_internal_ctx(ctx)->get_wg(dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_get_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmemx_get_wave\n");
__device__ void roc_shmem_get_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmem_get_wave\n");
get_internal_ctx(ctx)->get_wave(dest, source, nelems, pe);
}
__device__ void roc_shmemx_ctx_getmem_nbi_wg(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmemx_ctx_getmem_nbi_wg\n");
__device__ void roc_shmem_ctx_getmem_nbi_wg(roc_shmem_ctx_t ctx, void *dest,
const void *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmem_ctx_getmem_nbi_wg\n");
get_internal_ctx(ctx)->getmem_nbi_wg(dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_get_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmemx_get_nbi_wg\n");
__device__ void roc_shmem_get_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmem_get_nbi_wg\n");
get_internal_ctx(ctx)->get_nbi_wg(dest, source, nelems, pe);
}
__device__ void roc_shmemx_ctx_getmem_nbi_wave(roc_shmem_ctx_t ctx, void *dest,
const void *source,
size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmemx_ctx_getmem_nbi_wave\n");
__device__ void roc_shmem_ctx_getmem_nbi_wave(roc_shmem_ctx_t ctx, void *dest,
const void *source,
size_t nelems, int pe) {
GPU_DPRINTF("Function: roc_shmem_ctx_getmem_nbi_wave\n");
get_internal_ctx(ctx)->getmem_nbi_wave(dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_get_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmemx_get_nbi_wave\n");
__device__ void roc_shmem_get_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems,
int pe) {
GPU_DPRINTF("Function: roc_shmem_get_nbi_wave\n");
get_internal_ctx(ctx)->get_nbi_wave(dest, source, nelems, pe);
}
@@ -903,38 +903,38 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
template __device__ void roc_shmem_wg_fcollect<T>( \
roc_shmem_ctx_t ctx, roc_shmem_team_t team, T * dest, const T *source, \
int nelem); \
template __device__ void roc_shmemx_put_wave<T>( \
template __device__ void roc_shmem_put_wave<T>( \
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
template __device__ void roc_shmemx_put_wg<T>( \
template __device__ void roc_shmem_put_wg<T>( \
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
template __device__ void roc_shmemx_put_wave<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ void roc_shmemx_put_wg<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ void roc_shmemx_put_nbi_wave<T>( \
template __device__ void roc_shmem_put_wave<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ void roc_shmem_put_wg<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ void roc_shmem_put_nbi_wave<T>( \
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
template __device__ void roc_shmemx_put_nbi_wg<T>( \
template __device__ void roc_shmem_put_nbi_wg<T>( \
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
template __device__ void roc_shmemx_put_nbi_wave<T>( \
template __device__ void roc_shmem_put_nbi_wave<T>( \
T * dest, const T *source, size_t nelems, int pe); \
template __device__ void roc_shmemx_put_nbi_wg<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ void roc_shmemx_get_wave<T>( \
template __device__ void roc_shmem_put_nbi_wg<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ void roc_shmem_get_wave<T>( \
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
template __device__ void roc_shmemx_get_wg<T>( \
template __device__ void roc_shmem_get_wg<T>( \
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
template __device__ void roc_shmemx_get_wave<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ void roc_shmemx_get_wg<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ void roc_shmemx_get_nbi_wave<T>( \
template __device__ void roc_shmem_get_wave<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ void roc_shmem_get_wg<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ void roc_shmem_get_nbi_wave<T>( \
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
template __device__ void roc_shmemx_get_nbi_wg<T>( \
template __device__ void roc_shmem_get_nbi_wg<T>( \
roc_shmem_ctx_t ctx, T * dest, const T *source, size_t nelems, int pe); \
template __device__ void roc_shmemx_get_nbi_wave<T>( \
template __device__ void roc_shmem_get_nbi_wave<T>( \
T * dest, const T *source, size_t nelems, int pe); \
template __device__ void roc_shmemx_get_nbi_wg<T>(T * dest, const T *source, \
size_t nelems, int pe);
template __device__ void roc_shmem_get_nbi_wg<T>(T * dest, const T *source, \
size_t nelems, int pe);
/**
* Declare templates for the standard amo types
@@ -1142,69 +1142,69 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
__device__ T roc_shmem_##TNAME##_g(const T *source, int pe) { \
return roc_shmem_g<T>(source, pe); \
} \
__device__ void roc_shmemx_ctx_##TNAME##_put_wave( \
__device__ void roc_shmem_ctx_##TNAME##_put_wave( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
roc_shmemx_put_wave<T>(ctx, dest, source, nelems, pe); \
roc_shmem_put_wave<T>(ctx, dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_ctx_##TNAME##_put_wg( \
__device__ void roc_shmem_ctx_##TNAME##_put_wg( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
roc_shmemx_put_wg<T>(ctx, dest, source, nelems, pe); \
roc_shmem_put_wg<T>(ctx, dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_##TNAME##_put_wave(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmemx_put_wave<T>(dest, source, nelems, pe); \
__device__ void roc_shmem_##TNAME##_put_wave(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmem_put_wave<T>(dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_##TNAME##_put_wg(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmemx_put_wg<T>(dest, source, nelems, pe); \
__device__ void roc_shmem_##TNAME##_put_wg(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmem_put_wg<T>(dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_ctx_##TNAME##_put_nbi_wave( \
__device__ void roc_shmem_ctx_##TNAME##_put_nbi_wave( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
roc_shmemx_put_nbi_wave<T>(ctx, dest, source, nelems, pe); \
roc_shmem_put_nbi_wave<T>(ctx, dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_ctx_##TNAME##_put_nbi_wg( \
__device__ void roc_shmem_ctx_##TNAME##_put_nbi_wg( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
roc_shmemx_put_nbi_wg<T>(ctx, dest, source, nelems, pe); \
roc_shmem_put_nbi_wg<T>(ctx, dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_##TNAME##_put_nbi_wave(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmemx_put_nbi_wave<T>(dest, source, nelems, pe); \
__device__ void roc_shmem_##TNAME##_put_nbi_wave(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmem_put_nbi_wave<T>(dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_##TNAME##_put_nbi_wg(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmemx_put_nbi_wg<T>(dest, source, nelems, pe); \
__device__ void roc_shmem_##TNAME##_put_nbi_wg(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmem_put_nbi_wg<T>(dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_ctx_##TNAME##_get_wave( \
__device__ void roc_shmem_ctx_##TNAME##_get_wave( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
roc_shmemx_get_wave<T>(ctx, dest, source, nelems, pe); \
roc_shmem_get_wave<T>(ctx, dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_ctx_##TNAME##_get_wg( \
__device__ void roc_shmem_ctx_##TNAME##_get_wg( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
roc_shmemx_get_wg<T>(ctx, dest, source, nelems, pe); \
roc_shmem_get_wg<T>(ctx, dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_##TNAME##_get_wave(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmemx_get_wave<T>(dest, source, nelems, pe); \
__device__ void roc_shmem_##TNAME##_get_wave(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmem_get_wave<T>(dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_##TNAME##_get_wg(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmemx_get_wg<T>(dest, source, nelems, pe); \
__device__ void roc_shmem_##TNAME##_get_wg(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmem_get_wg<T>(dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_ctx_##TNAME##_get_nbi_wave( \
__device__ void roc_shmem_ctx_##TNAME##_get_nbi_wave( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
roc_shmemx_get_nbi_wave<T>(ctx, dest, source, nelems, pe); \
roc_shmem_get_nbi_wave<T>(ctx, dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_ctx_##TNAME##_get_nbi_wg( \
__device__ void roc_shmem_ctx_##TNAME##_get_nbi_wg( \
roc_shmem_ctx_t ctx, T *dest, const T *source, size_t nelems, int pe) { \
roc_shmemx_get_nbi_wg<T>(ctx, dest, source, nelems, pe); \
roc_shmem_get_nbi_wg<T>(ctx, dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_##TNAME##_get_nbi_wave(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmemx_get_nbi_wave<T>(dest, source, nelems, pe); \
__device__ void roc_shmem_##TNAME##_get_nbi_wave(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmem_get_nbi_wave<T>(dest, source, nelems, pe); \
} \
__device__ void roc_shmemx_##TNAME##_get_nbi_wg(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmemx_get_nbi_wg<T>(dest, source, nelems, pe); \
__device__ void roc_shmem_##TNAME##_get_nbi_wg(T *dest, const T *source, \
size_t nelems, int pe) { \
roc_shmem_get_nbi_wg<T>(dest, source, nelems, pe); \
} \
__device__ void roc_shmem_ctx_##TNAME##_wg_broadcast( \
roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, const T *source, \
@@ -1380,8 +1380,8 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
return roc_shmem_wait_until_some_vector<T>(ivars, nelems, indices, \
status, cmp, vals); \
} \
__device__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val) { \
return roc_shmem_test<T>(ivars, cmp, val); \
__device__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val) { \
return roc_shmem_test<T>(ivars, cmp, val); \
}
/******************************************************************************
+48 -48
Ver ficheiro
@@ -489,11 +489,11 @@ __device__ void roc_shmem_wg_to_all(roc_shmem_ctx_t ctx, T *dest,
*
*/
template <typename T>
__device__ void roc_shmemx_put_wave(roc_shmem_ctx_t ctx, T *dest,
__device__ void roc_shmem_put_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe);
template <typename T>
__device__ void roc_shmemx_put_wave(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_put_wave(T *dest, const T *source, size_t nelems,
int pe);
/**
@@ -517,11 +517,11 @@ __device__ void roc_shmemx_put_wave(T *dest, const T *source, size_t nelems,
*
*/
template <typename T>
__device__ void roc_shmemx_put_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
__device__ void roc_shmem_put_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
size_t nelems, int pe);
template <typename T>
__device__ void roc_shmemx_put_wg(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_put_wg(T *dest, const T *source, size_t nelems,
int pe);
/**
@@ -544,11 +544,11 @@ __device__ void roc_shmemx_put_wg(T *dest, const T *source, size_t nelems,
*
*/
template <typename T>
__device__ void roc_shmemx_get_wave(roc_shmem_ctx_t ctx, T *dest,
__device__ void roc_shmem_get_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe);
template <typename T>
__device__ void roc_shmemx_get_wave(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_get_wave(T *dest, const T *source, size_t nelems,
int pe);
/**
@@ -571,11 +571,11 @@ __device__ void roc_shmemx_get_wave(T *dest, const T *source, size_t nelems,
*
*/
template <typename T>
__device__ void roc_shmemx_get_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
__device__ void roc_shmem_get_wg(roc_shmem_ctx_t ctx, T *dest, const T *source,
size_t nelems, int pe);
template <typename T>
__device__ void roc_shmemx_get_wg(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_get_wg(T *dest, const T *source, size_t nelems,
int pe);
/**
@@ -599,11 +599,11 @@ __device__ void roc_shmemx_get_wg(T *dest, const T *source, size_t nelems,
*
*/
template <typename T>
__device__ void roc_shmemx_put_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
__device__ void roc_shmem_put_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
const T *src, size_t nelems, int pe);
template <typename T>
__device__ void roc_shmemx_put_nbi_wave(T *dest, const T *src, size_t nelems,
__device__ void roc_shmem_put_nbi_wave(T *dest, const T *src, size_t nelems,
int pe);
/**
@@ -627,11 +627,11 @@ __device__ void roc_shmemx_put_nbi_wave(T *dest, const T *src, size_t nelems,
*
*/
template <typename T>
__device__ void roc_shmemx_put_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
__device__ void roc_shmem_put_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
const T *src, size_t nelems, int pe);
template <typename T>
__device__ void roc_shmemx_put_nbi_wg(T *dest, const T *src, size_t nelems,
__device__ void roc_shmem_put_nbi_wg(T *dest, const T *src, size_t nelems,
int pe);
/**
@@ -655,11 +655,11 @@ __device__ void roc_shmemx_put_nbi_wg(T *dest, const T *src, size_t nelems,
*
*/
template <typename T>
__device__ void roc_shmemx_get_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
__device__ void roc_shmem_get_nbi_wave(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe);
template <typename T>
__device__ void roc_shmemx_get_nbi_wave(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_get_nbi_wave(T *dest, const T *source, size_t nelems,
int pe);
/**
@@ -683,101 +683,101 @@ __device__ void roc_shmemx_get_nbi_wave(T *dest, const T *source, size_t nelems,
*
*/
template <typename T>
__device__ void roc_shmemx_get_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
__device__ void roc_shmem_get_nbi_wg(roc_shmem_ctx_t ctx, T *dest,
const T *source, size_t nelems, int pe);
template <typename T>
__device__ void roc_shmemx_get_nbi_wg(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_get_nbi_wg(T *dest, const T *source, size_t nelems,
int pe);
__device__ void roc_shmemx_putmem_wave(void *dest, const void *source,
__device__ void roc_shmem_putmem_wave(void *dest, const void *source,
size_t nelems, int pe) {
roc_shmemx_ctx_putmem_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_ctx_putmem_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
__device__ void roc_shmemx_putmem_wg(void *dest, const void *source,
__device__ void roc_shmem_putmem_wg(void *dest, const void *source,
size_t nelems, int pe) {
roc_shmemx_ctx_putmem_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_ctx_putmem_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_put_wave(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_put_wave(T *dest, const T *source, size_t nelems,
int pe) {
roc_shmemx_put_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_put_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_put_wg(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_put_wg(T *dest, const T *source, size_t nelems,
int pe) {
roc_shmemx_put_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_put_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
__device__ void roc_shmemx_getmem_wg(void *dest, const void *source,
__device__ void roc_shmem_getmem_wg(void *dest, const void *source,
size_t nelems, int pe) {
roc_shmemx_ctx_getmem_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_ctx_getmem_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_get_wg(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_get_wg(T *dest, const T *source, size_t nelems,
int pe) {
roc_shmemx_get_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_get_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
__device__ void roc_shmemx_getmem_wave(void *dest, const void *source,
__device__ void roc_shmem_getmem_wave(void *dest, const void *source,
size_t nelems, int pe) {
roc_shmemx_ctx_getmem_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_ctx_getmem_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_get_wave(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_get_wave(T *dest, const T *source, size_t nelems,
int pe) {
roc_shmemx_get_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_get_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
__device__ void roc_shmemx_putmem_nbi_wg(void *dest, const void *source,
__device__ void roc_shmem_putmem_nbi_wg(void *dest, const void *source,
size_t nelems, int pe) {
roc_shmemx_ctx_putmem_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_ctx_putmem_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
template <typename T>
__device__ void roc_shmemx_put_nbi_wg(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_put_nbi_wg(T *dest, const T *source, size_t nelems,
int pe) {
roc_shmemx_put_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_put_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
__device__ void roc_shmemx_putmem_nbi_wave(void *dest, const void *source,
__device__ void roc_shmem_putmem_nbi_wave(void *dest, const void *source,
size_t nelems, int pe) {
roc_shmemx_ctx_putmem_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems,
roc_shmem_ctx_putmem_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems,
pe);
}
template <typename T>
__device__ void roc_shmemx_put_nbi_wave(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_put_nbi_wave(T *dest, const T *source, size_t nelems,
int pe) {
roc_shmemx_put_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_put_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
__device__ void roc_shmemx_getmem_nbi_wg(void *dest, const void *source,
__device__ void roc_shmem_getmem_nbi_wg(void *dest, const void *source,
size_t nelems, int pe) {
roc_shmemx_ctx_getmem_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_ctx_getmem_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
// Typed, context-free get_nbi_wg overload: calls the same-named function with
// ROC_SHMEM_CTX_DEFAULT prepended as the first argument and dest, source,
// nelems and pe passed through unchanged. The five-argument overload it
// resolves to is not visible here (presumably the context-taking template).
// NOTE(review): the roc_shmemx_* / roc_shmem_* line pairs look like the old and
// new sides of the rename diff rendered without +/- markers; presumably only
// the roc_shmem_* lines belong in real source -- confirm.
template <typename T>
__device__ void roc_shmemx_get_nbi_wg(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_get_nbi_wg(T *dest, const T *source, size_t nelems,
int pe) {
roc_shmemx_get_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_get_nbi_wg(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
// Context-free getmem_nbi_wave entry point: forwards dest, source, nelems and
// pe unchanged to the ctx variant, supplying ROC_SHMEM_CTX_DEFAULT as the
// context.
// NOTE(review): the roc_shmemx_* / roc_shmem_* line pairs look like the old and
// new sides of the rename diff rendered without +/- markers; presumably only
// the roc_shmem_* lines belong in real source -- confirm.
__device__ void roc_shmemx_getmem_nbi_wave(void *dest, const void *source,
__device__ void roc_shmem_getmem_nbi_wave(void *dest, const void *source,
size_t nelems, int pe) {
roc_shmemx_ctx_getmem_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems,
roc_shmem_ctx_getmem_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems,
pe);
}
// Typed, context-free get_nbi_wave overload: calls the same-named function
// with ROC_SHMEM_CTX_DEFAULT prepended as the first argument and dest, source,
// nelems and pe passed through unchanged. The five-argument overload it
// resolves to is not visible here (presumably the context-taking template).
// NOTE(review): the roc_shmemx_* / roc_shmem_* line pairs look like the old and
// new sides of the rename diff rendered without +/- markers; presumably only
// the roc_shmem_* lines belong in real source -- confirm.
template <typename T>
__device__ void roc_shmemx_get_nbi_wave(T *dest, const T *source, size_t nelems,
__device__ void roc_shmem_get_nbi_wave(T *dest, const T *source, size_t nelems,
int pe) {
roc_shmemx_get_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
roc_shmem_get_nbi_wave(ROC_SHMEM_CTX_DEFAULT, dest, source, nelems, pe);
}
} // namespace rocshmem
+4 -4
Ver ficheiro
@@ -54,16 +54,16 @@ __global__ void ExtendedPrimitiveTest(int loop, int skip, uint64_t *timer,
switch (type) {
case WGGetTestType:
roc_shmemx_ctx_getmem_wg(ctx, r_buf, s_buf, size, 1);
roc_shmem_ctx_getmem_wg(ctx, r_buf, s_buf, size, 1);
break;
case WGGetNBITestType:
roc_shmemx_ctx_getmem_nbi_wg(ctx, r_buf, s_buf, size, 1);
roc_shmem_ctx_getmem_nbi_wg(ctx, r_buf, s_buf, size, 1);
break;
case WGPutTestType:
roc_shmemx_ctx_putmem_wg(ctx, r_buf, s_buf, size, 1);
roc_shmem_ctx_putmem_wg(ctx, r_buf, s_buf, size, 1);
break;
case WGPutNBITestType:
roc_shmemx_ctx_putmem_nbi_wg(ctx, r_buf, s_buf, size, 1);
roc_shmem_ctx_putmem_nbi_wg(ctx, r_buf, s_buf, size, 1);
break;
default:
break;
+4 -4
Ver ficheiro
@@ -56,16 +56,16 @@ __global__ void WaveLevelPrimitiveTest(int loop, int skip, uint64_t *timer,
switch (type) {
case WAVEGetTestType:
roc_shmemx_ctx_getmem_wave(ctx, r_buf, s_buf, size, 1);
roc_shmem_ctx_getmem_wave(ctx, r_buf, s_buf, size, 1);
break;
case WAVEGetNBITestType:
roc_shmemx_ctx_getmem_nbi_wave(ctx, r_buf, s_buf, size, 1);
roc_shmem_ctx_getmem_nbi_wave(ctx, r_buf, s_buf, size, 1);
break;
case WAVEPutTestType:
roc_shmemx_ctx_putmem_wave(ctx, r_buf, s_buf, size, 1);
roc_shmem_ctx_putmem_wave(ctx, r_buf, s_buf, size, 1);
break;
case WAVEPutNBITestType:
roc_shmemx_ctx_putmem_nbi_wave(ctx, r_buf, s_buf, size, 1);
roc_shmem_ctx_putmem_nbi_wave(ctx, r_buf, s_buf, size, 1);
break;
default:
break;
+3 -3
Ver ficheiro
@@ -45,7 +45,7 @@ using namespace rocshmem;
static int atoi_scaled(char *s);
static void usage(char *pgm);
static double shmemx_wtime(void) {
static double shmem_wtime(void) {
struct timeval tv;
gettimeofday(&tv, NULL);
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
@@ -154,14 +154,14 @@ int main(int argc, char **argv) {
roc_shmem_barrier_all();
for (time_taken = 0.0, ps = i = 0; i < loops; i++) {
start_time = shmemx_wtime();
start_time = shmem_wtime();
roc_shmem_ctx_int_broadcast(ROC_SHMEM_CTX_DEFAULT, target, source, elements,
0, 0, 0, npes, &pSync[ps]);
if (Serialize) roc_shmem_barrier_all();
time_taken += (shmemx_wtime() - start_time);
time_taken += (shmem_wtime() - start_time);
if (ps_cnt > 1) {
ps += ROC_SHMEM_BCAST_SYNC_SIZE;
+3 -3
Ver ficheiro
@@ -83,7 +83,7 @@ static void usage(char *pgm) {
pgm, NUM_ELEMENTS, DFLT_LOOPS);
}
static inline double shmemx_wtime(void) {
static inline double shmem_wtime(void) {
struct timeval tv;
gettimeofday(&tv, 0);
return (double)((tv.tv_usec / 1000000.0) + tv.tv_sec);
@@ -186,11 +186,11 @@ int main(int argc, char **argv) {
roc_shmem_barrier_all();
for (i = 0; i < loops; i++) {
start_time = shmemx_wtime();
start_time = shmem_wtime();
roc_shmem_int_get(Target, Source, elements, target_pe);
time_taken += shmemx_wtime() - start_time;
time_taken += shmem_wtime() - start_time;
if (me == 0) {
if (Track && i > 0 && ((i % 200) == 0)) fprintf(stderr, ".%d", i);
+3 -3
Ver ficheiro
@@ -83,7 +83,7 @@ static void usage(char *pgm) {
pgm, NUM_ELEMENTS, DFLT_LOOPS);
}
static inline double shmemx_wtime(void) {
static inline double shmem_wtime(void) {
struct timeval tv;
gettimeofday(&tv, 0);
return (double)((tv.tv_usec / 1000000.0) + tv.tv_sec);
@@ -214,11 +214,11 @@ int main(int argc, char **argv) {
roc_shmem_barrier_all();
for (i = 0; i < loops; i++) {
start_time = shmemx_wtime();
start_time = shmem_wtime();
roc_shmem_int_put(Target, Source, elements, target_PE);
time_taken += (shmemx_wtime() - start_time);
time_taken += (shmem_wtime() - start_time);
if (me == 0) {
if (Track && i > 0 && ((i % 200) == 0)) fprintf(stderr, ".%d", i);
+4 -4
Ver ficheiro
@@ -44,7 +44,7 @@ using namespace rocshmem;
#define LOOPS 25000
static double shmemx_wtime(void) {
static double shmem_wtime(void) {
struct timeval tv;
gettimeofday(&tv, NULL);
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
@@ -86,11 +86,11 @@ int main(int argc, char *argv[]) {
roc_shmem_barrier_all();
neighbor = (my_pe + 1) % npes;
start_time = shmemx_wtime();
start_time = shmem_wtime();
for (j = 0, elapsed = 0.0; j < loops; j++) {
start_time = shmemx_wtime();
start_time = shmem_wtime();
lval = roc_shmem_int64_atomic_fetch_inc((int64_t *)&data[1], neighbor);
elapsed += shmemx_wtime() - start_time;
elapsed += shmem_wtime() - start_time;
if (lval != (long)j) {
fprintf(stderr, "[%d] Test: FAIL previous val %ld != %d Exit.\n", my_pe,
lval, j);