diff --git a/include/roc_shmem/roc_shmem.hpp b/include/roc_shmem/roc_shmem.hpp index 3b9d6d0c3b..c0484e8bd7 100644 --- a/include/roc_shmem/roc_shmem.hpp +++ b/include/roc_shmem/roc_shmem.hpp @@ -1021,12 +1021,12 @@ __device__ ATTR_NO_INLINE void roc_shmem_threadfence_system(); */ #define ATOMIC_FETCH_API_GEN(T, TNAME) \ __device__ ATTR_NO_INLINE T roc_shmem_ctx_##TNAME##_atomic_fetch( \ - roc_shmem_ctx_t ctx, T *dest, int pe); \ - __device__ ATTR_NO_INLINE T roc_shmem_##TNAME##_atomic_fetch(T *dest, \ + roc_shmem_ctx_t ctx, T *source, int pe); \ + __device__ ATTR_NO_INLINE T roc_shmem_##TNAME##_atomic_fetch(T *source, \ int pe); \ __host__ T roc_shmem_ctx_##TNAME##_atomic_fetch(roc_shmem_ctx_t ctx, \ - T *dest, int pe); \ - __host__ T roc_shmem_##TNAME##_atomic_fetch(T *dest, int pe); + T *source, int pe); \ + __host__ T roc_shmem_##TNAME##_atomic_fetch(T *source, int pe); /* * MACRO DECLARE SHMEM_ATOMIC_ADD APIs @@ -1152,83 +1152,83 @@ __device__ ATTR_NO_INLINE void roc_shmem_threadfence_system(); * MACRO DECLARE SHMEM_WAIT_UNTIL APIs */ #define WAIT_UNTIL_API_GEN(T, TNAME) \ - __device__ void roc_shmem_##TNAME##_wait_until(T *ptr, \ - roc_shmem_cmps cmp, \ + __device__ void roc_shmem_##TNAME##_wait_until(T *ivars, \ + int cmp, \ T val); \ - __device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr, \ + __device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val); \ - __device__ void roc_shmem_##TNAME##_wait_until_all(T *ptr, \ + __device__ void roc_shmem_##TNAME##_wait_until_all(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val); \ - __device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr, \ + __device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars, \ size_t nelems, \ size_t* indices, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val); \ - __device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr, \ + __device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T* vals); \ - __device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr, \ + __device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T* vals); \ - __device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr, \ + __device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars, \ size_t nelems, \ size_t* indices, \ const int* status, \ - roc_shmem_cmps cmp,\ + int cmp, \ T* vals); \ - __host__ void roc_shmem_##TNAME##_wait_until(T *ptr, \ - roc_shmem_cmps cmp, \ + __host__ void roc_shmem_##TNAME##_wait_until(T *ivars, \ + int cmp, \ T val); \ - __host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr, \ + __host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val); \ - __host__ void roc_shmem_##TNAME##_wait_until_all(T *ptr, \ + __host__ void roc_shmem_##TNAME##_wait_until_all(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val); \ - __host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr, \ + __host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars, \ size_t nelems, \ size_t* indices, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val); \ - __host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr, \ + __host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T* vals); \ - __host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr, \ + __host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T* vals); \ - __host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr, \ + __host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars, \ size_t nelems, \ size_t* indices, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T* vals); /* * MACRO DECLARE SHMEM_TEST APIs */ -#define TEST_API_GEN(T, TNAME) \ - __device__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val); \ - __host__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val); +#define TEST_API_GEN(T, TNAME) \ + __device__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val); \ + __host__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val); /** * @name SHMEM_REDUCTIONS @@ -2031,7 +2031,7 @@ ATOMIC_INC_API_GEN(ptrdiff_t, ptrdiff) * coalesce contiguous messages and elect a leader thread to call into the * ROC_SHMEM function. * - * @param[in] ptr Pointer to memory on the symmetric heap to wait for. + * @param[in] ivars Pointer to memory on the symmetric heap to wait for. * @param[in] cmp Operation for the comparison. * @param[in] val Value to compare the memory at \p ptr to. * @@ -2064,7 +2064,7 @@ WAIT_UNTIL_API_GEN(unsigned long long, ulonglong) // NOLINT(runtime/int) * coalesce contiguous messages and elect a leader thread to call into the * ROC_SHMEM function. * - * @param[in] ptr Pointer to memory on the symmetric heap to wait for. + * @param[in] ivars Pointer to memory on the symmetric heap to wait for. * @param[in] cmp Operation for the comparison. * @param[in] val Value to compare the memory at \p ptr to. * diff --git a/src/context.hpp b/src/context.hpp index 72c8f38352..0db4e99b23 100644 --- a/src/context.hpp +++ b/src/context.hpp @@ -74,42 +74,42 @@ class Context { ***************************** DEVICE METHODS ***************************** *************************************************************************/ template - __device__ void wait_until(T* ptr, roc_shmem_cmps cmp, T val); + __device__ void wait_until(T *ivars, int cmp, T val); template - __device__ void wait_until_all(T* ptr, size_t nelems, + __device__ void wait_until_all(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val); + int cmp, T val); template - __device__ size_t wait_until_any(T* ptr, size_t nelems, + __device__ size_t wait_until_any(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val); + int cmp, T val); template - __device__ size_t wait_until_some(T* ptr, size_t nelems, + __device__ size_t wait_until_some(T *ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T val); + int cmp, T val); template - __device__ void wait_until_all_vector(T* ptr, size_t nelems, + __device__ void wait_until_all_vector(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals); + int cmp, T* vals); template - __device__ size_t wait_until_any_vector(T* ptr, size_t nelems, + __device__ size_t wait_until_any_vector(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals); + int cmp, T* vals); template - __device__ size_t wait_until_some_vector(T* ptr, size_t nelems, + __device__ size_t wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T* vals); + int cmp, T* vals); template - __device__ int test(T* ptr, roc_shmem_cmps cmp, T val); + __device__ int test(T *ivars, int cmp, T val); __device__ void threadfence_system(); @@ -365,42 +365,42 @@ class Context { int nreduce); template - __host__ void wait_until(T* ptr, roc_shmem_cmps cmp, T val); + __host__ void wait_until(T *ivars, int cmp, T val); template - __host__ void wait_until_all(T* ptr, size_t nelems, + __host__ void wait_until_all(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val); + int cmp, T val); template - __host__ size_t wait_until_any(T* ptr, size_t nelems, + __host__ size_t wait_until_any(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val); + int cmp, T val); template - __host__ size_t wait_until_some(T* ptr, size_t nelems, + __host__ size_t wait_until_some(T *ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T val); + int cmp, T val); template - __host__ void wait_until_all_vector(T* ptr, size_t nelems, + __host__ void wait_until_all_vector(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals); + int cmp, T* vals); template - __host__ size_t wait_until_any_vector(T* ptr, size_t nelems, + __host__ size_t wait_until_any_vector(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals); + int cmp, T* vals); template - __host__ size_t wait_until_some_vector(T* ptr, size_t nelems, + __host__ size_t wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T* vals); + int cmp, T* vals); template - __host__ int test(T* ptr, roc_shmem_cmps cmp, T val); + __host__ int test(T *ivars, int cmp, T val); public: /** diff --git a/src/context_tmpl_device.hpp b/src/context_tmpl_device.hpp index d6d0df63fa..d0310d476a 100644 --- a/src/context_tmpl_device.hpp +++ b/src/context_tmpl_device.hpp @@ -199,9 +199,9 @@ __device__ void Context::broadcast(T *dest, const T *source, int nelems, } template -__device__ __forceinline__ void Context::wait_until(T *ptr, roc_shmem_cmps cmp, +__device__ __forceinline__ void Context::wait_until(T *ivars, int cmp, T val) { - while (!test(ptr, cmp, val)) { + while (!test(ivars, cmp, val)) { } } @@ -219,9 +219,9 @@ __device__ __forceinline__ size_t status_entry(size_t nelems, template __device__ __forceinline__ -size_t Context::wait_until_any(T* ptr, size_t nelems, +size_t Context::wait_until_any(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val) { + int cmp, T val) { // zero nelems error condition if (!nelems) { return SIZE_MAX; @@ -240,7 +240,7 @@ size_t Context::wait_until_any(T* ptr, size_t nelems, if (status[i]) { continue; } - if (test(ptr + i, cmp, val)) { + if (test(ivars + i, cmp, val)) { return i; } } @@ -249,9 +249,9 @@ size_t Context::wait_until_any(T* ptr, size_t nelems, template __device__ __forceinline__ -void Context::wait_until_all(T* ptr, size_t nelems, +void Context::wait_until_all(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val) { + int cmp, T val) { // zero nelems error condition if (!nelems) { return; @@ -268,17 +268,17 @@ void Context::wait_until_all(T* ptr, size_t nelems, if (status[i]) { continue; } - while (!test(ptr + i, cmp, val)) { + while (!test(ivars + i, cmp, val)) { } } } template __device__ __forceinline__ -size_t Context::wait_until_some(T* ptr, size_t nelems, +size_t Context::wait_until_some(T *ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T val) { + int cmp, T val) { // zero nelems error condition if (!nelems) { return 0; @@ -299,7 +299,7 @@ size_t Context::wait_until_some(T* ptr, size_t nelems, if (status[i]) { continue; } - if (test(ptr + i, cmp, val)) { + if (test(ivars + i, cmp, val)) { done = true; indices[ncompleted] = i; ncompleted++; @@ -311,62 +311,62 @@ size_t Context::wait_until_some(T* ptr, size_t nelems, template __device__ __forceinline__ -void Context::wait_until_all_vector(T* ptr, size_t nelems, +void Context::wait_until_all_vector(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals) { + int cmp, T* vals) { ; } template __device__ __forceinline__ -size_t Context::wait_until_any_vector(T* ptr, size_t nelems, +size_t Context::wait_until_any_vector(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals) { + int cmp, T* vals) { return 0; } template __device__ __forceinline__ -size_t Context::wait_until_some_vector(T* ptr, size_t nelems, +size_t Context::wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T* vals) { + int cmp, T* vals) { return 0; } template -__device__ __forceinline__ int Context::test(T *ptr, roc_shmem_cmps cmp, +__device__ __forceinline__ int Context::test(T *ivars, int cmp, T val) { int ret = 0; - volatile T *vol_ptr = reinterpret_cast(ptr); + volatile T *vol_ivars = reinterpret_cast(ivars); switch (cmp) { case ROC_SHMEM_CMP_EQ: - if (uncached_load(vol_ptr) == val) { + if (uncached_load(vol_ivars) == val) { ret = 1; } break; case ROC_SHMEM_CMP_NE: - if (uncached_load(vol_ptr) != val) { + if (uncached_load(vol_ivars) != val) { ret = 1; } break; case ROC_SHMEM_CMP_GT: - if (uncached_load(vol_ptr) > val) { + if (uncached_load(vol_ivars) > val) { ret = 1; } break; case ROC_SHMEM_CMP_GE: - if (uncached_load(vol_ptr) >= val) { + if (uncached_load(vol_ivars) >= val) { ret = 1; } break; case ROC_SHMEM_CMP_LT: - if (uncached_load(vol_ptr) < val) { + if (uncached_load(vol_ivars) < val) { ret = 1; } break; case ROC_SHMEM_CMP_LE: - if (uncached_load(vol_ptr) <= val) { + if (uncached_load(vol_ivars) <= val) { ret = 1; } break; diff --git a/src/context_tmpl_host.hpp b/src/context_tmpl_host.hpp index bce99c1d6c..a4c7929501 100644 --- a/src/context_tmpl_host.hpp +++ b/src/context_tmpl_host.hpp @@ -234,73 +234,73 @@ __host__ void Context::to_all(roc_shmem_team_t team, T *dest, const T *source, } template -__host__ void Context::wait_until(T *ptr, roc_shmem_cmps cmp, T val) { +__host__ void Context::wait_until(T *ivars, int cmp, T val) { ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL); - HOST_DISPATCH(wait_until(ptr, cmp, val)); + HOST_DISPATCH(wait_until(ivars, cmp, val)); } template -__host__ size_t Context::wait_until_any(T* ptr, size_t nelems, +__host__ size_t Context::wait_until_any(T *ivars, size_t nelems, const int* status, - roc_shmem_cmps cmp, T val) { + int cmp, T val) { ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ANY); - return HOST_DISPATCH(wait_until_any(ptr, nelems, status, cmp, val)); + return HOST_DISPATCH(wait_until_any(ivars, nelems, status, cmp, val)); } template -__host__ void Context::wait_until_all(T* ptr, size_t nelems, +__host__ void Context::wait_until_all(T *ivars, size_t nelems, const int* status, - roc_shmem_cmps cmp, T val) { + int cmp, T val) { ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ALL); - HOST_DISPATCH(wait_until_all(ptr, nelems, status, cmp, val)); + HOST_DISPATCH(wait_until_all(ivars, nelems, status, cmp, val)); } template -__host__ size_t Context::wait_until_some(T* ptr, size_t nelems, +__host__ size_t Context::wait_until_some(T *ivars, size_t nelems, size_t* indices, const int* status, - roc_shmem_cmps cmp, T val) { + int cmp, T val) { ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_SOME); - HOST_DISPATCH_RET(wait_until_some(ptr, nelems, indices, status, cmp, val)); + HOST_DISPATCH_RET(wait_until_some(ivars, nelems, indices, status, cmp, val)); } template -__host__ void Context::wait_until_all_vector(T* ptr, size_t nelems, +__host__ void Context::wait_until_all_vector(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals) { + int cmp, T* vals) { ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ALL_VECTOR); - HOST_DISPATCH(wait_until_all_vector(ptr, nelems, status, cmp, vals)); + HOST_DISPATCH(wait_until_all_vector(ivars, nelems, status, cmp, vals)); } template -__host__ size_t Context::wait_until_any_vector(T* ptr, size_t nelems, +__host__ size_t Context::wait_until_any_vector(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals) { + int cmp, T* vals) { ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ANY_VECTOR); - HOST_DISPATCH_RET(wait_until_any_vector(ptr, nelems, status, cmp, vals)); + HOST_DISPATCH_RET(wait_until_any_vector(ivars, nelems, status, cmp, vals)); } template -__host__ size_t Context::wait_until_some_vector(T* ptr, size_t nelems, +__host__ size_t Context::wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T* vals) { + int cmp, T* vals) { ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_SOME_VECTOR); - HOST_DISPATCH_RET(wait_until_some_vector(ptr, nelems, indices, status, cmp, vals)); + HOST_DISPATCH_RET(wait_until_some_vector(ivars, nelems, indices, status, cmp, vals)); } template -__host__ int Context::test(T *ptr, roc_shmem_cmps cmp, T val) { +__host__ int Context::test(T *ivars, int cmp, T val) { ctxHostStats.incStat(NUM_HOST_TEST); - HOST_DISPATCH_RET(test(ptr, cmp, val)); + HOST_DISPATCH_RET(test(ivars, cmp, val)); } } // namespace rocshmem diff --git a/src/gpu_ib/context_ib_host.hpp b/src/gpu_ib/context_ib_host.hpp index c7c7a975b2..9528a1cee9 100644 --- a/src/gpu_ib/context_ib_host.hpp +++ b/src/gpu_ib/context_ib_host.hpp @@ -99,42 +99,42 @@ class GPUIBHostContext : public Context { int nreduce); template - __host__ void wait_until(T *ptr, roc_shmem_cmps cmp, T val); + __host__ void wait_until(T *ivars, roc_shmem_cmps cmp, T val); template - __host__ size_t wait_until_any(T* ptr, size_t nelems, + __host__ size_t wait_until_any(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T val); template - __host__ void wait_until_all(T* ptr, size_t nelems, + __host__ void wait_until_all(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T val); template - __host__ size_t wait_until_some(T* ptr, size_t nelems, + __host__ size_t wait_until_some(T *ivars, size_t nelems, size_t* indices, const int *status, roc_shmem_cmps cmp, T val); template - __host__ void wait_until_all_vector(T* ptr, size_t nelems, + __host__ void wait_until_all_vector(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T* vals); template - __host__ size_t wait_until_any_vector(T* ptr, size_t nelems, + __host__ size_t wait_until_any_vector(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T* vals); template - __host__ size_t wait_until_some_vector(T* ptr, size_t nelems, + __host__ size_t wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int *status, roc_shmem_cmps cmp, T* vals); template - __host__ int test(T *ptr, roc_shmem_cmps cmp, T val); + __host__ int test(T *ivars, roc_shmem_cmps cmp, T val); public: /* Pointer to the backend's host interface */ diff --git a/src/gpu_ib/context_ib_tmpl_host.hpp b/src/gpu_ib/context_ib_tmpl_host.hpp index 393aaf6281..3c5c75e66b 100644 --- a/src/gpu_ib/context_ib_tmpl_host.hpp +++ b/src/gpu_ib/context_ib_tmpl_host.hpp @@ -115,57 +115,57 @@ __host__ void GPUIBHostContext::to_all(roc_shmem_team_t team, T *dest, } template -__host__ void GPUIBHostContext::wait_until(T *ptr, roc_shmem_cmps cmp, T val) { - host_interface->wait_until(ptr, cmp, val, context_window_info); +__host__ void GPUIBHostContext::wait_until(T *ivars, roc_shmem_cmps cmp, T val) { + host_interface->wait_until(ivars, cmp, val, context_window_info); } template -__host__ void GPUIBHostContext::wait_until_all(T *ptr, size_t nelems, +__host__ void GPUIBHostContext::wait_until_all(T *ivars, size_t nelems, const int* status, roc_shmem_cmps cmp, T val) { - host_interface->wait_until_all(ptr, nelems, status, cmp, val, context_window_info); + host_interface->wait_until_all(ivars, nelems, status, cmp, val, context_window_info); } template -__host__ size_t GPUIBHostContext::wait_until_any(T *ptr, size_t nelems, +__host__ size_t GPUIBHostContext::wait_until_any(T *ivars, size_t nelems, const int* status, roc_shmem_cmps cmp, T val) { - return host_interface->wait_until_any(ptr, nelems, status, cmp, val, context_window_info); + return host_interface->wait_until_any(ivars, nelems, status, cmp, val, context_window_info); } template -__host__ size_t GPUIBHostContext::wait_until_some(T *ptr, size_t nelems, +__host__ size_t GPUIBHostContext::wait_until_some(T *ivars, size_t nelems, size_t* indices, const int* status, roc_shmem_cmps cmp, T val) { - return host_interface->wait_until_some(ptr, nelems, indices, status, cmp, val, context_window_info); + return host_interface->wait_until_some(ivars, nelems, indices, status, cmp, val, context_window_info); } template -__host__ void GPUIBHostContext::wait_until_all_vector(T *ptr, size_t nelems, +__host__ void GPUIBHostContext::wait_until_all_vector(T *ivars, size_t nelems, const int* status, roc_shmem_cmps cmp, T* vals) { - host_interface->wait_until_all_vector(ptr, nelems, status, cmp, vals, context_window_info); + host_interface->wait_until_all_vector(ivars, nelems, status, cmp, vals, context_window_info); } template -__host__ size_t GPUIBHostContext::wait_until_any_vector(T *ptr, size_t nelems, +__host__ size_t GPUIBHostContext::wait_until_any_vector(T *ivars, size_t nelems, const int* status, roc_shmem_cmps cmp, T* vals) { - return host_interface->wait_until_any_vector(ptr, nelems, status, cmp, vals, context_window_info); + return host_interface->wait_until_any_vector(ivars, nelems, status, cmp, vals, context_window_info); } template -__host__ size_t GPUIBHostContext::wait_until_some_vector(T *ptr, size_t nelems, +__host__ size_t GPUIBHostContext::wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int* status, roc_shmem_cmps cmp, T* vals) { - return host_interface->wait_until_some_vector(ptr, nelems, indices, status, cmp, vals, context_window_info); + return host_interface->wait_until_some_vector(ivars, nelems, indices, status, cmp, vals, context_window_info); } template -__host__ int GPUIBHostContext::test(T *ptr, roc_shmem_cmps cmp, T val) { - return host_interface->test(ptr, cmp, val, context_window_info); +__host__ int GPUIBHostContext::test(T *ivars, roc_shmem_cmps cmp, T val) { + return host_interface->test(ivars, cmp, val, context_window_info); } } // namespace rocshmem diff --git a/src/host/host.hpp b/src/host/host.hpp index a9210a7b21..4cd2b2e393 100644 --- a/src/host/host.hpp +++ b/src/host/host.hpp @@ -211,41 +211,41 @@ class HostInterface { int nreduce); template - __host__ void wait_until(T* ptr, roc_shmem_cmps cmp, T val, + __host__ void wait_until(T *ivars, int cmp, T val, WindowInfo* window_info); template - __host__ void wait_until_all(T* ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T val, + __host__ void wait_until_all(T *ivars, size_t nelems, const int* status, + int cmp, T val, WindowInfo* window_info); template - __host__ size_t wait_until_any(T* ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T val, + __host__ size_t wait_until_any(T *ivars, size_t nelems, const int* status, + int cmp, T val, WindowInfo* window_info); template - __host__ size_t wait_until_some(T* ptr, size_t nelems, size_t* indices, - const int* status, roc_shmem_cmps cmp, T val, + __host__ size_t wait_until_some(T *ivars, size_t nelems, size_t* indices, + const int* status, int cmp, T val, WindowInfo* window_info); template - __host__ void wait_until_all_vector(T* ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T* vals, + __host__ void wait_until_all_vector(T *ivars, size_t nelems, const int* status, + int cmp, T* vals, WindowInfo* window_info); template - __host__ size_t wait_until_any_vector(T* ptr, size_t nelems, - const int* status, roc_shmem_cmps cmp, + __host__ size_t wait_until_any_vector(T *ivars, size_t nelems, + const int* status, int cmp, T* vals, WindowInfo* window_info); template - __host__ size_t wait_until_some_vector(T* ptr, size_t nelems, size_t* indices, - const int* status, roc_shmem_cmps cmp, + __host__ size_t wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, + const int* status, int cmp, T* vals, WindowInfo* window_info); template - __host__ int test(T* ptr, roc_shmem_cmps cmp, T val, WindowInfo* window_info); + __host__ int test(T *ivars, int cmp, T val, WindowInfo* window_info); #ifndef USE_COHERENT_HEAP __host__ void create_hdp_window(); @@ -295,11 +295,11 @@ class HostInterface { __host__ MPI_Datatype get_mpi_type(); template - __host__ int compare(roc_shmem_cmps cmp, T input_val, T target_val); + __host__ int compare(int cmp, T input_val, T target_val); template __host__ int test_and_compare(MPI_Aint offset, MPI_Datatype mpi_type, - roc_shmem_cmps cmp, T val, MPI_Win win); + int cmp, T val, MPI_Win win); template __host__ void to_all_internal(MPI_Comm mpi_comm, T* dest, const T* source, diff --git a/src/host/host_templates.hpp b/src/host/host_templates.hpp index c2b308133e..41ff7cb094 100644 --- a/src/host/host_templates.hpp +++ b/src/host/host_templates.hpp @@ -392,7 +392,7 @@ __host__ void HostInterface::to_all(roc_shmem_team_t team, T* dest, } template -__host__ inline int HostInterface::compare(roc_shmem_cmps cmp, T input_val, +__host__ inline int HostInterface::compare(int cmp, T input_val, T target_val) { int cond_satisfied{0}; @@ -426,7 +426,7 @@ __host__ inline int HostInterface::compare(roc_shmem_cmps cmp, T input_val, template __host__ inline int HostInterface::test_and_compare(MPI_Aint offset, MPI_Datatype mpi_type, - roc_shmem_cmps cmp, T val, + int cmp, T val, MPI_Win win) { T fetched_val{}; @@ -446,7 +446,7 @@ __host__ inline int HostInterface::test_and_compare(MPI_Aint offset, } template -__host__ void HostInterface::wait_until(T* ptr, roc_shmem_cmps cmp, T val, +__host__ void HostInterface::wait_until(T *ivars, int cmp, T val, WindowInfo* window_info) { DPRINTF("Function: host_wait_until\n"); @@ -454,13 +454,13 @@ __host__ void HostInterface::wait_until(T* ptr, roc_shmem_cmps cmp, T val, * Find the offset of this memory in the window */ MPI_Aint offset{ - compute_offset(ptr, window_info->get_start(), window_info->get_end())}; + compute_offset(ivars, window_info->get_start(), window_info->get_end())}; MPI_Datatype mpi_type{get_mpi_type()}; MPI_Win win{window_info->get_win()}; /* - * Continuously read the ptr atomically until it satisfies the condition + * Continuously read the ivars atomically until it satisfies the condition */ while (1) { int cond_satisfied{test_and_compare(offset, mpi_type, cmp, val, win)}; @@ -500,9 +500,9 @@ __host__ size_t status_entry(size_t nelems, } template -__host__ size_t HostInterface::wait_until_any(T* ptr, size_t nelems, +__host__ size_t HostInterface::wait_until_any(T* ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val, + int cmp, T val, WindowInfo* window_info) { DPRINTF("Function: host_wait_until_any\n"); @@ -524,7 +524,7 @@ __host__ size_t HostInterface::wait_until_any(T* ptr, size_t nelems, if (status[i]) { continue; } - if (test(ptr + i, cmp, val, window_info)) { + if (test(ivars + i, cmp, val, window_info)) { return i; } } @@ -532,9 +532,9 @@ __host__ size_t HostInterface::wait_until_any(T* ptr, size_t nelems, } template -__host__ void HostInterface::wait_until_all(T* ptr, size_t nelems, +__host__ void HostInterface::wait_until_all(T* ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val, + int cmp, T val, WindowInfo* window_info) { DPRINTF("Function: host_wait_until_all\n"); @@ -554,16 +554,16 @@ __host__ void HostInterface::wait_until_all(T* ptr, size_t nelems, if (status[i]) { continue; } - while (!test(ptr + i, cmp, val, window_info)) { + while (!test(ivars + i, cmp, val, window_info)) { } } } template -__host__ size_t HostInterface::wait_until_some(T* ptr, size_t nelems, +__host__ size_t HostInterface::wait_until_some(T* ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T val, + int cmp, T val, WindowInfo* window_info) { DPRINTF("Function: host_wait_until_some\n"); @@ -587,7 +587,7 @@ __host__ size_t HostInterface::wait_until_some(T* ptr, size_t nelems, if (status[i]) { continue; } - if (test(ptr + i, cmp, val, window_info)) { + if (test(ivars + i, cmp, val, window_info)) { done = true; indices[ncompleted] = i; ncompleted++; @@ -598,34 +598,34 @@ __host__ size_t HostInterface::wait_until_some(T* ptr, size_t nelems, } template -__host__ void HostInterface::wait_until_all_vector(T* ptr, size_t nelems, +__host__ void HostInterface::wait_until_all_vector(T* ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals, + int cmp, T* vals, WindowInfo* window_info) { DPRINTF("Function: host_wait_until_all_vector\n"); } template -__host__ size_t HostInterface::wait_until_any_vector(T* ptr, size_t nelems, +__host__ size_t HostInterface::wait_until_any_vector(T* ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals, + int cmp, T* vals, WindowInfo* window_info) { DPRINTF("Function: host_wait_until_any_vector\n"); return 0; } template -__host__ size_t HostInterface::wait_until_some_vector(T* ptr, size_t nelems, +__host__ size_t HostInterface::wait_until_some_vector(T* ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T* vals, + int cmp, T* vals, WindowInfo* window_info) { DPRINTF("Function: host_wait_until_some_vector\n"); return 0; } template -__host__ int HostInterface::test(T* ptr, roc_shmem_cmps cmp, T val, +__host__ int HostInterface::test(T* ivars, int cmp, T val, WindowInfo* window_info) { DPRINTF("Function: host_test\n"); @@ -633,7 +633,7 @@ __host__ int HostInterface::test(T* ptr, roc_shmem_cmps cmp, T val, * Find the offset of this memory in the window */ MPI_Aint offset{ - compute_offset(ptr, window_info->get_start(), window_info->get_end())}; + compute_offset(ivars, window_info->get_start(), window_info->get_end())}; MPI_Datatype mpi_type{get_mpi_type()}; diff --git a/src/ipc/context_ipc_host.hpp b/src/ipc/context_ipc_host.hpp index f9421ca210..4c2e626d75 100644 --- a/src/ipc/context_ipc_host.hpp +++ b/src/ipc/context_ipc_host.hpp @@ -99,42 +99,42 @@ class IPCHostContext : public Context { int nreduce); template - __host__ void wait_until(T *ptr, roc_shmem_cmps cmp, T val); + __host__ void wait_until(T *ivars, int cmp, T val); template - __host__ size_t wait_until_any(T* ptr, size_t nelems, + __host__ size_t wait_until_any(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val); + int cmp, T val); template - __host__ void wait_until_all(T* ptr, size_t nelems, + __host__ void wait_until_all(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val); + int cmp, T val); template - __host__ size_t wait_until_some(T* ptr, size_t nelems, + __host__ size_t wait_until_some(T *ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T val); + int cmp, T val); template - __host__ void wait_until_all_vector(T* ptr, size_t nelems, + __host__ void wait_until_all_vector(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals); + int cmp, T* vals); template - __host__ size_t wait_until_any_vector(T* ptr, size_t nelems, + __host__ size_t wait_until_any_vector(T *ivars, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals); + int cmp, T* vals); template - __host__ size_t wait_until_some_vector(T* ptr, size_t nelems, + __host__ size_t wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T* vals); + int cmp, T* vals); template - __host__ int test(T *ptr, roc_shmem_cmps cmp, T val); + __host__ int test(T *ivars, int cmp, T val); public: /* Pointer to the backend's host interface */ diff --git a/src/ipc/context_ipc_tmpl_host.hpp b/src/ipc/context_ipc_tmpl_host.hpp index 76484de34d..d78593b888 100644 --- a/src/ipc/context_ipc_tmpl_host.hpp +++ b/src/ipc/context_ipc_tmpl_host.hpp @@ -115,57 +115,57 @@ __host__ void IPCHostContext::to_all(roc_shmem_team_t team, T *dest, } template -__host__ void IPCHostContext::wait_until(T *ptr, roc_shmem_cmps cmp, T val) { - host_interface->wait_until(ptr, cmp, val, context_window_info); +__host__ void IPCHostContext::wait_until(T *ivars, int cmp, T val) { + host_interface->wait_until(ivars, cmp, val, context_window_info); } template -__host__ void IPCHostContext::wait_until_all(T *ptr, size_t nelems, +__host__ void IPCHostContext::wait_until_all(T *ivars, size_t nelems, const int* status, - roc_shmem_cmps cmp, T val) { - host_interface->wait_until_all(ptr, nelems, status, cmp, val, context_window_info); + int cmp, T val) { + host_interface->wait_until_all(ivars, nelems, status, cmp, val, context_window_info); } template -__host__ size_t IPCHostContext::wait_until_any(T *ptr, size_t nelems, +__host__ size_t IPCHostContext::wait_until_any(T *ivars, size_t nelems, const int* status, - roc_shmem_cmps cmp, T val) { - return host_interface->wait_until_any(ptr, nelems, status, cmp, val, context_window_info); + int cmp, T val) { + return host_interface->wait_until_any(ivars, nelems, status, cmp, val, context_window_info); } template -__host__ size_t IPCHostContext::wait_until_some(T *ptr, size_t nelems, +__host__ size_t IPCHostContext::wait_until_some(T *ivars, size_t nelems, size_t* indices, const int* status, - roc_shmem_cmps cmp, T val) { - return host_interface->wait_until_some(ptr, nelems, indices, status, cmp, val, context_window_info); + int cmp, T val) { + return host_interface->wait_until_some(ivars, nelems, indices, status, cmp, val, context_window_info); } template -__host__ void IPCHostContext::wait_until_all_vector(T *ptr, size_t nelems, +__host__ void IPCHostContext::wait_until_all_vector(T *ivars, size_t nelems, const int* status, - roc_shmem_cmps cmp, T* vals) { - host_interface->wait_until_all_vector(ptr, nelems, status, cmp, vals, context_window_info); + int cmp, T* vals) { + host_interface->wait_until_all_vector(ivars, nelems, status, cmp, vals, context_window_info); } template -__host__ size_t IPCHostContext::wait_until_any_vector(T *ptr, size_t nelems, +__host__ size_t IPCHostContext::wait_until_any_vector(T *ivars, size_t nelems, const int* status, - roc_shmem_cmps cmp, T* vals) { - return host_interface->wait_until_any_vector(ptr, nelems, status, cmp, vals, context_window_info); + int cmp, T* vals) { + return host_interface->wait_until_any_vector(ivars, nelems, status, cmp, vals, context_window_info); } template -__host__ size_t IPCHostContext::wait_until_some_vector(T *ptr, size_t nelems, +__host__ size_t IPCHostContext::wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int* status, - roc_shmem_cmps cmp, T* vals) { - return host_interface->wait_until_some_vector(ptr, nelems, indices, status, cmp, vals, context_window_info); + int cmp, T* vals) { + return host_interface->wait_until_some_vector(ivars, nelems, indices, status, cmp, vals, context_window_info); } template -__host__ int IPCHostContext::test(T *ptr, roc_shmem_cmps cmp, T val) { - return host_interface->test(ptr, cmp, val, context_window_info); +__host__ int IPCHostContext::test(T *ivars, int cmp, T val) { + return host_interface->test(ivars, cmp, val, context_window_info); } } // namespace rocshmem diff --git a/src/reverse_offload/context_ro_host.hpp b/src/reverse_offload/context_ro_host.hpp index ee26f807e8..80e8a6f6e5 100644 --- a/src/reverse_offload/context_ro_host.hpp +++ b/src/reverse_offload/context_ro_host.hpp @@ -148,42 +148,42 @@ class ROHostContext : public Context { int nreduce); template - __host__ void wait_until(T *ptr, roc_shmem_cmps cmp, T val); + __host__ void wait_until(T *ivars, roc_shmem_cmps cmp, T val); template - __host__ void wait_until_all(T* ptr, size_t nelems, + __host__ void wait_until_all(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T val); template - __host__ size_t wait_until_any(T* ptr, size_t nelems, + __host__ size_t wait_until_any(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T val); template - __host__ size_t wait_until_some(T* ptr, size_t nelems, + __host__ size_t wait_until_some(T *ivars, size_t nelems, size_t* indices, const int *status, roc_shmem_cmps cmp, T val); template - __host__ void wait_until_all_vector(T* ptr, size_t nelems, + __host__ void wait_until_all_vector(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T* vals); template - __host__ size_t wait_until_any_vector(T* ptr, size_t nelems, + __host__ size_t wait_until_any_vector(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T* vals); template - __host__ size_t wait_until_some_vector(T* ptr, size_t nelems, + __host__ size_t wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int *status, roc_shmem_cmps cmp, T* vals); template - __host__ int test(T *ptr, roc_shmem_cmps cmp, T val); + __host__ int test(T *ivars, roc_shmem_cmps cmp, T val); }; } // namespace rocshmem diff --git a/src/reverse_offload/context_ro_tmpl_host.hpp b/src/reverse_offload/context_ro_tmpl_host.hpp index bce7897783..4e3b1855d7 100644 --- a/src/reverse_offload/context_ro_tmpl_host.hpp +++ b/src/reverse_offload/context_ro_tmpl_host.hpp @@ -141,58 +141,58 @@ __host__ void ROHostContext::to_all(roc_shmem_team_t team, T *dest, } template -__host__ void ROHostContext::wait_until(T *ptr, roc_shmem_cmps cmp, T val) { - host_interface->wait_until(ptr, cmp, val, context_window_info); +__host__ void ROHostContext::wait_until(T *ivars, roc_shmem_cmps cmp, T val) { + host_interface->wait_until(ivars, cmp, val, context_window_info); } template -__host__ size_t ROHostContext::wait_until_any(T* ptr, size_t nelems, +__host__ size_t ROHostContext::wait_until_any(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T val) { - return host_interface->wait_until_any(ptr, nelems, status, cmp, val, context_window_info); + return host_interface->wait_until_any(ivars, nelems, status, cmp, val, context_window_info); } template __host__ -void ROHostContext::wait_until_all(T* ptr, size_t nelems, +void ROHostContext::wait_until_all(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T val) { - host_interface->wait_until_all(ptr, nelems, status, cmp, val, context_window_info); + host_interface->wait_until_all(ivars, nelems, status, cmp, val, context_window_info); } template __host__ -size_t ROHostContext::wait_until_some(T* ptr, size_t nelems, +size_t ROHostContext::wait_until_some(T *ivars, size_t nelems, size_t* indices, const int *status, roc_shmem_cmps cmp, T val) { - return host_interface->wait_until_some(ptr, nelems, indices, status, cmp, val, context_window_info); + return host_interface->wait_until_some(ivars, nelems, indices, status, cmp, val, context_window_info); } template __host__ -void ROHostContext::wait_until_all_vector(T* ptr, size_t nelems, +void ROHostContext::wait_until_all_vector(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T* vals) { - host_interface->wait_until_all_vector(ptr, nelems, status, cmp, vals, context_window_info); + host_interface->wait_until_all_vector(ivars, nelems, status, cmp, vals, context_window_info); } template -__host__ size_t ROHostContext::wait_until_any_vector(T* ptr, size_t nelems, +__host__ size_t ROHostContext::wait_until_any_vector(T *ivars, size_t nelems, const int *status, roc_shmem_cmps cmp, T* vals) { - return host_interface->wait_until_any_vector(ptr, nelems, status, cmp, vals, context_window_info); + return host_interface->wait_until_any_vector(ivars, nelems, status, cmp, vals, context_window_info); } template -__host__ size_t ROHostContext::wait_until_some_vector(T* ptr, size_t nelems, +__host__ size_t ROHostContext::wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int *status, roc_shmem_cmps cmp, T* vals) { - return host_interface->wait_until_some_vector(ptr, nelems, indices, status, cmp, vals, context_window_info); + return host_interface->wait_until_some_vector(ivars, nelems, indices, status, cmp, vals, context_window_info); } template -__host__ int ROHostContext::test(T *ptr, roc_shmem_cmps cmp, T val) { - return host_interface->test(ptr, cmp, val, context_window_info); +__host__ int ROHostContext::test(T *ivars, roc_shmem_cmps cmp, T val) { + return host_interface->test(ivars, cmp, val, context_window_info); } } // namespace rocshmem diff --git a/src/roc_shmem.cpp b/src/roc_shmem.cpp index 32ef91bbda..cdf76fa9d3 100644 --- a/src/roc_shmem.cpp +++ b/src/roc_shmem.cpp @@ -392,8 +392,8 @@ __host__ T roc_shmem_atomic_fetch_inc(T *dest, int pe) { } template -__host__ T roc_shmem_atomic_fetch(T *dest, int pe) { - return roc_shmem_atomic_fetch(ROC_SHMEM_HOST_CTX_DEFAULT, dest, pe); +__host__ T roc_shmem_atomic_fetch(T *source, int pe) { + return roc_shmem_atomic_fetch(ROC_SHMEM_HOST_CTX_DEFAULT, source, pe); } template @@ -592,10 +592,10 @@ __host__ T roc_shmem_atomic_fetch_inc(roc_shmem_ctx_t ctx, T *dest, int pe) { } template -__host__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *dest, int pe) { +__host__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *source, int pe) { DPRINTF("Host function: roc_shmem_atomic_fetch\n"); - return get_internal_ctx(ctx)->amo_fetch_add(dest, 0, pe); + return get_internal_ctx(ctx)->amo_fetch_add(source, 0, pe); } template @@ -744,74 +744,74 @@ __host__ void roc_shmem_to_all([[maybe_unused]] roc_shmem_ctx_t ctx, } template -__host__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val) { +__host__ void roc_shmem_wait_until(T *ivars, int cmp, T val) { DPRINTF("Host function: roc_shmem_wait_until\n"); - get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until(ptr, cmp, val); + get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until(ivars, cmp, val); } template -__host__ void roc_shmem_wait_until_all(T *ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T val) { +__host__ void roc_shmem_wait_until_all(T *ivars, size_t nelems, const int* status, + int cmp, T val) { DPRINTF("Host function: roc_shmem_wait_until_all\n"); - get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all(ptr, + get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all(ivars, nelems, status, cmp, val); } template -__host__ size_t roc_shmem_wait_until_any(T *ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T val) { +__host__ size_t roc_shmem_wait_until_any(T *ivars, size_t nelems, const int* status, + int cmp, T val) { DPRINTF("Host function: roc_shmem_wait_until_any\n"); - return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any(ptr, + return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any(ivars, nelems, status, cmp, val); } template -__host__ size_t roc_shmem_wait_until_some(T *ptr, size_t nelems, size_t* indices, - const int* status, roc_shmem_cmps cmp, +__host__ size_t roc_shmem_wait_until_some(T *ivars, size_t nelems, size_t* indices, + const int* status, int cmp, T val) { DPRINTF("Host function: roc_shmem_wait_until_some\n"); - return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some(ptr, nelems, + return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some(ivars, nelems, indices, status, cmp, val); } template -__host__ size_t roc_shmem_wait_until_any_vector(T *ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T* vals) { +__host__ size_t roc_shmem_wait_until_any_vector(T *ivars, size_t nelems, const int* status, + int cmp, T* vals) { DPRINTF("Host function: roc_shmem_wait_until_any_vector\n"); - return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any_vector(ptr, + return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any_vector(ivars, nelems, status, cmp, vals); } template -__host__ void roc_shmem_wait_until_all_vector(T *ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T* vals) { +__host__ void roc_shmem_wait_until_all_vector(T *ivars, size_t nelems, const int* status, + int cmp, T* vals) { DPRINTF("Host function: roc_shmem_wait_until_all_vector\n"); - get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all_vector(ptr, + get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all_vector(ivars, nelems, status, cmp, vals); } template -__host__ size_t roc_shmem_wait_until_some_vector(T *ptr, size_t nelems, +__host__ size_t roc_shmem_wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int* status, - roc_shmem_cmps cmp, T* vals) { + int cmp, T* vals) { DPRINTF("Host function: roc_shmem_wait_until_some_vector\n"); - return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some_vector(ptr, + return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some_vector(ivars, nelems, indices, status, cmp, vals); } template -__host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) { +__host__ int roc_shmem_test(T *ivars, int cmp, T val) { DPRINTF("Host function: roc_shmem_testl\n"); - return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->test(ptr, cmp, val); + return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->test(ivars, cmp, val); } /** @@ -940,31 +940,31 @@ __host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) { * Declare templates for the wait types */ #define WAIT_GEN(T) \ - template __host__ void roc_shmem_wait_until(T * ptr, roc_shmem_cmps cmp, \ + template __host__ void roc_shmem_wait_until(T *ivars, int cmp, \ T val); \ - template __host__ int roc_shmem_test(T * ptr, roc_shmem_cmps cmp, T val); \ - template __host__ void Context::wait_until(T * ptr, roc_shmem_cmps cmp, \ + template __host__ int roc_shmem_test(T *ivars, int cmp, T val); \ + template __host__ void Context::wait_until(T *ivars, int cmp, \ T val); \ - template __host__ size_t roc_shmem_wait_until_any(T * ptr, \ + template __host__ size_t roc_shmem_wait_until_any(T *ivars, \ size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T val); \ - template __host__ void roc_shmem_wait_until_all(T * ptr, \ + int cmp, T val); \ + template __host__ void roc_shmem_wait_until_all(T *ivars, \ size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T val); \ - template __host__ size_t roc_shmem_wait_until_some(T * ptr, size_t nelems,\ + int cmp, T val); \ + template __host__ size_t roc_shmem_wait_until_some(T *ivars, size_t nelems,\ size_t* indices, const int* status, \ - roc_shmem_cmps cmp, T val); \ - template __host__ size_t roc_shmem_wait_until_any_vector(T * ptr, \ + int cmp, T val); \ + template __host__ size_t roc_shmem_wait_until_any_vector(T *ivars, \ size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T* vals); \ - template __host__ void roc_shmem_wait_until_all_vector(T * ptr, \ + int cmp, T* vals); \ + template __host__ void roc_shmem_wait_until_all_vector(T *ivars, \ size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T* vals); \ - template __host__ size_t roc_shmem_wait_until_some_vector(T * ptr, \ + int cmp, T* vals); \ + template __host__ size_t roc_shmem_wait_until_some_vector(T *ivars, \ size_t nelems, size_t* indices, \ - const int* status, roc_shmem_cmps cmp, \ + const int* status, int cmp, \ T* vals); \ - template __host__ int Context::test(T * ptr, roc_shmem_cmps cmp, T val); + template __host__ int Context::test(T *ivars, int cmp, T val); /** * Define APIs to call the template functions @@ -1099,11 +1099,11 @@ __host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) { #define AMO_EXTENDED_DEF_GEN(T, TNAME) \ __host__ T roc_shmem_ctx_##TNAME##_atomic_fetch(roc_shmem_ctx_t ctx, \ - T *dest, int pe) { \ - return roc_shmem_atomic_fetch(ctx, dest, pe); \ + T *source, int pe) { \ + return roc_shmem_atomic_fetch(ctx, source, pe); \ } \ - __host__ T roc_shmem_##TNAME##_atomic_fetch(T *dest, int pe) { \ - return roc_shmem_atomic_fetch(dest, pe); \ + __host__ T roc_shmem_##TNAME##_atomic_fetch(T *source, int pe) { \ + return roc_shmem_atomic_fetch(source, pe); \ } \ __host__ void roc_shmem_ctx_##TNAME##_atomic_set(roc_shmem_ctx_t ctx, \ T *dest, T value, int pe) { \ @@ -1165,55 +1165,55 @@ __host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) { } #define WAIT_DEF_GEN(T, TNAME) \ - __host__ void roc_shmem_##TNAME##_wait_until(T *ptr, roc_shmem_cmps cmp, \ + __host__ void roc_shmem_##TNAME##_wait_until(T *ivars, int cmp, \ T val) { \ - roc_shmem_wait_until(ptr, cmp, val); \ + roc_shmem_wait_until(ivars, cmp, val); \ } \ - __host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr, size_t nelems, \ + __host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars, size_t nelems,\ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val) { \ - return roc_shmem_wait_until_any(ptr, nelems, status, cmp, val); \ + return roc_shmem_wait_until_any(ivars, nelems, status, cmp, val); \ } \ - __host__ void roc_shmem_##TNAME##_wait_until_all(T *ptr, size_t nelems, \ + __host__ void roc_shmem_##TNAME##_wait_until_all(T *ivars, size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val) { \ - roc_shmem_wait_until_all(ptr, nelems, status, cmp, val); \ + roc_shmem_wait_until_all(ivars, nelems, status, cmp, val); \ } \ - __host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr, size_t nelems, \ + __host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars, size_t nelems, \ size_t* indices, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val) { \ - return roc_shmem_wait_until_some(ptr, nelems, indices, status, cmp, val); \ + return roc_shmem_wait_until_some(ivars, nelems, indices, status, cmp, val); \ } \ - __host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr, \ + __host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp,\ + int cmp, \ T* vals) { \ - return roc_shmem_wait_until_any_vector(ptr, nelems, status, cmp, \ + return roc_shmem_wait_until_any_vector(ivars, nelems, status, cmp, \ vals); \ } \ - __host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr, \ + __host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp,\ + int cmp, \ T* vals) { \ - roc_shmem_wait_until_all_vector(ptr, nelems, status, cmp, vals); \ + roc_shmem_wait_until_all_vector(ivars, nelems, status, cmp, vals); \ } \ - __host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr, \ + __host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars, \ size_t nelems, \ size_t* indices, \ const int* status,\ - roc_shmem_cmps cmp,\ + int cmp, \ T* vals) { \ - return roc_shmem_wait_until_some_vector(ptr, nelems, indices, \ + return roc_shmem_wait_until_some_vector(ivars, nelems, indices, \ status, cmp, vals); \ } \ - __host__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val) { \ - return roc_shmem_test(ptr, cmp, val); \ + __host__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val) { \ + return roc_shmem_test(ivars, cmp, val); \ } /****************************************************************************** diff --git a/src/roc_shmem_gpu.cpp b/src/roc_shmem_gpu.cpp index 1afd5b10ff..4ff19b20a2 100644 --- a/src/roc_shmem_gpu.cpp +++ b/src/roc_shmem_gpu.cpp @@ -181,8 +181,8 @@ __device__ T roc_shmem_atomic_fetch_inc(T *dest, int pe) { } template -__device__ T roc_shmem_atomic_fetch(T *dest, int pe) { - return roc_shmem_atomic_fetch(ROC_SHMEM_CTX_DEFAULT, dest, pe); +__device__ T roc_shmem_atomic_fetch(T *source, int pe) { + return roc_shmem_atomic_fetch(ROC_SHMEM_CTX_DEFAULT, source, pe); } template @@ -488,85 +488,85 @@ __device__ void roc_shmem_wg_fcollect(roc_shmem_ctx_t ctx, } template -__device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val) { +__device__ void roc_shmem_wait_until(T *ivars, int cmp, T val) { GPU_DPRINTF("Function: roc_shmem_wait_until\n"); Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT); ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL); - ctx_internal->wait_until(ptr, cmp, val); + ctx_internal->wait_until(ivars, cmp, val); } template -__device__ void roc_shmem_wait_until_all(T *ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T val) { +__device__ void roc_shmem_wait_until_all(T *ivars, size_t nelems, const int* status, + int cmp, T val) { GPU_DPRINTF("Function: roc_shmem_wait_until_all\n"); Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT); ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ALL); - ctx_internal->wait_until_all(ptr, nelems, status, cmp, val); + ctx_internal->wait_until_all(ivars, nelems, status, cmp, val); } template -__device__ size_t roc_shmem_wait_until_any(T *ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T val) { +__device__ size_t roc_shmem_wait_until_any(T *ivars, size_t nelems, const int* status, + int cmp, T val) { GPU_DPRINTF("Function: roc_shmem_wait_until_any\n"); Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT); ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ANY); - return ctx_internal->wait_until_any(ptr, nelems, status, cmp, val); + return ctx_internal->wait_until_any(ivars, nelems, status, cmp, val); } template -__device__ size_t roc_shmem_wait_until_some(T *ptr, size_t nelems, size_t* indices, - const int* status, roc_shmem_cmps cmp, +__device__ size_t roc_shmem_wait_until_some(T *ivars, size_t nelems, size_t* indices, + const int* status, int cmp, T val) { DPRINTF("Function: roc_shmem_wait_until_some\n"); Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT); ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_SOME); - return ctx_internal->wait_until_some(ptr, nelems, indices, status, cmp, val); + return ctx_internal->wait_until_some(ivars, nelems, indices, status, cmp, val); } template -__device__ size_t roc_shmem_wait_until_any_vector(T *ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T* vals) { +__device__ size_t roc_shmem_wait_until_any_vector(T *ivars, size_t nelems, const int* status, + int cmp, T* vals) { DPRINTF("Function: roc_shmem_wait_until_any_vector\n"); Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT); ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ANY_VECTOR); - return ctx_internal->wait_until_any_vector(ptr, nelems, status, cmp, vals); + return ctx_internal->wait_until_any_vector(ivars, nelems, status, cmp, vals); } template -__device__ void roc_shmem_wait_until_all_vector(T *ptr, size_t nelems, const int* status, - roc_shmem_cmps cmp, T* vals) { +__device__ void roc_shmem_wait_until_all_vector(T *ivars, size_t nelems, const int* status, + int cmp, T* vals) { DPRINTF("Function: roc_shmem_wait_until_all_vector\n"); Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT); ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ALL_VECTOR); - ctx_internal->wait_until_all_vector(ptr, nelems, status, cmp, vals); + ctx_internal->wait_until_all_vector(ivars, nelems, status, cmp, vals); } template -__device__ size_t roc_shmem_wait_until_some_vector(T *ptr, size_t nelems, +__device__ size_t roc_shmem_wait_until_some_vector(T *ivars, size_t nelems, size_t* indices, const int* status, - roc_shmem_cmps cmp, T* vals) { + int cmp, T* vals) { DPRINTF("Function: roc_shmem_wait_until_some_vector\n"); Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT); ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_SOME_VECTOR); - return ctx_internal->wait_until_some_vector(ptr, nelems, indices, status, cmp, vals); + return ctx_internal->wait_until_some_vector(ivars, nelems, indices, status, cmp, vals); } template -__device__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) { +__device__ int roc_shmem_test(T *ivars, int cmp, T val) { GPU_DPRINTF("Function: roc_shmem_testl\n"); Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT); ctx_internal->ctxStats.incStat(NUM_TEST); - return ctx_internal->test(ptr, cmp, val); + return ctx_internal->test(ivars, cmp, val); } __device__ void roc_shmem_ctx_wg_barrier_all(roc_shmem_ctx_t ctx) { @@ -650,10 +650,10 @@ __device__ T roc_shmem_atomic_fetch_inc(roc_shmem_ctx_t ctx, T *dest, int pe) { } template -__device__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *dest, int pe) { +__device__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *source, int pe) { GPU_DPRINTF("Function: roc_shmem_atomic_fetch\n"); - return get_internal_ctx(ctx)->amo_fetch_add(dest, 0, pe); + return get_internal_ctx(ctx)->amo_fetch_add(source, 0, pe); } template @@ -1030,52 +1030,53 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team, * Declare templates for the wait types */ #define WAIT_GEN(T) \ - template __device__ void roc_shmem_wait_until(T * ptr, \ - roc_shmem_cmps cmp, T val); \ - template __device__ size_t roc_shmem_wait_until_any(T * ptr, \ + template __device__ void roc_shmem_wait_until(T *ivars, \ + int cmp, T val); \ + template __device__ size_t roc_shmem_wait_until_any(T *ivars, \ size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T val); \ - template __device__ void roc_shmem_wait_until_all(T * ptr, \ + int cmp, T val); \ + template __device__ void roc_shmem_wait_until_all(T *ivars, \ size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T val); \ - template __device__ size_t roc_shmem_wait_until_some(T * ptr, size_t nelems,\ - size_t* indices, const int* status, \ - roc_shmem_cmps cmp, T val); \ - template __device__ size_t roc_shmem_wait_until_any_vector(T * ptr, \ - size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T* vals); \ - template __device__ void roc_shmem_wait_until_all_vector(T * ptr, \ - size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T* vals); \ - template __device__ size_t roc_shmem_wait_until_some_vector(T * ptr, \ + int cmp, T val); \ + template __device__ size_t roc_shmem_wait_until_some(T *ivars, \ size_t nelems, size_t* indices, \ - const int* status, roc_shmem_cmps cmp, \ + const int* status, \ + int cmp, T val); \ + template __device__ size_t roc_shmem_wait_until_any_vector(T *ivars, \ + size_t nelems, const int* status, \ + int cmp, T* vals); \ + template __device__ void roc_shmem_wait_until_all_vector(T *ivars, \ + size_t nelems, const int* status, \ + int cmp, T* vals); \ + template __device__ size_t roc_shmem_wait_until_some_vector(T *ivars, \ + size_t nelems, size_t* indices, \ + const int* status, int cmp, \ T* vals); \ - template __device__ int roc_shmem_test(T * ptr, roc_shmem_cmps cmp, \ + template __device__ int roc_shmem_test(T *ivars, int cmp, \ T val); \ - template __device__ void Context::wait_until(T * ptr, roc_shmem_cmps cmp, \ + template __device__ void Context::wait_until(T *ivars, int cmp, \ T val); \ - template __device__ size_t Context::wait_until_any(T * ptr, \ + template __device__ size_t Context::wait_until_any(T *ivars, \ size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T val); \ - template __device__ void Context::wait_until_all(T * ptr, \ + int cmp, T val); \ + template __device__ void Context::wait_until_all(T *ivars, \ size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T val); \ - template __device__ size_t Context::wait_until_some(T * ptr, \ + int cmp, T val); \ + template __device__ size_t Context::wait_until_some(T *ivars, \ size_t nelems, \ size_t* indices, const int* status, \ - roc_shmem_cmps cmp, T val); \ - template __device__ size_t Context::wait_until_any_vector(T * ptr, \ + int cmp, T val); \ + template __device__ size_t Context::wait_until_any_vector(T *ivars, \ size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T* vals); \ - template __device__ void Context::wait_until_all_vector(T * ptr, \ + int cmp, T* vals); \ + template __device__ void Context::wait_until_all_vector(T *ivars, \ size_t nelems, const int* status, \ - roc_shmem_cmps cmp, T* vals); \ - template __device__ size_t Context::wait_until_some_vector(T * ptr, \ + int cmp, T* vals); \ + template __device__ size_t Context::wait_until_some_vector(T *ivars, \ size_t nelems, size_t* indices, \ - const int* status, roc_shmem_cmps cmp, \ + const int* status, int cmp, \ T* vals); \ - template __device__ int Context::test(T * ptr, roc_shmem_cmps cmp, T val); + template __device__ int Context::test(T *ivars, int cmp, T val); #define ARITH_REDUCTION_GEN(T) \ REDUCTION_GEN(T, ROC_SHMEM_SUM) \ @@ -1302,11 +1303,11 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team, #define AMO_EXTENDED_DEF_GEN(T, TNAME) \ __device__ T roc_shmem_ctx_##TNAME##_atomic_fetch(roc_shmem_ctx_t ctx, \ - T *dest, int pe) { \ - return roc_shmem_atomic_fetch(ctx, dest, pe); \ + T *source, int pe) { \ + return roc_shmem_atomic_fetch(ctx, source, pe); \ } \ - __device__ T roc_shmem_##TNAME##_atomic_fetch(T *dest, int pe) { \ - return roc_shmem_atomic_fetch(dest, pe); \ + __device__ T roc_shmem_##TNAME##_atomic_fetch(T *source, int pe) { \ + return roc_shmem_atomic_fetch(source, pe); \ } \ __device__ void roc_shmem_ctx_##TNAME##_atomic_set( \ roc_shmem_ctx_t ctx, T *dest, T value, int pe) { \ @@ -1370,57 +1371,57 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team, } #define WAIT_DEF_GEN(T, TNAME) \ - __device__ void roc_shmem_##TNAME##_wait_until(T *ptr, roc_shmem_cmps cmp, \ + __device__ void roc_shmem_##TNAME##_wait_until(T *ivars, int cmp, \ T val) { \ - roc_shmem_wait_until(ptr, cmp, val); \ + roc_shmem_wait_until(ivars, cmp, val); \ } \ - __device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr, size_t nelems,\ + __device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars, size_t nelems,\ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val) { \ - return roc_shmem_wait_until_any(ptr, nelems, status, cmp, val); \ + return roc_shmem_wait_until_any(ivars, nelems, status, cmp, val); \ } \ - __device__ void roc_shmem_##TNAME##_wait_until_all(T *ptr, size_t nelems, \ + __device__ void roc_shmem_##TNAME##_wait_until_all(T *ivars, size_t nelems,\ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val) { \ - roc_shmem_wait_until_all(ptr, nelems, status, cmp, val); \ + roc_shmem_wait_until_all(ivars, nelems, status, cmp, val); \ } \ - __device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr, \ + __device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars, \ size_t nelems, \ size_t* indices, \ const int* status, \ - roc_shmem_cmps cmp, \ + int cmp, \ T val) { \ - return roc_shmem_wait_until_some(ptr, nelems, indices, status, cmp, \ + return roc_shmem_wait_until_some(ivars, nelems, indices, status, cmp, \ val); \ } \ - __device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr, \ + __device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp,\ + int cmp, \ T* vals) { \ - return roc_shmem_wait_until_any_vector(ptr, nelems, status, cmp, \ + return roc_shmem_wait_until_any_vector(ivars, nelems, status, cmp, \ vals); \ } \ - __device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr, \ + __device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars, \ size_t nelems, \ const int* status, \ - roc_shmem_cmps cmp,\ + int cmp, \ T* vals) { \ - roc_shmem_wait_until_all_vector(ptr, nelems, status, cmp, vals); \ + roc_shmem_wait_until_all_vector(ivars, nelems, status, cmp, vals); \ } \ - __device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr, \ + __device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars, \ size_t nelems, \ size_t* indices, \ const int* status,\ - roc_shmem_cmps cmp,\ + int cmp, \ T* vals) { \ - return roc_shmem_wait_until_some_vector(ptr, nelems, indices, \ + return roc_shmem_wait_until_some_vector(ivars, nelems, indices, \ status, cmp, vals); \ } \ - __device__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val) { \ - return roc_shmem_test(ptr, cmp, val); \ + __device__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val) { \ + return roc_shmem_test(ivars, cmp, val); \ } /****************************************************************************** diff --git a/src/templates.hpp b/src/templates.hpp index 3df3a01c23..025cefdcfa 100644 --- a/src/templates.hpp +++ b/src/templates.hpp @@ -287,7 +287,7 @@ __device__ T roc_shmem_atomic_fetch_inc(T *dest, int pe); * granularity. * * @param[in] ctx Context with which to perform this operation. - * @param[in] dest Destination address. Must be an address on the symmetric + * @param[in] source Source address. Must be an address on the symmetric heap. * @param[in] val The value to be atomically added. * @param[in] pe PE of the remote process. @@ -296,10 +296,10 @@ __device__ T roc_shmem_atomic_fetch_inc(T *dest, int pe); * */ template -__device__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *dest, int pe); +__device__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *source, int pe); template -__device__ T roc_shmem_atomic_fetch(T *dest, int pe); +__device__ T roc_shmem_atomic_fetch(T *source, int pe); /** * @brief Atomically add the value \p val to \p dest on \p pe. @@ -380,7 +380,7 @@ __device__ void roc_shmem_atomic_set(T *dest, T value, int pe); * coalesce contiguous messages and elect a leader thread to call into the * ROC_SHMEM function. * - * @param[in] ptr Pointer to memory on the symmetric heap to wait for. + * @param[in] ivars Pointer to memory on the symmetric heap to wait for. * @param[in] cmp Operation for the comparison. * @param[in] val Value to compare the memory at \p ptr to. * @@ -388,7 +388,7 @@ __device__ void roc_shmem_atomic_set(T *dest, T value, int pe); * */ template -__device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val); +__device__ void roc_shmem_wait_until(T *ivars, int cmp, T val); /** * @brief test if the condition (* \p ptr \p cmps \p val) is @@ -399,7 +399,7 @@ __device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val); * coalesce contiguous messages and elect a leader thread to call into the * ROC_SHMEM function. * - * @param[in] ptr Pointer to memory on the symmetric heap to wait for. + * @param[in] ivars Pointer to memory on the symmetric heap to wait for. * @param[in] cmp Operation for the comparison. * @param[in] val Value to compare the memory at \p ptr to. * @@ -407,7 +407,7 @@ __device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val); * */ template -__device__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val); +__device__ int roc_shmem_test(T *ivars, int cmp, T val); /** * @brief Perform a broadcast between PEs in the active set. The caller diff --git a/src/templates_host.hpp b/src/templates_host.hpp index ff161feb0a..dabd6a3748 100644 --- a/src/templates_host.hpp +++ b/src/templates_host.hpp @@ -102,10 +102,10 @@ template __host__ T roc_shmem_atomic_fetch_inc(T *dest, int pe); template -__host__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *dest, int pe); +__host__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *source, int pe); template -__host__ T roc_shmem_atomic_fetch(T *dest, int pe); +__host__ T roc_shmem_atomic_fetch(T *source, int pe); template __host__ void roc_shmem_atomic_add(roc_shmem_ctx_t ctx, T *dest, T val, int pe); @@ -136,35 +136,35 @@ __host__ void roc_shmem_to_all(roc_shmem_ctx_t ctx, T *dest, const T *source, int PE_size, T *pWrk, long *pSync); template -__host__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val); +__host__ void roc_shmem_wait_until(T *ivars, int cmp, T val); template -__host__ void wait_until_all(T* ptr, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val); +__host__ void wait_until_all(T* ivars, size_t nelems, const int *status, + int cmp, T val); template -__host__ size_t wait_until_any(T* ptr, size_t nelems, const int *status, - roc_shmem_cmps cmp, T val); +__host__ size_t wait_until_any(T* ivars, size_t nelems, const int *status, + int cmp, T val); template -__host__ size_t wait_until_some(T* ptr, size_t nelems, size_t* indices, - const int *status, roc_shmem_cmps cmp, T val); +__host__ size_t wait_until_some(T* ivars, size_t nelems, size_t* indices, + const int *status, int cmp, T val); template -__host__ void wait_until_all_vector(T* ptr, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals); +__host__ void wait_until_all_vector(T* ivars, size_t nelems, const int *status, + int cmp, T* vals); template -__host__ size_t wait_until_any_vector(T* ptr, size_t nelems, const int *status, - roc_shmem_cmps cmp, T* vals); +__host__ size_t wait_until_any_vector(T* ivars, size_t nelems, const int *status, + int cmp, T* vals); template -__host__ size_t wait_until_some_vector(T* ptr, size_t nelems, +__host__ size_t wait_until_some_vector(T* ivars, size_t nelems, size_t* indices, const int *status, - roc_shmem_cmps cmp, T* vals); + int cmp, T* vals); template -__host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val); +__host__ int roc_shmem_test(T *ivars, int cmp, T val); } // namespace rocshmem