diff --git a/include/roc_shmem/roc_shmem.hpp b/include/roc_shmem/roc_shmem.hpp
index 3b9d6d0c3b..c0484e8bd7 100644
--- a/include/roc_shmem/roc_shmem.hpp
+++ b/include/roc_shmem/roc_shmem.hpp
@@ -1021,12 +1021,12 @@ __device__ ATTR_NO_INLINE void roc_shmem_threadfence_system();
  */
 #define ATOMIC_FETCH_API_GEN(T, TNAME)                                  \
   __device__ ATTR_NO_INLINE T roc_shmem_ctx_##TNAME##_atomic_fetch(     \
-      roc_shmem_ctx_t ctx, T *dest, int pe);                            \
-  __device__ ATTR_NO_INLINE T roc_shmem_##TNAME##_atomic_fetch(T *dest, \
+      roc_shmem_ctx_t ctx, T *source, int pe);                          \
+  __device__ ATTR_NO_INLINE T roc_shmem_##TNAME##_atomic_fetch(T *source, \
                                                                int pe); \
   __host__ T roc_shmem_ctx_##TNAME##_atomic_fetch(roc_shmem_ctx_t ctx,  \
-                                                  T *dest, int pe);     \
-  __host__ T roc_shmem_##TNAME##_atomic_fetch(T *dest, int pe);
+                                                  T *source, int pe);     \
+  __host__ T roc_shmem_##TNAME##_atomic_fetch(T *source, int pe);
 
 /*
  * MACRO DECLARE SHMEM_ATOMIC_ADD APIs
@@ -1152,83 +1152,83 @@ __device__ ATTR_NO_INLINE void roc_shmem_threadfence_system();
  * MACRO DECLARE SHMEM_WAIT_UNTIL APIs
  */
 #define WAIT_UNTIL_API_GEN(T, TNAME)                                            \
-  __device__ void roc_shmem_##TNAME##_wait_until(T *ptr,                        \
-                                                 roc_shmem_cmps cmp,            \
+  __device__ void roc_shmem_##TNAME##_wait_until(T *ivars,                      \
+                                                 int cmp,                       \
                                                  T val);                        \
-  __device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr,                  \
+  __device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars,                \
                                                      size_t nelems,             \
                                                      const int* status,         \
-                                                     roc_shmem_cmps cmp,        \
+                                                     int cmp,                   \
                                                      T val);                    \
-  __device__ void roc_shmem_##TNAME##_wait_until_all(T *ptr,                    \
+  __device__ void roc_shmem_##TNAME##_wait_until_all(T *ivars,                  \
                                                      size_t nelems,             \
                                                      const int* status,         \
-                                                     roc_shmem_cmps cmp,        \
+                                                     int cmp,                   \
                                                      T val);                    \
-  __device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr,                 \
+  __device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars,               \
                                                       size_t nelems,            \
                                                       size_t* indices,          \
                                                       const int* status,        \
-                                                      roc_shmem_cmps cmp,       \
+                                                      int cmp,                  \
                                                       T val);                   \
-  __device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr,           \
+  __device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars,         \
                                                             size_t nelems,      \
                                                             const int* status,  \
-                                                            roc_shmem_cmps cmp, \
+                                                            int cmp,            \
                                                             T* vals);           \
-  __device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr,             \
+  __device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars,           \
                                                             size_t nelems,      \
                                                             const int* status,  \
-                                                            roc_shmem_cmps cmp, \
+                                                            int cmp,            \
                                                             T* vals);           \
-  __device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr,          \
+  __device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars,        \
                                                              size_t nelems,     \
                                                              size_t* indices,   \
                                                              const int* status, \
-                                                             roc_shmem_cmps cmp,\
+                                                             int cmp,           \
                                                              T* vals);          \
-  __host__ void roc_shmem_##TNAME##_wait_until(T *ptr,                          \
-                                               roc_shmem_cmps cmp,              \
+  __host__ void roc_shmem_##TNAME##_wait_until(T *ivars,                        \
+                                               int cmp,                         \
                                                T val);                          \
-  __host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr,                    \
+  __host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars,                  \
                                                      size_t nelems,             \
                                                      const int* status,         \
-                                                     roc_shmem_cmps cmp,        \
+                                                     int cmp,                   \
                                                      T val);                    \
-  __host__ void roc_shmem_##TNAME##_wait_until_all(T *ptr,                      \
+  __host__ void roc_shmem_##TNAME##_wait_until_all(T *ivars,                    \
                                                    size_t nelems,               \
                                                    const int* status,           \
-                                                   roc_shmem_cmps cmp,          \
+                                                   int cmp,                     \
                                                    T val);                      \
-  __host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr,                   \
+  __host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars,                 \
                                                     size_t nelems,              \
                                                     size_t* indices,            \
                                                     const int* status,          \
-                                                    roc_shmem_cmps cmp,         \
+                                                    int cmp,                    \
                                                     T val);                     \
-  __host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr,             \
+  __host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars,           \
                                                             size_t nelems,      \
                                                             const int* status,  \
-                                                            roc_shmem_cmps cmp, \
+                                                            int cmp,            \
                                                             T* vals);           \
-  __host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr,               \
+  __host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars,             \
                                                           size_t nelems,        \
                                                           const int* status,    \
-                                                          roc_shmem_cmps cmp,   \
+                                                          int cmp,              \
                                                           T* vals);             \
-  __host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr,            \
+  __host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars,          \
                                                            size_t nelems,       \
                                                            size_t* indices,     \
                                                            const int* status,   \
-                                                           roc_shmem_cmps cmp,  \
+                                                           int cmp,             \
                                                            T* vals);
 
 /*
  * MACRO DECLARE SHMEM_TEST APIs
  */
-#define TEST_API_GEN(T, TNAME)                                                \
-  __device__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val); \
-  __host__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val);
+#define TEST_API_GEN(T, TNAME)                                                  \
+  __device__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val);            \
+  __host__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val);
 
 /**
  * @name SHMEM_REDUCTIONS
@@ -2031,7 +2031,7 @@ ATOMIC_INC_API_GEN(ptrdiff_t, ptrdiff)
  * coalesce contiguous messages and elect a leader thread to call into the
  * ROC_SHMEM function.
  *
- * @param[in] ptr Pointer to memory on the symmetric heap to wait for.
+ * @param[in] ivars Pointer to memory on the symmetric heap to wait for.
  * @param[in] cmp Operation for the comparison.
  * @param[in] val Value to compare the memory at \p ptr to.
  *
@@ -2064,7 +2064,7 @@ WAIT_UNTIL_API_GEN(unsigned long long, ulonglong)  // NOLINT(runtime/int)
  * coalesce contiguous messages and elect a leader thread to call into the
  * ROC_SHMEM function.
  *
- * @param[in] ptr Pointer to memory on the symmetric heap to wait for.
+ * @param[in] ivars Pointer to memory on the symmetric heap to wait for.
  * @param[in] cmp Operation for the comparison.
  * @param[in] val Value to compare the memory at \p ptr to.
  *
diff --git a/src/context.hpp b/src/context.hpp
index 72c8f38352..0db4e99b23 100644
--- a/src/context.hpp
+++ b/src/context.hpp
@@ -74,42 +74,42 @@ class Context {
    ***************************** DEVICE METHODS *****************************
    *************************************************************************/
   template <typename T>
-  __device__ void wait_until(T* ptr, roc_shmem_cmps cmp, T val);
+  __device__ void wait_until(T *ivars, int cmp, T val);
 
   template <typename T>
-  __device__ void wait_until_all(T* ptr, size_t nelems,
+  __device__ void wait_until_all(T *ivars, size_t nelems,
                                  const int *status,
-                                 roc_shmem_cmps cmp, T val);
+                                 int cmp, T val);
 
   template <typename T>
-  __device__ size_t wait_until_any(T* ptr, size_t nelems,
+  __device__ size_t wait_until_any(T *ivars, size_t nelems,
                                    const int *status,
-                                   roc_shmem_cmps cmp, T val);
+                                   int cmp, T val);
 
   template <typename T>
-  __device__ size_t wait_until_some(T* ptr, size_t nelems,
+  __device__ size_t wait_until_some(T *ivars, size_t nelems,
                                     size_t* indices,
                                     const int *status,
-                                    roc_shmem_cmps cmp, T val);
+                                    int cmp, T val);
 
   template <typename T>
-  __device__ void wait_until_all_vector(T* ptr, size_t nelems,
+  __device__ void wait_until_all_vector(T *ivars, size_t nelems,
                                         const int *status,
-                                        roc_shmem_cmps cmp, T* vals);
+                                        int cmp, T* vals);
 
   template <typename T>
-  __device__ size_t wait_until_any_vector(T* ptr, size_t nelems,
+  __device__ size_t wait_until_any_vector(T *ivars, size_t nelems,
                                           const int *status,
-                                          roc_shmem_cmps cmp, T* vals);
+                                          int cmp, T* vals);
 
   template <typename T>
-  __device__ size_t wait_until_some_vector(T* ptr, size_t nelems,
+  __device__ size_t wait_until_some_vector(T *ivars, size_t nelems,
                                            size_t* indices,
                                            const int *status,
-                                           roc_shmem_cmps cmp, T* vals);
+                                           int cmp, T* vals);
 
   template <typename T>
-  __device__ int test(T* ptr, roc_shmem_cmps cmp, T val);
+  __device__ int test(T *ivars, int cmp, T val);
 
   __device__ void threadfence_system();
 
@@ -365,42 +365,42 @@ class Context {
                        int nreduce);
 
   template <typename T>
-  __host__ void wait_until(T* ptr, roc_shmem_cmps cmp, T val);
+  __host__ void wait_until(T *ivars, int cmp, T val);
 
   template <typename T>
-  __host__ void wait_until_all(T* ptr, size_t nelems,
+  __host__ void wait_until_all(T *ivars, size_t nelems,
                                const int *status,
-                               roc_shmem_cmps cmp, T val);
+                               int cmp, T val);
 
   template <typename T>
-  __host__ size_t wait_until_any(T* ptr, size_t nelems,
+  __host__ size_t wait_until_any(T *ivars, size_t nelems,
                                  const int *status,
-                                 roc_shmem_cmps cmp, T val);
+                                 int cmp, T val);
 
   template <typename T>
-  __host__ size_t wait_until_some(T* ptr, size_t nelems,
+  __host__ size_t wait_until_some(T *ivars, size_t nelems,
                                   size_t* indices,
                                   const int *status,
-                                  roc_shmem_cmps cmp, T val);
+                                  int cmp, T val);
 
   template <typename T>
-  __host__ void wait_until_all_vector(T* ptr, size_t nelems,
+  __host__ void wait_until_all_vector(T *ivars, size_t nelems,
                                       const int *status,
-                                      roc_shmem_cmps cmp, T* vals);
+                                      int cmp, T* vals);
 
   template <typename T>
-  __host__ size_t wait_until_any_vector(T* ptr, size_t nelems,
+  __host__ size_t wait_until_any_vector(T *ivars, size_t nelems,
                                         const int *status,
-                                        roc_shmem_cmps cmp, T* vals);
+                                        int cmp, T* vals);
 
   template <typename T>
-  __host__ size_t wait_until_some_vector(T* ptr, size_t nelems,
+  __host__ size_t wait_until_some_vector(T *ivars, size_t nelems,
                                          size_t* indices,
                                          const int *status,
-                                         roc_shmem_cmps cmp, T* vals);
+                                         int cmp, T* vals);
 
   template <typename T>
-  __host__ int test(T* ptr, roc_shmem_cmps cmp, T val);
+  __host__ int test(T *ivars, int cmp, T val);
 
  public:
   /**
diff --git a/src/context_tmpl_device.hpp b/src/context_tmpl_device.hpp
index d6d0df63fa..d0310d476a 100644
--- a/src/context_tmpl_device.hpp
+++ b/src/context_tmpl_device.hpp
@@ -199,9 +199,9 @@ __device__ void Context::broadcast(T *dest, const T *source, int nelems,
 }
 
 template <typename T>
-__device__ __forceinline__ void Context::wait_until(T *ptr, roc_shmem_cmps cmp,
+__device__ __forceinline__ void Context::wait_until(T *ivars, int cmp,
                                                     T val) {
-  while (!test(ptr, cmp, val)) {
+  while (!test(ivars, cmp, val)) {
   }
 }
 
@@ -219,9 +219,9 @@ __device__ __forceinline__ size_t status_entry(size_t nelems,
 
 template <typename T>
 __device__ __forceinline__
-size_t Context::wait_until_any(T* ptr, size_t nelems,
+size_t Context::wait_until_any(T *ivars, size_t nelems,
                                const int *status,
-                               roc_shmem_cmps cmp, T val) {
+                               int cmp, T val) {
   // zero nelems error condition
   if (!nelems) {
     return SIZE_MAX;
@@ -240,7 +240,7 @@ size_t Context::wait_until_any(T* ptr, size_t nelems,
       if (status[i]) {
         continue;
       }
-      if (test(ptr + i, cmp, val)) {
+      if (test(ivars + i, cmp, val)) {
         return i;
       }
     }
@@ -249,9 +249,9 @@ size_t Context::wait_until_any(T* ptr, size_t nelems,
 
 template <typename T>
 __device__ __forceinline__
-void Context::wait_until_all(T* ptr, size_t nelems,
+void Context::wait_until_all(T *ivars, size_t nelems,
                              const int *status,
-                             roc_shmem_cmps cmp, T val) {
+                             int cmp, T val) {
   // zero nelems error condition
   if (!nelems) {
     return;
@@ -268,17 +268,17 @@ void Context::wait_until_all(T* ptr, size_t nelems,
     if (status[i]) {
       continue;
     }
-    while (!test(ptr + i, cmp, val)) {
+    while (!test(ivars + i, cmp, val)) {
     }
   }
 }
 
 template <typename T>
 __device__ __forceinline__
-size_t Context::wait_until_some(T* ptr, size_t nelems,
+size_t Context::wait_until_some(T *ivars, size_t nelems,
                               size_t* indices,
                               const int *status,
-                              roc_shmem_cmps cmp, T val) {
+                              int cmp, T val) {
   // zero nelems error condition
   if (!nelems) {
     return 0;
@@ -299,7 +299,7 @@ size_t Context::wait_until_some(T* ptr, size_t nelems,
       if (status[i]) {
         continue;
       }
-      if (test(ptr + i, cmp, val)) {
+      if (test(ivars + i, cmp, val)) {
         done = true;
         indices[ncompleted] = i;
         ncompleted++;
@@ -311,62 +311,62 @@ size_t Context::wait_until_some(T* ptr, size_t nelems,
 
 template <typename T>
 __device__ __forceinline__
-void Context::wait_until_all_vector(T* ptr, size_t nelems,
+void Context::wait_until_all_vector(T *ivars, size_t nelems,
                                     const int *status,
-                                    roc_shmem_cmps cmp, T* vals) {
+                                    int cmp, T* vals) {
   ;
 }
 
 template <typename T>
 __device__ __forceinline__
-size_t Context::wait_until_any_vector(T* ptr, size_t nelems,
+size_t Context::wait_until_any_vector(T *ivars, size_t nelems,
                                       const int *status,
-                                      roc_shmem_cmps cmp, T* vals) {
+                                      int cmp, T* vals) {
   return 0;
 }
 
 template <typename T>
 __device__ __forceinline__
-size_t Context::wait_until_some_vector(T* ptr, size_t nelems,
+size_t Context::wait_until_some_vector(T *ivars, size_t nelems,
                                      size_t* indices,
                                      const int *status,
-                                     roc_shmem_cmps cmp, T* vals) {
+                                     int cmp, T* vals) {
   return 0;
 }
 
 template <typename T>
-__device__ __forceinline__ int Context::test(T *ptr, roc_shmem_cmps cmp,
+__device__ __forceinline__ int Context::test(T *ivars, int cmp,
                                              T val) {
   int ret = 0;
-  volatile T *vol_ptr = reinterpret_cast<T *>(ptr);
+  volatile T *vol_ivars = reinterpret_cast<T *>(ivars);
   switch (cmp) {
     case ROC_SHMEM_CMP_EQ:
-      if (uncached_load(vol_ptr) == val) {
+      if (uncached_load(vol_ivars) == val) {
         ret = 1;
       }
       break;
     case ROC_SHMEM_CMP_NE:
-      if (uncached_load(vol_ptr) != val) {
+      if (uncached_load(vol_ivars) != val) {
         ret = 1;
       }
       break;
     case ROC_SHMEM_CMP_GT:
-      if (uncached_load(vol_ptr) > val) {
+      if (uncached_load(vol_ivars) > val) {
         ret = 1;
       }
       break;
     case ROC_SHMEM_CMP_GE:
-      if (uncached_load(vol_ptr) >= val) {
+      if (uncached_load(vol_ivars) >= val) {
         ret = 1;
       }
       break;
     case ROC_SHMEM_CMP_LT:
-      if (uncached_load(vol_ptr) < val) {
+      if (uncached_load(vol_ivars) < val) {
         ret = 1;
       }
       break;
     case ROC_SHMEM_CMP_LE:
-      if (uncached_load(vol_ptr) <= val) {
+      if (uncached_load(vol_ivars) <= val) {
         ret = 1;
       }
       break;
diff --git a/src/context_tmpl_host.hpp b/src/context_tmpl_host.hpp
index bce99c1d6c..a4c7929501 100644
--- a/src/context_tmpl_host.hpp
+++ b/src/context_tmpl_host.hpp
@@ -234,73 +234,73 @@ __host__ void Context::to_all(roc_shmem_team_t team, T *dest, const T *source,
 }
 
 template <typename T>
-__host__ void Context::wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
+__host__ void Context::wait_until(T *ivars, int cmp, T val) {
   ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL);
 
-  HOST_DISPATCH(wait_until<T>(ptr, cmp, val));
+  HOST_DISPATCH(wait_until<T>(ivars, cmp, val));
 }
 
 template <typename T>
-__host__ size_t Context::wait_until_any(T* ptr, size_t nelems,
+__host__ size_t Context::wait_until_any(T *ivars, size_t nelems,
                                         const int* status,
-                                        roc_shmem_cmps cmp, T val) {
+                                        int cmp, T val) {
   ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ANY);
 
-  return HOST_DISPATCH(wait_until_any<T>(ptr, nelems, status, cmp, val));
+  return HOST_DISPATCH(wait_until_any<T>(ivars, nelems, status, cmp, val));
 }
 
 template <typename T>
-__host__ void Context::wait_until_all(T* ptr, size_t nelems,
+__host__ void Context::wait_until_all(T *ivars, size_t nelems,
                                       const int* status,
-                                      roc_shmem_cmps cmp, T val) {
+                                      int cmp, T val) {
   ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ALL);
 
-  HOST_DISPATCH(wait_until_all<T>(ptr, nelems, status, cmp, val));
+  HOST_DISPATCH(wait_until_all<T>(ivars, nelems, status, cmp, val));
 }
 
 template <typename T>
-__host__ size_t Context::wait_until_some(T* ptr, size_t nelems,
+__host__ size_t Context::wait_until_some(T *ivars, size_t nelems,
                                          size_t* indices,
                                          const int* status,
-                                         roc_shmem_cmps cmp, T val) {
+                                         int cmp, T val) {
   ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_SOME);
 
-  HOST_DISPATCH_RET(wait_until_some<T>(ptr, nelems, indices, status, cmp, val));
+  HOST_DISPATCH_RET(wait_until_some<T>(ivars, nelems, indices, status, cmp, val));
 }
 
 template <typename T>
-__host__ void Context::wait_until_all_vector(T* ptr, size_t nelems,
+__host__ void Context::wait_until_all_vector(T *ivars, size_t nelems,
                                              const int *status,
-                                             roc_shmem_cmps cmp, T* vals) {
+                                             int cmp, T* vals) {
   ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ALL_VECTOR);
 
-  HOST_DISPATCH(wait_until_all_vector<T>(ptr, nelems, status, cmp, vals));
+  HOST_DISPATCH(wait_until_all_vector<T>(ivars, nelems, status, cmp, vals));
 }
 
 template <typename T>
-__host__ size_t Context::wait_until_any_vector(T* ptr, size_t nelems,
+__host__ size_t Context::wait_until_any_vector(T *ivars, size_t nelems,
                                                const int *status,
-                                               roc_shmem_cmps cmp, T* vals) {
+                                               int cmp, T* vals) {
   ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ANY_VECTOR);
 
-  HOST_DISPATCH_RET(wait_until_any_vector<T>(ptr, nelems, status, cmp, vals));
+  HOST_DISPATCH_RET(wait_until_any_vector<T>(ivars, nelems, status, cmp, vals));
 }
 
 template <typename T>
-__host__ size_t Context::wait_until_some_vector(T* ptr, size_t nelems,
+__host__ size_t Context::wait_until_some_vector(T *ivars, size_t nelems,
                                                 size_t* indices,
                                                 const int *status,
-                                                roc_shmem_cmps cmp, T* vals) {
+                                                int cmp, T* vals) {
   ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_SOME_VECTOR);
 
-  HOST_DISPATCH_RET(wait_until_some_vector<T>(ptr, nelems, indices, status, cmp, vals));
+  HOST_DISPATCH_RET(wait_until_some_vector<T>(ivars, nelems, indices, status, cmp, vals));
 }
 
 template <typename T>
-__host__ int Context::test(T *ptr, roc_shmem_cmps cmp, T val) {
+__host__ int Context::test(T *ivars, int cmp, T val) {
   ctxHostStats.incStat(NUM_HOST_TEST);
 
-  HOST_DISPATCH_RET(test<T>(ptr, cmp, val));
+  HOST_DISPATCH_RET(test<T>(ivars, cmp, val));
 }
 
 }  // namespace rocshmem
diff --git a/src/gpu_ib/context_ib_host.hpp b/src/gpu_ib/context_ib_host.hpp
index c7c7a975b2..9528a1cee9 100644
--- a/src/gpu_ib/context_ib_host.hpp
+++ b/src/gpu_ib/context_ib_host.hpp
@@ -99,42 +99,42 @@ class GPUIBHostContext : public Context {
                        int nreduce);
 
   template <typename T>
-  __host__ void wait_until(T *ptr, roc_shmem_cmps cmp, T val);
+  __host__ void wait_until(T *ivars, roc_shmem_cmps cmp, T val);
 
   template <typename T>
-  __host__ size_t wait_until_any(T* ptr, size_t nelems,
+  __host__ size_t wait_until_any(T *ivars, size_t nelems,
                                  const int *status,
                                  roc_shmem_cmps cmp, T val);
 
   template <typename T>
-  __host__ void wait_until_all(T* ptr, size_t nelems,
+  __host__ void wait_until_all(T *ivars, size_t nelems,
                                const int *status,
                                roc_shmem_cmps cmp, T val);
 
   template <typename T>
-  __host__ size_t wait_until_some(T* ptr, size_t nelems,
+  __host__ size_t wait_until_some(T *ivars, size_t nelems,
                                 size_t* indices,
                                 const int *status,
                                 roc_shmem_cmps cmp, T val);
 
   template <typename T>
-  __host__ void wait_until_all_vector(T* ptr, size_t nelems,
+  __host__ void wait_until_all_vector(T *ivars, size_t nelems,
                                       const int *status,
                                       roc_shmem_cmps cmp, T* vals);
 
   template <typename T>
-  __host__ size_t wait_until_any_vector(T* ptr, size_t nelems,
+  __host__ size_t wait_until_any_vector(T *ivars, size_t nelems,
                                         const int *status,
                                         roc_shmem_cmps cmp, T* vals);
 
   template <typename T>
-  __host__ size_t wait_until_some_vector(T* ptr, size_t nelems,
+  __host__ size_t wait_until_some_vector(T *ivars, size_t nelems,
                                          size_t* indices,
                                          const int *status,
                                          roc_shmem_cmps cmp, T* vals);
 
   template <typename T>
-  __host__ int test(T *ptr, roc_shmem_cmps cmp, T val);
+  __host__ int test(T *ivars, roc_shmem_cmps cmp, T val);
 
  public:
   /* Pointer to the backend's host interface */
diff --git a/src/gpu_ib/context_ib_tmpl_host.hpp b/src/gpu_ib/context_ib_tmpl_host.hpp
index 393aaf6281..3c5c75e66b 100644
--- a/src/gpu_ib/context_ib_tmpl_host.hpp
+++ b/src/gpu_ib/context_ib_tmpl_host.hpp
@@ -115,57 +115,57 @@ __host__ void GPUIBHostContext::to_all(roc_shmem_team_t team, T *dest,
 }
 
 template <typename T>
-__host__ void GPUIBHostContext::wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
-  host_interface->wait_until<T>(ptr, cmp, val, context_window_info);
+__host__ void GPUIBHostContext::wait_until(T *ivars, roc_shmem_cmps cmp, T val) {
+  host_interface->wait_until<T>(ivars, cmp, val, context_window_info);
 }
 
 template <typename T>
-__host__ void GPUIBHostContext::wait_until_all(T *ptr, size_t nelems,
+__host__ void GPUIBHostContext::wait_until_all(T *ivars, size_t nelems,
                                                const int* status,
                                                roc_shmem_cmps cmp, T val) {
-  host_interface->wait_until_all<T>(ptr, nelems, status, cmp, val, context_window_info);
+  host_interface->wait_until_all<T>(ivars, nelems, status, cmp, val, context_window_info);
 }
 
 template <typename T>
-__host__ size_t GPUIBHostContext::wait_until_any(T *ptr, size_t nelems,
+__host__ size_t GPUIBHostContext::wait_until_any(T *ivars, size_t nelems,
                                                const int* status,
                                                roc_shmem_cmps cmp, T val) {
-  return host_interface->wait_until_any<T>(ptr, nelems, status, cmp, val, context_window_info);
+  return host_interface->wait_until_any<T>(ivars, nelems, status, cmp, val, context_window_info);
 }
 
 template <typename T>
-__host__ size_t GPUIBHostContext::wait_until_some(T *ptr, size_t nelems,
+__host__ size_t GPUIBHostContext::wait_until_some(T *ivars, size_t nelems,
                                                  size_t* indices,
                                                  const int* status,
                                                  roc_shmem_cmps cmp, T val) {
-  return host_interface->wait_until_some<T>(ptr, nelems, indices, status, cmp, val, context_window_info);
+  return host_interface->wait_until_some<T>(ivars, nelems, indices, status, cmp, val, context_window_info);
 }
 
 template <typename T>
-__host__ void GPUIBHostContext::wait_until_all_vector(T *ptr, size_t nelems,
+__host__ void GPUIBHostContext::wait_until_all_vector(T *ivars, size_t nelems,
                                                       const int* status,
                                                       roc_shmem_cmps cmp, T* vals) {
-  host_interface->wait_until_all_vector<T>(ptr, nelems, status, cmp, vals, context_window_info);
+  host_interface->wait_until_all_vector<T>(ivars, nelems, status, cmp, vals, context_window_info);
 }
 
 template <typename T>
-__host__ size_t GPUIBHostContext::wait_until_any_vector(T *ptr, size_t nelems,
+__host__ size_t GPUIBHostContext::wait_until_any_vector(T *ivars, size_t nelems,
                                                         const int* status,
                                                         roc_shmem_cmps cmp, T* vals) {
-  return host_interface->wait_until_any_vector<T>(ptr, nelems, status, cmp, vals, context_window_info);
+  return host_interface->wait_until_any_vector<T>(ivars, nelems, status, cmp, vals, context_window_info);
 }
 
 template <typename T>
-__host__ size_t GPUIBHostContext::wait_until_some_vector(T *ptr, size_t nelems,
+__host__ size_t GPUIBHostContext::wait_until_some_vector(T *ivars, size_t nelems,
                                                          size_t* indices,
                                                          const int* status,
                                                          roc_shmem_cmps cmp, T* vals) {
-  return host_interface->wait_until_some_vector<T>(ptr, nelems, indices, status, cmp, vals, context_window_info);
+  return host_interface->wait_until_some_vector<T>(ivars, nelems, indices, status, cmp, vals, context_window_info);
 }
 
 template <typename T>
-__host__ int GPUIBHostContext::test(T *ptr, roc_shmem_cmps cmp, T val) {
-  return host_interface->test<T>(ptr, cmp, val, context_window_info);
+__host__ int GPUIBHostContext::test(T *ivars, roc_shmem_cmps cmp, T val) {
+  return host_interface->test<T>(ivars, cmp, val, context_window_info);
 }
 
 }  // namespace rocshmem
diff --git a/src/host/host.hpp b/src/host/host.hpp
index a9210a7b21..4cd2b2e393 100644
--- a/src/host/host.hpp
+++ b/src/host/host.hpp
@@ -211,41 +211,41 @@ class HostInterface {
                        int nreduce);
 
   template <typename T>
-  __host__ void wait_until(T* ptr, roc_shmem_cmps cmp, T val,
+  __host__ void wait_until(T *ivars, int cmp, T val,
                            WindowInfo* window_info);
 
   template <typename T>
-  __host__ void wait_until_all(T* ptr, size_t nelems, const int* status,
-                               roc_shmem_cmps cmp, T val,
+  __host__ void wait_until_all(T *ivars, size_t nelems, const int* status,
+                               int cmp, T val,
                                WindowInfo* window_info);
 
   template <typename T>
-  __host__ size_t wait_until_any(T* ptr, size_t nelems, const int* status,
-                                 roc_shmem_cmps cmp, T val,
+  __host__ size_t wait_until_any(T *ivars, size_t nelems, const int* status,
+                                 int cmp, T val,
                                  WindowInfo* window_info);
 
   template <typename T>
-  __host__ size_t wait_until_some(T* ptr, size_t nelems, size_t* indices,
-                                  const int* status, roc_shmem_cmps cmp, T val,
+  __host__ size_t wait_until_some(T *ivars, size_t nelems, size_t* indices,
+                                  const int* status, int cmp, T val,
                                   WindowInfo* window_info);
 
   template <typename T>
-  __host__ void wait_until_all_vector(T* ptr, size_t nelems, const int* status,
-                                      roc_shmem_cmps cmp, T* vals,
+  __host__ void wait_until_all_vector(T *ivars, size_t nelems, const int* status,
+                                      int cmp, T* vals,
                                       WindowInfo* window_info);
 
   template <typename T>
-  __host__ size_t wait_until_any_vector(T* ptr, size_t nelems,
-                                        const int* status, roc_shmem_cmps cmp,
+  __host__ size_t wait_until_any_vector(T *ivars, size_t nelems,
+                                        const int* status, int cmp,
                                         T* vals, WindowInfo* window_info);
 
   template <typename T>
-  __host__ size_t wait_until_some_vector(T* ptr, size_t nelems, size_t* indices,
-                                         const int* status, roc_shmem_cmps cmp,
+  __host__ size_t wait_until_some_vector(T *ivars, size_t nelems, size_t* indices,
+                                         const int* status, int cmp,
                                          T* vals, WindowInfo* window_info);
 
   template <typename T>
-  __host__ int test(T* ptr, roc_shmem_cmps cmp, T val, WindowInfo* window_info);
+  __host__ int test(T *ivars, int cmp, T val, WindowInfo* window_info);
 
 #ifndef USE_COHERENT_HEAP
   __host__ void create_hdp_window();
@@ -295,11 +295,11 @@ class HostInterface {
   __host__ MPI_Datatype get_mpi_type();
 
   template <typename T>
-  __host__ int compare(roc_shmem_cmps cmp, T input_val, T target_val);
+  __host__ int compare(int cmp, T input_val, T target_val);
 
   template <typename T>
   __host__ int test_and_compare(MPI_Aint offset, MPI_Datatype mpi_type,
-                                roc_shmem_cmps cmp, T val, MPI_Win win);
+                                int cmp, T val, MPI_Win win);
 
   template <typename T, ROC_SHMEM_OP Op>
   __host__ void to_all_internal(MPI_Comm mpi_comm, T* dest, const T* source,
diff --git a/src/host/host_templates.hpp b/src/host/host_templates.hpp
index c2b308133e..41ff7cb094 100644
--- a/src/host/host_templates.hpp
+++ b/src/host/host_templates.hpp
@@ -392,7 +392,7 @@ __host__ void HostInterface::to_all(roc_shmem_team_t team, T* dest,
 }
 
 template <typename T>
-__host__ inline int HostInterface::compare(roc_shmem_cmps cmp, T input_val,
+__host__ inline int HostInterface::compare(int cmp, T input_val,
                                            T target_val) {
   int cond_satisfied{0};
 
@@ -426,7 +426,7 @@ __host__ inline int HostInterface::compare(roc_shmem_cmps cmp, T input_val,
 template <typename T>
 __host__ inline int HostInterface::test_and_compare(MPI_Aint offset,
                                                     MPI_Datatype mpi_type,
-                                                    roc_shmem_cmps cmp, T val,
+                                                    int cmp, T val,
                                                     MPI_Win win) {
   T fetched_val{};
 
@@ -446,7 +446,7 @@ __host__ inline int HostInterface::test_and_compare(MPI_Aint offset,
 }
 
 template <typename T>
-__host__ void HostInterface::wait_until(T* ptr, roc_shmem_cmps cmp, T val,
+__host__ void HostInterface::wait_until(T *ivars, int cmp, T val,
                                         WindowInfo* window_info) {
   DPRINTF("Function: host_wait_until\n");
 
@@ -454,13 +454,13 @@ __host__ void HostInterface::wait_until(T* ptr, roc_shmem_cmps cmp, T val,
    * Find the offset of this memory in the window
    */
   MPI_Aint offset{
-      compute_offset(ptr, window_info->get_start(), window_info->get_end())};
+      compute_offset(ivars, window_info->get_start(), window_info->get_end())};
 
   MPI_Datatype mpi_type{get_mpi_type<T>()};
   MPI_Win win{window_info->get_win()};
 
   /*
-   * Continuously read the ptr atomically until it satisfies the condition
+   * Continuously read the ivars atomically until it satisfies the condition
    */
   while (1) {
     int cond_satisfied{test_and_compare(offset, mpi_type, cmp, val, win)};
@@ -500,9 +500,9 @@ __host__ size_t status_entry(size_t nelems,
 }
 
 template <typename T>
-__host__ size_t HostInterface::wait_until_any(T* ptr, size_t nelems,
+__host__ size_t HostInterface::wait_until_any(T* ivars, size_t nelems,
                                               const int *status,
-                                              roc_shmem_cmps cmp, T val,
+                                              int cmp, T val,
                                               WindowInfo* window_info) {
   DPRINTF("Function: host_wait_until_any\n");
 
@@ -524,7 +524,7 @@ __host__ size_t HostInterface::wait_until_any(T* ptr, size_t nelems,
       if (status[i]) {
         continue;
       }
-      if (test(ptr + i, cmp, val, window_info)) {
+      if (test(ivars + i, cmp, val, window_info)) {
         return i;
       }
     }
@@ -532,9 +532,9 @@ __host__ size_t HostInterface::wait_until_any(T* ptr, size_t nelems,
 }
 
 template <typename T>
-__host__ void HostInterface::wait_until_all(T* ptr, size_t nelems,
+__host__ void HostInterface::wait_until_all(T* ivars, size_t nelems,
                                             const int *status,
-                                            roc_shmem_cmps cmp, T val,
+                                            int cmp, T val,
                                             WindowInfo* window_info) {
   DPRINTF("Function: host_wait_until_all\n");
 
@@ -554,16 +554,16 @@ __host__ void HostInterface::wait_until_all(T* ptr, size_t nelems,
     if (status[i]) {
       continue;
     }
-    while (!test(ptr + i, cmp, val, window_info)) {
+    while (!test(ivars + i, cmp, val, window_info)) {
     }
   }
 }
 
 template <typename T>
-__host__ size_t HostInterface::wait_until_some(T* ptr, size_t nelems,
+__host__ size_t HostInterface::wait_until_some(T* ivars, size_t nelems,
                                              size_t* indices,
                                              const int *status,
-                                             roc_shmem_cmps cmp, T val,
+                                             int cmp, T val,
                                              WindowInfo* window_info) {
   DPRINTF("Function: host_wait_until_some\n");
 
@@ -587,7 +587,7 @@ __host__ size_t HostInterface::wait_until_some(T* ptr, size_t nelems,
       if (status[i]) {
         continue;
       }
-      if (test(ptr + i, cmp, val, window_info)) {
+      if (test(ivars + i, cmp, val, window_info)) {
         done = true;
         indices[ncompleted] = i;
         ncompleted++;
@@ -598,34 +598,34 @@ __host__ size_t HostInterface::wait_until_some(T* ptr, size_t nelems,
 }
 
 template <typename T>
-__host__ void HostInterface::wait_until_all_vector(T* ptr, size_t nelems,
+__host__ void HostInterface::wait_until_all_vector(T* ivars, size_t nelems,
                                                    const int *status,
-                                                   roc_shmem_cmps cmp, T* vals,
+                                                   int cmp, T* vals,
                                                    WindowInfo* window_info) {
   DPRINTF("Function: host_wait_until_all_vector\n");
 }
 
 template <typename T>
-__host__ size_t HostInterface::wait_until_any_vector(T* ptr, size_t nelems,
+__host__ size_t HostInterface::wait_until_any_vector(T* ivars, size_t nelems,
                                                      const int *status,
-                                                     roc_shmem_cmps cmp, T* vals,
+                                                     int cmp, T* vals,
                                                      WindowInfo* window_info) {
   DPRINTF("Function: host_wait_until_any_vector\n");
   return 0;
 }
 
 template <typename T>
-__host__ size_t HostInterface::wait_until_some_vector(T* ptr, size_t nelems,
+__host__ size_t HostInterface::wait_until_some_vector(T* ivars, size_t nelems,
                                                     size_t* indices,
                                                     const int *status,
-                                                    roc_shmem_cmps cmp, T* vals,
+                                                    int cmp, T* vals,
                                                     WindowInfo* window_info) {
   DPRINTF("Function: host_wait_until_some_vector\n");
   return 0;
 }
 
 template <typename T>
-__host__ int HostInterface::test(T* ptr, roc_shmem_cmps cmp, T val,
+__host__ int HostInterface::test(T* ivars, int cmp, T val,
                                  WindowInfo* window_info) {
   DPRINTF("Function: host_test\n");
 
@@ -633,7 +633,7 @@ __host__ int HostInterface::test(T* ptr, roc_shmem_cmps cmp, T val,
    * Find the offset of this memory in the window
    */
   MPI_Aint offset{
-      compute_offset(ptr, window_info->get_start(), window_info->get_end())};
+      compute_offset(ivars, window_info->get_start(), window_info->get_end())};
 
   MPI_Datatype mpi_type{get_mpi_type<T>()};
 
diff --git a/src/ipc/context_ipc_host.hpp b/src/ipc/context_ipc_host.hpp
index f9421ca210..4c2e626d75 100644
--- a/src/ipc/context_ipc_host.hpp
+++ b/src/ipc/context_ipc_host.hpp
@@ -99,42 +99,42 @@ class IPCHostContext : public Context {
                        int nreduce);
 
   template <typename T>
-  __host__ void wait_until(T *ptr, roc_shmem_cmps cmp, T val);
+  __host__ void wait_until(T *ivars, int cmp, T val);
 
   template <typename T>
-  __host__ size_t wait_until_any(T* ptr, size_t nelems,
+  __host__ size_t wait_until_any(T *ivars, size_t nelems,
                                  const int *status,
-                                 roc_shmem_cmps cmp, T val);
+                                 int cmp, T val);
 
   template <typename T>
-  __host__ void wait_until_all(T* ptr, size_t nelems,
+  __host__ void wait_until_all(T *ivars, size_t nelems,
                                const int *status,
-                               roc_shmem_cmps cmp, T val);
+                               int cmp, T val);
 
   template <typename T>
-  __host__ size_t wait_until_some(T* ptr, size_t nelems,
+  __host__ size_t wait_until_some(T *ivars, size_t nelems,
                                 size_t* indices,
                                 const int *status,
-                                roc_shmem_cmps cmp, T val);
+                                int cmp, T val);
 
   template <typename T>
-  __host__ void wait_until_all_vector(T* ptr, size_t nelems,
+  __host__ void wait_until_all_vector(T *ivars, size_t nelems,
                                       const int *status,
-                                      roc_shmem_cmps cmp, T* vals);
+                                      int cmp, T* vals);
 
   template <typename T>
-  __host__ size_t wait_until_any_vector(T* ptr, size_t nelems,
+  __host__ size_t wait_until_any_vector(T *ivars, size_t nelems,
                                         const int *status,
-                                        roc_shmem_cmps cmp, T* vals);
+                                        int cmp, T* vals);
 
   template <typename T>
-  __host__ size_t wait_until_some_vector(T* ptr, size_t nelems,
+  __host__ size_t wait_until_some_vector(T *ivars, size_t nelems,
                                          size_t* indices,
                                          const int *status,
-                                         roc_shmem_cmps cmp, T* vals);
+                                         int cmp, T* vals);
 
   template <typename T>
-  __host__ int test(T *ptr, roc_shmem_cmps cmp, T val);
+  __host__ int test(T *ivars, int cmp, T val);
 
  public:
   /* Pointer to the backend's host interface */
diff --git a/src/ipc/context_ipc_tmpl_host.hpp b/src/ipc/context_ipc_tmpl_host.hpp
index 76484de34d..d78593b888 100644
--- a/src/ipc/context_ipc_tmpl_host.hpp
+++ b/src/ipc/context_ipc_tmpl_host.hpp
@@ -115,57 +115,57 @@ __host__ void IPCHostContext::to_all(roc_shmem_team_t team, T *dest,
 }
 
 template <typename T>
-__host__ void IPCHostContext::wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
-  host_interface->wait_until<T>(ptr, cmp, val, context_window_info);
+__host__ void IPCHostContext::wait_until(T *ivars, int cmp, T val) {
+  host_interface->wait_until<T>(ivars, cmp, val, context_window_info);
 }
 
 template <typename T>
-__host__ void IPCHostContext::wait_until_all(T *ptr, size_t nelems,
+__host__ void IPCHostContext::wait_until_all(T *ivars, size_t nelems,
                                                const int* status,
-                                               roc_shmem_cmps cmp, T val) {
-  host_interface->wait_until_all<T>(ptr, nelems, status, cmp, val, context_window_info);
+                                               int cmp, T val) {
+  host_interface->wait_until_all<T>(ivars, nelems, status, cmp, val, context_window_info);
 }
 
 template <typename T>
-__host__ size_t IPCHostContext::wait_until_any(T *ptr, size_t nelems,
+__host__ size_t IPCHostContext::wait_until_any(T *ivars, size_t nelems,
                                                const int* status,
-                                               roc_shmem_cmps cmp, T val) {
-  return host_interface->wait_until_any<T>(ptr, nelems, status, cmp, val, context_window_info);
+                                               int cmp, T val) {
+  return host_interface->wait_until_any<T>(ivars, nelems, status, cmp, val, context_window_info);
 }
 
 template <typename T>
-__host__ size_t IPCHostContext::wait_until_some(T *ptr, size_t nelems,
+__host__ size_t IPCHostContext::wait_until_some(T *ivars, size_t nelems,
                                                  size_t* indices,
                                                  const int* status,
-                                                 roc_shmem_cmps cmp, T val) {
-  return host_interface->wait_until_some<T>(ptr, nelems, indices, status, cmp, val, context_window_info);
+                                                 int cmp, T val) {
+  return host_interface->wait_until_some<T>(ivars, nelems, indices, status, cmp, val, context_window_info);
 }
 
 template <typename T>
-__host__ void IPCHostContext::wait_until_all_vector(T *ptr, size_t nelems,
+__host__ void IPCHostContext::wait_until_all_vector(T *ivars, size_t nelems,
                                                       const int* status,
-                                                      roc_shmem_cmps cmp, T* vals) {
-  host_interface->wait_until_all_vector<T>(ptr, nelems, status, cmp, vals, context_window_info);
+                                                      int cmp, T* vals) {
+  host_interface->wait_until_all_vector<T>(ivars, nelems, status, cmp, vals, context_window_info);
 }
 
 template <typename T>
-__host__ size_t IPCHostContext::wait_until_any_vector(T *ptr, size_t nelems,
+__host__ size_t IPCHostContext::wait_until_any_vector(T *ivars, size_t nelems,
                                                         const int* status,
-                                                        roc_shmem_cmps cmp, T* vals) {
-  return host_interface->wait_until_any_vector<T>(ptr, nelems, status, cmp, vals, context_window_info);
+                                                        int cmp, T* vals) {
+  return host_interface->wait_until_any_vector<T>(ivars, nelems, status, cmp, vals, context_window_info);
 }
 
 template <typename T>
-__host__ size_t IPCHostContext::wait_until_some_vector(T *ptr, size_t nelems,
+__host__ size_t IPCHostContext::wait_until_some_vector(T *ivars, size_t nelems,
                                                          size_t* indices,
                                                          const int* status,
-                                                         roc_shmem_cmps cmp, T* vals) {
-  return host_interface->wait_until_some_vector<T>(ptr, nelems, indices, status, cmp, vals, context_window_info);
+                                                         int cmp, T* vals) {
+  return host_interface->wait_until_some_vector<T>(ivars, nelems, indices, status, cmp, vals, context_window_info);
 }
 
 template <typename T>
-__host__ int IPCHostContext::test(T *ptr, roc_shmem_cmps cmp, T val) {
-  return host_interface->test<T>(ptr, cmp, val, context_window_info);
+__host__ int IPCHostContext::test(T *ivars, int cmp, T val) {
+  return host_interface->test<T>(ivars, cmp, val, context_window_info);
 }
 
 }  // namespace rocshmem
diff --git a/src/reverse_offload/context_ro_host.hpp b/src/reverse_offload/context_ro_host.hpp
index ee26f807e8..80e8a6f6e5 100644
--- a/src/reverse_offload/context_ro_host.hpp
+++ b/src/reverse_offload/context_ro_host.hpp
@@ -148,42 +148,42 @@ class ROHostContext : public Context {
                        int nreduce);
 
   template <typename T>
-  __host__ void wait_until(T *ptr, roc_shmem_cmps cmp, T val);
+  __host__ void wait_until(T *ivars, roc_shmem_cmps cmp, T val);
 
   template <typename T>
-  __host__ void wait_until_all(T* ptr, size_t nelems,
+  __host__ void wait_until_all(T *ivars, size_t nelems,
                                const int *status,
                                roc_shmem_cmps cmp, T val);
 
   template <typename T>
-  __host__ size_t wait_until_any(T* ptr, size_t nelems,
+  __host__ size_t wait_until_any(T *ivars, size_t nelems,
                                  const int *status,
                                  roc_shmem_cmps cmp, T val);
 
   template <typename T>
-  __host__ size_t wait_until_some(T* ptr, size_t nelems,
+  __host__ size_t wait_until_some(T *ivars, size_t nelems,
                                 size_t* indices,
                                 const int *status,
                                 roc_shmem_cmps cmp, T val);
 
   template <typename T>
-  __host__ void wait_until_all_vector(T* ptr, size_t nelems,
+  __host__ void wait_until_all_vector(T *ivars, size_t nelems,
                                       const int *status,
                                       roc_shmem_cmps cmp, T* vals);
 
   template <typename T>
-  __host__ size_t wait_until_any_vector(T* ptr, size_t nelems,
+  __host__ size_t wait_until_any_vector(T *ivars, size_t nelems,
                                         const int *status,
                                         roc_shmem_cmps cmp, T* vals);
 
   template <typename T>
-  __host__ size_t wait_until_some_vector(T* ptr, size_t nelems,
+  __host__ size_t wait_until_some_vector(T *ivars, size_t nelems,
                                        size_t* indices,
                                        const int *status,
                                        roc_shmem_cmps cmp, T* vals);
 
   template <typename T>
-  __host__ int test(T *ptr, roc_shmem_cmps cmp, T val);
+  __host__ int test(T *ivars, roc_shmem_cmps cmp, T val);
 };
 
 }  // namespace rocshmem
diff --git a/src/reverse_offload/context_ro_tmpl_host.hpp b/src/reverse_offload/context_ro_tmpl_host.hpp
index bce7897783..4e3b1855d7 100644
--- a/src/reverse_offload/context_ro_tmpl_host.hpp
+++ b/src/reverse_offload/context_ro_tmpl_host.hpp
@@ -141,58 +141,58 @@ __host__ void ROHostContext::to_all(roc_shmem_team_t team, T *dest,
 }
 
 template <typename T>
-__host__ void ROHostContext::wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
-  host_interface->wait_until<T>(ptr, cmp, val, context_window_info);
+__host__ void ROHostContext::wait_until(T *ivars, roc_shmem_cmps cmp, T val) {
+  host_interface->wait_until<T>(ivars, cmp, val, context_window_info);
 }
 
 template <typename T>
-__host__ size_t ROHostContext::wait_until_any(T* ptr, size_t nelems,
+__host__ size_t ROHostContext::wait_until_any(T *ivars, size_t nelems,
       const int *status, roc_shmem_cmps cmp, T val) {
-  return host_interface->wait_until_any<T>(ptr, nelems, status, cmp, val, context_window_info);
+  return host_interface->wait_until_any<T>(ivars, nelems, status, cmp, val, context_window_info);
 }
 
 template <typename T>
 __host__
-void ROHostContext::wait_until_all(T* ptr, size_t nelems,
+void ROHostContext::wait_until_all(T *ivars, size_t nelems,
     const int *status, roc_shmem_cmps cmp, T val) {
-  host_interface->wait_until_all(ptr, nelems, status, cmp, val, context_window_info);
+  host_interface->wait_until_all(ivars, nelems, status, cmp, val, context_window_info);
 }
 
 template <typename T>
 __host__
-size_t ROHostContext::wait_until_some(T* ptr, size_t nelems,
+size_t ROHostContext::wait_until_some(T *ivars, size_t nelems,
                               size_t* indices,
                               const int *status,
                               roc_shmem_cmps cmp, T val) {
-  return host_interface->wait_until_some(ptr, nelems, indices, status, cmp, val, context_window_info);
+  return host_interface->wait_until_some(ivars, nelems, indices, status, cmp, val, context_window_info);
 }
 
 template <typename T>
 __host__
-void ROHostContext::wait_until_all_vector(T* ptr, size_t nelems,
+void ROHostContext::wait_until_all_vector(T *ivars, size_t nelems,
                                     const int *status,
                                     roc_shmem_cmps cmp, T* vals) {
-  host_interface->wait_until_all_vector(ptr, nelems, status, cmp, vals, context_window_info);
+  host_interface->wait_until_all_vector(ivars, nelems, status, cmp, vals, context_window_info);
 }
 
 template <typename T>
-__host__ size_t ROHostContext::wait_until_any_vector(T* ptr, size_t nelems,
+__host__ size_t ROHostContext::wait_until_any_vector(T *ivars, size_t nelems,
                                       const int *status,
                                       roc_shmem_cmps cmp, T* vals) {
-  return host_interface->wait_until_any_vector(ptr, nelems, status, cmp, vals, context_window_info);
+  return host_interface->wait_until_any_vector(ivars, nelems, status, cmp, vals, context_window_info);
 }
 
 template <typename T>
-__host__ size_t ROHostContext::wait_until_some_vector(T* ptr, size_t nelems,
+__host__ size_t ROHostContext::wait_until_some_vector(T *ivars, size_t nelems,
                                      size_t* indices,
                                      const int *status,
                                      roc_shmem_cmps cmp, T* vals) {
-  return host_interface->wait_until_some_vector(ptr, nelems, indices, status, cmp, vals, context_window_info);
+  return host_interface->wait_until_some_vector(ivars, nelems, indices, status, cmp, vals, context_window_info);
 }
 
 template <typename T>
-__host__ int ROHostContext::test(T *ptr, roc_shmem_cmps cmp, T val) {
-  return host_interface->test<T>(ptr, cmp, val, context_window_info);
+__host__ int ROHostContext::test(T *ivars, roc_shmem_cmps cmp, T val) {
+  return host_interface->test<T>(ivars, cmp, val, context_window_info);
 }
 
 }  // namespace rocshmem
diff --git a/src/roc_shmem.cpp b/src/roc_shmem.cpp
index 32ef91bbda..cdf76fa9d3 100644
--- a/src/roc_shmem.cpp
+++ b/src/roc_shmem.cpp
@@ -392,8 +392,8 @@ __host__ T roc_shmem_atomic_fetch_inc(T *dest, int pe) {
 }
 
 template <typename T>
-__host__ T roc_shmem_atomic_fetch(T *dest, int pe) {
-  return roc_shmem_atomic_fetch(ROC_SHMEM_HOST_CTX_DEFAULT, dest, pe);
+__host__ T roc_shmem_atomic_fetch(T *source, int pe) {
+  return roc_shmem_atomic_fetch(ROC_SHMEM_HOST_CTX_DEFAULT, source, pe);
 }
 
 template <typename T>
@@ -592,10 +592,10 @@ __host__ T roc_shmem_atomic_fetch_inc(roc_shmem_ctx_t ctx, T *dest, int pe) {
 }
 
 template <typename T>
-__host__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *dest, int pe) {
+__host__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *source, int pe) {
   DPRINTF("Host function: roc_shmem_atomic_fetch\n");
 
-  return get_internal_ctx(ctx)->amo_fetch_add<T>(dest, 0, pe);
+  return get_internal_ctx(ctx)->amo_fetch_add<T>(source, 0, pe);
 }
 
 template <typename T>
@@ -744,74 +744,74 @@ __host__ void roc_shmem_to_all([[maybe_unused]] roc_shmem_ctx_t ctx,
 }
 
 template <typename T>
-__host__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
+__host__ void roc_shmem_wait_until(T *ivars, int cmp, T val) {
   DPRINTF("Host function: roc_shmem_wait_until\n");
 
-  get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until(ptr, cmp, val);
+  get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until(ivars, cmp, val);
 }
 
 template <typename T>
-__host__ void roc_shmem_wait_until_all(T *ptr, size_t nelems, const int* status,
-                                       roc_shmem_cmps cmp, T val) {
+__host__ void roc_shmem_wait_until_all(T *ivars, size_t nelems, const int* status,
+                                       int cmp, T val) {
   DPRINTF("Host function: roc_shmem_wait_until_all\n");
 
-  get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all(ptr,
+  get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all(ivars,
       nelems, status, cmp, val);
 }
 
 template <typename T>
-__host__ size_t roc_shmem_wait_until_any(T *ptr, size_t nelems, const int* status,
-                                       roc_shmem_cmps cmp, T val) {
+__host__ size_t roc_shmem_wait_until_any(T *ivars, size_t nelems, const int* status,
+                                       int cmp, T val) {
   DPRINTF("Host function: roc_shmem_wait_until_any\n");
 
-  return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any(ptr,
+  return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any(ivars,
       nelems, status, cmp, val);
 }
 
 template <typename T>
-__host__ size_t roc_shmem_wait_until_some(T *ptr, size_t nelems, size_t* indices,
-                                        const int* status, roc_shmem_cmps cmp,
+__host__ size_t roc_shmem_wait_until_some(T *ivars, size_t nelems, size_t* indices,
+                                        const int* status, int cmp,
                                         T val) {
   DPRINTF("Host function: roc_shmem_wait_until_some\n");
 
-  return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some(ptr, nelems,
+  return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some(ivars, nelems,
       indices, status, cmp, val);
 }
 
 template <typename T>
-__host__ size_t roc_shmem_wait_until_any_vector(T *ptr, size_t nelems, const int* status,
-                                              roc_shmem_cmps cmp, T* vals) {
+__host__ size_t roc_shmem_wait_until_any_vector(T *ivars, size_t nelems, const int* status,
+                                                int cmp, T* vals) {
   DPRINTF("Host function: roc_shmem_wait_until_any_vector\n");
 
-  return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any_vector(ptr,
+  return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any_vector(ivars,
       nelems, status, cmp, vals);
 }
 
 template <typename T>
-__host__ void roc_shmem_wait_until_all_vector(T *ptr, size_t nelems, const int* status,
-                                              roc_shmem_cmps cmp, T* vals) {
+__host__ void roc_shmem_wait_until_all_vector(T *ivars, size_t nelems, const int* status,
+                                              int cmp, T* vals) {
   DPRINTF("Host function: roc_shmem_wait_until_all_vector\n");
 
-  get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all_vector(ptr,
+  get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all_vector(ivars,
       nelems, status, cmp, vals);
 }
 
 template <typename T>
-__host__ size_t roc_shmem_wait_until_some_vector(T *ptr, size_t nelems,
+__host__ size_t roc_shmem_wait_until_some_vector(T *ivars, size_t nelems,
                                                size_t* indices,
                                                const int* status,
-                                               roc_shmem_cmps cmp, T* vals) {
+                                               int cmp, T* vals) {
   DPRINTF("Host function: roc_shmem_wait_until_some_vector\n");
 
-  return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some_vector(ptr,
+  return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some_vector(ivars,
       nelems, indices, status, cmp, vals);
 }
 
 template <typename T>
-__host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) {
+__host__ int roc_shmem_test(T *ivars, int cmp, T val) {
   DPRINTF("Host function: roc_shmem_testl\n");
 
-  return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->test(ptr, cmp, val);
+  return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->test(ivars, cmp, val);
 }
 
 /**
@@ -940,31 +940,31 @@ __host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) {
  * Declare templates for the wait types
  */
 #define WAIT_GEN(T)                                                            \
-  template __host__ void roc_shmem_wait_until<T>(T * ptr, roc_shmem_cmps cmp,  \
+  template __host__ void roc_shmem_wait_until<T>(T *ivars, int cmp,            \
                                                  T val);                       \
-  template __host__ int roc_shmem_test<T>(T * ptr, roc_shmem_cmps cmp, T val); \
-  template __host__ void Context::wait_until<T>(T * ptr, roc_shmem_cmps cmp,   \
+  template __host__ int roc_shmem_test<T>(T *ivars, int cmp, T val);           \
+  template __host__ void Context::wait_until<T>(T *ivars, int cmp,             \
                                                 T val);                        \
-  template __host__ size_t roc_shmem_wait_until_any<T>(T * ptr,                \
+  template __host__ size_t roc_shmem_wait_until_any<T>(T *ivars,               \
                                       size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T val);              \
-  template __host__ void roc_shmem_wait_until_all<T>(T * ptr,                  \
+                                      int cmp, T val);                         \
+  template __host__ void roc_shmem_wait_until_all<T>(T *ivars,                 \
                                       size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T val);              \
-  template __host__ size_t roc_shmem_wait_until_some<T>(T * ptr, size_t nelems,\
+                                      int cmp, T val);                         \
+  template __host__ size_t roc_shmem_wait_until_some<T>(T *ivars, size_t nelems,\
                                       size_t* indices, const int* status,      \
-                                      roc_shmem_cmps cmp, T val);              \
-  template __host__ size_t roc_shmem_wait_until_any_vector<T>(T * ptr,         \
+                                      int cmp, T val);                         \
+  template __host__ size_t roc_shmem_wait_until_any_vector<T>(T *ivars,        \
                                       size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T* vals);            \
-  template __host__ void roc_shmem_wait_until_all_vector<T>(T * ptr,           \
+                                      int cmp, T* vals);                       \
+  template __host__ void roc_shmem_wait_until_all_vector<T>(T *ivars,          \
                                       size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T* vals);            \
-  template __host__ size_t roc_shmem_wait_until_some_vector<T>(T * ptr,        \
+                                      int cmp, T* vals);                       \
+  template __host__ size_t roc_shmem_wait_until_some_vector<T>(T *ivars,       \
                                       size_t nelems, size_t* indices,          \
-                                      const int* status, roc_shmem_cmps cmp,   \
+                                      const int* status, int cmp,              \
                                       T* vals);                                \
-  template __host__ int Context::test<T>(T * ptr, roc_shmem_cmps cmp, T val);
+  template __host__ int Context::test<T>(T *ivars, int cmp, T val);
 
 /**
  * Define APIs to call the template functions
@@ -1099,11 +1099,11 @@ __host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) {
 
 #define AMO_EXTENDED_DEF_GEN(T, TNAME)                                         \
   __host__ T roc_shmem_ctx_##TNAME##_atomic_fetch(roc_shmem_ctx_t ctx,         \
-                                                  T *dest, int pe) {           \
-    return roc_shmem_atomic_fetch<T>(ctx, dest, pe);                           \
+                                                  T *source, int pe) {         \
+    return roc_shmem_atomic_fetch<T>(ctx, source, pe);                         \
   }                                                                            \
-  __host__ T roc_shmem_##TNAME##_atomic_fetch(T *dest, int pe) {               \
-    return roc_shmem_atomic_fetch<T>(dest, pe);                                \
+  __host__ T roc_shmem_##TNAME##_atomic_fetch(T *source, int pe) {             \
+    return roc_shmem_atomic_fetch<T>(source, pe);                              \
   }                                                                            \
   __host__ void roc_shmem_ctx_##TNAME##_atomic_set(roc_shmem_ctx_t ctx,        \
                                                    T *dest, T value, int pe) { \
@@ -1165,55 +1165,55 @@ __host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) {
   }
 
 #define WAIT_DEF_GEN(T, TNAME)                                               \
-  __host__ void roc_shmem_##TNAME##_wait_until(T *ptr, roc_shmem_cmps cmp,   \
+  __host__ void roc_shmem_##TNAME##_wait_until(T *ivars, int cmp,            \
                                                T val) {                      \
-    roc_shmem_wait_until<T>(ptr, cmp, val);                                  \
+    roc_shmem_wait_until<T>(ivars, cmp, val);                                \
   }                                                                          \
-  __host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr, size_t nelems,  \
+  __host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars, size_t nelems,\
                                                      const int* status,      \
-                                                     roc_shmem_cmps cmp,     \
+                                                     int cmp,                \
                                                      T val) {                \
-    return roc_shmem_wait_until_any<T>(ptr, nelems, status, cmp, val);       \
+    return roc_shmem_wait_until_any<T>(ivars, nelems, status, cmp, val);     \
   }                                                                          \
-  __host__ void roc_shmem_##TNAME##_wait_until_all(T *ptr, size_t nelems,    \
+  __host__ void roc_shmem_##TNAME##_wait_until_all(T *ivars, size_t nelems,  \
                                                    const int* status,        \
-                                                   roc_shmem_cmps cmp,       \
+                                                   int cmp,                  \
                                                    T val) {                  \
-    roc_shmem_wait_until_all<T>(ptr, nelems, status, cmp, val);              \
+    roc_shmem_wait_until_all<T>(ivars, nelems, status, cmp, val);            \
   }                                                                          \
-  __host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr, size_t nelems, \
+  __host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars, size_t nelems, \
                                                     size_t* indices,         \
                                                     const int* status,       \
-                                                    roc_shmem_cmps cmp,      \
+                                                    int cmp,                 \
                                                     T val) {                 \
-    return roc_shmem_wait_until_some<T>(ptr, nelems, indices, status, cmp, val);    \
+    return roc_shmem_wait_until_some<T>(ivars, nelems, indices, status, cmp, val); \
   }                                                                          \
-  __host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr,          \
+  __host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars,        \
                                                           size_t nelems,     \
                                                           const int* status, \
-                                                          roc_shmem_cmps cmp,\
+                                                          int cmp,           \
                                                           T* vals) {         \
-    return roc_shmem_wait_until_any_vector<T>(ptr, nelems, status, cmp,      \
+    return roc_shmem_wait_until_any_vector<T>(ivars, nelems, status, cmp,    \
                                               vals);                         \
   }                                                                          \
-  __host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr,            \
+  __host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars,          \
                                                           size_t nelems,     \
                                                           const int* status, \
-                                                          roc_shmem_cmps cmp,\
+                                                          int cmp,           \
                                                           T* vals) {         \
-    roc_shmem_wait_until_all_vector<T>(ptr, nelems, status, cmp, vals);      \
+    roc_shmem_wait_until_all_vector<T>(ivars, nelems, status, cmp, vals);    \
   }                                                                          \
-  __host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr,         \
+  __host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars,       \
                                                            size_t nelems,    \
                                                            size_t* indices,  \
                                                            const int* status,\
-                                                          roc_shmem_cmps cmp,\
+                                                           int cmp,          \
                                                            T* vals) {        \
-    return roc_shmem_wait_until_some_vector<T>(ptr, nelems, indices,         \
+    return roc_shmem_wait_until_some_vector<T>(ivars, nelems, indices,       \
         status, cmp, vals);                                                  \
   }                                                                          \
-  __host__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val) { \
-    return roc_shmem_test<T>(ptr, cmp, val);                                 \
+  __host__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val) {          \
+    return roc_shmem_test<T>(ivars, cmp, val);                                 \
   }
 
 /******************************************************************************
diff --git a/src/roc_shmem_gpu.cpp b/src/roc_shmem_gpu.cpp
index 1afd5b10ff..4ff19b20a2 100644
--- a/src/roc_shmem_gpu.cpp
+++ b/src/roc_shmem_gpu.cpp
@@ -181,8 +181,8 @@ __device__ T roc_shmem_atomic_fetch_inc(T *dest, int pe) {
 }
 
 template <typename T>
-__device__ T roc_shmem_atomic_fetch(T *dest, int pe) {
-  return roc_shmem_atomic_fetch(ROC_SHMEM_CTX_DEFAULT, dest, pe);
+__device__ T roc_shmem_atomic_fetch(T *source, int pe) {
+  return roc_shmem_atomic_fetch(ROC_SHMEM_CTX_DEFAULT, source, pe);
 }
 
 template <typename T>
@@ -488,85 +488,85 @@ __device__ void roc_shmem_wg_fcollect(roc_shmem_ctx_t ctx,
 }
 
 template <typename T>
-__device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
+__device__ void roc_shmem_wait_until(T *ivars, int cmp, T val) {
   GPU_DPRINTF("Function: roc_shmem_wait_until\n");
 
   Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
   ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL);
-  ctx_internal->wait_until(ptr, cmp, val);
+  ctx_internal->wait_until(ivars, cmp, val);
 }
 
 template <typename T>
-__device__ void roc_shmem_wait_until_all(T *ptr, size_t nelems, const int* status,
-                                         roc_shmem_cmps cmp, T val) {
+__device__ void roc_shmem_wait_until_all(T *ivars, size_t nelems, const int* status,
+                                         int cmp, T val) {
   GPU_DPRINTF("Function: roc_shmem_wait_until_all\n");
 
   Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
   ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ALL);
-  ctx_internal->wait_until_all(ptr, nelems, status, cmp, val);
+  ctx_internal->wait_until_all(ivars, nelems, status, cmp, val);
 }
 
 template <typename T>
-__device__ size_t roc_shmem_wait_until_any(T *ptr, size_t nelems, const int* status,
-                                           roc_shmem_cmps cmp, T val) {
+__device__ size_t roc_shmem_wait_until_any(T *ivars, size_t nelems, const int* status,
+                                           int cmp, T val) {
   GPU_DPRINTF("Function: roc_shmem_wait_until_any\n");
 
   Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
   ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ANY);
-  return ctx_internal->wait_until_any(ptr, nelems, status, cmp, val);
+  return ctx_internal->wait_until_any(ivars, nelems, status, cmp, val);
 }
 
 template <typename T>
-__device__ size_t roc_shmem_wait_until_some(T *ptr, size_t nelems, size_t* indices,
-                                          const int* status, roc_shmem_cmps cmp,
+__device__ size_t roc_shmem_wait_until_some(T *ivars, size_t nelems, size_t* indices,
+                                          const int* status, int cmp,
                                           T val) {
   DPRINTF("Function: roc_shmem_wait_until_some\n");
 
   Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
   ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_SOME);
-  return ctx_internal->wait_until_some(ptr, nelems, indices, status, cmp, val);
+  return ctx_internal->wait_until_some(ivars, nelems, indices, status, cmp, val);
 }
 
 template <typename T>
-__device__ size_t roc_shmem_wait_until_any_vector(T *ptr, size_t nelems, const int* status,
-                                                  roc_shmem_cmps cmp, T* vals) {
+__device__ size_t roc_shmem_wait_until_any_vector(T *ivars, size_t nelems, const int* status,
+                                                  int cmp, T* vals) {
   DPRINTF("Function: roc_shmem_wait_until_any_vector\n");
 
   Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
   ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ANY_VECTOR);
-  return ctx_internal->wait_until_any_vector(ptr, nelems, status, cmp, vals);
+  return ctx_internal->wait_until_any_vector(ivars, nelems, status, cmp, vals);
 }
 
 template <typename T>
-__device__ void roc_shmem_wait_until_all_vector(T *ptr, size_t nelems, const int* status,
-                                                roc_shmem_cmps cmp, T* vals) {
+__device__ void roc_shmem_wait_until_all_vector(T *ivars, size_t nelems, const int* status,
+                                                int cmp, T* vals) {
   DPRINTF("Function: roc_shmem_wait_until_all_vector\n");
 
   Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
   ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ALL_VECTOR);
-  ctx_internal->wait_until_all_vector(ptr, nelems, status, cmp, vals);
+  ctx_internal->wait_until_all_vector(ivars, nelems, status, cmp, vals);
 }
 
 template <typename T>
-__device__ size_t roc_shmem_wait_until_some_vector(T *ptr, size_t nelems,
+__device__ size_t roc_shmem_wait_until_some_vector(T *ivars, size_t nelems,
                                                  size_t* indices,
                                                  const int* status,
-                                                 roc_shmem_cmps cmp, T* vals) {
+                                                 int cmp, T* vals) {
   DPRINTF("Function: roc_shmem_wait_until_some_vector\n");
 
   Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
   ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_SOME_VECTOR);
-  return ctx_internal->wait_until_some_vector(ptr, nelems, indices, status, cmp, vals);
+  return ctx_internal->wait_until_some_vector(ivars, nelems, indices, status, cmp, vals);
 }
 
 template <typename T>
-__device__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) {
+__device__ int roc_shmem_test(T *ivars, int cmp, T val) {
   GPU_DPRINTF("Function: roc_shmem_testl\n");
 
   Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
   ctx_internal->ctxStats.incStat(NUM_TEST);
 
-  return ctx_internal->test(ptr, cmp, val);
+  return ctx_internal->test(ivars, cmp, val);
 }
 
 __device__ void roc_shmem_ctx_wg_barrier_all(roc_shmem_ctx_t ctx) {
@@ -650,10 +650,10 @@ __device__ T roc_shmem_atomic_fetch_inc(roc_shmem_ctx_t ctx, T *dest, int pe) {
 }
 
 template <typename T>
-__device__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *dest, int pe) {
+__device__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *source, int pe) {
   GPU_DPRINTF("Function: roc_shmem_atomic_fetch\n");
 
-  return get_internal_ctx(ctx)->amo_fetch_add<T>(dest, 0, pe);
+  return get_internal_ctx(ctx)->amo_fetch_add<T>(source, 0, pe);
 }
 
 template <typename T>
@@ -1030,52 +1030,53 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
  * Declare templates for the wait types
  */
 #define WAIT_GEN(T)                                                            \
-  template __device__ void roc_shmem_wait_until<T>(T * ptr,                    \
-                                                   roc_shmem_cmps cmp, T val); \
-  template __device__ size_t roc_shmem_wait_until_any<T>(T * ptr,              \
+  template __device__ void roc_shmem_wait_until<T>(T *ivars,                   \
+                                                   int cmp, T val);            \
+  template __device__ size_t roc_shmem_wait_until_any<T>(T *ivars,             \
                                       size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T val);              \
-  template __device__ void roc_shmem_wait_until_all<T>(T * ptr,                \
+                                      int cmp, T val);                         \
+  template __device__ void roc_shmem_wait_until_all<T>(T *ivars,               \
                                       size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T val);              \
-  template __device__ size_t roc_shmem_wait_until_some<T>(T * ptr, size_t nelems,\
-                                      size_t* indices, const int* status,      \
-                                      roc_shmem_cmps cmp, T val);              \
-  template __device__ size_t roc_shmem_wait_until_any_vector<T>(T * ptr,       \
-                                      size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T* vals);            \
-  template __device__ void roc_shmem_wait_until_all_vector<T>(T * ptr,         \
-                                      size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T* vals);            \
-  template __device__ size_t roc_shmem_wait_until_some_vector<T>(T * ptr,      \
+                                      int cmp, T val);                         \
+  template __device__ size_t roc_shmem_wait_until_some<T>(T *ivars,            \
                                       size_t nelems, size_t* indices,          \
-                                      const int* status, roc_shmem_cmps cmp,   \
+                                      const int* status,                       \
+                                      int cmp, T val);                         \
+  template __device__ size_t roc_shmem_wait_until_any_vector<T>(T *ivars,      \
+                                      size_t nelems, const int* status,        \
+                                      int cmp, T* vals);                       \
+  template __device__ void roc_shmem_wait_until_all_vector<T>(T *ivars,        \
+                                      size_t nelems, const int* status,        \
+                                      int cmp, T* vals);                       \
+  template __device__ size_t roc_shmem_wait_until_some_vector<T>(T *ivars,     \
+                                      size_t nelems, size_t* indices,          \
+                                      const int* status, int cmp,              \
                                       T* vals);                                \
-  template __device__ int roc_shmem_test<T>(T * ptr, roc_shmem_cmps cmp,       \
+  template __device__ int roc_shmem_test<T>(T *ivars, int cmp,                 \
                                             T val);                            \
-  template __device__ void Context::wait_until<T>(T * ptr, roc_shmem_cmps cmp, \
+  template __device__ void Context::wait_until<T>(T *ivars, int cmp,           \
                                                   T val);                      \
-  template __device__ size_t Context::wait_until_any<T>(T * ptr,               \
+  template __device__ size_t Context::wait_until_any<T>(T *ivars,              \
                                       size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T val);              \
-  template __device__ void Context::wait_until_all<T>(T * ptr,                 \
+                                      int cmp, T val);                         \
+  template __device__ void Context::wait_until_all<T>(T *ivars,                \
                                       size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T val);              \
-  template __device__ size_t Context::wait_until_some<T>(T * ptr,              \
+                                      int cmp, T val);                         \
+  template __device__ size_t Context::wait_until_some<T>(T *ivars,             \
                                       size_t nelems,                           \
                                       size_t* indices, const int* status,      \
-                                      roc_shmem_cmps cmp, T val);              \
-  template __device__ size_t Context::wait_until_any_vector<T>(T * ptr,        \
+                                      int cmp, T val);                         \
+  template __device__ size_t Context::wait_until_any_vector<T>(T *ivars,       \
                                       size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T* vals);            \
-  template __device__ void Context::wait_until_all_vector<T>(T * ptr,          \
+                                      int cmp, T* vals);                       \
+  template __device__ void Context::wait_until_all_vector<T>(T *ivars,         \
                                       size_t nelems, const int* status,        \
-                                      roc_shmem_cmps cmp, T* vals);            \
-  template __device__ size_t Context::wait_until_some_vector<T>(T * ptr,       \
+                                      int cmp, T* vals);                       \
+  template __device__ size_t Context::wait_until_some_vector<T>(T *ivars,      \
                                       size_t nelems, size_t* indices,          \
-                                      const int* status, roc_shmem_cmps cmp,   \
+                                      const int* status, int cmp,              \
                                       T* vals);                                \
-  template __device__ int Context::test<T>(T * ptr, roc_shmem_cmps cmp, T val);
+  template __device__ int Context::test<T>(T *ivars, int cmp, T val);
 
 #define ARITH_REDUCTION_GEN(T)    \
   REDUCTION_GEN(T, ROC_SHMEM_SUM) \
@@ -1302,11 +1303,11 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
 
 #define AMO_EXTENDED_DEF_GEN(T, TNAME)                                         \
   __device__ T roc_shmem_ctx_##TNAME##_atomic_fetch(roc_shmem_ctx_t ctx,       \
-                                                    T *dest, int pe) {         \
-    return roc_shmem_atomic_fetch<T>(ctx, dest, pe);                           \
+                                                    T *source, int pe) {       \
+    return roc_shmem_atomic_fetch<T>(ctx, source, pe);                         \
   }                                                                            \
-  __device__ T roc_shmem_##TNAME##_atomic_fetch(T *dest, int pe) {             \
-    return roc_shmem_atomic_fetch<T>(dest, pe);                                \
+  __device__ T roc_shmem_##TNAME##_atomic_fetch(T *source, int pe) {           \
+    return roc_shmem_atomic_fetch<T>(source, pe);                              \
   }                                                                            \
   __device__ void roc_shmem_ctx_##TNAME##_atomic_set(                          \
       roc_shmem_ctx_t ctx, T *dest, T value, int pe) {                         \
@@ -1370,57 +1371,57 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
   }
 
 #define WAIT_DEF_GEN(T, TNAME)                                                 \
-  __device__ void roc_shmem_##TNAME##_wait_until(T *ptr, roc_shmem_cmps cmp,   \
+  __device__ void roc_shmem_##TNAME##_wait_until(T *ivars, int cmp,            \
                                                  T val) {                      \
-    roc_shmem_wait_until<T>(ptr, cmp, val);                                    \
+    roc_shmem_wait_until<T>(ivars, cmp, val);                                  \
   }                                                                            \
-  __device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr, size_t nelems,\
+  __device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars, size_t nelems,\
                                                      const int* status,      \
-                                                     roc_shmem_cmps cmp,     \
+                                                     int cmp,                \
                                                      T val) {                \
-    return roc_shmem_wait_until_any<T>(ptr, nelems, status, cmp, val);       \
+    return roc_shmem_wait_until_any<T>(ivars, nelems, status, cmp, val);     \
   }                                                                          \
-  __device__ void roc_shmem_##TNAME##_wait_until_all(T *ptr, size_t nelems,  \
+  __device__ void roc_shmem_##TNAME##_wait_until_all(T *ivars, size_t nelems,\
                                                    const int* status,        \
-                                                   roc_shmem_cmps cmp,       \
+                                                   int cmp,                  \
                                                    T val) {                  \
-    roc_shmem_wait_until_all<T>(ptr, nelems, status, cmp, val);              \
+    roc_shmem_wait_until_all<T>(ivars, nelems, status, cmp, val);            \
   }                                                                          \
-  __device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr,              \
+  __device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars,            \
                                                     size_t nelems,           \
                                                     size_t* indices,         \
                                                     const int* status,       \
-                                                    roc_shmem_cmps cmp,      \
+                                                    int cmp,                 \
                                                     T val) {                 \
-    return roc_shmem_wait_until_some<T>(ptr, nelems, indices, status, cmp,   \
+    return roc_shmem_wait_until_some<T>(ivars, nelems, indices, status, cmp, \
                                         val);                                \
   }                                                                          \
-  __device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr,        \
+  __device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars,      \
                                                           size_t nelems,     \
                                                           const int* status, \
-                                                          roc_shmem_cmps cmp,\
+                                                          int cmp,           \
                                                           T* vals) {         \
-    return roc_shmem_wait_until_any_vector<T>(ptr, nelems, status, cmp,      \
+    return roc_shmem_wait_until_any_vector<T>(ivars, nelems, status, cmp,    \
                                               vals);                         \
   }                                                                          \
-  __device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr,          \
+  __device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars,        \
                                                           size_t nelems,     \
                                                           const int* status, \
-                                                          roc_shmem_cmps cmp,\
+                                                          int cmp,           \
                                                           T* vals) {         \
-    roc_shmem_wait_until_all_vector<T>(ptr, nelems, status, cmp, vals);      \
+    roc_shmem_wait_until_all_vector<T>(ivars, nelems, status, cmp, vals);    \
   }                                                                          \
-  __device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr,       \
+  __device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars,     \
                                                            size_t nelems,    \
                                                            size_t* indices,  \
                                                            const int* status,\
-                                                          roc_shmem_cmps cmp,\
+                                                           int cmp,          \
                                                            T* vals) {        \
-    return roc_shmem_wait_until_some_vector<T>(ptr, nelems, indices,         \
+    return roc_shmem_wait_until_some_vector<T>(ivars, nelems, indices,       \
         status, cmp, vals);                                                  \
   }                                                                          \
-  __device__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val) { \
-    return roc_shmem_test<T>(ptr, cmp, val);                                   \
+  __device__ int roc_shmem_##TNAME##_test(T *ivars, int cmp, T val) { \
+    return roc_shmem_test<T>(ivars, cmp, val);                                   \
   }
 
 /******************************************************************************
diff --git a/src/templates.hpp b/src/templates.hpp
index 3df3a01c23..025cefdcfa 100644
--- a/src/templates.hpp
+++ b/src/templates.hpp
@@ -287,7 +287,7 @@ __device__ T roc_shmem_atomic_fetch_inc(T *dest, int pe);
  * granularity.
  *
  * @param[in] ctx     Context with which to perform this operation.
- * @param[in] dest    Destination address. Must be an address on the symmetric
+ * @param[in] source  Source address. Must be an address on the symmetric
                       heap.
  * @param[in] val     The value to be atomically added.
  * @param[in] pe      PE of the remote process.
@@ -296,10 +296,10 @@ __device__ T roc_shmem_atomic_fetch_inc(T *dest, int pe);
  *
  */
 template <typename T>
-__device__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *dest, int pe);
+__device__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *source, int pe);
 
 template <typename T>
-__device__ T roc_shmem_atomic_fetch(T *dest, int pe);
+__device__ T roc_shmem_atomic_fetch(T *source, int pe);
 
 /**
  * @brief Atomically add the value \p val to \p dest on \p pe.
@@ -380,7 +380,7 @@ __device__ void roc_shmem_atomic_set(T *dest, T value, int pe);
  * coalesce contiguous messages and elect a leader thread to call into the
  * ROC_SHMEM function.
  *
- * @param[in] ptr Pointer to memory on the symmetric heap to wait for.
+ * @param[in] ivars Pointer to memory on the symmetric heap to wait for.
  * @param[in] cmp Operation for the comparison.
  * @param[in] val Value to compare the memory at \p ptr to.
  *
@@ -388,7 +388,7 @@ __device__ void roc_shmem_atomic_set(T *dest, T value, int pe);
  *
  */
 template <typename T>
-__device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val);
+__device__ void roc_shmem_wait_until(T *ivars, int cmp, T val);
 
 /**
  * @brief test if the condition (* \p ptr \p cmps \p val) is
@@ -399,7 +399,7 @@ __device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val);
  * coalesce contiguous messages and elect a leader thread to call into the
  * ROC_SHMEM function.
  *
- * @param[in] ptr Pointer to memory on the symmetric heap to wait for.
+ * @param[in] ivars Pointer to memory on the symmetric heap to wait for.
  * @param[in] cmp Operation for the comparison.
  * @param[in] val Value to compare the memory at \p ptr to.
  *
@@ -407,7 +407,7 @@ __device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val);
  *
  */
 template <typename T>
-__device__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val);
+__device__ int roc_shmem_test(T *ivars, int cmp, T val);
 
 /**
  * @brief Perform a broadcast between PEs in the active set. The caller
diff --git a/src/templates_host.hpp b/src/templates_host.hpp
index ff161feb0a..dabd6a3748 100644
--- a/src/templates_host.hpp
+++ b/src/templates_host.hpp
@@ -102,10 +102,10 @@ template <typename T>
 __host__ T roc_shmem_atomic_fetch_inc(T *dest, int pe);
 
 template <typename T>
-__host__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *dest, int pe);
+__host__ T roc_shmem_atomic_fetch(roc_shmem_ctx_t ctx, T *source, int pe);
 
 template <typename T>
-__host__ T roc_shmem_atomic_fetch(T *dest, int pe);
+__host__ T roc_shmem_atomic_fetch(T *source, int pe);
 
 template <typename T>
 __host__ void roc_shmem_atomic_add(roc_shmem_ctx_t ctx, T *dest, T val, int pe);
@@ -136,35 +136,35 @@ __host__ void roc_shmem_to_all(roc_shmem_ctx_t ctx, T *dest, const T *source,
                                int PE_size, T *pWrk, long *pSync);
 
 template <typename T>
-__host__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val);
+__host__ void roc_shmem_wait_until(T *ivars, int cmp, T val);
 
 template <typename T>
-__host__ void wait_until_all(T* ptr, size_t nelems, const int *status,
-                             roc_shmem_cmps cmp, T val);
+__host__ void wait_until_all(T* ivars, size_t nelems, const int *status,
+                             int cmp, T val);
 
 template <typename T>
-__host__ size_t wait_until_any(T* ptr, size_t nelems, const int *status,
-                               roc_shmem_cmps cmp, T val);
+__host__ size_t wait_until_any(T* ivars, size_t nelems, const int *status,
+                               int cmp, T val);
 
 template <typename T>
-__host__ size_t wait_until_some(T* ptr, size_t nelems, size_t* indices,
-                              const int *status, roc_shmem_cmps cmp, T val);
+__host__ size_t wait_until_some(T* ivars, size_t nelems, size_t* indices,
+                              const int *status, int cmp, T val);
 
 template <typename T>
-__host__ void wait_until_all_vector(T* ptr, size_t nelems, const int *status,
-                                    roc_shmem_cmps cmp, T* vals);
+__host__ void wait_until_all_vector(T* ivars, size_t nelems, const int *status,
+                                    int cmp, T* vals);
 
 template <typename T>
-__host__ size_t wait_until_any_vector(T* ptr, size_t nelems, const int *status,
-                                      roc_shmem_cmps cmp, T* vals);
+__host__ size_t wait_until_any_vector(T* ivars, size_t nelems, const int *status,
+                                      int cmp, T* vals);
 
 template <typename T>
-__host__ size_t wait_until_some_vector(T* ptr, size_t nelems,
+__host__ size_t wait_until_some_vector(T* ivars, size_t nelems,
                                      size_t* indices, const int *status,
-                                     roc_shmem_cmps cmp, T* vals);
+                                     int cmp, T* vals);
 
 template <typename T>
-__host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val);
+__host__ int roc_shmem_test(T *ivars, int cmp, T val);
 
 }  // namespace rocshmem