*_wait* routines changed parameter from ptr to ivars to match OpenSHMEM
Этот коммит содержится в:
@@ -1152,71 +1152,71 @@ __device__ ATTR_NO_INLINE void roc_shmem_threadfence_system();
|
||||
* MACRO DECLARE SHMEM_WAIT_UNTIL APIs
|
||||
*/
|
||||
#define WAIT_UNTIL_API_GEN(T, TNAME) \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until(T *ptr, \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until(T *ivars, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr, \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until_all(T *ptr, \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until_all(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr, \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars, \
|
||||
size_t nelems, \
|
||||
size_t* indices, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr, \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T* vals); \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr, \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T* vals); \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr, \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
size_t* indices, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp,\
|
||||
T* vals); \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until(T *ptr, \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until(T *ivars, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr, \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until_all(T *ptr, \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until_all(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr, \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars, \
|
||||
size_t nelems, \
|
||||
size_t* indices, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr, \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T* vals); \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr, \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T* vals); \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr, \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
size_t* indices, \
|
||||
const int* status, \
|
||||
@@ -1226,9 +1226,9 @@ __device__ ATTR_NO_INLINE void roc_shmem_threadfence_system();
|
||||
/*
|
||||
* MACRO DECLARE SHMEM_TEST APIs
|
||||
*/
|
||||
#define TEST_API_GEN(T, TNAME) \
|
||||
__device__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val); \
|
||||
__host__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
#define TEST_API_GEN(T, TNAME) \
|
||||
__device__ int roc_shmem_##TNAME##_test(T *ivars, roc_shmem_cmps cmp, T val); \
|
||||
__host__ int roc_shmem_##TNAME##_test(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
/**
|
||||
* @name SHMEM_REDUCTIONS
|
||||
@@ -2031,7 +2031,7 @@ ATOMIC_INC_API_GEN(ptrdiff_t, ptrdiff)
|
||||
* coalesce contiguous messages and elect a leader thread to call into the
|
||||
* ROC_SHMEM function.
|
||||
*
|
||||
* @param[in] ptr Pointer to memory on the symmetric heap to wait for.
|
||||
* @param[in] ivars Pointer to memory on the symmetric heap to wait for.
|
||||
* @param[in] cmp Operation for the comparison.
|
||||
* @param[in] val Value to compare the memory at \p ivars to.
|
||||
*
|
||||
@@ -2064,7 +2064,7 @@ WAIT_UNTIL_API_GEN(unsigned long long, ulonglong) // NOLINT(runtime/int)
|
||||
* coalesce contiguous messages and elect a leader thread to call into the
|
||||
* ROC_SHMEM function.
|
||||
*
|
||||
* @param[in] ptr Pointer to memory on the symmetric heap to wait for.
|
||||
* @param[in] ivars Pointer to memory on the symmetric heap to wait for.
|
||||
* @param[in] cmp Operation for the comparison.
|
||||
* @param[in] val Value to compare the memory at \p ivars to.
|
||||
*
|
||||
|
||||
+16
-16
@@ -74,42 +74,42 @@ class Context {
|
||||
***************************** DEVICE METHODS *****************************
|
||||
*************************************************************************/
|
||||
template <typename T>
|
||||
__device__ void wait_until(T* ptr, roc_shmem_cmps cmp, T val);
|
||||
__device__ void wait_until(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__device__ void wait_until_all(T* ptr, size_t nelems,
|
||||
__device__ void wait_until_all(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__device__ size_t wait_until_any(T* ptr, size_t nelems,
|
||||
__device__ size_t wait_until_any(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__device__ size_t wait_until_some(T* ptr, size_t nelems,
|
||||
__device__ size_t wait_until_some(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__device__ void wait_until_all_vector(T* ptr, size_t nelems,
|
||||
__device__ void wait_until_all_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__device__ size_t wait_until_any_vector(T* ptr, size_t nelems,
|
||||
__device__ size_t wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__device__ size_t wait_until_some_vector(T* ptr, size_t nelems,
|
||||
__device__ size_t wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__device__ int test(T* ptr, roc_shmem_cmps cmp, T val);
|
||||
__device__ int test(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
__device__ void threadfence_system();
|
||||
|
||||
@@ -365,42 +365,42 @@ class Context {
|
||||
int nreduce);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until(T* ptr, roc_shmem_cmps cmp, T val);
|
||||
__host__ void wait_until(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all(T* ptr, size_t nelems,
|
||||
__host__ void wait_until_all(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_any(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_some(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all_vector(T* ptr, size_t nelems,
|
||||
__host__ void wait_until_all_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ int test(T* ptr, roc_shmem_cmps cmp, T val);
|
||||
__host__ int test(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
public:
|
||||
/**
|
||||
|
||||
@@ -199,9 +199,9 @@ __device__ void Context::broadcast(T *dest, const T *source, int nelems,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ void Context::wait_until(T *ptr, roc_shmem_cmps cmp,
|
||||
__device__ __forceinline__ void Context::wait_until(T *ivars, roc_shmem_cmps cmp,
|
||||
T val) {
|
||||
while (!test(ptr, cmp, val)) {
|
||||
while (!test(ivars, cmp, val)) {
|
||||
}
|
||||
}
|
||||
|
||||
@@ -219,7 +219,7 @@ __device__ __forceinline__ size_t status_entry(size_t nelems,
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__
|
||||
size_t Context::wait_until_any(T* ptr, size_t nelems,
|
||||
size_t Context::wait_until_any(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
// zero nelems error condition
|
||||
@@ -240,7 +240,7 @@ size_t Context::wait_until_any(T* ptr, size_t nelems,
|
||||
if (status[i]) {
|
||||
continue;
|
||||
}
|
||||
if (test(ptr + i, cmp, val)) {
|
||||
if (test(ivars + i, cmp, val)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
@@ -249,7 +249,7 @@ size_t Context::wait_until_any(T* ptr, size_t nelems,
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__
|
||||
void Context::wait_until_all(T* ptr, size_t nelems,
|
||||
void Context::wait_until_all(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
// zero nelems error condition
|
||||
@@ -268,14 +268,14 @@ void Context::wait_until_all(T* ptr, size_t nelems,
|
||||
if (status[i]) {
|
||||
continue;
|
||||
}
|
||||
while (!test(ptr + i, cmp, val)) {
|
||||
while (!test(ivars + i, cmp, val)) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__
|
||||
size_t Context::wait_until_some(T* ptr, size_t nelems,
|
||||
size_t Context::wait_until_some(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
@@ -299,7 +299,7 @@ size_t Context::wait_until_some(T* ptr, size_t nelems,
|
||||
if (status[i]) {
|
||||
continue;
|
||||
}
|
||||
if (test(ptr + i, cmp, val)) {
|
||||
if (test(ivars + i, cmp, val)) {
|
||||
done = true;
|
||||
indices[ncompleted] = i;
|
||||
ncompleted++;
|
||||
@@ -311,7 +311,7 @@ size_t Context::wait_until_some(T* ptr, size_t nelems,
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__
|
||||
void Context::wait_until_all_vector(T* ptr, size_t nelems,
|
||||
void Context::wait_until_all_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
;
|
||||
@@ -319,7 +319,7 @@ void Context::wait_until_all_vector(T* ptr, size_t nelems,
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__
|
||||
size_t Context::wait_until_any_vector(T* ptr, size_t nelems,
|
||||
size_t Context::wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
return 0;
|
||||
@@ -327,7 +327,7 @@ size_t Context::wait_until_any_vector(T* ptr, size_t nelems,
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__
|
||||
size_t Context::wait_until_some_vector(T* ptr, size_t nelems,
|
||||
size_t Context::wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
@@ -335,38 +335,38 @@ size_t Context::wait_until_some_vector(T* ptr, size_t nelems,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ int Context::test(T *ptr, roc_shmem_cmps cmp,
|
||||
__device__ __forceinline__ int Context::test(T *ivars, roc_shmem_cmps cmp,
|
||||
T val) {
|
||||
int ret = 0;
|
||||
volatile T *vol_ptr = reinterpret_cast<T *>(ptr);
|
||||
volatile T *vol_ivars = reinterpret_cast<T *>(ivars);
|
||||
switch (cmp) {
|
||||
case ROC_SHMEM_CMP_EQ:
|
||||
if (uncached_load(vol_ptr) == val) {
|
||||
if (uncached_load(vol_ivars) == val) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case ROC_SHMEM_CMP_NE:
|
||||
if (uncached_load(vol_ptr) != val) {
|
||||
if (uncached_load(vol_ivars) != val) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case ROC_SHMEM_CMP_GT:
|
||||
if (uncached_load(vol_ptr) > val) {
|
||||
if (uncached_load(vol_ivars) > val) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case ROC_SHMEM_CMP_GE:
|
||||
if (uncached_load(vol_ptr) >= val) {
|
||||
if (uncached_load(vol_ivars) >= val) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case ROC_SHMEM_CMP_LT:
|
||||
if (uncached_load(vol_ptr) < val) {
|
||||
if (uncached_load(vol_ivars) < val) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case ROC_SHMEM_CMP_LE:
|
||||
if (uncached_load(vol_ptr) <= val) {
|
||||
if (uncached_load(vol_ivars) <= val) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -234,73 +234,73 @@ __host__ void Context::to_all(roc_shmem_team_t team, T *dest, const T *source,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void Context::wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
__host__ void Context::wait_until(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL);
|
||||
|
||||
HOST_DISPATCH(wait_until<T>(ptr, cmp, val));
|
||||
HOST_DISPATCH(wait_until<T>(ivars, cmp, val));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t Context::wait_until_any(T* ptr, size_t nelems,
|
||||
__host__ size_t Context::wait_until_any(T *ivars, size_t nelems,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ANY);
|
||||
|
||||
return HOST_DISPATCH(wait_until_any<T>(ptr, nelems, status, cmp, val));
|
||||
return HOST_DISPATCH(wait_until_any<T>(ivars, nelems, status, cmp, val));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void Context::wait_until_all(T* ptr, size_t nelems,
|
||||
__host__ void Context::wait_until_all(T *ivars, size_t nelems,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ALL);
|
||||
|
||||
HOST_DISPATCH(wait_until_all<T>(ptr, nelems, status, cmp, val));
|
||||
HOST_DISPATCH(wait_until_all<T>(ivars, nelems, status, cmp, val));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t Context::wait_until_some(T* ptr, size_t nelems,
|
||||
__host__ size_t Context::wait_until_some(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_SOME);
|
||||
|
||||
HOST_DISPATCH_RET(wait_until_some<T>(ptr, nelems, indices, status, cmp, val));
|
||||
HOST_DISPATCH_RET(wait_until_some<T>(ivars, nelems, indices, status, cmp, val));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void Context::wait_until_all_vector(T* ptr, size_t nelems,
|
||||
__host__ void Context::wait_until_all_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ALL_VECTOR);
|
||||
|
||||
HOST_DISPATCH(wait_until_all_vector<T>(ptr, nelems, status, cmp, vals));
|
||||
HOST_DISPATCH(wait_until_all_vector<T>(ivars, nelems, status, cmp, vals));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t Context::wait_until_any_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t Context::wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_ANY_VECTOR);
|
||||
|
||||
HOST_DISPATCH_RET(wait_until_any_vector<T>(ptr, nelems, status, cmp, vals));
|
||||
HOST_DISPATCH_RET(wait_until_any_vector<T>(ivars, nelems, status, cmp, vals));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t Context::wait_until_some_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t Context::wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
ctxHostStats.incStat(NUM_HOST_WAIT_UNTIL_SOME_VECTOR);
|
||||
|
||||
HOST_DISPATCH_RET(wait_until_some_vector<T>(ptr, nelems, indices, status, cmp, vals));
|
||||
HOST_DISPATCH_RET(wait_until_some_vector<T>(ivars, nelems, indices, status, cmp, vals));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ int Context::test(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
__host__ int Context::test(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
ctxHostStats.incStat(NUM_HOST_TEST);
|
||||
|
||||
HOST_DISPATCH_RET(test<T>(ptr, cmp, val));
|
||||
HOST_DISPATCH_RET(test<T>(ivars, cmp, val));
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
@@ -99,42 +99,42 @@ class GPUIBHostContext : public Context {
|
||||
int nreduce);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
__host__ void wait_until(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_any(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all(T* ptr, size_t nelems,
|
||||
__host__ void wait_until_all(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_some(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all_vector(T* ptr, size_t nelems,
|
||||
__host__ void wait_until_all_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ int test(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
__host__ int test(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
public:
|
||||
/* Pointer to the backend's host interface */
|
||||
|
||||
@@ -115,57 +115,57 @@ __host__ void GPUIBHostContext::to_all(roc_shmem_team_t team, T *dest,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void GPUIBHostContext::wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
host_interface->wait_until<T>(ptr, cmp, val, context_window_info);
|
||||
__host__ void GPUIBHostContext::wait_until(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
host_interface->wait_until<T>(ivars, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void GPUIBHostContext::wait_until_all(T *ptr, size_t nelems,
|
||||
__host__ void GPUIBHostContext::wait_until_all(T *ivars, size_t nelems,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
host_interface->wait_until_all<T>(ptr, nelems, status, cmp, val, context_window_info);
|
||||
host_interface->wait_until_all<T>(ivars, nelems, status, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t GPUIBHostContext::wait_until_any(T *ptr, size_t nelems,
|
||||
__host__ size_t GPUIBHostContext::wait_until_any(T *ivars, size_t nelems,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->wait_until_any<T>(ptr, nelems, status, cmp, val, context_window_info);
|
||||
return host_interface->wait_until_any<T>(ivars, nelems, status, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t GPUIBHostContext::wait_until_some(T *ptr, size_t nelems,
|
||||
__host__ size_t GPUIBHostContext::wait_until_some(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->wait_until_some<T>(ptr, nelems, indices, status, cmp, val, context_window_info);
|
||||
return host_interface->wait_until_some<T>(ivars, nelems, indices, status, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void GPUIBHostContext::wait_until_all_vector(T *ptr, size_t nelems,
|
||||
__host__ void GPUIBHostContext::wait_until_all_vector(T *ivars, size_t nelems,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
host_interface->wait_until_all_vector<T>(ptr, nelems, status, cmp, vals, context_window_info);
|
||||
host_interface->wait_until_all_vector<T>(ivars, nelems, status, cmp, vals, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t GPUIBHostContext::wait_until_any_vector(T *ptr, size_t nelems,
|
||||
__host__ size_t GPUIBHostContext::wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
return host_interface->wait_until_any_vector<T>(ptr, nelems, status, cmp, vals, context_window_info);
|
||||
return host_interface->wait_until_any_vector<T>(ivars, nelems, status, cmp, vals, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t GPUIBHostContext::wait_until_some_vector(T *ptr, size_t nelems,
|
||||
__host__ size_t GPUIBHostContext::wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
return host_interface->wait_until_some_vector<T>(ptr, nelems, indices, status, cmp, vals, context_window_info);
|
||||
return host_interface->wait_until_some_vector<T>(ivars, nelems, indices, status, cmp, vals, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ int GPUIBHostContext::test(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->test<T>(ptr, cmp, val, context_window_info);
|
||||
__host__ int GPUIBHostContext::test(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->test<T>(ivars, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
@@ -211,41 +211,41 @@ class HostInterface {
|
||||
int nreduce);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until(T* ptr, roc_shmem_cmps cmp, T val,
|
||||
__host__ void wait_until(T *ivars, roc_shmem_cmps cmp, T val,
|
||||
WindowInfo* window_info);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all(T* ptr, size_t nelems, const int* status,
|
||||
__host__ void wait_until_all(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T val,
|
||||
WindowInfo* window_info);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any(T* ptr, size_t nelems, const int* status,
|
||||
__host__ size_t wait_until_any(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T val,
|
||||
WindowInfo* window_info);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some(T* ptr, size_t nelems, size_t* indices,
|
||||
__host__ size_t wait_until_some(T *ivars, size_t nelems, size_t* indices,
|
||||
const int* status, roc_shmem_cmps cmp, T val,
|
||||
WindowInfo* window_info);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all_vector(T* ptr, size_t nelems, const int* status,
|
||||
__host__ void wait_until_all_vector(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T* vals,
|
||||
WindowInfo* window_info);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int* status, roc_shmem_cmps cmp,
|
||||
T* vals, WindowInfo* window_info);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some_vector(T* ptr, size_t nelems, size_t* indices,
|
||||
__host__ size_t wait_until_some_vector(T *ivars, size_t nelems, size_t* indices,
|
||||
const int* status, roc_shmem_cmps cmp,
|
||||
T* vals, WindowInfo* window_info);
|
||||
|
||||
template <typename T>
|
||||
__host__ int test(T* ptr, roc_shmem_cmps cmp, T val, WindowInfo* window_info);
|
||||
__host__ int test(T *ivars, roc_shmem_cmps cmp, T val, WindowInfo* window_info);
|
||||
|
||||
#ifndef USE_COHERENT_HEAP
|
||||
__host__ void create_hdp_window();
|
||||
|
||||
@@ -446,7 +446,7 @@ __host__ inline int HostInterface::test_and_compare(MPI_Aint offset,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void HostInterface::wait_until(T* ptr, roc_shmem_cmps cmp, T val,
|
||||
__host__ void HostInterface::wait_until(T *ivars, roc_shmem_cmps cmp, T val,
|
||||
WindowInfo* window_info) {
|
||||
DPRINTF("Function: host_wait_until\n");
|
||||
|
||||
@@ -454,13 +454,13 @@ __host__ void HostInterface::wait_until(T* ptr, roc_shmem_cmps cmp, T val,
|
||||
* Find the offset of this memory in the window
|
||||
*/
|
||||
MPI_Aint offset{
|
||||
compute_offset(ptr, window_info->get_start(), window_info->get_end())};
|
||||
compute_offset(ivars, window_info->get_start(), window_info->get_end())};
|
||||
|
||||
MPI_Datatype mpi_type{get_mpi_type<T>()};
|
||||
MPI_Win win{window_info->get_win()};
|
||||
|
||||
/*
|
||||
* Continuously read the ptr atomically until it satisfies the condition
|
||||
* Continuously read the ivars atomically until it satisfies the condition
|
||||
*/
|
||||
while (1) {
|
||||
int cond_satisfied{test_and_compare(offset, mpi_type, cmp, val, win)};
|
||||
@@ -500,7 +500,7 @@ __host__ size_t status_entry(size_t nelems,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t HostInterface::wait_until_any(T* ptr, size_t nelems,
|
||||
__host__ size_t HostInterface::wait_until_any(T* ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val,
|
||||
WindowInfo* window_info) {
|
||||
@@ -524,7 +524,7 @@ __host__ size_t HostInterface::wait_until_any(T* ptr, size_t nelems,
|
||||
if (status[i]) {
|
||||
continue;
|
||||
}
|
||||
if (test(ptr + i, cmp, val, window_info)) {
|
||||
if (test(ivars + i, cmp, val, window_info)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
@@ -532,7 +532,7 @@ __host__ size_t HostInterface::wait_until_any(T* ptr, size_t nelems,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void HostInterface::wait_until_all(T* ptr, size_t nelems,
|
||||
__host__ void HostInterface::wait_until_all(T* ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val,
|
||||
WindowInfo* window_info) {
|
||||
@@ -554,13 +554,13 @@ __host__ void HostInterface::wait_until_all(T* ptr, size_t nelems,
|
||||
if (status[i]) {
|
||||
continue;
|
||||
}
|
||||
while (!test(ptr + i, cmp, val, window_info)) {
|
||||
while (!test(ivars + i, cmp, val, window_info)) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t HostInterface::wait_until_some(T* ptr, size_t nelems,
|
||||
__host__ size_t HostInterface::wait_until_some(T* ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val,
|
||||
@@ -587,7 +587,7 @@ __host__ size_t HostInterface::wait_until_some(T* ptr, size_t nelems,
|
||||
if (status[i]) {
|
||||
continue;
|
||||
}
|
||||
if (test(ptr + i, cmp, val, window_info)) {
|
||||
if (test(ivars + i, cmp, val, window_info)) {
|
||||
done = true;
|
||||
indices[ncompleted] = i;
|
||||
ncompleted++;
|
||||
@@ -598,7 +598,7 @@ __host__ size_t HostInterface::wait_until_some(T* ptr, size_t nelems,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void HostInterface::wait_until_all_vector(T* ptr, size_t nelems,
|
||||
__host__ void HostInterface::wait_until_all_vector(T* ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals,
|
||||
WindowInfo* window_info) {
|
||||
@@ -606,7 +606,7 @@ __host__ void HostInterface::wait_until_all_vector(T* ptr, size_t nelems,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t HostInterface::wait_until_any_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t HostInterface::wait_until_any_vector(T* ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals,
|
||||
WindowInfo* window_info) {
|
||||
@@ -615,7 +615,7 @@ __host__ size_t HostInterface::wait_until_any_vector(T* ptr, size_t nelems,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t HostInterface::wait_until_some_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t HostInterface::wait_until_some_vector(T* ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals,
|
||||
@@ -625,7 +625,7 @@ __host__ size_t HostInterface::wait_until_some_vector(T* ptr, size_t nelems,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ int HostInterface::test(T* ptr, roc_shmem_cmps cmp, T val,
|
||||
__host__ int HostInterface::test(T* ivars, roc_shmem_cmps cmp, T val,
|
||||
WindowInfo* window_info) {
|
||||
DPRINTF("Function: host_test\n");
|
||||
|
||||
@@ -633,7 +633,7 @@ __host__ int HostInterface::test(T* ptr, roc_shmem_cmps cmp, T val,
|
||||
* Find the offset of this memory in the window
|
||||
*/
|
||||
MPI_Aint offset{
|
||||
compute_offset(ptr, window_info->get_start(), window_info->get_end())};
|
||||
compute_offset(ivars, window_info->get_start(), window_info->get_end())};
|
||||
|
||||
MPI_Datatype mpi_type{get_mpi_type<T>()};
|
||||
|
||||
|
||||
@@ -99,42 +99,42 @@ class IPCHostContext : public Context {
|
||||
int nreduce);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
__host__ void wait_until(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_any(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all(T* ptr, size_t nelems,
|
||||
__host__ void wait_until_all(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_some(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all_vector(T* ptr, size_t nelems,
|
||||
__host__ void wait_until_all_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ int test(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
__host__ int test(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
public:
|
||||
/* Pointer to the backend's host interface */
|
||||
|
||||
@@ -115,57 +115,57 @@ __host__ void IPCHostContext::to_all(roc_shmem_team_t team, T *dest,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void IPCHostContext::wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
host_interface->wait_until<T>(ptr, cmp, val, context_window_info);
|
||||
__host__ void IPCHostContext::wait_until(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
host_interface->wait_until<T>(ivars, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void IPCHostContext::wait_until_all(T *ptr, size_t nelems,
|
||||
__host__ void IPCHostContext::wait_until_all(T *ivars, size_t nelems,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
host_interface->wait_until_all<T>(ptr, nelems, status, cmp, val, context_window_info);
|
||||
host_interface->wait_until_all<T>(ivars, nelems, status, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t IPCHostContext::wait_until_any(T *ptr, size_t nelems,
|
||||
__host__ size_t IPCHostContext::wait_until_any(T *ivars, size_t nelems,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->wait_until_any<T>(ptr, nelems, status, cmp, val, context_window_info);
|
||||
return host_interface->wait_until_any<T>(ivars, nelems, status, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t IPCHostContext::wait_until_some(T *ptr, size_t nelems,
|
||||
__host__ size_t IPCHostContext::wait_until_some(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->wait_until_some<T>(ptr, nelems, indices, status, cmp, val, context_window_info);
|
||||
return host_interface->wait_until_some<T>(ivars, nelems, indices, status, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void IPCHostContext::wait_until_all_vector(T *ptr, size_t nelems,
|
||||
__host__ void IPCHostContext::wait_until_all_vector(T *ivars, size_t nelems,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
host_interface->wait_until_all_vector<T>(ptr, nelems, status, cmp, vals, context_window_info);
|
||||
host_interface->wait_until_all_vector<T>(ivars, nelems, status, cmp, vals, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t IPCHostContext::wait_until_any_vector(T *ptr, size_t nelems,
|
||||
__host__ size_t IPCHostContext::wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
return host_interface->wait_until_any_vector<T>(ptr, nelems, status, cmp, vals, context_window_info);
|
||||
return host_interface->wait_until_any_vector<T>(ivars, nelems, status, cmp, vals, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t IPCHostContext::wait_until_some_vector(T *ptr, size_t nelems,
|
||||
__host__ size_t IPCHostContext::wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
return host_interface->wait_until_some_vector<T>(ptr, nelems, indices, status, cmp, vals, context_window_info);
|
||||
return host_interface->wait_until_some_vector<T>(ivars, nelems, indices, status, cmp, vals, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ int IPCHostContext::test(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->test<T>(ptr, cmp, val, context_window_info);
|
||||
__host__ int IPCHostContext::test(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->test<T>(ivars, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
@@ -148,42 +148,42 @@ class ROHostContext : public Context {
|
||||
int nreduce);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
__host__ void wait_until(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all(T* ptr, size_t nelems,
|
||||
__host__ void wait_until_all(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_any(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_some(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all_vector(T* ptr, size_t nelems,
|
||||
__host__ void wait_until_all_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ int test(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
__host__ int test(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
};
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
@@ -141,58 +141,58 @@ __host__ void ROHostContext::to_all(roc_shmem_team_t team, T *dest,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void ROHostContext::wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
host_interface->wait_until<T>(ptr, cmp, val, context_window_info);
|
||||
__host__ void ROHostContext::wait_until(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
host_interface->wait_until<T>(ivars, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t ROHostContext::wait_until_any(T* ptr, size_t nelems,
|
||||
__host__ size_t ROHostContext::wait_until_any(T *ivars, size_t nelems,
|
||||
const int *status, roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->wait_until_any<T>(ptr, nelems, status, cmp, val, context_window_info);
|
||||
return host_interface->wait_until_any<T>(ivars, nelems, status, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__
|
||||
void ROHostContext::wait_until_all(T* ptr, size_t nelems,
|
||||
void ROHostContext::wait_until_all(T *ivars, size_t nelems,
|
||||
const int *status, roc_shmem_cmps cmp, T val) {
|
||||
host_interface->wait_until_all(ptr, nelems, status, cmp, val, context_window_info);
|
||||
host_interface->wait_until_all(ivars, nelems, status, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__
|
||||
size_t ROHostContext::wait_until_some(T* ptr, size_t nelems,
|
||||
size_t ROHostContext::wait_until_some(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->wait_until_some(ptr, nelems, indices, status, cmp, val, context_window_info);
|
||||
return host_interface->wait_until_some(ivars, nelems, indices, status, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__
|
||||
void ROHostContext::wait_until_all_vector(T* ptr, size_t nelems,
|
||||
void ROHostContext::wait_until_all_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
host_interface->wait_until_all_vector(ptr, nelems, status, cmp, vals, context_window_info);
|
||||
host_interface->wait_until_all_vector(ivars, nelems, status, cmp, vals, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t ROHostContext::wait_until_any_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t ROHostContext::wait_until_any_vector(T *ivars, size_t nelems,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
return host_interface->wait_until_any_vector(ptr, nelems, status, cmp, vals, context_window_info);
|
||||
return host_interface->wait_until_any_vector(ivars, nelems, status, cmp, vals, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t ROHostContext::wait_until_some_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t ROHostContext::wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int *status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
return host_interface->wait_until_some_vector(ptr, nelems, indices, status, cmp, vals, context_window_info);
|
||||
return host_interface->wait_until_some_vector(ivars, nelems, indices, status, cmp, vals, context_window_info);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ int ROHostContext::test(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->test<T>(ptr, cmp, val, context_window_info);
|
||||
__host__ int ROHostContext::test(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
return host_interface->test<T>(ivars, cmp, val, context_window_info);
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
+42
-42
@@ -744,74 +744,74 @@ __host__ void roc_shmem_to_all([[maybe_unused]] roc_shmem_ctx_t ctx,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
__host__ void roc_shmem_wait_until(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
DPRINTF("Host function: roc_shmem_wait_until\n");
|
||||
|
||||
get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until(ptr, cmp, val);
|
||||
get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until(ivars, cmp, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void roc_shmem_wait_until_all(T *ptr, size_t nelems, const int* status,
|
||||
__host__ void roc_shmem_wait_until_all(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
DPRINTF("Host function: roc_shmem_wait_until_all\n");
|
||||
|
||||
get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all(ptr,
|
||||
get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all(ivars,
|
||||
nelems, status, cmp, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t roc_shmem_wait_until_any(T *ptr, size_t nelems, const int* status,
|
||||
__host__ size_t roc_shmem_wait_until_any(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
DPRINTF("Host function: roc_shmem_wait_until_any\n");
|
||||
|
||||
return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any(ptr,
|
||||
return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any(ivars,
|
||||
nelems, status, cmp, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t roc_shmem_wait_until_some(T *ptr, size_t nelems, size_t* indices,
|
||||
__host__ size_t roc_shmem_wait_until_some(T *ivars, size_t nelems, size_t* indices,
|
||||
const int* status, roc_shmem_cmps cmp,
|
||||
T val) {
|
||||
DPRINTF("Host function: roc_shmem_wait_until_some\n");
|
||||
|
||||
return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some(ptr, nelems,
|
||||
return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some(ivars, nelems,
|
||||
indices, status, cmp, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t roc_shmem_wait_until_any_vector(T *ptr, size_t nelems, const int* status,
|
||||
__host__ size_t roc_shmem_wait_until_any_vector(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
DPRINTF("Host function: roc_shmem_wait_until_any_vector\n");
|
||||
|
||||
return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any_vector(ptr,
|
||||
return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_any_vector(ivars,
|
||||
nelems, status, cmp, vals);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void roc_shmem_wait_until_all_vector(T *ptr, size_t nelems, const int* status,
|
||||
__host__ void roc_shmem_wait_until_all_vector(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
DPRINTF("Host function: roc_shmem_wait_until_all_vector\n");
|
||||
|
||||
get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all_vector(ptr,
|
||||
get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_all_vector(ivars,
|
||||
nelems, status, cmp, vals);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t roc_shmem_wait_until_some_vector(T *ptr, size_t nelems,
|
||||
__host__ size_t roc_shmem_wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
DPRINTF("Host function: roc_shmem_wait_until_some_vector\n");
|
||||
|
||||
return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some_vector(ptr,
|
||||
return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->wait_until_some_vector(ivars,
|
||||
nelems, indices, status, cmp, vals);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
__host__ int roc_shmem_test(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
DPRINTF("Host function: roc_shmem_testl\n");
|
||||
|
||||
return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->test(ptr, cmp, val);
|
||||
return get_internal_ctx(ROC_SHMEM_HOST_CTX_DEFAULT)->test(ivars, cmp, val);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -940,31 +940,31 @@ __host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
* Declare templates for the wait types
|
||||
*/
|
||||
#define WAIT_GEN(T) \
|
||||
template __host__ void roc_shmem_wait_until<T>(T * ptr, roc_shmem_cmps cmp, \
|
||||
template __host__ void roc_shmem_wait_until<T>(T *ivars, roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
template __host__ int roc_shmem_test<T>(T * ptr, roc_shmem_cmps cmp, T val); \
|
||||
template __host__ void Context::wait_until<T>(T * ptr, roc_shmem_cmps cmp, \
|
||||
template __host__ int roc_shmem_test<T>(T *ivars, roc_shmem_cmps cmp, T val);\
|
||||
template __host__ void Context::wait_until<T>(T *ivars, roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
template __host__ size_t roc_shmem_wait_until_any<T>(T * ptr, \
|
||||
template __host__ size_t roc_shmem_wait_until_any<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T val); \
|
||||
template __host__ void roc_shmem_wait_until_all<T>(T * ptr, \
|
||||
template __host__ void roc_shmem_wait_until_all<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T val); \
|
||||
template __host__ size_t roc_shmem_wait_until_some<T>(T * ptr, size_t nelems,\
|
||||
template __host__ size_t roc_shmem_wait_until_some<T>(T *ivars, size_t nelems,\
|
||||
size_t* indices, const int* status, \
|
||||
roc_shmem_cmps cmp, T val); \
|
||||
template __host__ size_t roc_shmem_wait_until_any_vector<T>(T * ptr, \
|
||||
template __host__ size_t roc_shmem_wait_until_any_vector<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T* vals); \
|
||||
template __host__ void roc_shmem_wait_until_all_vector<T>(T * ptr, \
|
||||
template __host__ void roc_shmem_wait_until_all_vector<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T* vals); \
|
||||
template __host__ size_t roc_shmem_wait_until_some_vector<T>(T * ptr, \
|
||||
template __host__ size_t roc_shmem_wait_until_some_vector<T>(T *ivars, \
|
||||
size_t nelems, size_t* indices, \
|
||||
const int* status, roc_shmem_cmps cmp, \
|
||||
T* vals); \
|
||||
template __host__ int Context::test<T>(T * ptr, roc_shmem_cmps cmp, T val);
|
||||
template __host__ int Context::test<T>(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
/**
|
||||
* Define APIs to call the template functions
|
||||
@@ -1165,55 +1165,55 @@ __host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
}
|
||||
|
||||
#define WAIT_DEF_GEN(T, TNAME) \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until(T *ptr, roc_shmem_cmps cmp, \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until(T *ivars, roc_shmem_cmps cmp, \
|
||||
T val) { \
|
||||
roc_shmem_wait_until<T>(ptr, cmp, val); \
|
||||
roc_shmem_wait_until<T>(ivars, cmp, val); \
|
||||
} \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr, size_t nelems, \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars, size_t nelems,\
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val) { \
|
||||
return roc_shmem_wait_until_any<T>(ptr, nelems, status, cmp, val); \
|
||||
return roc_shmem_wait_until_any<T>(ivars, nelems, status, cmp, val); \
|
||||
} \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until_all(T *ptr, size_t nelems, \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until_all(T *ivars, size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val) { \
|
||||
roc_shmem_wait_until_all<T>(ptr, nelems, status, cmp, val); \
|
||||
roc_shmem_wait_until_all<T>(ivars, nelems, status, cmp, val); \
|
||||
} \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr, size_t nelems, \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars, size_t nelems, \
|
||||
size_t* indices, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val) { \
|
||||
return roc_shmem_wait_until_some<T>(ptr, nelems, indices, status, cmp, val); \
|
||||
return roc_shmem_wait_until_some<T>(ivars, nelems, indices, status, cmp, val); \
|
||||
} \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr, \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp,\
|
||||
T* vals) { \
|
||||
return roc_shmem_wait_until_any_vector<T>(ptr, nelems, status, cmp, \
|
||||
return roc_shmem_wait_until_any_vector<T>(ivars, nelems, status, cmp, \
|
||||
vals); \
|
||||
} \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr, \
|
||||
__host__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp,\
|
||||
T* vals) { \
|
||||
roc_shmem_wait_until_all_vector<T>(ptr, nelems, status, cmp, vals); \
|
||||
roc_shmem_wait_until_all_vector<T>(ivars, nelems, status, cmp, vals); \
|
||||
} \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr, \
|
||||
__host__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
size_t* indices, \
|
||||
const int* status,\
|
||||
roc_shmem_cmps cmp,\
|
||||
T* vals) { \
|
||||
return roc_shmem_wait_until_some_vector<T>(ptr, nelems, indices, \
|
||||
return roc_shmem_wait_until_some_vector<T>(ivars, nelems, indices, \
|
||||
status, cmp, vals); \
|
||||
} \
|
||||
__host__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val) { \
|
||||
return roc_shmem_test<T>(ptr, cmp, val); \
|
||||
__host__ int roc_shmem_##TNAME##_test(T *ivars, roc_shmem_cmps cmp, T val) { \
|
||||
return roc_shmem_test<T>(ivars, cmp, val); \
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
|
||||
@@ -488,67 +488,67 @@ __device__ void roc_shmem_wg_fcollect(roc_shmem_ctx_t ctx,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
__device__ void roc_shmem_wait_until(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
GPU_DPRINTF("Function: roc_shmem_wait_until\n");
|
||||
|
||||
Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
|
||||
ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL);
|
||||
ctx_internal->wait_until(ptr, cmp, val);
|
||||
ctx_internal->wait_until(ivars, cmp, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmem_wait_until_all(T *ptr, size_t nelems, const int* status,
|
||||
__device__ void roc_shmem_wait_until_all(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
GPU_DPRINTF("Function: roc_shmem_wait_until_all\n");
|
||||
|
||||
Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
|
||||
ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ALL);
|
||||
ctx_internal->wait_until_all(ptr, nelems, status, cmp, val);
|
||||
ctx_internal->wait_until_all(ivars, nelems, status, cmp, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ size_t roc_shmem_wait_until_any(T *ptr, size_t nelems, const int* status,
|
||||
__device__ size_t roc_shmem_wait_until_any(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T val) {
|
||||
GPU_DPRINTF("Function: roc_shmem_wait_until_any\n");
|
||||
|
||||
Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
|
||||
ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ANY);
|
||||
return ctx_internal->wait_until_any(ptr, nelems, status, cmp, val);
|
||||
return ctx_internal->wait_until_any(ivars, nelems, status, cmp, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ size_t roc_shmem_wait_until_some(T *ptr, size_t nelems, size_t* indices,
|
||||
__device__ size_t roc_shmem_wait_until_some(T *ivars, size_t nelems, size_t* indices,
|
||||
const int* status, roc_shmem_cmps cmp,
|
||||
T val) {
|
||||
DPRINTF("Function: roc_shmem_wait_until_some\n");
|
||||
|
||||
Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
|
||||
ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_SOME);
|
||||
return ctx_internal->wait_until_some(ptr, nelems, indices, status, cmp, val);
|
||||
return ctx_internal->wait_until_some(ivars, nelems, indices, status, cmp, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ size_t roc_shmem_wait_until_any_vector(T *ptr, size_t nelems, const int* status,
|
||||
__device__ size_t roc_shmem_wait_until_any_vector(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
DPRINTF("Function: roc_shmem_wait_until_any_vector\n");
|
||||
|
||||
Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
|
||||
ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ANY_VECTOR);
|
||||
return ctx_internal->wait_until_any_vector(ptr, nelems, status, cmp, vals);
|
||||
return ctx_internal->wait_until_any_vector(ivars, nelems, status, cmp, vals);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void roc_shmem_wait_until_all_vector(T *ptr, size_t nelems, const int* status,
|
||||
__device__ void roc_shmem_wait_until_all_vector(T *ivars, size_t nelems, const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
DPRINTF("Function: roc_shmem_wait_until_all_vector\n");
|
||||
|
||||
Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
|
||||
ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_ALL_VECTOR);
|
||||
ctx_internal->wait_until_all_vector(ptr, nelems, status, cmp, vals);
|
||||
ctx_internal->wait_until_all_vector(ivars, nelems, status, cmp, vals);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ size_t roc_shmem_wait_until_some_vector(T *ptr, size_t nelems,
|
||||
__device__ size_t roc_shmem_wait_until_some_vector(T *ivars, size_t nelems,
|
||||
size_t* indices,
|
||||
const int* status,
|
||||
roc_shmem_cmps cmp, T* vals) {
|
||||
@@ -556,17 +556,17 @@ __device__ size_t roc_shmem_wait_until_some_vector(T *ptr, size_t nelems,
|
||||
|
||||
Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
|
||||
ctx_internal->ctxStats.incStat(NUM_WAIT_UNTIL_SOME_VECTOR);
|
||||
return ctx_internal->wait_until_some_vector(ptr, nelems, indices, status, cmp, vals);
|
||||
return ctx_internal->wait_until_some_vector(ivars, nelems, indices, status, cmp, vals);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val) {
|
||||
__device__ int roc_shmem_test(T *ivars, roc_shmem_cmps cmp, T val) {
|
||||
GPU_DPRINTF("Function: roc_shmem_testl\n");
|
||||
|
||||
Context *ctx_internal = get_internal_ctx(ROC_SHMEM_CTX_DEFAULT);
|
||||
ctx_internal->ctxStats.incStat(NUM_TEST);
|
||||
|
||||
return ctx_internal->test(ptr, cmp, val);
|
||||
return ctx_internal->test(ivars, cmp, val);
|
||||
}
|
||||
|
||||
__device__ void roc_shmem_ctx_wg_barrier_all(roc_shmem_ctx_t ctx) {
|
||||
@@ -1030,52 +1030,53 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
|
||||
* Declare templates for the wait types
|
||||
*/
|
||||
#define WAIT_GEN(T) \
|
||||
template __device__ void roc_shmem_wait_until<T>(T * ptr, \
|
||||
template __device__ void roc_shmem_wait_until<T>(T *ivars, \
|
||||
roc_shmem_cmps cmp, T val); \
|
||||
template __device__ size_t roc_shmem_wait_until_any<T>(T * ptr, \
|
||||
template __device__ size_t roc_shmem_wait_until_any<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T val); \
|
||||
template __device__ void roc_shmem_wait_until_all<T>(T * ptr, \
|
||||
template __device__ void roc_shmem_wait_until_all<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T val); \
|
||||
template __device__ size_t roc_shmem_wait_until_some<T>(T * ptr, size_t nelems,\
|
||||
size_t* indices, const int* status, \
|
||||
template __device__ size_t roc_shmem_wait_until_some<T>(T *ivars, \
|
||||
size_t nelems, size_t* indices, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, T val); \
|
||||
template __device__ size_t roc_shmem_wait_until_any_vector<T>(T * ptr, \
|
||||
template __device__ size_t roc_shmem_wait_until_any_vector<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T* vals); \
|
||||
template __device__ void roc_shmem_wait_until_all_vector<T>(T * ptr, \
|
||||
template __device__ void roc_shmem_wait_until_all_vector<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T* vals); \
|
||||
template __device__ size_t roc_shmem_wait_until_some_vector<T>(T * ptr, \
|
||||
template __device__ size_t roc_shmem_wait_until_some_vector<T>(T *ivars, \
|
||||
size_t nelems, size_t* indices, \
|
||||
const int* status, roc_shmem_cmps cmp, \
|
||||
T* vals); \
|
||||
template __device__ int roc_shmem_test<T>(T * ptr, roc_shmem_cmps cmp, \
|
||||
template __device__ int roc_shmem_test<T>(T *ivars, roc_shmem_cmps cmp, \
|
||||
T val); \
|
||||
template __device__ void Context::wait_until<T>(T * ptr, roc_shmem_cmps cmp, \
|
||||
template __device__ void Context::wait_until<T>(T *ivars, roc_shmem_cmps cmp,\
|
||||
T val); \
|
||||
template __device__ size_t Context::wait_until_any<T>(T * ptr, \
|
||||
template __device__ size_t Context::wait_until_any<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T val); \
|
||||
template __device__ void Context::wait_until_all<T>(T * ptr, \
|
||||
template __device__ void Context::wait_until_all<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T val); \
|
||||
template __device__ size_t Context::wait_until_some<T>(T * ptr, \
|
||||
template __device__ size_t Context::wait_until_some<T>(T *ivars, \
|
||||
size_t nelems, \
|
||||
size_t* indices, const int* status, \
|
||||
roc_shmem_cmps cmp, T val); \
|
||||
template __device__ size_t Context::wait_until_any_vector<T>(T * ptr, \
|
||||
template __device__ size_t Context::wait_until_any_vector<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T* vals); \
|
||||
template __device__ void Context::wait_until_all_vector<T>(T * ptr, \
|
||||
template __device__ void Context::wait_until_all_vector<T>(T *ivars, \
|
||||
size_t nelems, const int* status, \
|
||||
roc_shmem_cmps cmp, T* vals); \
|
||||
template __device__ size_t Context::wait_until_some_vector<T>(T * ptr, \
|
||||
template __device__ size_t Context::wait_until_some_vector<T>(T *ivars, \
|
||||
size_t nelems, size_t* indices, \
|
||||
const int* status, roc_shmem_cmps cmp, \
|
||||
T* vals); \
|
||||
template __device__ int Context::test<T>(T * ptr, roc_shmem_cmps cmp, T val);
|
||||
template __device__ int Context::test<T>(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
#define ARITH_REDUCTION_GEN(T) \
|
||||
REDUCTION_GEN(T, ROC_SHMEM_SUM) \
|
||||
@@ -1370,57 +1371,57 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
|
||||
}
|
||||
|
||||
#define WAIT_DEF_GEN(T, TNAME) \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until(T *ptr, roc_shmem_cmps cmp, \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until(T *ivars, roc_shmem_cmps cmp, \
|
||||
T val) { \
|
||||
roc_shmem_wait_until<T>(ptr, cmp, val); \
|
||||
roc_shmem_wait_until<T>(ivars, cmp, val); \
|
||||
} \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ptr, size_t nelems,\
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_any(T *ivars, size_t nelems,\
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val) { \
|
||||
return roc_shmem_wait_until_any<T>(ptr, nelems, status, cmp, val); \
|
||||
return roc_shmem_wait_until_any<T>(ivars, nelems, status, cmp, val); \
|
||||
} \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until_all(T *ptr, size_t nelems, \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until_all(T *ivars, size_t nelems,\
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val) { \
|
||||
roc_shmem_wait_until_all<T>(ptr, nelems, status, cmp, val); \
|
||||
roc_shmem_wait_until_all<T>(ivars, nelems, status, cmp, val); \
|
||||
} \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ptr, \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_some(T *ivars, \
|
||||
size_t nelems, \
|
||||
size_t* indices, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp, \
|
||||
T val) { \
|
||||
return roc_shmem_wait_until_some<T>(ptr, nelems, indices, status, cmp, \
|
||||
return roc_shmem_wait_until_some<T>(ivars, nelems, indices, status, cmp, \
|
||||
val); \
|
||||
} \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ptr, \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_any_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp,\
|
||||
T* vals) { \
|
||||
return roc_shmem_wait_until_any_vector<T>(ptr, nelems, status, cmp, \
|
||||
return roc_shmem_wait_until_any_vector<T>(ivars, nelems, status, cmp, \
|
||||
vals); \
|
||||
} \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ptr, \
|
||||
__device__ void roc_shmem_##TNAME##_wait_until_all_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
const int* status, \
|
||||
roc_shmem_cmps cmp,\
|
||||
T* vals) { \
|
||||
roc_shmem_wait_until_all_vector<T>(ptr, nelems, status, cmp, vals); \
|
||||
roc_shmem_wait_until_all_vector<T>(ivars, nelems, status, cmp, vals); \
|
||||
} \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ptr, \
|
||||
__device__ size_t roc_shmem_##TNAME##_wait_until_some_vector(T *ivars, \
|
||||
size_t nelems, \
|
||||
size_t* indices, \
|
||||
const int* status,\
|
||||
roc_shmem_cmps cmp,\
|
||||
T* vals) { \
|
||||
return roc_shmem_wait_until_some_vector<T>(ptr, nelems, indices, \
|
||||
return roc_shmem_wait_until_some_vector<T>(ivars, nelems, indices, \
|
||||
status, cmp, vals); \
|
||||
} \
|
||||
__device__ int roc_shmem_##TNAME##_test(T *ptr, roc_shmem_cmps cmp, T val) { \
|
||||
return roc_shmem_test<T>(ptr, cmp, val); \
|
||||
__device__ int roc_shmem_##TNAME##_test(T *ivars, roc_shmem_cmps cmp, T val) { \
|
||||
return roc_shmem_test<T>(ivars, cmp, val); \
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
|
||||
@@ -380,7 +380,7 @@ __device__ void roc_shmem_atomic_set(T *dest, T value, int pe);
|
||||
* coalesce contiguous messages and elect a leader thread to call into the
|
||||
* ROC_SHMEM function.
|
||||
*
|
||||
* @param[in] ptr Pointer to memory on the symmetric heap to wait for.
|
||||
* @param[in] ivars Pointer to memory on the symmetric heap to wait for.
|
||||
* @param[in] cmp Operation for the comparison.
|
||||
* @param[in] val Value to compare the memory at \p ivars to.
|
||||
*
|
||||
@@ -388,7 +388,7 @@ __device__ void roc_shmem_atomic_set(T *dest, T value, int pe);
|
||||
*
|
||||
*/
|
||||
template <typename T>
|
||||
__device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
__device__ void roc_shmem_wait_until(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
/**
|
||||
* @brief Test if the condition (* \p ivars \p cmp \p val) is
|
||||
@@ -399,7 +399,7 @@ __device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
* coalesce contiguous messages and elect a leader thread to call into the
|
||||
* ROC_SHMEM function.
|
||||
*
|
||||
* @param[in] ptr Pointer to memory on the symmetric heap to wait for.
|
||||
* @param[in] ivars Pointer to memory on the symmetric heap to wait for.
|
||||
* @param[in] cmp Operation for the comparison.
|
||||
* @param[in] val Value to compare the memory at \p ivars to.
|
||||
*
|
||||
@@ -407,7 +407,7 @@ __device__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
*
|
||||
*/
|
||||
template <typename T>
|
||||
__device__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
__device__ int roc_shmem_test(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
/**
|
||||
* @brief Perform a broadcast between PEs in the active set. The caller
|
||||
|
||||
@@ -136,35 +136,35 @@ __host__ void roc_shmem_to_all(roc_shmem_ctx_t ctx, T *dest, const T *source,
|
||||
int PE_size, T *pWrk, long *pSync);
|
||||
|
||||
template <typename T>
|
||||
__host__ void roc_shmem_wait_until(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
__host__ void roc_shmem_wait_until(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all(T* ptr, size_t nelems, const int *status,
|
||||
__host__ void wait_until_all(T* ivars, size_t nelems, const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any(T* ptr, size_t nelems, const int *status,
|
||||
__host__ size_t wait_until_any(T* ivars, size_t nelems, const int *status,
|
||||
roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some(T* ptr, size_t nelems, size_t* indices,
|
||||
__host__ size_t wait_until_some(T* ivars, size_t nelems, size_t* indices,
|
||||
const int *status, roc_shmem_cmps cmp, T val);
|
||||
|
||||
template <typename T>
|
||||
__host__ void wait_until_all_vector(T* ptr, size_t nelems, const int *status,
|
||||
__host__ void wait_until_all_vector(T* ivars, size_t nelems, const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_any_vector(T* ptr, size_t nelems, const int *status,
|
||||
__host__ size_t wait_until_any_vector(T* ivars, size_t nelems, const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ size_t wait_until_some_vector(T* ptr, size_t nelems,
|
||||
__host__ size_t wait_until_some_vector(T* ivars, size_t nelems,
|
||||
size_t* indices, const int *status,
|
||||
roc_shmem_cmps cmp, T* vals);
|
||||
|
||||
template <typename T>
|
||||
__host__ int roc_shmem_test(T *ptr, roc_shmem_cmps cmp, T val);
|
||||
__host__ int roc_shmem_test(T *ivars, roc_shmem_cmps cmp, T val);
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user