Add IPC and/or/xor/swap amos, reenable functional tests (#184)
Αυτή η υποβολή περιλαμβάνεται σε:
υποβλήθηκε από
GitHub
γονέας
f5aefd15f3
υποβολή
96336da78f
@@ -85,49 +85,59 @@ __device__ void IPCContext::amo_set(void *dest, T value, int pe) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T IPCContext::amo_swap(void *dst, T value, int pe) {
|
||||
printf("IPC amo_swap not implemented\n");
|
||||
abort();
|
||||
return 0;
|
||||
__device__ T IPCContext::amo_swap(void *dest, T value, int pe) {
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
return ipcImpl_.ipcAMOSwap(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T IPCContext::amo_fetch_and(void *dst, T value, int pe) {
|
||||
printf("IPC amo_fetch_and not implemented\n");
|
||||
abort();
|
||||
return 0;
|
||||
__device__ T IPCContext::amo_fetch_and(void *dest, T value, int pe) {
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
return ipcImpl_.ipcAMOFetchAnd(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void IPCContext::amo_and(void *dst, T value, int pe) {
|
||||
printf("IPC amo_and not implemented\n");
|
||||
abort();
|
||||
__device__ void IPCContext::amo_and(void *dest, T value, int pe) {
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcAMOAnd(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T IPCContext::amo_fetch_or(void *dst, T value, int pe) {
|
||||
printf("IPC amo_fetch_or not implemented\n");
|
||||
abort();
|
||||
return 0;
|
||||
__device__ T IPCContext::amo_fetch_or(void *dest, T value, int pe) {
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
return ipcImpl_.ipcAMOFetchOr(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void IPCContext::amo_or(void *dst, T value, int pe) {
|
||||
printf("IPC amo_or not implemented\n");
|
||||
abort();
|
||||
__device__ void IPCContext::amo_or(void *dest, T value, int pe) {
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcAMOOr(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T IPCContext::amo_fetch_xor(void *dst, T value, int pe) {
|
||||
printf("IPC amo_fetch_xor not implemented\n");
|
||||
abort();
|
||||
return 0;
|
||||
__device__ T IPCContext::amo_fetch_xor(void *dest, T value, int pe) {
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
return ipcImpl_.ipcAMOFetchXor(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void IPCContext::amo_xor(void *dst, T value, int pe) {
|
||||
printf("IPC amo_xor not implemented\n");
|
||||
abort();
|
||||
__device__ void IPCContext::amo_xor(void *dest, T value, int pe) {
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcAMOXor(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
||||
+48
-13
@@ -84,12 +84,25 @@ class IpcOnImpl {
|
||||
|
||||
__device__ void ipcFence() { __threadfence_system(); }
|
||||
|
||||
template <typename T>
|
||||
__device__ void ipcAMOAdd(T *val, T value) {
|
||||
__hip_atomic_fetch_add(val, value, __ATOMIC_SEQ_CST,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T ipcAMOFetchAdd(T *val, T value) {
|
||||
return __hip_atomic_fetch_add(val, value, __ATOMIC_SEQ_CST,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void ipcAMOCas(T *val, T cond, T value) {
|
||||
__hip_atomic_compare_exchange_strong(val, &cond, value, __ATOMIC_SEQ_CST,
|
||||
__ATOMIC_SEQ_CST,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T ipcAMOFetchCas(T *val, T cond, T value) {
|
||||
__hip_atomic_compare_exchange_strong(val, &cond, value, __ATOMIC_SEQ_CST,
|
||||
@@ -98,24 +111,46 @@ class IpcOnImpl {
|
||||
return cond;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void ipcAMOAdd(T *val, T value) {
|
||||
__hip_atomic_fetch_add(val, value, __ATOMIC_SEQ_CST,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void ipcAMOCas(T *val, T cond, T value) {
|
||||
__hip_atomic_compare_exchange_strong(val, &cond, value, __ATOMIC_SEQ_CST,
|
||||
__ATOMIC_SEQ_CST,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void ipcAMOSet(T *val, T value) {
|
||||
__hip_atomic_store(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T ipcAMOSwap(T *val, T value) {
|
||||
return __hip_atomic_exchange(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void ipcAMOAnd(T *val, T value) {
|
||||
__hip_atomic_fetch_and(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T ipcAMOFetchAnd(T *val, T value) {
|
||||
return __hip_atomic_fetch_and(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void ipcAMOOr(T *val, T value) {
|
||||
__hip_atomic_fetch_or(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T ipcAMOFetchOr(T *val, T value) {
|
||||
return __hip_atomic_fetch_or(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void ipcAMOXor(T *val, T value) {
|
||||
__hip_atomic_fetch_xor(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T ipcAMOFetchXor(T *val, T value) {
|
||||
return __hip_atomic_fetch_xor(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
__device__ void zero_byte_read(int pe) {
|
||||
int local_pe = pe % shm_size;
|
||||
uint32_t *pe_ipc_base = reinterpret_cast<uint32_t *>(ipc_bases[local_pe]);
|
||||
|
||||
@@ -75,19 +75,9 @@ void AMOBitwiseTester<T>::launchKernel(dim3 gridsize, dim3 blocksize, int loop,
|
||||
num_timed_msgs = loop;
|
||||
}
|
||||
|
||||
#if defined(USE_IPC) and not defined(USE_RO)
|
||||
#define DISABLE_IPC_TEST 1
|
||||
#else
|
||||
#define DISABLE_IPC_TEST 0
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
void AMOBitwiseTester<T>::verifyResults(size_t size) {
|
||||
T ret;
|
||||
if(DISABLE_IPC_TEST) {
|
||||
printf("AMO binary ops not implemented for IPC: values were not verified\n");
|
||||
return;
|
||||
}
|
||||
if (args.myid == 0) {
|
||||
T expected_val = 0;
|
||||
|
||||
@@ -141,10 +131,6 @@ void AMOBitwiseTester<T>::verifyResults(size_t size) {
|
||||
TestType type, ShmemContextType ctx_type) { \
|
||||
__shared__ rocshmem_ctx_t ctx; \
|
||||
int wg_id = get_flat_grid_id(); \
|
||||
if(DISABLE_IPC_TEST) { \
|
||||
printf("AMO binary ops not implemented for IPC: test was not run\n"); \
|
||||
return; \
|
||||
} \
|
||||
rocshmem_wg_init(); \
|
||||
rocshmem_wg_ctx_create(ctx_type, &ctx); \
|
||||
if (hipThreadIdx_x == 0) { \
|
||||
|
||||
@@ -75,12 +75,6 @@ void AMOExtendedTester<T>::launchKernel(dim3 gridsize, dim3 blocksize, int loop,
|
||||
num_timed_msgs = loop;
|
||||
}
|
||||
|
||||
#if defined(USE_IPC) and not defined(USE_RO)
|
||||
#define DISABLE_IPC_TEST 1
|
||||
#else
|
||||
#define DISABLE_IPC_TEST 0
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
void AMOExtendedTester<T>::verifyResults(size_t size) {
|
||||
T ret;
|
||||
@@ -95,10 +89,6 @@ void AMOExtendedTester<T>::verifyResults(size_t size) {
|
||||
expected_val = 44;
|
||||
break;
|
||||
case AMO_SwapTestType:
|
||||
if(DISABLE_IPC_TEST) {
|
||||
printf("AMO Swap not implemented for IPC: values were not verified\n");
|
||||
return;
|
||||
}
|
||||
expected_val = num_msgs / 2;
|
||||
break;
|
||||
default:
|
||||
@@ -146,10 +136,6 @@ void AMOExtendedTester<T>::verifyResults(size_t size) {
|
||||
rocshmem_ctx_##TNAME##_atomic_set(ctx, (T *)r_buf, 44, 1); \
|
||||
break; \
|
||||
case AMO_SwapTestType: \
|
||||
if(DISABLE_IPC_TEST) { \
|
||||
printf("AMO Swap not implemented for IPC: test was not run\n"); \
|
||||
break; \
|
||||
} \
|
||||
ret = rocshmem_ctx_##TNAME##_atomic_swap(ctx, (T *)r_buf, \
|
||||
ret + 1, 1); \
|
||||
break; \
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user