diff --git a/src/ipc/context_ipc_tmpl_device.hpp b/src/ipc/context_ipc_tmpl_device.hpp index d35cec7f44..d3825a1f2f 100644 --- a/src/ipc/context_ipc_tmpl_device.hpp +++ b/src/ipc/context_ipc_tmpl_device.hpp @@ -85,49 +85,59 @@ __device__ void IPCContext::amo_set(void *dest, T value, int pe) { } template -__device__ T IPCContext::amo_swap(void *dst, T value, int pe) { - printf("IPC amo_swap not implemented\n"); - abort(); - return 0; +__device__ T IPCContext::amo_swap(void *dest, T value, int pe) { + uint64_t L_offset = + reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; + return ipcImpl_.ipcAMOSwap( + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), value); } template -__device__ T IPCContext::amo_fetch_and(void *dst, T value, int pe) { - printf("IPC amo_fetch_and not implemented\n"); - abort(); - return 0; +__device__ T IPCContext::amo_fetch_and(void *dest, T value, int pe) { + uint64_t L_offset = + reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; + return ipcImpl_.ipcAMOFetchAnd( + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), value); } template -__device__ void IPCContext::amo_and(void *dst, T value, int pe) { - printf("IPC amo_and not implemented\n"); - abort(); +__device__ void IPCContext::amo_and(void *dest, T value, int pe) { + uint64_t L_offset = + reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; + ipcImpl_.ipcAMOAnd( + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), value); } template -__device__ T IPCContext::amo_fetch_or(void *dst, T value, int pe) { - printf("IPC amo_fetch_or not implemented\n"); - abort(); - return 0; +__device__ T IPCContext::amo_fetch_or(void *dest, T value, int pe) { + uint64_t L_offset = + reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; + return ipcImpl_.ipcAMOFetchOr( + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), value); } template -__device__ void IPCContext::amo_or(void *dst, T value, int pe) { - printf("IPC amo_or not implemented\n"); - abort(); +__device__ void IPCContext::amo_or(void *dest, T value, int pe) { + uint64_t L_offset = + reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; + ipcImpl_.ipcAMOOr( + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), value); } template -__device__ T IPCContext::amo_fetch_xor(void *dst, T value, int pe) { - printf("IPC amo_fetch_xor not implemented\n"); - abort(); - return 0; +__device__ T IPCContext::amo_fetch_xor(void *dest, T value, int pe) { + uint64_t L_offset = + reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; + return ipcImpl_.ipcAMOFetchXor( + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), value); } template -__device__ void IPCContext::amo_xor(void *dst, T value, int pe) { - printf("IPC amo_xor not implemented\n"); - abort(); +__device__ void IPCContext::amo_xor(void *dest, T value, int pe) { + uint64_t L_offset = + reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; + ipcImpl_.ipcAMOXor( + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), value); } template diff --git a/src/ipc_policy.hpp b/src/ipc_policy.hpp index bae17faf08..83bcefd4a6 100644 --- a/src/ipc_policy.hpp +++ b/src/ipc_policy.hpp @@ -84,12 +84,25 @@ class IpcOnImpl { __device__ void ipcFence() { __threadfence_system(); } + template + __device__ void ipcAMOAdd(T *val, T value) { + __hip_atomic_fetch_add(val, value, __ATOMIC_SEQ_CST, + __HIP_MEMORY_SCOPE_SYSTEM); + } + template __device__ T ipcAMOFetchAdd(T *val, T value) { return __hip_atomic_fetch_add(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM); } + template + __device__ void ipcAMOCas(T *val, T cond, T value) { + __hip_atomic_compare_exchange_strong(val, &cond, value, __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST, + __HIP_MEMORY_SCOPE_SYSTEM); + } + template __device__ T ipcAMOFetchCas(T *val, T cond, T value) { __hip_atomic_compare_exchange_strong(val, &cond, value, __ATOMIC_SEQ_CST, @@ -98,24 +111,46 @@ class IpcOnImpl { return cond; } - template - __device__ void ipcAMOAdd(T *val, T value) { - __hip_atomic_fetch_add(val, value, __ATOMIC_SEQ_CST, - __HIP_MEMORY_SCOPE_SYSTEM); - } - - template - __device__ void ipcAMOCas(T *val, T cond, T value) { - __hip_atomic_compare_exchange_strong(val, &cond, value, __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST, - __HIP_MEMORY_SCOPE_SYSTEM); - } - template __device__ void ipcAMOSet(T *val, T value) { __hip_atomic_store(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM); } + template + __device__ T ipcAMOSwap(T *val, T value) { + return __hip_atomic_exchange(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM); + } + + template + __device__ void ipcAMOAnd(T *val, T value) { + __hip_atomic_fetch_and(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM); + } + + template + __device__ T ipcAMOFetchAnd(T *val, T value) { + return __hip_atomic_fetch_and(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM); + } + + template + __device__ void ipcAMOOr(T *val, T value) { + __hip_atomic_fetch_or(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM); + } + + template + __device__ T ipcAMOFetchOr(T *val, T value) { + return __hip_atomic_fetch_or(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM); + } + + template + __device__ void ipcAMOXor(T *val, T value) { + __hip_atomic_fetch_xor(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM); + } + + template + __device__ T ipcAMOFetchXor(T *val, T value) { + return __hip_atomic_fetch_xor(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM); + } + __device__ void zero_byte_read(int pe) { int local_pe = pe % shm_size; uint32_t *pe_ipc_base = reinterpret_cast(ipc_bases[local_pe]); diff --git a/tests/functional_tests/amo_bitwise_tester.cpp b/tests/functional_tests/amo_bitwise_tester.cpp index 446074b45a..daee50a913 100644 --- a/tests/functional_tests/amo_bitwise_tester.cpp +++ b/tests/functional_tests/amo_bitwise_tester.cpp @@ -75,19 +75,9 @@ void AMOBitwiseTester::launchKernel(dim3 gridsize, dim3 blocksize, int loop, num_timed_msgs = loop; } -#if defined(USE_IPC) and not defined(USE_RO) -#define DISABLE_IPC_TEST 1 -#else -#define DISABLE_IPC_TEST 0 -#endif - template void AMOBitwiseTester::verifyResults(size_t size) { T ret; - if(DISABLE_IPC_TEST) { - printf("AMO binary ops not implemented for IPC: values were not verified\n"); - return; - } if (args.myid == 0) { T expected_val = 0; @@ -141,10 +131,6 @@ void AMOBitwiseTester::verifyResults(size_t size) { TestType type, ShmemContextType ctx_type) { \ __shared__ rocshmem_ctx_t ctx; \ int wg_id = get_flat_grid_id(); \ - if(DISABLE_IPC_TEST) { \ - printf("AMO binary ops not implemented for IPC: test was not run\n"); \ - return; \ - } \ rocshmem_wg_init(); \ rocshmem_wg_ctx_create(ctx_type, &ctx); \ if (hipThreadIdx_x == 0) { \ diff --git a/tests/functional_tests/amo_extended_tester.cpp b/tests/functional_tests/amo_extended_tester.cpp index 15652048b4..8058c0cc8e 100644 --- a/tests/functional_tests/amo_extended_tester.cpp +++ b/tests/functional_tests/amo_extended_tester.cpp @@ -75,12 +75,6 @@ void AMOExtendedTester::launchKernel(dim3 gridsize, dim3 blocksize, int loop, num_timed_msgs = loop; } -#if defined(USE_IPC) and not defined(USE_RO) -#define DISABLE_IPC_TEST 1 -#else -#define DISABLE_IPC_TEST 0 -#endif - template void AMOExtendedTester::verifyResults(size_t size) { T ret; @@ -95,10 +89,6 @@ void AMOExtendedTester::verifyResults(size_t size) { expected_val = 44; break; case AMO_SwapTestType: - if(DISABLE_IPC_TEST) { - printf("AMO Swap not implemented for IPC: values were not verified\n"); - return; - } expected_val = num_msgs / 2; break; default: @@ -146,10 +136,6 @@ void AMOExtendedTester::verifyResults(size_t size) { rocshmem_ctx_##TNAME##_atomic_set(ctx, (T *)r_buf, 44, 1); \ break; \ case AMO_SwapTestType: \ - if(DISABLE_IPC_TEST) { \ - printf("AMO Swap not implemented for IPC: test was not run\n"); \ - break; \ - } \ ret = rocshmem_ctx_##TNAME##_atomic_swap(ctx, (T *)r_buf, \ ret + 1, 1); \ break; \