diff --git a/src/ipc/context_ipc_device.cpp b/src/ipc/context_ipc_device.cpp index 83f3f609e6..47d45565ef 100644 --- a/src/ipc/context_ipc_device.cpp +++ b/src/ipc/context_ipc_device.cpp @@ -69,6 +69,7 @@ __device__ void IPCContext::putmem(void *dest, const void *source, size_t nelems reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; ipcImpl_.ipcCopy(ipcImpl_.ipc_bases[pe] + L_offset, const_cast(source), nelems); + ipcImpl_.ipcFence(); } __device__ void IPCContext::getmem(void *dest, const void *source, size_t nelems, @@ -77,6 +78,7 @@ __device__ void IPCContext::getmem(void *dest, const void *source, size_t nelems uint64_t L_offset = const_cast(src_typed) - ipcImpl_.ipc_bases[my_pe]; ipcImpl_.ipcCopy(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems); + ipcImpl_.ipcFence(); } __device__ void IPCContext::putmem_nbi(void *dest, const void *source, @@ -142,6 +144,7 @@ __device__ void IPCContext::putmem_wave(void *dest, const void *source, reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; ipcImpl_.ipcCopy_wave(ipcImpl_.ipc_bases[pe] + L_offset, const_cast(source), nelems); + ipcImpl_.ipcFence(); } __device__ void IPCContext::getmem_wave(void *dest, const void *source, @@ -151,6 +154,7 @@ __device__ void IPCContext::getmem_wave(void *dest, const void *source, const_cast(src_typed) - ipcImpl_.ipc_bases[my_pe]; ipcImpl_.ipcCopy_wave(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems); + ipcImpl_.ipcFence(); } __device__ void IPCContext::putmem_nbi_wave(void *dest, const void *source, @@ -169,6 +173,7 @@ __device__ void IPCContext::internal_putmem(void *dest, const void *source, reinterpret_cast(dest) - Wrk_Sync_buffer_bases_[my_pe]; memcpy(Wrk_Sync_buffer_bases_[pe] + L_offset, const_cast(source), nelems); + ipcImpl_.ipcFence(); } __device__ void IPCContext::internal_getmem(void *dest, const void *source, @@ -177,6 +182,7 @@ __device__ void IPCContext::internal_getmem(void *dest, const void *source, uint64_t L_offset = const_cast(src_typed) - Wrk_Sync_buffer_bases_[my_pe]; memcpy(dest, Wrk_Sync_buffer_bases_[pe] + L_offset, nelems); + ipcImpl_.ipcFence(); } __device__ void IPCContext::internal_putmem_wg(void *dest, const void *source, @@ -203,6 +209,7 @@ __device__ void IPCContext::internal_putmem_wave(void *dest, reinterpret_cast(dest) - Wrk_Sync_buffer_bases_[my_pe]; memcpy_wave(Wrk_Sync_buffer_bases_[pe] + L_offset, const_cast(source), nelems); + ipcImpl_.ipcFence(); } __device__ void IPCContext::internal_getmem_wave(void *dest, @@ -212,6 +219,7 @@ __device__ void IPCContext::internal_getmem_wave(void *dest, const_cast(src_typed) - Wrk_Sync_buffer_bases_[my_pe]; memcpy_wave(dest, Wrk_Sync_buffer_bases_[pe] + L_offset, nelems); + ipcImpl_.ipcFence(); } } // namespace rocshmem