diff --git a/projects/rocshmem/src/ipc/context_ipc_device.cpp b/projects/rocshmem/src/ipc/context_ipc_device.cpp index 1bb39b260f..6cda9876d2 100644 --- a/projects/rocshmem/src/ipc/context_ipc_device.cpp +++ b/projects/rocshmem/src/ipc/context_ipc_device.cpp @@ -113,6 +113,7 @@ __device__ void IPCContext::putmem_wg(void *dest, const void *source, uint64_t L_offset = reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; ipcImpl_.ipcCopy_wg(ipcImpl_.ipc_bases[pe] + L_offset, const_cast(source), nelems); __syncthreads(); + ipcImpl_.ipcFence(); } __device__ void IPCContext::getmem_wg(void *dest, const void *source, @@ -121,6 +122,7 @@ __device__ void IPCContext::getmem_wg(void *dest, const void *source, uint64_t L_offset = const_cast(src_typed) - ipcImpl_.ipc_bases[my_pe]; ipcImpl_.ipcCopy_wg(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems); __syncthreads(); + ipcImpl_.ipcFence(); } __device__ void IPCContext::putmem_nbi_wg(void *dest, const void *source,