From 1ecc3550622605584853072b16736fc121850850 Mon Sep 17 00:00:00 2001
From: Avinash Kethineedi
Date: Thu, 4 Dec 2025 10:21:25 -0600
Subject: [PATCH] IPC: insert `__threadfence_system()` after *wg RMA APIs to guarantee global memory visibility (#346)

[ROCm/rocshmem commit: f907ef91e446e0c8689976e95d34a7032f519ab0]
---
 projects/rocshmem/src/ipc/context_ipc_device.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/projects/rocshmem/src/ipc/context_ipc_device.cpp b/projects/rocshmem/src/ipc/context_ipc_device.cpp
index 1bb39b260f..6cda9876d2 100644
--- a/projects/rocshmem/src/ipc/context_ipc_device.cpp
+++ b/projects/rocshmem/src/ipc/context_ipc_device.cpp
@@ -113,6 +113,7 @@ __device__ void IPCContext::putmem_wg(void *dest, const void *source,
   uint64_t L_offset = reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe];
   ipcImpl_.ipcCopy_wg(ipcImpl_.ipc_bases[pe] + L_offset, const_cast(source), nelems);
   __syncthreads();
+  ipcImpl_.ipcFence();
 }
 
 __device__ void IPCContext::getmem_wg(void *dest, const void *source,
@@ -121,6 +122,7 @@ __device__ void IPCContext::getmem_wg(void *dest, const void *source,
   uint64_t L_offset = const_cast(src_typed) - ipcImpl_.ipc_bases[my_pe];
   ipcImpl_.ipcCopy_wg(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems);
   __syncthreads();
+  ipcImpl_.ipcFence();
 }
 
 __device__ void IPCContext::putmem_nbi_wg(void *dest, const void *source,