IPC: insert __threadfence_system() after *wg RMA APIs to guarantee global memory visibility (#346)

[ROCm/rocshmem commit: f907ef91e4]
This commit is contained in:
Avinash Kethineedi
2025-12-04 10:21:25 -06:00
committed by GitHub
Parent 3d658b558b
Commit 1ecc355062
@@ -113,6 +113,7 @@ __device__ void IPCContext::putmem_wg(void *dest, const void *source,
uint64_t L_offset = reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy_wg(ipcImpl_.ipc_bases[pe] + L_offset, const_cast<void *>(source), nelems);
__syncthreads();
ipcImpl_.ipcFence();
}
__device__ void IPCContext::getmem_wg(void *dest, const void *source,
@@ -121,6 +122,7 @@ __device__ void IPCContext::getmem_wg(void *dest, const void *source,
uint64_t L_offset = const_cast<char *>(src_typed) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy_wg(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems);
__syncthreads();
ipcImpl_.ipcFence();
}
__device__ void IPCContext::putmem_nbi_wg(void *dest, const void *source,