Update the put and get routines (putmem/getmem, their _wave variants, and the internal_ counterparts) to call ipcFence() after the data movement, ensuring data visibility

이 커밋은 다음에 포함됨:
avinashkethineedi
2024-11-12 16:52:07 +00:00
부모 958575d8a4
커밋 d1ee997542
+8
파일 보기
@@ -69,6 +69,7 @@ __device__ void IPCContext::putmem(void *dest, const void *source, size_t nelems
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy(ipcImpl_.ipc_bases[pe] + L_offset,
const_cast<void *>(source), nelems);
ipcImpl_.ipcFence();
}
__device__ void IPCContext::getmem(void *dest, const void *source, size_t nelems,
@@ -77,6 +78,7 @@ __device__ void IPCContext::getmem(void *dest, const void *source, size_t nelems
uint64_t L_offset =
const_cast<char *>(src_typed) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems);
ipcImpl_.ipcFence();
}
__device__ void IPCContext::putmem_nbi(void *dest, const void *source,
@@ -142,6 +144,7 @@ __device__ void IPCContext::putmem_wave(void *dest, const void *source,
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy_wave(ipcImpl_.ipc_bases[pe] + L_offset,
const_cast<void *>(source), nelems);
ipcImpl_.ipcFence();
}
__device__ void IPCContext::getmem_wave(void *dest, const void *source,
@@ -151,6 +154,7 @@ __device__ void IPCContext::getmem_wave(void *dest, const void *source,
const_cast<char *>(src_typed) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy_wave(dest, ipcImpl_.ipc_bases[pe] + L_offset,
nelems);
ipcImpl_.ipcFence();
}
__device__ void IPCContext::putmem_nbi_wave(void *dest, const void *source,
@@ -169,6 +173,7 @@ __device__ void IPCContext::internal_putmem(void *dest, const void *source,
reinterpret_cast<char *>(dest) - Wrk_Sync_buffer_bases_[my_pe];
memcpy(Wrk_Sync_buffer_bases_[pe] + L_offset,
const_cast<void *>(source), nelems);
ipcImpl_.ipcFence();
}
__device__ void IPCContext::internal_getmem(void *dest, const void *source,
@@ -177,6 +182,7 @@ __device__ void IPCContext::internal_getmem(void *dest, const void *source,
uint64_t L_offset =
const_cast<char *>(src_typed) - Wrk_Sync_buffer_bases_[my_pe];
memcpy(dest, Wrk_Sync_buffer_bases_[pe] + L_offset, nelems);
ipcImpl_.ipcFence();
}
__device__ void IPCContext::internal_putmem_wg(void *dest, const void *source,
@@ -203,6 +209,7 @@ __device__ void IPCContext::internal_putmem_wave(void *dest,
reinterpret_cast<char *>(dest) - Wrk_Sync_buffer_bases_[my_pe];
memcpy_wave(Wrk_Sync_buffer_bases_[pe] + L_offset,
const_cast<void *>(source), nelems);
ipcImpl_.ipcFence();
}
__device__ void IPCContext::internal_getmem_wave(void *dest,
@@ -212,6 +219,7 @@ __device__ void IPCContext::internal_getmem_wave(void *dest,
const_cast<char *>(src_typed) - Wrk_Sync_buffer_bases_[my_pe];
memcpy_wave(dest, Wrk_Sync_buffer_bases_[pe] + L_offset,
nelems);
ipcImpl_.ipcFence();
}
} // namespace rocshmem