From 7bbf34d33446f253917b00a9dd237916c0940957 Mon Sep 17 00:00:00 2001 From: avinashkethineedi Date: Thu, 5 Sep 2024 11:52:00 -0700 Subject: [PATCH] remove local_pe calculation from puts, gets and atomics functions * All the PEs are assumed to be accessible using IPC backend --- src/ipc/context_ipc_device.cpp | 24 ++++++------------------ src/ipc/context_ipc_tmpl_device.hpp | 15 +++++---------- 2 files changed, 11 insertions(+), 28 deletions(-) diff --git a/src/ipc/context_ipc_device.cpp b/src/ipc/context_ipc_device.cpp index c1fa885c99..4bf7072aa5 100644 --- a/src/ipc/context_ipc_device.cpp +++ b/src/ipc/context_ipc_device.cpp @@ -59,22 +59,18 @@ __device__ void IPCContext::ctx_destroy(){ __device__ void IPCContext::putmem(void *dest, const void *source, size_t nelems, int pe) { - // TODO (Avinash) check if PE is available for IPC using (isIpcAvailable) - int local_pe = pe % ipcImpl_.shm_size; uint64_t L_offset = reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; - ipcImpl_.ipcCopy(ipcImpl_.ipc_bases[local_pe] + L_offset, + ipcImpl_.ipcCopy(ipcImpl_.ipc_bases[pe] + L_offset, const_cast(source), nelems); } __device__ void IPCContext::getmem(void *dest, const void *source, size_t nelems, int pe) { - // TODO (Avinash) check if PE is available for IPC using (isIpcAvailable) - int local_pe = pe % ipcImpl_.shm_size; const char *src_typed = reinterpret_cast(source); uint64_t L_offset = const_cast(src_typed) - ipcImpl_.ipc_bases[my_pe]; - ipcImpl_.ipcCopy(dest, ipcImpl_.ipc_bases[local_pe] + L_offset, nelems); + ipcImpl_.ipcCopy(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems); } __device__ void IPCContext::putmem_nbi(void *dest, const void *source, @@ -115,23 +111,19 @@ __device__ void IPCContext::sync(roc_shmem_team_t team) { __device__ void IPCContext::putmem_wg(void *dest, const void *source, size_t nelems, int pe) { - // TODO (Avinash) check if PE is available for IPC using (isIpcAvailable) - int local_pe = pe % ipcImpl_.shm_size; uint64_t L_offset = reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; - ipcImpl_.ipcCopy_wg(ipcImpl_.ipc_bases[local_pe] + L_offset, + ipcImpl_.ipcCopy_wg(ipcImpl_.ipc_bases[pe] + L_offset, const_cast(source), nelems); __syncthreads(); } __device__ void IPCContext::getmem_wg(void *dest, const void *source, size_t nelems, int pe) { - // TODO (Avinash) check if PE is available for IPC using (isIpcAvailable) - int local_pe = pe % ipcImpl_.shm_size; const char *src_typed = reinterpret_cast(source); uint64_t L_offset = const_cast(src_typed) - ipcImpl_.ipc_bases[my_pe]; - ipcImpl_.ipcCopy_wg(dest, ipcImpl_.ipc_bases[local_pe] + L_offset, nelems); + ipcImpl_.ipcCopy_wg(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems); __syncthreads(); } @@ -147,22 +139,18 @@ __device__ void IPCContext::getmem_nbi_wg(void *dest, const void *source, __device__ void IPCContext::putmem_wave(void *dest, const void *source, size_t nelems, int pe) { - // TODO (Avinash) check if PE is available for IPC using (isIpcAvailable) - int local_pe = pe % ipcImpl_.shm_size; uint64_t L_offset = reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; - ipcImpl_.ipcCopy_wave(ipcImpl_.ipc_bases[local_pe] + L_offset, + ipcImpl_.ipcCopy_wave(ipcImpl_.ipc_bases[pe] + L_offset, const_cast(source), nelems); } __device__ void IPCContext::getmem_wave(void *dest, const void *source, size_t nelems, int pe) { - // TODO (Avinash) check if PE is available for IPC using (isIpcAvailable) - int local_pe = pe % ipcImpl_.shm_size; const char *src_typed = reinterpret_cast(source); uint64_t L_offset = const_cast(src_typed) - ipcImpl_.ipc_bases[my_pe]; - ipcImpl_.ipcCopy_wave(dest, ipcImpl_.ipc_bases[local_pe] + L_offset, + ipcImpl_.ipcCopy_wave(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems); } diff --git a/src/ipc/context_ipc_tmpl_device.hpp b/src/ipc/context_ipc_tmpl_device.hpp index 5e697cafaf..94ef855736 100644 --- a/src/ipc/context_ipc_tmpl_device.hpp +++ b/src/ipc/context_ipc_tmpl_device.hpp @@ -71,20 +71,18 @@ __device__ void IPCContext::get_nbi(T *dest, const T *source, size_t nelems, // Atomics template __device__ void IPCContext::amo_add(void *dest, T value, int pe) { - int local_pe = pe % ipcImpl_.shm_size; uint64_t L_offset = reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; ipcImpl_.ipcAMOAdd( - reinterpret_cast(ipcImpl_.ipc_bases[local_pe] + L_offset), value); + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), value); } template __device__ void IPCContext::amo_set(void *dest, T value, int pe) { - int local_pe = pe % ipcImpl_.shm_size; uint64_t L_offset = reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; ipcImpl_.ipcAMOSet( - reinterpret_cast(ipcImpl_.ipc_bases[local_pe] + L_offset), value); + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), value); } template @@ -128,30 +126,27 @@ __device__ void IPCContext::amo_xor(void *dst, T value, int pe) { template __device__ void IPCContext::amo_cas(void *dest, T value, T cond, int pe) { - int local_pe = pe % ipcImpl_.shm_size; uint64_t L_offset = reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; ipcImpl_.ipcAMOCas( - reinterpret_cast(ipcImpl_.ipc_bases[local_pe] + L_offset), cond, + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), cond, value); } template __device__ T IPCContext::amo_fetch_add(void *dest, T value, int pe) { - int local_pe = pe % ipcImpl_.shm_size; uint64_t L_offset = reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; return ipcImpl_.ipcAMOFetchAdd( - reinterpret_cast(ipcImpl_.ipc_bases[local_pe] + L_offset), value); + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), value); } template __device__ T IPCContext::amo_fetch_cas(void *dest, T value, T cond, int pe) { - int local_pe = pe % ipcImpl_.shm_size; uint64_t L_offset = reinterpret_cast(dest) - ipcImpl_.ipc_bases[my_pe]; return ipcImpl_.ipcAMOFetchCas( - reinterpret_cast(ipcImpl_.ipc_bases[local_pe] + L_offset), cond, + reinterpret_cast(ipcImpl_.ipc_bases[pe] + L_offset), cond, value); }