remove local_pe calculation from puts, gets and atomics functions

* All the PEs are assumed to be accessible using IPC backend
This commit is contained in:
avinashkethineedi
2024-09-05 11:52:00 -07:00
szülő e1e1ac6df6
commit 7bbf34d334
2 fájl változott, egészen pontosan 11 új sor hozzáadva és 28 régi sor törölve
+6 -18
Fájl megtekintése
@@ -59,22 +59,18 @@ __device__ void IPCContext::ctx_destroy(){
__device__ void IPCContext::putmem(void *dest, const void *source, size_t nelems,
int pe) {
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
int local_pe = pe % ipcImpl_.shm_size;
uint64_t L_offset =
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy(ipcImpl_.ipc_bases[local_pe] + L_offset,
ipcImpl_.ipcCopy(ipcImpl_.ipc_bases[pe] + L_offset,
const_cast<void *>(source), nelems);
}
__device__ void IPCContext::getmem(void *dest, const void *source, size_t nelems,
int pe) {
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
int local_pe = pe % ipcImpl_.shm_size;
const char *src_typed = reinterpret_cast<const char *>(source);
uint64_t L_offset =
const_cast<char *>(src_typed) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy(dest, ipcImpl_.ipc_bases[local_pe] + L_offset, nelems);
ipcImpl_.ipcCopy(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems);
}
__device__ void IPCContext::putmem_nbi(void *dest, const void *source,
@@ -115,23 +111,19 @@ __device__ void IPCContext::sync(roc_shmem_team_t team) {
__device__ void IPCContext::putmem_wg(void *dest, const void *source,
size_t nelems, int pe) {
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
int local_pe = pe % ipcImpl_.shm_size;
uint64_t L_offset =
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy_wg(ipcImpl_.ipc_bases[local_pe] + L_offset,
ipcImpl_.ipcCopy_wg(ipcImpl_.ipc_bases[pe] + L_offset,
const_cast<void *>(source), nelems);
__syncthreads();
}
__device__ void IPCContext::getmem_wg(void *dest, const void *source,
size_t nelems, int pe) {
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
int local_pe = pe % ipcImpl_.shm_size;
const char *src_typed = reinterpret_cast<const char *>(source);
uint64_t L_offset =
const_cast<char *>(src_typed) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy_wg(dest, ipcImpl_.ipc_bases[local_pe] + L_offset, nelems);
ipcImpl_.ipcCopy_wg(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems);
__syncthreads();
}
@@ -147,22 +139,18 @@ __device__ void IPCContext::getmem_nbi_wg(void *dest, const void *source,
__device__ void IPCContext::putmem_wave(void *dest, const void *source,
size_t nelems, int pe) {
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
int local_pe = pe % ipcImpl_.shm_size;
uint64_t L_offset =
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy_wave(ipcImpl_.ipc_bases[local_pe] + L_offset,
ipcImpl_.ipcCopy_wave(ipcImpl_.ipc_bases[pe] + L_offset,
const_cast<void *>(source), nelems);
}
__device__ void IPCContext::getmem_wave(void *dest, const void *source,
size_t nelems, int pe) {
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
int local_pe = pe % ipcImpl_.shm_size;
const char *src_typed = reinterpret_cast<const char *>(source);
uint64_t L_offset =
const_cast<char *>(src_typed) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcCopy_wave(dest, ipcImpl_.ipc_bases[local_pe] + L_offset,
ipcImpl_.ipcCopy_wave(dest, ipcImpl_.ipc_bases[pe] + L_offset,
nelems);
}
+5 -10
Fájl megtekintése
@@ -71,20 +71,18 @@ __device__ void IPCContext::get_nbi(T *dest, const T *source, size_t nelems,
// Atomics
template <typename T>
__device__ void IPCContext::amo_add(void *dest, T value, int pe) {
int local_pe = pe % ipcImpl_.shm_size;
uint64_t L_offset =
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcAMOAdd(
reinterpret_cast<T *>(ipcImpl_.ipc_bases[local_pe] + L_offset), value);
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
}
template <typename T>
__device__ void IPCContext::amo_set(void *dest, T value, int pe) {
int local_pe = pe % ipcImpl_.shm_size;
uint64_t L_offset =
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcAMOSet(
reinterpret_cast<T *>(ipcImpl_.ipc_bases[local_pe] + L_offset), value);
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
}
template <typename T>
@@ -128,30 +126,27 @@ __device__ void IPCContext::amo_xor(void *dst, T value, int pe) {
template <typename T>
__device__ void IPCContext::amo_cas(void *dest, T value, T cond, int pe) {
int local_pe = pe % ipcImpl_.shm_size;
uint64_t L_offset =
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
ipcImpl_.ipcAMOCas(
reinterpret_cast<T *>(ipcImpl_.ipc_bases[local_pe] + L_offset), cond,
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), cond,
value);
}
template <typename T>
__device__ T IPCContext::amo_fetch_add(void *dest, T value, int pe) {
int local_pe = pe % ipcImpl_.shm_size;
uint64_t L_offset =
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
return ipcImpl_.ipcAMOFetchAdd(
reinterpret_cast<T *>(ipcImpl_.ipc_bases[local_pe] + L_offset), value);
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
}
template <typename T>
__device__ T IPCContext::amo_fetch_cas(void *dest, T value, T cond, int pe) {
int local_pe = pe % ipcImpl_.shm_size;
uint64_t L_offset =
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
return ipcImpl_.ipcAMOFetchCas(
reinterpret_cast<T *>(ipcImpl_.ipc_bases[local_pe] + L_offset), cond,
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), cond,
value);
}