remove local_pe calculation from puts, gets and atomics functions
* All the PEs are assumed to be accessible using IPC backend
This commit is contained in:
@@ -59,22 +59,18 @@ __device__ void IPCContext::ctx_destroy(){
|
||||
|
||||
__device__ void IPCContext::putmem(void *dest, const void *source, size_t nelems,
|
||||
int pe) {
|
||||
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcCopy(ipcImpl_.ipc_bases[local_pe] + L_offset,
|
||||
ipcImpl_.ipcCopy(ipcImpl_.ipc_bases[pe] + L_offset,
|
||||
const_cast<void *>(source), nelems);
|
||||
}
|
||||
|
||||
__device__ void IPCContext::getmem(void *dest, const void *source, size_t nelems,
|
||||
int pe) {
|
||||
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
const char *src_typed = reinterpret_cast<const char *>(source);
|
||||
uint64_t L_offset =
|
||||
const_cast<char *>(src_typed) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcCopy(dest, ipcImpl_.ipc_bases[local_pe] + L_offset, nelems);
|
||||
ipcImpl_.ipcCopy(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems);
|
||||
}
|
||||
|
||||
__device__ void IPCContext::putmem_nbi(void *dest, const void *source,
|
||||
@@ -115,23 +111,19 @@ __device__ void IPCContext::sync(roc_shmem_team_t team) {
|
||||
|
||||
__device__ void IPCContext::putmem_wg(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcCopy_wg(ipcImpl_.ipc_bases[local_pe] + L_offset,
|
||||
ipcImpl_.ipcCopy_wg(ipcImpl_.ipc_bases[pe] + L_offset,
|
||||
const_cast<void *>(source), nelems);
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
__device__ void IPCContext::getmem_wg(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
const char *src_typed = reinterpret_cast<const char *>(source);
|
||||
uint64_t L_offset =
|
||||
const_cast<char *>(src_typed) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcCopy_wg(dest, ipcImpl_.ipc_bases[local_pe] + L_offset, nelems);
|
||||
ipcImpl_.ipcCopy_wg(dest, ipcImpl_.ipc_bases[pe] + L_offset, nelems);
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
@@ -147,22 +139,18 @@ __device__ void IPCContext::getmem_nbi_wg(void *dest, const void *source,
|
||||
|
||||
__device__ void IPCContext::putmem_wave(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcCopy_wave(ipcImpl_.ipc_bases[local_pe] + L_offset,
|
||||
ipcImpl_.ipcCopy_wave(ipcImpl_.ipc_bases[pe] + L_offset,
|
||||
const_cast<void *>(source), nelems);
|
||||
}
|
||||
|
||||
__device__ void IPCContext::getmem_wave(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
// TODO (Avinash) check if PE is available for IPC using (isIpcAvailable)
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
const char *src_typed = reinterpret_cast<const char *>(source);
|
||||
uint64_t L_offset =
|
||||
const_cast<char *>(src_typed) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcCopy_wave(dest, ipcImpl_.ipc_bases[local_pe] + L_offset,
|
||||
ipcImpl_.ipcCopy_wave(dest, ipcImpl_.ipc_bases[pe] + L_offset,
|
||||
nelems);
|
||||
}
|
||||
|
||||
|
||||
@@ -71,20 +71,18 @@ __device__ void IPCContext::get_nbi(T *dest, const T *source, size_t nelems,
|
||||
// Atomics
|
||||
template <typename T>
|
||||
__device__ void IPCContext::amo_add(void *dest, T value, int pe) {
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcAMOAdd(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[local_pe] + L_offset), value);
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void IPCContext::amo_set(void *dest, T value, int pe) {
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcAMOSet(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[local_pe] + L_offset), value);
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@@ -128,30 +126,27 @@ __device__ void IPCContext::amo_xor(void *dst, T value, int pe) {
|
||||
|
||||
template <typename T>
|
||||
__device__ void IPCContext::amo_cas(void *dest, T value, T cond, int pe) {
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
ipcImpl_.ipcAMOCas(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[local_pe] + L_offset), cond,
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), cond,
|
||||
value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T IPCContext::amo_fetch_add(void *dest, T value, int pe) {
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
return ipcImpl_.ipcAMOFetchAdd(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[local_pe] + L_offset), value);
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T IPCContext::amo_fetch_cas(void *dest, T value, T cond, int pe) {
|
||||
int local_pe = pe % ipcImpl_.shm_size;
|
||||
uint64_t L_offset =
|
||||
reinterpret_cast<char *>(dest) - ipcImpl_.ipc_bases[my_pe];
|
||||
return ipcImpl_.ipcAMOFetchCas(
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[local_pe] + L_offset), cond,
|
||||
reinterpret_cast<T *>(ipcImpl_.ipc_bases[pe] + L_offset), cond,
|
||||
value);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user