@@ -161,7 +161,7 @@ __device__ void IPCContext::getmem_nbi_wave(void *dest, const void *source,
|
||||
/**
 * Copy `nelems` bytes from `source` to the address on PE `pe` that
 * corresponds to `dest`.
 *
 * `dest` is treated as an address relative to this PE's base pointer
 * (`wrk_sync_pool_bases_[my_pe]`); the same byte offset is applied to
 * `wrk_sync_pool_bases_[pe]` to obtain the actual write target.
 *
 * @param dest   Destination address, expressed in the calling PE's address
 *               range. NOTE(review): assumed to lie inside the region that
 *               starts at wrk_sync_pool_bases_[my_pe] -- not checked here.
 * @param source Local buffer to read from.
 * @param nelems Size handed to memcpy_lane (presumably a byte count --
 *               confirm against memcpy_lane's definition).
 * @param pe     Index of the target PE in wrk_sync_pool_bases_.
 */
__device__ void IPCContext::internal_putmem(void *dest, const void *source,
                                            size_t nelems, int pe) {
  // Byte offset of dest from the start of this PE's base pointer.
  uint64_t L_offset =
      reinterpret_cast<char *>(dest) - wrk_sync_pool_bases_[my_pe];

  // Exactly one copy, through the renamed per-lane helper. memcpy_lane takes
  // a non-const source pointer, hence the const_cast.
  memcpy_lane(wrk_sync_pool_bases_[pe] + L_offset,
              const_cast<void *>(source), nelems);

  // Fence issued after the copy (presumably to order the write before
  // returning -- confirm against ipcFence's definition).
  ipcImpl_.ipcFence();
}
|
||||
|
||||
@@ -169,7 +169,7 @@ __device__ void IPCContext::internal_getmem(void *dest, const void *source,
|
||||
size_t nelems, int pe) {
|
||||
const char *src_typed = reinterpret_cast<const char *>(source);
|
||||
uint64_t L_offset = const_cast<char *>(src_typed) - wrk_sync_pool_bases_[my_pe];
|
||||
memcpy(dest, wrk_sync_pool_bases_[pe] + L_offset, nelems);
|
||||
memcpy_lane(dest, wrk_sync_pool_bases_[pe] + L_offset, nelems);
|
||||
ipcImpl_.ipcFence();
|
||||
}
|
||||
|
||||
|
||||
@@ -219,7 +219,7 @@ __host__ void IpcOnImpl::ipcHostStop() {
|
||||
}
|
||||
|
||||
/**
 * Copy `size` bytes from `src` to `dst` by delegating to memcpy_lane.
 *
 * The copy is issued exactly once; the earlier `memcpy` spelling of the
 * helper is not called in addition to memcpy_lane.
 *
 * @param dst  Destination address.
 * @param src  Source address.
 * @param size Size forwarded to memcpy_lane (presumably a byte count --
 *             confirm against memcpy_lane's definition).
 */
__device__ void IpcOnImpl::ipcCopy(void *dst, void *src, size_t size) {
  memcpy_lane(dst, src, size);
}
|
||||
|
||||
__device__ void IpcOnImpl::ipcCopy_wave(void *dst, void *src, size_t size) {
|
||||
|
||||
@@ -385,7 +385,7 @@ __device__ void gpu_dprintf(const char* fmt, const Args&... args) {
|
||||
// Atomically read the value pointed to by VAR with sequentially consistent
// ordering (wraps the compiler's __atomic_load_n builtin).
#define LOAD(VAR) __atomic_load_n((VAR), __ATOMIC_SEQ_CST)
|
||||
// Atomically write SRC to the location pointed to by DST with sequentially
// consistent ordering (wraps the compiler's __atomic_store_n builtin).
#define STORE(DST, SRC) __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST)
|
||||
|
||||
__device__ __forceinline__ void memcpy(void* dst, void* src, size_t size) {
|
||||
__device__ __forceinline__ void memcpy_lane(void* dst, void* src, size_t size) {
|
||||
uint8_t* dst_bytes{static_cast<uint8_t*>(dst)};
|
||||
uint8_t* src_bytes{static_cast<uint8_t*>(src)};
|
||||
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする