diff --git a/projects/clr/hipamd/include/hcc_detail/hip_runtime.h b/projects/clr/hipamd/include/hcc_detail/hip_runtime.h index d009bec35b..740e60d0eb 100644 --- a/projects/clr/hipamd/include/hcc_detail/hip_runtime.h +++ b/projects/clr/hipamd/include/hcc_detail/hip_runtime.h @@ -568,6 +568,55 @@ __device__ void __threadfence_system(void); #define hipGridDim_y (hc_get_num_groups(1)) #define hipGridDim_z (hc_get_num_groups(2)) +// loop unrolling +__device__ static inline void* memcpy(void* dst, void* src, size_t size) +{ + uint64_t i = 0; + uint64_t totalLength = size/sizeof(uint32_t); + for(i=hipThreadIdx_x+hipBlockIdx_x*hipBlockDim_x; + i<(totalLength/4); + i = i + hipBlockDim_x * hipGridDim_x) + { + ((uint32_t*)dst)[4*i] = ((uint32_t*)src)[4*i]; + ((uint32_t*)dst)[4*i+1] = ((uint32_t*)src)[4*i+1]; + ((uint32_t*)dst)[4*i+2] = ((uint32_t*)src)[4*i+2]; + ((uint32_t*)dst)[4*i+3] = ((uint32_t*)src)[4*i+3]; + } + if(4*i < totalLength){ + ((uint32_t*)dst)[4*i] = ((uint32_t*)src)[4*i]; + ((uint32_t*)dst)[4*i+1] = ((uint32_t*)src)[4*i+1]; + ((uint32_t*)dst)[4*i+2] = ((uint32_t*)src)[4*i+2]; + ((uint32_t*)dst)[4*i+3] = ((uint32_t*)src)[4*i+3]; + + } + return nullptr; +} + +__device__ static inline void* memset(void* ptr, uint8_t val, size_t size) +{ + uint32_t _val = 0; + _val = (val | val << 8 | val << 16 | val << 24); + uint64_t totalLength = size/sizeof(uint32_t); + uint64_t i = 0; + for(i=hipThreadIdx_x+hipBlockIdx_x*hipBlockDim_x; + i<(totalLength/4); + i = i + hipBlockDim_x * hipGridDim_x) + { + ((uint32_t*)ptr)[4*i] = _val; + ((uint32_t*)ptr)[4*i+1] = _val; + ((uint32_t*)ptr)[4*i+2] = _val; + ((uint32_t*)ptr)[4*i+3] = _val; + } + if(4*i < totalLength){ + ((uint32_t*)ptr)[4*i] = _val; + ((uint32_t*)ptr)[4*i+1] = _val; + ((uint32_t*)ptr)[4*i+2] = _val; + ((uint32_t*)ptr)[4*i+3] = _val; + + } + return nullptr; +} + #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) #define HIP_KERNEL_NAME(...) __VA_ARGS__ @@ -653,4 +702,6 @@ do {\ */ + + #endif diff --git a/projects/clr/hipamd/tests/src/hipDeviceMemcpy.cpp b/projects/clr/hipamd/tests/src/hipDeviceMemcpy.cpp new file mode 100644 index 0000000000..7c1cc59808 --- /dev/null +++ b/projects/clr/hipamd/tests/src/hipDeviceMemcpy.cpp @@ -0,0 +1,42 @@ +#include +#include +#include + +#define LEN 1030 +#define SIZE LEN << 2 + +__global__ void cpy(hipLaunchParm lp, uint32_t *Out, uint32_t *In, uint32_t *Vald) +{ + memcpy(Out, In, SIZE, Vald); +} + +__global__ void set(hipLaunchParm lp, uint32_t *ptr, uint8_t val, size_t size) +{ + memset(ptr, val, size); +} + +int main() +{ + uint32_t *A, *Ad, *B, *Bd; + uint32_t *Val, *Vald; + A = new uint32_t[LEN]; + B = new uint32_t[LEN]; + Val = new uint32_t; + *Val = 0; + for(int i=0;i