added memset and memcpy device functions

- Added memcpy and memset device functions
- Added test for memcpy and memset

Change-Id: Icd21a8dd964953b86d5e92889bf1664bee647219


[ROCm/clr commit: 5e7c396bcd]
Αυτή η υποβολή περιλαμβάνεται σε:
Aditya Atluri
2016-08-27 11:13:56 -05:00
γονέας fa734c5a6a
υποβολή 6a596c353a
2 αρχεία άλλαξαν με 93 προσθήκες και 0 διαγραφές
@@ -568,6 +568,55 @@ __device__ void __threadfence_system(void);
#define hipGridDim_y (hc_get_num_groups(1))
#define hipGridDim_z (hc_get_num_groups(2))
// loop unrolling
__device__ static inline void* memcpy(void* dst, void* src, size_t size)
{
uint64_t i = 0;
uint64_t totalLength = size/sizeof(uint32_t);
for(i=hipThreadIdx_x+hipBlockIdx_x*hipBlockDim_x;
i<(totalLength/4);
i = i + hipBlockDim_x * hipGridDim_x)
{
((uint32_t*)dst)[4*i] = ((uint32_t*)src)[4*i];
((uint32_t*)dst)[4*i+1] = ((uint32_t*)src)[4*i+1];
((uint32_t*)dst)[4*i+2] = ((uint32_t*)src)[4*i+2];
((uint32_t*)dst)[4*i+3] = ((uint32_t*)src)[4*i+3];
}
if(4*i < totalLength){
((uint32_t*)dst)[4*i] = ((uint32_t*)src)[4*i];
((uint32_t*)dst)[4*i+1] = ((uint32_t*)src)[4*i+1];
((uint32_t*)dst)[4*i+2] = ((uint32_t*)src)[4*i+2];
((uint32_t*)dst)[4*i+3] = ((uint32_t*)src)[4*i+3];
}
return nullptr;
}
__device__ static inline void* memset(void* ptr, uint8_t val, size_t size)
{
uint32_t _val = 0;
_val = (val | val << 8 | val << 16 | val << 24);
uint64_t totalLength = size/sizeof(uint32_t);
uint64_t i = 0;
for(i=hipThreadIdx_x+hipBlockIdx_x*hipBlockDim_x;
i<(totalLength/4);
i = i + hipBlockDim_x * hipGridDim_x)
{
((uint32_t*)ptr)[4*i] = _val;
((uint32_t*)ptr)[4*i+1] = _val;
((uint32_t*)ptr)[4*i+2] = _val;
((uint32_t*)ptr)[4*i+3] = _val;
}
if(4*i < totalLength){
((uint32_t*)ptr)[4*i] = _val;
((uint32_t*)ptr)[4*i+1] = _val;
((uint32_t*)ptr)[4*i+2] = _val;
((uint32_t*)ptr)[4*i+3] = _val;
}
return nullptr;
}
#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
#define HIP_KERNEL_NAME(...) __VA_ARGS__
@@ -653,4 +702,6 @@ do {\
*/
#endif
@@ -0,0 +1,42 @@
#include<iostream>
#include<hip/hip_runtime.h>
#include<hip/hip_runtime_api.h>
#define LEN 1030
#define SIZE LEN << 2
__global__ void cpy(hipLaunchParm lp, uint32_t *Out, uint32_t *In, uint32_t *Vald)
{
memcpy(Out, In, SIZE, Vald);
}
__global__ void set(hipLaunchParm lp, uint32_t *ptr, uint8_t val, size_t size)
{
memset(ptr, val, size);
}
int main()
{
uint32_t *A, *Ad, *B, *Bd;
uint32_t *Val, *Vald;
A = new uint32_t[LEN];
B = new uint32_t[LEN];
Val = new uint32_t;
*Val = 0;
for(int i=0;i<LEN;i++){
A[i] = i *1.0f;
B[i] = 0.0f;
}
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMalloc((void**)&Vald, sizeof(uint32_t));
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(cpy, dim3(1), dim3(LEN/4), 0, 0, Bd, Ad, Vald);
hipLaunchKernel(set, dim3(1), dim3(LEN/4), 0, 0, Bd, 0x1, SIZE);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
hipMemcpy(Val, Vald, sizeof(uint32_t), hipMemcpyDeviceToHost);
for(int i=LEN-16;i<LEN;i++){
std::cout<<A[i]<<" "<<B[i]<<std::endl;
}
std::cout<<*Val<<std::endl;
}