changed memcpy and memset device functions

Change-Id: Ia7f450536a75fad4fe13c7fcf5e9e7a9b5450f52
This commit is contained in:
Aditya Atluri
2016-10-11 17:43:15 -05:00
parent d71c0d10de
commit 288f024d00
2 changed files with 196 additions and 36 deletions
+9 -36
View File
@@ -585,48 +585,21 @@ __device__ void __threadfence_system(void) __attribute__((deprecated("Provided
// Device-side memcpy: the calling thread copies `size` bytes from `src` to `dst`.
//
// The copy is done entirely by the calling thread, one byte at a time; it does
// not depend on the thread or block index, so every thread that calls it
// performs the full copy on its own arguments.
//
// Parameters:
//   dst  - destination buffer, at least `size` bytes.
//   src  - source buffer, at least `size` bytes (only read, never written).
//   size - number of bytes to copy; 0 is a no-op.
// Returns:
//   `dst`, matching the standard C memcpy contract.
//
// Bytes are copied in ascending address order, so overlapping ranges where
// `dst` lies inside [src, src + size) are not supported.
__device__ static inline void* memcpy(void* dst, void* src, size_t size)
{
    uint8_t* dstPtr = (uint8_t*)dst;
    const uint8_t* srcPtr = (const uint8_t*)src;
    // Index with size_t: a 32-bit counter would wrap around and never reach
    // `size` when size >= 2^32.
    for (size_t i = 0; i < size; i++) {
        dstPtr[i] = srcPtr[i];
    }
    return dst;
}
// Device-side memset: the calling thread fills `size` bytes at `ptr` with `val`.
//
// The fill is done entirely by the calling thread, one byte at a time; it does
// not depend on the thread or block index, so every thread that calls it
// performs the full fill on its own arguments.
//
// Parameters:
//   ptr  - buffer to fill, at least `size` bytes.
//   val  - byte value written to every position.
//   size - number of bytes to write; 0 is a no-op.
// Returns:
//   `ptr`, matching the standard C memset contract.
__device__ static inline void* memset(void* ptr, uint8_t val, size_t size)
{
    uint8_t* dstPtr = (uint8_t*)ptr;
    // Index with size_t: a 32-bit counter would wrap around and never reach
    // `size` when size >= 2^32.
    for (size_t i = 0; i < size; i++) {
        dstPtr[i] = val;
    }
    return ptr;
}