changed memcpy and memset device functions

Change-Id: Ia7f450536a75fad4fe13c7fcf5e9e7a9b5450f52
This commit is contained in:
Aditya Atluri
2016-10-11 17:43:15 -05:00
والد d71c0d10de
کامیت 288f024d00
2 فایلهای تغییر یافته به همراه 196 افزوده شده و 36 حذف شده
@@ -0,0 +1,187 @@
#include<hip/hip_runtime_api.h>
#include<hip/hip_runtime.h>
#include<cassert>
#include<cstddef>
#include<cstdint>
#include<cstdlib>
#include<iostream>
#include<vector>
// Buffer sizes in bytes for each test case: (bytes handled per thread) * (4 threads).
// The bodies are parenthesised so the macros expand safely inside larger
// expressions (e.g. `64 / LEN8` must mean 64 / 32, not 64 / 8 * 4).
#define LEN8 (8 * 4)
#define LEN9 (9 * 4)
#define LEN10 (10 * 4)
#define LEN11 (11 * 4)
#define LEN12 (12 * 4)
// Copy kernel: every thread copies one 8-byte slice from In to Out.
// The slice starts at byte offset (global thread index * 8) in both buffers.
__global__ void MemCpy8(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
    const int globalIdx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    const int byteOffset = globalIdx * 8;
    memcpy(Out + byteOffset, In + byteOffset, 8);
}
// Copy kernel: every thread copies one 9-byte slice from In to Out.
// The slice starts at byte offset (global thread index * 9) in both buffers.
__global__ void MemCpy9(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
    const int globalIdx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    const int byteOffset = globalIdx * 9;
    memcpy(Out + byteOffset, In + byteOffset, 9);
}
// Copy kernel: every thread copies one 10-byte slice from In to Out.
// The slice starts at byte offset (global thread index * 10) in both buffers.
__global__ void MemCpy10(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
    const int globalIdx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    const int byteOffset = globalIdx * 10;
    memcpy(Out + byteOffset, In + byteOffset, 10);
}
// Copy kernel: every thread copies one 11-byte slice from In to Out.
// The slice starts at byte offset (global thread index * 11) in both buffers.
__global__ void MemCpy11(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
    const int globalIdx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    const int byteOffset = globalIdx * 11;
    memcpy(Out + byteOffset, In + byteOffset, 11);
}
// Copy kernel: every thread copies one 12-byte slice from In to Out.
// The slice starts at byte offset (global thread index * 12) in both buffers.
__global__ void MemCpy12(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
    const int globalIdx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    const int byteOffset = globalIdx * 12;
    memcpy(Out + byteOffset, In + byteOffset, 12);
}
// Fill kernel: every thread writes the byte value 1 into one 8-byte slice of In.
// The slice starts at byte offset (global thread index * 8).
__global__ void MemSet8(hipLaunchParm lp, uint8_t *In) {
    const int globalIdx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    uint8_t *slice = In + globalIdx * 8;
    memset(slice, 1, 8);
}
// Fill kernel: every thread writes the byte value 1 into one 9-byte slice of In.
// The slice starts at byte offset (global thread index * 9).
__global__ void MemSet9(hipLaunchParm lp, uint8_t *In) {
    const int globalIdx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    uint8_t *slice = In + globalIdx * 9;
    memset(slice, 1, 9);
}
// Fill kernel: every thread writes the byte value 1 into one 10-byte slice of In.
// The slice starts at byte offset (global thread index * 10).
__global__ void MemSet10(hipLaunchParm lp, uint8_t *In) {
    const int globalIdx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    uint8_t *slice = In + globalIdx * 10;
    memset(slice, 1, 10);
}
// Fill kernel: every thread writes the byte value 1 into one 11-byte slice of In.
// The slice starts at byte offset (global thread index * 11).
__global__ void MemSet11(hipLaunchParm lp, uint8_t *In) {
    const int globalIdx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    uint8_t *slice = In + globalIdx * 11;
    memset(slice, 1, 11);
}
// Fill kernel: every thread writes the byte value 1 into one 12-byte slice of In.
// The slice starts at byte offset (global thread index * 12).
__global__ void MemSet12(hipLaunchParm lp, uint8_t *In) {
    const int globalIdx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    uint8_t *slice = In + globalIdx * 12;
    memset(slice, 1, 12);
}
int main(){
uint8_t *A, *Ad, *B, *Bd, *C, *Cd;
A = new uint8_t[LEN8];
B = new uint8_t[LEN8];
C = new uint8_t[LEN8];
for(uint32_t i=0;i<LEN8;i++) {
A[i] = i;
B[i] = 0;
C[i] = 0;
}
hipMalloc((void**)&Ad, LEN8);
hipMalloc((void**)&Bd, LEN8);
hipMalloc((void**)&Cd, LEN8);
hipMemcpy(Ad, A, LEN8, hipMemcpyHostToDevice);
hipLaunchKernel(MemCpy8, dim3(2,1,1), dim3(2,1,1), 0, 0, Ad, Bd);
hipLaunchKernel(MemSet8, dim3(2,1,1), dim3(2,1,1), 0, 0, Cd);
hipMemcpy(B, Bd, LEN8, hipMemcpyDeviceToHost);
hipMemcpy(C, Cd, LEN8, hipMemcpyDeviceToHost);
for(uint32_t i=0;i<LEN8;i++) {
assert(A[i] == B[i]);
assert(C[i] == 1);
}
delete A;
delete B;
delete C;
A = new uint8_t[LEN9];
B = new uint8_t[LEN9];
C = new uint8_t[LEN9];
for(uint32_t i=0;i<LEN9;i++) {
A[i] = i;
B[i] = 0;
C[i] = 0;
}
hipMalloc((void**)&Ad, LEN9);
hipMalloc((void**)&Bd, LEN9);
hipMalloc((void**)&Cd, LEN9);
hipMemcpy(Ad, A, LEN9, hipMemcpyHostToDevice);
hipLaunchKernel(MemCpy9, dim3(2,1,1), dim3(2,1,1), 0, 0, Ad, Bd);
hipLaunchKernel(MemSet9, dim3(2,1,1), dim3(2,1,1), 0, 0, Cd);
hipMemcpy(B, Bd, LEN9, hipMemcpyDeviceToHost);
hipMemcpy(C, Cd, LEN9, hipMemcpyDeviceToHost);
for(uint32_t i=0;i<LEN9;i++) {
assert(A[i] == B[i]);
assert(C[i] == 1);
}
delete A;
delete B;
delete C;
A = new uint8_t[LEN10];
B = new uint8_t[LEN10];
C = new uint8_t[LEN10];
for(uint32_t i=0;i<LEN10;i++) {
A[i] = i;
B[i] = 0;
C[i] = 0;
}
hipMalloc((void**)&Ad, LEN10);
hipMalloc((void**)&Bd, LEN10);
hipMalloc((void**)&Cd, LEN10);
hipMemcpy(Ad, A, LEN10, hipMemcpyHostToDevice);
hipLaunchKernel(MemCpy10, dim3(2,1,1), dim3(2,1,1), 0, 0, Ad, Bd);
hipLaunchKernel(MemSet10, dim3(2,1,1), dim3(2,1,1), 0, 0, Cd);
hipMemcpy(B, Bd, LEN10, hipMemcpyDeviceToHost);
hipMemcpy(C, Cd, LEN10, hipMemcpyDeviceToHost);
for(uint32_t i=0;i<LEN10;i++) {
assert(A[i] == B[i]);
assert(C[i] == 1);
}
delete A;
delete B;
delete C;
A = new uint8_t[LEN11];
B = new uint8_t[LEN11];
C = new uint8_t[LEN11];
for(uint32_t i=0;i<LEN11;i++) {
A[i] = i;
B[i] = 0;
C[i] = 0;
}
hipMalloc((void**)&Ad, LEN11);
hipMalloc((void**)&Bd, LEN11);
hipMalloc((void**)&Cd, LEN11);
hipMemcpy(Ad, A, LEN11, hipMemcpyHostToDevice);
hipLaunchKernel(MemCpy11, dim3(2,1,1), dim3(2,1,1), 0, 0, Ad, Bd);
hipLaunchKernel(MemSet11, dim3(2,1,1), dim3(2,1,1), 0, 0, Cd);
hipMemcpy(B, Bd, LEN11, hipMemcpyDeviceToHost);
hipMemcpy(C, Cd, LEN11, hipMemcpyDeviceToHost);
for(uint32_t i=0;i<LEN11;i++) {
assert(A[i] == B[i]);
assert(C[i] == 1);
}
delete A;
delete B;
delete C;
A = new uint8_t[LEN12];
B = new uint8_t[LEN12];
C = new uint8_t[LEN12];
for(uint32_t i=0;i<LEN12;i++) {
A[i] = i;
B[i] = 0;
C[i] = 0;
}
hipMalloc((void**)&Ad, LEN12);
hipMalloc((void**)&Bd, LEN12);
hipMalloc((void**)&Cd, LEN12);
hipMemcpy(Ad, A, LEN12, hipMemcpyHostToDevice);
hipLaunchKernel(MemCpy12, dim3(2,1,1), dim3(2,1,1), 0, 0, Ad, Bd);
hipLaunchKernel(MemSet12, dim3(2,1,1), dim3(2,1,1), 0, 0, Cd);
hipMemcpy(B, Bd, LEN12, hipMemcpyDeviceToHost);
hipMemcpy(C, Cd, LEN12, hipMemcpyDeviceToHost);
for(uint32_t i=0;i<LEN12;i++) {
assert(A[i] == B[i]);
assert(C[i] == 1);
}
delete A;
delete B;
delete C;
}