[HIPIFY][tests] Add vec_add.cu test

[ROCm/clr commit: f7fb6e2199]
This commit is contained in:
Evgeny Mankov
2018-02-01 18:34:16 +03:00
parent 4928f0d720
commit 180cf3f29c
@@ -0,0 +1,90 @@
// RUN: %run_test hipify "%s" "%t" %cuda_args
// Kernel definition
__global__ void vecAdd(float* A, float* B, float* C)
{
int i = threadIdx.x;
A[i] = 0;
B[i] = i;
C[i] = A[i] + B[i];
}
// CHECK: #include <hip/hip_runtime.h>
#include <stdio.h>
#define SIZE 10
#define KERNELINVOKES 5000000
int vecadd(int gpudevice, int rank)
{
int devcheck(int, int);
devcheck(gpudevice, rank);
float A[SIZE], B[SIZE], C[SIZE];
// Kernel invocation
float *devPtrA;
float *devPtrB;
float *devPtrC;
int memsize = SIZE * sizeof(float);
// CHECK: hipMalloc((void**)&devPtrA, memsize);
// CHECK: hipMalloc((void**)&devPtrB, memsize);
// CHECK: hipMalloc((void**)&devPtrC, memsize);
cudaMalloc((void**)&devPtrA, memsize);
cudaMalloc((void**)&devPtrB, memsize);
cudaMalloc((void**)&devPtrC, memsize);
// CHECK: hipMemcpy(devPtrA, A, memsize, hipMemcpyHostToDevice);
// CHECK: hipMemcpy(devPtrB, B, memsize, hipMemcpyHostToDevice);
cudaMemcpy(devPtrA, A, memsize, cudaMemcpyHostToDevice);
cudaMemcpy(devPtrB, B, memsize, cudaMemcpyHostToDevice);
for (int i = 0; i<KERNELINVOKES; i++)
{
// CHECK: hipLaunchKernelGGL(vecAdd, dim3(1), dim3(gpudevice), 0, 0, devPtrA, devPtrB, devPtrC);
vecAdd <<< 1, gpudevice >>>(devPtrA, devPtrB, devPtrC);
}
// CHECK: hipMemcpy(C, devPtrC, memsize, hipMemcpyDeviceToHost);
cudaMemcpy(C, devPtrC, memsize, cudaMemcpyDeviceToHost);
// calculate only up to gpudevice to show the unique output
// of each rank's kernel launch
for (int i = 0; i<gpudevice; i++)
printf("rank %d: C[%d]=%f\n", rank, i, C[i]);
// CHECK: hipFree(devPtrA);
// CHECK: hipFree(devPtrA);
// CHECK: hipFree(devPtrA);
cudaFree(devPtrA);
cudaFree(devPtrA);
cudaFree(devPtrA);
}
int devcheck(int gpudevice, int rank)
{
int device_count = 0;
int device; // used with cudaGetDevice() to verify cudaSetDevice()
// CHECK: hipGetDeviceCount(&device_count);
cudaGetDeviceCount(&device_count);
if (gpudevice >= device_count)
{
printf("gpudevice >= device_count ... exiting\n");
exit(1);
}
// CHECK: hipError_t cudareturn;
// CHECK: hipDeviceProp_t deviceProp;
// CHECK: hipGetDeviceProperties(&deviceProp, gpudevice);
cudaError_t cudareturn;
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, gpudevice);
// CHECK: if (deviceProp.hipWarpSize <= 1)
if (deviceProp.warpSize <= 1)
{
printf("rank %d: warning, CUDA Device Emulation (CPU) detected, exiting\n", rank);
exit(1);
}
// CHECK: cudareturn = hipSetDevice(gpudevice);
cudareturn = cudaSetDevice(gpudevice);
// CHECK: if (cudareturn == hipErrorInvalidDevice)
if (cudareturn == cudaErrorInvalidDevice)
{
// CHECK: perror("hipSetDevice returned hipErrorInvalidDevice");
perror("cudaSetDevice returned cudaErrorInvalidDevice");
}
else
{
// CHECK: hipGetDevice(&device);
cudaGetDevice(&device);
printf("rank %d: cudaGetDevice()=%d\n", rank, device);
}
}