From 7d71dfd1f738a01188ee39dced233e8d281f9dd1 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 1 Feb 2018 17:07:48 +0300 Subject: [PATCH 1/2] [HIPIFY][tests] Add intro.cu test --- hipamd/tests/hipify-clang/intro.cu | 174 +++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 hipamd/tests/hipify-clang/intro.cu diff --git a/hipamd/tests/hipify-clang/intro.cu b/hipamd/tests/hipify-clang/intro.cu new file mode 100644 index 0000000000..4b9c5c0da7 --- /dev/null +++ b/hipamd/tests/hipify-clang/intro.cu @@ -0,0 +1,174 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args + +#include +#include +#include +// CHECK: #include +#include + +#define K_THREADS 64 +#define K_INDEX() ((gridDim.x * blockIdx.y + blockIdx.x) * blockDim.x + threadIdx.x) +#define RND() ((rand() & 0x7FFF) / float(0x8000)) +#define ERRORCHECK() cErrorCheck(__FILE__, __LINE__) + +// CHECK: hipEvent_t t##_start, t##_end; \ +// CHECK: hipEventCreate(&t##_start); \ +// CHECK: hipEventCreate(&t##_end); +#define TIMER_CREATE(t) \ + cudaEvent_t t##_start, t##_end; \ + cudaEventCreate(&t##_start); \ + cudaEventCreate(&t##_end); + +// CHECK: hipEventRecord(t##_start); \ +// CHECK: hipEventSynchronize(t##_start); +#define TIMER_START(t) \ + cudaEventRecord(t##_start); \ + cudaEventSynchronize(t##_start); \ + +// CHECK: hipEventRecord(t##_start); \ +// CHECK: hipEventSynchronize(t##_start); \ +// CHECK: hipEventRecord(t##_end); \ +// CHECK: hipEventSynchronize(t##_end); \ +// CHECK: hipEventElapsedTime(&t, t##_start, t##_end); +#define TIMER_END(t) \ + cudaEventRecord(t##_start); \ + cudaEventSynchronize(t##_start); \ + cudaEventRecord(t##_end); \ + cudaEventSynchronize(t##_end); \ + cudaEventElapsedTime(&t, t##_start, t##_end); + + +inline void cErrorCheck(const char *file, int line) { +// CHECK: hipDeviceSynchronize(); +// CHECK: hipError_t err = hipGetLastError(); +// CHECK: if (err != hipSuccess) { +// CHECK: printf("Error: %s\n", hipGetErrorString(err)); + cudaThreadSynchronize(); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf("Error: %s\n", cudaGetErrorString(err)); + printf(" @ %s: %d\n", file, line); + exit(-1); + } +} + +inline dim3 K_GRID(int n, int threads = K_THREADS) { + int blocks = (int)ceilf(sqrtf((float)n/threads)); + dim3 grid(blocks, blocks); + return grid; +} + +typedef struct data { + int n; + float4 *r, *v, *f; +} data; + +data cpu, gpu; + +#define N 20 + +__global__ void repulsion(data gpu); +__global__ void integration(data gpu); + + +int main() { + printf("Cuda Test 1\n"); + + int count = 0; + // CHECK: hipGetDeviceCount(&count); + cudaGetDeviceCount(&count); + printf(" %d CUDA devices found\n", count); + if(!count) { + ::exit(EXIT_FAILURE); + } + // CHECK: hipFree(0); + cudaFree(0); + + cpu.n = N; + + cpu.r = (float4*)malloc(N * sizeof(float4)); + cpu.v = (float4*)malloc(N * sizeof(float4)); + cpu.f = (float4*)malloc(N * sizeof(float4)); + + for(int i = 0; i < N; ++i) { + cpu.v[i] = make_float4(0,0,0,0); + cpu.r[i] = make_float4(RND(), RND(), RND(), 0); + cpu.f[i] = make_float4(0,0.01,0,0); + } + + gpu = cpu; + // CHECK: hipMalloc(&gpu.r, N * sizeof(float4)); + // CHECK: hipMalloc(&gpu.v, N * sizeof(float4)); + // CHECK: hipMalloc(&gpu.f, N * sizeof(float4)); + cudaMalloc(&gpu.r, N * sizeof(float4)); + cudaMalloc(&gpu.v, N * sizeof(float4)); + cudaMalloc(&gpu.f, N * sizeof(float4)); + // CHECK: hipMemcpy(gpu.r, cpu.r, cpu.n * sizeof(float4), hipMemcpyHostToDevice); + // CHECK: hipMemcpy(gpu.v, cpu.v, cpu.n * sizeof(float4), hipMemcpyHostToDevice); + // CHECK: hipMemcpy(gpu.f, cpu.f, cpu.n * sizeof(float4), hipMemcpyHostToDevice); + cudaMemcpy(gpu.r, cpu.r, cpu.n * sizeof(float4), cudaMemcpyHostToDevice); + cudaMemcpy(gpu.v, cpu.v, cpu.n * sizeof(float4), cudaMemcpyHostToDevice); + cudaMemcpy(gpu.f, cpu.f, cpu.n * sizeof(float4), cudaMemcpyHostToDevice); + + ERRORCHECK(); + float rep; + TIMER_CREATE(rep); + TIMER_START(rep); + // CHECK: hipLaunchKernelGGL(integration, dim3(K_GRID(cpu.n)), dim3(K_THREADS), 0, 0, gpu); + integration <<< K_GRID(cpu.n), K_THREADS >>>(gpu); + + TIMER_END(rep); + printf("Took: %f ms\n", rep); + ERRORCHECK(); + // CHECK: hipMemcpy(cpu.r, gpu.r, cpu.n * sizeof(float4), hipMemcpyDeviceToHost); + // CHECK: hipMemcpy(cpu.v, gpu.v, cpu.n * sizeof(float4), hipMemcpyDeviceToHost); + // CHECK: hipMemcpy(cpu.f, gpu.f, cpu.n * sizeof(float4), hipMemcpyDeviceToHost); + cudaMemcpy(cpu.r, gpu.r, cpu.n * sizeof(float4), cudaMemcpyDeviceToHost); + cudaMemcpy(cpu.v, gpu.v, cpu.n * sizeof(float4), cudaMemcpyDeviceToHost); + cudaMemcpy(cpu.f, gpu.f, cpu.n * sizeof(float4), cudaMemcpyDeviceToHost); + // CHECK: hipHostFree(cpu.r); + // CHECK: hipHostFree(cpu.v); + // CHECK: hipHostFree(cpu.f); + cudaFreeHost(cpu.r); + cudaFreeHost(cpu.v); + cudaFreeHost(cpu.f); + // CHECK: hipFree(gpu.r); + // CHECK: hipFree(gpu.v); + // CHECK: hipFree(gpu.f); + cudaFree(gpu.r); + cudaFree(gpu.v); + cudaFree(gpu.f); + // CHECK: hipDeviceReset(); + cudaDeviceReset(); + + printf("Results: \n"); + for(int i = 0; i < N; ++i) { + printf("%f, %f, %f \n", cpu.r[i].x, cpu.r[i].y, cpu.r[i].z); + } + + printf("Ready...\n"); + return 0; +} + +__global__ void repulsion(data gpu) { + int idx = K_INDEX(); + if(idx < N) { + gpu.r[idx].x = 1; + gpu.r[idx].y = 1; + gpu.r[idx].z = 1; + } +} + +#define MULT4(v, s) v.x *= s; v.y *= s; v.z *= s; v.w *= s; +#define ADD4(v1, v2) v1.x += v2.x; v1.y += v2.y; v1.z += v2.z; v1.w += v2.w; + +__global__ void integration(data gpu) { + int i = K_INDEX(); + if(i < N) { + MULT4(gpu.f[i], 0.01); + MULT4(gpu.v[i], 0.01); + ADD4(gpu.v[i], gpu.f[i]); + ADD4(gpu.r[i], gpu.v[i]); + gpu.f[i] = make_float4(0,0,0,0); + } +} \ No newline at end of file From 91ef7b50e5d77dba7ffa52ad1f0a46cf467cb128 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 1 Feb 2018 17:36:45 +0300 Subject: [PATCH 2/2] [HIPIFY][tests] add new line at the end of file --- hipamd/tests/hipify-clang/intro.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/tests/hipify-clang/intro.cu b/hipamd/tests/hipify-clang/intro.cu index 4b9c5c0da7..da797eb2ec 100644 --- a/hipamd/tests/hipify-clang/intro.cu +++ b/hipamd/tests/hipify-clang/intro.cu @@ -171,4 +171,4 @@ __global__ void integration(data gpu) { ADD4(gpu.r[i], gpu.v[i]); gpu.f[i] = make_float4(0,0,0,0); } -} \ No newline at end of file +}