Replace hipLaunchKernel with hipLaunchKernelGGL

Change-Id: I4d99009e1199811d417becf1e1b934ec4d4e30be
2018-10-17 12:01:44 +05:30
@@ -31,7 +31,7 @@ THE SOFTWARE.
 #include <test_common.h>

 #ifdef __HCC__
-#include <amp.h>
+#include <hc.hpp>
 #endif

 // cudaA
@@ -53,7 +53,7 @@ __device__ __forceinline__ int sum1_forceinline(int a) { return a + 1; };

 __device__ __host__ float PlusOne(float x) { return x + 1.0; }

-__global__ void MyKernel(const hipLaunchParm lp, const float* a, const float* b, float* c,
+__global__ void MyKernel(const float* a, const float* b, float* c,
                         unsigned N) {
    // KERNELBEGIN;

@@ -71,12 +71,12 @@ void callMyKernel() {
    const unsigned blockSize = 256;
    unsigned N = blockSize;

-    hipLaunchKernel(MyKernel, dim3(N / blockSize), dim3(blockSize), 0, 0, a, b, c, N);
+    hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize), 0, 0, a, b, c, N);
 }


 template <typename T>
-__global__ void vectorADD(const hipLaunchParm lp, T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
+__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
 //    KERNELBEGIN;
 #ifdef NOT_YET
    int a = __shfl_up(x, 1);
@@ -93,11 +93,7 @@ __global__ void vectorADD(const hipLaunchParm lp, T __restrict__* A_d, T* B_d, T
    int b = threadIdx.x;
    int c;

-    // TODO - move to HIP atomics when ready.
-    concurrency ::atomic_fetch_add(&c, b);
-    // Concurrency::atomic_add_unsigned (&x, a);
-
-    // concurrency ::atomic_add_ (x, a);
+    atomicAdd(&c, b);
 #endif

    __syncthreads();