Revert the general adoption of CUDA-style indexing: it only works with later versions of the compiler, just like module-based dispatch, and must therefore be guarded against use with earlier versions (e.g. 1.6).

2017-11-29 21:01:28 +00:00
@@ -40,8 +40,8 @@ template <typename T>
 __global__ void
 vector_square(T *C_d, const T *A_d, size_t N)
 {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;

    for (size_t i=offset; i<N; i+=stride) {
        C_d[i] = A_d[i] * A_d[i];