Revert adoption of CUDA indexing in general - this can only work with later versions of the compiler, just like module based dispatch, and thus must be guarded against usage in earlier (e.g. 1.6) versions.

이 커밋은 다음에 포함됨:
Alex Voicu
2017-11-29 21:01:28 +00:00
부모 75a4e404ca
커밋 fe3719af15
+2 -2
파일 보기
@@ -40,8 +40,8 @@ template <typename T>
__global__ void
vector_square(T *C_d, const T *A_d, size_t N)
{
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
size_t stride = blockDim.x * gridDim.x ;
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
size_t stride = hipBlockDim_x * hipGridDim_x ;
for (size_t i=offset; i<N; i+=stride) {
C_d[i] = A_d[i] * A_d[i];