From fbaf729f88353841ca2eb4f9b24f593bbb3b14ce Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 29 Nov 2017 21:36:29 +0000 Subject: [PATCH] Revert "Revert adoption of CUDA indexing in general - this can only work with later versions of the compiler, just like module based dispatch, and thus must be guarded against usage in earlier (e.g. 1.6) versions." This reverts commit d2fd1f5 --- samples/0_Intro/square/square.cu | 4 +-- src/device_util.cpp | 4 +-- src/hip_memory.cpp | 4 +-- .../device/hipFuncDeviceSynchronize.cpp | 2 +- tests/src/deviceLib/hipComplex.cpp | 2 +- tests/src/deviceLib/hipDeviceMemcpy.cpp | 4 +-- tests/src/deviceLib/hipFloatMath.cpp | 2 +- tests/src/deviceLib/hipSimpleAtomicsTest.cpp | 2 +- tests/src/deviceLib/hipTestDevice.cpp | 32 +++++++++---------- tests/src/deviceLib/hipTestDeviceDouble.cpp | 28 ++++++++-------- tests/src/deviceLib/hipTestDeviceSymbol.cpp | 2 +- tests/src/deviceLib/hipTestHalf.cpp | 4 +-- tests/src/deviceLib/hipThreadFence.cpp | 2 +- tests/src/deviceLib/hip_anyall.cpp | 6 ++-- tests/src/deviceLib/hip_ballot.cpp | 8 ++--- tests/src/deviceLib/hip_brev.cpp | 4 +-- tests/src/deviceLib/hip_clz.cpp | 4 +-- tests/src/deviceLib/hip_ffs.cpp | 4 +-- tests/src/deviceLib/hip_popc.cpp | 4 +-- tests/src/deviceLib/hip_test_ldg.cpp | 4 +-- tests/src/deviceLib/hip_test_make_type.cpp | 20 ++++++------ tests/src/deviceLib/hip_trig.cpp | 2 +- tests/src/experimental/xcompile/hHip.c | 2 +- tests/src/experimental/xcompile/hipxxKer.cpp | 2 +- tests/src/experimental/xcompile/hxxHip.cpp | 2 +- tests/src/hipC.c | 2 +- tests/src/hipC.cpp | 2 +- tests/src/hipCKernel.c | 2 +- tests/src/kernel/hipDynamicShared.cpp | 4 +-- tests/src/kernel/hipDynamicShared2.cpp | 2 +- tests/src/kernel/hipGridLaunch.cpp | 4 +-- tests/src/kernel/hipLanguageExtensions.cpp | 8 ++--- tests/src/kernel/hipTestConstant.cpp | 2 +- tests/src/kernel/hipTestMallocKernel.cpp | 4 +-- tests/src/kernel/hipTestMemKernel.cpp | 20 ++++++------ tests/src/kernel/inline_asm_vadd.cpp | 2 +- tests/src/kernel/inline_asm_vmac.cpp | 2 +- tests/src/kernel/launch_bounds.cpp | 2 +- .../device/hipDeviceSynchronize.cpp | 2 +- .../src/runtimeApi/memory/hipHostGetFlags.cpp | 2 +- tests/src/runtimeApi/memory/hipHostMalloc.cpp | 4 +-- .../src/runtimeApi/memory/hipHostRegister.cpp | 2 +- .../src/runtimeApi/memory/hipMemcpyAsync.cpp | 4 +-- .../memory/hipMemoryAllocateCoherent.cpp | 2 +- .../runtimeApi/memory/p2p_copy_coherency.cpp | 8 ++--- tests/src/runtimeApi/module/hipModule.cpp | 2 +- tests/src/runtimeApi/module/vcpy_kernel.cpp | 2 +- .../multiThread/hipMultiThreadStreams2.cpp | 2 +- .../runtimeApi/stream/hipAPIStreamDisable.cpp | 4 +-- .../runtimeApi/stream/hipAPIStreamEnable.cpp | 4 +-- tests/src/runtimeApi/stream/hipNullStream.cpp | 4 +-- tests/src/runtimeApi/stream/hipStream.h | 2 +- .../synchronization/copy_coherency.cpp | 8 ++--- .../synchronization/memcpyInt.device.cpp | 4 +-- tests/src/specialFunc.cu | 2 +- tests/src/stress/hipStressAsync.cpp | 2 +- tests/src/stress/hipStressChain.cpp | 2 +- tests/src/stress/hipStressKernel.cpp | 2 +- tests/src/stress/hipStressSync.cpp | 2 +- tests/src/test_common.h | 20 ++++++------ tests/src/texture/hipTextureObj2D.cpp | 4 +-- tests/src/texture/hipTextureRef2D.cpp | 4 +-- 62 files changed, 152 insertions(+), 152 deletions(-) diff --git a/samples/0_Intro/square/square.cu b/samples/0_Intro/square/square.cu index 82b31db14a..ccaa9ae0bc 100644 --- a/samples/0_Intro/square/square.cu +++ b/samples/0_Intro/square/square.cu @@ -40,8 +40,8 @@ template __global__ void vector_square(T *C_d, const T *A_d, size_t N) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x ; + size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t stride = hipBlockDim_x * hipGridDim_x ; for (size_t i=offset; i(&f[idx]), diff --git a/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp b/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp index c8c2e644c3..dac56bf709 100644 --- a/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp +++ b/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. #define NUM_STREAMS 2 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; // Kernel loop designed to execute very slowly... ... ... so we can test timing-related behavior below if(tx == 0){ for(int i = 0; i>pshift] = __any(tid -77); - device_all[threadIdx.x>>pshift] = __all(tid -77); + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + device_any[hipThreadIdx_x>>pshift] = __any(tid -77); + device_all[hipThreadIdx_x>>pshift] = __all(tid -77); } int main(int argc, char *argv[]) diff --git a/tests/src/deviceLib/hip_ballot.cpp b/tests/src/deviceLib/hip_ballot.cpp index 14b8f314a1..742c47a065 100644 --- a/tests/src/deviceLib/hip_ballot.cpp +++ b/tests/src/deviceLib/hip_ballot.cpp @@ -34,12 +34,12 @@ __global__ void gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block,int pshift) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - const unsigned int warp_num = threadIdx.x >> pshift; + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + const unsigned int warp_num = hipThreadIdx_x >> pshift; #ifdef __HIP_PLATFORM_HCC__ - atomicAdd(&device_ballot[warp_num+blockIdx.x*Num_Warps_per_Block],__popcll(__ballot(tid - 245))); + atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popcll(__ballot(tid - 245))); #else - atomicAdd(&device_ballot[warp_num+blockIdx.x*Num_Warps_per_Block],__popc(__ballot(tid - 245))); + atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popc(__ballot(tid - 245))); #endif } diff --git a/tests/src/deviceLib/hip_brev.cpp b/tests/src/deviceLib/hip_brev.cpp index c08c39dec9..855a8bec47 100644 --- a/tests/src/deviceLib/hip_brev.cpp +++ b/tests/src/deviceLib/hip_brev.cpp @@ -72,8 +72,8 @@ HIP_kernel(hipLaunchParm lp, unsigned int* a, unsigned int* b,unsigned long long int* c, unsigned long long int* d, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_clz.cpp b/tests/src/deviceLib/hip_clz.cpp index 53fd611184..bdb31f3e8d 100644 --- a/tests/src/deviceLib/hip_clz.cpp +++ b/tests/src/deviceLib/hip_clz.cpp @@ -83,8 +83,8 @@ HIP_kernel(hipLaunchParm lp, unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_ffs.cpp b/tests/src/deviceLib/hip_ffs.cpp index 49530bb298..c855ede060 100644 --- a/tests/src/deviceLib/hip_ffs.cpp +++ b/tests/src/deviceLib/hip_ffs.cpp @@ -73,8 +73,8 @@ HIP_kernel(hipLaunchParm lp, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_popc.cpp b/tests/src/deviceLib/hip_popc.cpp index 19dafb4d43..e503e55b42 100644 --- a/tests/src/deviceLib/hip_popc.cpp +++ b/tests/src/deviceLib/hip_popc.cpp @@ -64,8 +64,8 @@ HIP_kernel(hipLaunchParm lp, unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_test_ldg.cpp b/tests/src/deviceLib/hip_test_ldg.cpp index 4db522cc10..5540c4917d 100644 --- a/tests/src/deviceLib/hip_test_ldg.cpp +++ b/tests/src/deviceLib/hip_test_ldg.cpp @@ -57,8 +57,8 @@ vectoradd_float(hipLaunchParm lp, T* a, const T* bm, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_test_make_type.cpp b/tests/src/deviceLib/hip_test_make_type.cpp index 6eba236e12..ce689ceb89 100644 --- a/tests/src/deviceLib/hip_test_make_type.cpp +++ b/tests/src/deviceLib/hip_test_make_type.cpp @@ -45,8 +45,8 @@ vectoradd_char1(hipLaunchParm lp, char1* a, const char1* bm, const char1* cm, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { @@ -59,8 +59,8 @@ vectoradd_char2(hipLaunchParm lp, char2* a, const char2* bm, const char2* cm, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { @@ -73,8 +73,8 @@ vectoradd_char3(hipLaunchParm lp, char3* a, const char3* bm, const char3* cm, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { @@ -86,8 +86,8 @@ vectoradd_char4(hipLaunchParm lp, char4* a, const char4* bm, const char4* cm, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { @@ -100,8 +100,8 @@ vectoradd_char4(hipLaunchParm lp, __kernel__ void vectoradd_float(float* a, const float* b, const float* c, int width, int height) { - int x = blockDimX * blockIdx.x + threadIdx.x; - int y = blockDimY * blockIdy.y + threadIdx.y; + int x = blockDimX * hipBlockIdx_x + hipThreadIdx_x; + int y = blockDimY * blockIdy.y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_trig.cpp b/tests/src/deviceLib/hip_trig.cpp index 6ee8dc58ad..5ec28101f3 100644 --- a/tests/src/deviceLib/hip_trig.cpp +++ b/tests/src/deviceLib/hip_trig.cpp @@ -36,7 +36,7 @@ THE SOFTWARE. #define SIZE LEN<<2 __global__ void kernel_trig(hipLaunchParm lp, float *In, float *sin_d, float *cos_d, float *tan_d, float *sin_pd, float *cos_pd){ - int tid = threadIdx.x + blockIdx.x * blockDim.x; + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; sin_d[tid] = __sinf(In[tid]); cos_d[tid] = __cosf(In[tid]); tan_d[tid] = __tanf(In[tid]); diff --git a/tests/src/experimental/xcompile/hHip.c b/tests/src/experimental/xcompile/hHip.c index 17e7e9ecf6..2ac4ebc73e 100644 --- a/tests/src/experimental/xcompile/hHip.c +++ b/tests/src/experimental/xcompile/hHip.c @@ -29,7 +29,7 @@ THE SOFTWARE. __global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd, size_t len) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; if(tx < len) { Cd[tx] = Ad[tx] + Bd[tx]; diff --git a/tests/src/experimental/xcompile/hipxxKer.cpp b/tests/src/experimental/xcompile/hipxxKer.cpp index 5dca6c1bca..d1bbed63cd 100644 --- a/tests/src/experimental/xcompile/hipxxKer.cpp +++ b/tests/src/experimental/xcompile/hipxxKer.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. __global__ void Kern(hipLaunchParm lp, float *A) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; A[tx] += 1.0f; } diff --git a/tests/src/experimental/xcompile/hxxHip.cpp b/tests/src/experimental/xcompile/hxxHip.cpp index bca5d64afc..6a748d5c89 100644 --- a/tests/src/experimental/xcompile/hxxHip.cpp +++ b/tests/src/experimental/xcompile/hxxHip.cpp @@ -33,7 +33,7 @@ class memManager; template __global__ void Add(hipLaunchParm lp, T* Ad, T* Bd, T* Cd, size_t Len) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; if(tx < Len) { Cd[tx] = Ad[tx] + Bd[tx]; diff --git a/tests/src/hipC.c b/tests/src/hipC.c index efa03bb909..644df6c98f 100644 --- a/tests/src/hipC.c +++ b/tests/src/hipC.c @@ -34,7 +34,7 @@ THE SOFTWARE. #define SIZE 1024*1024*sizeof(int) __global__ void Iter(hipLaunchParm lp, int *Ad){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; if(tx == 0){ for(int i=0;i(my_sdata); #endif - size_t gid = (blockIdx.x * blockDim.x + threadIdx.x); - size_t tid = threadIdx.x; + size_t gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t tid = hipThreadIdx_x; // initialize dynamic shared memory if (tid < groupElements) { diff --git a/tests/src/kernel/hipDynamicShared2.cpp b/tests/src/kernel/hipDynamicShared2.cpp index 4567ff6fc2..95e70a9956 100644 --- a/tests/src/kernel/hipDynamicShared2.cpp +++ b/tests/src/kernel/hipDynamicShared2.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. __global__ void vectorAdd(hipLaunchParm lp, float *Ad, float *Bd) { HIP_DYNAMIC_SHARED(float, sBd); - int tx = threadIdx.x; + int tx = hipThreadIdx_x; for(int i=0;i __global__ void Inc(hipLaunchParm lp, float *Ad){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; Ad[tx] = Ad[tx] + float(1); } diff --git a/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp b/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp index 5cd46c808a..c4f4b23dc0 100644 --- a/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp +++ b/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp @@ -70,8 +70,8 @@ template __global__ void addK (hipLaunchParm lp, T *A, T K, size_t numElements) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x ; + size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t stride = hipBlockDim_x * hipGridDim_x ; for (size_t i=offset; i __global__ void Inc(hipLaunchParm lp, T *Array){ -int tx = threadIdx.x + blockIdx.x * blockDim.x; +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; Array[tx] = Array[tx] + T(1); } diff --git a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp index 66b93a164f..4e343121ed 100644 --- a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp +++ b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp @@ -29,7 +29,7 @@ THE SOFTWARE. const int NN = 1 << 21; __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){ - int tid = threadIdx.x; + int tid = hipThreadIdx_x; if(tid < 1){ for(int i=0;i __global__ void Inc(hipLaunchParm lp, T *In){ -int tx = threadIdx.x + blockIdx.x * blockDim.x; +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; In[tx] = In[tx] + 1; } diff --git a/tests/src/runtimeApi/synchronization/copy_coherency.cpp b/tests/src/runtimeApi/synchronization/copy_coherency.cpp index b2a66f61e2..e4bfb98206 100644 --- a/tests/src/runtimeApi/synchronization/copy_coherency.cpp +++ b/tests/src/runtimeApi/synchronization/copy_coherency.cpp @@ -102,8 +102,8 @@ MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel"); __global__ void memsetIntKernel(int * ptr, const int val, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x ; + int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + int stride = hipBlockDim_x * hipGridDim_x ; for (size_t i= gid; i< numElements; i+=stride){ ptr[i] = val; } @@ -112,8 +112,8 @@ memsetIntKernel(int * ptr, const int val, size_t numElements) __global__ void memcpyIntKernel(int *dst, const int * src, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x ; + int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + int stride = hipBlockDim_x * hipGridDim_x ; for (size_t i= gid; i< numElements; i+=stride){ dst[i] = src[i]; } diff --git a/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp b/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp index 2916d51bf9..b34d331682 100644 --- a/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp +++ b/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp @@ -5,8 +5,8 @@ extern "C" __global__ void memcpyIntKernel(hipLaunchParm lp, int *dst, const int * src, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x ; + int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + int stride = hipBlockDim_x * hipGridDim_x ; for (size_t i= gid; i< numElements; i+=stride){ dst[i] = src[i]; } diff --git a/tests/src/specialFunc.cu b/tests/src/specialFunc.cu index 085be062d9..744dcd8926 100644 --- a/tests/src/specialFunc.cu +++ b/tests/src/specialFunc.cu @@ -23,7 +23,7 @@ THE SOFTWARE. void __global__ test_kernel(float *A) { - int tid = blockIdx.x * blockDim.x + threadIdx.x; + int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; float a = __ballot(tid < 16); float b = __shfl(tid < 16); diff --git a/tests/src/stress/hipStressAsync.cpp b/tests/src/stress/hipStressAsync.cpp index a142b41730..1f8cab1a36 100644 --- a/tests/src/stress/hipStressAsync.cpp +++ b/tests/src/stress/hipStressAsync.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. #define ITER 1<<10 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; if(tx == 0){ for(int i = 0; i=0; i-=stride) { C_d[i] = A_d[i] + B_d[i]; @@ -169,8 +169,8 @@ addCount( const T *A_d, size_t NELEM, int count) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x ; + size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t stride = hipBlockDim_x * hipGridDim_x ; // Deliberately do this in an inefficient way to increase kernel runtime for (int i=0; i=0; i-=stride) { C_d[i] = val; diff --git a/tests/src/texture/hipTextureObj2D.cpp b/tests/src/texture/hipTextureObj2D.cpp index 9ddafd6b1c..443d708418 100644 --- a/tests/src/texture/hipTextureObj2D.cpp +++ b/tests/src/texture/hipTextureObj2D.cpp @@ -17,8 +17,8 @@ __global__ void tex2DKernel(float* outputData, int width, int height) { - int x = blockIdx.x*blockDim.x + threadIdx.x; - int y = blockIdx.y*blockDim.y + threadIdx.y; + int x = hipBlockIdx_x*hipBlockDim_x + hipThreadIdx_x; + int y = hipBlockIdx_y*hipBlockDim_y + hipThreadIdx_y; outputData[y*width + x] = tex2D(textureObject, x, y); } diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp index c42f09d5a0..ebc7a04385 100644 --- a/tests/src/texture/hipTextureRef2D.cpp +++ b/tests/src/texture/hipTextureRef2D.cpp @@ -20,8 +20,8 @@ __global__ void tex2DKernel(float* outputData, int width, int height) { - int x = blockIdx.x*blockDim.x + threadIdx.x; - int y = blockIdx.y*blockDim.y + threadIdx.y; + int x = hipBlockIdx_x*hipBlockDim_x + hipThreadIdx_x; + int y = hipBlockIdx_y*hipBlockDim_y + hipThreadIdx_y; #ifdef __HIP_PLATFORM_HCC__ outputData[y*width + x] = tex2D(tex, textureObject, x, y); #else