diff --git a/samples/0_Intro/square/square.cu b/samples/0_Intro/square/square.cu index 82b31db14a..ccaa9ae0bc 100644 --- a/samples/0_Intro/square/square.cu +++ b/samples/0_Intro/square/square.cu @@ -40,8 +40,8 @@ template __global__ void vector_square(T *C_d, const T *A_d, size_t N) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x ; + size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t stride = hipBlockDim_x * hipGridDim_x ; for (size_t i=offset; i(&f[idx]), diff --git a/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp b/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp index c8c2e644c3..dac56bf709 100644 --- a/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp +++ b/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. #define NUM_STREAMS 2 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; // Kernel loop designed to execute very slowly... ... ... so we can test timing-related behavior below if(tx == 0){ for(int i = 0; i>pshift] = __any(tid -77); - device_all[threadIdx.x>>pshift] = __all(tid -77); + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + device_any[hipThreadIdx_x>>pshift] = __any(tid -77); + device_all[hipThreadIdx_x>>pshift] = __all(tid -77); } int main(int argc, char *argv[]) diff --git a/tests/src/deviceLib/hip_ballot.cpp b/tests/src/deviceLib/hip_ballot.cpp index 14b8f314a1..742c47a065 100644 --- a/tests/src/deviceLib/hip_ballot.cpp +++ b/tests/src/deviceLib/hip_ballot.cpp @@ -34,12 +34,12 @@ __global__ void gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block,int pshift) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - const unsigned int warp_num = threadIdx.x >> pshift; + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + const unsigned int warp_num = hipThreadIdx_x >> pshift; #ifdef __HIP_PLATFORM_HCC__ - atomicAdd(&device_ballot[warp_num+blockIdx.x*Num_Warps_per_Block],__popcll(__ballot(tid - 245))); + atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popcll(__ballot(tid - 245))); #else - atomicAdd(&device_ballot[warp_num+blockIdx.x*Num_Warps_per_Block],__popc(__ballot(tid - 245))); + atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popc(__ballot(tid - 245))); #endif } diff --git a/tests/src/deviceLib/hip_brev.cpp b/tests/src/deviceLib/hip_brev.cpp index c08c39dec9..855a8bec47 100644 --- a/tests/src/deviceLib/hip_brev.cpp +++ b/tests/src/deviceLib/hip_brev.cpp @@ -72,8 +72,8 @@ HIP_kernel(hipLaunchParm lp, unsigned int* a, unsigned int* b,unsigned long long int* c, unsigned long long int* d, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_clz.cpp b/tests/src/deviceLib/hip_clz.cpp index 53fd611184..bdb31f3e8d 100644 --- a/tests/src/deviceLib/hip_clz.cpp +++ b/tests/src/deviceLib/hip_clz.cpp @@ -83,8 +83,8 @@ HIP_kernel(hipLaunchParm lp, unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_ffs.cpp b/tests/src/deviceLib/hip_ffs.cpp index 49530bb298..c855ede060 100644 --- a/tests/src/deviceLib/hip_ffs.cpp +++ b/tests/src/deviceLib/hip_ffs.cpp @@ -73,8 +73,8 @@ HIP_kernel(hipLaunchParm lp, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_popc.cpp b/tests/src/deviceLib/hip_popc.cpp index 19dafb4d43..e503e55b42 100644 --- a/tests/src/deviceLib/hip_popc.cpp +++ b/tests/src/deviceLib/hip_popc.cpp @@ -64,8 +64,8 @@ HIP_kernel(hipLaunchParm lp, unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_test_ldg.cpp b/tests/src/deviceLib/hip_test_ldg.cpp index 4db522cc10..5540c4917d 100644 --- a/tests/src/deviceLib/hip_test_ldg.cpp +++ b/tests/src/deviceLib/hip_test_ldg.cpp @@ -57,8 +57,8 @@ vectoradd_float(hipLaunchParm lp, T* a, const T* bm, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_test_make_type.cpp b/tests/src/deviceLib/hip_test_make_type.cpp index 6eba236e12..ce689ceb89 100644 --- a/tests/src/deviceLib/hip_test_make_type.cpp +++ b/tests/src/deviceLib/hip_test_make_type.cpp @@ -45,8 +45,8 @@ vectoradd_char1(hipLaunchParm lp, char1* a, const char1* bm, const char1* cm, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { @@ -59,8 +59,8 @@ vectoradd_char2(hipLaunchParm lp, char2* a, const char2* bm, const char2* cm, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { @@ -73,8 +73,8 @@ vectoradd_char3(hipLaunchParm lp, char3* a, const char3* bm, const char3* cm, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { @@ -86,8 +86,8 @@ vectoradd_char4(hipLaunchParm lp, char4* a, const char4* bm, const char4* cm, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { @@ -100,8 +100,8 @@ vectoradd_char4(hipLaunchParm lp, __kernel__ void vectoradd_float(float* a, const float* b, const float* c, int width, int height) { - int x = blockDimX * blockIdx.x + threadIdx.x; - int y = blockDimY * blockIdy.y + threadIdx.y; + int x = blockDimX * hipBlockIdx_x + hipThreadIdx_x; + int y = blockDimY * blockIdy.y + hipThreadIdx_y; int i = y * width + x; if ( i < (width * height)) { diff --git a/tests/src/deviceLib/hip_trig.cpp b/tests/src/deviceLib/hip_trig.cpp index 6ee8dc58ad..5ec28101f3 100644 --- a/tests/src/deviceLib/hip_trig.cpp +++ b/tests/src/deviceLib/hip_trig.cpp @@ -36,7 +36,7 @@ THE SOFTWARE. #define SIZE LEN<<2 __global__ void kernel_trig(hipLaunchParm lp, float *In, float *sin_d, float *cos_d, float *tan_d, float *sin_pd, float *cos_pd){ - int tid = threadIdx.x + blockIdx.x * blockDim.x; + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; sin_d[tid] = __sinf(In[tid]); cos_d[tid] = __cosf(In[tid]); tan_d[tid] = __tanf(In[tid]); diff --git a/tests/src/experimental/xcompile/hHip.c b/tests/src/experimental/xcompile/hHip.c index 17e7e9ecf6..2ac4ebc73e 100644 --- a/tests/src/experimental/xcompile/hHip.c +++ b/tests/src/experimental/xcompile/hHip.c @@ -29,7 +29,7 @@ THE SOFTWARE. __global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd, size_t len) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; if(tx < len) { Cd[tx] = Ad[tx] + Bd[tx]; diff --git a/tests/src/experimental/xcompile/hipxxKer.cpp b/tests/src/experimental/xcompile/hipxxKer.cpp index 5dca6c1bca..d1bbed63cd 100644 --- a/tests/src/experimental/xcompile/hipxxKer.cpp +++ b/tests/src/experimental/xcompile/hipxxKer.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. __global__ void Kern(hipLaunchParm lp, float *A) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; A[tx] += 1.0f; } diff --git a/tests/src/experimental/xcompile/hxxHip.cpp b/tests/src/experimental/xcompile/hxxHip.cpp index bca5d64afc..6a748d5c89 100644 --- a/tests/src/experimental/xcompile/hxxHip.cpp +++ b/tests/src/experimental/xcompile/hxxHip.cpp @@ -33,7 +33,7 @@ class memManager; template __global__ void Add(hipLaunchParm lp, T* Ad, T* Bd, T* Cd, size_t Len) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; if(tx < Len) { Cd[tx] = Ad[tx] + Bd[tx]; diff --git a/tests/src/hipC.c b/tests/src/hipC.c index efa03bb909..644df6c98f 100644 --- a/tests/src/hipC.c +++ b/tests/src/hipC.c @@ -34,7 +34,7 @@ THE SOFTWARE. #define SIZE 1024*1024*sizeof(int) __global__ void Iter(hipLaunchParm lp, int *Ad){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; if(tx == 0){ for(int i=0;i(my_sdata); #endif - size_t gid = (blockIdx.x * blockDim.x + threadIdx.x); - size_t tid = threadIdx.x; + size_t gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t tid = hipThreadIdx_x; // initialize dynamic shared memory if (tid < groupElements) { diff --git a/tests/src/kernel/hipDynamicShared2.cpp b/tests/src/kernel/hipDynamicShared2.cpp index 4567ff6fc2..95e70a9956 100644 --- a/tests/src/kernel/hipDynamicShared2.cpp +++ b/tests/src/kernel/hipDynamicShared2.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. __global__ void vectorAdd(hipLaunchParm lp, float *Ad, float *Bd) { HIP_DYNAMIC_SHARED(float, sBd); - int tx = threadIdx.x; + int tx = hipThreadIdx_x; for(int i=0;i __global__ void Inc(hipLaunchParm lp, float *Ad){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; Ad[tx] = Ad[tx] + float(1); } diff --git a/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp b/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp index 5cd46c808a..c4f4b23dc0 100644 --- a/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp +++ b/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp @@ -70,8 +70,8 @@ template __global__ void addK (hipLaunchParm lp, T *A, T K, size_t numElements) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x ; + size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t stride = hipBlockDim_x * hipGridDim_x ; for (size_t i=offset; i __global__ void Inc(hipLaunchParm lp, T *Array){ -int tx = threadIdx.x + blockIdx.x * blockDim.x; +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; Array[tx] = Array[tx] + T(1); } diff --git a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp index 66b93a164f..4e343121ed 100644 --- a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp +++ b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp @@ -29,7 +29,7 @@ THE SOFTWARE. const int NN = 1 << 21; __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){ - int tid = threadIdx.x; + int tid = hipThreadIdx_x; if(tid < 1){ for(int i=0;i __global__ void Inc(hipLaunchParm lp, T *In){ -int tx = threadIdx.x + blockIdx.x * blockDim.x; +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; In[tx] = In[tx] + 1; } diff --git a/tests/src/runtimeApi/synchronization/copy_coherency.cpp b/tests/src/runtimeApi/synchronization/copy_coherency.cpp index b2a66f61e2..e4bfb98206 100644 --- a/tests/src/runtimeApi/synchronization/copy_coherency.cpp +++ b/tests/src/runtimeApi/synchronization/copy_coherency.cpp @@ -102,8 +102,8 @@ MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel"); __global__ void memsetIntKernel(int * ptr, const int val, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x ; + int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + int stride = hipBlockDim_x * hipGridDim_x ; for (size_t i= gid; i< numElements; i+=stride){ ptr[i] = val; } @@ -112,8 +112,8 @@ memsetIntKernel(int * ptr, const int val, size_t numElements) __global__ void memcpyIntKernel(int *dst, const int * src, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x ; + int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + int stride = hipBlockDim_x * hipGridDim_x ; for (size_t i= gid; i< numElements; i+=stride){ dst[i] = src[i]; } diff --git a/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp b/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp index 2916d51bf9..b34d331682 100644 --- a/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp +++ b/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp @@ -5,8 +5,8 @@ extern "C" __global__ void memcpyIntKernel(hipLaunchParm lp, int *dst, const int * src, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x ; + int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + int stride = hipBlockDim_x * hipGridDim_x ; for (size_t i= gid; i< numElements; i+=stride){ dst[i] = src[i]; } diff --git a/tests/src/specialFunc.cu b/tests/src/specialFunc.cu index 085be062d9..744dcd8926 100644 --- a/tests/src/specialFunc.cu +++ b/tests/src/specialFunc.cu @@ -23,7 +23,7 @@ THE SOFTWARE. void __global__ test_kernel(float *A) { - int tid = blockIdx.x * blockDim.x + threadIdx.x; + int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; float a = __ballot(tid < 16); float b = __shfl(tid < 16); diff --git a/tests/src/stress/hipStressAsync.cpp b/tests/src/stress/hipStressAsync.cpp index a142b41730..1f8cab1a36 100644 --- a/tests/src/stress/hipStressAsync.cpp +++ b/tests/src/stress/hipStressAsync.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. #define ITER 1<<10 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; + int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; if(tx == 0){ for(int i = 0; i=0; i-=stride) { C_d[i] = A_d[i] + B_d[i]; @@ -169,8 +169,8 @@ addCount( const T *A_d, size_t NELEM, int count) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x ; + size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t stride = hipBlockDim_x * hipGridDim_x ; // Deliberately do this in an inefficient way to increase kernel runtime for (int i=0; i=0; i-=stride) { C_d[i] = val; diff --git a/tests/src/texture/hipTextureObj2D.cpp b/tests/src/texture/hipTextureObj2D.cpp index 9ddafd6b1c..443d708418 100644 --- a/tests/src/texture/hipTextureObj2D.cpp +++ b/tests/src/texture/hipTextureObj2D.cpp @@ -17,8 +17,8 @@ __global__ void tex2DKernel(float* outputData, int width, int height) { - int x = blockIdx.x*blockDim.x + threadIdx.x; - int y = blockIdx.y*blockDim.y + threadIdx.y; + int x = hipBlockIdx_x*hipBlockDim_x + hipThreadIdx_x; + int y = hipBlockIdx_y*hipBlockDim_y + hipThreadIdx_y; outputData[y*width + x] = tex2D(textureObject, x, y); } diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp index c42f09d5a0..ebc7a04385 100644 --- a/tests/src/texture/hipTextureRef2D.cpp +++ b/tests/src/texture/hipTextureRef2D.cpp @@ -20,8 +20,8 @@ __global__ void tex2DKernel(float* outputData, int width, int height) { - int x = blockIdx.x*blockDim.x + threadIdx.x; - int y = blockIdx.y*blockDim.y + threadIdx.y; + int x = hipBlockIdx_x*hipBlockDim_x + hipThreadIdx_x; + int y = hipBlockIdx_y*hipBlockDim_y + hipThreadIdx_y; #ifdef __HIP_PLATFORM_HCC__ outputData[y*width + x] = tex2D(tex, textureObject, x, y); #else