diff --git a/samples/0_Intro/square/square.cu b/samples/0_Intro/square/square.cu
index 82b31db14a..ccaa9ae0bc 100644
--- a/samples/0_Intro/square/square.cu
+++ b/samples/0_Intro/square/square.cu
@@ -40,8 +40,8 @@ template <typename T>
 __global__ void
 vector_square(T *C_d, const T *A_d, size_t N)
 {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;
 
     for (size_t i=offset; i<N; i+=stride) {
         C_d[i] = A_d[i] * A_d[i];
diff --git a/src/device_util.cpp b/src/device_util.cpp
index b6aebdfce0..298c8b3725 100644
--- a/src/device_util.cpp
+++ b/src/device_util.cpp
@@ -45,8 +45,8 @@ __device__ void *__hip_hc_malloc(size_t size)
     {
         return (void*)nullptr;
     }
-    uint32_t totalThreads = blockDim.x * gridDim.x * blockDim.y * gridDim.y * blockDim.z * gridDim.z;
-    uint32_t currentWorkItem = threadIdx.x + blockDim.x * blockIdx.x;
+    uint32_t totalThreads = hipBlockDim_x * hipGridDim_x * hipBlockDim_y * hipGridDim_y * hipBlockDim_z * hipGridDim_z;
+    uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x;
 
     uint32_t numHeapsPerWorkItem = NUM_PAGES / totalThreads;
     uint32_t heapSizePerWorkItem = SIZE_OF_HEAP / totalThreads;
diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp
index c88a1dabc1..59db00796d 100644
--- a/src/hip_memory.cpp
+++ b/src/hip_memory.cpp
@@ -1307,9 +1307,9 @@ namespace
     __global__
     void hip_fill_n(RandomAccessIterator f, N n, T value)
     {
-        const uint32_t grid_dim = gridDim.x * blockDim.x;
+        const uint32_t grid_dim = hipGridDim_x * hipBlockDim_x;
 
-        size_t idx = blockIdx.x * block_dim + threadIdx.x;
+        size_t idx = hipBlockIdx_x * block_dim + hipThreadIdx_x;
         while (idx < n) {
             __builtin_memcpy(
                 reinterpret_cast<void*>(&f[idx]),
diff --git a/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp b/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp
index c8c2e644c3..dac56bf709 100644
--- a/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp
+++ b/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp
@@ -34,7 +34,7 @@ THE SOFTWARE.
 #define NUM_STREAMS 2
 
 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     // Kernel loop designed to execute very slowly... ... ...   so we can test timing-related behavior below
     if(tx == 0){
         for(int i = 0; i<num;i++){
diff --git a/tests/src/deviceLib/hipComplex.cpp b/tests/src/deviceLib/hipComplex.cpp
index c01b430339..8a153b6bf0 100644
--- a/tests/src/deviceLib/hipComplex.cpp
+++ b/tests/src/deviceLib/hipComplex.cpp
@@ -27,7 +27,7 @@ THE SOFTWARE.
 #define SIZE 64<<2
 
 __global__  void getSqAbs(hipLaunchParm lp, float *A, float *B, float *C){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     C[tx] = hipCsqabsf(make_hipFloatComplex(A[tx], B[tx]));
 }
 
diff --git a/tests/src/deviceLib/hipDeviceMemcpy.cpp b/tests/src/deviceLib/hipDeviceMemcpy.cpp
index e845ae8f2f..23ccdee03b 100644
--- a/tests/src/deviceLib/hipDeviceMemcpy.cpp
+++ b/tests/src/deviceLib/hipDeviceMemcpy.cpp
@@ -16,13 +16,13 @@
 
 __global__ void cpy(hipLaunchParm lp, uint32_t *Out, uint32_t *In)
 {
-    int tx = threadIdx.x;
+    int tx = hipThreadIdx_x;
     memcpy(Out + tx, In + tx, sizeof(uint32_t));
 }
 
 __global__ void set(hipLaunchParm lp, uint32_t *ptr, uint8_t val, size_t size)
 {
-    int tx = threadIdx.x;
+    int tx = hipThreadIdx_x;
     memset(ptr + tx, val, sizeof(uint32_t));
 }
 
diff --git a/tests/src/deviceLib/hipFloatMath.cpp b/tests/src/deviceLib/hipFloatMath.cpp
index 9c97e50aa6..7a96b5cd0d 100644
--- a/tests/src/deviceLib/hipFloatMath.cpp
+++ b/tests/src/deviceLib/hipFloatMath.cpp
@@ -35,7 +35,7 @@ THE SOFTWARE.
 
 
 __global__ void floatMath(hipLaunchParm lp, float *In, float *Out) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   Out[tid] = __cosf(In[tid]);
   Out[tid] = __exp10f(Out[tid]);
   Out[tid] = __expf(Out[tid]);
diff --git a/tests/src/deviceLib/hipSimpleAtomicsTest.cpp b/tests/src/deviceLib/hipSimpleAtomicsTest.cpp
index 5485e4afc9..d12bd6fceb 100644
--- a/tests/src/deviceLib/hipSimpleAtomicsTest.cpp
+++ b/tests/src/deviceLib/hipSimpleAtomicsTest.cpp
@@ -217,7 +217,7 @@ int computeGold(int *gpuData, const int len)
 __global__ void testKernel(hipLaunchParm lp,int *g_odata)
 {
     // access thread id
-    const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
+    const unsigned int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
 
     // Test various atomic instructions
 
diff --git a/tests/src/deviceLib/hipTestDevice.cpp b/tests/src/deviceLib/hipTestDevice.cpp
index 068866021b..fa85940839 100644
--- a/tests/src/deviceLib/hipTestDevice.cpp
+++ b/tests/src/deviceLib/hipTestDevice.cpp
@@ -32,82 +32,82 @@ THE SOFTWARE.
 #define SIZE N*sizeof(float)
 
 __global__ void test_sincosf(hipLaunchParm lp, float* a, float* b, float *c){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     sincosf(a[tid], b+tid, c+tid);
 }
 
 __global__ void test_sincospif(hipLaunchParm lp, float* a, float* b, float *c){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     sincospif(a[tid], b+tid, c+tid);
 }
 
 __global__ void test_fdividef(hipLaunchParm lp, float *a, float* b, float *c){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     c[tid] = fdividef(a[tid], b[tid]);
 }
 
 __global__ void test_llrintf(hipLaunchParm lp, float *a, long long int *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = llrintf(a[tid]);
 }
 
 __global__ void test_lrintf(hipLaunchParm lp, float *a, long int *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = lrintf(a[tid]);
 }
 
 __global__ void test_rintf(hipLaunchParm lp, float *a, float *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = rintf(a[tid]);
 }
 
 __global__ void test_llroundf(hipLaunchParm lp, float *a, long long int *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = llroundf(a[tid]);
 }
 
 __global__ void test_lroundf(hipLaunchParm lp, float *a, long int *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = lroundf(a[tid]);
 }
 
 __global__ void test_rhypotf(hipLaunchParm lp, float *a, float* b, float *c){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     c[tid] = rhypotf(a[tid], b[tid]);
 }
 
 __global__ void test_norm3df(hipLaunchParm lp, float *a, float* b, float *c, float *d){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     d[tid] = norm3df(a[tid], b[tid], c[tid]);
 }
 
 __global__ void test_norm4df(hipLaunchParm lp, float *a, float* b, float *c, float *d, float *e){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     e[tid] = norm4df(a[tid], b[tid], c[tid], d[tid]);
 }
 
 __global__ void test_normf(hipLaunchParm lp, float *a, float *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = normf(N, a);
 }
 
 __global__ void test_rnorm3df(hipLaunchParm lp, float *a, float* b, float *c, float *d){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     d[tid] = rnorm3df(a[tid], b[tid], c[tid]);
 }
 
 __global__ void test_rnorm4df(hipLaunchParm lp, float *a, float* b, float *c, float *d, float *e){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     e[tid] = rnorm4df(a[tid], b[tid], c[tid], d[tid]);
 }
 
 __global__ void test_rnormf(hipLaunchParm lp, float *a, float *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = rnormf(N, a);
 }
 
 __global__ void test_erfinvf(hipLaunchParm lp, float *a, float *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = erff(erfinvf(a[tid]));
 }
 
diff --git a/tests/src/deviceLib/hipTestDeviceDouble.cpp b/tests/src/deviceLib/hipTestDeviceDouble.cpp
index 0b5d18fe5a..3b919d0cab 100644
--- a/tests/src/deviceLib/hipTestDeviceDouble.cpp
+++ b/tests/src/deviceLib/hipTestDeviceDouble.cpp
@@ -32,72 +32,72 @@ THE SOFTWARE.
 #define SIZE N*sizeof(double)
 
 __global__ void test_sincos(hipLaunchParm lp, double* a, double* b, double *c){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     sincos(a[tid], b+tid, c+tid);
 }
 
 __global__ void test_sincospi(hipLaunchParm lp, double* a, double* b, double *c){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     sincospi(a[tid], b+tid, c+tid);
 }
 
 __global__ void test_llrint(hipLaunchParm lp, double *a, long long int *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = llrint(a[tid]);
 }
 
 __global__ void test_lrint(hipLaunchParm lp, double *a, long int *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = lrint(a[tid]);
 }
 
 __global__ void test_rint(hipLaunchParm lp, double *a, double *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = rint(a[tid]);
 }
 
 __global__ void test_llround(hipLaunchParm lp, double *a, long long int *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = llround(a[tid]);
 }
 
 __global__ void test_lround(hipLaunchParm lp, double *a, long int *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = lround(a[tid]);
 }
 
 __global__ void test_rhypot(hipLaunchParm lp, double *a, double* b, double *c){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     c[tid] = rhypot(a[tid], b[tid]);
 }
 
 __global__ void test_norm3d(hipLaunchParm lp, double *a, double* b, double *c, double *d){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     d[tid] = norm3d(a[tid], b[tid], c[tid]);
 }
 
 __global__ void test_norm4d(hipLaunchParm lp, double *a, double* b, double *c, double *d, double *e){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     e[tid] = norm4d(a[tid], b[tid], c[tid], d[tid]);
 }
 
 __global__ void test_rnorm3d(hipLaunchParm lp, double *a, double* b, double *c, double *d){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     d[tid] = rnorm3d(a[tid], b[tid], c[tid]);
 }
 
 __global__ void test_rnorm4d(hipLaunchParm lp, double *a, double* b, double *c, double *d, double *e){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     e[tid] = rnorm4d(a[tid], b[tid], c[tid], d[tid]);
 }
 
 __global__ void test_rnorm(hipLaunchParm lp, double *a, double *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = rnorm(N, a);
 }
 
 __global__ void test_erfinv(hipLaunchParm lp, double *a, double *b){
-    int tid = threadIdx.x;
+    int tid = hipThreadIdx_x;
     b[tid] = erf(erfinv(a[tid]));
 }
 
diff --git a/tests/src/deviceLib/hipTestDeviceSymbol.cpp b/tests/src/deviceLib/hipTestDeviceSymbol.cpp
index be064f643c..140f3d97dc 100644
--- a/tests/src/deviceLib/hipTestDeviceSymbol.cpp
+++ b/tests/src/deviceLib/hipTestDeviceSymbol.cpp
@@ -36,7 +36,7 @@ __device__ int globalOut[NUM];
 
 __global__ void Assign(hipLaunchParm lp, int* Out)
 {
-    int tid = threadIdx.x + blockIdx.x * blockDim.x;
+    int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     Out[tid] = globalIn[tid];
     globalOut[tid] = globalIn[tid];
 }
diff --git a/tests/src/deviceLib/hipTestHalf.cpp b/tests/src/deviceLib/hipTestHalf.cpp
index dd91e82440..f56538f080 100644
--- a/tests/src/deviceLib/hipTestHalf.cpp
+++ b/tests/src/deviceLib/hipTestHalf.cpp
@@ -29,7 +29,7 @@ THE SOFTWARE.
 #if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__
 
 __global__ void __halfMath(hipLaunchParm lp, __half *A, __half *B, __half *C) {
-  int tx = threadIdx.x;
+  int tx = hipThreadIdx_x;
   __half a = A[tx];
   __half b = B[tx];
   __half c = C[tx];
@@ -45,7 +45,7 @@ __global__ void __halfMath(hipLaunchParm lp, __half *A, __half *B, __half *C) {
 }
 
 __global__ void __half2Math(hipLaunchParm lp, __half2 *A, __half2 *B, __half2 *C) {
-  int tx = threadIdx.x;
+  int tx = hipThreadIdx_x;
   __half2 a = A[tx];
   __half2 b = B[tx];
   __half2 c = C[tx];
diff --git a/tests/src/deviceLib/hipThreadFence.cpp b/tests/src/deviceLib/hipThreadFence.cpp
index 2f73b68529..1bd9fbe02c 100644
--- a/tests/src/deviceLib/hipThreadFence.cpp
+++ b/tests/src/deviceLib/hipThreadFence.cpp
@@ -33,7 +33,7 @@ THE SOFTWARE.
 
 __global__ void vAdd(hipLaunchParm lp, float *In1, float *In2, float *In3, float *In4, float *Out)
 {
-    int tid = threadIdx.x + blockIdx.x * blockDim.x;
+    int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     In4[tid] = In1[tid] + In2[tid];
     __threadfence();
     In3[tid] = In3[tid] + In4[tid];
diff --git a/tests/src/deviceLib/hip_anyall.cpp b/tests/src/deviceLib/hip_anyall.cpp
index f0b314ce8d..c51c90f33f 100644
--- a/tests/src/deviceLib/hip_anyall.cpp
+++ b/tests/src/deviceLib/hip_anyall.cpp
@@ -37,9 +37,9 @@ __global__ void
 	warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block, int pshift)
 {
 
-   int tid = threadIdx.x + blockIdx.x * blockDim.x;
-   device_any[threadIdx.x>>pshift] = __any(tid -77);
-   device_all[threadIdx.x>>pshift] = __all(tid -77);
+   int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
+   device_any[hipThreadIdx_x>>pshift] = __any(tid -77);
+   device_all[hipThreadIdx_x>>pshift] = __all(tid -77);
 }
 
 int main(int argc, char *argv[])
diff --git a/tests/src/deviceLib/hip_ballot.cpp b/tests/src/deviceLib/hip_ballot.cpp
index 14b8f314a1..742c47a065 100644
--- a/tests/src/deviceLib/hip_ballot.cpp
+++ b/tests/src/deviceLib/hip_ballot.cpp
@@ -34,12 +34,12 @@ __global__ void
 	gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block,int pshift)
 {
 
-   int tid = threadIdx.x + blockIdx.x * blockDim.x;
-   const unsigned int warp_num = threadIdx.x >> pshift;
+   int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
+   const unsigned int warp_num = hipThreadIdx_x >> pshift;
 #ifdef __HIP_PLATFORM_HCC__
-   atomicAdd(&device_ballot[warp_num+blockIdx.x*Num_Warps_per_Block],__popcll(__ballot(tid - 245)));
+   atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popcll(__ballot(tid - 245)));
 #else
-	atomicAdd(&device_ballot[warp_num+blockIdx.x*Num_Warps_per_Block],__popc(__ballot(tid - 245)));
+	atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popc(__ballot(tid - 245)));
 #endif
 
 }
diff --git a/tests/src/deviceLib/hip_brev.cpp b/tests/src/deviceLib/hip_brev.cpp
index c08c39dec9..855a8bec47 100644
--- a/tests/src/deviceLib/hip_brev.cpp
+++ b/tests/src/deviceLib/hip_brev.cpp
@@ -72,8 +72,8 @@ HIP_kernel(hipLaunchParm lp,
              unsigned int* a, unsigned int* b,unsigned long long int* c, unsigned long long int* d, int width, int height)
   {
 
-      int x = blockDim.x * blockIdx.x + threadIdx.x;
-      int y = blockDim.y * blockIdx.y + threadIdx.y;
+      int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+      int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
 
       int i = y * width + x;
       if ( i < (width * height)) {
diff --git a/tests/src/deviceLib/hip_clz.cpp b/tests/src/deviceLib/hip_clz.cpp
index 53fd611184..bdb31f3e8d 100644
--- a/tests/src/deviceLib/hip_clz.cpp
+++ b/tests/src/deviceLib/hip_clz.cpp
@@ -83,8 +83,8 @@ HIP_kernel(hipLaunchParm lp,
     unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height)
   {
 
-      int x = blockDim.x * blockIdx.x + threadIdx.x;
-      int y = blockDim.y * blockIdx.y + threadIdx.y;
+      int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+      int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
 
       int i = y * width + x;
       if ( i < (width * height)) {
diff --git a/tests/src/deviceLib/hip_ffs.cpp b/tests/src/deviceLib/hip_ffs.cpp
index 49530bb298..c855ede060 100644
--- a/tests/src/deviceLib/hip_ffs.cpp
+++ b/tests/src/deviceLib/hip_ffs.cpp
@@ -73,8 +73,8 @@ HIP_kernel(hipLaunchParm lp,
 			    int width, int height)
   {
 
-      int x = blockDim.x * blockIdx.x + threadIdx.x;
-      int y = blockDim.y * blockIdx.y + threadIdx.y;
+      int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+      int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
 
       int i = y * width + x;
       if ( i < (width * height)) {
diff --git a/tests/src/deviceLib/hip_popc.cpp b/tests/src/deviceLib/hip_popc.cpp
index 19dafb4d43..e503e55b42 100644
--- a/tests/src/deviceLib/hip_popc.cpp
+++ b/tests/src/deviceLib/hip_popc.cpp
@@ -64,8 +64,8 @@ HIP_kernel(hipLaunchParm lp,
              unsigned int* a, unsigned int* b,unsigned  int* c, unsigned long long int* d, int width, int height)
   {
 
-      int x = blockDim.x * blockIdx.x + threadIdx.x;
-      int y = blockDim.y * blockIdx.y + threadIdx.y;
+      int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+      int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
 
       int i = y * width + x;
       if ( i < (width * height)) {
diff --git a/tests/src/deviceLib/hip_test_ldg.cpp b/tests/src/deviceLib/hip_test_ldg.cpp
index 4db522cc10..5540c4917d 100644
--- a/tests/src/deviceLib/hip_test_ldg.cpp
+++ b/tests/src/deviceLib/hip_test_ldg.cpp
@@ -57,8 +57,8 @@ vectoradd_float(hipLaunchParm lp,
              T* a, const T*  bm,  int width, int height)
 
   {
-      int x = blockDim.x * blockIdx.x + threadIdx.x;
-      int y = blockDim.y * blockIdx.y + threadIdx.y;
+      int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+      int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
 
       int i = y * width + x;
       if ( i < (width * height)) {
diff --git a/tests/src/deviceLib/hip_test_make_type.cpp b/tests/src/deviceLib/hip_test_make_type.cpp
index 6eba236e12..ce689ceb89 100644
--- a/tests/src/deviceLib/hip_test_make_type.cpp
+++ b/tests/src/deviceLib/hip_test_make_type.cpp
@@ -45,8 +45,8 @@ vectoradd_char1(hipLaunchParm lp,
              char1* a, const char1*  bm, const char1* cm, int width, int height)
 
   {
-      int x = blockDim.x * blockIdx.x + threadIdx.x;
-      int y = blockDim.y * blockIdx.y + threadIdx.y;
+      int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+      int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
 
       int i = y * width + x;
       if ( i < (width * height)) {
@@ -59,8 +59,8 @@ vectoradd_char2(hipLaunchParm lp,
              char2* a, const char2*  bm, const char2* cm, int width, int height)
 
   {
-      int x = blockDim.x * blockIdx.x + threadIdx.x;
-      int y = blockDim.y * blockIdx.y + threadIdx.y;
+      int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+      int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
 
       int i = y * width + x;
       if ( i < (width * height)) {
@@ -73,8 +73,8 @@ vectoradd_char3(hipLaunchParm lp,
              char3* a, const char3*  bm, const char3* cm, int width, int height)
 
   {
-      int x = blockDim.x * blockIdx.x + threadIdx.x;
-      int y = blockDim.y * blockIdx.y + threadIdx.y;
+      int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+      int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
 
       int i = y * width + x;
       if ( i < (width * height)) {
@@ -86,8 +86,8 @@ vectoradd_char4(hipLaunchParm lp,
              char4* a, const char4*  bm, const char4* cm, int width, int height)
 
   {
-      int x = blockDim.x * blockIdx.x + threadIdx.x;
-      int y = blockDim.y * blockIdx.y + threadIdx.y;
+      int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+      int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
 
       int i = y * width + x;
       if ( i < (width * height)) {
@@ -100,8 +100,8 @@ vectoradd_char4(hipLaunchParm lp,
 __kernel__ void vectoradd_float(float* a, const float* b, const float* c, int width, int height) {
 
 
-  int x = blockDimX * blockIdx.x + threadIdx.x;
-  int y = blockDimY * blockIdy.y + threadIdx.y;
+  int x = blockDimX * hipBlockIdx_x + hipThreadIdx_x;
+  int y = blockDimY * blockIdy.y + hipThreadIdx_y;
 
   int i = y * width + x;
   if ( i < (width * height)) {
diff --git a/tests/src/deviceLib/hip_trig.cpp b/tests/src/deviceLib/hip_trig.cpp
index 6ee8dc58ad..5ec28101f3 100644
--- a/tests/src/deviceLib/hip_trig.cpp
+++ b/tests/src/deviceLib/hip_trig.cpp
@@ -36,7 +36,7 @@ THE SOFTWARE.
 #define SIZE LEN<<2
 
 __global__ void kernel_trig(hipLaunchParm lp, float *In, float *sin_d, float *cos_d, float *tan_d, float *sin_pd, float *cos_pd){
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   sin_d[tid] = __sinf(In[tid]);
   cos_d[tid] = __cosf(In[tid]);
   tan_d[tid] = __tanf(In[tid]);
diff --git a/tests/src/experimental/xcompile/hHip.c b/tests/src/experimental/xcompile/hHip.c
index 17e7e9ecf6..2ac4ebc73e 100644
--- a/tests/src/experimental/xcompile/hHip.c
+++ b/tests/src/experimental/xcompile/hHip.c
@@ -29,7 +29,7 @@ THE SOFTWARE.
 
 __global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd, size_t len)
 {
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     if(tx < len)
     {
         Cd[tx] = Ad[tx] + Bd[tx];
diff --git a/tests/src/experimental/xcompile/hipxxKer.cpp b/tests/src/experimental/xcompile/hipxxKer.cpp
index 5dca6c1bca..d1bbed63cd 100644
--- a/tests/src/experimental/xcompile/hipxxKer.cpp
+++ b/tests/src/experimental/xcompile/hipxxKer.cpp
@@ -30,7 +30,7 @@ THE SOFTWARE.
 
 __global__ void Kern(hipLaunchParm lp, float *A)
 {
-	int tx = threadIdx.x + blockIdx.x * blockDim.x;
+	int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
 	A[tx] += 1.0f;
 }
 
diff --git a/tests/src/experimental/xcompile/hxxHip.cpp b/tests/src/experimental/xcompile/hxxHip.cpp
index bca5d64afc..6a748d5c89 100644
--- a/tests/src/experimental/xcompile/hxxHip.cpp
+++ b/tests/src/experimental/xcompile/hxxHip.cpp
@@ -33,7 +33,7 @@ class memManager;
 template<typename T>
 __global__ void Add(hipLaunchParm lp, T* Ad, T* Bd, T* Cd, size_t Len)
 {
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     if(tx < Len)
     {
         Cd[tx] = Ad[tx] + Bd[tx];
diff --git a/tests/src/hipC.c b/tests/src/hipC.c
index efa03bb909..644df6c98f 100644
--- a/tests/src/hipC.c
+++ b/tests/src/hipC.c
@@ -34,7 +34,7 @@ THE SOFTWARE.
 #define SIZE 1024*1024*sizeof(int)
 
 __global__ void Iter(hipLaunchParm lp, int *Ad){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     if(tx == 0){
         for(int i=0;i<ITER;i++){
             Ad[tx] += 1;
diff --git a/tests/src/hipC.cpp b/tests/src/hipC.cpp
index 6eac543ecf..8abb877808 100644
--- a/tests/src/hipC.cpp
+++ b/tests/src/hipC.cpp
@@ -29,7 +29,7 @@ THE SOFTWARE.
 #define SIZE 1024*1024*sizeof(int)
 
 __global__ void Iter(hipLaunchParm lp, int *Ad){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     if(tx == 0){
         for(int i=0;i<ITER;i++){
             Ad[tx] += 1;
diff --git a/tests/src/hipCKernel.c b/tests/src/hipCKernel.c
index 891165f831..7a72cf84ca 100644
--- a/tests/src/hipCKernel.c
+++ b/tests/src/hipCKernel.c
@@ -2,7 +2,7 @@
 #include "hip/hip_runtime_api.h"
 
 __global__ void Kernel(hipLaunchParm lp, float *Ad){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     Ad[tx] += Ad[tx-1];
 }
 
diff --git a/tests/src/kernel/hipDynamicShared.cpp b/tests/src/kernel/hipDynamicShared.cpp
index 4c16c52884..ba19fcaa0d 100644
--- a/tests/src/kernel/hipDynamicShared.cpp
+++ b/tests/src/kernel/hipDynamicShared.cpp
@@ -40,8 +40,8 @@ __global__ void testExternSharedKernel(hipLaunchParm lp, const T* A_d, const T*
     T *sdata = reinterpret_cast<T *>(my_sdata);
 #endif
 
-    size_t gid = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t tid = threadIdx.x;
+    size_t gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t tid = hipThreadIdx_x;
 
     // initialize dynamic shared memory
     if (tid < groupElements) {
diff --git a/tests/src/kernel/hipDynamicShared2.cpp b/tests/src/kernel/hipDynamicShared2.cpp
index 4567ff6fc2..95e70a9956 100644
--- a/tests/src/kernel/hipDynamicShared2.cpp
+++ b/tests/src/kernel/hipDynamicShared2.cpp
@@ -34,7 +34,7 @@ THE SOFTWARE.
 
 __global__ void vectorAdd(hipLaunchParm lp, float *Ad, float *Bd) {
   HIP_DYNAMIC_SHARED(float, sBd);
-  int tx = threadIdx.x;
+  int tx = hipThreadIdx_x;
   for(int i=0;i<LEN/64;i++) {
     sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f;
     Bd[tx + i * 64] = sBd[tx + i * 64];
diff --git a/tests/src/kernel/hipGridLaunch.cpp b/tests/src/kernel/hipGridLaunch.cpp
index dff6527e2c..992dad62f7 100644
--- a/tests/src/kernel/hipGridLaunch.cpp
+++ b/tests/src/kernel/hipGridLaunch.cpp
@@ -48,8 +48,8 @@ vectorADD2( hipLaunchParm lp,
             T *C_d,
             size_t N)
 {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;
 
     for (size_t i=offset; i<N; i+=stride) {
         double foo = __hiloint2double(A_d[i], B_d[i]);
diff --git a/tests/src/kernel/hipLanguageExtensions.cpp b/tests/src/kernel/hipLanguageExtensions.cpp
index 5c5397675c..709fe3f1b8 100644
--- a/tests/src/kernel/hipLanguageExtensions.cpp
+++ b/tests/src/kernel/hipLanguageExtensions.cpp
@@ -62,7 +62,7 @@ __global__ void MyKernel (const hipLaunchParm lp, const float *a, const float *b
 {
     //KERNELBEGIN;
 
-    unsigned gid = threadIdx.x;
+    unsigned gid = hipThreadIdx_x;
     if (gid < N) {
         c[gid] = a[gid] + PlusOne(b[gid]);
     }
@@ -96,7 +96,7 @@ vectorADD(const hipLaunchParm lp,
     int zuzu = deviceVar + 1;
 
 
-    int b = threadIdx.x;
+    int b = hipThreadIdx_x;
     int c;
 #ifdef NOT_YET
     int a = __shfl_up(x, 1);
@@ -119,8 +119,8 @@ vectorADD(const hipLaunchParm lp,
     __syncthreads();
 
 
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;
 
     for (size_t i=offset; i<N; i+=stride) {
 		C_d[i] = A_d[i] + B_d[i];
diff --git a/tests/src/kernel/hipTestConstant.cpp b/tests/src/kernel/hipTestConstant.cpp
index 3922046a2e..e4d187b4d6 100644
--- a/tests/src/kernel/hipTestConstant.cpp
+++ b/tests/src/kernel/hipTestConstant.cpp
@@ -38,7 +38,7 @@ __constant__ int Value[LEN];
 
 __global__ void Get(hipLaunchParm lp, int *Ad)
 {
-    int tid = threadIdx.x + blockIdx.x * blockDim.x;
+    int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     Ad[tid] = Value[tid];
 }
 
diff --git a/tests/src/kernel/hipTestMallocKernel.cpp b/tests/src/kernel/hipTestMallocKernel.cpp
index 4aa0eb1cf7..9dd8b053a5 100644
--- a/tests/src/kernel/hipTestMallocKernel.cpp
+++ b/tests/src/kernel/hipTestMallocKernel.cpp
@@ -33,12 +33,12 @@ THE SOFTWARE.
 #define SIZE NUM * 8
 
 __global__ void Alloc(hipLaunchParm lp, uint64_t *Ptr) {
-    int tid = threadIdx.x + blockIdx.x * blockDim.x;
+    int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     Ptr[tid] = (uint64_t)malloc(128);
 }
 
 __global__ void Free(hipLaunchParm lp, uint64_t *Ptr) {
-    int tid = threadIdx.x + blockIdx.x * blockDim.x;
+    int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     free((void*)Ptr[tid]);
 }
 
diff --git a/tests/src/kernel/hipTestMemKernel.cpp b/tests/src/kernel/hipTestMemKernel.cpp
index c44f275eed..bac1905cd1 100644
--- a/tests/src/kernel/hipTestMemKernel.cpp
+++ b/tests/src/kernel/hipTestMemKernel.cpp
@@ -35,52 +35,52 @@ THE SOFTWARE.
 #define LEN12 12 * 4
 
 __global__ void MemCpy8(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   memcpy(Out + tid*8, In + tid*8, 8);
 }
 
 __global__ void MemCpy9(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   memcpy(Out + tid*9, In + tid*9, 9);
 }
 
 __global__ void MemCpy10(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   memcpy(Out + tid*10, In + tid*10, 10);
 }
 
 __global__ void MemCpy11(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   memcpy(Out + tid*11, In + tid*11, 11);
 }
 
 __global__ void MemCpy12(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   memcpy(Out + tid*12, In + tid*12, 12);
 }
 
 __global__ void MemSet8(hipLaunchParm lp, uint8_t *In) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   memset(In + tid*8, 1, 8);
 }
 
 __global__ void MemSet9(hipLaunchParm lp, uint8_t *In) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   memset(In + tid*9, 1, 9);
 }
 
 __global__ void MemSet10(hipLaunchParm lp, uint8_t *In) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   memset(In + tid*10, 1, 10);
 }
 
 __global__ void MemSet11(hipLaunchParm lp, uint8_t *In) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   memset(In + tid*11, 1, 11);
 }
 
 __global__ void MemSet12(hipLaunchParm lp, uint8_t *In) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
   memset(In + tid*12, 1, 12);
 }
 
diff --git a/tests/src/kernel/inline_asm_vadd.cpp b/tests/src/kernel/inline_asm_vadd.cpp
index 23406eefff..c191401172 100644
--- a/tests/src/kernel/inline_asm_vadd.cpp
+++ b/tests/src/kernel/inline_asm_vadd.cpp
@@ -37,7 +37,7 @@ __global__ void vadd_asm(hipLaunchParm lp,
                                 float *out,
                                 float *in)
 {
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
 
 #ifdef __HIP_PLATFORM_NVCC__
         asm volatile("add.f32 %0,%1,%2;":"=f"(out[i]):"f"(in[i]),"f"(out[i]));
diff --git a/tests/src/kernel/inline_asm_vmac.cpp b/tests/src/kernel/inline_asm_vmac.cpp
index a78da01e74..ba17633fcd 100644
--- a/tests/src/kernel/inline_asm_vmac.cpp
+++ b/tests/src/kernel/inline_asm_vmac.cpp
@@ -35,7 +35,7 @@ __global__ void vmac_asm(hipLaunchParm lp,
                                 float *out,
                                 float *in)
 {
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
 
     asm volatile ("v_mac_f32_e32 %0, %2, %3" : "=v" (out[i]) : "0"(out[i]), "v" (a), "v" (in[i]));
 }
diff --git a/tests/src/kernel/launch_bounds.cpp b/tests/src/kernel/launch_bounds.cpp
index 269b88e34d..3b1476fb11 100644
--- a/tests/src/kernel/launch_bounds.cpp
+++ b/tests/src/kernel/launch_bounds.cpp
@@ -33,7 +33,7 @@ void
 __launch_bounds__(256, 2)
 myKern(hipLaunchParm lp, int *C, const int *A, int N, int xfactor)
 {
-    int tid = (blockIdx.x * blockDim.x + threadIdx.x);
+    int tid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
 
     if (tid < N) {
         C[tid] = A[tid];
diff --git a/tests/src/runtimeApi/device/hipDeviceSynchronize.cpp b/tests/src/runtimeApi/device/hipDeviceSynchronize.cpp
index c8c2e644c3..dac56bf709 100644
--- a/tests/src/runtimeApi/device/hipDeviceSynchronize.cpp
+++ b/tests/src/runtimeApi/device/hipDeviceSynchronize.cpp
@@ -34,7 +34,7 @@ THE SOFTWARE.
 #define NUM_STREAMS 2
 
 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     // Kernel loop designed to execute very slowly... ... ...   so we can test timing-related behavior below
     if(tx == 0){
         for(int i = 0; i<num;i++){
diff --git a/tests/src/runtimeApi/memory/hipHostGetFlags.cpp b/tests/src/runtimeApi/memory/hipHostGetFlags.cpp
index f9359fb5da..9fad60aec8 100644
--- a/tests/src/runtimeApi/memory/hipHostGetFlags.cpp
+++ b/tests/src/runtimeApi/memory/hipHostGetFlags.cpp
@@ -33,7 +33,7 @@ THE SOFTWARE.
 #define SIZE LEN*sizeof(float)
 
 __global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){
-int tx = threadIdx.x + blockIdx.x * blockDim.x;
+int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
 Cd[tx] = Ad[tx] + Bd[tx];
 }
 
diff --git a/tests/src/runtimeApi/memory/hipHostMalloc.cpp b/tests/src/runtimeApi/memory/hipHostMalloc.cpp
index 4210ef0654..f086c818f0 100644
--- a/tests/src/runtimeApi/memory/hipHostMalloc.cpp
+++ b/tests/src/runtimeApi/memory/hipHostMalloc.cpp
@@ -33,13 +33,13 @@
 #define SIZE LEN*sizeof(float)
 
 __global__ void Add(float *Ad, float *Bd, float *Cd){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     Cd[tx] = Ad[tx] + Bd[tx];
 }
 
 
 __global__ void Set(int *Ad, int val){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     Ad[tx] = val;
 }
 
diff --git a/tests/src/runtimeApi/memory/hipHostRegister.cpp b/tests/src/runtimeApi/memory/hipHostRegister.cpp
index a65196ba89..001cb84c07 100644
--- a/tests/src/runtimeApi/memory/hipHostRegister.cpp
+++ b/tests/src/runtimeApi/memory/hipHostRegister.cpp
@@ -29,7 +29,7 @@ THE SOFTWARE.
 #include<malloc.h>
 
 __global__ void Inc(hipLaunchParm lp, float *Ad){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     Ad[tx] = Ad[tx] + float(1);
 }
 
diff --git a/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp b/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp
index 5cd46c808a..c4f4b23dc0 100644
--- a/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp
+++ b/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp
@@ -70,8 +70,8 @@ template<typename T>
 __global__ void
 addK (hipLaunchParm lp, T *A, T K, size_t numElements)
 {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;
 
     for (size_t i=offset; i<numElements; i+=stride) {
         A[i] = A[i] + K;
diff --git a/tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp b/tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp
index 667d4b404b..6042f538b3 100644
--- a/tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp
+++ b/tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp
@@ -31,7 +31,7 @@ THE SOFTWARE.
 
 __global__ void Kernel(hipLaunchParm lp,volatile float* hostRes)
 {
-    int tid = threadIdx.x + blockIdx.x * blockDim.x;
+    int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     hostRes[tid] = tid + 1;
     __threadfence_system();
     // expecting that the data is getting flushed to host here!
diff --git a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp
index 81450f1fba..ee3384d813 100644
--- a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp
+++ b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp
@@ -63,8 +63,8 @@ int enablePeers(int dev0, int dev1)
 __global__ void
 memsetIntKernel(int * ptr, const int val, size_t numElements)
 {
-    int gid = (blockIdx.x * blockDim.x + threadIdx.x);
-    int stride = blockDim.x * gridDim.x ;
+    int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    int stride = hipBlockDim_x * hipGridDim_x ;
     for (size_t i= gid; i< numElements; i+=stride){
        ptr[i] = val;
     }
@@ -73,8 +73,8 @@ memsetIntKernel(int * ptr, const int val, size_t numElements)
 __global__ void
 memcpyIntKernel(const int * src, int* dst, size_t numElements)
 {
-    int gid = (blockIdx.x * blockDim.x + threadIdx.x);
-    int stride = blockDim.x * gridDim.x ;
+    int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    int stride = hipBlockDim_x * hipGridDim_x ;
     for (size_t i= gid; i< numElements; i+=stride){
        dst[i] = src[i];
     }
diff --git a/tests/src/runtimeApi/module/hipModule.cpp b/tests/src/runtimeApi/module/hipModule.cpp
index f2c2137738..1b7b62cff2 100644
--- a/tests/src/runtimeApi/module/hipModule.cpp
+++ b/tests/src/runtimeApi/module/hipModule.cpp
@@ -34,7 +34,7 @@ THE SOFTWARE.
 #define kernel_name "hello_world"
 
 __global__ void Cpy(hipLaunchParm lp, float *Ad, float* Bd){
-  int tx = threadIdx.x;
+  int tx = hipThreadIdx_x;
   Bd[tx] = Ad[tx];
 }
 
diff --git a/tests/src/runtimeApi/module/vcpy_kernel.cpp b/tests/src/runtimeApi/module/vcpy_kernel.cpp
index 7ee1ad333b..0375eee342 100644
--- a/tests/src/runtimeApi/module/vcpy_kernel.cpp
+++ b/tests/src/runtimeApi/module/vcpy_kernel.cpp
@@ -24,7 +24,7 @@ THE SOFTWARE.
 
 extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b)
 {
-    int tx = threadIdx.x;
+    int tx = hipThreadIdx_x;
     b[tx] = a[tx];
 }
 
diff --git a/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp b/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp
index 3727901645..780372f263 100644
--- a/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp
+++ b/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp
@@ -35,7 +35,7 @@ THE SOFTWARE.
 
 template<typename T>
 __global__ void Inc(hipLaunchParm lp, T *Array){
-int tx = threadIdx.x + blockIdx.x * blockDim.x;
+int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
 Array[tx] = Array[tx] + T(1);
 }
 
diff --git a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp
index 66b93a164f..4e343121ed 100644
--- a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp
+++ b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp
@@ -29,7 +29,7 @@ THE SOFTWARE.
 const int NN = 1 << 21;
 
 __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){
-	int tid = threadIdx.x;
+	int tid = hipThreadIdx_x;
 	if(tid < 1){
 		for(int i=0;i<n;i++){
 			x[i] = sqrt(powf(3.14159,i));
@@ -39,7 +39,7 @@ __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){
 }
 
 __global__ void nKernel(hipLaunchParm lp, float *y){
-	int tid = threadIdx.x;
+	int tid = hipThreadIdx_x;
 	y[tid] = y[tid] + 1.0f;
 }
 
diff --git a/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp b/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp
index e91b62231f..63cde4563e 100644
--- a/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp
+++ b/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp
@@ -31,7 +31,7 @@ THE SOFTWARE.
 const int NN = 1 << 21;
 
 __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){
-	int tid = threadIdx.x;
+	int tid = hipThreadIdx_x;
 	if(tid < 1){
 		for(int i=0;i<n;i++){
 			x[i] = sqrt(powf(3.14159,i));
@@ -41,7 +41,7 @@ __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){
 }
 
 __global__ void nKernel(hipLaunchParm lp, float *y){
-	int tid = threadIdx.x;
+	int tid = hipThreadIdx_x;
 	y[tid] = y[tid] + 1.0f;
 }
 
diff --git a/tests/src/runtimeApi/stream/hipNullStream.cpp b/tests/src/runtimeApi/stream/hipNullStream.cpp
index 7a4cd978a1..04a232f3bb 100644
--- a/tests/src/runtimeApi/stream/hipNullStream.cpp
+++ b/tests/src/runtimeApi/stream/hipNullStream.cpp
@@ -41,8 +41,8 @@ vectorADDRepeat(hipLaunchParm lp,
             size_t NELEM,
             int repeat)
 {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;
 
     for (int j=1; j<=repeat;j++) {
         for (size_t i=offset; i<NELEM; i+=stride) {
diff --git a/tests/src/runtimeApi/stream/hipStream.h b/tests/src/runtimeApi/stream/hipStream.h
index 0ce06bbc3f..583275cc91 100644
--- a/tests/src/runtimeApi/stream/hipStream.h
+++ b/tests/src/runtimeApi/stream/hipStream.h
@@ -73,7 +73,7 @@ void D2H(T *Dst, T *Src, size_t size){
 
 template<typename T>
 __global__ void Inc(hipLaunchParm lp, T *In){
-int tx = threadIdx.x + blockIdx.x * blockDim.x;
+int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
 In[tx] = In[tx] + 1;
 }
 
diff --git a/tests/src/runtimeApi/synchronization/copy_coherency.cpp b/tests/src/runtimeApi/synchronization/copy_coherency.cpp
index b2a66f61e2..e4bfb98206 100644
--- a/tests/src/runtimeApi/synchronization/copy_coherency.cpp
+++ b/tests/src/runtimeApi/synchronization/copy_coherency.cpp
@@ -102,8 +102,8 @@ MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel");
 __global__ void
 memsetIntKernel(int * ptr, const int val, size_t numElements)
 {
-    int gid = (blockIdx.x * blockDim.x + threadIdx.x);
-    int stride = blockDim.x * gridDim.x ;
+    int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    int stride = hipBlockDim_x * hipGridDim_x ;
     for (size_t i= gid; i< numElements; i+=stride){
        ptr[i] = val;
     }
@@ -112,8 +112,8 @@ memsetIntKernel(int * ptr, const int val, size_t numElements)
 __global__ void
 memcpyIntKernel(int *dst, const int * src, size_t numElements)
 {
-    int gid = (blockIdx.x * blockDim.x + threadIdx.x);
-    int stride = blockDim.x * gridDim.x ;
+    int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    int stride = hipBlockDim_x * hipGridDim_x ;
     for (size_t i= gid; i< numElements; i+=stride){
        dst[i] = src[i];
     }
diff --git a/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp b/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp
index 2916d51bf9..b34d331682 100644
--- a/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp
+++ b/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp
@@ -5,8 +5,8 @@
 extern "C" __global__ void
 memcpyIntKernel(hipLaunchParm lp, int *dst, const int * src, size_t numElements)
 {
-    int gid = (blockIdx.x * blockDim.x + threadIdx.x);
-    int stride = blockDim.x * gridDim.x ;
+    int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    int stride = hipBlockDim_x * hipGridDim_x ;
     for (size_t i= gid; i< numElements; i+=stride){
        dst[i] = src[i];
     }
diff --git a/tests/src/specialFunc.cu b/tests/src/specialFunc.cu
index 085be062d9..744dcd8926 100644
--- a/tests/src/specialFunc.cu
+++ b/tests/src/specialFunc.cu
@@ -23,7 +23,7 @@ THE SOFTWARE.
 void __global__
 test_kernel(float *A) 
 {
-    int tid = blockIdx.x * blockDim.x + threadIdx.x;
+    int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
 
     float a = __ballot(tid < 16);
     float b = __shfl(tid < 16);
diff --git a/tests/src/stress/hipStressAsync.cpp b/tests/src/stress/hipStressAsync.cpp
index a142b41730..1f8cab1a36 100644
--- a/tests/src/stress/hipStressAsync.cpp
+++ b/tests/src/stress/hipStressAsync.cpp
@@ -30,7 +30,7 @@ THE SOFTWARE.
 #define ITER 1<<10
 
 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     if(tx == 0){
         for(int i = 0; i<num;i++){
             Ad[tx] += 1;
diff --git a/tests/src/stress/hipStressChain.cpp b/tests/src/stress/hipStressChain.cpp
index 55582d85e5..97183a97fa 100644
--- a/tests/src/stress/hipStressChain.cpp
+++ b/tests/src/stress/hipStressChain.cpp
@@ -26,7 +26,7 @@ THE SOFTWARE.
 static size_t size[NUM_SIZE];
 
 __global__ void Add(hipLaunchParm lp, int *Ad){
-    int tx = threadIdx.x;
+    int tx = hipThreadIdx_x;
     Ad[tx] = Ad[tx] + tx;
 }
 
diff --git a/tests/src/stress/hipStressKernel.cpp b/tests/src/stress/hipStressKernel.cpp
index 90cc10894f..52d8fa1fe9 100644
--- a/tests/src/stress/hipStressKernel.cpp
+++ b/tests/src/stress/hipStressKernel.cpp
@@ -26,7 +26,7 @@ THE SOFTWARE.
 static size_t size[NUM_SIZE];
 
 __global__ void Add(hipLaunchParm lp, int *Ad){
-    int tx = threadIdx.x;
+    int tx = hipThreadIdx_x;
     Ad[tx] = Ad[tx] + tx;
 }
 
diff --git a/tests/src/stress/hipStressSync.cpp b/tests/src/stress/hipStressSync.cpp
index ef6a340937..8f0252bb61 100644
--- a/tests/src/stress/hipStressSync.cpp
+++ b/tests/src/stress/hipStressSync.cpp
@@ -30,7 +30,7 @@ THE SOFTWARE.
 #define ITER 1<<10
 
 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
     if(tx == 0){
         for(int i = 0; i<num;i++){
             Ad[tx] += 1;
diff --git a/tests/src/test_common.h b/tests/src/test_common.h
index 1b9744647b..b176da29d4 100644
--- a/tests/src/test_common.h
+++ b/tests/src/test_common.h
@@ -136,8 +136,8 @@ vectorADD(hipLaunchParm lp,
             T *C_d,
             size_t NELEM)
 {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;
 
     for (size_t i=offset; i<NELEM; i+=stride) {
         C_d[i] = A_d[i] + B_d[i];
@@ -153,8 +153,8 @@ vectorADDReverse(hipLaunchParm lp,
             T *C_d,
             size_t NELEM)
 {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;
 
     for (int64_t i=NELEM-stride+offset; i>=0; i-=stride) {
         C_d[i] = A_d[i] + B_d[i];
@@ -169,8 +169,8 @@ addCount( const T *A_d,
         size_t NELEM,
         int count)
 {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;
 
     // Deliberately do this in an inefficient way to increase kernel runtime
     for (int i=0; i<count; i++) {
@@ -188,8 +188,8 @@ addCountReverse( const T *A_d,
         int64_t NELEM,
         int count)
 {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;
 
     // Deliberately do this in an inefficient way to increase kernel runtime
     for (int i=0; i<count; i++) {
@@ -205,8 +205,8 @@ __global__ void
 memsetReverse( T *C_d,  T val,
         int64_t NELEM)
 {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
+    size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    size_t stride = hipBlockDim_x * hipGridDim_x ;
 
     for (int64_t i=NELEM-stride+offset; i>=0; i-=stride) {
         C_d[i] = val;
diff --git a/tests/src/texture/hipTextureObj2D.cpp b/tests/src/texture/hipTextureObj2D.cpp
index 9ddafd6b1c..443d708418 100644
--- a/tests/src/texture/hipTextureObj2D.cpp
+++ b/tests/src/texture/hipTextureObj2D.cpp
@@ -17,8 +17,8 @@ __global__ void tex2DKernel(float* outputData,
                              int width,
                              int height)
 {
-    int x = blockIdx.x*blockDim.x + threadIdx.x;
-    int y = blockIdx.y*blockDim.y + threadIdx.y;
+    int x = hipBlockIdx_x*hipBlockDim_x + hipThreadIdx_x;
+    int y = hipBlockIdx_y*hipBlockDim_y + hipThreadIdx_y;
     outputData[y*width + x] = tex2D<float>(textureObject, x, y);
 }
 
diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp
index c42f09d5a0..ebc7a04385 100644
--- a/tests/src/texture/hipTextureRef2D.cpp
+++ b/tests/src/texture/hipTextureRef2D.cpp
@@ -20,8 +20,8 @@ __global__ void tex2DKernel(float* outputData,
                              int width,
                              int height)
 {
-    int x = blockIdx.x*blockDim.x + threadIdx.x;
-    int y = blockIdx.y*blockDim.y + threadIdx.y;
+    int x = hipBlockIdx_x*hipBlockDim_x + hipThreadIdx_x;
+    int y = hipBlockIdx_y*hipBlockDim_y + hipThreadIdx_y;
 #ifdef __HIP_PLATFORM_HCC__
     outputData[y*width + x] = tex2D(tex, textureObject, x, y);
 #else