From 56a1863c7420208a2adb268eaf539b5281038f63 Mon Sep 17 00:00:00 2001 From: scchan Date: Tue, 15 Mar 2016 23:53:21 -0500 Subject: [PATCH 1/2] fix builtins mapping for IDs and Dims --- include/hcc_detail/hip_runtime.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/hcc_detail/hip_runtime.h b/include/hcc_detail/hip_runtime.h index b9a6981fee..f5219369c8 100644 --- a/include/hcc_detail/hip_runtime.h +++ b/include/hcc_detail/hip_runtime.h @@ -428,21 +428,21 @@ __device__ inline float __dsqrt_rz(double x) {return hc::fast_math::sqrt(x); }; /** * Kernel launching */ -#define hipThreadIdx_x (amp_get_local_id(2)) +#define hipThreadIdx_x (amp_get_local_id(0)) #define hipThreadIdx_y (amp_get_local_id(1)) -#define hipThreadIdx_z (amp_get_local_id(0)) +#define hipThreadIdx_z (amp_get_local_id(2)) -#define hipBlockIdx_x (hc_get_group_id(2)) +#define hipBlockIdx_x (hc_get_group_id(0)) #define hipBlockIdx_y (hc_get_group_id(1)) -#define hipBlockIdx_z (hc_get_group_id(0)) +#define hipBlockIdx_z (hc_get_group_id(2)) -#define hipBlockDim_x (amp_get_local_size(2)) +#define hipBlockDim_x (amp_get_local_size(0)) #define hipBlockDim_y (amp_get_local_size(1)) -#define hipBlockDim_z (amp_get_local_size(0)) +#define hipBlockDim_z (amp_get_local_size(2)) -#define hipGridDim_x (hc_get_num_groups(2)) +#define hipGridDim_x (hc_get_num_groups(0)) #define hipGridDim_y (hc_get_num_groups(1)) -#define hipGridDim_z (hc_get_num_groups(0)) +#define hipGridDim_z (hc_get_num_groups(2)) From 0eead76d4e587f6f34c14ea0c0fa2959485325f9 Mon Sep 17 00:00:00 2001 From: scchan Date: Wed, 23 Mar 2016 11:24:37 -0500 Subject: [PATCH 2/2] add compiler version guard to the grid launch fix --- include/hcc_detail/hip_runtime.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/include/hcc_detail/hip_runtime.h b/include/hcc_detail/hip_runtime.h index f5219369c8..79aca00f9a 100644 --- a/include/hcc_detail/hip_runtime.h +++ b/include/hcc_detail/hip_runtime.h @@ -428,6 +428,9 @@ __device__ inline float __dsqrt_rz(double x) {return hc::fast_math::sqrt(x); }; /** * Kernel launching */ + +#if __hcc_workweek__ >= 16123 + #define hipThreadIdx_x (amp_get_local_id(0)) #define hipThreadIdx_y (amp_get_local_id(1)) #define hipThreadIdx_z (amp_get_local_id(2)) @@ -444,7 +447,25 @@ __device__ inline float __dsqrt_rz(double x) {return hc::fast_math::sqrt(x); }; #define hipGridDim_y (hc_get_num_groups(1)) #define hipGridDim_z (hc_get_num_groups(2)) +#else +#define hipThreadIdx_x (amp_get_local_id(2)) +#define hipThreadIdx_y (amp_get_local_id(1)) +#define hipThreadIdx_z (amp_get_local_id(0)) + +#define hipBlockIdx_x (hc_get_group_id(2)) +#define hipBlockIdx_y (hc_get_group_id(1)) +#define hipBlockIdx_z (hc_get_group_id(0)) + +#define hipBlockDim_x (amp_get_local_size(2)) +#define hipBlockDim_y (amp_get_local_size(1)) +#define hipBlockDim_z (amp_get_local_size(0)) + +#define hipGridDim_x (hc_get_num_groups(2)) +#define hipGridDim_y (hc_get_num_groups(1)) +#define hipGridDim_z (hc_get_num_groups(0)) + +#endif #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)