From 0005dd5f66631fda1b26bbc392e2115cc7cc262f Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Wed, 8 Mar 2017 01:32:59 +0800 Subject: [PATCH] Changes to HIP to cope with Promote-free HCC Squashed commit of the following: commit c111b5bd10d7c2a5b0b1ad8b07f6e81185b47b39 Author: Wen-Heng (Jack) Chung Date: Sat Mar 4 17:06:46 2017 +0800 Use __device__ for all variables and functions to be used in kernel path Abolish __device and adopt [[hc]] in HIP implementation, so __device__ can be used on all HIP applications, no matter they are variables or functions. Change-Id: I20ca25857ce3bc3e42a5ebf65cafea2c8492f4c7 commit 30c0e4e4701bbf6bd9a7182e0320a71ff73d3a83 Author: Wen-Heng (Jack) Chung Date: Thu Mar 2 12:14:11 2017 +0800 XXX FIXME get around LDS spills caused in Promote-free HCC hipDynamicShared2 uses all 64KB of LDS for computation. But in Promote-free HCC there are cases where LDS spills would occur, which would make the test case to hang. In this workaround commit we reduce the size of dynamic LDS used to get around this known issue, and will revert this commit when LDS spills are resolved in HCC. Change-Id: If648b36200a4f9143951a8129192bcb7ed0bef5e commit e803173be2d73e2f132a7ff7f61e7a20b4083d34 Author: Wen-Heng (Jack) Chung Date: Wed Mar 1 21:41:41 2017 +0800 Fix math functions which take pointer arguments Change-Id: I332c997e640edbc44824691e2a9434c6b3dadefa commit de590c469e213c42090ff83dbd060f25bb1d6047 Author: Wen-Heng (Jack) Chung Date: Wed Mar 1 18:38:54 2017 +0800 Changes to cope with Promote-free HCC - abolish usage of address_space GNU attribute - use __device in file-scope global variables which would be accessed by GPU kernels - temporarily disable some math functions which take pointer arguments Change-Id: I730311dee848e20e763e35cd3980317fce0dce0d Change-Id: I1f6b970b53b9401eeaaab08f04a7b9fed0fb8cf0 --- hipamd/include/hip/hcc_detail/hip_runtime.h | 7 +++---- hipamd/include/hip/hcc_detail/host_defines.h | 2 +- hipamd/src/device_functions.cpp | 4 ++-- hipamd/src/device_util.cpp | 6 +++--- hipamd/src/hip_fp16.cpp | 2 +- hipamd/src/math_functions.cpp | 3 ++- hipamd/tests/src/deviceLib/hipTestDeviceSymbol.cpp | 7 ------- hipamd/tests/src/kernel/hipDynamicShared2.cpp | 2 +- 8 files changed, 13 insertions(+), 20 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 67c63103d3..d256b3f32f 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -250,7 +250,7 @@ __device__ float __shfl_xor(float input, int lane_mask, int width); __host__ __device__ int min(int arg1, int arg2); __host__ __device__ int max(int arg1, int arg2); -__device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr(); +__device__ void* __get_dynamicgroupbaseptr(); /** @@ -418,10 +418,9 @@ do {\ // Macro to replace extern __shared__ declarations // to local variable definitions #define HIP_DYNAMIC_SHARED(type, var) \ - __attribute__((address_space(3))) type* var = \ - (__attribute__((address_space(3))) type*)__get_dynamicgroupbaseptr(); \ + type* var = (type*)__get_dynamicgroupbaseptr(); \ -#define HIP_DYNAMIC_SHARED_ATTRIBUTE __attribute__((address_space(3))) +#define HIP_DYNAMIC_SHARED_ATTRIBUTE #endif // __HCC__ diff --git a/hipamd/include/hip/hcc_detail/host_defines.h b/hipamd/include/hip/hcc_detail/host_defines.h index e401cb24f3..012d3f0346 100644 --- a/hipamd/include/hip/hcc_detail/host_defines.h +++ b/hipamd/include/hip/hcc_detail/host_defines.h @@ -47,7 +47,7 @@ THE SOFTWARE. */ // _restrict is supported by the compiler #define __shared__ tile_static -#define __constant__ __attribute__((address_space(1))) +#define __constant__ __attribute__((hc)) #else // Non-HCC compiler diff --git a/hipamd/src/device_functions.cpp b/hipamd/src/device_functions.cpp index abc9db570e..10d8d3ab89 100644 --- a/hipamd/src/device_functions.cpp +++ b/hipamd/src/device_functions.cpp @@ -41,8 +41,8 @@ struct holder32Bit { }; } __attribute__((aligned(4))); -struct holder64Bit hold64; -struct holder32Bit hold32; +__device__ struct holder64Bit hold64; +__device__ struct holder32Bit hold32; __device__ float __double2float_rd(double x) { diff --git a/hipamd/src/device_util.cpp b/hipamd/src/device_util.cpp index 4b0e7efefd..88ffa7ab4d 100644 --- a/hipamd/src/device_util.cpp +++ b/hipamd/src/device_util.cpp @@ -34,8 +34,8 @@ THE SOFTWARE. This is the best place to put them because the device global variables need to be initialized at the start. */ -__attribute__((address_space(1))) char gpuHeap[SIZE_OF_HEAP]; -__attribute__((address_space(1))) uint32_t gpuFlags[NUM_PAGES]; +__device__ char gpuHeap[SIZE_OF_HEAP]; +__device__ uint32_t gpuFlags[NUM_PAGES]; __device__ void *__hip_hc_malloc(size_t size) { @@ -1083,7 +1083,7 @@ __host__ __device__ int max(int arg1, int arg2) return (int)(hc::precise_math::fmax((float)arg1, (float)arg2)); } -__device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr() +__device__ void* __get_dynamicgroupbaseptr() { return hc::get_dynamic_group_segment_base_pointer(); } diff --git a/hipamd/src/hip_fp16.cpp b/hipamd/src/hip_fp16.cpp index b306a9d3de..e7f75844ff 100644 --- a/hipamd/src/hip_fp16.cpp +++ b/hipamd/src/hip_fp16.cpp @@ -31,7 +31,7 @@ struct hipHalfHolder{ #define HINF 65504 -static struct hipHalfHolder __hInfValue = {HINF}; +__device__ static struct hipHalfHolder __hInfValue = {HINF}; __device__ __half __hadd(__half a, __half b) { return a + b; diff --git a/hipamd/src/math_functions.cpp b/hipamd/src/math_functions.cpp index a1ee9d3ce5..ff876def5f 100644 --- a/hipamd/src/math_functions.cpp +++ b/hipamd/src/math_functions.cpp @@ -202,7 +202,8 @@ __device__ long long int llroundf(float x) int y = hc::precise_math::roundf(x); long long int z = y; return z; -}__device__ float log10f(float x) +} +__device__ float log10f(float x) { return hc::precise_math::log10f(x); } diff --git a/hipamd/tests/src/deviceLib/hipTestDeviceSymbol.cpp b/hipamd/tests/src/deviceLib/hipTestDeviceSymbol.cpp index e58aa58877..429c1d69ef 100644 --- a/hipamd/tests/src/deviceLib/hipTestDeviceSymbol.cpp +++ b/hipamd/tests/src/deviceLib/hipTestDeviceSymbol.cpp @@ -31,15 +31,8 @@ THE SOFTWARE. #define NUM 1024 #define SIZE 1024*4 -#ifdef __HIP_PLATFORM_HCC__ -__attribute__((address_space(1))) int globalIn[NUM]; -__attribute__((address_space(1))) int globalOut[NUM]; -#endif - -#ifdef __HIP_PLATFORM_NVCC__ __device__ int globalIn[NUM]; __device__ int globalOut[NUM]; -#endif __global__ void Assign(hipLaunchParm lp, int* Out) { diff --git a/hipamd/tests/src/kernel/hipDynamicShared2.cpp b/hipamd/tests/src/kernel/hipDynamicShared2.cpp index 0f6ebb4927..ea24e9341f 100644 --- a/hipamd/tests/src/kernel/hipDynamicShared2.cpp +++ b/hipamd/tests/src/kernel/hipDynamicShared2.cpp @@ -29,7 +29,7 @@ THE SOFTWARE. #include "hip/hip_runtime.h" #include "test_common.h" -#define LEN 16*1024 +#define LEN 8*1024 #define SIZE LEN*4 __global__ void vectorAdd(hipLaunchParm lp, float *Ad, float *Bd) {