diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index ccea650776..de8f0cb9a3 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -220,7 +220,7 @@ if($HIP_PLATFORM eq "hcc"){ } } -if(($HIP_PLATFORM eq "hcc") and defined $ENV{HIP_EXPERIMENTAL}){ +if(($HIP_PLATFORM eq "hcc")){ $EXPORT_LL=" "; $ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_ir.ll\n"; } diff --git a/hipamd/include/hip/hcc_detail/hip_fp16.h b/hipamd/include/hip/hcc_detail/hip_fp16.h index d51a5d1fcd..c779bcfba2 100644 --- a/hipamd/include/hip/hcc_detail/hip_fp16.h +++ b/hipamd/include/hip/hcc_detail/hip_fp16.h @@ -25,213 +25,81 @@ THE SOFTWARE. #include "hip/hip_runtime.h" -#if 0 +#if __clang_major__ == 4 typedef __fp16 __half; typedef struct __attribute__((aligned(4))){ - int a; + union { + __half p[2]; + unsigned int q; + }; } __half2; -extern "C" __half __hip_hadd_gfx803(__half a, __half b); -extern "C" __half __hip_hfma_gfx803(__half a, __half b); -extern "C" __half __hip_hmul_gfx803(__half a, __half b); -extern "C" __half __hip_hsub_gfx803(__half a, __half b); +extern "C" __half __hip_hc_ir_hadd_half(__half, __half); +extern "C" __half __hip_hc_ir_hfma_half(__half, __half, __half); +extern "C" __half __hip_hc_ir_hmul_half(__half, __half); +extern "C" __half __hip_hc_ir_hsub_half(__half, __half); -extern "C" int __hip_hadd2_gfx803(int a, int b); -extern "C" int __hip_hfma2_gfx803(int a, int b); -extern "C" int __hip_hmul2_gfx803(int a, int b); -extern "C" int __hip_hsub2_gfx803(int a, int b); - -__device__ inline __half __hadd(__half a, __half b) { - return __hip_hadd_gfx803(a, b); +__device__ static inline __half __hadd(const __half a, const __half b) { + return __hip_hc_ir_hadd_half(a, b); } -__device__ inline __half __hadd_sat(__half a, __half b) { - return __hip_hadd_gfx803(a, b); +__device__ static inline __half __hadd_sat(__half a, __half b) { + return __hip_hc_ir_hadd_half(a, b); } -__device__ inline __half __hfma(__half a, __half b) { - return __hip_hfma_gfx803(a, b); +__device__ static inline __half __hfma(__half a, __half b, __half c) { + return __hip_hc_ir_hfma_half(a, b, c); } -__device__ inline __half __hfma_sat(__half a, __half b) { - return __hip_hfma_gfx803(a, b); +__device__ static inline __half __hfma_sat(__half a, __half b, __half c) { + return __hip_hc_ir_hfma_half(a, b, c); } -__device__ inline __half __hmul(__half a, __half b) { - return __hip_hmul_gfx803(a, b); +__device__ static inline __half __hmul(__half a, __half b) { + return __hip_hc_ir_hmul_half(a, b); } -__device__ inline __half __hmul_sat(__half a, __half b) { - return __hip_hmul_gfx803(a, b); +__device__ static inline __half __hmul_sat(__half a, __half b) { + return __hip_hc_ir_hmul_half(a, b); } -__device__ inline __half __hsub(__half a, __half b) { - return __hip_hsub_gfx803(a, b); +__device__ static inline __half __hneg(__half a) { + return -a; } -__device__ inline __half __hsub_sat(__half a, __half b) { - return __hip_hsub_gfx803(a, b); +__device__ static inline __half __hsub(__half a, __half b) { + return __hip_hc_ir_hsub_half(a, b); } - -__device__ inline __half2 __hadd2(__half2 a, __half2 b) { - __half2 ret; - ret.a = __hip_hadd2_gfx803(a.a, b.a); - return ret; +__device__ static inline __half __hsub_sat(__half a, __half b) { + return __hip_hc_ir_hsub_half(a, b); } -#else +__device__ static inline __half hdiv(__half a, __half b) { + return a/b; +} -typedef struct{ +#endif + +#if __clang_major__ == 3 + +typedef struct { unsigned x: 16; } __half; - typedef struct __attribute__((aligned(4))){ - __half p,q; + union { + __half p[2]; + unsigned int q; + }; } __half2; -typedef __half half; -typedef __half2 half2; -/* -Arithmetic functions -*/ -__device__ __half __hadd(const __half a, const __half b); - -__device__ __half __hadd_sat(const __half a, const __half b); - -__device__ __half __hfma(const __half a, const __half b, const __half c); - -__device__ __half __hfma_sat(const __half a, const __half b, const __half c); - -__device__ __half __hmul(const __half a, const __half b); - -__device__ __half __hmul_sat(const __half a, const __half b); - -__device__ __half __hneq(const __half a); - -__device__ __half __hsub(const __half a, const __half b); - -__device__ __half __hsub_sat(const __half a, const __half b); - - - -/* -Half2 Arithmetic Instructions -*/ - -__device__ __half2 __hadd2(const __half2 a, const __half2 b); - -__device__ __half2 __hadd2_sat(const __half2 a, const __half2 b); - -__device__ __half2 __hfma2(const __half2 a, const __half2 b, const __half2 c); - -__device__ __half2 __hfma2_sat(const __half2 a, const __half2 b, const __half2 c); - -__device__ __half2 __hmul2(const __half2 a, const __half2 b); - -__device__ __half2 __hmul2_sat(const __half2 a, const __half2 b); - -__device__ __half2 __hneq2(const __half2 a); - -__device__ __half2 __hsub2(const __half2 a, const __half2 b); - -__device__ __half2 __hsub2_sat(const __half2 a, const __half2 b); - -/* -Half Cmps -*/ - -__device__ bool __heq(const __half a, const __half b); - -__device__ bool __hge(const __half a, const __half b); - -__device__ bool __hgt(const __half a, const __half b); - -__device__ bool __hisinf(const __half a); - -__device__ bool __hisnan(const __half a); - -__device__ bool __hle(const __half a, const __half b); - -__device__ bool __hlt(const __half a, const __half b); - -__device__ bool __hne(const __half a, const __half b); - -/* -Half2 Cmps -*/ - -__device__ bool __hbeq2(const __half2 a, const __half2 b); - -__device__ bool __hbge2(const __half2 a, const __half2 b); - -__device__ bool __hbgt2(const __half2 a, const __half2 b); - -__device__ bool __hble2(const __half2 a, const __half2 b); - -__device__ bool __hblt2(const __half2 a, const __half2 b); - -__device__ bool __hbne2(const __half2 a, const __half2 b); - -__device__ __half2 __heq2(const __half2 a, const __half2 b); - -__device__ __half2 __hge2(const __half2 a, const __half2 b); - -__device__ __half2 __hgt2(const __half2 a, const __half2 b); - -__device__ __half2 __hisnan2(const __half2 a); - -__device__ __half2 __hle2(const __half2 a, const __half2 b); - -__device__ __half2 __hlt2(const __half2 a, const __half2 b); - -__device__ __half2 __hne2(const __half2 a, const __half2 b); - - -/* -Half Cnvs and Data Mvmnt -*/ - -__device__ __half2 __float22half2_rn(const float2 a); - -__device__ __half __float2half(const float a); - -__device__ __half2 __float2half2_rn(const float a); - -__device__ __half2 __floats2half2_rn(const float a, const float b); - -__device__ float2 __half22float2(const __half2 a); - -__device__ float __half2float(const __half a); - -__device__ __half2 __half2half2(const __half a); - -__device__ __half2 __halves2half2(const __half a, const __half b); - -__device__ float __high2float(const __half2 a); - -__device__ __half __high2half(const __half2 a); - -__device__ __half2 __high2half2(const __half2 a); - -__device__ __half2 __highs2half2(const __half2 a, const __half2 b); - -__device__ float __low2float(const __half2 a); - -__device__ __half __low2half(const __half2 a); - -__device__ __half2 __low2half2(const __half2 a); - -__device__ __half2 __lows2half2(const __half2 a, const __half2 b); - -__device__ __half2 __lowhigh2highlow(const __half2 a); - -__device__ __half2 __low2half2(const __half2 a, const __half2 b); #endif + + #endif diff --git a/hipamd/src/hip_fp16.cpp b/hipamd/src/hip_fp16.cpp index 0ecac0a6fb..83e0a161c7 100644 --- a/hipamd/src/hip_fp16.cpp +++ b/hipamd/src/hip_fp16.cpp @@ -35,6 +35,8 @@ typedef struct{ }; } struct_float; +#if __clang_major__ == 3 + static __device__ float cvt_half_to_float(__half a){ struct_float ret = {0}; if(a.x == 0){ @@ -64,44 +66,44 @@ static __device__ __half cvt_float_to_half(float b){ } -__device__ __half __hadd(const __half a, const __half b){ +__device__ __half __soft_hadd(const __half a, const __half b){ return cvt_float_to_half(cvt_half_to_float(a)+cvt_half_to_float(b)); } -__device__ __half __hadd_sat(const __half a, const __half b){ +__device__ __half __soft_hadd_sat(const __half a, const __half b){ float f = cvt_half_to_float(a) + cvt_half_to_float(b); return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f))); } -__device__ __half __hfma(const __half a, const __half b, const __half c){ +__device__ __half __soft_hfma(const __half a, const __half b, const __half c){ return cvt_float_to_half(fmaf(cvt_half_to_float(a), cvt_half_to_float(b), cvt_half_to_float(c))); } -__device__ __half __hfma_sat(const __half a, const __half b, const __half c){ +__device__ __half __soft_hfma_sat(const __half a, const __half b, const __half c){ float f = fmaf(cvt_half_to_float(a), cvt_half_to_float(b), cvt_half_to_float(c)); return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f))); } -__device__ __half __hmul(const __half a, const __half b){ +__device__ __half __soft_hmul(const __half a, const __half b){ return cvt_float_to_half(cvt_half_to_float(a)*cvt_half_to_float(b)); } -__device__ __half __hmul_sat(const __half a, const __half b){ +__device__ __half __soft_hmul_sat(const __half a, const __half b){ float f = cvt_half_to_float(a) * cvt_half_to_float(b); return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f))); } -__device__ __half __hneq(const __half a){ +__device__ __half __soft_hneq(const __half a){ __half ret = {a.x}; ret.x ^= 1 << 15; return ret; } -__device__ __half __hsub(const __half a, const __half b){ +__device__ __half __soft_hsub(const __half a, const __half b){ return cvt_float_to_half(cvt_half_to_float(a)-cvt_half_to_float(b)); } -__device__ __half __hsub_sat(const __half a, const __half b){ +__device__ __half __soft_hsub_sat(const __half a, const __half b){ float f = cvt_half_to_float(a) - cvt_half_to_float(b); return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f))); } @@ -111,66 +113,66 @@ __device__ __half __hsub_sat(const __half a, const __half b){ Half2 Arithmetic Instructions */ -__device__ __half2 __hadd2(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hadd2(const __half2 a, const __half2 b){ __half2 ret; - ret.p = __hadd(a.p, b.p); - ret.q = __hadd(a.q, b.q); + ret.p[1] = __soft_hadd(a.p[1], b.p[1]); + ret.p[0] = __soft_hadd(a.p[0], b.p[0]); return ret; } -__device__ __half2 __hadd2_sat(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hadd2_sat(const __half2 a, const __half2 b){ __half2 ret; - ret.p = __hadd_sat(a.p, b.p); - ret.q = __hadd_sat(a.q, b.q); + ret.p[1] = __soft_hadd_sat(a.p[1], b.p[1]); + ret.p[0] = __soft_hadd_sat(a.p[0], b.p[0]); return ret; } -__device__ __half2 __hfma2(const __half2 a, const __half2 b, const __half2 c){ +__device__ __half2 __soft_hfma2(const __half2 a, const __half2 b, const __half2 c){ __half2 ret; - ret.p = __hfma(a.p, b.p, c.p); - ret.q = __hfma(a.q, b.q, c.q); + ret.p[1] = __soft_hfma(a.p[1], b.p[1], c.p[1]); + ret.p[0] = __soft_hfma(a.p[0], b.p[0], c.p[0]); return ret; } -__device__ __half2 __hfma2_sat(const __half2 a, const __half2 b, const __half2 c){ +__device__ __half2 __soft_hfma2_sat(const __half2 a, const __half2 b, const __half2 c){ __half2 ret; - ret.p = __hfma_sat(a.p, b.p, c.p); - ret.q = __hfma_sat(a.q, b.q, c.q); + ret.p[1] = __soft_hfma_sat(a.p[1], b.p[1], c.p[1]); + ret.p[0] = __soft_hfma_sat(a.p[0], b.p[0], c.p[0]); return ret; } -__device__ __half2 __hmul2(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hmul2(const __half2 a, const __half2 b){ __half2 ret; - ret.p = __hmul(a.p, b.p); - ret.q = __hmul(a.q, b.q); + ret.p[1] = __soft_hmul(a.p[1], b.p[1]); + ret.p[0] = __soft_hmul(a.p[0], b.p[0]); return ret; } -__device__ __half2 __hmul2_sat(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hmul2_sat(const __half2 a, const __half2 b){ __half2 ret; - ret.p = __hmul_sat(a.p, b.p); - ret.q = __hmul_sat(a.q, b.q); + ret.p[1] = __soft_hmul_sat(a.p[1], b.p[1]); + ret.p[0] = __soft_hmul_sat(a.p[0], b.p[0]); return ret; } -__device__ __half2 __hneq2(const __half2 a){ +__device__ __half2 __soft_hneq2(const __half2 a){ __half2 ret; - ret.p = __hneq(a.p); - ret.q = __hneq(a.q); + ret.p[1] = __soft_hneq(a.p[1]); + ret.p[0] = __soft_hneq(a.p[0]); return ret; } -__device__ __half2 __hsub2(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hsub2(const __half2 a, const __half2 b){ __half2 ret; - ret.p = __hsub(a.p, b.p); - ret.q = __hsub(a.q, b.q); + ret.p[1] = __soft_hsub(a.p[1], b.p[1]); + ret.p[0] = __soft_hsub(a.p[0], b.p[0]); return ret; } -__device__ __half2 __hsub2_sat(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hsub2_sat(const __half2 a, const __half2 b){ __half2 ret; - ret.p = __hsub_sat(a.p, b.p); - ret.q = __hsub_sat(a.q, b.q); + ret.p[1] = __soft_hsub_sat(a.p[1], b.p[1]); + ret.p[0] = __soft_hsub_sat(a.p[0], b.p[0]); return ret; } @@ -178,23 +180,23 @@ __device__ __half2 __hsub2_sat(const __half2 a, const __half2 b){ Half Cmps */ -__device__ bool __heq(const __half a, const __half b){ +__device__ bool __soft_heq(const __half a, const __half b){ return (a.x == b.x ? true:false); } -__device__ bool __hge(const __half a, const __half b){ +__device__ bool __soft_hge(const __half a, const __half b){ return (cvt_half_to_float(a) >= cvt_half_to_float(b)); } -__device__ bool __hgt(const __half a, const __half b){ +__device__ bool __soft_hgt(const __half a, const __half b){ return (cvt_half_to_float(a) > cvt_half_to_float(b)); } -__device__ bool __hisinf(const __half a){ +__device__ bool __soft_hisinf(const __half a){ return ((a.x == __half_neg_inf) ? -1 : (a.x == __half_pos_inf) ? 1 : 0); } -__device__ bool __hisnan(const __half a){ +__device__ bool __soft_hisnan(const __half a){ if(((a.x & __half_pos_inf) == a.x) || ((a.x & __half_neg_inf) == a.x)){ return true; }else{ @@ -202,15 +204,15 @@ __device__ bool __hisnan(const __half a){ } } -__device__ bool __hle(const __half a, const __half b){ +__device__ bool __soft_hle(const __half a, const __half b){ return (cvt_half_to_float(a) <= cvt_half_to_float(b)); } -__device__ bool __hlt(const __half a, const __half b){ +__device__ bool __soft_hlt(const __half a, const __half b){ return (cvt_half_to_float(a) < cvt_half_to_float(b)); } -__device__ bool __hne(const __half a, const __half b){ +__device__ bool __soft_hne(const __half a, const __half b){ return a.x == b.x ? false : true; } @@ -218,78 +220,78 @@ __device__ bool __hne(const __half a, const __half b){ Half2 Cmps */ -__device__ bool __hbeq2(const __half2 a, const __half2 b){ - return __heq(a.p, b.p) && __heq(a.q, b.q); +__device__ bool __soft_hbeq2(const __half2 a, const __half2 b){ + return __soft_heq(a.p[1], b.p[1]) && __soft_heq(a.p[0], b.p[0]); } -__device__ bool __hbge2(const __half2 a, const __half2 b){ - return __hge(a.p, b.p) && __hge(a.q, b.q); +__device__ bool __soft_hbge2(const __half2 a, const __half2 b){ + return __soft_hge(a.p[1], b.p[1]) && __soft_hge(a.p[0], b.p[0]); } -__device__ bool __hbgt2(const __half2 a, const __half2 b){ - return __hgt(a.p, b.p) && __hgt(a.q, b.q); +__device__ bool __soft_hbgt2(const __half2 a, const __half2 b){ + return __soft_hgt(a.p[1], b.p[1]) && __soft_hgt(a.p[0], b.p[0]); } -__device__ bool __hble2(const __half2 a, const __half2 b){ - return __hle(a.p, b.p) && __hle(a.q, b.q); +__device__ bool __soft_hble2(const __half2 a, const __half2 b){ + return __soft_hle(a.p[1], b.p[1]) && __soft_hle(a.p[0], b.p[0]); } -__device__ bool __hblt2(const __half2 a, const __half2 b){ - return __hlt(a.p, b.p) && __hlt(a.q, b.q); +__device__ bool __soft_hblt2(const __half2 a, const __half2 b){ + return __soft_hlt(a.p[1], b.p[1]) && __soft_hlt(a.p[0], b.p[0]); } -__device__ bool __hbne2(const __half2 a, const __half2 b){ - return __hne(a.p, b.p) && __hne(a.q, b.q); +__device__ bool __soft_hbne2(const __half2 a, const __half2 b){ + return __soft_hne(a.p[1], b.p[1]) && __soft_hne(a.p[0], b.p[0]); } -__device__ __half2 __heq2(const __half2 a, const __half2 b){ +__device__ __half2 __soft_heq2(const __half2 a, const __half2 b){ __half2 ret = {0}; - ret.p = (__heq(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; - ret.q = (__heq(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + ret.p[1] = (__soft_heq(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float; + ret.p[0] = (__soft_heq(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float; return ret; } -__device__ __half2 __hge2(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hge2(const __half2 a, const __half2 b){ __half2 ret = {0}; - ret.p = (__hge(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; - ret.q = (__hge(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + ret.p[1] = (__soft_hge(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float; + ret.p[0] = (__soft_hge(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float; return ret; } -__device__ __half2 __hgt2(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hgt2(const __half2 a, const __half2 b){ __half2 ret = {0}; - ret.p = (__hgt(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; - ret.q = (__hgt(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + ret.p[1] = (__soft_hgt(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float; + ret.p[0] = (__soft_hgt(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float; return ret; } -__device__ __half2 __hisnan2(const __half2 a){ +__device__ __half2 __soft_hisnan2(const __half2 a){ __half2 ret = {0}; - ret.p = __hisnan(a.p) ? __half_value_one_float : __half_value_zero_float; - ret.q = __hisnan(a.q) ? __half_value_one_float : __half_value_zero_float; + ret.p[1] = __soft_hisnan(a.p[1]) ? __half_value_one_float : __half_value_zero_float; + ret.p[0] = __soft_hisnan(a.p[0]) ? __half_value_one_float : __half_value_zero_float; return ret; } -__device__ __half2 __hle2(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hle2(const __half2 a, const __half2 b){ __half2 ret = {0}; - ret.p = (__hle(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; - ret.q = (__hle(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + ret.p[1] = (__soft_hle(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float; + ret.p[0] = (__soft_hle(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float; return ret; } -__device__ __half2 __hlt2(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hlt2(const __half2 a, const __half2 b){ __half2 ret = {0}; - ret.p = (__hlt(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; - ret.q = (__hlt(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + ret.p[1] = (__soft_hlt(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float; + ret.p[0] = (__soft_hlt(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float; return ret; } -__device__ __half2 __hne2(const __half2 a, const __half2 b){ +__device__ __half2 __soft_hne2(const __half2 a, const __half2 b){ __half2 ret = {0}; - ret.p = (__hne(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; - ret.q = (__hne(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + ret.p[1] = (__soft_hne(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float; + ret.p[0] = (__soft_hne(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float; return ret; } @@ -297,78 +299,80 @@ __device__ __half2 __hne2(const __half2 a, const __half2 b){ Half Cnvs and Data Mvmnt */ -__device__ __half2 __float22half2_rn(const float2 a){ +__device__ __half2 __soft_float22half2_rn(const float2 a){ __half2 ret = {0}; - ret.p = cvt_float_to_half(a.x); - ret.q = cvt_float_to_half(a.y); + ret.p[1] = cvt_float_to_half(a.x); + ret.p[0] = cvt_float_to_half(a.y); return ret; } -__device__ __half __float2half(const float a){ +__device__ __half __soft_float2half(const float a){ return cvt_float_to_half(a); } -__device__ __half2 __float2half2_rn(const float a){ +__device__ __half2 __soft_float2half2_rn(const float a){ __half ret = cvt_float_to_half(a); return {ret, ret}; } -__device__ __half2 __floats2half2_rn(const float a, const float b){ +__device__ __half2 __soft_floats2half2_rn(const float a, const float b){ return {cvt_float_to_half(a), cvt_float_to_half(b)}; } -__device__ float2 __half22float2(const __half2 a){ - return {cvt_half_to_float(a.p), cvt_half_to_float(a.q)}; +__device__ float2 __soft_half22float2(const __half2 a){ + return {cvt_half_to_float(a.p[1]), cvt_half_to_float(a.p[0])}; } -__device__ float __half2float(const __half a){ +__device__ float __soft_half2float(const __half a){ return cvt_half_to_float(a); } -__device__ __half2 __half2half2(const __half a){ +__device__ __half2 __soft_half2half2(const __half a){ return {a,a}; } -__device__ __half2 __halves2half2(const __half a, const __half b){ +__device__ __half2 __soft_halves2half2(const __half a, const __half b){ return {a,b}; } -__device__ float __high2float(const __half2 a){ - return cvt_half_to_float(a.p); +__device__ float __soft_high2float(const __half2 a){ + return cvt_half_to_float(a.p[1]); } -__device__ __half __high2half(const __half2 a){ - return a.p; +__device__ __half __soft_high2half(const __half2 a){ + return a.p[1]; } -__device__ __half2 __high2half2(const __half2 a){ - return {a.p, a.p}; +__device__ __half2 __soft_high2half2(const __half2 a){ + return {a.p[1], a.p[1]}; } -__device__ __half2 __highs2half2(const __half2 a, const __half2 b){ - return {a.p, b.p}; +__device__ __half2 __soft_highs2half2(const __half2 a, const __half2 b){ + return {a.p[1], b.p[1]}; } -__device__ float __low2float(const __half2 a){ - return cvt_half_to_float(a.q); +__device__ float __soft_low2float(const __half2 a){ + return cvt_half_to_float(a.p[0]); } -__device__ __half __low2half(const __half2 a){ - return a.q; +__device__ __half __soft_low2half(const __half2 a){ + return a.p[0]; } -__device__ __half2 __low2half2(const __half2 a){ - return {a.q, a.q}; +__device__ __half2 __soft_low2half2(const __half2 a){ + return {a.p[0], a.p[0]}; } -__device__ __half2 __lows2half2(const __half2 a, const __half2 b){ - return {a.q, b.q}; +__device__ __half2 __soft_lows2half2(const __half2 a, const __half2 b){ + return {a.p[0], b.p[0]}; } -__device__ __half2 __lowhigh2highlow(const __half2 a){ - return {a.q, a.p}; +__device__ __half2 __soft_lowhigh2highlow(const __half2 a){ + return {a.p[0], a.p[1]}; } -__device__ __half2 __low2half2(const __half2 a, const __half2 b){ - return {a.q, b.q}; +__device__ __half2 __soft_low2half2(const __half2 a, const __half2 b){ + return {a.p[0], b.p[0]}; } + +#endif diff --git a/hipamd/src/hip_ir.ll b/hipamd/src/hip_ir.ll index 472038df6a..202bf9f215 100644 --- a/hipamd/src/hip_ir.ll +++ b/hipamd/src/hip_ir.ll @@ -12,6 +12,55 @@ define linkonce_odr spir_func void @__threadfence_block() #1 { ret void } +; Lightning does not support inline asm for 16-bit data types +; So, bitcast half to short and then extend to 32bit i32 +; After inline asm, convert back to half +define half @__hip_hc_ir_hadd_half(half %a, half %b) #1 { + %1 = bitcast half %a to i16 + %2 = bitcast half %b to i16 + %3 = zext i16 %1 to i32 + %4 = zext i16 %2 to i32 + %5 = tail call i32 asm "v_add_f16 $0, $1, $2","=v,v,v"(i32 %3, i32 %4) + %6 = trunc i32 %5 to i16 + %7 = bitcast i16 %6 to half + ret half %7 +} + +define half @__hip_hc_ir_hsub_half(half %a, half %b) #1 { + %1 = bitcast half %a to i16 + %2 = bitcast half %b to i16 + %3 = zext i16 %1 to i32 + %4 = zext i16 %2 to i32 + %5 = tail call i32 asm "v_sub_f16 $0, $1, $2","=v,v,v"(i32 %3, i32 %4) + %6 = trunc i32 %5 to i16 + %7 = bitcast i16 %6 to half + ret half %7 +} + +define half @__hip_hc_ir_hmul_half(half %a, half %b) #1 { + %1 = bitcast half %a to i16 + %2 = bitcast half %b to i16 + %3 = zext i16 %1 to i32 + %4 = zext i16 %2 to i32 + %5 = tail call i32 asm "v_mul_f16 $0, $1, $2","=v,v,v"(i32 %3, i32 %4) + %6 = trunc i32 %5 to i16 + %7 = bitcast i16 %6 to half + ret half %7 +} + +define half @__hip_hc_ir_hfma_half(half %a, half %b, half %c) #1 { + %1 = bitcast half %a to i16 + %2 = bitcast half %b to i16 + %3 = bitcast half %c to i16 + %4 = zext i16 %1 to i32 + %5 = zext i16 %2 to i32 + %6 = zext i16 %3 to i32 + %7 = tail call i32 asm "v_mad_f16 $0, $1, $2, $3","=v,v,v,v"(i32 %4, i32 %5, i32 %6) + %8 = trunc i32 %7 to i16 + %9 = bitcast i16 %8 to half + ret half %9 +} + attributes #1 = { alwaysinline nounwind } diff --git a/hipamd/tests/src/deviceLib/hipTestHalf.cpp b/hipamd/tests/src/deviceLib/hipTestHalf.cpp new file mode 100644 index 0000000000..9533bf34ca --- /dev/null +++ b/hipamd/tests/src/deviceLib/hipTestHalf.cpp @@ -0,0 +1,75 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include "test_common.h" +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" +#include "hip/hip_fp16.h" + +#define hInf 0x7C00 +#define hInfPK 0x7C007C00 +#define h65504 0xF7FF +#define h65504PK 0xF7FFF7FF +#define h27 0x4EC0 +#define h27PK 0x4EC04EC0 +#define h7 0x4700 +#define h7PK 0x47004700 +#define h3 0x4200 +#define h3PK 0x42004200 +#define h1 0x3C00 +#define h1PK 0x3C003C00 +#define hPoint5 0x3800 +#define hPoint5PK 0x38003800 +#define hZero 0x0000 +#define hNeg1 0xBC00 +#define hNeg1PK 0xBC00BC00 + +struct holder{ +union{ + __half a; + unsigned short b; +}; +}; + +__global__ void CheckHalf(hipLaunchParm lp, __half* In1, __half* In2, __half* In3, __half* Out){ + Out[0] = __hadd(In1[0], In2[0]); + Out[1] = __hadd_sat(In1[1], In2[1]); + Out[2] = __hfma(In1[2], In2[2],In3[2]); + Out[3] = __hfma_sat(In1[3], In2[3], In3[3]); + Out[4] = __hmul(In1[4], In2[4]); + Out[5] = __hmul_sat(In1[5], In2[5]); + Out[6] = __hneg(In1[6]); + Out[7] = __hsub(In1[7], In2[7]); + Out[8] = __hsub_sat(In1[8], In2[8]); + Out[9] = hdiv(In1[9], In2[9]); +} + + +int main(){ + +}