Started adding native half math library support

1. Removed HIP_EXPERIMENTAL env variable so that device code will be accessed from LLVM IR
2. Removed soft support from headers and moved to hip_fp16.cpp
3. Added LLVM IR + inline asm to hip_ir.ll
4. Added test for fp16
5. Added barriers for hcc 3.5 and hcc 4.0 for half support
a. Which means, hcc 4.0 can parse __fp16 but hcc 3.5 cant
b. HCC 4.0 code is implemented now, hcc 3.5 will be added later

Change-Id: Ic37859b2688ebb02e168bab643d1882bf4727952
Este commit está contenido en:
Aditya Atluri
2017-01-12 11:30:20 -06:00
padre f85d7b7d97
commit c286bf6f8a
Se han modificado 5 ficheros con 278 adiciones y 282 borrados
+1 -1
Ver fichero
@@ -220,7 +220,7 @@ if($HIP_PLATFORM eq "hcc"){
}
}
if(($HIP_PLATFORM eq "hcc") and defined $ENV{HIP_EXPERIMENTAL}){
if(($HIP_PLATFORM eq "hcc")){
$EXPORT_LL=" ";
$ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_ir.ll\n";
}
+41 -173
Ver fichero
@@ -25,213 +25,81 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
#if 0
#if __clang_major__ == 4
typedef __fp16 __half;
typedef struct __attribute__((aligned(4))){
int a;
union {
__half p[2];
unsigned int q;
};
} __half2;
extern "C" __half __hip_hadd_gfx803(__half a, __half b);
extern "C" __half __hip_hfma_gfx803(__half a, __half b);
extern "C" __half __hip_hmul_gfx803(__half a, __half b);
extern "C" __half __hip_hsub_gfx803(__half a, __half b);
extern "C" __half __hip_hc_ir_hadd_half(__half, __half);
extern "C" __half __hip_hc_ir_hfma_half(__half, __half, __half);
extern "C" __half __hip_hc_ir_hmul_half(__half, __half);
extern "C" __half __hip_hc_ir_hsub_half(__half, __half);
extern "C" int __hip_hadd2_gfx803(int a, int b);
extern "C" int __hip_hfma2_gfx803(int a, int b);
extern "C" int __hip_hmul2_gfx803(int a, int b);
extern "C" int __hip_hsub2_gfx803(int a, int b);
__device__ inline __half __hadd(__half a, __half b) {
return __hip_hadd_gfx803(a, b);
__device__ static inline __half __hadd(const __half a, const __half b) {
return __hip_hc_ir_hadd_half(a, b);
}
__device__ inline __half __hadd_sat(__half a, __half b) {
return __hip_hadd_gfx803(a, b);
__device__ static inline __half __hadd_sat(__half a, __half b) {
return __hip_hc_ir_hadd_half(a, b);
}
__device__ inline __half __hfma(__half a, __half b) {
return __hip_hfma_gfx803(a, b);
__device__ static inline __half __hfma(__half a, __half b, __half c) {
return __hip_hc_ir_hfma_half(a, b, c);
}
__device__ inline __half __hfma_sat(__half a, __half b) {
return __hip_hfma_gfx803(a, b);
__device__ static inline __half __hfma_sat(__half a, __half b, __half c) {
return __hip_hc_ir_hfma_half(a, b, c);
}
__device__ inline __half __hmul(__half a, __half b) {
return __hip_hmul_gfx803(a, b);
__device__ static inline __half __hmul(__half a, __half b) {
return __hip_hc_ir_hmul_half(a, b);
}
__device__ inline __half __hmul_sat(__half a, __half b) {
return __hip_hmul_gfx803(a, b);
__device__ static inline __half __hmul_sat(__half a, __half b) {
return __hip_hc_ir_hmul_half(a, b);
}
__device__ inline __half __hsub(__half a, __half b) {
return __hip_hsub_gfx803(a, b);
__device__ static inline __half __hneg(__half a) {
return -a;
}
__device__ inline __half __hsub_sat(__half a, __half b) {
return __hip_hsub_gfx803(a, b);
__device__ static inline __half __hsub(__half a, __half b) {
return __hip_hc_ir_hsub_half(a, b);
}
__device__ inline __half2 __hadd2(__half2 a, __half2 b) {
__half2 ret;
ret.a = __hip_hadd2_gfx803(a.a, b.a);
return ret;
__device__ static inline __half __hsub_sat(__half a, __half b) {
return __hip_hc_ir_hsub_half(a, b);
}
#else
__device__ static inline __half hdiv(__half a, __half b) {
return a/b;
}
typedef struct{
#endif
#if __clang_major__ == 3
typedef struct {
unsigned x: 16;
} __half;
typedef struct __attribute__((aligned(4))){
__half p,q;
union {
__half p[2];
unsigned int q;
};
} __half2;
typedef __half half;
typedef __half2 half2;
/*
Arithmetic functions
*/
__device__ __half __hadd(const __half a, const __half b);
__device__ __half __hadd_sat(const __half a, const __half b);
__device__ __half __hfma(const __half a, const __half b, const __half c);
__device__ __half __hfma_sat(const __half a, const __half b, const __half c);
__device__ __half __hmul(const __half a, const __half b);
__device__ __half __hmul_sat(const __half a, const __half b);
__device__ __half __hneq(const __half a);
__device__ __half __hsub(const __half a, const __half b);
__device__ __half __hsub_sat(const __half a, const __half b);
/*
Half2 Arithmetic Instructions
*/
__device__ __half2 __hadd2(const __half2 a, const __half2 b);
__device__ __half2 __hadd2_sat(const __half2 a, const __half2 b);
__device__ __half2 __hfma2(const __half2 a, const __half2 b, const __half2 c);
__device__ __half2 __hfma2_sat(const __half2 a, const __half2 b, const __half2 c);
__device__ __half2 __hmul2(const __half2 a, const __half2 b);
__device__ __half2 __hmul2_sat(const __half2 a, const __half2 b);
__device__ __half2 __hneq2(const __half2 a);
__device__ __half2 __hsub2(const __half2 a, const __half2 b);
__device__ __half2 __hsub2_sat(const __half2 a, const __half2 b);
/*
Half Cmps
*/
__device__ bool __heq(const __half a, const __half b);
__device__ bool __hge(const __half a, const __half b);
__device__ bool __hgt(const __half a, const __half b);
__device__ bool __hisinf(const __half a);
__device__ bool __hisnan(const __half a);
__device__ bool __hle(const __half a, const __half b);
__device__ bool __hlt(const __half a, const __half b);
__device__ bool __hne(const __half a, const __half b);
/*
Half2 Cmps
*/
__device__ bool __hbeq2(const __half2 a, const __half2 b);
__device__ bool __hbge2(const __half2 a, const __half2 b);
__device__ bool __hbgt2(const __half2 a, const __half2 b);
__device__ bool __hble2(const __half2 a, const __half2 b);
__device__ bool __hblt2(const __half2 a, const __half2 b);
__device__ bool __hbne2(const __half2 a, const __half2 b);
__device__ __half2 __heq2(const __half2 a, const __half2 b);
__device__ __half2 __hge2(const __half2 a, const __half2 b);
__device__ __half2 __hgt2(const __half2 a, const __half2 b);
__device__ __half2 __hisnan2(const __half2 a);
__device__ __half2 __hle2(const __half2 a, const __half2 b);
__device__ __half2 __hlt2(const __half2 a, const __half2 b);
__device__ __half2 __hne2(const __half2 a, const __half2 b);
/*
Half Cnvs and Data Mvmnt
*/
__device__ __half2 __float22half2_rn(const float2 a);
__device__ __half __float2half(const float a);
__device__ __half2 __float2half2_rn(const float a);
__device__ __half2 __floats2half2_rn(const float a, const float b);
__device__ float2 __half22float2(const __half2 a);
__device__ float __half2float(const __half a);
__device__ __half2 __half2half2(const __half a);
__device__ __half2 __halves2half2(const __half a, const __half b);
__device__ float __high2float(const __half2 a);
__device__ __half __high2half(const __half2 a);
__device__ __half2 __high2half2(const __half2 a);
__device__ __half2 __highs2half2(const __half2 a, const __half2 b);
__device__ float __low2float(const __half2 a);
__device__ __half __low2half(const __half2 a);
__device__ __half2 __low2half2(const __half2 a);
__device__ __half2 __lows2half2(const __half2 a, const __half2 b);
__device__ __half2 __lowhigh2highlow(const __half2 a);
__device__ __half2 __low2half2(const __half2 a, const __half2 b);
#endif
#endif
+112 -108
Ver fichero
@@ -35,6 +35,8 @@ typedef struct{
};
} struct_float;
#if __clang_major__ == 3
static __device__ float cvt_half_to_float(__half a){
struct_float ret = {0};
if(a.x == 0){
@@ -64,44 +66,44 @@ static __device__ __half cvt_float_to_half(float b){
}
__device__ __half __hadd(const __half a, const __half b){
__device__ __half __soft_hadd(const __half a, const __half b){
return cvt_float_to_half(cvt_half_to_float(a)+cvt_half_to_float(b));
}
__device__ __half __hadd_sat(const __half a, const __half b){
__device__ __half __soft_hadd_sat(const __half a, const __half b){
float f = cvt_half_to_float(a) + cvt_half_to_float(b);
return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f)));
}
__device__ __half __hfma(const __half a, const __half b, const __half c){
__device__ __half __soft_hfma(const __half a, const __half b, const __half c){
return cvt_float_to_half(fmaf(cvt_half_to_float(a), cvt_half_to_float(b), cvt_half_to_float(c)));
}
__device__ __half __hfma_sat(const __half a, const __half b, const __half c){
__device__ __half __soft_hfma_sat(const __half a, const __half b, const __half c){
float f = fmaf(cvt_half_to_float(a), cvt_half_to_float(b), cvt_half_to_float(c));
return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f)));
}
__device__ __half __hmul(const __half a, const __half b){
__device__ __half __soft_hmul(const __half a, const __half b){
return cvt_float_to_half(cvt_half_to_float(a)*cvt_half_to_float(b));
}
__device__ __half __hmul_sat(const __half a, const __half b){
__device__ __half __soft_hmul_sat(const __half a, const __half b){
float f = cvt_half_to_float(a) * cvt_half_to_float(b);
return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f)));
}
__device__ __half __hneq(const __half a){
__device__ __half __soft_hneq(const __half a){
__half ret = {a.x};
ret.x ^= 1 << 15;
return ret;
}
__device__ __half __hsub(const __half a, const __half b){
__device__ __half __soft_hsub(const __half a, const __half b){
return cvt_float_to_half(cvt_half_to_float(a)-cvt_half_to_float(b));
}
__device__ __half __hsub_sat(const __half a, const __half b){
__device__ __half __soft_hsub_sat(const __half a, const __half b){
float f = cvt_half_to_float(a) - cvt_half_to_float(b);
return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f)));
}
@@ -111,66 +113,66 @@ __device__ __half __hsub_sat(const __half a, const __half b){
Half2 Arithmetic Instructions
*/
__device__ __half2 __hadd2(const __half2 a, const __half2 b){
__device__ __half2 __soft_hadd2(const __half2 a, const __half2 b){
__half2 ret;
ret.p = __hadd(a.p, b.p);
ret.q = __hadd(a.q, b.q);
ret.p[1] = __soft_hadd(a.p[1], b.p[1]);
ret.p[0] = __soft_hadd(a.p[0], b.p[0]);
return ret;
}
__device__ __half2 __hadd2_sat(const __half2 a, const __half2 b){
__device__ __half2 __soft_hadd2_sat(const __half2 a, const __half2 b){
__half2 ret;
ret.p = __hadd_sat(a.p, b.p);
ret.q = __hadd_sat(a.q, b.q);
ret.p[1] = __soft_hadd_sat(a.p[1], b.p[1]);
ret.p[0] = __soft_hadd_sat(a.p[0], b.p[0]);
return ret;
}
__device__ __half2 __hfma2(const __half2 a, const __half2 b, const __half2 c){
__device__ __half2 __soft_hfma2(const __half2 a, const __half2 b, const __half2 c){
__half2 ret;
ret.p = __hfma(a.p, b.p, c.p);
ret.q = __hfma(a.q, b.q, c.q);
ret.p[1] = __soft_hfma(a.p[1], b.p[1], c.p[1]);
ret.p[0] = __soft_hfma(a.p[0], b.p[0], c.p[0]);
return ret;
}
__device__ __half2 __hfma2_sat(const __half2 a, const __half2 b, const __half2 c){
__device__ __half2 __soft_hfma2_sat(const __half2 a, const __half2 b, const __half2 c){
__half2 ret;
ret.p = __hfma_sat(a.p, b.p, c.p);
ret.q = __hfma_sat(a.q, b.q, c.q);
ret.p[1] = __soft_hfma_sat(a.p[1], b.p[1], c.p[1]);
ret.p[0] = __soft_hfma_sat(a.p[0], b.p[0], c.p[0]);
return ret;
}
__device__ __half2 __hmul2(const __half2 a, const __half2 b){
__device__ __half2 __soft_hmul2(const __half2 a, const __half2 b){
__half2 ret;
ret.p = __hmul(a.p, b.p);
ret.q = __hmul(a.q, b.q);
ret.p[1] = __soft_hmul(a.p[1], b.p[1]);
ret.p[0] = __soft_hmul(a.p[0], b.p[0]);
return ret;
}
__device__ __half2 __hmul2_sat(const __half2 a, const __half2 b){
__device__ __half2 __soft_hmul2_sat(const __half2 a, const __half2 b){
__half2 ret;
ret.p = __hmul_sat(a.p, b.p);
ret.q = __hmul_sat(a.q, b.q);
ret.p[1] = __soft_hmul_sat(a.p[1], b.p[1]);
ret.p[0] = __soft_hmul_sat(a.p[0], b.p[0]);
return ret;
}
__device__ __half2 __hneq2(const __half2 a){
__device__ __half2 __soft_hneq2(const __half2 a){
__half2 ret;
ret.p = __hneq(a.p);
ret.q = __hneq(a.q);
ret.p[1] = __soft_hneq(a.p[1]);
ret.p[0] = __soft_hneq(a.p[0]);
return ret;
}
__device__ __half2 __hsub2(const __half2 a, const __half2 b){
__device__ __half2 __soft_hsub2(const __half2 a, const __half2 b){
__half2 ret;
ret.p = __hsub(a.p, b.p);
ret.q = __hsub(a.q, b.q);
ret.p[1] = __soft_hsub(a.p[1], b.p[1]);
ret.p[0] = __soft_hsub(a.p[0], b.p[0]);
return ret;
}
__device__ __half2 __hsub2_sat(const __half2 a, const __half2 b){
__device__ __half2 __soft_hsub2_sat(const __half2 a, const __half2 b){
__half2 ret;
ret.p = __hsub_sat(a.p, b.p);
ret.q = __hsub_sat(a.q, b.q);
ret.p[1] = __soft_hsub_sat(a.p[1], b.p[1]);
ret.p[0] = __soft_hsub_sat(a.p[0], b.p[0]);
return ret;
}
@@ -178,23 +180,23 @@ __device__ __half2 __hsub2_sat(const __half2 a, const __half2 b){
Half Cmps
*/
__device__ bool __heq(const __half a, const __half b){
__device__ bool __soft_heq(const __half a, const __half b){
return (a.x == b.x ? true:false);
}
__device__ bool __hge(const __half a, const __half b){
__device__ bool __soft_hge(const __half a, const __half b){
return (cvt_half_to_float(a) >= cvt_half_to_float(b));
}
__device__ bool __hgt(const __half a, const __half b){
__device__ bool __soft_hgt(const __half a, const __half b){
return (cvt_half_to_float(a) > cvt_half_to_float(b));
}
__device__ bool __hisinf(const __half a){
__device__ bool __soft_hisinf(const __half a){
return ((a.x == __half_neg_inf) ? -1 : (a.x == __half_pos_inf) ? 1 : 0);
}
__device__ bool __hisnan(const __half a){
__device__ bool __soft_hisnan(const __half a){
if(((a.x & __half_pos_inf) == a.x) || ((a.x & __half_neg_inf) == a.x)){
return true;
}else{
@@ -202,15 +204,15 @@ __device__ bool __hisnan(const __half a){
}
}
__device__ bool __hle(const __half a, const __half b){
__device__ bool __soft_hle(const __half a, const __half b){
return (cvt_half_to_float(a) <= cvt_half_to_float(b));
}
__device__ bool __hlt(const __half a, const __half b){
__device__ bool __soft_hlt(const __half a, const __half b){
return (cvt_half_to_float(a) < cvt_half_to_float(b));
}
__device__ bool __hne(const __half a, const __half b){
__device__ bool __soft_hne(const __half a, const __half b){
return a.x == b.x ? false : true;
}
@@ -218,78 +220,78 @@ __device__ bool __hne(const __half a, const __half b){
Half2 Cmps
*/
__device__ bool __hbeq2(const __half2 a, const __half2 b){
return __heq(a.p, b.p) && __heq(a.q, b.q);
__device__ bool __soft_hbeq2(const __half2 a, const __half2 b){
return __soft_heq(a.p[1], b.p[1]) && __soft_heq(a.p[0], b.p[0]);
}
__device__ bool __hbge2(const __half2 a, const __half2 b){
return __hge(a.p, b.p) && __hge(a.q, b.q);
__device__ bool __soft_hbge2(const __half2 a, const __half2 b){
return __soft_hge(a.p[1], b.p[1]) && __soft_hge(a.p[0], b.p[0]);
}
__device__ bool __hbgt2(const __half2 a, const __half2 b){
return __hgt(a.p, b.p) && __hgt(a.q, b.q);
__device__ bool __soft_hbgt2(const __half2 a, const __half2 b){
return __soft_hgt(a.p[1], b.p[1]) && __soft_hgt(a.p[0], b.p[0]);
}
__device__ bool __hble2(const __half2 a, const __half2 b){
return __hle(a.p, b.p) && __hle(a.q, b.q);
__device__ bool __soft_hble2(const __half2 a, const __half2 b){
return __soft_hle(a.p[1], b.p[1]) && __soft_hle(a.p[0], b.p[0]);
}
__device__ bool __hblt2(const __half2 a, const __half2 b){
return __hlt(a.p, b.p) && __hlt(a.q, b.q);
__device__ bool __soft_hblt2(const __half2 a, const __half2 b){
return __soft_hlt(a.p[1], b.p[1]) && __soft_hlt(a.p[0], b.p[0]);
}
__device__ bool __hbne2(const __half2 a, const __half2 b){
return __hne(a.p, b.p) && __hne(a.q, b.q);
__device__ bool __soft_hbne2(const __half2 a, const __half2 b){
return __soft_hne(a.p[1], b.p[1]) && __soft_hne(a.p[0], b.p[0]);
}
__device__ __half2 __heq2(const __half2 a, const __half2 b){
__device__ __half2 __soft_heq2(const __half2 a, const __half2 b){
__half2 ret = {0};
ret.p = (__heq(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float;
ret.q = (__heq(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float;
ret.p[1] = (__soft_heq(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float;
ret.p[0] = (__soft_heq(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float;
return ret;
}
__device__ __half2 __hge2(const __half2 a, const __half2 b){
__device__ __half2 __soft_hge2(const __half2 a, const __half2 b){
__half2 ret = {0};
ret.p = (__hge(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float;
ret.q = (__hge(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float;
ret.p[1] = (__soft_hge(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float;
ret.p[0] = (__soft_hge(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float;
return ret;
}
__device__ __half2 __hgt2(const __half2 a, const __half2 b){
__device__ __half2 __soft_hgt2(const __half2 a, const __half2 b){
__half2 ret = {0};
ret.p = (__hgt(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float;
ret.q = (__hgt(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float;
ret.p[1] = (__soft_hgt(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float;
ret.p[0] = (__soft_hgt(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float;
return ret;
}
__device__ __half2 __hisnan2(const __half2 a){
__device__ __half2 __soft_hisnan2(const __half2 a){
__half2 ret = {0};
ret.p = __hisnan(a.p) ? __half_value_one_float : __half_value_zero_float;
ret.q = __hisnan(a.q) ? __half_value_one_float : __half_value_zero_float;
ret.p[1] = __soft_hisnan(a.p[1]) ? __half_value_one_float : __half_value_zero_float;
ret.p[0] = __soft_hisnan(a.p[0]) ? __half_value_one_float : __half_value_zero_float;
return ret;
}
__device__ __half2 __hle2(const __half2 a, const __half2 b){
__device__ __half2 __soft_hle2(const __half2 a, const __half2 b){
__half2 ret = {0};
ret.p = (__hle(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float;
ret.q = (__hle(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float;
ret.p[1] = (__soft_hle(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float;
ret.p[0] = (__soft_hle(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float;
return ret;
}
__device__ __half2 __hlt2(const __half2 a, const __half2 b){
__device__ __half2 __soft_hlt2(const __half2 a, const __half2 b){
__half2 ret = {0};
ret.p = (__hlt(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float;
ret.q = (__hlt(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float;
ret.p[1] = (__soft_hlt(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float;
ret.p[0] = (__soft_hlt(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float;
return ret;
}
__device__ __half2 __hne2(const __half2 a, const __half2 b){
__device__ __half2 __soft_hne2(const __half2 a, const __half2 b){
__half2 ret = {0};
ret.p = (__hne(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float;
ret.q = (__hne(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float;
ret.p[1] = (__soft_hne(a.p[1], b.p[1])) ? __half_value_one_float : __half_value_zero_float;
ret.p[0] = (__soft_hne(a.p[0], b.p[0])) ? __half_value_one_float : __half_value_zero_float;
return ret;
}
@@ -297,78 +299,80 @@ __device__ __half2 __hne2(const __half2 a, const __half2 b){
Half Cnvs and Data Mvmnt
*/
__device__ __half2 __float22half2_rn(const float2 a){
__device__ __half2 __soft_float22half2_rn(const float2 a){
__half2 ret = {0};
ret.p = cvt_float_to_half(a.x);
ret.q = cvt_float_to_half(a.y);
ret.p[1] = cvt_float_to_half(a.x);
ret.p[0] = cvt_float_to_half(a.y);
return ret;
}
__device__ __half __float2half(const float a){
__device__ __half __soft_float2half(const float a){
return cvt_float_to_half(a);
}
__device__ __half2 __float2half2_rn(const float a){
__device__ __half2 __soft_float2half2_rn(const float a){
__half ret = cvt_float_to_half(a);
return {ret, ret};
}
__device__ __half2 __floats2half2_rn(const float a, const float b){
__device__ __half2 __soft_floats2half2_rn(const float a, const float b){
return {cvt_float_to_half(a), cvt_float_to_half(b)};
}
__device__ float2 __half22float2(const __half2 a){
return {cvt_half_to_float(a.p), cvt_half_to_float(a.q)};
__device__ float2 __soft_half22float2(const __half2 a){
return {cvt_half_to_float(a.p[1]), cvt_half_to_float(a.p[0])};
}
__device__ float __half2float(const __half a){
__device__ float __soft_half2float(const __half a){
return cvt_half_to_float(a);
}
__device__ __half2 __half2half2(const __half a){
__device__ __half2 __soft_half2half2(const __half a){
return {a,a};
}
__device__ __half2 __halves2half2(const __half a, const __half b){
__device__ __half2 __soft_halves2half2(const __half a, const __half b){
return {a,b};
}
__device__ float __high2float(const __half2 a){
return cvt_half_to_float(a.p);
__device__ float __soft_high2float(const __half2 a){
return cvt_half_to_float(a.p[1]);
}
__device__ __half __high2half(const __half2 a){
return a.p;
__device__ __half __soft_high2half(const __half2 a){
return a.p[1];
}
__device__ __half2 __high2half2(const __half2 a){
return {a.p, a.p};
__device__ __half2 __soft_high2half2(const __half2 a){
return {a.p[1], a.p[1]};
}
__device__ __half2 __highs2half2(const __half2 a, const __half2 b){
return {a.p, b.p};
__device__ __half2 __soft_highs2half2(const __half2 a, const __half2 b){
return {a.p[1], b.p[1]};
}
__device__ float __low2float(const __half2 a){
return cvt_half_to_float(a.q);
__device__ float __soft_low2float(const __half2 a){
return cvt_half_to_float(a.p[0]);
}
__device__ __half __low2half(const __half2 a){
return a.q;
__device__ __half __soft_low2half(const __half2 a){
return a.p[0];
}
__device__ __half2 __low2half2(const __half2 a){
return {a.q, a.q};
__device__ __half2 __soft_low2half2(const __half2 a){
return {a.p[0], a.p[0]};
}
__device__ __half2 __lows2half2(const __half2 a, const __half2 b){
return {a.q, b.q};
__device__ __half2 __soft_lows2half2(const __half2 a, const __half2 b){
return {a.p[0], b.p[0]};
}
__device__ __half2 __lowhigh2highlow(const __half2 a){
return {a.q, a.p};
__device__ __half2 __soft_lowhigh2highlow(const __half2 a){
return {a.p[0], a.p[1]};
}
__device__ __half2 __low2half2(const __half2 a, const __half2 b){
return {a.q, b.q};
__device__ __half2 __soft_low2half2(const __half2 a, const __half2 b){
return {a.p[0], b.p[0]};
}
#endif
+49
Ver fichero
@@ -12,6 +12,55 @@ define linkonce_odr spir_func void @__threadfence_block() #1 {
ret void
}
; Lightning does not support inline asm for 16-bit data types
; So, bitcast half to short and then extend to 32bit i32
; After inline asm, convert back to half
define half @__hip_hc_ir_hadd_half(half %a, half %b) #1 {
%1 = bitcast half %a to i16
%2 = bitcast half %b to i16
%3 = zext i16 %1 to i32
%4 = zext i16 %2 to i32
%5 = tail call i32 asm "v_add_f16 $0, $1, $2","=v,v,v"(i32 %3, i32 %4)
%6 = trunc i32 %5 to i16
%7 = bitcast i16 %6 to half
ret half %7
}
define half @__hip_hc_ir_hsub_half(half %a, half %b) #1 {
%1 = bitcast half %a to i16
%2 = bitcast half %b to i16
%3 = zext i16 %1 to i32
%4 = zext i16 %2 to i32
%5 = tail call i32 asm "v_sub_f16 $0, $1, $2","=v,v,v"(i32 %3, i32 %4)
%6 = trunc i32 %5 to i16
%7 = bitcast i16 %6 to half
ret half %7
}
define half @__hip_hc_ir_hmul_half(half %a, half %b) #1 {
%1 = bitcast half %a to i16
%2 = bitcast half %b to i16
%3 = zext i16 %1 to i32
%4 = zext i16 %2 to i32
%5 = tail call i32 asm "v_mul_f16 $0, $1, $2","=v,v,v"(i32 %3, i32 %4)
%6 = trunc i32 %5 to i16
%7 = bitcast i16 %6 to half
ret half %7
}
define half @__hip_hc_ir_hfma_half(half %a, half %b, half %c) #1 {
%1 = bitcast half %a to i16
%2 = bitcast half %b to i16
%3 = bitcast half %c to i16
%4 = zext i16 %1 to i32
%5 = zext i16 %2 to i32
%6 = zext i16 %3 to i32
%7 = tail call i32 asm "v_mad_f16 $0, $1, $2, $3","=v,v,v,v"(i32 %4, i32 %5, i32 %6)
%8 = trunc i32 %7 to i16
%9 = bitcast i16 %8 to half
ret half %9
}
attributes #1 = { alwaysinline nounwind }
+75
Ver fichero
@@ -0,0 +1,75 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp
* RUN: %t
* HIT_END
*/
#include "test_common.h"
#include "hip/hip_runtime.h"
#include "hip/hip_runtime_api.h"
#include "hip/hip_fp16.h"
#define hInf 0x7C00
#define hInfPK 0x7C007C00
#define h65504 0xF7FF
#define h65504PK 0xF7FFF7FF
#define h27 0x4EC0
#define h27PK 0x4EC04EC0
#define h7 0x4700
#define h7PK 0x47004700
#define h3 0x4200
#define h3PK 0x42004200
#define h1 0x3C00
#define h1PK 0x3C003C00
#define hPoint5 0x3800
#define hPoint5PK 0x38003800
#define hZero 0x0000
#define hNeg1 0xBC00
#define hNeg1PK 0xBC00BC00
struct holder{
union{
__half a;
unsigned short b;
};
};
__global__ void CheckHalf(hipLaunchParm lp, __half* In1, __half* In2, __half* In3, __half* Out){
Out[0] = __hadd(In1[0], In2[0]);
Out[1] = __hadd_sat(In1[1], In2[1]);
Out[2] = __hfma(In1[2], In2[2],In3[2]);
Out[3] = __hfma_sat(In1[3], In2[3], In3[3]);
Out[4] = __hmul(In1[4], In2[4]);
Out[5] = __hmul_sat(In1[5], In2[5]);
Out[6] = __hneg(In1[6]);
Out[7] = __hsub(In1[7], In2[7]);
Out[8] = __hsub_sat(In1[8], In2[8]);
Out[9] = hdiv(In1[9], In2[9]);
}
int main(){
}