added half2 support

Change-Id: I0f3b9b7037fed97e80ec99f5369c75a63f001aae
This commit is contained in:
Aditya Atluri
2016-12-14 14:18:48 -06:00
förälder d78649b978
incheckning d2daf6ad75
3 ändrade filer med 26 tillägg och 3 borttagningar
+19 -2
Visa fil
@@ -27,20 +27,30 @@ THE SOFTWARE.
// Two-digit clang version built from the compiler's own macros:
// major * 10 + minor (e.g. clang 4.0 -> 40, clang 3.5 -> 35).
// The expansion is parenthesised so it stays a single operand wherever the
// macro is used; without the parentheses, `__CLANG_VERSION__ % 10` or
// `!__CLANG_VERSION__` would bind to only part of the expression.
#define __CLANG_VERSION__ (__clang_major__ * 10 + __clang_minor__)
// Hardware half-precision path: compiled only when HIP_HALF_HW_SUPPORT is
// defined and __CLANG_VERSION__ evaluates to 40.
#ifdef HIP_HALF_HW_SUPPORT
#if __CLANG_VERSION__ == 40
// Scalar half type: an alias for the compiler's __fp16.
typedef __fp16 __half;
// Two-element half type, stored as a single 4-byte-aligned int.
// NOTE(review): presumably the two 16-bit halves are packed into `a` --
// confirm the lane order against the gfx803 helpers.
typedef struct __attribute__((aligned(4))){
int a;
} __half2;
// C-linkage helpers with a gfx803 suffix, defined outside this header.
// Scalar variants take and return __half.
extern "C" __half __hip_hadd_gfx803(__half a, __half b);
// NOTE(review): declared with two operands; a fused multiply-add normally
// takes three -- confirm against the helper's definition.
extern "C" __half __hip_hfma_gfx803(__half a, __half b);
extern "C" __half __hip_hmul_gfx803(__half a, __half b);
extern "C" __half __hip_hsub_gfx803(__half a, __half b);
// "2" variants take and return plain int; the __half2 wrappers pass the
// struct's `a` field through these.
extern "C" int __hip_hadd2_gfx803(int a, int b);
extern "C" int __hip_hfma2_gfx803(int a, int b);
extern "C" int __hip_hmul2_gfx803(int a, int b);
extern "C" int __hip_hsub2_gfx803(int a, int b);
// Half-precision add: hands both operands to the gfx803 helper
// __hip_hadd_gfx803 and returns whatever it produces.
__device__ inline __half __hadd(__half a, __half b) {
  const __half sum = __hip_hadd_gfx803(a, b);
  return sum;
}
// "_sat" variant of the half add: forwards both operands to
// __hip_hadd_gfx803 and returns its result.
// The previous first statement called __hip_add_gfx803, which is not declared
// anywhere in this header and left the intended call unreachable; it has been
// removed.
// NOTE(review): no clamping is applied here, whereas CUDA's __hadd_sat
// saturates the sum to [0.0, 1.0] -- confirm whether saturation is intended.
__device__ inline __half __hadd_sat(__half a, __half b) {
  return __hip_hadd_gfx803(a, b);
}
__device__ inline __half __hfma(__half a, __half b) {
@@ -67,6 +77,13 @@ __device__ inline __half __hsub_sat(__half a, __half b) {
return __hip_hsub_gfx803(a, b);
}
// __half2 add: passes the int payload (`a` field) of each operand to
// __hip_hadd2_gfx803 and wraps the returned int back into a __half2.
__device__ inline __half2 __hadd2(__half2 a, __half2 b) {
  const int packed_sum = __hip_hadd2_gfx803(a.a, b.a);
  __half2 result;
  result.a = packed_sum;
  return result;
}
#else
typedef struct{
+2 -1
Visa fil
@@ -22,6 +22,7 @@ THE SOFTWARE.
#include"hip/hip_fp16.h"
#if __CLANG_VERSION__ == 35
// Mask with only bit 15 set.
// NOTE(review): going by the name, presumably the sign bit of a 16-bit half
// -- confirm at the use sites.
static const unsigned sign_val = 0x8000;
// 0x3C00 is the IEEE 754 binary16 bit pattern for 1.0.
// NOTE(review): assumes __half on this path stores raw binary16 bits -- confirm
// against the __half definition for __CLANG_VERSION__ == 35.
static const __half __half_value_one_float = {0x3C00};
@@ -373,4 +374,4 @@ __device__ __half2 __lowhigh2highlow(const __half2 a){
// Builds a __half2 from the `q` member of `a` followed by the `q` member of
// `b`, in that order.
__device__ __half2 __low2half2(const __half2 a, const __half2 b){
  __half2 combined = {a.q, b.q};
  return combined;
}
#endif
+5
Visa fil
@@ -54,4 +54,9 @@ define linkonce_odr spir_func half @__hip_hsub_gfx803(half %a, half %b) #1 {
ret half %val
}
; i32-in / i32-out helper behind the __half2 add wrapper.
; Emits a single v_add_f16_sdwa with both sources and the destination selecting
; WORD_0.
; The parameter list previously lacked the comma between `i32 %a` and
; `i32 %b`, which does not parse as LLVM IR.
; NOTE(review): only the WORD_0 lanes are selected, and dst_unused is
; UNUSED_PRESERVE on a fresh output register -- confirm whether a second add
; with WORD_1 selectors is needed to produce the upper half of the result.
define linkonce_odr spir_func i32 @__hip_hadd2_gfx803(i32 %a, i32 %b) #1 {
  %val = tail call i32 asm "v_add_f16_sdwa $0, $1, $2 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:WORD_0","=v,v,v"(i32 %a, i32 %b)
  ret i32 %val
}
attributes #1 = { alwaysinline nounwind }