diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h index 5acf114518..3b03174708 100644 --- a/include/hip/hcc_detail/hip_fp16.h +++ b/include/hip/hcc_detail/hip_fp16.h @@ -27,20 +27,30 @@ THE SOFTWARE. #define __CLANG_VERSION__ __clang_major__ * 10 + __clang_minor__ -#ifdef HIP_HALF_HW_SUPPORT +#if __CLANG_VERSION__ == 40 typedef __fp16 __half; + +typedef struct __attribute__((aligned(4))){ + int a; +} __half2; + extern "C" __half __hip_hadd_gfx803(__half a, __half b); extern "C" __half __hip_hfma_gfx803(__half a, __half b); extern "C" __half __hip_hmul_gfx803(__half a, __half b); extern "C" __half __hip_hsub_gfx803(__half a, __half b); +extern "C" int __hip_hadd2_gfx803(int a, int b); +extern "C" int __hip_hfma2_gfx803(int a, int b); +extern "C" int __hip_hmul2_gfx803(int a, int b); +extern "C" int __hip_hsub2_gfx803(int a, int b); + __device__ inline __half __hadd(__half a, __half b) { return __hip_hadd_gfx803(a, b); } __device__ inline __half __hadd_sat(__half a, __half b) { - return __hip_add_gfx803(a, b); + return __hip_hadd_gfx803(a, b); } __device__ inline __half __hfma(__half a, __half b) { @@ -67,6 +77,13 @@ __device__ inline __half __hsub_sat(__half a, __half b) { return __hip_hsub_gfx803(a, b); } + +__device__ inline __half2 __hadd2(__half2 a, __half2 b) { + __half2 ret; + ret.a = __hip_hadd2_gfx803(a.a, b.a); + return ret; +} + #else typedef struct{ diff --git a/src/hip_fp16.cpp b/src/hip_fp16.cpp index 63d91eb107..3bf6bd395f 100644 --- a/src/hip_fp16.cpp +++ b/src/hip_fp16.cpp @@ -22,6 +22,7 @@ THE SOFTWARE. #include"hip/hip_fp16.h" +#if __CLANG_VERSION__ == 35 static const unsigned sign_val = 0x8000; static const __half __half_value_one_float = {0x3C00}; @@ -373,4 +374,4 @@ __device__ __half2 __lowhigh2highlow(const __half2 a){ __device__ __half2 __low2half2(const __half2 a, const __half2 b){ return {a.q, b.q}; } - +#endif diff --git a/src/hip_ir.ll b/src/hip_ir.ll index 623be19084..831c4159f0 100644 --- a/src/hip_ir.ll +++ b/src/hip_ir.ll @@ -54,4 +54,9 @@ define linkonce_odr spir_func half @__hip_hsub_gfx803(half %a, half %b) #1 { ret half %val } +define linkonce_odr spir_func i32 @__hip_hadd2_gfx803(i32 %a i32 %b) #1 { + %val = tail call i32 asm "v_add_f16_sdwa $0, $1, $2 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:WORD_0","=v,v,v"(i32 %a, i32 %b) + ret i32 %val +} + attributes #1 = { alwaysinline nounwind }