added half2 support
Change-Id: I0f3b9b7037fed97e80ec99f5369c75a63f001aae
This commit is contained in:
@@ -27,20 +27,30 @@ THE SOFTWARE.
|
||||
|
||||
#define __CLANG_VERSION__ __clang_major__ * 10 + __clang_minor__
|
||||
|
||||
#ifdef HIP_HALF_HW_SUPPORT
|
||||
#if __CLANG_VERSION__ == 40
|
||||
|
||||
typedef __fp16 __half;
|
||||
|
||||
typedef struct __attribute__((aligned(4))){
|
||||
int a;
|
||||
} __half2;
|
||||
|
||||
extern "C" __half __hip_hadd_gfx803(__half a, __half b);
|
||||
extern "C" __half __hip_hfma_gfx803(__half a, __half b);
|
||||
extern "C" __half __hip_hmul_gfx803(__half a, __half b);
|
||||
extern "C" __half __hip_hsub_gfx803(__half a, __half b);
|
||||
|
||||
extern "C" int __hip_hadd2_gfx803(int a, int b);
|
||||
extern "C" int __hip_hfma2_gfx803(int a, int b);
|
||||
extern "C" int __hip_hmul2_gfx803(int a, int b);
|
||||
extern "C" int __hip_hsub2_gfx803(int a, int b);
|
||||
|
||||
__device__ inline __half __hadd(__half a, __half b) {
|
||||
return __hip_hadd_gfx803(a, b);
|
||||
}
|
||||
|
||||
__device__ inline __half __hadd_sat(__half a, __half b) {
|
||||
return __hip_add_gfx803(a, b);
|
||||
return __hip_hadd_gfx803(a, b);
|
||||
}
|
||||
|
||||
__device__ inline __half __hfma(__half a, __half b) {
|
||||
@@ -67,6 +77,13 @@ __device__ inline __half __hsub_sat(__half a, __half b) {
|
||||
return __hip_hsub_gfx803(a, b);
|
||||
}
|
||||
|
||||
|
||||
__device__ inline __half2 __hadd2(__half2 a, __half2 b) {
|
||||
__half2 ret;
|
||||
ret.a = __hip_hadd2_gfx803(a.a, b.a);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
typedef struct{
|
||||
|
||||
+2
-1
@@ -22,6 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include"hip/hip_fp16.h"
|
||||
|
||||
#if __CLANG_VERSION__ == 35
|
||||
|
||||
static const unsigned sign_val = 0x8000;
|
||||
static const __half __half_value_one_float = {0x3C00};
|
||||
@@ -373,4 +374,4 @@ __device__ __half2 __lowhigh2highlow(const __half2 a){
|
||||
__device__ __half2 __low2half2(const __half2 a, const __half2 b){
|
||||
return {a.q, b.q};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -54,4 +54,9 @@ define linkonce_odr spir_func half @__hip_hsub_gfx803(half %a, half %b) #1 {
|
||||
ret half %val
|
||||
}
|
||||
|
||||
define linkonce_odr spir_func i32 @__hip_hadd2_gfx803(i32 %a i32 %b) #1 {
|
||||
%val = tail call i32 asm "v_add_f16_sdwa $0, $1, $2 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:WORD_0","=v,v,v"(i32 %a, i32 %b)
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
attributes #1 = { alwaysinline nounwind }
|
||||
|
||||
Referens i nytt ärende
Block a user