added packed math fp16 native device functions
1. Added SDWA implementation inside IR file 2. Added device functions to header + used them in test Change-Id: Ib4e059a58eee201cc82438689e3e9bc5f9d26653
This commit is contained in:
@@ -41,6 +41,11 @@ extern "C" __half __hip_hc_ir_hfma_half(__half, __half, __half);
|
||||
extern "C" __half __hip_hc_ir_hmul_half(__half, __half);
|
||||
extern "C" __half __hip_hc_ir_hsub_half(__half, __half);
|
||||
|
||||
extern "C" int __hip_hc_ir_hadd2_int(int, int);
|
||||
extern "C" int __hip_hc_ir_hfma2_int(int, int, int);
|
||||
extern "C" int __hip_hc_ir_hmul2_int(int, int);
|
||||
extern "C" int __hip_hc_ir_hsub2_int(int, int);
|
||||
|
||||
__device__ static inline __half __hadd(const __half a, const __half b) {
|
||||
return __hip_hc_ir_hadd_half(a, b);
|
||||
}
|
||||
@@ -81,6 +86,72 @@ __device__ static inline __half hdiv(__half a, __half b) {
|
||||
return a/b;
|
||||
}
|
||||
|
||||
/*
|
||||
Half2 Arithmetic Functions
|
||||
*/
|
||||
|
||||
__device__ static inline __half2 __hadd2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.q = __hip_hc_ir_hadd2_int(a.q, b.q);
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ static inline __half2 __hadd2_sat(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.q = __hip_hc_ir_hadd2_int(a.q, b.q);
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ static inline __half2 __hfma2(__half2 a, __half2 b, __half2 c) {
|
||||
__half2 d;
|
||||
d.q = __hip_hc_ir_hfma2_int(a.q, b.q, c.q);
|
||||
return d;
|
||||
}
|
||||
|
||||
__device__ static inline __half2 __hfma2_sat(__half2 a, __half2 b, __half2 c) {
|
||||
__half2 d;
|
||||
d.q = __hip_hc_ir_hfma2_int(a.q, b.q, c.q);
|
||||
return d;
|
||||
}
|
||||
|
||||
__device__ static inline __half2 __hmul2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.q = __hip_hc_ir_hmul2_int(a.q, b.q);
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ static inline __half2 __hmul2_sat(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.q = __hip_hc_ir_hmul2_int(a.q, b.q);
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ static inline __half2 __hsub2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.q = __hip_hc_ir_hsub2_int(a.q, b.q);
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ static inline __half2 __hneg2(__half2 a) {
|
||||
__half2 c;
|
||||
c.p[0] = - a.p[0];
|
||||
c.p[1] = - a.p[1];
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ static inline __half2 __hsub2_sat(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.q = __hip_hc_ir_hsub2_int(a.q, b.q);
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ static inline __half2 h2div(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.p[0] = a.p[0] / b.p[0];
|
||||
c.p[1] = a.p[1] / b.p[1];
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if __clang_major__ == 3
|
||||
|
||||
@@ -61,6 +61,29 @@ define half @__hip_hc_ir_hfma_half(half %a, half %b, half %c) #1 {
|
||||
ret half %9
|
||||
}
|
||||
|
||||
define i32 @__hip_hc_ir_hadd2_int(i32 %a, i32 %b) #1 {
|
||||
%1 = tail call i32 asm sideeffect "v_add_f16 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
tail call void asm sideeffect "v_add_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %1, i32 %a, i32 %b)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @__hip_hc_ir_hfma2_int(i32 %a, i32 %b, i32 %c) #1 {
|
||||
%1 = tail call i32 asm sideeffect "v_mad_f16 $0, $1, $2, $3","=v,v,v,v"(i32 %a, i32 %b, i32 %c)
|
||||
tail call void asm sideeffect "v_mul_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %1, i32 %a, i32 %b)
|
||||
tail call void asm sideeffect "v_add_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %1, i32 %1, i32 %c)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @__hip_hc_ir_hmul2_int(i32 %a, i32 %b) #1 {
|
||||
%1 = tail call i32 asm sideeffect "v_mul_f16 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
tail call void asm sideeffect "v_mul_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %1, i32 %a, i32 %b)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @__hip_hc_ir_hsub2_int(i32 %a, i32 %b) #1 {
|
||||
%1 = tail call i32 asm sideeffect "v_sub_f16 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
|
||||
tail call void asm sideeffect "v_sub_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %1, i32 %a, i32 %b)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
attributes #1 = { alwaysinline nounwind }
|
||||
|
||||
@@ -69,6 +69,18 @@ __global__ void CheckHalf(hipLaunchParm lp, __half* In1, __half* In2, __half* In
|
||||
Out[9] = hdiv(In1[9], In2[9]);
|
||||
}
|
||||
|
||||
__global__ void CheckHalf2(hipLaunchParm lp, __half2* In1, __half2* In2, __half2* In3, __half2* Out){
|
||||
Out[0] = __hadd2(In1[0], In2[0]);
|
||||
Out[1] = __hadd2_sat(In1[1], In2[1]);
|
||||
Out[2] = __hfma2(In1[2], In2[2],In3[2]);
|
||||
Out[3] = __hfma2_sat(In1[3], In2[3], In3[3]);
|
||||
Out[4] = __hmul2(In1[4], In2[4]);
|
||||
Out[5] = __hmul2_sat(In1[5], In2[5]);
|
||||
Out[6] = __hneg2(In1[6]);
|
||||
Out[7] = __hsub2(In1[7], In2[7]);
|
||||
Out[8] = __hsub2_sat(In1[8], In2[8]);
|
||||
Out[9] = h2div(In1[9], In2[9]);
|
||||
}
|
||||
|
||||
int main(){
|
||||
|
||||
|
||||
Reference in New Issue
Block a user