Added half-precision add and FMA intrinsics

Change-Id: Ifa60c1a7065f524f069291bb00d987b11c836cc4
Este commit está contenido en:
Aditya Atluri
2016-11-29 19:46:01 -06:00
padre 504fcaf786
commit e6df8cb28c
+22
Ver fichero
@@ -12,4 +12,26 @@ define void @__threadfence_block() #1 {
ret void
}
; __rocm_dp4a(in1, in2, in3) -> i32
;
; Issues four v_mul_u32_u24_sdwa instructions, one per byte lane. Lane N uses
; src0_sel:BYTE_N / src1_sel:BYTE_N to pick byte N of %in1 and %in2, and
; dst_sel:DWORD so each product is produced as a full 32-bit value. The four
; products are then summed together with %in3 using ordinary (wrapping) i32
; adds, and the total is returned.
;
; All operands and the result use the "v" (VGPR) inline-asm constraint.
define linkonce_odr spir_func i32 @__rocm_dp4a(i32 %in1, i32 %in2, i32 %in3) {
  ; Per-lane products; the asm calls are independent of one another.
  %prod0 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0","=v,v,v"(i32 %in1, i32 %in2)
  %prod1 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1","=v,v,v"(i32 %in1, i32 %in2)
  %prod2 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_2","=v,v,v"(i32 %in1, i32 %in2)
  %prod3 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3","=v,v,v"(i32 %in1, i32 %in2)

  ; Running sum, seeded with the caller-supplied accumulator %in3.
  %acc0 = add i32 %in3, %prod0
  %acc1 = add i32 %acc0, %prod1
  %acc2 = add i32 %acc1, %prod2
  %acc3 = add i32 %acc2, %prod3
  ret i32 %acc3
}
; __rocm_hfma(in1, in2, in3) -> i32
;
; Half-precision multiply-accumulate implemented with a single v_mac_f16.
; v_mac_f16 D, S0, S1 reads D as the addend and overwrites it with the result
; (D = S0 * S1 + D), so the destination must be declared as an output that is
; tied to the accumulator input:
;   $0 = output VGPR ("=v")
;   $1 = %in1       ("v")
;   $2 = %in2       ("v")
;   $3 = %in3       ("0": placed in the same register as $0)
;
; The previous form used the constraint string "v,v,v" with a void result,
; i.e. three inputs and no output, and then returned %in3 unchanged. The
; compiler was therefore never told about the value written by the
; instruction, and the function did not return the multiply-accumulate result.
;
; NOTE(review): values travel as i32 in VGPRs; presumably the f16 payload is
; in the low 16 bits of each operand - confirm against the callers.
define linkonce_odr spir_func i32 @__rocm_hfma(i32 %in1, i32 %in2, i32 %in3) {
  %acc = tail call i32 asm "v_mac_f16 $0, $1, $2","=v,v,v,0"(i32 %in1, i32 %in2, i32 %in3)
  ret i32 %acc
}
; __rocm_hadd(in1, in2) -> i32
;
; Emits one v_add_f16 with %in1 and %in2 as the source operands and returns
; the value the instruction writes to its destination. Sources and result all
; use the "v" (VGPR) inline-asm constraint.
;
; NOTE(review): values travel as i32; presumably the f16 payload is in the
; low 16 bits - confirm against the callers.
define linkonce_odr spir_func i32 @__rocm_hadd(i32 %in1, i32 %in2) {
  %sum = tail call i32 asm "v_add_f16 $0, $1, $2","=v,v,v"(i32 %in1, i32 %in2)
  ret i32 %sum
}
; Attribute group #1, referenced by definitions declared with `#1`
; (e.g. @__threadfence_block): always inline, and never unwinds.
attributes #1 = { alwaysinline nounwind }