From cdbc90d4908ce31b6954bd26c93949dbee2ac65f Mon Sep 17 00:00:00 2001
From: Aditya Atluri
Date: Tue, 29 Nov 2016 19:46:01 -0600
Subject: [PATCH] added half add and fma intrinsic

Change-Id: Ifa60c1a7065f524f069291bb00d987b11c836cc4

[ROCm/hip commit: 7b0650773c5db409d8f6ade0e35da9b58e08c6e3]
---
 projects/hip/src/hip_ir.ll | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/projects/hip/src/hip_ir.ll b/projects/hip/src/hip_ir.ll
index 6850293778..d0e2a879a3 100644
--- a/projects/hip/src/hip_ir.ll
+++ b/projects/hip/src/hip_ir.ll
@@ -12,4 +12,38 @@ define void @__threadfence_block() #1 {
   ret void
 }
 
+; Byte-wise multiply-accumulate: for each of the four byte lanes (SDWA
+; BYTE_0..BYTE_3) multiplies the selected byte of %in1 by that of %in2 with
+; v_mul_u32_u24 and adds the four products to %in3.
+; NOTE(review): the bytes are presumably treated as unsigned -- confirm.
+define linkonce_odr spir_func i32 @__rocm_dp4a(i32 %in1, i32 %in2, i32 %in3) {
+  %val1 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0","=v,v,v"(i32 %in1, i32 %in2)
+  %ret1 = add i32 %val1, %in3
+  %val2 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1","=v,v,v"(i32 %in1, i32 %in2)
+  %ret2 = add i32 %ret1, %val2
+  %val3 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_2","=v,v,v"(i32 %in1, i32 %in2)
+  %ret3 = add i32 %val3, %ret2
+  %val4 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3","=v,v,v"(i32 %in1, i32 %in2)
+  %ret4 = add i32 %val4, %ret3
+  ret i32 %ret4
+}
+
+; Half-precision multiply-add via v_mac_f16, which accumulates into its own
+; destination (D = S0 * S1 + D per the GCN3 ISA). The destination is therefore
+; declared as an output tied to %in3 ("=v" with matching constraint "0") so the
+; compiler sees the result; an input-only operand list would give the asm no
+; visible result, letting LLVM drop it and return %in3 unchanged.
+; NOTE(review): half operands presumably sit in the low 16 bits -- confirm.
+define linkonce_odr spir_func i32 @__rocm_hfma(i32 %in1, i32 %in2, i32 %in3) {
+  %val = tail call i32 asm "v_mac_f16 $0, $1, $2","=v,v,v,0"(i32 %in1, i32 %in2, i32 %in3)
+  ret i32 %val
+}
+
+; Half-precision add: returns the register written by v_add_f16 %in1, %in2.
+; NOTE(review): half operands presumably sit in the low 16 bits -- confirm.
+define linkonce_odr spir_func i32 @__rocm_hadd(i32 %in1, i32 %in2) {
+  %val = tail call i32 asm "v_add_f16 $0, $1, $2","=v,v,v"(i32 %in1, i32 %in2)
+  ret i32 %val
+}
+
 attributes #1 = { alwaysinline nounwind }