diff --git a/hipamd/src/hip_ir.ll b/hipamd/src/hip_ir.ll
index 6850293778..d0e2a879a3 100644
--- a/hipamd/src/hip_ir.ll
+++ b/hipamd/src/hip_ir.ll
@@ -12,4 +12,42 @@ define void @__threadfence_block() #1 {
   ret void
 }
 
+; Byte-wise dot product with accumulate: multiplies bytes 0..3 of %in1 with the
+; matching bytes of %in2 (SDWA byte selects on v_mul_u32_u24) and returns the
+; sum of the four products plus %in3. The mnemonic is the unsigned 24-bit
+; multiply and no sext modifier is given, so lanes are presumably unsigned.
+define linkonce_odr spir_func i32 @__rocm_dp4a(i32 %in1, i32 %in2, i32 %in3) {
+  ; Lane 0, folded into the accumulator first.
+  %val1 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0","=v,v,v"(i32 %in1, i32 %in2)
+  %ret1 = add i32 %val1, %in3
+  ; Lane 1.
+  %val2 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1","=v,v,v"(i32 %in1, i32 %in2)
+  %ret2 = add i32 %ret1, %val2
+  ; Lane 2.
+  %val3 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_2","=v,v,v"(i32 %in1, i32 %in2)
+  %ret3 = add i32 %val3, %ret2
+  ; Lane 3.
+  %val4 = tail call i32 asm "v_mul_u32_u24_sdwa $0, $1, $2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3","=v,v,v"(i32 %in1, i32 %in2)
+  %ret4 = add i32 %val4, %ret3
+  ret i32 %ret4
+}
+
+; Half-precision multiply-accumulate via v_mac_f16 (dst = src0 * src1 + dst):
+; returns %in1 * %in2 + %in3. Operands are i32-typed; the instruction is a
+; 16-bit op, so only the low half of each register is presumably meaningful.
+define linkonce_odr spir_func i32 @__rocm_hfma(i32 %in1, i32 %in2, i32 %in3) {
+  ; v_mac_f16 reads and writes its destination, so the result must be declared
+  ; as an output ("=v") and %in3 tied to it ("0") to seed the accumulator.
+  ; Without an output the asm has no visible effect and may be dropped.
+  %val = tail call i32 asm "v_mac_f16 $0, $1, $2","=v,v,v,0"(i32 %in1, i32 %in2, i32 %in3)
+  ret i32 %val
+}
+
+; Half-precision add via v_add_f16: returns %in1 + %in2. Operands are i32-typed;
+; presumably only the low 16 bits carry the half value -- confirm with callers.
+define linkonce_odr spir_func i32 @__rocm_hadd(i32 %in1, i32 %in2) {
+  %val = tail call i32 asm "v_add_f16 $0, $1, $2","=v,v,v"(i32 %in1, i32 %in2)
+  ret i32 %val
+}
+
 attributes #1 = { alwaysinline nounwind }