From 6035907648055ff65503256a0ed2eaac46cbdc7e Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 23 Dec 2022 10:17:02 -0500
Subject: [PATCH] SWDEV-1 - Eliminate rcp_2f16 pseudo intrinsic

No such wrapper or intrinsic would ever exist because there is no such
underlying instruction.

Change-Id: I6c3f64cd2df2a58edf32037da8f5712868f296ea

[ROCm/clr commit: bebbf3c4ec03ec9090b5629d592e5b5865e1a57e]
---
 projects/clr/hipamd/include/hip/amd_detail/amd_hip_fp16.h | 5 ++++-
 .../clr/hipamd/include/hip/amd_detail/hip_fp16_math_fwd.h | 5 -----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_fp16.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_fp16.h
index fb07cfb643..694deb228c 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_fp16.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_fp16.h
@@ -1672,7 +1672,10 @@ THE SOFTWARE.
         __half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); }
         inline
         __HOST_DEVICE__
-        __half2 h2rcp(__half2 x) { return __llvm_amdgcn_rcp_2f16(x); }
+        __half2 h2rcp(__half2 x) {
+            return _Float16_2{__llvm_amdgcn_rcp_f16(x.x),
+                              __llvm_amdgcn_rcp_f16(x.y)};
+        }
         inline
         __HOST_DEVICE__
         __half2 h2rsqrt(__half2 x) { return __ocml_rsqrt_2f16(x); }
diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_fp16_math_fwd.h b/projects/clr/hipamd/include/hip/amd_detail/hip_fp16_math_fwd.h
index 36942c1a9b..aac0bcca53 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/hip_fp16_math_fwd.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/hip_fp16_math_fwd.h
@@ -73,11 +73,6 @@ extern "C"
     __device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16);
     __device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16);
     __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16);
-    __device__ inline
-    __2f16 __llvm_amdgcn_rcp_2f16(__2f16 x) // Not currently exposed by ROCDL.
-    {
-        return __2f16{__llvm_amdgcn_rcp_f16(x.x), __llvm_amdgcn_rcp_f16(x.y)};
-    }
     __device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16);
     __device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16);
     __device__ __2f16 __ocml_sin_2f16(__2f16);