From 2d50392c17cec419a876d1cb537f6f74f11d71b0 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 18 Dec 2018 19:31:54 +0000 Subject: [PATCH] Add *_rn functions back into HIP intrinsics Add back the round-to-nearest-even intrinsics back to HIP math intrinsics as it caused regression. --- .../include/hip/hcc_detail/math_functions.h | 54 ++++++++++++++----- .../hipDoublePrecisionIntrinsics.cpp | 14 +++++ hipamd/tests/src/deviceLib/hipFloatMath.cpp | 4 +- .../hipSinglePrecisionIntrinsics.cpp | 16 ++++++ 4 files changed, 75 insertions(+), 13 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/math_functions.h b/hipamd/include/hip/hcc_detail/math_functions.h index 557257b2b0..63e48fab29 100644 --- a/hipamd/include/hip/hcc_detail/math_functions.h +++ b/hipamd/include/hip/hcc_detail/math_functions.h @@ -518,9 +518,11 @@ float __expf(float x) { return __ocml_exp_f32(x); } __DEVICE__ inline float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } +#endif __DEVICE__ inline -float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); } +float __fadd_rn(float x, float y) { return x + y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } @@ -530,9 +532,11 @@ float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } __DEVICE__ inline float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } +#endif __DEVICE__ inline -float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); } +float __fdiv_rn(float x, float y) { return x / y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } @@ -550,12 +554,14 @@ float __fmaf_rd(float x, float y, float z) { return __ocml_fma_rtn_f32(x, y, z); } +#endif __DEVICE__ inline float __fmaf_rn(float x, float y, float z) { - return __ocml_fma_rte_f32(x, y, z); + return __ocml_fma_f32(x, y, z); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline float __fmaf_ru(float x, float y, float z) @@ -571,9 +577,11 @@ float __fmaf_rz(float x, float y, float z) __DEVICE__ inline float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); } +#endif __DEVICE__ inline -float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); } +float __fmul_rn(float x, float y) { return x * y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } @@ -583,24 +591,30 @@ float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } __DEVICE__ inline float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); } +#endif __DEVICE__ inline float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); } __DEVICE__ inline float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); } +#endif __DEVICE__ inline float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } +#endif __DEVICE__ inline -float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); } +float __fsqrt_rn(float x) { return __ocml_sqrt_f32(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } @@ -610,9 +624,11 @@ float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } __DEVICE__ inline float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } +#endif __DEVICE__ inline -float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); } +float __fsub_rn(float x, float y) { return x - y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } @@ -1042,9 +1058,11 @@ double yn(int n, double x) __DEVICE__ inline double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } +#endif __DEVICE__ inline -double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); } +double __dadd_rn(double x, double y) { return x + y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } @@ -1054,9 +1072,11 @@ double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } __DEVICE__ inline double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } +#endif __DEVICE__ inline -double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); } +double __ddiv_rn(double x, double y) { return x / y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } @@ -1066,9 +1086,11 @@ double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } __DEVICE__ inline double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } +#endif __DEVICE__ inline -double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); } +double __dmul_rn(double x, double y) { return x * y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } @@ -1078,9 +1100,11 @@ double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } __DEVICE__ inline double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); } +#endif __DEVICE__ inline double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); } @@ -1090,9 +1114,11 @@ double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); } __DEVICE__ inline double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } +#endif __DEVICE__ inline -double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); } +double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } @@ -1102,9 +1128,11 @@ double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } __DEVICE__ inline double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } +#endif __DEVICE__ inline -double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); } +double __dsub_rn(double x, double y) { return x - y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } @@ -1117,12 +1145,14 @@ double __fma_rd(double x, double y, double z) { return __ocml_fma_rtn_f64(x, y, z); } +#endif __DEVICE__ inline double __fma_rn(double x, double y, double z) { - return __ocml_fma_rte_f64(x, y, z); + return __ocml_fma_f64(x, y, z); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline double __fma_ru(double x, double y, double z) diff --git a/hipamd/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp b/hipamd/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp index 939bdae743..24a8320840 100644 --- a/hipamd/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp +++ b/hipamd/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp @@ -36,31 +36,45 @@ THE SOFTWARE. __device__ void double_precision_intrinsics() { #if defined OCML_BASIC_ROUNDED_OPERATIONS __dadd_rd(0.0, 1.0); +#endif __dadd_rn(0.0, 1.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __dadd_ru(0.0, 1.0); __dadd_rz(0.0, 1.0); __ddiv_rd(0.0, 1.0); +#endif __ddiv_rn(0.0, 1.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __ddiv_ru(0.0, 1.0); __ddiv_rz(0.0, 1.0); __dmul_rd(1.0, 2.0); +#endif __dmul_rn(1.0, 2.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __dmul_ru(1.0, 2.0); __dmul_rz(1.0, 2.0); __drcp_rd(2.0); +#endif __drcp_rn(2.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __drcp_ru(2.0); __drcp_rz(2.0); __dsqrt_rd(4.0); +#endif __dsqrt_rn(4.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __dsqrt_ru(4.0); __dsqrt_rz(4.0); __dsub_rd(2.0, 1.0); +#endif __dsub_rn(2.0, 1.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __dsub_ru(2.0, 1.0); __dsub_rz(2.0, 1.0); __fma_rd(1.0, 2.0, 3.0); +#endif __fma_rn(1.0, 2.0, 3.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __fma_ru(1.0, 2.0, 3.0); __fma_rz(1.0, 2.0, 3.0); #endif diff --git a/hipamd/tests/src/deviceLib/hipFloatMath.cpp b/hipamd/tests/src/deviceLib/hipFloatMath.cpp index c6a07e26a9..ab1deb477a 100644 --- a/hipamd/tests/src/deviceLib/hipFloatMath.cpp +++ b/hipamd/tests/src/deviceLib/hipFloatMath.cpp @@ -38,10 +38,12 @@ __global__ void floatMath(float* In, float* Out) { Out[tid] = __cosf(In[tid]); Out[tid] = __exp10f(Out[tid]); Out[tid] = __expf(Out[tid]); -#if defined OCML_BASIC_ROUNDED_OPERATIONS Out[tid] = __frsqrt_rn(Out[tid]); +#if defined OCML_BASIC_ROUNDED_OPERATIONS Out[tid] = __fsqrt_rd(Out[tid]); +#endif Out[tid] = __fsqrt_rn(Out[tid]); +#if defined OCML_BASIC_ROUNDED_OPERATIONS Out[tid] = __fsqrt_ru(Out[tid]); Out[tid] = __fsqrt_rz(Out[tid]); #endif diff --git a/hipamd/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp b/hipamd/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp index b216b3cb54..751ea88641 100644 --- a/hipamd/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp +++ b/hipamd/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp @@ -41,35 +41,51 @@ __device__ void single_precision_intrinsics() { __expf(0.0f); #if defined OCML_BASIC_ROUNDED_OPERATIONS __fadd_rd(0.0f, 1.0f); +#endif __fadd_rn(0.0f, 1.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __fadd_ru(0.0f, 1.0f); __fadd_rz(0.0f, 1.0f); __fdiv_rd(4.0f, 2.0f); +#endif __fdiv_rn(4.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __fdiv_ru(4.0f, 2.0f); __fdiv_rz(4.0f, 2.0f); #endif __fdividef(4.0f, 2.0f); #if defined OCML_BASIC_ROUNDED_OPERATIONS __fmaf_rd(1.0f, 2.0f, 3.0f); +#endif __fmaf_rn(1.0f, 2.0f, 3.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __fmaf_ru(1.0f, 2.0f, 3.0f); __fmaf_rz(1.0f, 2.0f, 3.0f); __fmul_rd(1.0f, 2.0f); +#endif __fmul_rn(1.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __fmul_ru(1.0f, 2.0f); __fmul_rz(1.0f, 2.0f); __frcp_rd(2.0f); +#endif __frcp_rn(2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __frcp_ru(2.0f); __frcp_rz(2.0f); +#endif __frsqrt_rn(4.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __fsqrt_rd(4.0f); +#endif __fsqrt_rn(4.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __fsqrt_ru(4.0f); __fsqrt_rz(4.0f); __fsub_rd(2.0f, 1.0f); +#endif __fsub_rn(2.0f, 1.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __fsub_ru(2.0f, 1.0f); __fsub_rz(2.0f, 1.0f); #endif