From 4480bb6d065a6724db1a889f6197cc10b5940d6c Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 6 Nov 2018 16:32:14 +0000 Subject: [PATCH] Guard the OCML rounded operations instead Instead of commenting all these functions out, guard the functions with a macro OCML_BASIC_ROUNDED_OPERATIONS. [ROCm/clr commit: 9aa92238ab6cb42df6bcee70e7f54008ec0bd370] --- .../include/hip/hcc_detail/math_functions.h | 346 +++++++++--------- .../hipDoublePrecisionIntrinsics.cpp | 52 +-- .../hipSinglePrecisionIntrinsics.cpp | 54 +-- 3 files changed, 236 insertions(+), 216 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/math_functions.h b/projects/clr/hipamd/include/hip/hcc_detail/math_functions.h index 8a6091858b..08be321d68 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/math_functions.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/math_functions.h @@ -514,69 +514,73 @@ float __exp10f(float x) { return __ocml_exp10_f32(x); } __DEVICE__ inline float __expf(float x) { return __ocml_exp_f32(x); } -// __DEVICE__ -// inline -// float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } -// __DEVICE__ -// inline -// float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); } -// __DEVICE__ -// inline -// float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } -// __DEVICE__ -// inline -// float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } -// __DEVICE__ -// inline -// float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } -// __DEVICE__ -// inline -// float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); } -// __DEVICE__ -// inline -// float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } -// __DEVICE__ -// inline -// float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } +__DEVICE__ +inline +float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); } +__DEVICE__ +inline +float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } +__DEVICE__ +inline +float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } +__DEVICE__ +inline +float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } +__DEVICE__ +inline +float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); } +__DEVICE__ +inline +float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } +__DEVICE__ +inline +float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); } +#endif __DEVICE__ inline float __fdividef(float x, float y) { return x / y; } -// __DEVICE__ -// inline -// float __fmaf_rd(float x, float y, float z) -// { -// return __ocml_fma_rtn_f32(x, y, z); -// } -// __DEVICE__ -// inline -// float __fmaf_rn(float x, float y, float z) -// { -// return __ocml_fma_rte_f32(x, y, z); -// } -// __DEVICE__ -// inline -// float __fmaf_ru(float x, float y, float z) -// { -// return __ocml_fma_rtp_f32(x, y, z); -// } -// __DEVICE__ -// inline -// float __fmaf_rz(float x, float y, float z) -// { -// return __ocml_fma_rtz_f32(x, y, z); -// } -// __DEVICE__ -// inline -// float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); } -// __DEVICE__ -// inline -// float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); } -// __DEVICE__ -// inline -// float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } -// __DEVICE__ -// inline -// float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fmaf_rd(float x, float y, float z) +{ + return __ocml_fma_rtn_f32(x, y, z); +} +__DEVICE__ +inline +float __fmaf_rn(float x, float y, float z) +{ + return __ocml_fma_rte_f32(x, y, z); +} +__DEVICE__ +inline +float __fmaf_ru(float x, float y, float z) +{ + return __ocml_fma_rtp_f32(x, y, z); +} +__DEVICE__ +inline +float __fmaf_rz(float x, float y, float z) +{ + return __ocml_fma_rtz_f32(x, y, z); +} +__DEVICE__ +inline +float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); } +__DEVICE__ +inline +float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); } +__DEVICE__ +inline +float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } +__DEVICE__ +inline +float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } +#endif __DEVICE__ inline float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); } @@ -592,30 +596,32 @@ float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); } __DEVICE__ inline float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); } -// __DEVICE__ -// inline -// float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } -// __DEVICE__ -// inline -// float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); } -// __DEVICE__ -// inline -// float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } -// __DEVICE__ -// inline -// float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } -// __DEVICE__ -// inline -// float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } -// __DEVICE__ -// inline -// float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); } -// __DEVICE__ -// inline -// float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } -// __DEVICE__ -// inline -// float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } +__DEVICE__ +inline +float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); } +__DEVICE__ +inline +float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } +__DEVICE__ +inline +float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } +__DEVICE__ +inline +float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } +__DEVICE__ +inline +float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); } +__DEVICE__ +inline +float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } +__DEVICE__ +inline +float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } +#endif __DEVICE__ inline float __log10f(float x) { return __ocml_log10_f32(x); } @@ -1034,42 +1040,44 @@ double yn(int n, double x) } // BEGIN INTRINSICS -// __DEVICE__ -// inline -// double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } -// __DEVICE__ -// inline -// double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); } -// __DEVICE__ -// inline -// double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } -// __DEVICE__ -// inline -// double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } -// __DEVICE__ -// inline -// double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } -// __DEVICE__ -// inline -// double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); } -// __DEVICE__ -// inline -// double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } -// __DEVICE__ -// inline -// double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } -// __DEVICE__ -// inline -// double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } -// __DEVICE__ -// inline -// double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); } -// __DEVICE__ -// inline -// double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } -// __DEVICE__ -// inline -// double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } +__DEVICE__ +inline +double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); } +__DEVICE__ +inline +double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } +__DEVICE__ +inline +double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } +__DEVICE__ +inline +double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } +__DEVICE__ +inline +double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); } +__DEVICE__ +inline +double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } +__DEVICE__ +inline +double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } +__DEVICE__ +inline +double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } +__DEVICE__ +inline +double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); } +__DEVICE__ +inline +double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } +__DEVICE__ +inline +double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } +#endif __DEVICE__ inline double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); } @@ -1082,54 +1090,56 @@ double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); } __DEVICE__ inline double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); } -// __DEVICE__ -// inline -// double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } -// __DEVICE__ -// inline -// double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); } -// __DEVICE__ -// inline -// double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } -// __DEVICE__ -// inline -// double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } -// __DEVICE__ -// inline -// double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } -// __DEVICE__ -// inline -// double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); } -// __DEVICE__ -// inline -// double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } -// __DEVICE__ -// inline -// double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } -// __DEVICE__ -// inline -// double __fma_rd(double x, double y, double z) -// { -// return __ocml_fma_rtn_f64(x, y, z); -// } -// __DEVICE__ -// inline -// double __fma_rn(double x, double y, double z) -// { -// return __ocml_fma_rte_f64(x, y, z); -// } -// __DEVICE__ -// inline -// double __fma_ru(double x, double y, double z) -// { -// return __ocml_fma_rtp_f64(x, y, z); -// } -// __DEVICE__ -// inline -// double __fma_rz(double x, double y, double z) -// { -// return __ocml_fma_rtz_f64(x, y, z); -// } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } +__DEVICE__ +inline +double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); } +__DEVICE__ +inline +double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } +__DEVICE__ +inline +double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } +__DEVICE__ +inline +double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } +__DEVICE__ +inline +double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); } +__DEVICE__ +inline +double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } +__DEVICE__ +inline +double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } +__DEVICE__ +inline +double __fma_rd(double x, double y, double z) +{ + return __ocml_fma_rtn_f64(x, y, z); +} +__DEVICE__ +inline +double __fma_rn(double x, double y, double z) +{ + return __ocml_fma_rte_f64(x, y, z); +} +__DEVICE__ +inline +double __fma_ru(double x, double y, double z) +{ + return __ocml_fma_rtp_f64(x, y, z); +} +__DEVICE__ +inline +double __fma_rz(double x, double y, double z) +{ + return __ocml_fma_rtz_f64(x, y, z); +} +#endif // END INTRINSICS // END DOUBLE diff --git a/projects/clr/hipamd/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp b/projects/clr/hipamd/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp index f6c515c03a..295fd83708 100644 --- a/projects/clr/hipamd/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp +++ b/projects/clr/hipamd/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp @@ -34,34 +34,38 @@ THE SOFTWARE. #pragma clang diagnostic ignored "-Wunused-variable" __device__ void double_precision_intrinsics() { - // __dadd_rd(0.0, 1.0); - // __dadd_rn(0.0, 1.0); - // __dadd_ru(0.0, 1.0); - // __dadd_rz(0.0, 1.0); - // __ddiv_rd(0.0, 1.0); - // __ddiv_rn(0.0, 1.0); - // __ddiv_ru(0.0, 1.0); - // __ddiv_rz(0.0, 1.0); - // __dmul_rd(1.0, 2.0); - // __dmul_rn(1.0, 2.0); - // __dmul_ru(1.0, 2.0); - // __dmul_rz(1.0, 2.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dadd_rd(0.0, 1.0); + __dadd_rn(0.0, 1.0); + __dadd_ru(0.0, 1.0); + __dadd_rz(0.0, 1.0); + __ddiv_rd(0.0, 1.0); + __ddiv_rn(0.0, 1.0); + __ddiv_ru(0.0, 1.0); + __ddiv_rz(0.0, 1.0); + __dmul_rd(1.0, 2.0); + __dmul_rn(1.0, 2.0); + __dmul_ru(1.0, 2.0); + __dmul_rz(1.0, 2.0); +#endif __drcp_rd(2.0); __drcp_rn(2.0); __drcp_ru(2.0); __drcp_rz(2.0); - // __dsqrt_rd(4.0); - // __dsqrt_rn(4.0); - // __dsqrt_ru(4.0); - // __dsqrt_rz(4.0); - // __dsub_rd(2.0, 1.0); - // __dsub_rn(2.0, 1.0); - // __dsub_ru(2.0, 1.0); - // __dsub_rz(2.0, 1.0); - // __fma_rd(1.0, 2.0, 3.0); - // __fma_rn(1.0, 2.0, 3.0); - // __fma_ru(1.0, 2.0, 3.0); - // __fma_rz(1.0, 2.0, 3.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dsqrt_rd(4.0); + __dsqrt_rn(4.0); + __dsqrt_ru(4.0); + __dsqrt_rz(4.0); + __dsub_rd(2.0, 1.0); + __dsub_rn(2.0, 1.0); + __dsub_ru(2.0, 1.0); + __dsub_rz(2.0, 1.0); + __fma_rd(1.0, 2.0, 3.0); + __fma_rn(1.0, 2.0, 3.0); + __fma_ru(1.0, 2.0, 3.0); + __fma_rz(1.0, 2.0, 3.0); +#endif } __global__ void compileDoublePrecisionIntrinsics(int ignored) { diff --git a/projects/clr/hipamd/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp b/projects/clr/hipamd/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp index 623ea08a94..db60099558 100644 --- a/projects/clr/hipamd/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp +++ b/projects/clr/hipamd/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp @@ -39,36 +39,42 @@ __device__ void single_precision_intrinsics() { __cosf(0.0f); __exp10f(0.0f); __expf(0.0f); - // __fadd_rd(0.0f, 1.0f); - // __fadd_rn(0.0f, 1.0f); - // __fadd_ru(0.0f, 1.0f); - // __fadd_rz(0.0f, 1.0f); - // __fdiv_rd(4.0f, 2.0f); - // __fdiv_rn(4.0f, 2.0f); - // __fdiv_ru(4.0f, 2.0f); - // __fdiv_rz(4.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fadd_rd(0.0f, 1.0f); + __fadd_rn(0.0f, 1.0f); + __fadd_ru(0.0f, 1.0f); + __fadd_rz(0.0f, 1.0f); + __fdiv_rd(4.0f, 2.0f); + __fdiv_rn(4.0f, 2.0f); + __fdiv_ru(4.0f, 2.0f); + __fdiv_rz(4.0f, 2.0f); +#endif __fdividef(4.0f, 2.0f); - // __fmaf_rd(1.0f, 2.0f, 3.0f); - // __fmaf_rn(1.0f, 2.0f, 3.0f); - // __fmaf_ru(1.0f, 2.0f, 3.0f); - // __fmaf_rz(1.0f, 2.0f, 3.0f); - // __fmul_rd(1.0f, 2.0f); - // __fmul_rn(1.0f, 2.0f); - // __fmul_ru(1.0f, 2.0f); - // __fmul_rz(1.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fmaf_rd(1.0f, 2.0f, 3.0f); + __fmaf_rn(1.0f, 2.0f, 3.0f); + __fmaf_ru(1.0f, 2.0f, 3.0f); + __fmaf_rz(1.0f, 2.0f, 3.0f); + __fmul_rd(1.0f, 2.0f); + __fmul_rn(1.0f, 2.0f); + __fmul_ru(1.0f, 2.0f); + __fmul_rz(1.0f, 2.0f); +#endif __frcp_rd(2.0f); __frcp_rn(2.0f); __frcp_ru(2.0f); __frcp_rz(2.0f); __frsqrt_rn(4.0f); - // __fsqrt_rd(4.0f); - // __fsqrt_rn(4.0f); - // __fsqrt_ru(4.0f); - // __fsqrt_rz(4.0f); - // __fsub_rd(2.0f, 1.0f); - // __fsub_rn(2.0f, 1.0f); - // __fsub_ru(2.0f, 1.0f); - // __fsub_rz(2.0f, 1.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fsqrt_rd(4.0f); + __fsqrt_rn(4.0f); + __fsqrt_ru(4.0f); + __fsqrt_rz(4.0f); + __fsub_rd(2.0f, 1.0f); + __fsub_rn(2.0f, 1.0f); + __fsub_ru(2.0f, 1.0f); + __fsub_rz(2.0f, 1.0f); +#endif __log10f(1.0f); __log2f(1.0f); __logf(1.0f);