Merge pull request #817 from aaronenyeshi/fix-rn-intrinsics
Add *_rn functions back into HIP intrinsics
Tento commit je obsažen v:
@@ -518,9 +518,11 @@ float __expf(float x) { return __ocml_exp_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); }
|
||||
float __fadd_rn(float x, float y) { return x + y; }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
|
||||
@@ -530,9 +532,11 @@ float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); }
|
||||
float __fdiv_rn(float x, float y) { return x / y; }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
|
||||
@@ -550,12 +554,14 @@ float __fmaf_rd(float x, float y, float z)
|
||||
{
|
||||
return __ocml_fma_rtn_f32(x, y, z);
|
||||
}
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmaf_rn(float x, float y, float z)
|
||||
{
|
||||
return __ocml_fma_rte_f32(x, y, z);
|
||||
return __ocml_fma_f32(x, y, z);
|
||||
}
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmaf_ru(float x, float y, float z)
|
||||
@@ -571,9 +577,11 @@ float __fmaf_rz(float x, float y, float z)
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); }
|
||||
float __fmul_rn(float x, float y) { return x * y; }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
|
||||
@@ -583,24 +591,30 @@ float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); }
|
||||
float __fsqrt_rn(float x) { return __ocml_sqrt_f32(x); }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
|
||||
@@ -610,9 +624,11 @@ float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); }
|
||||
float __fsub_rn(float x, float y) { return x - y; }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
|
||||
@@ -1042,9 +1058,11 @@ double yn(int n, double x)
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); }
|
||||
double __dadd_rn(double x, double y) { return x + y; }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
|
||||
@@ -1054,9 +1072,11 @@ double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); }
|
||||
double __ddiv_rn(double x, double y) { return x / y; }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
|
||||
@@ -1066,9 +1086,11 @@ double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); }
|
||||
double __dmul_rn(double x, double y) { return x * y; }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
|
||||
@@ -1078,9 +1100,11 @@ double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
|
||||
@@ -1090,9 +1114,11 @@ double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); }
|
||||
double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
|
||||
@@ -1102,9 +1128,11 @@ double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); }
|
||||
double __dsub_rn(double x, double y) { return x - y; }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
|
||||
@@ -1117,12 +1145,14 @@ double __fma_rd(double x, double y, double z)
|
||||
{
|
||||
return __ocml_fma_rtn_f64(x, y, z);
|
||||
}
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
double __fma_rn(double x, double y, double z)
|
||||
{
|
||||
return __ocml_fma_rte_f64(x, y, z);
|
||||
return __ocml_fma_f64(x, y, z);
|
||||
}
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
double __fma_ru(double x, double y, double z)
|
||||
|
||||
@@ -36,31 +36,45 @@ THE SOFTWARE.
|
||||
__device__ void double_precision_intrinsics() {
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__dadd_rd(0.0, 1.0);
|
||||
#endif
|
||||
__dadd_rn(0.0, 1.0);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__dadd_ru(0.0, 1.0);
|
||||
__dadd_rz(0.0, 1.0);
|
||||
__ddiv_rd(0.0, 1.0);
|
||||
#endif
|
||||
__ddiv_rn(0.0, 1.0);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__ddiv_ru(0.0, 1.0);
|
||||
__ddiv_rz(0.0, 1.0);
|
||||
__dmul_rd(1.0, 2.0);
|
||||
#endif
|
||||
__dmul_rn(1.0, 2.0);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__dmul_ru(1.0, 2.0);
|
||||
__dmul_rz(1.0, 2.0);
|
||||
__drcp_rd(2.0);
|
||||
#endif
|
||||
__drcp_rn(2.0);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__drcp_ru(2.0);
|
||||
__drcp_rz(2.0);
|
||||
__dsqrt_rd(4.0);
|
||||
#endif
|
||||
__dsqrt_rn(4.0);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__dsqrt_ru(4.0);
|
||||
__dsqrt_rz(4.0);
|
||||
__dsub_rd(2.0, 1.0);
|
||||
#endif
|
||||
__dsub_rn(2.0, 1.0);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__dsub_ru(2.0, 1.0);
|
||||
__dsub_rz(2.0, 1.0);
|
||||
__fma_rd(1.0, 2.0, 3.0);
|
||||
#endif
|
||||
__fma_rn(1.0, 2.0, 3.0);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fma_ru(1.0, 2.0, 3.0);
|
||||
__fma_rz(1.0, 2.0, 3.0);
|
||||
#endif
|
||||
|
||||
@@ -38,10 +38,12 @@ __global__ void floatMath(float* In, float* Out) {
|
||||
Out[tid] = __cosf(In[tid]);
|
||||
Out[tid] = __exp10f(Out[tid]);
|
||||
Out[tid] = __expf(Out[tid]);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
Out[tid] = __frsqrt_rn(Out[tid]);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
Out[tid] = __fsqrt_rd(Out[tid]);
|
||||
#endif
|
||||
Out[tid] = __fsqrt_rn(Out[tid]);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
Out[tid] = __fsqrt_ru(Out[tid]);
|
||||
Out[tid] = __fsqrt_rz(Out[tid]);
|
||||
#endif
|
||||
|
||||
@@ -41,35 +41,51 @@ __device__ void single_precision_intrinsics() {
|
||||
__expf(0.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fadd_rd(0.0f, 1.0f);
|
||||
#endif
|
||||
__fadd_rn(0.0f, 1.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fadd_ru(0.0f, 1.0f);
|
||||
__fadd_rz(0.0f, 1.0f);
|
||||
__fdiv_rd(4.0f, 2.0f);
|
||||
#endif
|
||||
__fdiv_rn(4.0f, 2.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fdiv_ru(4.0f, 2.0f);
|
||||
__fdiv_rz(4.0f, 2.0f);
|
||||
#endif
|
||||
__fdividef(4.0f, 2.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fmaf_rd(1.0f, 2.0f, 3.0f);
|
||||
#endif
|
||||
__fmaf_rn(1.0f, 2.0f, 3.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fmaf_ru(1.0f, 2.0f, 3.0f);
|
||||
__fmaf_rz(1.0f, 2.0f, 3.0f);
|
||||
__fmul_rd(1.0f, 2.0f);
|
||||
#endif
|
||||
__fmul_rn(1.0f, 2.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fmul_ru(1.0f, 2.0f);
|
||||
__fmul_rz(1.0f, 2.0f);
|
||||
__frcp_rd(2.0f);
|
||||
#endif
|
||||
__frcp_rn(2.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__frcp_ru(2.0f);
|
||||
__frcp_rz(2.0f);
|
||||
#endif
|
||||
__frsqrt_rn(4.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fsqrt_rd(4.0f);
|
||||
#endif
|
||||
__fsqrt_rn(4.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fsqrt_ru(4.0f);
|
||||
__fsqrt_rz(4.0f);
|
||||
__fsub_rd(2.0f, 1.0f);
|
||||
#endif
|
||||
__fsub_rn(2.0f, 1.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fsub_ru(2.0f, 1.0f);
|
||||
__fsub_rz(2.0f, 1.0f);
|
||||
#endif
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele