Merge pull request #750 from aaronenyeshi/remove-non-default-rounded-math
Remove non-default-rounded math apis
[ROCm/clr commit: 4ccea03e15]
Этот коммит содержится в:
@@ -1433,7 +1433,7 @@ __device__ float __expf(float x);
|
||||
__device__ static float __fadd_rd(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fadd_rn
|
||||
@@ -1441,7 +1441,7 @@ __device__ static float __fadd_rd(float x, float y);
|
||||
__device__ static float __fadd_rn(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fadd_ru
|
||||
@@ -1449,7 +1449,7 @@ __device__ static float __fadd_rn(float x, float y);
|
||||
__device__ static float __fadd_ru(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fadd_rz
|
||||
@@ -1457,7 +1457,7 @@ __device__ static float __fadd_ru(float x, float y);
|
||||
__device__ static float __fadd_rz(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fdiv_rd
|
||||
@@ -1465,7 +1465,7 @@ __device__ static float __fadd_rz(float x, float y);
|
||||
__device__ static float __fdiv_rd(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fdiv_rn
|
||||
@@ -1473,7 +1473,7 @@ __device__ static float __fdiv_rd(float x, float y);
|
||||
__device__ static float __fdiv_rn(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fdiv_ru
|
||||
@@ -1481,7 +1481,7 @@ __device__ static float __fdiv_rn(float x, float y);
|
||||
__device__ static float __fdiv_ru(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fdiv_rz
|
||||
@@ -1489,7 +1489,7 @@ __device__ static float __fdiv_ru(float x, float y);
|
||||
__device__ static float __fdiv_rz(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fdividef
|
||||
@@ -1505,7 +1505,7 @@ __device__ static float __fdividef(float x, float y);
|
||||
__device__ float __fmaf_rd(float x, float y, float z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmaf_rn
|
||||
@@ -1513,7 +1513,7 @@ __device__ float __fmaf_rd(float x, float y, float z);
|
||||
__device__ float __fmaf_rn(float x, float y, float z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmaf_ru
|
||||
@@ -1521,7 +1521,7 @@ __device__ float __fmaf_rn(float x, float y, float z);
|
||||
__device__ float __fmaf_ru(float x, float y, float z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmaf_rz
|
||||
@@ -1529,7 +1529,7 @@ __device__ float __fmaf_ru(float x, float y, float z);
|
||||
__device__ float __fmaf_rz(float x, float y, float z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmul_rd
|
||||
@@ -1537,7 +1537,7 @@ __device__ float __fmaf_rz(float x, float y, float z);
|
||||
__device__ static float __fmul_rd(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmul_rn
|
||||
@@ -1545,7 +1545,7 @@ __device__ static float __fmul_rd(float x, float y);
|
||||
__device__ static float __fmul_rn(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmul_ru
|
||||
@@ -1553,7 +1553,7 @@ __device__ static float __fmul_rn(float x, float y);
|
||||
__device__ static float __fmul_ru(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmul_rz
|
||||
@@ -1561,7 +1561,7 @@ __device__ static float __fmul_ru(float x, float y);
|
||||
__device__ static float __fmul_rz(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __frcp_rd
|
||||
@@ -1569,7 +1569,7 @@ __device__ static float __fmul_rz(float x, float y);
|
||||
__device__ float __frcp_rd(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __frcp_rn
|
||||
@@ -1577,7 +1577,7 @@ __device__ float __frcp_rd(float x);
|
||||
__device__ float __frcp_rn(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __frcp_ru
|
||||
@@ -1585,7 +1585,7 @@ __device__ float __frcp_rn(float x);
|
||||
__device__ float __frcp_ru(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __frcp_rz
|
||||
@@ -1593,7 +1593,7 @@ __device__ float __frcp_ru(float x);
|
||||
__device__ float __frcp_rz(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __frsqrt_rn
|
||||
@@ -1601,7 +1601,7 @@ __device__ float __frcp_rz(float x);
|
||||
__device__ float __frsqrt_rn(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsqrt_rd
|
||||
@@ -1609,7 +1609,7 @@ __device__ float __frsqrt_rn(float x);
|
||||
__device__ float __fsqrt_rd(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsqrt_rn
|
||||
@@ -1617,7 +1617,7 @@ __device__ float __fsqrt_rd(float x);
|
||||
__device__ float __fsqrt_rn(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsqrt_ru
|
||||
@@ -1625,7 +1625,7 @@ __device__ float __fsqrt_rn(float x);
|
||||
__device__ float __fsqrt_ru(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsqrt_rz
|
||||
@@ -1633,7 +1633,7 @@ __device__ float __fsqrt_ru(float x);
|
||||
__device__ float __fsqrt_rz(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsub_rd
|
||||
@@ -1641,7 +1641,7 @@ __device__ float __fsqrt_rz(float x);
|
||||
__device__ static float __fsub_rd(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsub_rn
|
||||
@@ -1649,7 +1649,7 @@ __device__ static float __fsub_rd(float x, float y);
|
||||
__device__ static float __fsub_rn(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsub_ru
|
||||
@@ -1657,7 +1657,15 @@ __device__ static float __fsub_rn(float x, float y);
|
||||
__device__ static float __fsub_ru(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsub_rz
|
||||
```cpp
|
||||
__device__ static float __fsub_rz(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __log10f
|
||||
@@ -1729,7 +1737,7 @@ __device__ float __tanf(float x);
|
||||
__device__ static double __dadd_rd(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dadd_rn
|
||||
@@ -1737,7 +1745,7 @@ __device__ static double __dadd_rd(double x, double y);
|
||||
__device__ static double __dadd_rn(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dadd_ru
|
||||
@@ -1745,7 +1753,7 @@ __device__ static double __dadd_rn(double x, double y);
|
||||
__device__ static double __dadd_ru(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dadd_rz
|
||||
@@ -1753,7 +1761,7 @@ __device__ static double __dadd_ru(double x, double y);
|
||||
__device__ static double __dadd_rz(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __ddiv_rd
|
||||
@@ -1761,7 +1769,7 @@ __device__ static double __dadd_rz(double x, double y);
|
||||
__device__ static double __ddiv_rd(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __ddiv_rn
|
||||
@@ -1769,7 +1777,7 @@ __device__ static double __ddiv_rd(double x, double y);
|
||||
__device__ static double __ddiv_rn(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __ddiv_ru
|
||||
@@ -1777,7 +1785,7 @@ __device__ static double __ddiv_rn(double x, double y);
|
||||
__device__ static double __ddiv_ru(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __ddiv_rz
|
||||
@@ -1785,7 +1793,7 @@ __device__ static double __ddiv_ru(double x, double y);
|
||||
__device__ static double __ddiv_rz(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dmul_rd
|
||||
@@ -1793,7 +1801,7 @@ __device__ static double __ddiv_rz(double x, double y);
|
||||
__device__ static double __dmul_rd(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dmul_rn
|
||||
@@ -1801,7 +1809,7 @@ __device__ static double __dmul_rd(double x, double y);
|
||||
__device__ static double __dmul_rn(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dmul_ru
|
||||
@@ -1809,7 +1817,7 @@ __device__ static double __dmul_rn(double x, double y);
|
||||
__device__ static double __dmul_ru(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dmul_rz
|
||||
@@ -1817,7 +1825,7 @@ __device__ static double __dmul_ru(double x, double y);
|
||||
__device__ static double __dmul_rz(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __drcp_rd
|
||||
@@ -1825,7 +1833,7 @@ __device__ static double __dmul_rz(double x, double y);
|
||||
__device__ double __drcp_rd(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __drcp_rn
|
||||
@@ -1833,7 +1841,7 @@ __device__ double __drcp_rd(double x);
|
||||
__device__ double __drcp_rn(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __drcp_ru
|
||||
@@ -1841,7 +1849,7 @@ __device__ double __drcp_rn(double x);
|
||||
__device__ double __drcp_ru(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __drcp_rz
|
||||
@@ -1849,7 +1857,7 @@ __device__ double __drcp_ru(double x);
|
||||
__device__ double __drcp_rz(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsqrt_rd
|
||||
@@ -1857,7 +1865,7 @@ __device__ double __drcp_rz(double x);
|
||||
__device__ double __dsqrt_rd(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsqrt_rn
|
||||
@@ -1865,7 +1873,7 @@ __device__ double __dsqrt_rd(double x);
|
||||
__device__ double __dsqrt_rn(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsqrt_ru
|
||||
@@ -1873,7 +1881,7 @@ __device__ double __dsqrt_rn(double x);
|
||||
__device__ double __dsqrt_ru(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsqrt_rz
|
||||
@@ -1881,7 +1889,7 @@ __device__ double __dsqrt_ru(double x);
|
||||
__device__ double __dsqrt_rz(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsub_rd
|
||||
@@ -1889,7 +1897,7 @@ __device__ double __dsqrt_rz(double x);
|
||||
__device__ static double __dsub_rd(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsub_rn
|
||||
@@ -1897,7 +1905,7 @@ __device__ static double __dsub_rd(double x, double y);
|
||||
__device__ static double __dsub_rn(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsub_ru
|
||||
@@ -1905,7 +1913,7 @@ __device__ static double __dsub_rn(double x, double y);
|
||||
__device__ static double __dsub_ru(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsub_rz
|
||||
@@ -1913,7 +1921,7 @@ __device__ static double __dsub_ru(double x, double y);
|
||||
__device__ static double __dsub_rz(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fma_rd
|
||||
@@ -1921,7 +1929,7 @@ __device__ static double __dsub_rz(double x, double y);
|
||||
__device__ double __fma_rd(double x, double y, double z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fma_rn
|
||||
@@ -1929,7 +1937,7 @@ __device__ double __fma_rd(double x, double y, double z);
|
||||
__device__ double __fma_rn(double x, double y, double z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fma_ru
|
||||
@@ -1937,7 +1945,7 @@ __device__ double __fma_rn(double x, double y, double z);
|
||||
__device__ double __fma_ru(double x, double y, double z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fma_rz
|
||||
@@ -1945,7 +1953,7 @@ __device__ double __fma_ru(double x, double y, double z);
|
||||
__device__ double __fma_rz(double x, double y, double z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __brev
|
||||
|
||||
@@ -514,38 +514,41 @@ float __exp10f(float x) { return __ocml_exp10_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __expf(float x) { return __ocml_exp_f32(x); }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_rd(float x, float y) { return __ocml_add_rtp_f32(x, y); }
|
||||
float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_ru(float x, float y) { return __ocml_add_rtn_f32(x, y); }
|
||||
float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_rd(float x, float y) { return x / y; }
|
||||
float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_rn(float x, float y) { return x / y; }
|
||||
float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_ru(float x, float y) { return x / y; }
|
||||
float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_rz(float x, float y) { return x / y; }
|
||||
float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdividef(float x, float y) { return x / y; }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmaf_rd(float x, float y, float z)
|
||||
{
|
||||
return __ocml_fma_rtp_f32(x, y, z);
|
||||
return __ocml_fma_rtn_f32(x, y, z);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
@@ -557,7 +560,7 @@ __DEVICE__
|
||||
inline
|
||||
float __fmaf_ru(float x, float y, float z)
|
||||
{
|
||||
return __ocml_fma_rtn_f32(x, y, z);
|
||||
return __ocml_fma_rtp_f32(x, y, z);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
@@ -567,13 +570,13 @@ float __fmaf_rz(float x, float y, float z)
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_rd(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
|
||||
float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_ru(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
|
||||
float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
|
||||
@@ -594,28 +597,29 @@ inline
|
||||
float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_rd(float x) { return __ocml_sqrt_f32(x); }
|
||||
float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_rn(float x) { return __ocml_sqrt_f32(x); }
|
||||
float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_ru(float x) { return __ocml_sqrt_f32(x); }
|
||||
float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_rz(float x) { return __ocml_sqrt_f32(x); }
|
||||
float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_rd(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
|
||||
float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_ru(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
|
||||
float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __log10f(float x) { return __ocml_log10_f32(x); }
|
||||
@@ -1034,39 +1038,40 @@ double yn(int n, double x)
|
||||
}
|
||||
|
||||
// BEGIN INTRINSICS
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_rd(double x, double y) { return __ocml_add_rtp_f64(x, y); }
|
||||
double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_ru(double x, double y) { return __ocml_add_rtn_f64(x, y); }
|
||||
double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_rd(double x, double y) { return x / y; }
|
||||
double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_rn(double x, double y) { return x / y; }
|
||||
double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_ru(double x, double y) { return x / y; }
|
||||
double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_rz(double x, double y) { return x / y; }
|
||||
double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_rd(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
|
||||
double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_ru(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
|
||||
double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
|
||||
@@ -1084,25 +1089,25 @@ inline
|
||||
double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_rd(double x) { return __ocml_sqrt_f64(x); }
|
||||
double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
|
||||
double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_ru(double x) { return __ocml_sqrt_f64(x); }
|
||||
double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_rz(double x) { return __ocml_sqrt_f64(x); }
|
||||
double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_rd(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
|
||||
double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_ru(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
|
||||
double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
|
||||
@@ -1110,7 +1115,7 @@ __DEVICE__
|
||||
inline
|
||||
double __fma_rd(double x, double y, double z)
|
||||
{
|
||||
return __ocml_fma_rtp_f64(x, y, z);
|
||||
return __ocml_fma_rtn_f64(x, y, z);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
@@ -1122,7 +1127,7 @@ __DEVICE__
|
||||
inline
|
||||
double __fma_ru(double x, double y, double z)
|
||||
{
|
||||
return __ocml_fma_rtn_f64(x, y, z);
|
||||
return __ocml_fma_rtp_f64(x, y, z);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
@@ -1130,6 +1135,7 @@ double __fma_rz(double x, double y, double z)
|
||||
{
|
||||
return __ocml_fma_rtz_f64(x, y, z);
|
||||
}
|
||||
#endif
|
||||
// END INTRINSICS
|
||||
// END DOUBLE
|
||||
|
||||
|
||||
@@ -288,6 +288,30 @@ __attribute__((const))
|
||||
float __ocml_mul_rtz_f32(float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_div_rte_f32(float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_div_rtn_f32(float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_div_rtp_f32(float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_div_rtz_f32(float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
// Unary: the __fsqrt_rn wrapper calls this as __ocml_sqrt_rte_f32(x).
float __ocml_sqrt_rte_f32(float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
// Unary: the __fsqrt_rd wrapper calls this as __ocml_sqrt_rtn_f32(x).
float __ocml_sqrt_rtn_f32(float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
// Unary: the __fsqrt_ru wrapper calls this as __ocml_sqrt_rtp_f32(x).
float __ocml_sqrt_rtp_f32(float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
// Unary: the __fsqrt_rz wrapper calls this as __ocml_sqrt_rtz_f32(x).
float __ocml_sqrt_rtz_f32(float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_fma_rte_f32(float, float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
@@ -572,6 +596,30 @@ __attribute__((const))
|
||||
double __ocml_mul_rtz_f64(double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_div_rte_f64(double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_div_rtn_f64(double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_div_rtp_f64(double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_div_rtz_f64(double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
// Unary: the __dsqrt_rn wrapper calls this as __ocml_sqrt_rte_f64(x).
double __ocml_sqrt_rte_f64(double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
// Unary: the __dsqrt_rd wrapper calls this as __ocml_sqrt_rtn_f64(x).
double __ocml_sqrt_rtn_f64(double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
// Unary: the __dsqrt_ru wrapper calls this as __ocml_sqrt_rtp_f64(x).
double __ocml_sqrt_rtp_f64(double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
// Unary: the __dsqrt_rz wrapper calls this as __ocml_sqrt_rtz_f64(x).
double __ocml_sqrt_rtz_f64(double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_fma_rte_f64(double, double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
@@ -594,4 +642,4 @@ double __llvm_amdgcn_rsq_f64(double) __asm("llvm.amdgcn.rsq.f64");
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -34,6 +34,7 @@ THE SOFTWARE.
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
__device__ void double_precision_intrinsics() {
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__dadd_rd(0.0, 1.0);
|
||||
__dadd_rn(0.0, 1.0);
|
||||
__dadd_ru(0.0, 1.0);
|
||||
@@ -62,6 +63,7 @@ __device__ void double_precision_intrinsics() {
|
||||
__fma_rn(1.0, 2.0, 3.0);
|
||||
__fma_ru(1.0, 2.0, 3.0);
|
||||
__fma_rz(1.0, 2.0, 3.0);
|
||||
#endif
|
||||
}
|
||||
|
||||
__global__ void compileDoublePrecisionIntrinsics(int ignored) {
|
||||
|
||||
@@ -38,11 +38,13 @@ __global__ void floatMath(float* In, float* Out) {
|
||||
Out[tid] = __cosf(In[tid]);
|
||||
Out[tid] = __exp10f(Out[tid]);
|
||||
Out[tid] = __expf(Out[tid]);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
Out[tid] = __frsqrt_rn(Out[tid]);
|
||||
//Out[tid] = __fsqrt_rd(Out[tid]);
|
||||
//Out[tid] = __fsqrt_rn(Out[tid]);
|
||||
//Out[tid] = __fsqrt_ru(Out[tid]);
|
||||
//Out[tid] = __fsqrt_rz(Out[tid]);
|
||||
Out[tid] = __fsqrt_rd(Out[tid]);
|
||||
Out[tid] = __fsqrt_rn(Out[tid]);
|
||||
Out[tid] = __fsqrt_ru(Out[tid]);
|
||||
Out[tid] = __fsqrt_rz(Out[tid]);
|
||||
#endif
|
||||
Out[tid] = __log10f(Out[tid]);
|
||||
Out[tid] = __log2f(Out[tid]);
|
||||
Out[tid] = __logf(Out[tid]);
|
||||
|
||||
@@ -39,6 +39,7 @@ __device__ void single_precision_intrinsics() {
|
||||
__cosf(0.0f);
|
||||
__exp10f(0.0f);
|
||||
__expf(0.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fadd_rd(0.0f, 1.0f);
|
||||
__fadd_rn(0.0f, 1.0f);
|
||||
__fadd_ru(0.0f, 1.0f);
|
||||
@@ -47,7 +48,9 @@ __device__ void single_precision_intrinsics() {
|
||||
__fdiv_rn(4.0f, 2.0f);
|
||||
__fdiv_ru(4.0f, 2.0f);
|
||||
__fdiv_rz(4.0f, 2.0f);
|
||||
#endif
|
||||
__fdividef(4.0f, 2.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fmaf_rd(1.0f, 2.0f, 3.0f);
|
||||
__fmaf_rn(1.0f, 2.0f, 3.0f);
|
||||
__fmaf_ru(1.0f, 2.0f, 3.0f);
|
||||
@@ -69,6 +72,7 @@ __device__ void single_precision_intrinsics() {
|
||||
__fsub_rn(2.0f, 1.0f);
|
||||
__fsub_ru(2.0f, 1.0f);
|
||||
__fsub_rz(2.0f, 1.0f);
|
||||
#endif
|
||||
__log10f(1.0f);
|
||||
__log2f(1.0f);
|
||||
__logf(1.0f);
|
||||
|
||||
Ссылка в новой задаче
Block a user