diff --git a/include/hcc_detail/hip_runtime.h b/include/hcc_detail/hip_runtime.h index e2e0eb7eb3..aa634cdf1e 100644 --- a/include/hcc_detail/hip_runtime.h +++ b/include/hcc_detail/hip_runtime.h @@ -123,17 +123,20 @@ __device__ float coshf(float x); __device__ float cyl_bessel_i0f(float x); __device__ float cyl_bessel_i1f(float x); __device__ float erfcf(float x); -__device__ float erfcinvf(float y); +__device__ float erfcinvf(float y); +__host__ float erfcinvf(float y); __device__ float erfcxf(float x); +__host__ float erfcxf(float x); __device__ float erff(float x); __device__ float erfinvf(float y); +__host__ float erfinvf(float y); __device__ float exp10f(float x); __device__ float exp2f(float x); __device__ float expf(float x); __device__ float expm1f(float x); __device__ float fabsf(float x); __device__ float fdimf(float x, float y); -__device__ float fdividef(float x, float y); +__device__ __host__ float fdividef(float x, float y); __device__ float floorf(float x); __device__ float fmaf(float x, float y, float z); __device__ float fmaxf(float x, float y); diff --git a/src/device_util.cpp b/src/device_util.cpp index 3843989347..cb97f04038 100644 --- a/src/device_util.cpp +++ b/src/device_util.cpp @@ -27,6 +27,8 @@ THE SOFTWARE. using namespace hc::precise_math; #endif +#define HIP_SQRT_2 1.41421356237 + #define __hip_erfinva3 -0.140543331 #define __hip_erfinva2 0.914624893 #define __hip_erfinva1 -1.645349621 @@ -66,22 +68,22 @@ __device__ float __hip_erfinvf(float x){ } if (x <= 0.7) { float x1 = x * x; - float x2 = hc::precise_math::fmaf(__hip_erfinva3, x1, __hip_erfinva2); - float x3 = hc::precise_math::fmaf(x2, x1, __hip_erfinva1); - float x4 = x * hc::precise_math::fmaf(x3, x1, __hip_erfinva0); + float x2 = __hip_erfinva3 * x1 + __hip_erfinva2; + float x3 = x2 * x1 + __hip_erfinva1; + float x4 = x * (x3 * x1 + __hip_erfinva0); - float r1 = hc::precise_math::fmaf(__hip_erfinvb4, x1, __hip_erfinvb3); - float r2 = hc::precise_math::fmaf(r1, x1, __hip_erfinvb2); - float r3 = hc::precise_math::fmaf(r2, x1, __hip_erfinvb1); - ret = x4 / hc::precise_math::fmaf(r3, x1, __hip_erfinvb0); + float r1 = __hip_erfinvb4 * x1 + __hip_erfinvb3; + float r2 = r1 * x1 + __hip_erfinvb2; + float r3 = r2 * x1 + __hip_erfinvb1; + ret = x4 / (r3 * x1 + __hip_erfinvb0); } else { float x1 = hc::precise_math::sqrtf(-hc::precise_math::logf((1 - x) / 2)); - float x2 = hc::precise_math::fmaf(__hip_erfinvc3, x1, __hip_erfinvc2); - float x3 = hc::precise_math::fmaf(x2, x1, __hip_erfinvc1); - float x4 = hc::precise_math::fmaf(x3, x1, __hip_erfinvc0); + float x2 = __hip_erfinvc3 * x1 + __hip_erfinvc2; + float x3 = x2 * x1 + __hip_erfinvc1; + float x4 = x3 * x1 + __hip_erfinvc0; - float r1 = hc::precise_math::fmaf(__hip_erfinvd2, x1, __hip_erfinvd1); - ret = x4 / hc::precise_math::fmaf(r1, x1, __hip_erfinvd0); + float r1 = __hip_erfinvd2 * x1 + __hip_erfinvd1; + ret = x4 / (r1 * x1 + __hip_erfinvd0); } ret = ret * sign; @@ -110,22 +112,22 @@ __device__ double __hip_erfinv(double x){ } if (x <= 0.7) { double x1 = x * x; - double x2 = hc::precise_math::fma(__hip_erfinva3, x1, __hip_erfinva2); - double x3 = hc::precise_math::fma(x2, x1, __hip_erfinva1); - double x4 = x * hc::precise_math::fma(x3, x1, __hip_erfinva0); + double x2 = __hip_erfinva3 * x1 + __hip_erfinva2; + double x3 = x2 * x1 + __hip_erfinva1; + double x4 = x * (x3 * x1 + __hip_erfinva0); - double r1 = hc::precise_math::fma(__hip_erfinvb4, x1, __hip_erfinvb3); - double r2 = hc::precise_math::fma(r1, x1, __hip_erfinvb2); - double r3 = hc::precise_math::fma(r2, x1, __hip_erfinvb1); - ret = x4 / hc::precise_math::fma(r3, x1, __hip_erfinvb0); + double r1 = __hip_erfinvb4 * x1 + __hip_erfinvb3; + double r2 = r1 * x1 + __hip_erfinvb2; + double r3 = r2 * x1 + __hip_erfinvb1; + ret = x4 / (r3 * x1 + __hip_erfinvb0); } else { double x1 = hc::precise_math::sqrt(-hc::precise_math::log((1 - x) / 2)); - double x2 = hc::precise_math::fma(__hip_erfinvc3, x1, __hip_erfinvc2); - double x3 = hc::precise_math::fma(x2, x1, __hip_erfinvc1); - double x4 = hc::precise_math::fma(x3, x1, __hip_erfinvc0); + double x2 = __hip_erfinvc3 * x1 + __hip_erfinvc2; + double x3 = x2 * x1 + __hip_erfinvc1; + double x4 = x3 * x1 + __hip_erfinvc0; - double r1 = hc::precise_math::fma(__hip_erfinvd2, x1, __hip_erfinvd1); - ret = x4 / hc::precise_math::fma(r1, x1, __hip_erfinvd0); + double r1 = __hip_erfinvd2 * x1 + __hip_erfinvd1; + ret = x4 / (r1 * x1 + __hip_erfinvd0); } ret = ret * sign; @@ -659,8 +661,14 @@ __device__ float erfcf(float x) { return hc::precise_math::erfcf(x); } -__device__ float erfcinvf(float y); -__device__ float erfcxf(float x); +__device__ float erfcinvf(float y) +{ + return __hip_erfinvf(1 - y); +} +__device__ float erfcxf(float x) +{ + return hc::precise_math::expf(x*x)*hc::precise_math::erfcf(x); +} __device__ float erff(float x) { return hc::precise_math::erff(x); @@ -741,8 +749,14 @@ __device__ unsigned isnan(float a) { return hc::precise_math::isnan(a); } -__device__ float j0f(float x); -__device__ float j1f(float x); +__device__ float j0f(float x) +{ + return __hip_j0f(x); +} +__device__ float j1f(float x) +{ + return __hip_j1f(x); +} __device__ float jnf(int n, float x); __device__ float ldexpf(float x, int exp) { @@ -821,15 +835,15 @@ __device__ float norm4df(float a, float b, float c, float d) float y = c*c + d*d; return hc::precise_math::sqrtf(x+y); } -/* -The below conversion seems easy, takes a -full page of integral calculus to deduce the following equation -*/ + __device__ float normcdff(float y) { return ((hc::precise_math::erff(y)/1.41421356237) + 1)/2; } -__device__ float normcdfinvf(float y); +__device__ float normcdfinvf(float y) +{ + return HIP_SQRT_2 * __hip_erfinvf(2*y-1); +} __device__ float normf(int dim, const float *a) { float x = 0.0f; @@ -933,8 +947,14 @@ __device__ float truncf(float x) { return hc::precise_math::truncf(x); } -__device__ float y0f(float x); -__device__ float y1f(float x); +__device__ float y0f(float x) +{ + return __hip_y0f(x); +} +__device__ float y1f(float x) +{ + return __hip_y1f(x); +} __device__ float ynf(int n, float x); @@ -966,7 +986,6 @@ __device__ double acos(double x) { return hc::precise_math::acos(x); } - __device__ double acosh(double x) { return hc::precise_math::acosh(x); @@ -1015,6 +1034,8 @@ __device__ double cospi(double x) { return hc::precise_math::cospi(x); } +__device__ double cyl_bessel_i0(double x); +__device__ double cyl_bessel_i1(double x); __device__ double erf(double x) { return hc::precise_math::erf(x); @@ -1023,6 +1044,14 @@ __device__ double erfc(double x) { return hc::precise_math::erfc(x); } +__device__ double erfcinv(double x) +{ + return __hip_erfinv(1 - x); +} +__device__ double erfcx(double x) +{ + return hc::precise_math::exp(x*x)*hc::precise_math::erf(x); +} __device__ double erfinv(double x) { return __hip_erfinv(x); @@ -1095,6 +1124,15 @@ __device__ unsigned isnan(double x) { return hc::precise_math::isnan(x); } +__device__ double j0(double x) +{ + return __hip_j0(x); +} +__device__ double j1(double x) +{ + return __hip_j1(x); +} +__device__ double jn(double x); __device__ double ldexp(double x, int exp) { return hc::precise_math::ldexp(x, exp); @@ -1103,10 +1141,6 @@ __device__ double lgamma(double x, int *sign) { return hc::precise_math::lgamma(x, sign); } -__device__ double log(double x) -{ - return hc::precise_math::log(x); -} __device__ long long int llrint(double x) { long long int y = hc::precise_math::round(x); @@ -1117,6 +1151,10 @@ __device__ long long int llround(double x) long long int y = hc::precise_math::round(x); return y; } +__device__ double log(double x) +{ + return hc::precise_math::log(x); +} __device__ double log10(double x) { return hc::precise_math::log10(x); @@ -1172,7 +1210,7 @@ __device__ double norm4d(double a, double b, double c, double d) } __device__ double normcdf(float y) { - return ((hc::precise_math::erf(y)/1.41421356237) + 1)/2; + return ((hc::precise_math::erf(y)/HIP_SQRT_2) + 1)/2; } __device__ double pow(double x, double y) { @@ -1277,7 +1315,15 @@ __device__ double trunc(double x) { return hc::precise_math::trunc(x); } - +__device__ double y0(double x) +{ + return __hip_y0(x); +} +__device__ double y1(double x) +{ + return __hip_y1(x); +} +__device__ double yn(int n, double x); const int warpSize = 64; @@ -2125,6 +2171,11 @@ double __hip_host_erfcinv(double y) return __hip_host_erfinv(1 - y); } +__host__ float modff(float x, float *iptr) +{ + return std::modf(x, iptr); +} + __host__ float erfcinvf(float y) { return __hip_host_erfcinvf(y); diff --git a/tests/src/hipTestHost.cpp b/tests/src/hipTestHost.cpp new file mode 100644 index 0000000000..7c2b396b68 --- /dev/null +++ b/tests/src/hipTestHost.cpp @@ -0,0 +1,58 @@ +#include "test_common.h" +#include +#include "hip_runtime.h" +#include "hip_runtime_api.h" + +#define N 512 + +bool check_erfcinvf(){ + uint32_t len = 4; + float Val[] = {0.1, 1.2, 1, 0.9}; + float Out[] = {1.16309, -0.179144, 0, 0.0889}; + for(int i=0;i 0.0001) + { + return false; + } + } + return true; +} + +bool check_erfcxf(){ + uint32_t len = 4; + float Val[] = {-0.5, 15, 3.2, 1}; + float Out[] = {1.9524, 0.0375, 0.1687, 0.4276}; + for(int i=0;i 0.0001) + { + return false; + } + } + return true; +} + +bool check_erfinvf() +{ + uint32_t len = 4; + float Val[] = {0, -0.5, 0.9, -0.2}; + float Out[] = {0, -0.4769, 1.1631, -0.1791}; + for(int i=0;i 0.0001){ + return false; + } + } + return true; +} + +int main(){ + float *Af = new float[N]; + double *A = new double[N]; + for(int i=0;i