/* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include"hip_runtime.h" #include #include #include // TODO: Choose whether default is precise math or fast math based on compilation flag. #ifdef __HCC_ACCELERATOR__ using namespace hc::precise_math; #endif #define __hip_erfinva3 -0.140543331 #define __hip_erfinva2 0.914624893 #define __hip_erfinva1 -1.645349621 #define __hip_erfinva0 0.886226899 #define __hip_erfinvb4 0.012229801 #define __hip_erfinvb3 -0.329097515 #define __hip_erfinvb2 1.442710462 #define __hip_erfinvb1 -2.118377725 #define __hip_erfinvb0 1 #define __hip_erfinvc3 1.641345311 #define __hip_erfinvc2 3.429567803 #define __hip_erfinvc1 -1.62490649 #define __hip_erfinvc0 -1.970840454 #define __hip_erfinvd2 1.637067800 #define __hip_erfinvd1 3.543889200 #define __hip_erfinvd0 1 #define HIP_PI 3.14159265358979323846 __device__ float __hip_erfinvf(float x){ float ret; int sign; if (x < -1 || x > 1){ return NAN; } if (x == 0){ return 0; } if (x > 0){ sign = 1; } else { sign = -1; x = -x; } if (x <= 0.7) { float x1 = x * x; float x2 = hc::precise_math::fmaf(__hip_erfinva3, x1, __hip_erfinva2); float x3 = hc::precise_math::fmaf(x2, x1, __hip_erfinva1); float x4 = x * hc::precise_math::fmaf(x3, x1, __hip_erfinva0); float r1 = hc::precise_math::fmaf(__hip_erfinvb4, x1, __hip_erfinvb3); float r2 = hc::precise_math::fmaf(r1, x1, __hip_erfinvb2); float r3 = hc::precise_math::fmaf(r2, x1, __hip_erfinvb1); ret = x4 / hc::precise_math::fmaf(r3, x1, __hip_erfinvb0); } else { float x1 = hc::precise_math::sqrtf(-hc::precise_math::logf((1 - x) / 2)); float x2 = hc::precise_math::fmaf(__hip_erfinvc3, x1, __hip_erfinvc2); float x3 = hc::precise_math::fmaf(x2, x1, __hip_erfinvc1); float x4 = hc::precise_math::fmaf(x3, x1, __hip_erfinvc0); float r1 = hc::precise_math::fmaf(__hip_erfinvd2, x1, __hip_erfinvd1); ret = x4 / hc::precise_math::fmaf(r1, x1, __hip_erfinvd0); } ret = ret * sign; x = x * sign; ret -= (hc::precise_math::erff(ret) - x) / (2 / hc::precise_math::sqrtf(HIP_PI) * hc::precise_math::expf(-ret * ret)); ret -= (hc::precise_math::erff(ret) - x) / (2 / hc::precise_math::sqrtf(HIP_PI) * hc::precise_math::expf(-ret * ret)); return ret; } __device__ double __hip_erfinv(double x){ double ret; int sign; if (x < -1 || x > 1){ return NAN; } if (x == 0){ return 0; } if (x > 0){ sign = 1; } else { sign = -1; x = -x; } if (x <= 0.7) { double x1 = x * x; double x2 = hc::precise_math::fma(__hip_erfinva3, x1, __hip_erfinva2); double x3 = hc::precise_math::fma(x2, x1, __hip_erfinva1); double x4 = x * hc::precise_math::fma(x3, x1, __hip_erfinva0); double r1 = hc::precise_math::fma(__hip_erfinvb4, x1, __hip_erfinvb3); double r2 = hc::precise_math::fma(r1, x1, __hip_erfinvb2); double r3 = hc::precise_math::fma(r2, x1, __hip_erfinvb1); ret = x4 / hc::precise_math::fma(r3, x1, __hip_erfinvb0); } else { double x1 = hc::precise_math::sqrt(-hc::precise_math::log((1 - x) / 2)); double x2 = hc::precise_math::fma(__hip_erfinvc3, x1, __hip_erfinvc2); double x3 = hc::precise_math::fma(x2, x1, __hip_erfinvc1); double x4 = hc::precise_math::fma(x3, x1, __hip_erfinvc0); double r1 = hc::precise_math::fma(__hip_erfinvd2, x1, __hip_erfinvd1); ret = x4 / hc::precise_math::fma(r1, x1, __hip_erfinvd0); } ret = ret * sign; x = x * sign; ret -= (hc::precise_math::erf(ret) - x) / (2 / hc::precise_math::sqrt(HIP_PI) * hc::precise_math::exp(-ret * ret)); ret -= (hc::precise_math::erf(ret) - x) / (2 / hc::precise_math::sqrt(HIP_PI) * hc::precise_math::exp(-ret * ret)); return ret; } __device__ float acosf(float x) { return hc::precise_math::acosf(x); } __device__ float acoshf(float x) { return hc::precise_math::acoshf(x); } __device__ float asinf(float x) { return hc::precise_math::asinf(x); } __device__ float asinhf(float x) { return hc::precise_math::asinhf(x); } __device__ float atan2f(float y, float x) { return hc::precise_math::atan2f(x, y); } __device__ float atanf(float x) { return hc::precise_math::atanf(x); } __device__ float atanhf(float x) { return hc::precise_math::atanhf(x); } __device__ float cbrtf(float x) { return hc::precise_math::cbrtf(x); } __device__ float ceilf(float x) { return hc::precise_math::ceilf(x); } __device__ float copysignf(float x, float y) { return hc::precise_math::copysignf(x, y); } __device__ float cosf(float x) { return hc::precise_math::cosf(x); } __device__ float coshf(float x) { return hc::precise_math::coshf(x); } __device__ float cyl_bessel_i0f(float x); __device__ float cyl_bessel_i1f(float x); __device__ float erfcf(float x) { return hc::precise_math::erfcf(x); } __device__ float erfcinvf(float y); __device__ float erfcxf(float x); __device__ float erff(float x) { return hc::precise_math::erff(x); } __device__ float erfinvf(float y) { return __hip_erfinvf(y); } __device__ float exp10f(float x) { return hc::precise_math::exp10f(x); } __device__ float exp2f(float x) { return hc::precise_math::exp2f(x); } __device__ float expf(float x) { return hc::precise_math::expf(x); } __device__ float expm1f(float x) { return hc::precise_math::expm1f(x); } __device__ float fabsf(float x) { return hc::precise_math::fabsf(x); } __device__ float fdimf(float x, float y) { return hc::precise_math::fdimf(x, y); } __device__ float fdividef(float x, float y) { return x/y; } __device__ float floorf(float x) { return hc::precise_math::floorf(x); } __device__ float fmaf(float x, float y, float z) { return hc::precise_math::fmaf(x, y, z); } __device__ float fmaxf(float x, float y) { return hc::precise_math::fmaxf(x, y); } __device__ float fminf(float x, float y) { return hc::precise_math::fminf(x, y); } __device__ float fmodf(float x, float y) { return hc::precise_math::fmodf(x, y); } __device__ float frexpf(float x, int *nptr) { return hc::precise_math::frexpf(x, nptr); } __device__ float hypotf(float x, float y) { return hc::precise_math::hypotf(x, y); } __device__ float ilogbf(float x) { return hc::precise_math::ilogbf(x); } __device__ unsigned isfinite(float a) { return hc::precise_math::isfinite(a); } __device__ unsigned isinf(float a) { return hc::precise_math::isinf(a); } __device__ unsigned isnan(float a) { return hc::precise_math::isnan(a); } __device__ float j0f(float x); __device__ float j1f(float x); __device__ float jnf(int n, float x); __device__ float ldexpf(float x, int exp) { return hc::precise_math::ldexpf(x, exp); } __device__ float lgammaf(float x, int *sign) { return hc::precise_math::lgammaf(x, sign); } __device__ long long int llrintf(float x) { int y = hc::precise_math::roundf(x); long long int z = y; return z; } __device__ long long int llroundf(float x) { int y = hc::precise_math::roundf(x); long long int z = y; return z; }__device__ float log10f(float x) { return hc::precise_math::log10f(x); } __device__ float log1pf(float x) { return hc::precise_math::log1pf(x); } __device__ float log2f(float x) { return hc::precise_math::log2f(x); } __device__ float logbf(float x) { return hc::precise_math::logbf(x); } __device__ float logf(float x) { return hc::precise_math::logf(x); } __device__ long int lrintf(float x) { int y = hc::precise_math::roundf(x); long int z = y; return z; } __device__ long int lroundf(float x) { long int y = hc::precise_math::roundf(x); return y; } __device__ float modff(float x, float *iptr) { return hc::precise_math::modff(x, iptr); } __device__ float nanf(const char* tagp) { return hc::precise_math::nanf((int)*tagp); } __device__ float nearbyintf(float x) { return hc::precise_math::nearbyintf(x); } __device__ float nextafterf(float x, float y) { return hc::precise_math::nextafter(x, y); } __device__ float norm3df(float a, float b, float c) { float x = a*a + b*b + c*c; return hc::precise_math::sqrtf(x); } __device__ float norm4df(float a, float b, float c, float d) { float x = a*a + b*b; float y = c*c + d*d; return hc::precise_math::sqrtf(x+y); } /* The below conversion seems easy, takes a full page of integral calculus to deduce the following equation */ __device__ float normcdff(float y) { return ((hc::precise_math::erff(y)/1.41421356237) + 1)/2; } __device__ float normcdfinvf(float y); __device__ float normf(int dim, const float *a) { float x = 0.0f; for(int i=0;i