Merge pull request #277 from AlexVlx/feature_use_module_based_dispatch_instead_of_pfe

Fix hang by purging archaisms

[ROCm/clr commit: a833ec9f7f]
Этот коммит содержится в:
Ben Sander
2017-12-02 07:11:55 -06:00
коммит произвёл GitHub
родитель a5e32fb896 1ba0d0ca14
Коммит 032ef0370a
2 изменённых файла: 5 добавлений и 90 удалений
+2 -82
Просмотреть файл
@@ -147,91 +147,11 @@ __device__ void* __hip_hc_memset(void* dst, uint8_t val, size_t size)
}
/**
 * Single-precision inverse error function (device side).
 *
 * Thin forwarder to hc::precise_math::erfinvf. There is deliberately no
 * local polynomial/rational approximation here: keeping one in front of
 * the library call made the library call unreachable and duplicated work
 * the math library already does.
 *
 * @param x  Argument, passed through unchanged.
 * @return   The value produced by hc::precise_math::erfinvf(x); domain and
 *           special-value handling are whatever that routine implements.
 */
__device__ float __hip_erfinvf(float x) {
    return hc::precise_math::erfinvf(x);
}
/**
 * Double-precision inverse error function (device side).
 *
 * Thin forwarder to hc::precise_math::erfinv. No hand-written
 * approximation is kept in this function so that the library routine is
 * the single code path that produces the result.
 *
 * @param x  Argument, passed through unchanged.
 * @return   The value produced by hc::precise_math::erfinv(x); domain and
 *           special-value handling are whatever that routine implements.
 */
__device__ double __hip_erfinv(double x) {
    return hc::precise_math::erfinv(x);
}
#define __hip_j0a1 57568490574.0
+3 -8
Просмотреть файл
@@ -84,7 +84,7 @@ __device__ float erfcf(float x)
}
/**
 * Single-precision inverse complementary error function (device side).
 *
 * Calls hc::precise_math::erfcinvf directly on y instead of inverting
 * (1 - y): forming 1 - y in float rounds away the low-order bits of a
 * small y before the inversion even starts.
 *
 * @param y  Argument, passed through unchanged.
 * @return   The value produced by hc::precise_math::erfcinvf(y).
 */
__device__ float erfcinvf(float y)
{
    return hc::precise_math::erfcinvf(y);
}
__device__ float erfcxf(float x)
{
@@ -96,7 +96,7 @@ __device__ float erff(float x)
}
/**
 * Single-precision inverse error function (device side).
 *
 * Forwards to hc::precise_math::erfinvf; this is the only return path.
 *
 * @param y  Argument, passed through unchanged.
 * @return   The value produced by hc::precise_math::erfinvf(y).
 */
__device__ float erfinvf(float y)
{
    return hc::precise_math::erfinvf(y);
}
__device__ float exp10f(float x)
{
@@ -192,12 +192,7 @@ __device__ float ldexpf(float x, int exp)
}
/**
 * Single-precision log-gamma (device side).
 *
 * Forwards to hc::precise_math::lgammaf. A summation loop of the form
 * `for (y = x - 1; y > 0; y--) val += logf(y)` must not be used here:
 * it matches log-gamma only for positive integer x, costs one iteration
 * per unit of x, and never terminates when decrementing y no longer
 * changes it (infinite x, or a float too large for a step of 1 to be
 * representable), which hangs the calling thread.
 *
 * @param x  Argument, passed through unchanged.
 * @return   The value produced by hc::precise_math::lgammaf(x).
 */
__device__ float lgammaf(float x)
{
    return hc::precise_math::lgammaf(x);
}
__device__ long long int llrintf(float x)
{