Added fast math flag

1. Use -DHIP_FAST_MATH to make precise math functions compiled to fast math
2. Added double fast math functions for sqrt
3. Changed hipcc to parse -use_fast_math (not working)
4. Added passed tag to hipFloatMath test

Change-Id: I72884b2436b4efe61e9a9297346c1358fee38a2d


[ROCm/clr commit: c2f6ecf264]
Tento commit je obsažen v:
Aditya Atluri
2016-11-23 11:19:15 -06:00
rodič 79c92fdb23
revize a2ecbc2d6e
4 změnil soubory, kde provedl 177 přidání a 70 odebrání
+5
Zobrazit soubor
@@ -274,6 +274,11 @@ foreach $arg (@ARGV)
$buildDeps = 1;
}
if($arg eq '-use_fast_math') {
print "In fast Math";
$HIPCXXFLAGS .= " -DHIP_FAST_MATH ";
}
if ($arg =~ m/^-/) {
# options start with -
+49 -70
Zobrazit soubor
@@ -130,7 +130,6 @@ __device__ float atanhf(float x);
__device__ float cbrtf(float x);
__device__ float ceilf(float x);
__device__ float copysignf(float x, float y);
__device__ float cosf(float x);
__device__ float coshf(float x);
__device__ float cyl_bessel_i0f(float x);
__device__ float cyl_bessel_i1f(float x);
@@ -142,9 +141,7 @@ __host__ float erfcxf(float x);
__device__ float erff(float x);
__device__ float erfinvf(float y);
__host__ float erfinvf(float y);
__device__ float exp10f(float x);
__device__ float exp2f(float x);
__device__ float expf(float x);
__device__ float expm1f(float x);
__device__ float fabsf(float x);
__device__ float fdimf(float x, float y);
@@ -167,11 +164,8 @@ __device__ float ldexpf(float x, int exp);
__device__ float lgammaf(float x);
__device__ long long int llrintf(float x);
__device__ long long int llroundf(float x);
__device__ float log10f(float x);
__device__ float log1pf(float x);
__device__ float log2f(float x);
__device__ float logbf(float x);
__device__ float logf(float x);
__device__ long int lrintf(float x);
__device__ long int lroundf(float x);
__device__ float modff(float x, float *iptr);
@@ -187,7 +181,6 @@ __host__ float normcdff(float y);
__device__ float normcdfinvf(float y);
__host__ float normcdfinvf(float y);
__device__ float normf(int dim, const float *a);
__device__ float powf(float x, float y);
__device__ float rcbrtf(float x);
__host__ float rcbrtf(float x);
__device__ float remainderf(float x, float y);
@@ -206,14 +199,11 @@ __device__ float rsqrtf(float x);
__device__ float scalblnf(float x, long int n);
__device__ float scalbnf(float x, int n);
__host__ __device__ unsigned signbit(float a);
__device__ void sincosf(float x, float *sptr, float *cptr);
__device__ void sincospif(float x, float *sptr, float *cptr);
__host__ void sincospif(float x, float *sptr, float *cptr);
__device__ float sinf(float x);
__device__ float sinhf(float x);
__device__ float sinpif(float x);
__device__ float sqrtf(float x);
__device__ float tanf(float x);
__device__ float tanhf(float x);
__device__ float tgammaf(float x);
__device__ float truncf(float x);
@@ -519,90 +509,65 @@ __device__ float __hip_fast_powf(float, float);
__device__ void __hip_fast_sincosf(float,float*,float*);
extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
__device__ float __hip_fast_tanf(float);
extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");
#ifdef HIP_PRECISE_MATH
#ifdef HIP_FAST_MATH
// Single Precision Precise Math when enabled
__device__ inline float __cosf(float x) {
return __hip_precise_cosf(x);
__device__ inline float cosf(float x) {
return __hip_fast_cosf(x);
}
__device__ inline float __exp10f(float x) {
return __hip_precise_exp10f(x);
__device__ inline float exp10f(float x) {
return __hip_fast_exp10f(x);
}
__device__ inline float __expf(float x) {
return __hip_precise_expf(x);
__device__ inline float expf(float x) {
return __hip_fast_expf(x);
}
__device__ inline float __frsqrt_rn(float x) {
return __hip_precise_frsqrt_rn(x);
__device__ inline float log10f(float x) {
return __hip_fast_log10f(x);
}
__device__ inline float __fsqrt_rd(float x) {
return __hip_precise_fsqrt_rd(x);
__device__ inline float log2f(float x) {
return __hip_fast_log2f(x);
}
__device__ inline float __fsqrt_rn(float x) {
return __hip_precise_fsqrt_rn(x);
__device__ inline float logf(float x) {
return __hip_fast_logf(x);
}
__device__ inline float __fsqrt_ru(float x) {
return __hip_precise_fsqrt_ru(x);
__device__ inline float powf(float base, float exponent) {
return __hip_fast_powf(base, exponent);
}
__device__ inline float __fsqrt_rz(float x) {
return __hip_precise_fsqrt_rz(x);
__device__ inline void sincosf(float x, float *s, float *c) {
return __hip_fast_sincosf(x, s, c);
}
__device__ inline float __log10f(float x) {
return __hip_precise_log10f(x);
__device__ inline float sinf(float x) {
return __hip_fast_sinf(x);
}
__device__ inline float __log2f(float x) {
return __hip_precise_log2f(x);
}
__device__ inline float __logf(float x) {
return __hip_precise_logf(x);
}
__device__ inline float __powf(float base, float exponent) {
return __hip_precise_powf(base, exponent);
}
__device__ inline void __sincosf(float x, float *s, float *c) {
return __hip_precise_sincosf(x, s, c);
}
__device__ inline float __sinf(float x) {
return __hip_precise_sinf(x);
}
__device__ inline float __tanf(float x) {
return __hip_precise_tanf(x);
}
// Double Precision
__device__ double __dsqrt_rd(double x) {
return __hip_precise_dsqrt_rd(x);
}
__device__ double __dsqrt_rn(double x) {
return __hip_precise_dsqrt_rn(x);
}
__device__ double __dsqrt_ru(double x) {
return __hip_precise_dsqrt_ru(x);
}
__device__ double __dsqrt_rz(double x) {
return __hip_precise_dsqrt_rz(x);
__device__ inline float tanf(float x) {
return __hip_fast_tanf(x);
}
#else
__device__ float sinf(float);
__device__ float cosf(float);
__device__ float tanf(float);
__device__ void sincosf(float, float*, float*);
__device__ float logf(float);
__device__ float log2f(float);
__device__ float log10f(float);
__device__ float expf(float);
__device__ float exp10f(float);
__device__ float powf(float, float);
#endif
// Single Precision Fast Math
__device__ inline float __cosf(float x) {
return __hip_fast_cosf(x);
@@ -664,8 +629,22 @@ __device__ inline float __tanf(float x) {
return __hip_fast_tanf(x);
}
__device__ inline double __dsqrt_rd(double x) {
return __hip_fast_dsqrt(x);
}
__device__ inline double __dsqrt_rn(double x) {
return __hip_fast_dsqrt(x);
}
__device__ inline double __dsqrt_ru(double x) {
return __hip_fast_dsqrt(x);
}
__device__ inline double __dsqrt_rz(double x) {
return __hip_fast_dsqrt(x);
}
#endif
/**
* CUDA 8 device function features
+1
Zobrazit soubor
@@ -58,4 +58,5 @@ int main(){
hipMalloc((void**)&Ind, SIZE);
hipMalloc((void**)&Outd, SIZE);
hipLaunchKernel(floatMath, dim3(LEN,1,1), dim3(1,1,1), 0, 0, Ind, Outd);
passed();
}
+122
Zobrazit soubor
@@ -0,0 +1,122 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "test_common.h"
__global__ void FloatMathPrecise(hipLaunchParm lp)
{
int iX;
float fX, fY;
acosf(1.0f);
acoshf(1.0f);
asinf(0.0f);
asinhf(0.0f);
atan2f(0.0f, 1.0f);
atanf(0.0f);
atanhf(0.0f);
cbrtf(0.0f);
fX = ceilf(0.0f);
fX = copysignf(1.0f, -2.0f);
cosf(0.0f);
coshf(0.0f);
cospif(0.0f);
//cyl_bessel_i0f(0.0f);
//cyl_bessel_i1f(0.0f);
erfcf(0.0f);
erfcinvf(2.0f);
erfcxf(0.0f);
erff(0.0f);
erfinvf(1.0f);
exp10f(0.0f);
exp2f(0.0f);
expf(0.0f);
expm1f(0.0f);
fX = fabsf(1.0f);
fdimf(1.0f, 0.0f);
fdividef(0.0f, 1.0f);
fX = floorf(0.0f);
fmaf(1.0f, 2.0f, 3.0f);
fX = fmaxf(0.0f, 0.0f);
fX = fminf(0.0f, 0.0f);
fmodf(0.0f, 1.0f);
//frexpf(0.0f, &iX);
hypotf(1.0f, 0.0f);
ilogbf(1.0f);
isfinite(0.0f);
fX = isinf(0.0f);
fX = isnan(0.0f);
j0f(0.0f);
j1f(0.0f);
jnf(-1.0f, 1.0f);
ldexpf(0.0f, 0);
//lgammaf(1.0f);
llrintf(0.0f);
llroundf(0.0f);
log10f(1.0f);
log1pf(-1.0f);
log2f(1.0f);
logbf(1.0f);
logf(1.0f);
lrintf(0.0f);
lroundf(0.0f);
//modff(0.0f, &fX);
fX = nanf("1");
fX = nearbyintf(0.0f);
//nextafterf(0.0f);
norm3df(1.0f, 0.0f, 0.0f);
norm4df(1.0f, 0.0f, 0.0f, 0.0f);
normcdff(0.0f);
normcdfinvf(1.0f);
fX = 1.0f; normf(1, &fX);
powf(1.0f, 0.0f);
rcbrtf(1.0f);
remainderf(2.0f, 1.0f);
//remquof(1.0f, 2.0f, &iX);
rhypotf(0.0f, 1.0f);
fY = rintf(1.0f);
rnorm3df(0.0f, 0.0f, 1.0f);
rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
fX = 1.0f; rnormf(1, &fX);
fY = roundf(0.0f);
rsqrtf(1.0f);
scalblnf(0.0f, 1);
scalbnf(0.0f, 1);
signbit(1.0f);
sincosf(0.0f, &fX, &fY);
sincospif(0.0f, &fX, &fY);
sinf(0.0f);
sinhf(0.0f);
sinpif(0.0f);
sqrtf(0.0f);
tanf(0.0f);
tanhf(0.0f);
tgammaf(2.0f);
fY = truncf(0.0f);
y0f(1.0f);
y1f(1.0f);
ynf(1, 1.0f);
}
int main() {
hipLaunchKernel(FloatMathPrecise, dim3(1,1,1), dim3(1,1,1), 0, 0);
}