Added fast math flag

1. Use -DHIP_FAST_MATH to make the precise math functions compile to their fast-math counterparts. 2. Added double-precision fast-math functions for sqrt. 3. Changed hipcc to parse -use_fast_math (not working). 4. Added the passed tag to the hipFloatMath test. Change-Id: I72884b2436b4efe61e9a9297346c1358fee38a2d [ROCm/clr commit: c2f6ecf264]
2016-11-23 11:19:15 -06:00
@@ -274,6 +274,11 @@ foreach $arg (@ARGV)
        $buildDeps = 1;
    }

+    if($arg eq '-use_fast_math') {
+        # nvcc-style switch: define HIP_FAST_MATH, which the device math header tests with #ifdef; nothing is printed so hipcc's stdout stays clean.
+        $HIPCXXFLAGS .= " -DHIP_FAST_MATH ";
+    }
+
    if ($arg =~ m/^-/) {
        # options start with -

@@ -130,7 +130,6 @@ __device__ float atanhf(float x);
 __device__ float cbrtf(float x);
 __device__ float ceilf(float x);
 __device__ float copysignf(float x, float y);
-__device__ float cosf(float x);
 __device__ float coshf(float x);
 __device__ float cyl_bessel_i0f(float x);
 __device__ float cyl_bessel_i1f(float x);
@@ -142,9 +141,7 @@ __host__ float erfcxf(float x);
 __device__ float erff(float x);
 __device__ float erfinvf(float y);
 __host__ float erfinvf(float y);
-__device__ float exp10f(float x);
 __device__ float exp2f(float x);
-__device__ float expf(float x);
 __device__ float expm1f(float x);
 __device__ float fabsf(float x);
 __device__ float fdimf(float x, float y);
@@ -167,11 +164,8 @@ __device__ float ldexpf(float x, int exp);
 __device__ float lgammaf(float x);
 __device__ long long int llrintf(float x);
 __device__ long long int llroundf(float x);
-__device__ float log10f(float x);
 __device__ float log1pf(float x);
-__device__ float log2f(float x);
 __device__ float logbf(float x);
-__device__ float logf(float x);
 __device__ long int lrintf(float x);
 __device__ long int lroundf(float x);
 __device__ float modff(float x, float *iptr);
@@ -187,7 +181,6 @@ __host__ float normcdff(float y);
 __device__ float normcdfinvf(float y);
 __host__ float normcdfinvf(float y);
 __device__ float normf(int dim, const float *a);
-__device__ float powf(float x, float y);
 __device__ float rcbrtf(float x);
 __host__ float rcbrtf(float x);
 __device__ float remainderf(float x, float y);
@@ -206,14 +199,11 @@ __device__ float rsqrtf(float x);
 __device__ float scalblnf(float x, long int n);
 __device__ float scalbnf(float x, int n);
 __host__ __device__ unsigned signbit(float a);
-__device__ void sincosf(float x, float *sptr, float *cptr);
 __device__ void sincospif(float x, float *sptr, float *cptr);
 __host__ void sincospif(float x, float *sptr, float *cptr);
-__device__ float sinf(float x);
 __device__ float sinhf(float x);
 __device__ float sinpif(float x);
 __device__ float sqrtf(float x);
-__device__ float tanf(float x);
 __device__ float tanhf(float x);
 __device__ float tgammaf(float x);
 __device__ float truncf(float x);
@@ -519,90 +509,65 @@ __device__ float __hip_fast_powf(float, float);
 __device__ void __hip_fast_sincosf(float,float*,float*);
 extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
 __device__ float __hip_fast_tanf(float);
+extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");

-#ifdef HIP_PRECISE_MATH
+#ifdef HIP_FAST_MATH
 // Single Precision Precise Math when enabled

-__device__ inline float __cosf(float x) {
-  return __hip_precise_cosf(x);
+__device__ inline float cosf(float x) {
+  return __hip_fast_cosf(x);
 }

-__device__ inline float __exp10f(float x) {
-  return __hip_precise_exp10f(x);
+__device__ inline float exp10f(float x) {
+  return __hip_fast_exp10f(x);
 }

-__device__ inline float __expf(float x) {
-  return __hip_precise_expf(x);
+__device__ inline float expf(float x) {
+  return __hip_fast_expf(x);
 }

-__device__ inline float __frsqrt_rn(float x) {
-  return __hip_precise_frsqrt_rn(x);
+__device__ inline float log10f(float x) {
+  return __hip_fast_log10f(x);
 }

-__device__ inline float __fsqrt_rd(float x) {
-  return __hip_precise_fsqrt_rd(x);
+__device__ inline float log2f(float x) {
+  return __hip_fast_log2f(x);
 }

-__device__ inline float __fsqrt_rn(float x) {
-  return __hip_precise_fsqrt_rn(x);
+__device__ inline float logf(float x) {
+  return __hip_fast_logf(x);
 }

-__device__ inline float __fsqrt_ru(float x) {
-  return __hip_precise_fsqrt_ru(x);
+__device__ inline float powf(float base, float exponent) {
+  return __hip_fast_powf(base, exponent);
 }

-__device__ inline float __fsqrt_rz(float x) {
-  return __hip_precise_fsqrt_rz(x);
+__device__ inline void sincosf(float x, float *s, float *c) {
+  return __hip_fast_sincosf(x, s, c);
 }

-__device__ inline float __log10f(float x) {
-  return __hip_precise_log10f(x);
+__device__ inline float sinf(float x) {
+  return __hip_fast_sinf(x);
 }

-__device__ inline float __log2f(float x) {
-  return __hip_precise_log2f(x);
-}
-
-__device__ inline float __logf(float x) {
-  return __hip_precise_logf(x);
-}
-
-__device__ inline float __powf(float base, float exponent) {
-  return __hip_precise_powf(base, exponent);
-}
-
-__device__ inline void __sincosf(float x, float *s, float *c) {
-  return __hip_precise_sincosf(x, s, c);
-}
-
-__device__ inline float __sinf(float x) {
-  return __hip_precise_sinf(x);
-}
-
-__device__ inline float __tanf(float x) {
-  return __hip_precise_tanf(x);
-}
-
-// Double Precision
-
-__device__ double __dsqrt_rd(double x) {
-  return __hip_precise_dsqrt_rd(x);
-}
-
-__device__ double __dsqrt_rn(double x) {
-  return __hip_precise_dsqrt_rn(x);
-}
-
-__device__ double __dsqrt_ru(double x) {
-  return __hip_precise_dsqrt_ru(x);
-}
-
-__device__ double __dsqrt_rz(double x) {
-  return __hip_precise_dsqrt_rz(x);
+__device__ inline float tanf(float x) {
+  return __hip_fast_tanf(x);
 }

 #else

+__device__ float sinf(float);  // #else branch (HIP_FAST_MATH not defined): declarations only, no inline fast-math bodies
+__device__ float cosf(float);
+__device__ float tanf(float);
+__device__ void sincosf(float, float*, float*);  // writes through the two pointer arguments; returns nothing
+__device__ float logf(float);
+__device__ float log2f(float);
+__device__ float log10f(float);
+__device__ float expf(float);
+__device__ float exp10f(float);
+__device__ float powf(float, float);
+
+#endif  // HIP_FAST_MATH
 // Single Precision Fast Math
 __device__ inline float __cosf(float x) {
  return __hip_fast_cosf(x);
@@ -664,8 +629,22 @@ __device__ inline float __tanf(float x) {
  return __hip_fast_tanf(x);
 }

+__device__ inline double __dsqrt_rd(double x) {  // "_rd" name; body is identical to the _rn/_ru/_rz variants
+  return __hip_fast_dsqrt(x);  // __hip_fast_dsqrt is declared with __asm("llvm.sqrt.f64")
+}
+
+__device__ inline double __dsqrt_rn(double x) {  // "_rn" name; no rounding-mode argument is passed on
+  return __hip_fast_dsqrt(x);
+}
+
+__device__ inline double __dsqrt_ru(double x) {  // "_ru" name; same forwarding call as the other variants
+  return __hip_fast_dsqrt(x);
+}
+
+__device__ inline double __dsqrt_rz(double x) {  // "_rz" name; same forwarding call as the other variants
+  return __hip_fast_dsqrt(x);
+}

-#endif
 /**
 * CUDA 8 device function features

@@ -58,4 +58,5 @@ int main(){
  hipMalloc((void**)&Ind, SIZE);
  hipMalloc((void**)&Outd, SIZE);
  hipLaunchKernel(floatMath, dim3(LEN,1,1), dim3(1,1,1), 0, 0, Ind, Outd);
+  passed();
 }
@@ -0,0 +1,128 @@
+/*
+Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+#include "hip/hip_runtime.h"
+#include "test_common.h"
+
+// Calls each single-precision device math function once with constant arguments.
+// No result is checked here; this looks like compile/launch coverage only (TODO confirm).
+// Calls that are commented out were already disabled and are kept that way.
+__global__ void FloatMathPrecise(hipLaunchParm lp)
+{
+    int iX;
+    float fX, fY;
+
+    acosf(1.0f);
+    acoshf(1.0f);
+    asinf(0.0f);
+    asinhf(0.0f);
+    atan2f(0.0f, 1.0f);
+    atanf(0.0f);
+    atanhf(0.0f);
+    cbrtf(0.0f);
+    fX = ceilf(0.0f);
+    fX = copysignf(1.0f, -2.0f);
+    cosf(0.0f);
+    coshf(0.0f);
+    cospif(0.0f);
+    //cyl_bessel_i0f(0.0f);
+    //cyl_bessel_i1f(0.0f);
+    erfcf(0.0f);
+    erfcinvf(2.0f);
+    erfcxf(0.0f);
+    erff(0.0f);
+    erfinvf(1.0f);
+    exp10f(0.0f);
+    exp2f(0.0f);
+    expf(0.0f);
+    expm1f(0.0f);
+    fX = fabsf(1.0f);
+    fdimf(1.0f, 0.0f);
+    fdividef(0.0f, 1.0f);
+    fX = floorf(0.0f);
+    fmaf(1.0f, 2.0f, 3.0f);
+    fX = fmaxf(0.0f, 0.0f);
+    fX = fminf(0.0f, 0.0f);
+    fmodf(0.0f, 1.0f);
+    //frexpf(0.0f, &iX);
+    hypotf(1.0f, 0.0f);
+    ilogbf(1.0f);
+    isfinite(0.0f);
+    fX = isinf(0.0f);
+    fX = isnan(0.0f);
+    j0f(0.0f);
+    j1f(0.0f);
+    jnf(-1.0f, 1.0f);
+    ldexpf(0.0f, 0);
+    //lgammaf(1.0f);
+    llrintf(0.0f);
+    llroundf(0.0f);
+    log10f(1.0f);
+    log1pf(-1.0f);
+    log2f(1.0f);
+    logbf(1.0f);
+    logf(1.0f);
+    lrintf(0.0f);
+    lroundf(0.0f);
+    //modff(0.0f, &fX);
+    fX = nanf("1");
+    fX = nearbyintf(0.0f);
+    //nextafterf(0.0f);
+    norm3df(1.0f, 0.0f, 0.0f);
+    norm4df(1.0f, 0.0f, 0.0f, 0.0f);
+    normcdff(0.0f);
+    normcdfinvf(1.0f);
+    fX = 1.0f; normf(1, &fX);
+    powf(1.0f, 0.0f);
+    rcbrtf(1.0f);
+    remainderf(2.0f, 1.0f);
+    //remquof(1.0f, 2.0f, &iX);
+    rhypotf(0.0f, 1.0f);
+    fY = rintf(1.0f);
+    rnorm3df(0.0f, 0.0f, 1.0f);
+    rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
+    fX = 1.0f; rnormf(1, &fX);
+    fY = roundf(0.0f);
+    rsqrtf(1.0f);
+    scalblnf(0.0f, 1);
+    scalbnf(0.0f, 1);
+    signbit(1.0f);
+    sincosf(0.0f, &fX, &fY);
+    sincospif(0.0f, &fX, &fY);
+    sinf(0.0f);
+    sinhf(0.0f);
+    sinpif(0.0f);
+    sqrtf(0.0f);
+    tanf(0.0f);
+    tanhf(0.0f);
+    tgammaf(2.0f);
+    fY = truncf(0.0f);
+    y0f(1.0f);
+    y1f(1.0f);
+    ynf(1, 1.0f);
+}
+
+// Launches the kernel with 1x1x1 grid and block dimensions, then emits the harness
+// success marker (passed() is presumably provided by test_common.h, as in hipFloatMath).
+int main() {
+    hipLaunchKernel(FloatMathPrecise, dim3(1,1,1), dim3(1,1,1), 0, 0);
+    passed();
+}