From a2ecbc2d6ef347988b70399b7e2799bb0c6cbca4 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Wed, 23 Nov 2016 11:19:15 -0600 Subject: [PATCH] Added fast math flag 1. Use -DHIP_FAST_MATH to make precise math functions compiled to fast math 2. Added double fast math functions for sqrt 3. Changed hipcc to parse -use_fast_math (not working) 4. Added passed tag to hipFloatMath test Change-Id: I72884b2436b4efe61e9a9297346c1358fee38a2d [ROCm/clr commit: c2f6ecf2640474c6d00449e5f4195abc1172326d] --- projects/clr/hipamd/bin/hipcc | 5 + .../include/hip/hcc_detail/hip_runtime.h | 119 +++++++---------- .../tests/src/deviceLib/hipFloatMath.cpp | 1 + .../src/deviceLib/hipFloatMathPrecise.cpp | 122 ++++++++++++++++++ 4 files changed, 177 insertions(+), 70 deletions(-) create mode 100644 projects/clr/hipamd/tests/src/deviceLib/hipFloatMathPrecise.cpp diff --git a/projects/clr/hipamd/bin/hipcc b/projects/clr/hipamd/bin/hipcc index 641f70b065..09c4d813d0 100755 --- a/projects/clr/hipamd/bin/hipcc +++ b/projects/clr/hipamd/bin/hipcc @@ -274,6 +274,11 @@ foreach $arg (@ARGV) $buildDeps = 1; } + if($arg eq '-use_fast_math') { + print "In fast Math"; + $HIPCXXFLAGS .= " -DHIP_FAST_MATH "; + } + if ($arg =~ m/^-/) { # options start with - diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h index 4b781b44ae..45dbeff5a4 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -130,7 +130,6 @@ __device__ float atanhf(float x); __device__ float cbrtf(float x); __device__ float ceilf(float x); __device__ float copysignf(float x, float y); -__device__ float cosf(float x); __device__ float coshf(float x); __device__ float cyl_bessel_i0f(float x); __device__ float cyl_bessel_i1f(float x); @@ -142,9 +141,7 @@ __host__ float erfcxf(float x); __device__ float erff(float x); __device__ float erfinvf(float y); __host__ float erfinvf(float y); -__device__ float exp10f(float x); __device__ float exp2f(float x); -__device__ float expf(float x); __device__ float expm1f(float x); __device__ float fabsf(float x); __device__ float fdimf(float x, float y); @@ -167,11 +164,8 @@ __device__ float ldexpf(float x, int exp); __device__ float lgammaf(float x); __device__ long long int llrintf(float x); __device__ long long int llroundf(float x); -__device__ float log10f(float x); __device__ float log1pf(float x); -__device__ float log2f(float x); __device__ float logbf(float x); -__device__ float logf(float x); __device__ long int lrintf(float x); __device__ long int lroundf(float x); __device__ float modff(float x, float *iptr); @@ -187,7 +181,6 @@ __host__ float normcdff(float y); __device__ float normcdfinvf(float y); __host__ float normcdfinvf(float y); __device__ float normf(int dim, const float *a); -__device__ float powf(float x, float y); __device__ float rcbrtf(float x); __host__ float rcbrtf(float x); __device__ float remainderf(float x, float y); @@ -206,14 +199,11 @@ __device__ float rsqrtf(float x); __device__ float scalblnf(float x, long int n); __device__ float scalbnf(float x, int n); __host__ __device__ unsigned signbit(float a); -__device__ void sincosf(float x, float *sptr, float *cptr); __device__ void sincospif(float x, float *sptr, float *cptr); __host__ void sincospif(float x, float *sptr, float *cptr); -__device__ float sinf(float x); __device__ float sinhf(float x); __device__ float sinpif(float x); __device__ float sqrtf(float x); -__device__ float tanf(float x); __device__ float tanhf(float x); __device__ float tgammaf(float x); __device__ float truncf(float x); @@ -519,90 +509,65 @@ __device__ float __hip_fast_powf(float, float); __device__ void __hip_fast_sincosf(float,float*,float*); extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32"); __device__ float __hip_fast_tanf(float); +extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64"); -#ifdef HIP_PRECISE_MATH +#ifdef HIP_FAST_MATH // Single Precision Precise Math when enabled -__device__ inline float __cosf(float x) { - return __hip_precise_cosf(x); +__device__ inline float cosf(float x) { + return __hip_fast_cosf(x); } -__device__ inline float __exp10f(float x) { - return __hip_precise_exp10f(x); +__device__ inline float exp10f(float x) { + return __hip_fast_exp10f(x); } -__device__ inline float __expf(float x) { - return __hip_precise_expf(x); +__device__ inline float expf(float x) { + return __hip_fast_expf(x); } -__device__ inline float __frsqrt_rn(float x) { - return __hip_precise_frsqrt_rn(x); +__device__ inline float log10f(float x) { + return __hip_fast_log10f(x); } -__device__ inline float __fsqrt_rd(float x) { - return __hip_precise_fsqrt_rd(x); +__device__ inline float log2f(float x) { + return __hip_fast_log2f(x); } -__device__ inline float __fsqrt_rn(float x) { - return __hip_precise_fsqrt_rn(x); +__device__ inline float logf(float x) { + return __hip_fast_logf(x); } -__device__ inline float __fsqrt_ru(float x) { - return __hip_precise_fsqrt_ru(x); +__device__ inline float powf(float base, float exponent) { + return __hip_fast_powf(base, exponent); } -__device__ inline float __fsqrt_rz(float x) { - return __hip_precise_fsqrt_rz(x); +__device__ inline void sincosf(float x, float *s, float *c) { + return __hip_fast_sincosf(x, s, c); } -__device__ inline float __log10f(float x) { - return __hip_precise_log10f(x); +__device__ inline float sinf(float x) { + return __hip_fast_sinf(x); } -__device__ inline float __log2f(float x) { - return __hip_precise_log2f(x); -} - -__device__ inline float __logf(float x) { - return __hip_precise_logf(x); -} - -__device__ inline float __powf(float base, float exponent) { - return __hip_precise_powf(base, exponent); -} - -__device__ inline void __sincosf(float x, float *s, float *c) { - return __hip_precise_sincosf(x, s, c); -} - -__device__ inline float __sinf(float x) { - return __hip_precise_sinf(x); -} - -__device__ inline float __tanf(float x) { - return __hip_precise_tanf(x); -} - -// Double Precision - -__device__ double __dsqrt_rd(double x) { - return __hip_precise_dsqrt_rd(x); -} - -__device__ double __dsqrt_rn(double x) { - return __hip_precise_dsqrt_rn(x); -} - -__device__ double __dsqrt_ru(double x) { - return __hip_precise_dsqrt_ru(x); -} - -__device__ double __dsqrt_rz(double x) { - return __hip_precise_dsqrt_rz(x); +__device__ inline float tanf(float x) { + return __hip_fast_tanf(x); } #else +__device__ float sinf(float); +__device__ float cosf(float); +__device__ float tanf(float); +__device__ void sincosf(float, float*, float*); +__device__ float logf(float); +__device__ float log2f(float); +__device__ float log10f(float); +__device__ float expf(float); +__device__ float exp10f(float); +__device__ float powf(float, float); + +#endif // Single Precision Fast Math __device__ inline float __cosf(float x) { return __hip_fast_cosf(x); @@ -664,8 +629,22 @@ __device__ inline float __tanf(float x) { return __hip_fast_tanf(x); } +__device__ inline double __dsqrt_rd(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ inline double __dsqrt_rn(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ inline double __dsqrt_ru(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ inline double __dsqrt_rz(double x) { + return __hip_fast_dsqrt(x); +} -#endif /** * CUDA 8 device function features diff --git a/projects/clr/hipamd/tests/src/deviceLib/hipFloatMath.cpp b/projects/clr/hipamd/tests/src/deviceLib/hipFloatMath.cpp index eb70eb6b0b..f137ca2602 100644 --- a/projects/clr/hipamd/tests/src/deviceLib/hipFloatMath.cpp +++ b/projects/clr/hipamd/tests/src/deviceLib/hipFloatMath.cpp @@ -58,4 +58,5 @@ int main(){ hipMalloc((void**)&Ind, SIZE); hipMalloc((void**)&Outd, SIZE); hipLaunchKernel(floatMath, dim3(LEN,1,1), dim3(1,1,1), 0, 0, Ind, Outd); + passed(); } diff --git a/projects/clr/hipamd/tests/src/deviceLib/hipFloatMathPrecise.cpp b/projects/clr/hipamd/tests/src/deviceLib/hipFloatMathPrecise.cpp new file mode 100644 index 0000000000..4f6c2cd44a --- /dev/null +++ b/projects/clr/hipamd/tests/src/deviceLib/hipFloatMathPrecise.cpp @@ -0,0 +1,122 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "hip/hip_runtime.h" +#include "test_common.h" + +__global__ void FloatMathPrecise(hipLaunchParm lp) +{ + int iX; + float fX, fY; + + acosf(1.0f); + acoshf(1.0f); + asinf(0.0f); + asinhf(0.0f); + atan2f(0.0f, 1.0f); + atanf(0.0f); + atanhf(0.0f); + cbrtf(0.0f); + fX = ceilf(0.0f); + fX = copysignf(1.0f, -2.0f); + cosf(0.0f); + coshf(0.0f); + cospif(0.0f); + //cyl_bessel_i0f(0.0f); + //cyl_bessel_i1f(0.0f); + erfcf(0.0f); + erfcinvf(2.0f); + erfcxf(0.0f); + erff(0.0f); + erfinvf(1.0f); + exp10f(0.0f); + exp2f(0.0f); + expf(0.0f); + expm1f(0.0f); + fX = fabsf(1.0f); + fdimf(1.0f, 0.0f); + fdividef(0.0f, 1.0f); + fX = floorf(0.0f); + fmaf(1.0f, 2.0f, 3.0f); + fX = fmaxf(0.0f, 0.0f); + fX = fminf(0.0f, 0.0f); + fmodf(0.0f, 1.0f); + //frexpf(0.0f, &iX); + hypotf(1.0f, 0.0f); + ilogbf(1.0f); + isfinite(0.0f); + fX = isinf(0.0f); + fX = isnan(0.0f); + j0f(0.0f); + j1f(0.0f); + jnf(-1.0f, 1.0f); + ldexpf(0.0f, 0); + //lgammaf(1.0f); + llrintf(0.0f); + llroundf(0.0f); + log10f(1.0f); + log1pf(-1.0f); + log2f(1.0f); + logbf(1.0f); + logf(1.0f); + lrintf(0.0f); + lroundf(0.0f); + //modff(0.0f, &fX); + fX = nanf("1"); + fX = nearbyintf(0.0f); + //nextafterf(0.0f); + norm3df(1.0f, 0.0f, 0.0f); + norm4df(1.0f, 0.0f, 0.0f, 0.0f); + normcdff(0.0f); + normcdfinvf(1.0f); + fX = 1.0f; normf(1, &fX); + powf(1.0f, 0.0f); + rcbrtf(1.0f); + remainderf(2.0f, 1.0f); + //remquof(1.0f, 2.0f, &iX); + rhypotf(0.0f, 1.0f); + fY = rintf(1.0f); + rnorm3df(0.0f, 0.0f, 1.0f); + rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); + fX = 1.0f; rnormf(1, &fX); + fY = roundf(0.0f); + rsqrtf(1.0f); + scalblnf(0.0f, 1); + scalbnf(0.0f, 1); + signbit(1.0f); + sincosf(0.0f, &fX, &fY); + sincospif(0.0f, &fX, &fY); + sinf(0.0f); + sinhf(0.0f); + sinpif(0.0f); + sqrtf(0.0f); + tanf(0.0f); + tanhf(0.0f); + tgammaf(2.0f); + fY = truncf(0.0f); + y0f(1.0f); + y1f(1.0f); + ynf(1, 1.0f); +} + +int main() { + hipLaunchKernel(FloatMathPrecise, dim3(1,1,1), dim3(1,1,1), 0, 0); +}