Add fast-math flag
1. Define -DHIP_FAST_MATH to make the precise math functions compile to their fast-math equivalents
2. Add double-precision fast-math functions for sqrt
3. Change hipcc to parse -use_fast_math (not working yet)
4. Add passed() call to the hipFloatMath test
Change-Id: I72884b2436b4efe61e9a9297346c1358fee38a2d
[ROCm/clr commit: c2f6ecf264]
Tento commit je obsažen v:
@@ -274,6 +274,11 @@ foreach $arg (@ARGV)
|
||||
$buildDeps = 1;
|
||||
}
|
||||
|
||||
# Translate the -use_fast_math command-line option into the HIP_FAST_MATH
# preprocessor define, which the HIP math header tests with #ifdef to select
# the fast-math implementations of cosf, expf, logf, etc.
#
# No diagnostic is printed here: hipcc is a compiler driver, and stray text on
# stdout (the previous un-terminated "In fast Math" debug message) ends up in
# build logs and in the output of tools that capture the driver's stdout.
#
# NOTE(review): the argument is not consumed here, so it still falls through
# to the generic "options start with -" handling that follows — confirm
# whether -use_fast_math should also be forwarded to the underlying compiler.
if ($arg eq '-use_fast_math') {
    $HIPCXXFLAGS .= " -DHIP_FAST_MATH ";
}
|
||||
|
||||
if ($arg =~ m/^-/) {
|
||||
# options start with -
|
||||
|
||||
|
||||
@@ -130,7 +130,6 @@ __device__ float atanhf(float x);
|
||||
__device__ float cbrtf(float x);
|
||||
__device__ float ceilf(float x);
|
||||
__device__ float copysignf(float x, float y);
|
||||
__device__ float cosf(float x);
|
||||
__device__ float coshf(float x);
|
||||
__device__ float cyl_bessel_i0f(float x);
|
||||
__device__ float cyl_bessel_i1f(float x);
|
||||
@@ -142,9 +141,7 @@ __host__ float erfcxf(float x);
|
||||
__device__ float erff(float x);
|
||||
__device__ float erfinvf(float y);
|
||||
__host__ float erfinvf(float y);
|
||||
__device__ float exp10f(float x);
|
||||
__device__ float exp2f(float x);
|
||||
__device__ float expf(float x);
|
||||
__device__ float expm1f(float x);
|
||||
__device__ float fabsf(float x);
|
||||
__device__ float fdimf(float x, float y);
|
||||
@@ -167,11 +164,8 @@ __device__ float ldexpf(float x, int exp);
|
||||
__device__ float lgammaf(float x);
|
||||
__device__ long long int llrintf(float x);
|
||||
__device__ long long int llroundf(float x);
|
||||
__device__ float log10f(float x);
|
||||
__device__ float log1pf(float x);
|
||||
__device__ float log2f(float x);
|
||||
__device__ float logbf(float x);
|
||||
__device__ float logf(float x);
|
||||
__device__ long int lrintf(float x);
|
||||
__device__ long int lroundf(float x);
|
||||
__device__ float modff(float x, float *iptr);
|
||||
@@ -187,7 +181,6 @@ __host__ float normcdff(float y);
|
||||
__device__ float normcdfinvf(float y);
|
||||
__host__ float normcdfinvf(float y);
|
||||
__device__ float normf(int dim, const float *a);
|
||||
__device__ float powf(float x, float y);
|
||||
__device__ float rcbrtf(float x);
|
||||
__host__ float rcbrtf(float x);
|
||||
__device__ float remainderf(float x, float y);
|
||||
@@ -206,14 +199,11 @@ __device__ float rsqrtf(float x);
|
||||
__device__ float scalblnf(float x, long int n);
|
||||
__device__ float scalbnf(float x, int n);
|
||||
__host__ __device__ unsigned signbit(float a);
|
||||
__device__ void sincosf(float x, float *sptr, float *cptr);
|
||||
__device__ void sincospif(float x, float *sptr, float *cptr);
|
||||
__host__ void sincospif(float x, float *sptr, float *cptr);
|
||||
__device__ float sinf(float x);
|
||||
__device__ float sinhf(float x);
|
||||
__device__ float sinpif(float x);
|
||||
__device__ float sqrtf(float x);
|
||||
__device__ float tanf(float x);
|
||||
__device__ float tanhf(float x);
|
||||
__device__ float tgammaf(float x);
|
||||
__device__ float truncf(float x);
|
||||
@@ -519,90 +509,65 @@ __device__ float __hip_fast_powf(float, float);
|
||||
__device__ void __hip_fast_sincosf(float,float*,float*);
|
||||
extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
|
||||
__device__ float __hip_fast_tanf(float);
|
||||
extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");
|
||||
|
||||
#ifdef HIP_PRECISE_MATH
|
||||
#ifdef HIP_FAST_MATH
|
||||
// Single Precision Precise Math when enabled
|
||||
|
||||
__device__ inline float __cosf(float x) {
|
||||
return __hip_precise_cosf(x);
|
||||
__device__ inline float cosf(float x) {
|
||||
return __hip_fast_cosf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __exp10f(float x) {
|
||||
return __hip_precise_exp10f(x);
|
||||
__device__ inline float exp10f(float x) {
|
||||
return __hip_fast_exp10f(x);
|
||||
}
|
||||
|
||||
__device__ inline float __expf(float x) {
|
||||
return __hip_precise_expf(x);
|
||||
__device__ inline float expf(float x) {
|
||||
return __hip_fast_expf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __frsqrt_rn(float x) {
|
||||
return __hip_precise_frsqrt_rn(x);
|
||||
__device__ inline float log10f(float x) {
|
||||
return __hip_fast_log10f(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_rd(float x) {
|
||||
return __hip_precise_fsqrt_rd(x);
|
||||
__device__ inline float log2f(float x) {
|
||||
return __hip_fast_log2f(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_rn(float x) {
|
||||
return __hip_precise_fsqrt_rn(x);
|
||||
__device__ inline float logf(float x) {
|
||||
return __hip_fast_logf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_ru(float x) {
|
||||
return __hip_precise_fsqrt_ru(x);
|
||||
__device__ inline float powf(float base, float exponent) {
|
||||
return __hip_fast_powf(base, exponent);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_rz(float x) {
|
||||
return __hip_precise_fsqrt_rz(x);
|
||||
__device__ inline void sincosf(float x, float *s, float *c) {
|
||||
return __hip_fast_sincosf(x, s, c);
|
||||
}
|
||||
|
||||
__device__ inline float __log10f(float x) {
|
||||
return __hip_precise_log10f(x);
|
||||
__device__ inline float sinf(float x) {
|
||||
return __hip_fast_sinf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __log2f(float x) {
|
||||
return __hip_precise_log2f(x);
|
||||
}
|
||||
|
||||
__device__ inline float __logf(float x) {
|
||||
return __hip_precise_logf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __powf(float base, float exponent) {
|
||||
return __hip_precise_powf(base, exponent);
|
||||
}
|
||||
|
||||
__device__ inline void __sincosf(float x, float *s, float *c) {
|
||||
return __hip_precise_sincosf(x, s, c);
|
||||
}
|
||||
|
||||
__device__ inline float __sinf(float x) {
|
||||
return __hip_precise_sinf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __tanf(float x) {
|
||||
return __hip_precise_tanf(x);
|
||||
}
|
||||
|
||||
// Double Precision
|
||||
|
||||
__device__ double __dsqrt_rd(double x) {
|
||||
return __hip_precise_dsqrt_rd(x);
|
||||
}
|
||||
|
||||
__device__ double __dsqrt_rn(double x) {
|
||||
return __hip_precise_dsqrt_rn(x);
|
||||
}
|
||||
|
||||
__device__ double __dsqrt_ru(double x) {
|
||||
return __hip_precise_dsqrt_ru(x);
|
||||
}
|
||||
|
||||
__device__ double __dsqrt_rz(double x) {
|
||||
return __hip_precise_dsqrt_rz(x);
|
||||
__device__ inline float tanf(float x) {
|
||||
return __hip_fast_tanf(x);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
__device__ float sinf(float);
|
||||
__device__ float cosf(float);
|
||||
__device__ float tanf(float);
|
||||
__device__ void sincosf(float, float*, float*);
|
||||
__device__ float logf(float);
|
||||
__device__ float log2f(float);
|
||||
__device__ float log10f(float);
|
||||
__device__ float expf(float);
|
||||
__device__ float exp10f(float);
|
||||
__device__ float powf(float, float);
|
||||
|
||||
#endif
|
||||
// Single Precision Fast Math
|
||||
__device__ inline float __cosf(float x) {
|
||||
return __hip_fast_cosf(x);
|
||||
@@ -664,8 +629,22 @@ __device__ inline float __tanf(float x) {
|
||||
return __hip_fast_tanf(x);
|
||||
}
|
||||
|
||||
/**
 * Fast-math double-precision square root (the "_rd" variant).
 *
 * Forwards to __hip_fast_dsqrt, which is declared with the assembler name
 * "llvm.sqrt.f64". This wrapper applies no rounding-mode handling of its own:
 * it makes the same call as the other __dsqrt_* fast-math variants.
 * NOTE(review): "_rd" presumably means round-down as in the CUDA intrinsic
 * of the same name — confirm that ignoring the rounding mode is intended.
 *
 * @param x input value
 * @return the value returned by __hip_fast_dsqrt(x)
 */
__device__ inline double __dsqrt_rd(double x)
{
    const double root = __hip_fast_dsqrt(x);
    return root;
}
|
||||
|
||||
/**
 * Fast-math double-precision square root (the "_rn" variant).
 *
 * Forwards to __hip_fast_dsqrt, which is declared with the assembler name
 * "llvm.sqrt.f64". This wrapper applies no rounding-mode handling of its own:
 * it makes the same call as the other __dsqrt_* fast-math variants.
 * NOTE(review): "_rn" presumably means round-to-nearest as in the CUDA
 * intrinsic of the same name — confirm against the intended semantics.
 *
 * @param x input value
 * @return the value returned by __hip_fast_dsqrt(x)
 */
__device__ inline double __dsqrt_rn(double x)
{
    const double root = __hip_fast_dsqrt(x);
    return root;
}
|
||||
|
||||
/**
 * Fast-math double-precision square root (the "_ru" variant).
 *
 * Forwards to __hip_fast_dsqrt, which is declared with the assembler name
 * "llvm.sqrt.f64". This wrapper applies no rounding-mode handling of its own:
 * it makes the same call as the other __dsqrt_* fast-math variants.
 * NOTE(review): "_ru" presumably means round-up as in the CUDA intrinsic
 * of the same name — confirm that ignoring the rounding mode is intended.
 *
 * @param x input value
 * @return the value returned by __hip_fast_dsqrt(x)
 */
__device__ inline double __dsqrt_ru(double x)
{
    const double root = __hip_fast_dsqrt(x);
    return root;
}
|
||||
|
||||
/**
 * Fast-math double-precision square root (the "_rz" variant).
 *
 * Forwards to __hip_fast_dsqrt, which is declared with the assembler name
 * "llvm.sqrt.f64". This wrapper applies no rounding-mode handling of its own:
 * it makes the same call as the other __dsqrt_* fast-math variants.
 * NOTE(review): "_rz" presumably means round-toward-zero as in the CUDA
 * intrinsic of the same name — confirm that ignoring the rounding mode is
 * intended.
 *
 * @param x input value
 * @return the value returned by __hip_fast_dsqrt(x)
 */
__device__ inline double __dsqrt_rz(double x)
{
    const double root = __hip_fast_dsqrt(x);
    return root;
}
|
||||
|
||||
#endif
|
||||
/**
|
||||
* CUDA 8 device function features
|
||||
|
||||
|
||||
@@ -58,4 +58,5 @@ int main(){
|
||||
hipMalloc((void**)&Ind, SIZE);
|
||||
hipMalloc((void**)&Outd, SIZE);
|
||||
hipLaunchKernel(floatMath, dim3(LEN,1,1), dim3(1,1,1), 0, 0, Ind, Outd);
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,122 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
/**
 * Coverage kernel for the single-precision device math functions.
 *
 * Invokes each function once with constant arguments. Return values are
 * either discarded or stored into locals that are never read, and nothing is
 * written to memory outside the kernel's own locals. Calls that are commented
 * out are kept in place, in order, as a record of what is not exercised.
 *
 * @param lp launch parameter object required by the hipLaunchKernel macro
 */
__global__ void FloatMathPrecise(hipLaunchParm lp) {
    int iX;     // only referenced by the commented-out frexpf/remquof calls
    float fX;
    float fY;

    acosf(1.0f);
    acoshf(1.0f);
    asinf(0.0f);
    asinhf(0.0f);
    atan2f(0.0f, 1.0f);
    atanf(0.0f);
    atanhf(0.0f);
    cbrtf(0.0f);
    fX = ceilf(0.0f);
    fX = copysignf(1.0f, -2.0f);
    cosf(0.0f);
    coshf(0.0f);
    cospif(0.0f);
    // cyl_bessel_i0f(0.0f);
    // cyl_bessel_i1f(0.0f);
    erfcf(0.0f);
    erfcinvf(2.0f);
    erfcxf(0.0f);
    erff(0.0f);
    erfinvf(1.0f);
    exp10f(0.0f);
    exp2f(0.0f);
    expf(0.0f);
    expm1f(0.0f);
    fX = fabsf(1.0f);
    fdimf(1.0f, 0.0f);
    fdividef(0.0f, 1.0f);
    fX = floorf(0.0f);
    fmaf(1.0f, 2.0f, 3.0f);
    fX = fmaxf(0.0f, 0.0f);
    fX = fminf(0.0f, 0.0f);
    fmodf(0.0f, 1.0f);
    // frexpf(0.0f, &iX);
    hypotf(1.0f, 0.0f);
    ilogbf(1.0f);
    isfinite(0.0f);
    fX = isinf(0.0f);
    fX = isnan(0.0f);
    j0f(0.0f);
    j1f(0.0f);
    jnf(-1.0f, 1.0f);
    ldexpf(0.0f, 0);
    // lgammaf(1.0f);
    llrintf(0.0f);
    llroundf(0.0f);
    log10f(1.0f);
    log1pf(-1.0f);
    log2f(1.0f);
    logbf(1.0f);
    logf(1.0f);
    lrintf(0.0f);
    lroundf(0.0f);
    // modff(0.0f, &fX);
    fX = nanf("1");
    fX = nearbyintf(0.0f);
    // nextafterf(0.0f);
    norm3df(1.0f, 0.0f, 0.0f);
    norm4df(1.0f, 0.0f, 0.0f, 0.0f);
    normcdff(0.0f);
    normcdfinvf(1.0f);
    // normf reads its input through a pointer, so seed fX first.
    fX = 1.0f;
    normf(1, &fX);
    powf(1.0f, 0.0f);
    rcbrtf(1.0f);
    remainderf(2.0f, 1.0f);
    // remquof(1.0f, 2.0f, &iX);
    rhypotf(0.0f, 1.0f);
    fY = rintf(1.0f);
    rnorm3df(0.0f, 0.0f, 1.0f);
    rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
    // rnormf likewise takes a pointer to its input; reset fX before the call.
    fX = 1.0f;
    rnormf(1, &fX);
    fY = roundf(0.0f);
    rsqrtf(1.0f);
    scalblnf(0.0f, 1);
    scalbnf(0.0f, 1);
    signbit(1.0f);
    sincosf(0.0f, &fX, &fY);
    sincospif(0.0f, &fX, &fY);
    sinf(0.0f);
    sinhf(0.0f);
    sinpif(0.0f);
    sqrtf(0.0f);
    tanf(0.0f);
    tanhf(0.0f);
    tgammaf(2.0f);
    fY = truncf(0.0f);
    y0f(1.0f);
    y1f(1.0f);
    ynf(1, 1.0f);
}
|
||||
|
||||
int main() {
|
||||
hipLaunchKernel(FloatMathPrecise, dim3(1,1,1), dim3(1,1,1), 0, 0);
|
||||
}
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele