added fast math intrinsics to HIP

1. Added fast math intrinsics for single precision data types
2. Added test to check the intrinsics
3. Added HIP_PRECISE_MATH macro to enable precise math on fast math

Change-Id: Iadacbb6182c31252c5e3252854372d1b80dfd27b


[ROCm/hip commit: f843928ddd]
このコミットが含まれているのは:
Aditya Atluri
2016-11-22 15:26:00 -06:00
コミット 467d8d1aea
3個のファイルの変更391行の追加38行の削除
+186 -19
ファイルの表示
@@ -479,26 +479,193 @@ __device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr();
//TODO - add a couple fast math operations here, the set here will grow :
__device__ float __expf(float x);
__device__ float __frsqrt_rn(float x);
__device__ float __fsqrt_rd(float x);
__device__ float __fsqrt_rn(float x);
__device__ float __fsqrt_ru(float x);
__device__ float __fsqrt_rz(float x);
__device__ float __log10f(float x);
//__device__ float __log2f(float x);
__device__ float __logf(float x);
__device__ float __powf(float base, float exponent);
__device__ void __sincosf(float x, float *s, float *c) ;
extern __attribute__((const)) float __sinf(float) __asm("llvm.sin.f32");
extern __attribute__((const)) float __cosf(float) __asm("llvm.cos.f32");
extern __attribute__((const)) float __log2f(float) __asm("llvm.log2.f32");
__device__ float __tanf(float x);
__device__ float __dsqrt_rd(double x);
__device__ float __dsqrt_rn(double x);
__device__ float __dsqrt_ru(double x);
__device__ float __dsqrt_rz(double x);
// Single Precision Precise Math
__device__ float __hip_precise_cosf(float);
__device__ float __hip_precise_exp10f(float);
__device__ float __hip_precise_expf(float);
__device__ float __hip_precise_frsqrt_rn(float);
__device__ float __hip_precise_fsqrt_rd(float);
__device__ float __hip_precise_fsqrt_rn(float);
__device__ float __hip_precise_fsqrt_ru(float);
__device__ float __hip_precise_fsqrt_rz(float);
__device__ float __hip_precise_log10f(float);
__device__ float __hip_precise_log2f(float);
__device__ float __hip_precise_logf(float);
__device__ float __hip_precise_powf(float, float);
__device__ void __hip_precise_sincosf(float,float*,float*);
__device__ float __hip_precise_sinf(float);
__device__ float __hip_precise_tanf(float);
// Double Precision Precise Math
__device__ double __hip_precise_dsqrt_rd(double);
__device__ double __hip_precise_dsqrt_rn(double);
__device__ double __hip_precise_dsqrt_ru(double);
__device__ double __hip_precise_dsqrt_rz(double);
// Single Precision Fast Math
extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32");
extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32");
__device__ float __hip_fast_exp10f(float);
__device__ float __hip_fast_expf(float);
__device__ float __hip_fast_frsqrt_rn(float);
extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32");
__device__ float __hip_fast_fsqrt_rn(float);
__device__ float __hip_fast_fsqrt_ru(float);
__device__ float __hip_fast_fsqrt_rz(float);
__device__ float __hip_fast_log10f(float);
extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32");
__device__ float __hip_fast_logf(float);
__device__ float __hip_fast_powf(float, float);
__device__ void __hip_fast_sincosf(float,float*,float*);
extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
__device__ float __hip_fast_tanf(float);
#ifdef HIP_PRECISE_MATH
// Single Precision Precise Math when enabled
__device__ inline float __cosf(float x) {
return __hip_precise_cosf(x);
}
__device__ inline float __exp10f(float x) {
return __hip_precise_exp10f(x);
}
__device__ inline float __expf(float x) {
return __hip_precise_expf(x);
}
__device__ inline float __frsqrt_rn(float x) {
return __hip_precise_frsqrt_rn(x);
}
__device__ inline float __fsqrt_rd(float x) {
return __hip_precise_fsqrt_rd(x);
}
__device__ inline float __fsqrt_rn(float x) {
return __hip_precise_fsqrt_rn(x);
}
__device__ inline float __fsqrt_ru(float x) {
return __hip_precise_fsqrt_ru(x);
}
__device__ inline float __fsqrt_rz(float x) {
return __hip_precise_fsqrt_rz(x);
}
__device__ inline float __log10f(float x) {
return __hip_precise_log10f(x);
}
__device__ inline float __log2f(float x) {
return __hip_precise_log2f(x);
}
__device__ inline float __logf(float x) {
return __hip_precise_logf(x);
}
__device__ inline float __powf(float base, float exponent) {
return __hip_precise_powf(base, exponent);
}
__device__ inline void __sincosf(float x, float *s, float *c) {
return __hip_precise_sincosf(x, s, c);
}
__device__ inline float __sinf(float x) {
return __hip_precise_sinf(x);
}
__device__ inline float __tanf(float x) {
return __hip_precise_tanf(x);
}
// Double Precision
__device__ double __dsqrt_rd(double x) {
return __hip_precise_dsqrt_rd(x);
}
__device__ double __dsqrt_rn(double x) {
return __hip_precise_dsqrt_rn(x);
}
__device__ double __dsqrt_ru(double x) {
return __hip_precise_dsqrt_ru(x);
}
__device__ double __dsqrt_rz(double x) {
return __hip_precise_dsqrt_rz(x);
}
#else
// Single Precision Fast Math
__device__ inline float __cosf(float x) {
return __hip_fast_cosf(x);
}
__device__ inline float __exp10f(float x) {
return __hip_fast_exp10f(x);
}
__device__ inline float __expf(float x) {
return __hip_fast_expf(x);
}
__device__ inline float __frsqrt_rn(float x) {
return __hip_fast_frsqrt_rn(x);
}
__device__ inline float __fsqrt_rd(float x) {
return __hip_fast_fsqrt_rd(x);
}
__device__ inline float __fsqrt_rn(float x) {
return __hip_fast_fsqrt_rn(x);
}
__device__ inline float __fsqrt_ru(float x) {
return __hip_fast_fsqrt_ru(x);
}
__device__ inline float __fsqrt_rz(float x) {
return __hip_fast_fsqrt_rz(x);
}
__device__ inline float __log10f(float x) {
return __hip_fast_log10f(x);
}
__device__ inline float __log2f(float x) {
return __hip_fast_log2f(x);
}
__device__ inline float __logf(float x) {
return __hip_fast_logf(x);
}
__device__ inline float __powf(float base, float exponent) {
return __hip_fast_powf(base, exponent);
}
__device__ inline void __sincosf(float x, float *s, float *c) {
return __hip_fast_sincosf(x, s, c);
}
__device__ inline float __sinf(float x) {
return __hip_fast_sinf(x);
}
__device__ inline float __tanf(float x) {
return __hip_fast_tanf(x);
}
#endif
/**
* CUDA 8 device function features
+144 -19
ファイルの表示
@@ -2043,26 +2043,151 @@ __device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr()
}
// Precise Math Functions
__device__ float __hip_precise_cosf(float x) {
return hc::precise_math::cosf(x);
}
//TODO - add a couple fast math operations here, the set here will grow :
//__device__ float __cosf(float x) {return hc::fast_math::cosf(x); };
__device__ float __expf(float x) {return hc::fast_math::expf(x); };
__device__ float __frsqrt_rn(float x) {return hc::fast_math::rsqrt(x); };
__device__ float __fsqrt_rd(float x) {return hc::fast_math::sqrt(x); };
__device__ float __fsqrt_rn(float x) {return hc::fast_math::sqrt(x); };
__device__ float __fsqrt_ru(float x) {return hc::fast_math::sqrt(x); };
__device__ float __fsqrt_rz(float x) {return hc::fast_math::sqrt(x); };
__device__ float __log10f(float x) {return hc::fast_math::log10f(x); };
//__device__ float __log2f(float x) {return hc::fast_math::log2f(x); };
__device__ float __logf(float x) {return hc::fast_math::logf(x); };
__device__ float __powf(float base, float exponent) {return hc::fast_math::powf(base, exponent); };
__device__ void __sincosf(float x, float *s, float *c) { *s = __sinf(x); *c = __cosf(x); };
//__device__ float __sinf(float x) {return hc::fast_math::sinf(x); };
__device__ float __tanf(float x) {return __sinf(x)/__cosf(x); };
__device__ float __dsqrt_rd(double x) {return hc::fast_math::sqrt(x); };
__device__ float __dsqrt_rn(double x) {return hc::fast_math::sqrt(x); };
__device__ float __dsqrt_ru(double x) {return hc::fast_math::sqrt(x); };
__device__ float __dsqrt_rz(double x) {return hc::fast_math::sqrt(x); };
__device__ float __hip_precise_exp10f(float x) {
return hc::precise_math::exp10f(x);
}
__device__ float __hip_precise_expf(float x) {
return hc::precise_math::expf(x);
}
__device__ float __hip_precise_frsqrt_rn(float x) {
return hc::precise_math::rsqrt(x);
}
__device__ float __hip_precise_fsqrt_rd(float x) {
return hc::precise_math::sqrt(x);
}
__device__ float __hip_precise_fsqrt_rn(float x) {
return hc::precise_math::sqrt(x);
}
__device__ float __hip_precise_fsqrt_ru(float x) {
return hc::precise_math::sqrt(x);
}
__device__ float __hip_precise_fsqrt_rz(float x) {
return hc::precise_math::sqrt(x);
}
__device__ float __hip_precise_log10f(float x) {
return hc::precise_math::log10(x);
}
__device__ float __hip_precise_log2f(float x) {
return hc::precise_math::log2(x);
}
__device__ float __hip_precise_logf(float x) {
return hc::precise_math::logf(x);
}
__device__ float __hip_precise_powf(float base, float exponent) {
return hc::precise_math::powf(base, exponent);
}
__device__ void __hip_precise_sincosf(float x, float *s, float *c) {
hc::precise_math::sincosf(x, s, c);
}
__device__ float __hip_precise_sinf(float x) {
return hc::precise_math::sinf(x);
}
__device__ float __hip_precise_tanf(float x) {
return hc::precise_math::tanf(x);
}
// Double Precision Math
__device__ double __hip_precise_dsqrt_rd(double x) {
return hc::precise_math::sqrt(x);
}
__device__ double __hip_precise_dsqrt_rn(double x) {
return hc::precise_math::sqrt(x);
}
__device__ double __hip_precise_dsqrt_ru(double x) {
return hc::precise_math::sqrt(x);
}
__device__ double __hip_precise_dsqrt_rz(double x) {
return hc::precise_math::sqrt(x);
}
#define LOG_BASE2_E_DIV_2 0.4426950408894701
#define LOG_BASE2_5 2.321928094887362
#define ONE_DIV_LOG_BASE2_E 0.69314718056
#define ONE_DIV_LOG_BASE2_10 0.30102999566
// Fast Math Intrinsics
__device__ float __hip_fast_exp10f(float x) {
return __hip_fast_exp2f(x*LOG_BASE2_E_DIV_2);
}
__device__ float __hip_fast_expf(float x) {
return __hip_fast_expf(x*LOG_BASE2_5);
}
__device__ float __hip_fast_frsqrt_rn(float x) {
return 1 / __hip_fast_fsqrt_rd(x);;
}
__device__ float __hip_fast_fsqrt_rn(float x) {
return __hip_fast_fsqrt_rd(x);
}
__device__ float __hip_fast_fsqrt_ru(float x) {
return __hip_fast_fsqrt_rd(x);
}
__device__ float __hip_fast_fsqrt_rz(float x) {
return __hip_fast_fsqrt_rd(x);
}
__device__ float __hip_fast_log10f(float x) {
return ONE_DIV_LOG_BASE2_E * __hip_fast_log2f(x);
}
__device__ float __hip_fast_logf(float x) {
return ONE_DIV_LOG_BASE2_10 * __hip_fast_log2f(x);
}
__device__ float __hip_fast_powf(float base, float exponent) {
return hc::fast_math::powf(base, exponent);
}
__device__ void __hip_fast_sincosf(float x, float *s, float *c) {
*s = __hip_fast_sinf(x);
*c = __hip_fast_cosf(x);
}
__device__ float __hip_fast_tanf(float x) {
return hc::fast_math::tanf(x);
}
// Double Precision Math
__device__ double __hip_fast_dsqrt_rd(double x) {
return hc::fast_math::sqrt(x);
}
__device__ double __hip_fast_dsqrt_rn(double x) {
return hc::fast_math::sqrt(x);
}
__device__ double __hip_fast_dsqrt_ru(double x) {
return hc::fast_math::sqrt(x);
}
__device__ double __hip_fast_dsqrt_rz(double x) {
return hc::fast_math::sqrt(x);
}
__HIP_DEVICE__ char1 make_char1(signed char x)
{
+61
ファイルの表示
@@ -0,0 +1,61 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s
* RUN: %t
* HIT_END
*/
#include "test_common.h"
#define LEN 512
#define SIZE LEN<<2
__global__ void floatMath(hipLaunchParm lp, float *In, float *Out) {
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
Out[tid] = __cosf(In[tid]);
Out[tid] = __exp10f(Out[tid]);
Out[tid] = __expf(Out[tid]);
Out[tid] = __frsqrt_rn(Out[tid]);
Out[tid] = __fsqrt_rd(Out[tid]);
Out[tid] = __fsqrt_rn(Out[tid]);
Out[tid] = __fsqrt_ru(Out[tid]);
Out[tid] = __fsqrt_rz(Out[tid]);
Out[tid] = __log10f(Out[tid]);
Out[tid] = __log2f(Out[tid]);
Out[tid] = __logf(Out[tid]);
Out[tid] = __powf(2.0f, Out[tid]);
__sincosf(Out[tid], &In[tid], &Out[tid]);
Out[tid] = __sinf(Out[tid]);
Out[tid] = __cosf(Out[tid]);
Out[tid] = __tanf(Out[tid]);
}
int main(){
float *Inh, *Outh, *Ind, *Outd;
hipMalloc((void**)&Ind, SIZE);
hipMalloc((void**)&Outd, SIZE);
hipLaunchKernel(floatMath, dim3(LEN,1,1), dim3(1,1,1), 0, 0, Ind, Outd);
}