added fast math intrinsics to HIP
1. Added fast math intrinsics for single precision data types
2. Added test to check the intrinsics
3. Added HIP_PRECISE_MATH macro to enable precise math on fast math
Change-Id: Iadacbb6182c31252c5e3252854372d1b80dfd27b
[ROCm/hip commit: f843928ddd]
このコミットが含まれているのは:
@@ -479,26 +479,193 @@ __device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr();
|
||||
|
||||
//TODO - add a couple fast math operations here, the set here will grow :
|
||||
|
||||
__device__ float __expf(float x);
|
||||
__device__ float __frsqrt_rn(float x);
|
||||
__device__ float __fsqrt_rd(float x);
|
||||
__device__ float __fsqrt_rn(float x);
|
||||
__device__ float __fsqrt_ru(float x);
|
||||
__device__ float __fsqrt_rz(float x);
|
||||
__device__ float __log10f(float x);
|
||||
//__device__ float __log2f(float x);
|
||||
__device__ float __logf(float x);
|
||||
__device__ float __powf(float base, float exponent);
|
||||
__device__ void __sincosf(float x, float *s, float *c) ;
|
||||
extern __attribute__((const)) float __sinf(float) __asm("llvm.sin.f32");
|
||||
extern __attribute__((const)) float __cosf(float) __asm("llvm.cos.f32");
|
||||
extern __attribute__((const)) float __log2f(float) __asm("llvm.log2.f32");
|
||||
__device__ float __tanf(float x);
|
||||
__device__ float __dsqrt_rd(double x);
|
||||
__device__ float __dsqrt_rn(double x);
|
||||
__device__ float __dsqrt_ru(double x);
|
||||
__device__ float __dsqrt_rz(double x);
|
||||
// Single Precision Precise Math
|
||||
__device__ float __hip_precise_cosf(float);
|
||||
__device__ float __hip_precise_exp10f(float);
|
||||
__device__ float __hip_precise_expf(float);
|
||||
__device__ float __hip_precise_frsqrt_rn(float);
|
||||
__device__ float __hip_precise_fsqrt_rd(float);
|
||||
__device__ float __hip_precise_fsqrt_rn(float);
|
||||
__device__ float __hip_precise_fsqrt_ru(float);
|
||||
__device__ float __hip_precise_fsqrt_rz(float);
|
||||
__device__ float __hip_precise_log10f(float);
|
||||
__device__ float __hip_precise_log2f(float);
|
||||
__device__ float __hip_precise_logf(float);
|
||||
__device__ float __hip_precise_powf(float, float);
|
||||
__device__ void __hip_precise_sincosf(float,float*,float*);
|
||||
__device__ float __hip_precise_sinf(float);
|
||||
__device__ float __hip_precise_tanf(float);
|
||||
|
||||
// Double Precision Precise Math
|
||||
__device__ double __hip_precise_dsqrt_rd(double);
|
||||
__device__ double __hip_precise_dsqrt_rn(double);
|
||||
__device__ double __hip_precise_dsqrt_ru(double);
|
||||
__device__ double __hip_precise_dsqrt_rz(double);
|
||||
|
||||
// Single Precision Fast Math
|
||||
extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32");
|
||||
extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32");
|
||||
__device__ float __hip_fast_exp10f(float);
|
||||
__device__ float __hip_fast_expf(float);
|
||||
__device__ float __hip_fast_frsqrt_rn(float);
|
||||
extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32");
|
||||
__device__ float __hip_fast_fsqrt_rn(float);
|
||||
__device__ float __hip_fast_fsqrt_ru(float);
|
||||
__device__ float __hip_fast_fsqrt_rz(float);
|
||||
__device__ float __hip_fast_log10f(float);
|
||||
extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32");
|
||||
__device__ float __hip_fast_logf(float);
|
||||
__device__ float __hip_fast_powf(float, float);
|
||||
__device__ void __hip_fast_sincosf(float,float*,float*);
|
||||
extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
|
||||
__device__ float __hip_fast_tanf(float);
|
||||
|
||||
#ifdef HIP_PRECISE_MATH
|
||||
// Single Precision Precise Math when enabled
|
||||
|
||||
__device__ inline float __cosf(float x) {
|
||||
return __hip_precise_cosf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __exp10f(float x) {
|
||||
return __hip_precise_exp10f(x);
|
||||
}
|
||||
|
||||
__device__ inline float __expf(float x) {
|
||||
return __hip_precise_expf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __frsqrt_rn(float x) {
|
||||
return __hip_precise_frsqrt_rn(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_rd(float x) {
|
||||
return __hip_precise_fsqrt_rd(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_rn(float x) {
|
||||
return __hip_precise_fsqrt_rn(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_ru(float x) {
|
||||
return __hip_precise_fsqrt_ru(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_rz(float x) {
|
||||
return __hip_precise_fsqrt_rz(x);
|
||||
}
|
||||
|
||||
__device__ inline float __log10f(float x) {
|
||||
return __hip_precise_log10f(x);
|
||||
}
|
||||
|
||||
__device__ inline float __log2f(float x) {
|
||||
return __hip_precise_log2f(x);
|
||||
}
|
||||
|
||||
__device__ inline float __logf(float x) {
|
||||
return __hip_precise_logf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __powf(float base, float exponent) {
|
||||
return __hip_precise_powf(base, exponent);
|
||||
}
|
||||
|
||||
__device__ inline void __sincosf(float x, float *s, float *c) {
|
||||
return __hip_precise_sincosf(x, s, c);
|
||||
}
|
||||
|
||||
__device__ inline float __sinf(float x) {
|
||||
return __hip_precise_sinf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __tanf(float x) {
|
||||
return __hip_precise_tanf(x);
|
||||
}
|
||||
|
||||
// Double Precision
|
||||
|
||||
__device__ double __dsqrt_rd(double x) {
|
||||
return __hip_precise_dsqrt_rd(x);
|
||||
}
|
||||
|
||||
__device__ double __dsqrt_rn(double x) {
|
||||
return __hip_precise_dsqrt_rn(x);
|
||||
}
|
||||
|
||||
__device__ double __dsqrt_ru(double x) {
|
||||
return __hip_precise_dsqrt_ru(x);
|
||||
}
|
||||
|
||||
__device__ double __dsqrt_rz(double x) {
|
||||
return __hip_precise_dsqrt_rz(x);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Single Precision Fast Math
|
||||
__device__ inline float __cosf(float x) {
|
||||
return __hip_fast_cosf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __exp10f(float x) {
|
||||
return __hip_fast_exp10f(x);
|
||||
}
|
||||
|
||||
__device__ inline float __expf(float x) {
|
||||
return __hip_fast_expf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __frsqrt_rn(float x) {
|
||||
return __hip_fast_frsqrt_rn(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_rd(float x) {
|
||||
return __hip_fast_fsqrt_rd(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_rn(float x) {
|
||||
return __hip_fast_fsqrt_rn(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_ru(float x) {
|
||||
return __hip_fast_fsqrt_ru(x);
|
||||
}
|
||||
|
||||
__device__ inline float __fsqrt_rz(float x) {
|
||||
return __hip_fast_fsqrt_rz(x);
|
||||
}
|
||||
|
||||
__device__ inline float __log10f(float x) {
|
||||
return __hip_fast_log10f(x);
|
||||
}
|
||||
|
||||
__device__ inline float __log2f(float x) {
|
||||
return __hip_fast_log2f(x);
|
||||
}
|
||||
|
||||
__device__ inline float __logf(float x) {
|
||||
return __hip_fast_logf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __powf(float base, float exponent) {
|
||||
return __hip_fast_powf(base, exponent);
|
||||
}
|
||||
|
||||
__device__ inline void __sincosf(float x, float *s, float *c) {
|
||||
return __hip_fast_sincosf(x, s, c);
|
||||
}
|
||||
|
||||
__device__ inline float __sinf(float x) {
|
||||
return __hip_fast_sinf(x);
|
||||
}
|
||||
|
||||
__device__ inline float __tanf(float x) {
|
||||
return __hip_fast_tanf(x);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
/**
|
||||
* CUDA 8 device function features
|
||||
|
||||
|
||||
+144
-19
@@ -2043,26 +2043,151 @@ __device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr()
|
||||
}
|
||||
|
||||
|
||||
// Precise Math Functions
|
||||
__device__ float __hip_precise_cosf(float x) {
|
||||
return hc::precise_math::cosf(x);
|
||||
}
|
||||
|
||||
//TODO - add a couple fast math operations here, the set here will grow :
|
||||
//__device__ float __cosf(float x) {return hc::fast_math::cosf(x); };
|
||||
__device__ float __expf(float x) {return hc::fast_math::expf(x); };
|
||||
__device__ float __frsqrt_rn(float x) {return hc::fast_math::rsqrt(x); };
|
||||
__device__ float __fsqrt_rd(float x) {return hc::fast_math::sqrt(x); };
|
||||
__device__ float __fsqrt_rn(float x) {return hc::fast_math::sqrt(x); };
|
||||
__device__ float __fsqrt_ru(float x) {return hc::fast_math::sqrt(x); };
|
||||
__device__ float __fsqrt_rz(float x) {return hc::fast_math::sqrt(x); };
|
||||
__device__ float __log10f(float x) {return hc::fast_math::log10f(x); };
|
||||
//__device__ float __log2f(float x) {return hc::fast_math::log2f(x); };
|
||||
__device__ float __logf(float x) {return hc::fast_math::logf(x); };
|
||||
__device__ float __powf(float base, float exponent) {return hc::fast_math::powf(base, exponent); };
|
||||
__device__ void __sincosf(float x, float *s, float *c) { *s = __sinf(x); *c = __cosf(x); };
|
||||
//__device__ float __sinf(float x) {return hc::fast_math::sinf(x); };
|
||||
__device__ float __tanf(float x) {return __sinf(x)/__cosf(x); };
|
||||
__device__ float __dsqrt_rd(double x) {return hc::fast_math::sqrt(x); };
|
||||
__device__ float __dsqrt_rn(double x) {return hc::fast_math::sqrt(x); };
|
||||
__device__ float __dsqrt_ru(double x) {return hc::fast_math::sqrt(x); };
|
||||
__device__ float __dsqrt_rz(double x) {return hc::fast_math::sqrt(x); };
|
||||
__device__ float __hip_precise_exp10f(float x) {
|
||||
return hc::precise_math::exp10f(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_expf(float x) {
|
||||
return hc::precise_math::expf(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_frsqrt_rn(float x) {
|
||||
return hc::precise_math::rsqrt(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_fsqrt_rd(float x) {
|
||||
return hc::precise_math::sqrt(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_fsqrt_rn(float x) {
|
||||
return hc::precise_math::sqrt(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_fsqrt_ru(float x) {
|
||||
return hc::precise_math::sqrt(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_fsqrt_rz(float x) {
|
||||
return hc::precise_math::sqrt(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_log10f(float x) {
|
||||
return hc::precise_math::log10(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_log2f(float x) {
|
||||
return hc::precise_math::log2(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_logf(float x) {
|
||||
return hc::precise_math::logf(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_powf(float base, float exponent) {
|
||||
return hc::precise_math::powf(base, exponent);
|
||||
}
|
||||
|
||||
__device__ void __hip_precise_sincosf(float x, float *s, float *c) {
|
||||
hc::precise_math::sincosf(x, s, c);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_sinf(float x) {
|
||||
return hc::precise_math::sinf(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_precise_tanf(float x) {
|
||||
return hc::precise_math::tanf(x);
|
||||
}
|
||||
|
||||
// Double Precision Math
|
||||
__device__ double __hip_precise_dsqrt_rd(double x) {
|
||||
return hc::precise_math::sqrt(x);
|
||||
}
|
||||
|
||||
__device__ double __hip_precise_dsqrt_rn(double x) {
|
||||
return hc::precise_math::sqrt(x);
|
||||
}
|
||||
|
||||
__device__ double __hip_precise_dsqrt_ru(double x) {
|
||||
return hc::precise_math::sqrt(x);
|
||||
}
|
||||
|
||||
__device__ double __hip_precise_dsqrt_rz(double x) {
|
||||
return hc::precise_math::sqrt(x);
|
||||
}
|
||||
|
||||
#define LOG_BASE2_E_DIV_2 0.4426950408894701
|
||||
#define LOG_BASE2_5 2.321928094887362
|
||||
#define ONE_DIV_LOG_BASE2_E 0.69314718056
|
||||
#define ONE_DIV_LOG_BASE2_10 0.30102999566
|
||||
|
||||
// Fast Math Intrinsics
|
||||
__device__ float __hip_fast_exp10f(float x) {
|
||||
return __hip_fast_exp2f(x*LOG_BASE2_E_DIV_2);
|
||||
}
|
||||
|
||||
__device__ float __hip_fast_expf(float x) {
|
||||
return __hip_fast_expf(x*LOG_BASE2_5);
|
||||
}
|
||||
|
||||
__device__ float __hip_fast_frsqrt_rn(float x) {
|
||||
return 1 / __hip_fast_fsqrt_rd(x);;
|
||||
}
|
||||
|
||||
__device__ float __hip_fast_fsqrt_rn(float x) {
|
||||
return __hip_fast_fsqrt_rd(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_fast_fsqrt_ru(float x) {
|
||||
return __hip_fast_fsqrt_rd(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_fast_fsqrt_rz(float x) {
|
||||
return __hip_fast_fsqrt_rd(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_fast_log10f(float x) {
|
||||
return ONE_DIV_LOG_BASE2_E * __hip_fast_log2f(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_fast_logf(float x) {
|
||||
return ONE_DIV_LOG_BASE2_10 * __hip_fast_log2f(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_fast_powf(float base, float exponent) {
|
||||
return hc::fast_math::powf(base, exponent);
|
||||
}
|
||||
|
||||
__device__ void __hip_fast_sincosf(float x, float *s, float *c) {
|
||||
*s = __hip_fast_sinf(x);
|
||||
*c = __hip_fast_cosf(x);
|
||||
}
|
||||
|
||||
__device__ float __hip_fast_tanf(float x) {
|
||||
return hc::fast_math::tanf(x);
|
||||
}
|
||||
|
||||
// Double Precision Math
|
||||
__device__ double __hip_fast_dsqrt_rd(double x) {
|
||||
return hc::fast_math::sqrt(x);
|
||||
}
|
||||
|
||||
__device__ double __hip_fast_dsqrt_rn(double x) {
|
||||
return hc::fast_math::sqrt(x);
|
||||
}
|
||||
|
||||
__device__ double __hip_fast_dsqrt_ru(double x) {
|
||||
return hc::fast_math::sqrt(x);
|
||||
}
|
||||
|
||||
__device__ double __hip_fast_dsqrt_rz(double x) {
|
||||
return hc::fast_math::sqrt(x);
|
||||
}
|
||||
|
||||
__HIP_DEVICE__ char1 make_char1(signed char x)
|
||||
{
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
#define LEN 512
|
||||
#define SIZE LEN<<2
|
||||
|
||||
|
||||
|
||||
__global__ void floatMath(hipLaunchParm lp, float *In, float *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
Out[tid] = __cosf(In[tid]);
|
||||
Out[tid] = __exp10f(Out[tid]);
|
||||
Out[tid] = __expf(Out[tid]);
|
||||
Out[tid] = __frsqrt_rn(Out[tid]);
|
||||
Out[tid] = __fsqrt_rd(Out[tid]);
|
||||
Out[tid] = __fsqrt_rn(Out[tid]);
|
||||
Out[tid] = __fsqrt_ru(Out[tid]);
|
||||
Out[tid] = __fsqrt_rz(Out[tid]);
|
||||
Out[tid] = __log10f(Out[tid]);
|
||||
Out[tid] = __log2f(Out[tid]);
|
||||
Out[tid] = __logf(Out[tid]);
|
||||
Out[tid] = __powf(2.0f, Out[tid]);
|
||||
__sincosf(Out[tid], &In[tid], &Out[tid]);
|
||||
Out[tid] = __sinf(Out[tid]);
|
||||
Out[tid] = __cosf(Out[tid]);
|
||||
Out[tid] = __tanf(Out[tid]);
|
||||
}
|
||||
|
||||
int main(){
|
||||
float *Inh, *Outh, *Ind, *Outd;
|
||||
hipMalloc((void**)&Ind, SIZE);
|
||||
hipMalloc((void**)&Outd, SIZE);
|
||||
hipLaunchKernel(floatMath, dim3(LEN,1,1), dim3(1,1,1), 0, 0, Ind, Outd);
|
||||
}
|
||||
新しいイシューから参照
ユーザーをブロックする