diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime.h b/projects/hip/include/hip/hcc_detail/hip_runtime.h index 079d681c39..4b781b44ae 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime.h @@ -479,26 +479,193 @@ __device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr(); //TODO - add a couple fast math operations here, the set here will grow : -__device__ float __expf(float x); -__device__ float __frsqrt_rn(float x); -__device__ float __fsqrt_rd(float x); -__device__ float __fsqrt_rn(float x); -__device__ float __fsqrt_ru(float x); -__device__ float __fsqrt_rz(float x); -__device__ float __log10f(float x); -//__device__ float __log2f(float x); -__device__ float __logf(float x); -__device__ float __powf(float base, float exponent); -__device__ void __sincosf(float x, float *s, float *c) ; -extern __attribute__((const)) float __sinf(float) __asm("llvm.sin.f32"); -extern __attribute__((const)) float __cosf(float) __asm("llvm.cos.f32"); -extern __attribute__((const)) float __log2f(float) __asm("llvm.log2.f32"); -__device__ float __tanf(float x); -__device__ float __dsqrt_rd(double x); -__device__ float __dsqrt_rn(double x); -__device__ float __dsqrt_ru(double x); -__device__ float __dsqrt_rz(double x); +// Single Precision Precise Math +__device__ float __hip_precise_cosf(float); +__device__ float __hip_precise_exp10f(float); +__device__ float __hip_precise_expf(float); +__device__ float __hip_precise_frsqrt_rn(float); +__device__ float __hip_precise_fsqrt_rd(float); +__device__ float __hip_precise_fsqrt_rn(float); +__device__ float __hip_precise_fsqrt_ru(float); +__device__ float __hip_precise_fsqrt_rz(float); +__device__ float __hip_precise_log10f(float); +__device__ float __hip_precise_log2f(float); +__device__ float __hip_precise_logf(float); +__device__ float __hip_precise_powf(float, float); +__device__ void __hip_precise_sincosf(float,float*,float*); +__device__ float __hip_precise_sinf(float); +__device__ float __hip_precise_tanf(float); +// Double Precision Precise Math +__device__ double __hip_precise_dsqrt_rd(double); +__device__ double __hip_precise_dsqrt_rn(double); +__device__ double __hip_precise_dsqrt_ru(double); +__device__ double __hip_precise_dsqrt_rz(double); + +// Single Precision Fast Math +extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32"); +extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32"); +__device__ float __hip_fast_exp10f(float); +__device__ float __hip_fast_expf(float); +__device__ float __hip_fast_frsqrt_rn(float); +extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32"); +__device__ float __hip_fast_fsqrt_rn(float); +__device__ float __hip_fast_fsqrt_ru(float); +__device__ float __hip_fast_fsqrt_rz(float); +__device__ float __hip_fast_log10f(float); +extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32"); +__device__ float __hip_fast_logf(float); +__device__ float __hip_fast_powf(float, float); +__device__ void __hip_fast_sincosf(float,float*,float*); +extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32"); +__device__ float __hip_fast_tanf(float); + +#ifdef HIP_PRECISE_MATH +// Single Precision Precise Math when enabled + +__device__ inline float __cosf(float x) { + return __hip_precise_cosf(x); +} + +__device__ inline float __exp10f(float x) { + return __hip_precise_exp10f(x); +} + +__device__ inline float __expf(float x) { + return __hip_precise_expf(x); +} + +__device__ inline float __frsqrt_rn(float x) { + return __hip_precise_frsqrt_rn(x); +} + +__device__ inline float __fsqrt_rd(float x) { + return __hip_precise_fsqrt_rd(x); +} + +__device__ inline float __fsqrt_rn(float x) { + return __hip_precise_fsqrt_rn(x); +} + +__device__ inline float __fsqrt_ru(float x) { + return __hip_precise_fsqrt_ru(x); +} + +__device__ inline float __fsqrt_rz(float x) { + return __hip_precise_fsqrt_rz(x); +} + +__device__ inline float __log10f(float x) { + return __hip_precise_log10f(x); +} + +__device__ inline float __log2f(float x) { + return __hip_precise_log2f(x); +} + +__device__ inline float __logf(float x) { + return __hip_precise_logf(x); +} + +__device__ inline float __powf(float base, float exponent) { + return __hip_precise_powf(base, exponent); +} + +__device__ inline void __sincosf(float x, float *s, float *c) { + return __hip_precise_sincosf(x, s, c); +} + +__device__ inline float __sinf(float x) { + return __hip_precise_sinf(x); +} + +__device__ inline float __tanf(float x) { + return __hip_precise_tanf(x); +} + +// Double Precision + +__device__ double __dsqrt_rd(double x) { + return __hip_precise_dsqrt_rd(x); +} + +__device__ double __dsqrt_rn(double x) { + return __hip_precise_dsqrt_rn(x); +} + +__device__ double __dsqrt_ru(double x) { + return __hip_precise_dsqrt_ru(x); +} + +__device__ double __dsqrt_rz(double x) { + return __hip_precise_dsqrt_rz(x); +} + +#else + +// Single Precision Fast Math +__device__ inline float __cosf(float x) { + return __hip_fast_cosf(x); +} + +__device__ inline float __exp10f(float x) { + return __hip_fast_exp10f(x); +} + +__device__ inline float __expf(float x) { + return __hip_fast_expf(x); +} + +__device__ inline float __frsqrt_rn(float x) { + return __hip_fast_frsqrt_rn(x); +} + +__device__ inline float __fsqrt_rd(float x) { + return __hip_fast_fsqrt_rd(x); +} + +__device__ inline float __fsqrt_rn(float x) { + return __hip_fast_fsqrt_rn(x); +} + +__device__ inline float __fsqrt_ru(float x) { + return __hip_fast_fsqrt_ru(x); +} + +__device__ inline float __fsqrt_rz(float x) { + return __hip_fast_fsqrt_rz(x); +} + +__device__ inline float __log10f(float x) { + return __hip_fast_log10f(x); +} + +__device__ inline float __log2f(float x) { + return __hip_fast_log2f(x); +} + +__device__ inline float __logf(float x) { + return __hip_fast_logf(x); +} + +__device__ inline float __powf(float base, float exponent) { + return __hip_fast_powf(base, exponent); +} + +__device__ inline void __sincosf(float x, float *s, float *c) { + return __hip_fast_sincosf(x, s, c); +} + +__device__ inline float __sinf(float x) { + return __hip_fast_sinf(x); +} + +__device__ inline float __tanf(float x) { + return __hip_fast_tanf(x); +} + + +#endif /** * CUDA 8 device function features diff --git a/projects/hip/src/device_util.cpp b/projects/hip/src/device_util.cpp index fc966f3395..203e7a7826 100644 --- a/projects/hip/src/device_util.cpp +++ b/projects/hip/src/device_util.cpp @@ -2043,26 +2043,151 @@ __device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr() } +// Precise Math Functions +__device__ float __hip_precise_cosf(float x) { + return hc::precise_math::cosf(x); +} -//TODO - add a couple fast math operations here, the set here will grow : -//__device__ float __cosf(float x) {return hc::fast_math::cosf(x); }; -__device__ float __expf(float x) {return hc::fast_math::expf(x); }; -__device__ float __frsqrt_rn(float x) {return hc::fast_math::rsqrt(x); }; -__device__ float __fsqrt_rd(float x) {return hc::fast_math::sqrt(x); }; -__device__ float __fsqrt_rn(float x) {return hc::fast_math::sqrt(x); }; -__device__ float __fsqrt_ru(float x) {return hc::fast_math::sqrt(x); }; -__device__ float __fsqrt_rz(float x) {return hc::fast_math::sqrt(x); }; -__device__ float __log10f(float x) {return hc::fast_math::log10f(x); }; -//__device__ float __log2f(float x) {return hc::fast_math::log2f(x); }; -__device__ float __logf(float x) {return hc::fast_math::logf(x); }; -__device__ float __powf(float base, float exponent) {return hc::fast_math::powf(base, exponent); }; -__device__ void __sincosf(float x, float *s, float *c) { *s = __sinf(x); *c = __cosf(x); }; -//__device__ float __sinf(float x) {return hc::fast_math::sinf(x); }; -__device__ float __tanf(float x) {return __sinf(x)/__cosf(x); }; -__device__ float __dsqrt_rd(double x) {return hc::fast_math::sqrt(x); }; -__device__ float __dsqrt_rn(double x) {return hc::fast_math::sqrt(x); }; -__device__ float __dsqrt_ru(double x) {return hc::fast_math::sqrt(x); }; -__device__ float __dsqrt_rz(double x) {return hc::fast_math::sqrt(x); }; +__device__ float __hip_precise_exp10f(float x) { + return hc::precise_math::exp10f(x); +} + +__device__ float __hip_precise_expf(float x) { + return hc::precise_math::expf(x); +} + +__device__ float __hip_precise_frsqrt_rn(float x) { + return hc::precise_math::rsqrt(x); +} + +__device__ float __hip_precise_fsqrt_rd(float x) { + return hc::precise_math::sqrt(x); +} + +__device__ float __hip_precise_fsqrt_rn(float x) { + return hc::precise_math::sqrt(x); +} + +__device__ float __hip_precise_fsqrt_ru(float x) { + return hc::precise_math::sqrt(x); +} + +__device__ float __hip_precise_fsqrt_rz(float x) { + return hc::precise_math::sqrt(x); +} + +__device__ float __hip_precise_log10f(float x) { + return hc::precise_math::log10(x); +} + +__device__ float __hip_precise_log2f(float x) { + return hc::precise_math::log2(x); +} + +__device__ float __hip_precise_logf(float x) { + return hc::precise_math::logf(x); +} + +__device__ float __hip_precise_powf(float base, float exponent) { + return hc::precise_math::powf(base, exponent); +} + +__device__ void __hip_precise_sincosf(float x, float *s, float *c) { + hc::precise_math::sincosf(x, s, c); +} + +__device__ float __hip_precise_sinf(float x) { + return hc::precise_math::sinf(x); +} + +__device__ float __hip_precise_tanf(float x) { + return hc::precise_math::tanf(x); +} + +// Double Precision Math +__device__ double __hip_precise_dsqrt_rd(double x) { + return hc::precise_math::sqrt(x); +} + +__device__ double __hip_precise_dsqrt_rn(double x) { + return hc::precise_math::sqrt(x); +} + +__device__ double __hip_precise_dsqrt_ru(double x) { + return hc::precise_math::sqrt(x); +} + +__device__ double __hip_precise_dsqrt_rz(double x) { + return hc::precise_math::sqrt(x); +} + +#define LOG_BASE2_E_DIV_2 0.4426950408894701 +#define LOG_BASE2_5 2.321928094887362 +#define ONE_DIV_LOG_BASE2_E 0.69314718056 +#define ONE_DIV_LOG_BASE2_10 0.30102999566 + +// Fast Math Intrinsics +__device__ float __hip_fast_exp10f(float x) { + return __hip_fast_exp2f(x*LOG_BASE2_E_DIV_2); +} + +__device__ float __hip_fast_expf(float x) { + return __hip_fast_expf(x*LOG_BASE2_5); +} + +__device__ float __hip_fast_frsqrt_rn(float x) { + return 1 / __hip_fast_fsqrt_rd(x);; +} + +__device__ float __hip_fast_fsqrt_rn(float x) { + return __hip_fast_fsqrt_rd(x); +} + +__device__ float __hip_fast_fsqrt_ru(float x) { + return __hip_fast_fsqrt_rd(x); +} + +__device__ float __hip_fast_fsqrt_rz(float x) { + return __hip_fast_fsqrt_rd(x); +} + +__device__ float __hip_fast_log10f(float x) { + return ONE_DIV_LOG_BASE2_E * __hip_fast_log2f(x); +} + +__device__ float __hip_fast_logf(float x) { + return ONE_DIV_LOG_BASE2_10 * __hip_fast_log2f(x); +} + +__device__ float __hip_fast_powf(float base, float exponent) { + return hc::fast_math::powf(base, exponent); +} + +__device__ void __hip_fast_sincosf(float x, float *s, float *c) { + *s = __hip_fast_sinf(x); + *c = __hip_fast_cosf(x); +} + +__device__ float __hip_fast_tanf(float x) { + return hc::fast_math::tanf(x); +} + +// Double Precision Math +__device__ double __hip_fast_dsqrt_rd(double x) { + return hc::fast_math::sqrt(x); +} + +__device__ double __hip_fast_dsqrt_rn(double x) { + return hc::fast_math::sqrt(x); +} + +__device__ double __hip_fast_dsqrt_ru(double x) { + return hc::fast_math::sqrt(x); +} + +__device__ double __hip_fast_dsqrt_rz(double x) { + return hc::fast_math::sqrt(x); +} __HIP_DEVICE__ char1 make_char1(signed char x) { diff --git a/projects/hip/tests/src/deviceLib/hipFloatMath.cpp b/projects/hip/tests/src/deviceLib/hipFloatMath.cpp new file mode 100644 index 0000000000..eb70eb6b0b --- /dev/null +++ b/projects/hip/tests/src/deviceLib/hipFloatMath.cpp @@ -0,0 +1,61 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s + * RUN: %t + * HIT_END + */ + +#include "test_common.h" + +#define LEN 512 +#define SIZE LEN<<2 + + + +__global__ void floatMath(hipLaunchParm lp, float *In, float *Out) { + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + Out[tid] = __cosf(In[tid]); + Out[tid] = __exp10f(Out[tid]); + Out[tid] = __expf(Out[tid]); + Out[tid] = __frsqrt_rn(Out[tid]); + Out[tid] = __fsqrt_rd(Out[tid]); + Out[tid] = __fsqrt_rn(Out[tid]); + Out[tid] = __fsqrt_ru(Out[tid]); + Out[tid] = __fsqrt_rz(Out[tid]); + Out[tid] = __log10f(Out[tid]); + Out[tid] = __log2f(Out[tid]); + Out[tid] = __logf(Out[tid]); + Out[tid] = __powf(2.0f, Out[tid]); + __sincosf(Out[tid], &In[tid], &Out[tid]); + Out[tid] = __sinf(Out[tid]); + Out[tid] = __cosf(Out[tid]); + Out[tid] = __tanf(Out[tid]); +} + +int main(){ + float *Inh, *Outh, *Ind, *Outd; + hipMalloc((void**)&Ind, SIZE); + hipMalloc((void**)&Outd, SIZE); + hipLaunchKernel(floatMath, dim3(LEN,1,1), dim3(1,1,1), 0, 0, Ind, Outd); +}