diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 0489a72c8b..a2894f3d9b 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -61,6 +61,90 @@ __device__ inline float __expf(float x) { return __hip_fast_expf(x); } +__device__ static inline float __fadd_rd(float x, float y) { + return x + y; +} + +__device__ static inline float __fadd_rn(float x, float y) { + return x + y; +} + +__device__ static inline float __fadd_ru(float x, float y) { + return x + y; +} + +__device__ static inline float __fadd_rz(float x, float y) { + return x + y; +} + +__device__ static inline float __fdiv_rd(float x, float y) { + return x / y; +} + +__device__ static inline float __fdiv_rn(float x, float y) { + return x / y; +} + +__device__ static inline float __fdiv_ru(float x, float y) { + return x / y; +} + +__device__ static inline float __fdiv_rz(float x, float y) { + return x / y; +} + +__device__ static inline float __fdividef(float x, float y) { + return x / y; +} + +__device__ inline float __fmaf_rd(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ inline float __fmaf_rn(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ inline float __fmaf_ru(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ inline float __fmaf_rz(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ static inline float __fmul_rd(float x, float y) { + return x * y; +} + +__device__ static inline float __fmul_rn(float x, float y) { + return x * y; +} + +__device__ static inline float __fmul_ru(float x, float y) { + return x * y; +} + +__device__ static inline float __fmul_rz(float x, float y) { + return x * y; +} + +__device__ inline float __frcp_rd(float x) { + return __hip_fast_frcp(x); +} + +__device__ inline float __frcp_rn(float x) { + return __hip_fast_frcp(x); +} + +__device__ inline float __frcp_ru(float x) { + return __hip_fast_frcp(x); +} + +__device__ inline float __frcp_rz(float x) { + return __hip_fast_frcp(x); +} + __device__ inline float __frsqrt_rn(float x) { return __hip_fast_frsqrt_rn(x); } @@ -81,6 +165,23 @@ __device__ inline float __fsqrt_rz(float x) { return __hip_fast_fsqrt_rz(x); } +__device__ static inline float __fsub_rd(float x, float y) { + return x - y; +} + +__device__ static inline float __fsub_rn(float x, float y) { + return x - y; +} + +__device__ static inline float __fsub_ru(float x, float y) { + return x - y; +} + +__device__ static inline float __fsub_rz(float x, float y) { + return x - y; +} + + __device__ inline float __log10f(float x) { return __hip_fast_log10f(x); } @@ -97,6 +198,12 @@ __device__ inline float __powf(float base, float exponent) { return __hip_fast_powf(base, exponent); } +__device__ static inline float __saturatef(float x) { + x = x > 1.0f ? 1.0f : x; + x = x < 0.0f ? 0.0f : x; + return x; +} + __device__ inline void __sincosf(float x, float *s, float *c) { return __hip_fast_sincosf(x, s, c); } @@ -109,68 +216,57 @@ __device__ inline float __tanf(float x) { return __hip_fast_tanf(x); } -__device__ inline float __fmaf_rd(float x, float y, float z) { - return __hip_fast_fmaf(x, y, z); + +/* +Double Precision Intrinsics +*/ + +__device__ static inline double __dadd_rd(double x, double y) { + return x + y; } -__device__ inline float __fmaf_rn(float x, float y, float z) { - return __hip_fast_fmaf(x, y, z); +__device__ static inline double __dadd_rn(double x, double y) { + return x + y; } -__device__ inline float __fmaf_ru(float x, float y, float z) { - return __hip_fast_fmaf(x, y, z); +__device__ static inline double __dadd_ru(double x, double y) { + return x + y; } -__device__ inline float __fmaf_rz(float x, float y, float z) { - return __hip_fast_fmaf(x, y, z); +__device__ static inline double __dadd_rz(double x, double y) { + return x + y; } -__device__ inline float __frcp_rd(float x) { - return __hip_fast_frcp(x); +__device__ static inline double __ddiv_rd(double x, double y) { + return x / y; } -__device__ inline float __frcp_rn(float x) { - return __hip_fast_frcp(x); +__device__ static inline double __ddiv_rn(double x, double y) { + return x / y; } -__device__ inline float __frcp_ru(float x) { - return __hip_fast_frcp(x); +__device__ static inline double __ddiv_ru(double x, double y) { + return x / y; } -__device__ inline float __frcp_rz(float x) { - return __hip_fast_frcp(x); +__device__ static inline double __ddiv_rz(double x, double y) { + return x / y; } -__device__ inline double __dsqrt_rd(double x) { - return __hip_fast_dsqrt(x); +__device__ static inline double __dmul_rd(double x, double y) { + return x * y; } -__device__ inline double __dsqrt_rn(double x) { - return __hip_fast_dsqrt(x); +__device__ static inline double __dmul_rn(double x, double y) { + return x * y; } -__device__ inline double __dsqrt_ru(double x) { - return __hip_fast_dsqrt(x); +__device__ static inline double __dmul_ru(double x, double y) { + return x * y; } -__device__ inline double __dsqrt_rz(double x) { - return __hip_fast_dsqrt(x); -} - -__device__ inline double __fma_rd(double x, double y, double z) { - return __hip_fast_fma(x, y, z); -} - -__device__ inline double __fma_rn(double x, double y, double z) { - return __hip_fast_fma(x, y, z); -} - -__device__ inline double __fma_ru(double x, double y, double z) { - return __hip_fast_fma(x, y, z); -} - -__device__ inline double __fma_rz(double x, double y, double z) { - return __hip_fast_fma(x, y, z); +__device__ static inline double __dmul_rz(double x, double y) { + return x * y; } __device__ inline double __drcp_rd(double x) { @@ -190,6 +286,55 @@ __device__ inline double __drcp_rz(double x) { } +__device__ inline double __dsqrt_rd(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ inline double __dsqrt_rn(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ inline double __dsqrt_ru(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ inline double __dsqrt_rz(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ static inline double __dsub_rd(double x, double y) { + return x - y; +} + +__device__ static inline double __dsub_rn(double x, double y) { + return x - y; +} + +__device__ static inline double __dsub_ru(double x, double y) { + return x - y; +} + +__device__ static inline double __dsub_rz(double x, double y) { + return x - y; +} + +__device__ inline double __fma_rd(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + +__device__ inline double __fma_rn(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + +__device__ inline double __fma_ru(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + +__device__ inline double __fma_rz(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + + extern "C" unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int); extern "C" signed int __hip_hc_ir_mul24_int(signed int, signed int); extern "C" signed int __hip_hc_ir_mulhi_int(signed int, signed int); @@ -204,51 +349,42 @@ __device__ unsigned int __clz(int x); __device__ unsigned int __clzll(long long int x); __device__ unsigned int __ffs(int x); __device__ unsigned int __ffsll(long long int x); -__device__ static inline unsigned int __hadd(int x, int y) -{ +__device__ static inline unsigned int __hadd(int x, int y) { int z = x + y; int sign = z & 0x8000000; int value = z & 0x7FFFFFFF; return ((value) >> 1 || sign); } -__device__ static inline int __mul24(int x, int y) -{ +__device__ static inline int __mul24(int x, int y) { return __hip_hc_ir_mul24_int(x, y); } __device__ long long int __mul64hi(long long int x, long long int y); -__device__ static inline int __mulhi(int x, int y) -{ +__device__ static inline int __mulhi(int x, int y) { return __hip_hc_ir_mulhi_int(x, y); } __device__ unsigned int __popc( unsigned int x); __device__ unsigned int __popcll( unsigned long long int x); -__device__ static inline int __rhadd(int x, int y) -{ +__device__ static inline int __rhadd(int x, int y) { int z = x + y + 1; int sign = z & 0x8000000; int value = z & 0x7FFFFFFF; return ((value) >> 1 || sign); } -__device__ static inline unsigned int __sad(int x, int y, int z) -{ +__device__ static inline unsigned int __sad(int x, int y, int z) { return x > y ? x - y + z : y - x + z; } -__device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) -{ +__device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) { return (x + y) >> 1; } -__device__ static inline int __umul24(unsigned int x, unsigned int y) -{ +__device__ static inline int __umul24(unsigned int x, unsigned int y) { return __hip_hc_ir_umul24_int(x, y); } __device__ unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y); -__device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) -{ +__device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) { return __hip_hc_ir_umulhi_int(x, y); } -__device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) -{ +__device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) { return (x + y + 1) >> 1; } __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) @@ -266,7 +402,6 @@ __device__ float __double2float_ru(double x); __device__ float __double2float_rz(double x); __device__ int __double2hiint(double x); -__device__ int __double2loint(double x); __device__ int __double2int_rd(double x); __device__ int __double2int_rn(double x); @@ -278,6 +413,8 @@ __device__ long long int __double2ll_rn(double x); __device__ long long int __double2ll_ru(double x); __device__ long long int __double2ll_rz(double x); +__device__ int __double2loint(double x); + __device__ unsigned int __double2uint_rd(double x); __device__ unsigned int __double2uint_rn(double x); __device__ unsigned int __double2uint_ru(double x); diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h index 5a0e21f83c..21ec4510c6 100644 --- a/include/hip/hcc_detail/math_functions.h +++ b/include/hip/hcc_detail/math_functions.h @@ -34,16 +34,19 @@ __device__ float atanhf(float x); __device__ float cbrtf(float x); __device__ float ceilf(float x); __device__ float copysignf(float x, float y); +__device__ float cosf(float x); __device__ float coshf(float x); -__device__ float cyl_bessel_i0f(float x); -__device__ float cyl_bessel_i1f(float x); +__device__ __host__ float cospif(float x); +//__device__ float cyl_bessel_i0f(float x); +//__device__ float cyl_bessel_i1f(float x); __device__ float erfcf(float x); __device__ float erfcinvf(float y); - __device__ float erfcxf(float x); __device__ float erff(float x); __device__ float erfinvf(float y); +__device__ float exp10f(float x); __device__ float exp2f(float x); +__device__ float expf(float x); __device__ float expm1f(float x); __device__ float fabsf(float x); __device__ float fdimf(float x, float y); @@ -53,32 +56,34 @@ __device__ float fmaf(float x, float y, float z); __device__ float fmaxf(float x, float y); __device__ float fminf(float x, float y); __device__ float fmodf(float x, float y); -__device__ float frexpf(float x, float y); +//__device__ float frexpf(float x, int* nptr); __device__ float hypotf(float x, float y); __device__ float ilogbf(float x); -__host__ __device__ int isfinite(float a); +__device__ __host__ int isfinite(float a); __device__ unsigned isinf(float a); __device__ unsigned isnan(float a); __device__ float j0f(float x); __device__ float j1f(float x); __device__ float jnf(int n, float x); __device__ float ldexpf(float x, int exp); -__device__ float lgammaf(float x); +//__device__ float lgammaf(float x); __device__ long long int llrintf(float x); __device__ long long int llroundf(float x); +__device__ float log10f(float x); __device__ float log1pf(float x); __device__ float logbf(float x); __device__ long int lrintf(float x); __device__ long int lroundf(float x); -__device__ float modff(float x, float *iptr); +//__device__ float modff(float x, float *iptr); __device__ float nanf(const char* tagp); __device__ float nearbyintf(float x); -__device__ float nextafterf(float x, float y); +//__device__ float nextafterf(float x, float y); __device__ float norm3df(float a, float b, float c); __device__ float norm4df(float a, float b, float c, float d); __device__ float normcdff(float y); __device__ float normcdfinvf(float y); __device__ float normf(int dim, const float *a); +__device__ float powf(float x, float y); __device__ float rcbrtf(float x); __device__ float remainderf(float x, float y); __device__ float remquof(float x, float y, int *quo); @@ -88,14 +93,17 @@ __device__ float rnorm3df(float a, float b, float c); __device__ float rnorm4df(float a, float b, float c, float d); __device__ float rnormf(int dim, const float* a); __device__ float roundf(float x); -__device__ float rsqrtf(float x); +__device__ __host__ float rsqrtf(float x); __device__ float scalblnf(float x, long int n); __device__ float scalbnf(float x, int n); -__host__ __device__ unsigned signbit(float a); +__device__ __host__ unsigned signbit(float a); +__device__ void sincosf(float x, float *sptr, float *cptr); __device__ void sincospif(float x, float *sptr, float *cptr); +__device__ float sinf(float x); __device__ float sinhf(float x); -__device__ float sinpif(float x); +__device__ __host__ float sinpif(float x); __device__ float sqrtf(float x); +__device__ float tanf(float x); __device__ float tanhf(float x); __device__ float tgammaf(float x); __device__ float truncf(float x); @@ -103,12 +111,8 @@ __device__ float y0f(float x); __device__ float y1f(float x); __device__ float ynf(int n, float x); -__host__ __device__ float cospif(float x); -__host__ __device__ float sinpif(float x); -// /__device__ float sqrtf(float x); -__host__ __device__ float rsqrtf(float x); -__host__ float normcdff(float y); +__host__ float normcdff(float y); __host__ float erfcinvf(float y); __host__ float erfcxf(float x); __host__ float erfinvf(float y); @@ -122,6 +126,8 @@ __host__ float rnormf(int dim, const float* a); __host__ float rnorm4df(float a, float b, float c, float d); __host__ void sincospif(float x, float *sptr, float *cptr); + + __device__ double acos(double x); __device__ double acosh(double x); __device__ double asin(double x); @@ -134,7 +140,7 @@ __device__ double ceil(double x); __device__ double copysign(double x, double y); __device__ double cos(double x); __device__ double cosh(double x); -__host__ __device__ double cospi(double x); +__device__ __host__ double cospi(double x); __device__ double cyl_bessel_i0(double x); __device__ double cyl_bessel_i1(double x); __device__ double erf(double x); @@ -153,10 +159,10 @@ __device__ double fma(double x, double y, double z); __device__ double fmax(double x, double y); __device__ double fmin(double x, double y); __device__ double fmod(double x, double y); -__device__ double frexp(double x, int *nptr); +//__device__ double frexp(double x, int *nptr); __device__ double hypot(double x, double y); __device__ double ilogb(double x); -__host__ __device__ unsigned isfinite(double x); +__device__ __host__ unsigned isfinite(double x); __device__ unsigned isinf(double x); __device__ unsigned isnan(double x); __device__ double j0(double x); @@ -173,44 +179,34 @@ __device__ double log2(double x); __device__ double logb(double x); __device__ long int lrint(double x); __device__ long int lround(double x); -__device__ double modf(double x, double *iptr); +//__device__ double modf(double x, double *iptr); __device__ double nan(const char* tagp); __device__ double nearbyint(double x); __device__ double nextafter(double x, double y); __device__ double norm(int dim, const double* t); __device__ double norm3d(double a, double b, double c); -__host__ double norm3d(double a, double b, double c); __device__ double norm4d(double a, double b, double c, double d); -__host__ double norm4d(double a, double b, double c, double d); __device__ double normcdf(double y); -__host__ double normcdf(double y); __device__ double normcdfinv(double y); -__host__ double normcdfinv(double y); __device__ double pow(double x, double y); __device__ double rcbrt(double x); -__host__ double rcbrt(double x); __device__ double remainder(double x, double y); -__device__ double remquo(double x, double y, int *quo); +//__device__ double remquo(double x, double y, int *quo); __device__ double rhypot(double x, double y); -__host__ double rhypot(double x, double y); __device__ double rint(double x); __device__ double rnorm(int dim, const double* t); -__host__ double rnorm(int dim, const double* t); __device__ double rnorm3d(double a, double b, double c); -__host__ double rnorm3d(double a, double b, double c); __device__ double rnorm4d(double a, double b, double c, double d); -__host__ double rnorm4d(double a, double b, double c, double d); __device__ double round(double x); -__host__ __device__ double rsqrt(double x); +__device__ __host__ double rsqrt(double x); __device__ double scalbln(double x, long int n); __device__ double scalbn(double x, int n); -__host__ __device__ unsigned signbit(double a); +__device__ __host__ unsigned signbit(double a); __device__ double sin(double a); __device__ void sincos(double x, double *sptr, double *cptr); __device__ void sincospi(double x, double *sptr, double *cptr); -__host__ void sincospi(double x, double *sptr, double *cptr); __device__ double sinh(double x); -__host__ __device__ double sinpi(double x); +__device__ __host__ double sinpi(double x); __device__ double sqrt(double x); __device__ double tan(double x); __device__ double tanh(double x); @@ -224,7 +220,17 @@ __host__ double erfcinv(double y); __host__ double erfcx(double x); __host__ double erfinv(double y); __host__ double fdivide(double x, double y); -__host__ double norm(double x, const double *t); +__host__ double norm(int dim, const double *t); +__host__ double norm3d(double a, double b, double c); +__host__ double norm4d(double a, double b, double c, double d); +__host__ double normcdf(double y); +__host__ double normcdfinv(double y); +__host__ double rcbrt(double x); +__host__ double rhypot(double x, double y); +__host__ double rnorm(int dim, const double* t); +__host__ double rnorm3d(double a, double b, double c); +__host__ double rnorm4d(double a, double b, double c, double d); +__host__ void sincospi(double x, double *sptr, double *cptr); #ifdef HIP_FAST_MATH // Single Precision Precise Math when enabled diff --git a/src/math_functions.cpp b/src/math_functions.cpp index 34a80448db..130d4152ae 100644 --- a/src/math_functions.cpp +++ b/src/math_functions.cpp @@ -186,9 +186,10 @@ __device__ float ldexpf(float x, int exp) { return hc::precise_math::ldexpf(x, exp); } -__device__ float lgammaf(float x, int *sign) +__device__ float lgammaf(float x) { - return hc::precise_math::lgammaf(x, sign); + int sign; + return hc::precise_math::lgammaf(x, &sign); } __device__ long long int llrintf(float x) { @@ -566,9 +567,10 @@ __device__ double ldexp(double x, int exp) { return hc::precise_math::ldexp(x, exp); } -__device__ double lgamma(double x, int *sign) +__device__ double lgamma(double x) { - return hc::precise_math::lgamma(x, sign); + int sign; + return hc::precise_math::lgamma(x, &sign); } __device__ long long int llrint(double x) { @@ -626,6 +628,14 @@ __device__ double nextafter(double x, double y) { return hc::precise_math::nextafter(x, y); } +__device__ double norm(int x, const double *d) +{ + double val = 0; + for(int i=0;i