added device math functions

[ROCm/hip commit: 26985b746b]
2016-03-31 12:29:19 -05:00
@@ -97,9 +97,6 @@ extern int HIP_TRACE_API;
 #endif


-
-
-
 //TODO-HCC  this is currently ignored by HCC target of HIP
 #define __launch_bounds__(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)

@@ -110,6 +107,186 @@ extern int HIP_TRACE_API;
 #define __HCC_C__
 #endif

+__device__ float acosf(float x);
+__device__ float acoshf(float x);
+__device__ float asinf(float x);
+__device__ float asinhf(float x);
+__device__ float atan2f(float y, float x);
+__device__ float atanf(float x);
+__device__ float atanhf(float x);
+__device__ float cbrtf(float x);
+__device__ float ceilf(float x);
+__device__ float copysignf(float x, float y);
+__device__ float cosf(float x);
+__device__ float coshf(float x);
+__device__ float cyl_bessel_i0f(float x);
+__device__ float cyl_bessel_i1f(float x);
+__device__ float erfcf(float x);
+__device__ float erfcinvf(float y);
+__device__ float erfcxf(float x);
+__device__ float erff(float x);
+__device__ float erfinvf(float y);
+__device__ float exp10f(float x);
+__device__ float exp2f(float x);
+__device__ float expf(float x);
+__device__ float expm1f(float x);
+__device__ float fabsf(float x);
+__device__ float fdimf(float x, float y);
+__device__ float fdividef(float x, float y);
+__device__ float floorf(float x);
+__device__ float fmaf(float x, float y, float z);
+__device__ float fmaxf(float x, float y);
+__device__ float fminf(float x, float y);
+__device__ float fmodf(float x, float y);
+__device__ float frexpf(float x, float y);
+__device__ float hypotf(float x, float y);
+__device__ float ilogbf(float x);
+__device__ unsigned isfinite(float a);
+__device__ unsigned isinf(float a);
+__device__ unsigned isnan(float a);
+__device__ float j0f(float x);
+__device__ float j1f(float x);
+__device__ float jnf(int n, float x);
+__device__ float ldexpf(float x, int exp);
+__device__ float lgammaf(float x);
+__device__ long long int llrintf(float x);
+__device__ long long int llroundf(float x);
+__device__ float log10f(float x);
+__device__ float log1pf(float x);
+__device__ float log2f(float x);
+__device__ float logbf(float x);
+__device__ float logf(float x);
+__device__ long int lrintf(float x);
+__device__ long int lroundf(float x);
+__device__ float modff(float x, float *iptr);
+__device__ float nanf(const char* tagp);
+__device__ float nearbyintf(float x);
+__device__ float nextafterf(float x, float y);
+__device__ float norm3df(float a, float b, float c);
+__device__ float norm4df(float a, float b, float c, float d);
+__device__ float normcdff(float y);
+__device__ float normcdfinvf(float y);
+__device__ float normf(int dim, const float *a);
+__device__ float powf(float x, float y);
+__device__ float rcbtrf(float x);
+__device__ float remainderf(float x, float y);
+__device__ float remquof(float x, float y, int *quo);
+__device__ float rhypotf(float x, float y);
+__device__ float rintf(float x);
+__device__ float rnorm3df(float a, float b, float c);
+__device__ float rnorm4df(float a, float b, float c, float d);
+__device__ float rnormf(int dim, const float* a);
+__device__ float roundf(float x);
+__device__ float rsqrtf(float x);
+__device__ float scalblnf(float x, long int n);
+__device__ float scalbnf(float x, int n);
+__device__ unsigned signbit(float a);
+__device__ void sincosf(float x, float *sptr, float *cptr);
+__device__ void sincospif(float x, float *sptr, float *cptr);
+__device__ float sinf(float x);
+__device__ float sinhf(float x);
+__device__ float sinpif(float x);
+__device__ float sqrtf(float x);
+__device__ float tanf(float x);
+__device__ float tanhf(float x);
+__device__ float tgammaf(float x);
+__device__ float truncf(float x);
+__device__ float y0f(float x);
+__device__ float y1f(float x);
+__device__ float ynf(int n, float x);
+
+__host__ __device__ float cospif(float x);
+__host__ __device__ float sinpif(float x);
+__device__ float sqrtf(float x);
+__host__ __device__ float rsqrtf(float x);
+
+__device__ double acos(double x);
+__device__ double acosh(double x);
+__device__ double asin(double x);
+__device__ double asinh(double x);
+__device__ double atan(double x);
+__device__ double atan2(double y, double x);
+__device__ double atanh(double x);
+__device__ double cbrt(double x);
+__device__ double ceil(double x);
+__device__ double copysign(double x, double y);
+__device__ double cos(double x);
+__device__ double cosh(double x);
+__host__ __device__ double cospi(double x);
+__device__ double cyl_bessel_i0(double x);
+__device__ double cyl_bessel_i1(double x);
+__device__ double erf(double x);
+__device__ double erfc(double x);
+__device__ double erfcinv(double y);
+__device__ double erfcx(double x);
+__device__ double exp(double x);
+__device__ double exp10(double x);
+__device__ double exp2(double x);
+__device__ double expm1(double x);
+__device__ double fabs(double x);
+__device__ double fdim(double x, double y);
+__device__ double floor(double x);
+__device__ double fma(double x, double y, double z);
+__device__ double fmax(double x, double y);
+__device__ double fmin(double x, double y);
+__device__ double fmod(double x, double y);
+__device__ double frexp(double x, int *nptr);
+__device__ double hypot(double x, double y);
+__device__ double ilogb(double x);
+__device__ unsigned isfinite(double x);
+__device__ unsigned isinf(double x);
+__device__ unsigned isnan(double x);
+__device__ double j0(double x);
+__device__ double j1(double x);
+__device__ double jn(int n, double x);
+__device__ double ldexp(double x, int exp);
+__device__ double lgamma(double x);
+__device__ long long llrint(double x);
+__device__ long llround(double x);
+__device__ double log(double x);
+__device__ double log10(double x);
+__device__ double log1p(double x);
+__device__ double log2(double x);
+__device__ double logb(double x);
+__device__ long int lrint(double x);
+__device__ long int lround(double x);
+__device__ double modf(double x, double *iptr);
+__device__ double nan(const char* tagp);
+__device__ double nearbyint(double x);
+__device__ double nextafter(double x, double y);
+__device__ double norm(int dim, const double* t);
+__device__ double norm3d(double a, double b, double c);
+__device__ double norm4d(double a, double b, double d);
+__device__ double normcdf(double y);
+__device__ double normcdfinv(double y);
+__device__ double pow(double x, double y);
+__device__ double rcbrt(double x);
+__device__ double remainder(double x, double y);
+__device__ double remquo(double x, double y, int *quo);
+__device__ double rhypot(double x, double y);
+__device__ double rint(double x);
+__device__ double rnorm(int dim, const double* t);
+__device__ double rnorm3d(double a, double b, double c);
+__device__ double rnorm4d(double a, double b, double c, double d);
+__device__ double round(double x);
+__host__ __device__ double rsqrt(double x);
+__device__ double scalbln(double x, long int n);
+__device__ double scalbn(double x, int n);
+__device__ unsigned signbit(double a);
+__device__ double sin(double a);
+__device__ double sincos(double x, double *sptr, double *cptr);
+__device__ double sincospi(double x, double *sptr, double *cptr);
+__device__ double sinh(double x);
+__host__ __device__ double sinpi(double x);
+__device__ double sqrt(double x);
+__device__ double tan(double x);
+__device__ double tanh(double x);
+__device__ double tgamma(double x);
+__device__ double trunc(double x);
+__device__ double y0(double x);
+__device__ double y1(double y);
+__device__ double yn(int n, double x);
+

 // TODO - hipify-clang - change to use the function call.
 //#define warpSize hc::__wavesize()
@@ -200,7 +377,6 @@ __device__ unsigned long long int atomicXor(unsigned long long int* address,
                                 unsigned long long int val);


-#include <hc.hpp>
 // integer intrinsic function __poc __clz __ffs __brev
 __device__ unsigned int __popc( unsigned int input);
 __device__ unsigned int __popcll( unsigned long long int input);
@@ -243,18 +419,8 @@ __device__ float __shfl_down(float input, unsigned int lane_delta, int width);
 __device__ float __shfl_xor(float input, int lane_mask, int width);
 #endif

-#include <hc_math.hpp>
-// TODO: Choose whether default is precise math or fast math based on compilation flag.
-#ifdef __HCC_ACCELERATOR__
-using namespace hc::precise_math;
-#endif
-
-//TODO: Undo this once min/max functions are supported by hc
-inline int min(int arg1, int arg2) __attribute((hc,cpu)) { \
-  return (int)(hc::precise_math::fmin((float)arg1, (float)arg2));}
-inline int max(int arg1, int arg2) __attribute((hc,cpu)) { \
-  return (int)(hc::precise_math::fmax((float)arg1, (float)arg2));}
-
+__host__ __device__ int min(int arg1, int arg2);
+__host__ __device__ int max(int arg1, int arg2);  

 //TODO - add a couple fast math operations here, the set here will grow :
 __device__ float __cosf(float x);
@@ -318,7 +484,6 @@ __device__ float __dsqrt_rz(double x);

 #endif

-
 #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)

 #define HIP_KERNEL_NAME(...) __VA_ARGS__
@@ -2,6 +2,484 @@
 #include<hc.hpp>
 #include<grid_launch.h>

+#include <hc_math.hpp>
+// TODO: Choose whether default is precise math or fast math based on compilation flag.
+#ifdef __HCC_ACCELERATOR__
+using namespace hc::precise_math;
+#endif
+
+__device__ float acosf(float x)
+{
+    return hc::precise_math::acosf(x);
+}
+__device__ float acoshf(float x)
+{
+    return hc::precise_math::acoshf(x);
+}
+__device__ float asinf(float x)
+{
+    return hc::precise_math::asinf(x);
+}
+__device__ float asinhf(float x)
+{
+    return hc::precise_math::asinhf(x);
+}
+__device__ float atan2f(float y, float x)
+{
+    return hc::precise_math::atan2f(x, y);
+}
+__device__ float atanf(float x)
+{
+    return hc::precise_math::atanf(x);
+}
+__device__ float atanhf(float x)
+{
+    return hc::precise_math::atanhf(x);
+}
+__device__ float cbrtf(float x)
+{
+    return hc::precise_math::cbrtf(x);
+}
+__device__ float ceilf(float x)
+{
+    return hc::precise_math::ceilf(x);
+}
+__device__ float copysignf(float x, float y)
+{
+    return hc::precise_math::copysignf(x, y);
+}
+__device__ float cosf(float x)
+{
+    return hc::precise_math::cosf(x);
+}
+__device__ float coshf(float x)
+{
+    return hc::precise_math::coshf(x);
+}
+__device__ float cyl_bessel_i0f(float x);
+__device__ float cyl_bessel_i1f(float x);
+__device__ float erfcf(float x)
+{
+    return hc::precise_math::erfcf(x);
+}
+__device__ float erfcinvf(float y);
+__device__ float erfcxf(float x);
+__device__ float erff(float x)
+{
+    return hc::precise_math::erff(x);
+}
+__device__ float erfinvf(float y);
+__device__ float exp10f(float x)
+{
+    return hc::precise_math::exp10f(x);
+}
+__device__ float exp2f(float x)
+{
+    return hc::precise_math::exp2f(x);
+}
+__device__ float expf(float x)
+{
+    return hc::precise_math::expf(x);
+}
+__device__ float expm1f(float x)
+{
+    return hc::precise_math::expm1f(x);
+}
+__device__ float fabsf(float x)
+{
+    return hc::precise_math::fabsf(x);
+}
+__device__ float fdimf(float x, float y)
+{
+    return hc::precise_math::fdimf(x, y);
+}
+__device__ float fdividef(float x, float y);
+__device__ float floorf(float x)
+{
+    return hc::precise_math::floorf(x);
+}
+__device__ float fmaf(float x, float y, float z)
+{
+    return hc::precise_math::fmaf(x, y, z);
+}
+__device__ float fmaxf(float x, float y)
+{
+    return hc::precise_math::fmaxf(x, y);
+}
+__device__ float fminf(float x, float y)
+{
+    return hc::precise_math::fminf(x, y);
+}
+__device__ float fmodf(float x, float y)
+{
+    return hc::precise_math::fmodf(x, y);
+}
+__device__ float frexpf(float x, float y);
+__device__ float hypotf(float x, float y)
+{
+    return hc::precise_math::hypotf(x, y);
+}
+__device__ float ilogbf(float x)
+{
+    return hc::precise_math::ilogbf(x);
+}
+__device__ unsigned isfinite(float a)
+{
+    return hc::precise_math::isfinite(a);
+}
+__device__ unsigned isinf(float a)
+{
+    return hc::precise_math::isinf(a);
+}
+__device__ unsigned isnan(float a)
+{
+    return hc::precise_math::isnan(a);
+}
+__device__ float j0f(float x);
+__device__ float j1f(float x);
+__device__ float jnf(int n, float x);
+__device__ float ldexpf(float x, int exp)
+{
+    return hc::precise_math::ldexpf(x, exp);
+}
+__device__ float lgammaf(float x);
+__device__ long long int llrintf(float x);
+__device__ long long int llroundf(float x);
+__device__ float log10f(float x)
+{
+    return hc::precise_math::log10f(x);
+}
+__device__ float log1pf(float x)
+{
+    return hc::precise_math::log1pf(x);
+}
+__device__ float log2f(float x)
+{
+    return hc::precise_math::log2f(x);
+}
+__device__ float logbf(float x)
+{
+    return hc::precise_math::logbf(x);
+}
+__device__ float logf(float x)
+{
+    return hc::precise_math::logf(x);
+}
+__device__ long int lrintf(float x);
+__device__ long int lroundf(float x);
+__device__ float modff(float x, float *iptr);
+__device__ float nanf(const char* tagp)
+{
+    return hc::precise_math::nanf((int)*tagp);
+}
+__device__ float nearbyintf(float x)
+{
+    return hc::precise_math::nearbyintf(x);
+}
+__device__ float nextafterf(float x, float y);
+__device__ float norm3df(float a, float b, float c);
+__device__ float norm4df(float a, float b, float c, float d);
+__device__ float normcdff(float y);
+__device__ float normcdfinvf(float y);
+__device__ float normf(int dim, const float *a);
+__device__ float powf(float x, float y)
+{
+    return hc::precise_math::powf(x, y);
+}
+__device__ float rcbtrf(float x);
+__device__ float remainderf(float x, float y)
+{
+    return hc::precise_math::remainderf(x, y);
+}
+__device__ float remquof(float x, float y, int *quo);
+__device__ float rhypotf(float x, float y);
+__device__ float rintf(float x);
+__device__ float rnorm3df(float a, float b, float c);
+__device__ float rnorm4df(float a, float b, float c, float d);
+__device__ float rnormf(int dim, const float* a);
+__device__ float roundf(float x)
+{
+    return hc::precise_math::roundf(x);
+}
+__device__ float scalblnf(float x, long int n);
+__device__ float scalbnf(float x, int n)
+{
+    return hc::precise_math::scalbnf(x, n);
+}
+__device__ unsigned signbit(float a)
+{
+    return hc::precise_math::signbit(a);
+}
+__device__ void sincosf(float x, float *sptr, float *cptr);
+__device__ void sincospif(float x, float *sptr, float *cptr);
+__device__ float sinf(float x)
+{
+    return hc::precise_math::sinf(x);
+}
+__device__ float sinhf(float x)
+{
+    return hc::precise_math::sinhf(x);
+}
+__device__ float tanf(float x)
+{
+    return hc::precise_math::tanf(x);
+}
+__device__ float tanhf(float x)
+{
+    return hc::precise_math::tanhf(x);
+}
+__device__ float tgammaf(float x)
+{
+    return hc::precise_math::tgammaf(x);
+}
+__device__ float truncf(float x)
+{
+    return hc::precise_math::truncf(x);
+}
+__device__ float y0f(float x);
+__device__ float y1f(float x);
+__device__ float ynf(int n, float x);
+
+
+__device__ float cospif(float x)
+{
+    return hc::precise_math::cospif(x);
+}
+
+__device__ float sinpif(float x)
+{
+    return hc::precise_math::sinpif(x);
+}
+
+__device__ float sqrtf(float x)
+{
+    return hc::precise_math::sqrtf(x);
+}
+
+__device__ float rsqrtf(float x)
+{
+    return hc::precise_math::rsqrtf(x);
+}
+
+/*
+ * Double precision device math functions
+ */
+
+__device__ double acos(double x)
+{
+    return hc::precise_math::acos(x);
+}
+
+__device__ double acosh(double x)
+{
+    return hc::precise_math::acosh(x);
+}
+__device__ double asin(double x)
+{
+    return hc::precise_math::asin(x);
+}
+__device__ double asinh(double x)
+{
+    return hc::precise_math::asinh(x);
+}
+__device__ double atan(double x)
+{
+    return hc::precise_math::atan(x);
+}
+__device__ double atan2(double y, double x)
+{
+    return hc::precise_math::atan2(y, x);
+}
+__device__ double atanh(double x)
+{
+    return hc::precise_math::atanh(x);
+}
+__device__ double cbrt(double x)
+{
+    return hc::precise_math::cbrt(x);
+}
+__device__ double ceil(double x)
+{
+    return hc::precise_math::ceil(x);
+}
+__device__ double copysign(double x, double y)
+{
+    return hc::precise_math::copysign(x, y);
+}
+__device__ double cos(double x)
+{
+    return hc::precise_math::cos(x);
+}
+__device__ double cosh(double x)
+{
+    return hc::precise_math::cosh(x);
+}
+__device__ double cospi(double x)
+{
+    return hc::precise_math::cospi(x);
+}
+__device__ double erf(double x)
+{
+    return hc::precise_math::erf(x);
+}
+__device__ double erfc(double x)
+{
+    return hc::precise_math::erfc(x);
+}
+__device__ double exp(double x)
+{
+    return hc::precise_math::exp(x);
+}
+__device__ double exp10(double x)
+{
+    return hc::precise_math::exp10(x);
+}
+__device__ double exp2(double x)
+{
+    return hc::precise_math::exp2(x);
+}
+__device__ double expm1(double x)
+{
+    return hc::precise_math::expm1(x);
+}
+__device__ double fabs(double x)
+{
+    return hc::precise_math::fabs(x);
+}
+__device__ double fdim(double x, double y)
+{
+    return hc::precise_math::fdim(x, y);
+}
+__device__ double floor(double x)
+{
+    return hc::precise_math::floor(x);
+}
+__device__ double fma(double x, double y, double z)
+{
+    return hc::precise_math::fma(x, y, z);
+}
+__device__ double fmax(double x, double y)
+{
+    return hc::precise_math::fmax(x, y);
+}
+__device__ double fmin(double x, double y)
+{
+    return hc::precise_math::fmin(x, y);
+}
+__device__ double fmod(double x, double y)
+{
+    return hc::precise_math::fmod(x, y);
+}
+__device__ double hypot(double x, double y)
+{
+    return hc::precise_math::hypot(x, y);
+}
+__device__ double ilogb(double x)
+{
+    return hc::precise_math::ilogb(x);
+}
+__device__ unsigned isfinite(double x)
+{
+    return hc::precise_math::isfinite(x);
+}
+__device__ unsigned isinf(double x)
+{
+    return hc::precise_math::isinf(x);
+}
+__device__ unsigned isnan(double x)
+{
+    return hc::precise_math::isnan(x);
+}
+__device__ double ldexp(double x, int exp)
+{
+    return hc::precise_math::ldexp(x, exp);
+}
+__device__ double log(double x)
+{
+    return hc::precise_math::log(x);
+}
+__device__ double log10(double x)
+{
+    return hc::precise_math::log10(x);
+}
+__device__ double log1p(double x)
+{
+    return hc::precise_math::log1p(x);
+}
+__device__ double log2(double x)
+{
+    return hc::precise_math::log2(x);
+}
+__device__ double logb(double x)
+{
+    return hc::precise_math::logb(x);
+}
+__device__ double nan(const char *tagp)
+{
+    return hc::precise_math::nan((int)*tagp);
+}
+__device__ double nearbyint(double x)
+{
+    return hc::precise_math::nearbyint(x);
+}
+__device__ double pow(double x, double y)
+{
+    return hc::precise_math::pow(x, y);
+}
+__device__ double remainder(double x, double y)
+{
+    return hc::precise_math::remainder(x, y);
+}
+__device__ double round(double x)
+{
+    return hc::precise_math::round(x);
+}
+__device__ double rsqrt(double x)
+{
+    return hc::precise_math::rsqrt(x);
+}
+__device__ double scalbn(double x, int n)
+{
+    return hc::precise_math::scalbn(x, n);
+}
+__device__ unsigned signbit(double x)
+{
+    return hc::precise_math::signbit(x);
+}
+__device__ double sin(double x)
+{
+    return hc::precise_math::sin(x);
+}
+__device__ double sinh(double x)
+{
+    return hc::precise_math::sinh(x);
+}
+__device__ double sinpi(double x)
+{
+    return hc::precise_math::sinpi(x);
+}
+__device__ double sqrt(double x)
+{
+    return hc::precise_math::sqrt(x);
+}
+__device__ double tan(double x)
+{
+    return hc::precise_math::tan(x);
+}
+__device__ double tanh(double x)
+{
+    return hc::precise_math::tanh(x);
+}
+__device__ double tgamma(double x)
+{
+    return hc::precise_math::tgamma(x);
+}
+__device__ double trunc(double x)
+{
+    return hc::precise_math::trunc(x);
+}
+
+
 const int warpSize = 64;

 __device__ long long int clock64() { return (long long int)hc::__clock_u64(); };
@@ -290,6 +768,15 @@ __device__  float __shfl_xor(float input, int lane_mask, int width)
  return hc::__shfl_xor(input,lane_mask,width);
 }

+__host__ __device__ int min(int arg1, int arg2) 
+{ 
+  return (int)(hc::precise_math::fmin((float)arg1, (float)arg2));
+}
+__host__ __device__ int max(int arg1, int arg2) 
+{
+  return (int)(hc::precise_math::fmax((float)arg1, (float)arg2));
+}
+


 //TODO - add a couple fast math operations here, the set here will grow :
@@ -40,6 +40,8 @@ __device__ void integer_intrinsics()
    //__mulhi((int)1, (int)2);
    __popc((unsigned int)4);
    __popcll((unsigned long long)4);
+    int a = min((int)4, (int)5);
+    int b = max((int)4, (int)5);
    //__rhadd((int)1, (int)2);
    //__sad((int)1, (int)2, 0);
    //__uhadd((unsigned int)1, (unsigned int)3);