diff --git a/projects/hip/include/hcc_detail/hip_runtime.h b/projects/hip/include/hcc_detail/hip_runtime.h index 20f19a6977..5b09d50ef4 100644 --- a/projects/hip/include/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hcc_detail/hip_runtime.h @@ -97,9 +97,6 @@ extern int HIP_TRACE_API; #endif - - - //TODO-HCC this is currently ignored by HCC target of HIP #define __launch_bounds__(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) @@ -110,6 +107,186 @@ extern int HIP_TRACE_API; #define __HCC_C__ #endif +__device__ float acosf(float x); +__device__ float acoshf(float x); +__device__ float asinf(float x); +__device__ float asinhf(float x); +__device__ float atan2f(float y, float x); +__device__ float atanf(float x); +__device__ float atanhf(float x); +__device__ float cbrtf(float x); +__device__ float ceilf(float x); +__device__ float copysignf(float x, float y); +__device__ float cosf(float x); +__device__ float coshf(float x); +__device__ float cyl_bessel_i0f(float x); +__device__ float cyl_bessel_i1f(float x); +__device__ float erfcf(float x); +__device__ float erfcinvf(float y); +__device__ float erfcxf(float x); +__device__ float erff(float x); +__device__ float erfinvf(float y); +__device__ float exp10f(float x); +__device__ float exp2f(float x); +__device__ float expf(float x); +__device__ float expm1f(float x); +__device__ float fabsf(float x); +__device__ float fdimf(float x, float y); +__device__ float fdividef(float x, float y); +__device__ float floorf(float x); +__device__ float fmaf(float x, float y, float z); +__device__ float fmaxf(float x, float y); +__device__ float fminf(float x, float y); +__device__ float fmodf(float x, float y); +__device__ float frexpf(float x, float y); +__device__ float hypotf(float x, float y); +__device__ float ilogbf(float x); +__device__ unsigned isfinite(float a); +__device__ unsigned isinf(float a); +__device__ unsigned isnan(float a); +__device__ float j0f(float x); +__device__ float j1f(float x); +__device__ float jnf(int n, float x); +__device__ float ldexpf(float x, int exp); +__device__ float lgammaf(float x); +__device__ long long int llrintf(float x); +__device__ long long int llroundf(float x); +__device__ float log10f(float x); +__device__ float log1pf(float x); +__device__ float log2f(float x); +__device__ float logbf(float x); +__device__ float logf(float x); +__device__ long int lrintf(float x); +__device__ long int lroundf(float x); +__device__ float modff(float x, float *iptr); +__device__ float nanf(const char* tagp); +__device__ float nearbyintf(float x); +__device__ float nextafterf(float x, float y); +__device__ float norm3df(float a, float b, float c); +__device__ float norm4df(float a, float b, float c, float d); +__device__ float normcdff(float y); +__device__ float normcdfinvf(float y); +__device__ float normf(int dim, const float *a); +__device__ float powf(float x, float y); +__device__ float rcbtrf(float x); +__device__ float remainderf(float x, float y); +__device__ float remquof(float x, float y, int *quo); +__device__ float rhypotf(float x, float y); +__device__ float rintf(float x); +__device__ float rnorm3df(float a, float b, float c); +__device__ float rnorm4df(float a, float b, float c, float d); +__device__ float rnormf(int dim, const float* a); +__device__ float roundf(float x); +__device__ float rsqrtf(float x); +__device__ float scalblnf(float x, long int n); +__device__ float scalbnf(float x, int n); +__device__ unsigned signbit(float a); +__device__ void sincosf(float x, float *sptr, float *cptr); +__device__ void sincospif(float x, float *sptr, float *cptr); +__device__ float sinf(float x); +__device__ float sinhf(float x); +__device__ float sinpif(float x); +__device__ float sqrtf(float x); +__device__ float tanf(float x); +__device__ float tanhf(float x); +__device__ float tgammaf(float x); +__device__ float truncf(float x); +__device__ float y0f(float x); +__device__ float y1f(float x); +__device__ float ynf(int n, float x); + +__host__ __device__ float cospif(float x); +__host__ __device__ float sinpif(float x); +__device__ float sqrtf(float x); +__host__ __device__ float rsqrtf(float x); + +__device__ double acos(double x); +__device__ double acosh(double x); +__device__ double asin(double x); +__device__ double asinh(double x); +__device__ double atan(double x); +__device__ double atan2(double y, double x); +__device__ double atanh(double x); +__device__ double cbrt(double x); +__device__ double ceil(double x); +__device__ double copysign(double x, double y); +__device__ double cos(double x); +__device__ double cosh(double x); +__host__ __device__ double cospi(double x); +__device__ double cyl_bessel_i0(double x); +__device__ double cyl_bessel_i1(double x); +__device__ double erf(double x); +__device__ double erfc(double x); +__device__ double erfcinv(double y); +__device__ double erfcx(double x); +__device__ double exp(double x); +__device__ double exp10(double x); +__device__ double exp2(double x); +__device__ double expm1(double x); +__device__ double fabs(double x); +__device__ double fdim(double x, double y); +__device__ double floor(double x); +__device__ double fma(double x, double y, double z); +__device__ double fmax(double x, double y); +__device__ double fmin(double x, double y); +__device__ double fmod(double x, double y); +__device__ double frexp(double x, int *nptr); +__device__ double hypot(double x, double y); +__device__ double ilogb(double x); +__device__ unsigned isfinite(double x); +__device__ unsigned isinf(double x); +__device__ unsigned isnan(double x); +__device__ double j0(double x); +__device__ double j1(double x); +__device__ double jn(int n, double x); +__device__ double ldexp(double x, int exp); +__device__ double lgamma(double x); +__device__ long long llrint(double x); +__device__ long llround(double x); +__device__ double log(double x); +__device__ double log10(double x); +__device__ double log1p(double x); +__device__ double log2(double x); +__device__ double logb(double x); +__device__ long int lrint(double x); +__device__ long int lround(double x); +__device__ double modf(double x, double *iptr); +__device__ double nan(const char* tagp); +__device__ double nearbyint(double x); +__device__ double nextafter(double x, double y); +__device__ double norm(int dim, const double* t); +__device__ double norm3d(double a, double b, double c); +__device__ double norm4d(double a, double b, double d); +__device__ double normcdf(double y); +__device__ double normcdfinv(double y); +__device__ double pow(double x, double y); +__device__ double rcbrt(double x); +__device__ double remainder(double x, double y); +__device__ double remquo(double x, double y, int *quo); +__device__ double rhypot(double x, double y); +__device__ double rint(double x); +__device__ double rnorm(int dim, const double* t); +__device__ double rnorm3d(double a, double b, double c); +__device__ double rnorm4d(double a, double b, double c, double d); +__device__ double round(double x); +__host__ __device__ double rsqrt(double x); +__device__ double scalbln(double x, long int n); +__device__ double scalbn(double x, int n); +__device__ unsigned signbit(double a); +__device__ double sin(double a); +__device__ double sincos(double x, double *sptr, double *cptr); +__device__ double sincospi(double x, double *sptr, double *cptr); +__device__ double sinh(double x); +__host__ __device__ double sinpi(double x); +__device__ double sqrt(double x); +__device__ double tan(double x); +__device__ double tanh(double x); +__device__ double tgamma(double x); +__device__ double trunc(double x); +__device__ double y0(double x); +__device__ double y1(double y); +__device__ double yn(int n, double x); + // TODO - hipify-clang - change to use the function call. //#define warpSize hc::__wavesize() @@ -200,7 +377,6 @@ __device__ unsigned long long int atomicXor(unsigned long long int* address, unsigned long long int val); -#include // integer intrinsic function __poc __clz __ffs __brev __device__ unsigned int __popc( unsigned int input); __device__ unsigned int __popcll( unsigned long long int input); @@ -243,18 +419,8 @@ __device__ float __shfl_down(float input, unsigned int lane_delta, int width); __device__ float __shfl_xor(float input, int lane_mask, int width); #endif -#include -// TODO: Choose whether default is precise math or fast math based on compilation flag. -#ifdef __HCC_ACCELERATOR__ -using namespace hc::precise_math; -#endif - -//TODO: Undo this once min/max functions are supported by hc -inline int min(int arg1, int arg2) __attribute((hc,cpu)) { \ - return (int)(hc::precise_math::fmin((float)arg1, (float)arg2));} -inline int max(int arg1, int arg2) __attribute((hc,cpu)) { \ - return (int)(hc::precise_math::fmax((float)arg1, (float)arg2));} - +__host__ __device__ int min(int arg1, int arg2); +__host__ __device__ int max(int arg1, int arg2); //TODO - add a couple fast math operations here, the set here will grow : __device__ float __cosf(float x); @@ -318,7 +484,6 @@ __device__ float __dsqrt_rz(double x); #endif - #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) #define HIP_KERNEL_NAME(...) __VA_ARGS__ diff --git a/projects/hip/src/device_util.cpp b/projects/hip/src/device_util.cpp index f7e21b954f..8a99282cc1 100644 --- a/projects/hip/src/device_util.cpp +++ b/projects/hip/src/device_util.cpp @@ -2,6 +2,484 @@ #include #include +#include +// TODO: Choose whether default is precise math or fast math based on compilation flag. +#ifdef __HCC_ACCELERATOR__ +using namespace hc::precise_math; +#endif + +__device__ float acosf(float x) +{ + return hc::precise_math::acosf(x); +} +__device__ float acoshf(float x) +{ + return hc::precise_math::acoshf(x); +} +__device__ float asinf(float x) +{ + return hc::precise_math::asinf(x); +} +__device__ float asinhf(float x) +{ + return hc::precise_math::asinhf(x); +} +__device__ float atan2f(float y, float x) +{ + return hc::precise_math::atan2f(x, y); +} +__device__ float atanf(float x) +{ + return hc::precise_math::atanf(x); +} +__device__ float atanhf(float x) +{ + return hc::precise_math::atanhf(x); +} +__device__ float cbrtf(float x) +{ + return hc::precise_math::cbrtf(x); +} +__device__ float ceilf(float x) +{ + return hc::precise_math::ceilf(x); +} +__device__ float copysignf(float x, float y) +{ + return hc::precise_math::copysignf(x, y); +} +__device__ float cosf(float x) +{ + return hc::precise_math::cosf(x); +} +__device__ float coshf(float x) +{ + return hc::precise_math::coshf(x); +} +__device__ float cyl_bessel_i0f(float x); +__device__ float cyl_bessel_i1f(float x); +__device__ float erfcf(float x) +{ + return hc::precise_math::erfcf(x); +} +__device__ float erfcinvf(float y); +__device__ float erfcxf(float x); +__device__ float erff(float x) +{ + return hc::precise_math::erff(x); +} +__device__ float erfinvf(float y); +__device__ float exp10f(float x) +{ + return hc::precise_math::exp10f(x); +} +__device__ float exp2f(float x) +{ + return hc::precise_math::exp2f(x); +} +__device__ float expf(float x) +{ + return hc::precise_math::expf(x); +} +__device__ float expm1f(float x) +{ + return hc::precise_math::expm1f(x); +} +__device__ float fabsf(float x) +{ + return hc::precise_math::fabsf(x); +} +__device__ float fdimf(float x, float y) +{ + return hc::precise_math::fdimf(x, y); +} +__device__ float fdividef(float x, float y); +__device__ float floorf(float x) +{ + return hc::precise_math::floorf(x); +} +__device__ float fmaf(float x, float y, float z) +{ + return hc::precise_math::fmaf(x, y, z); +} +__device__ float fmaxf(float x, float y) +{ + return hc::precise_math::fmaxf(x, y); +} +__device__ float fminf(float x, float y) +{ + return hc::precise_math::fminf(x, y); +} +__device__ float fmodf(float x, float y) +{ + return hc::precise_math::fmodf(x, y); +} +__device__ float frexpf(float x, float y); +__device__ float hypotf(float x, float y) +{ + return hc::precise_math::hypotf(x, y); +} +__device__ float ilogbf(float x) +{ + return hc::precise_math::ilogbf(x); +} +__device__ unsigned isfinite(float a) +{ + return hc::precise_math::isfinite(a); +} +__device__ unsigned isinf(float a) +{ + return hc::precise_math::isinf(a); +} +__device__ unsigned isnan(float a) +{ + return hc::precise_math::isnan(a); +} +__device__ float j0f(float x); +__device__ float j1f(float x); +__device__ float jnf(int n, float x); +__device__ float ldexpf(float x, int exp) +{ + return hc::precise_math::ldexpf(x, exp); +} +__device__ float lgammaf(float x); +__device__ long long int llrintf(float x); +__device__ long long int llroundf(float x); +__device__ float log10f(float x) +{ + return hc::precise_math::log10f(x); +} +__device__ float log1pf(float x) +{ + return hc::precise_math::log1pf(x); +} +__device__ float log2f(float x) +{ + return hc::precise_math::log2f(x); +} +__device__ float logbf(float x) +{ + return hc::precise_math::logbf(x); +} +__device__ float logf(float x) +{ + return hc::precise_math::logf(x); +} +__device__ long int lrintf(float x); +__device__ long int lroundf(float x); +__device__ float modff(float x, float *iptr); +__device__ float nanf(const char* tagp) +{ + return hc::precise_math::nanf((int)*tagp); +} +__device__ float nearbyintf(float x) +{ + return hc::precise_math::nearbyintf(x); +} +__device__ float nextafterf(float x, float y); +__device__ float norm3df(float a, float b, float c); +__device__ float norm4df(float a, float b, float c, float d); +__device__ float normcdff(float y); +__device__ float normcdfinvf(float y); +__device__ float normf(int dim, const float *a); +__device__ float powf(float x, float y) +{ + return hc::precise_math::powf(x, y); +} +__device__ float rcbtrf(float x); +__device__ float remainderf(float x, float y) +{ + return hc::precise_math::remainderf(x, y); +} +__device__ float remquof(float x, float y, int *quo); +__device__ float rhypotf(float x, float y); +__device__ float rintf(float x); +__device__ float rnorm3df(float a, float b, float c); +__device__ float rnorm4df(float a, float b, float c, float d); +__device__ float rnormf(int dim, const float* a); +__device__ float roundf(float x) +{ + return hc::precise_math::roundf(x); +} +__device__ float scalblnf(float x, long int n); +__device__ float scalbnf(float x, int n) +{ + return hc::precise_math::scalbnf(x, n); +} +__device__ unsigned signbit(float a) +{ + return hc::precise_math::signbit(a); +} +__device__ void sincosf(float x, float *sptr, float *cptr); +__device__ void sincospif(float x, float *sptr, float *cptr); +__device__ float sinf(float x) +{ + return hc::precise_math::sinf(x); +} +__device__ float sinhf(float x) +{ + return hc::precise_math::sinhf(x); +} +__device__ float tanf(float x) +{ + return hc::precise_math::tanf(x); +} +__device__ float tanhf(float x) +{ + return hc::precise_math::tanhf(x); +} +__device__ float tgammaf(float x) +{ + return hc::precise_math::tgammaf(x); +} +__device__ float truncf(float x) +{ + return hc::precise_math::truncf(x); +} +__device__ float y0f(float x); +__device__ float y1f(float x); +__device__ float ynf(int n, float x); + + +__device__ float cospif(float x) +{ + return hc::precise_math::cospif(x); +} + +__device__ float sinpif(float x) +{ + return hc::precise_math::sinpif(x); +} + +__device__ float sqrtf(float x) +{ + return hc::precise_math::sqrtf(x); +} + +__device__ float rsqrtf(float x) +{ + return hc::precise_math::rsqrtf(x); +} + +/* + * Double precision device math functions + */ + +__device__ double acos(double x) +{ + return hc::precise_math::acos(x); +} + +__device__ double acosh(double x) +{ + return hc::precise_math::acosh(x); +} +__device__ double asin(double x) +{ + return hc::precise_math::asin(x); +} +__device__ double asinh(double x) +{ + return hc::precise_math::asinh(x); +} +__device__ double atan(double x) +{ + return hc::precise_math::atan(x); +} +__device__ double atan2(double y, double x) +{ + return hc::precise_math::atan2(y, x); +} +__device__ double atanh(double x) +{ + return hc::precise_math::atanh(x); +} +__device__ double cbrt(double x) +{ + return hc::precise_math::cbrt(x); +} +__device__ double ceil(double x) +{ + return hc::precise_math::ceil(x); +} +__device__ double copysign(double x, double y) +{ + return hc::precise_math::copysign(x, y); +} +__device__ double cos(double x) +{ + return hc::precise_math::cos(x); +} +__device__ double cosh(double x) +{ + return hc::precise_math::cosh(x); +} +__device__ double cospi(double x) +{ + return hc::precise_math::cospi(x); +} +__device__ double erf(double x) +{ + return hc::precise_math::erf(x); +} +__device__ double erfc(double x) +{ + return hc::precise_math::erfc(x); +} +__device__ double exp(double x) +{ + return hc::precise_math::exp(x); +} +__device__ double exp10(double x) +{ + return hc::precise_math::exp10(x); +} +__device__ double exp2(double x) +{ + return hc::precise_math::exp2(x); +} +__device__ double expm1(double x) +{ + return hc::precise_math::expm1(x); +} +__device__ double fabs(double x) +{ + return hc::precise_math::fabs(x); +} +__device__ double fdim(double x, double y) +{ + return hc::precise_math::fdim(x, y); +} +__device__ double floor(double x) +{ + return hc::precise_math::floor(x); +} +__device__ double fma(double x, double y, double z) +{ + return hc::precise_math::fma(x, y, z); +} +__device__ double fmax(double x, double y) +{ + return hc::precise_math::fmax(x, y); +} +__device__ double fmin(double x, double y) +{ + return hc::precise_math::fmin(x, y); +} +__device__ double fmod(double x, double y) +{ + return hc::precise_math::fmod(x, y); +} +__device__ double hypot(double x, double y) +{ + return hc::precise_math::hypot(x, y); +} +__device__ double ilogb(double x) +{ + return hc::precise_math::ilogb(x); +} +__device__ unsigned isfinite(double x) +{ + return hc::precise_math::isfinite(x); +} +__device__ unsigned isinf(double x) +{ + return hc::precise_math::isinf(x); +} +__device__ unsigned isnan(double x) +{ + return hc::precise_math::isnan(x); +} +__device__ double ldexp(double x, int exp) +{ + return hc::precise_math::ldexp(x, exp); +} +__device__ double log(double x) +{ + return hc::precise_math::log(x); +} +__device__ double log10(double x) +{ + return hc::precise_math::log10(x); +} +__device__ double log1p(double x) +{ + return hc::precise_math::log1p(x); +} +__device__ double log2(double x) +{ + return hc::precise_math::log2(x); +} +__device__ double logb(double x) +{ + return hc::precise_math::logb(x); +} +__device__ double nan(const char *tagp) +{ + return hc::precise_math::nan((int)*tagp); +} +__device__ double nearbyint(double x) +{ + return hc::precise_math::nearbyint(x); +} +__device__ double pow(double x, double y) +{ + return hc::precise_math::pow(x, y); +} +__device__ double remainder(double x, double y) +{ + return hc::precise_math::remainder(x, y); +} +__device__ double round(double x) +{ + return hc::precise_math::round(x); +} +__device__ double rsqrt(double x) +{ + return hc::precise_math::rsqrt(x); +} +__device__ double scalbn(double x, int n) +{ + return hc::precise_math::scalbn(x, n); +} +__device__ unsigned signbit(double x) +{ + return hc::precise_math::signbit(x); +} +__device__ double sin(double x) +{ + return hc::precise_math::sin(x); +} +__device__ double sinh(double x) +{ + return hc::precise_math::sinh(x); +} +__device__ double sinpi(double x) +{ + return hc::precise_math::sinpi(x); +} +__device__ double sqrt(double x) +{ + return hc::precise_math::sqrt(x); +} +__device__ double tan(double x) +{ + return hc::precise_math::tan(x); +} +__device__ double tanh(double x) +{ + return hc::precise_math::tanh(x); +} +__device__ double tgamma(double x) +{ + return hc::precise_math::tgamma(x); +} +__device__ double trunc(double x) +{ + return hc::precise_math::trunc(x); +} + + const int warpSize = 64; __device__ long long int clock64() { return (long long int)hc::__clock_u64(); }; @@ -290,6 +768,15 @@ __device__ float __shfl_xor(float input, int lane_mask, int width) return hc::__shfl_xor(input,lane_mask,width); } +__host__ __device__ int min(int arg1, int arg2) +{ + return (int)(hc::precise_math::fmin((float)arg1, (float)arg2)); +} +__host__ __device__ int max(int arg1, int arg2) +{ + return (int)(hc::precise_math::fmax((float)arg1, (float)arg2)); +} + //TODO - add a couple fast math operations here, the set here will grow : diff --git a/projects/hip/tests/src/hipIntegerIntrinsics.cpp b/projects/hip/tests/src/hipIntegerIntrinsics.cpp index ea6bafcb18..2f5f474161 100644 --- a/projects/hip/tests/src/hipIntegerIntrinsics.cpp +++ b/projects/hip/tests/src/hipIntegerIntrinsics.cpp @@ -40,6 +40,8 @@ __device__ void integer_intrinsics() //__mulhi((int)1, (int)2); __popc((unsigned int)4); __popcll((unsigned long long)4); + int a = min((int)4, (int)5); + int b = max((int)4, (int)5); //__rhadd((int)1, (int)2); //__sad((int)1, (int)2, 0); //__uhadd((unsigned int)1, (unsigned int)3);