diff --git a/CMakeLists.txt b/CMakeLists.txt index 1168b65be2..cbd2050025 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,7 +181,8 @@ if(HIP_PLATFORM STREQUAL "hcc") src/device_util.cpp src/hip_ldg.cpp src/hip_fp16.cpp - src/device_functions.cpp) + src/device_functions.cpp + src/math_functions.cpp) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${HCC_HOME}/lib -lmcwamp -Wl,-Bsymbolic -Wl,-rpath ${HCC_HOME}/lib") add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME}) diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 06beeb23f8..0489a72c8b 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -23,6 +23,173 @@ THE SOFTWARE. #include #include +// Single Precision Fast Math +extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32"); +extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32"); +__device__ float __hip_fast_exp10f(float); +__device__ float __hip_fast_expf(float); +__device__ float __hip_fast_frsqrt_rn(float); +extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32"); +__device__ float __hip_fast_fsqrt_rn(float); +__device__ float __hip_fast_fsqrt_ru(float); +__device__ float __hip_fast_fsqrt_rz(float); +__device__ float __hip_fast_log10f(float); +extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32"); +__device__ float __hip_fast_logf(float); +__device__ float __hip_fast_powf(float, float); +__device__ void __hip_fast_sincosf(float,float*,float*); +extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32"); +__device__ float __hip_fast_tanf(float); +extern __attribute__((const)) float __hip_fast_fmaf(float,float,float) __asm("llvm.fma.f32"); +extern __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32"); + +extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64"); +extern __attribute__((const)) double __hip_fast_fma(double,double,double) __asm("llvm.fma.f64"); +extern __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64"); + + +// Single Precision Fast Math +__device__ inline float __cosf(float x) { + return __hip_fast_cosf(x); +} + +__device__ inline float __exp10f(float x) { + return __hip_fast_exp10f(x); +} + +__device__ inline float __expf(float x) { + return __hip_fast_expf(x); +} + +__device__ inline float __frsqrt_rn(float x) { + return __hip_fast_frsqrt_rn(x); +} + +__device__ inline float __fsqrt_rd(float x) { + return __hip_fast_fsqrt_rd(x); +} + +__device__ inline float __fsqrt_rn(float x) { + return __hip_fast_fsqrt_rn(x); +} + +__device__ inline float __fsqrt_ru(float x) { + return __hip_fast_fsqrt_ru(x); +} + +__device__ inline float __fsqrt_rz(float x) { + return __hip_fast_fsqrt_rz(x); +} + +__device__ inline float __log10f(float x) { + return __hip_fast_log10f(x); +} + +__device__ inline float __log2f(float x) { + return __hip_fast_log2f(x); +} + +__device__ inline float __logf(float x) { + return __hip_fast_logf(x); +} + +__device__ inline float __powf(float base, float exponent) { + return __hip_fast_powf(base, exponent); +} + +__device__ inline void __sincosf(float x, float *s, float *c) { + return __hip_fast_sincosf(x, s, c); +} + +__device__ inline float __sinf(float x) { + return __hip_fast_sinf(x); +} + +__device__ inline float __tanf(float x) { + return __hip_fast_tanf(x); +} + +__device__ inline float __fmaf_rd(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ inline float __fmaf_rn(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ inline float __fmaf_ru(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ inline float __fmaf_rz(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ inline float __frcp_rd(float x) { + return __hip_fast_frcp(x); +} + +__device__ inline float __frcp_rn(float x) { + return __hip_fast_frcp(x); +} + +__device__ inline float __frcp_ru(float x) { + return __hip_fast_frcp(x); +} + +__device__ inline float __frcp_rz(float x) { + return __hip_fast_frcp(x); +} + +__device__ inline double __dsqrt_rd(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ inline double __dsqrt_rn(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ inline double __dsqrt_ru(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ inline double __dsqrt_rz(double x) { + return __hip_fast_dsqrt(x); +} + +__device__ inline double __fma_rd(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + +__device__ inline double __fma_rn(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + +__device__ inline double __fma_ru(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + +__device__ inline double __fma_rz(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + +__device__ inline double __drcp_rd(double x) { + return __hip_fast_drcp(x); +} + +__device__ inline double __drcp_rn(double x) { + return __hip_fast_drcp(x); +} + +__device__ inline double __drcp_ru(double x) { + return __hip_fast_drcp(x); +} + +__device__ inline double __drcp_rz(double x) { + return __hip_fast_drcp(x); +} + + extern "C" unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int); extern "C" signed int __hip_hc_ir_mul24_int(signed int, signed int); extern "C" signed int __hip_hc_ir_mulhi_int(signed int, signed int); diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h index 755cb19f6d..73049eb5fb 100644 --- a/include/hip/hcc_detail/hip_fp16.h +++ b/include/hip/hcc_detail/hip_fp16.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef HIP_FP16_H -#define HIP_FP16_H +#ifndef HIP_HCC_DETAIL_FP16_H +#define HIP_HCC_DETAIL_FP16_H #include "hip/hip_runtime.h" @@ -452,8 +452,6 @@ typedef struct __attribute__((aligned(4))){ } __half2; - - #endif diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index e911d17ebb..f11846a0da 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -121,208 +121,6 @@ extern int HIP_TRACE_API; #define __HCC_C__ #endif -__device__ float acosf(float x); -__device__ float acoshf(float x); -__device__ float asinf(float x); -__device__ float asinhf(float x); -__device__ float atan2f(float y, float x); -__device__ float atanf(float x); -__device__ float atanhf(float x); -__device__ float cbrtf(float x); -__device__ float ceilf(float x); -__device__ float copysignf(float x, float y); -__device__ float coshf(float x); -__device__ float cyl_bessel_i0f(float x); -__device__ float cyl_bessel_i1f(float x); -__device__ float erfcf(float x); -__device__ float erfcinvf(float y); - -__device__ float erfcxf(float x); -__device__ float erff(float x); -__device__ float erfinvf(float y); -__device__ float exp2f(float x); -__device__ float expm1f(float x); -__device__ float fabsf(float x); -__device__ float fdimf(float x, float y); -__device__ __host__ float fdividef(float x, float y); -__device__ float floorf(float x); -__device__ float fmaf(float x, float y, float z); -__device__ float fmaxf(float x, float y); -__device__ float fminf(float x, float y); -__device__ float fmodf(float x, float y); -__device__ float frexpf(float x, float y); -__device__ float hypotf(float x, float y); -__device__ float ilogbf(float x); -__host__ __device__ unsigned isfinite(float a); -__device__ unsigned isinf(float a); -__device__ unsigned isnan(float a); -__device__ float j0f(float x); -__device__ float j1f(float x); -__device__ float jnf(int n, float x); -__device__ float ldexpf(float x, int exp); -__device__ float lgammaf(float x); -__device__ long long int llrintf(float x); -__device__ long long int llroundf(float x); -__device__ float log1pf(float x); -__device__ float logbf(float x); -__device__ long int lrintf(float x); -__device__ long int lroundf(float x); -__device__ float modff(float x, float *iptr); -__device__ float nanf(const char* tagp); -__device__ float nearbyintf(float x); -__device__ float nextafterf(float x, float y); -__device__ float norm3df(float a, float b, float c); -__device__ float norm4df(float a, float b, float c, float d); -__device__ float normcdff(float y); -__device__ float normcdfinvf(float y); -__device__ float normf(int dim, const float *a); -__device__ float rcbrtf(float x); -__device__ float remainderf(float x, float y); -__device__ float remquof(float x, float y, int *quo); -__device__ float rhypotf(float x, float y); -__device__ float rintf(float x); -__device__ float rnorm3df(float a, float b, float c); -__device__ float rnorm4df(float a, float b, float c, float d); -__device__ float rnormf(int dim, const float* a); -__device__ float roundf(float x); -__device__ float rsqrtf(float x); -__device__ float scalblnf(float x, long int n); -__device__ float scalbnf(float x, int n); -__host__ __device__ unsigned signbit(float a); -__device__ void sincospif(float x, float *sptr, float *cptr); -__device__ float sinhf(float x); -__device__ float sinpif(float x); -__device__ float sqrtf(float x); -__device__ float tanhf(float x); -__device__ float tgammaf(float x); -__device__ float truncf(float x); -__device__ float y0f(float x); -__device__ float y1f(float x); -__device__ float ynf(int n, float x); - -__host__ __device__ float cospif(float x); -__host__ __device__ float sinpif(float x); -// /__device__ float sqrtf(float x); -__host__ __device__ float rsqrtf(float x); -__host__ float normcdff(float y); - -__host__ float erfcinvf(float y); -__host__ float erfcxf(float x); -__host__ float erfinvf(float y); -__host__ float norm3df(float a, float b, float c); -__host__ float normcdfinvf(float y); -__host__ float norm4df(float a, float b, float c, float d); -__host__ float rcbrtf(float x); -__host__ float rhypotf(float x, float y); -__host__ float rnorm3df(float a, float b, float c); -__host__ float rnormf(int dim, const float* a); -__host__ float rnorm4df(float a, float b, float c, float d); -__host__ void sincospif(float x, float *sptr, float *cptr); - -__device__ double acos(double x); -__device__ double acosh(double x); -__device__ double asin(double x); -__device__ double asinh(double x); -__device__ double atan(double x); -__device__ double atan2(double y, double x); -__device__ double atanh(double x); -__device__ double cbrt(double x); -__device__ double ceil(double x); -__device__ double copysign(double x, double y); -__device__ double cos(double x); -__device__ double cosh(double x); -__host__ __device__ double cospi(double x); -__device__ double cyl_bessel_i0(double x); -__device__ double cyl_bessel_i1(double x); -__device__ double erf(double x); -__device__ double erfc(double x); -__device__ double erfcinv(double y); -__device__ double erfcx(double x); -__device__ double erfinv(double x); -__device__ double exp(double x); -__device__ double exp10(double x); -__device__ double exp2(double x); -__device__ double expm1(double x); -__device__ double fabs(double x); -__device__ double fdim(double x, double y); -__device__ double fdivide(double x, double y); -__device__ double floor(double x); -__device__ double fma(double x, double y, double z); -__device__ double fmax(double x, double y); -__device__ double fmin(double x, double y); -__device__ double fmod(double x, double y); -__device__ double frexp(double x, int *nptr); -__device__ double hypot(double x, double y); -__device__ double ilogb(double x); -__host__ __device__ unsigned isfinite(double x); -__device__ unsigned isinf(double x); -__device__ unsigned isnan(double x); -__device__ double j0(double x); -__device__ double j1(double x); -__device__ double jn(int n, double x); -__device__ double ldexp(double x, int exp); -__device__ double lgamma(double x); -__device__ long long llrint(double x); -__device__ long long llround(double x); -__device__ double log(double x); -__device__ double log10(double x); -__device__ double log1p(double x); -__device__ double log2(double x); -__device__ double logb(double x); -__device__ long int lrint(double x); -__device__ long int lround(double x); -__device__ double modf(double x, double *iptr); -__device__ double nan(const char* tagp); -__device__ double nearbyint(double x); -__device__ double nextafter(double x, double y); -__device__ double norm(int dim, const double* t); -__device__ double norm3d(double a, double b, double c); -__host__ double norm3d(double a, double b, double c); -__device__ double norm4d(double a, double b, double c, double d); -__host__ double norm4d(double a, double b, double c, double d); -__device__ double normcdf(double y); -__host__ double normcdf(double y); -__device__ double normcdfinv(double y); -__host__ double normcdfinv(double y); -__device__ double pow(double x, double y); -__device__ double rcbrt(double x); -__host__ double rcbrt(double x); -__device__ double remainder(double x, double y); -__device__ double remquo(double x, double y, int *quo); -__device__ double rhypot(double x, double y); -__host__ double rhypot(double x, double y); -__device__ double rint(double x); -__device__ double rnorm(int dim, const double* t); -__host__ double rnorm(int dim, const double* t); -__device__ double rnorm3d(double a, double b, double c); -__host__ double rnorm3d(double a, double b, double c); -__device__ double rnorm4d(double a, double b, double c, double d); -__host__ double rnorm4d(double a, double b, double c, double d); -__device__ double round(double x); -__host__ __device__ double rsqrt(double x); -__device__ double scalbln(double x, long int n); -__device__ double scalbn(double x, int n); -__host__ __device__ unsigned signbit(double a); -__device__ double sin(double a); -__device__ void sincos(double x, double *sptr, double *cptr); -__device__ void sincospi(double x, double *sptr, double *cptr); -__host__ void sincospi(double x, double *sptr, double *cptr); -__device__ double sinh(double x); -__host__ __device__ double sinpi(double x); -__device__ double sqrt(double x); -__device__ double tan(double x); -__device__ double tanh(double x); -__device__ double tgamma(double x); -__device__ double trunc(double x); -__device__ double y0(double x); -__device__ double y1(double y); -__device__ double yn(int n, double x); - -__host__ double erfcinv(double y); -__host__ double erfcx(double x); -__host__ double erfinv(double y); -__host__ double fdivide(double x, double y); - // TODO - hipify-clang - change to use the function call. //#define warpSize hc::__wavesize() extern const int warpSize; @@ -451,252 +249,6 @@ __host__ __device__ int max(int arg1, int arg2); __device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr(); -//TODO - add a couple fast math operations here, the set here will grow : - -// Single Precision Precise Math -__device__ float __hip_precise_cosf(float); -__device__ float __hip_precise_exp10f(float); -__device__ float __hip_precise_expf(float); -__device__ float __hip_precise_frsqrt_rn(float); -__device__ float __hip_precise_fsqrt_rd(float); -__device__ float __hip_precise_fsqrt_rn(float); -__device__ float __hip_precise_fsqrt_ru(float); -__device__ float __hip_precise_fsqrt_rz(float); -__device__ float __hip_precise_log10f(float); -__device__ float __hip_precise_log2f(float); -__device__ float __hip_precise_logf(float); -__device__ float __hip_precise_powf(float, float); -__device__ void __hip_precise_sincosf(float,float*,float*); -__device__ float __hip_precise_sinf(float); -__device__ float __hip_precise_tanf(float); - -// Double Precision Precise Math -__device__ double __hip_precise_dsqrt_rd(double); -__device__ double __hip_precise_dsqrt_rn(double); -__device__ double __hip_precise_dsqrt_ru(double); -__device__ double __hip_precise_dsqrt_rz(double); - -// Single Precision Fast Math -extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32"); -extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32"); -__device__ float __hip_fast_exp10f(float); -__device__ float __hip_fast_expf(float); -__device__ float __hip_fast_frsqrt_rn(float); -extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32"); -__device__ float __hip_fast_fsqrt_rn(float); -__device__ float __hip_fast_fsqrt_ru(float); -__device__ float __hip_fast_fsqrt_rz(float); -__device__ float __hip_fast_log10f(float); -extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32"); -__device__ float __hip_fast_logf(float); -__device__ float __hip_fast_powf(float, float); -__device__ void __hip_fast_sincosf(float,float*,float*); -extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32"); -__device__ float __hip_fast_tanf(float); -extern __attribute__((const)) float __hip_fast_fmaf(float,float,float) __asm("llvm.fma.f32"); -extern __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32"); - -extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64"); -extern __attribute__((const)) double __hip_fast_fma(double,double,double) __asm("llvm.fma.f64"); -extern __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64"); - -#ifdef HIP_FAST_MATH -// Single Precision Precise Math when enabled - -__device__ inline float cosf(float x) { - return __hip_fast_cosf(x); -} - -__device__ inline float exp10f(float x) { - return __hip_fast_exp10f(x); -} - -__device__ inline float expf(float x) { - return __hip_fast_expf(x); -} - -__device__ inline float log10f(float x) { - return __hip_fast_log10f(x); -} - -__device__ inline float log2f(float x) { - return __hip_fast_log2f(x); -} - -__device__ inline float logf(float x) { - return __hip_fast_logf(x); -} - -__device__ inline float powf(float base, float exponent) { - return __hip_fast_powf(base, exponent); -} - -__device__ inline void sincosf(float x, float *s, float *c) { - return __hip_fast_sincosf(x, s, c); -} - -__device__ inline float sinf(float x) { - return __hip_fast_sinf(x); -} - -__device__ inline float tanf(float x) { - return __hip_fast_tanf(x); -} - -#else - -__device__ float sinf(float); -__device__ float cosf(float); -__device__ float tanf(float); -__device__ void sincosf(float, float*, float*); -__device__ float logf(float); -__device__ float log2f(float); -__device__ float log10f(float); -__device__ float expf(float); -__device__ float exp10f(float); -__device__ float powf(float, float); - -#endif -// Single Precision Fast Math -__device__ inline float __cosf(float x) { - return __hip_fast_cosf(x); -} - -__device__ inline float __exp10f(float x) { - return __hip_fast_exp10f(x); -} - -__device__ inline float __expf(float x) { - return __hip_fast_expf(x); -} - -__device__ inline float __frsqrt_rn(float x) { - return __hip_fast_frsqrt_rn(x); -} - -__device__ inline float __fsqrt_rd(float x) { - return __hip_fast_fsqrt_rd(x); -} - -__device__ inline float __fsqrt_rn(float x) { - return __hip_fast_fsqrt_rn(x); -} - -__device__ inline float __fsqrt_ru(float x) { - return __hip_fast_fsqrt_ru(x); -} - -__device__ inline float __fsqrt_rz(float x) { - return __hip_fast_fsqrt_rz(x); -} - -__device__ inline float __log10f(float x) { - return __hip_fast_log10f(x); -} - -__device__ inline float __log2f(float x) { - return __hip_fast_log2f(x); -} - -__device__ inline float __logf(float x) { - return __hip_fast_logf(x); -} - -__device__ inline float __powf(float base, float exponent) { - return __hip_fast_powf(base, exponent); -} - -__device__ inline void __sincosf(float x, float *s, float *c) { - return __hip_fast_sincosf(x, s, c); -} - -__device__ inline float __sinf(float x) { - return __hip_fast_sinf(x); -} - -__device__ inline float __tanf(float x) { - return __hip_fast_tanf(x); -} - -__device__ inline float __fmaf_rd(float x, float y, float z) { - return __hip_fast_fmaf(x, y, z); -} - -__device__ inline float __fmaf_rn(float x, float y, float z) { - return __hip_fast_fmaf(x, y, z); -} - -__device__ inline float __fmaf_ru(float x, float y, float z) { - return __hip_fast_fmaf(x, y, z); -} - -__device__ inline float __fmaf_rz(float x, float y, float z) { - return __hip_fast_fmaf(x, y, z); -} - -__device__ inline float __frcp_rd(float x) { - return __hip_fast_frcp(x); -} - -__device__ inline float __frcp_rn(float x) { - return __hip_fast_frcp(x); -} - -__device__ inline float __frcp_ru(float x) { - return __hip_fast_frcp(x); -} - -__device__ inline float __frcp_rz(float x) { - return __hip_fast_frcp(x); -} - -__device__ inline double __dsqrt_rd(double x) { - return __hip_fast_dsqrt(x); -} - -__device__ inline double __dsqrt_rn(double x) { - return __hip_fast_dsqrt(x); -} - -__device__ inline double __dsqrt_ru(double x) { - return __hip_fast_dsqrt(x); -} - -__device__ inline double __dsqrt_rz(double x) { - return __hip_fast_dsqrt(x); -} - -__device__ inline double __fma_rd(double x, double y, double z) { - return __hip_fast_fma(x, y, z); -} - -__device__ inline double __fma_rn(double x, double y, double z) { - return __hip_fast_fma(x, y, z); -} - -__device__ inline double __fma_ru(double x, double y, double z) { - return __hip_fast_fma(x, y, z); -} - -__device__ inline double __fma_rz(double x, double y, double z) { - return __hip_fast_fma(x, y, z); -} - -__device__ inline double __drcp_rd(double x) { - return __hip_fast_drcp(x); -} - -__device__ inline double __drcp_rn(double x) { - return __hip_fast_drcp(x); -} - -__device__ inline double __drcp_ru(double x) { - return __hip_fast_drcp(x); -} - -__device__ inline double __drcp_rz(double x) { - return __hip_fast_drcp(x); -} /** * CUDA 8 device function features diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h new file mode 100644 index 0000000000..5a0e21f83c --- /dev/null +++ b/include/hip/hcc_detail/math_functions.h @@ -0,0 +1,288 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_HCC_DETAIL_MATH_FUNCTIONS_H +#define HIP_HCC_DETAIL_MATH_FUNCTIONS_H + +#include +#include +#include + +__device__ float acosf(float x); +__device__ float acoshf(float x); +__device__ float asinf(float x); +__device__ float asinhf(float x); +__device__ float atan2f(float y, float x); +__device__ float atanf(float x); +__device__ float atanhf(float x); +__device__ float cbrtf(float x); +__device__ float ceilf(float x); +__device__ float copysignf(float x, float y); +__device__ float coshf(float x); +__device__ float cyl_bessel_i0f(float x); +__device__ float cyl_bessel_i1f(float x); +__device__ float erfcf(float x); +__device__ float erfcinvf(float y); + +__device__ float erfcxf(float x); +__device__ float erff(float x); +__device__ float erfinvf(float y); +__device__ float exp2f(float x); +__device__ float expm1f(float x); +__device__ float fabsf(float x); +__device__ float fdimf(float x, float y); +__device__ __host__ float fdividef(float x, float y); +__device__ float floorf(float x); +__device__ float fmaf(float x, float y, float z); +__device__ float fmaxf(float x, float y); +__device__ float fminf(float x, float y); +__device__ float fmodf(float x, float y); +__device__ float frexpf(float x, float y); +__device__ float hypotf(float x, float y); +__device__ float ilogbf(float x); +__host__ __device__ int isfinite(float a); +__device__ unsigned isinf(float a); +__device__ unsigned isnan(float a); +__device__ float j0f(float x); +__device__ float j1f(float x); +__device__ float jnf(int n, float x); +__device__ float ldexpf(float x, int exp); +__device__ float lgammaf(float x); +__device__ long long int llrintf(float x); +__device__ long long int llroundf(float x); +__device__ float log1pf(float x); +__device__ float logbf(float x); +__device__ long int lrintf(float x); +__device__ long int lroundf(float x); +__device__ float modff(float x, float *iptr); +__device__ float nanf(const char* tagp); +__device__ float nearbyintf(float x); +__device__ float nextafterf(float x, float y); +__device__ float norm3df(float a, float b, float c); +__device__ float norm4df(float a, float b, float c, float d); +__device__ float normcdff(float y); +__device__ float normcdfinvf(float y); +__device__ float normf(int dim, const float *a); +__device__ float rcbrtf(float x); +__device__ float remainderf(float x, float y); +__device__ float remquof(float x, float y, int *quo); +__device__ float rhypotf(float x, float y); +__device__ float rintf(float x); +__device__ float rnorm3df(float a, float b, float c); +__device__ float rnorm4df(float a, float b, float c, float d); +__device__ float rnormf(int dim, const float* a); +__device__ float roundf(float x); +__device__ float rsqrtf(float x); +__device__ float scalblnf(float x, long int n); +__device__ float scalbnf(float x, int n); +__host__ __device__ unsigned signbit(float a); +__device__ void sincospif(float x, float *sptr, float *cptr); +__device__ float sinhf(float x); +__device__ float sinpif(float x); +__device__ float sqrtf(float x); +__device__ float tanhf(float x); +__device__ float tgammaf(float x); +__device__ float truncf(float x); +__device__ float y0f(float x); +__device__ float y1f(float x); +__device__ float ynf(int n, float x); + +__host__ __device__ float cospif(float x); +__host__ __device__ float sinpif(float x); +// /__device__ float sqrtf(float x); +__host__ __device__ float rsqrtf(float x); +__host__ float normcdff(float y); + +__host__ float erfcinvf(float y); +__host__ float erfcxf(float x); +__host__ float erfinvf(float y); +__host__ float norm3df(float a, float b, float c); +__host__ float normcdfinvf(float y); +__host__ float norm4df(float a, float b, float c, float d); +__host__ float rcbrtf(float x); +__host__ float rhypotf(float x, float y); +__host__ float rnorm3df(float a, float b, float c); +__host__ float rnormf(int dim, const float* a); +__host__ float rnorm4df(float a, float b, float c, float d); +__host__ void sincospif(float x, float *sptr, float *cptr); + +__device__ double acos(double x); +__device__ double acosh(double x); +__device__ double asin(double x); +__device__ double asinh(double x); +__device__ double atan(double x); +__device__ double atan2(double y, double x); +__device__ double atanh(double x); +__device__ double cbrt(double x); +__device__ double ceil(double x); +__device__ double copysign(double x, double y); +__device__ double cos(double x); +__device__ double cosh(double x); +__host__ __device__ double cospi(double x); +__device__ double cyl_bessel_i0(double x); +__device__ double cyl_bessel_i1(double x); +__device__ double erf(double x); +__device__ double erfc(double x); +__device__ double erfcinv(double y); +__device__ double erfcx(double x); +__device__ double erfinv(double x); +__device__ double exp(double x); +__device__ double exp10(double x); +__device__ double exp2(double x); +__device__ double expm1(double x); +__device__ double fabs(double x); +__device__ double fdim(double x, double y); +__device__ double floor(double x); +__device__ double fma(double x, double y, double z); +__device__ double fmax(double x, double y); +__device__ double fmin(double x, double y); +__device__ double fmod(double x, double y); +__device__ double frexp(double x, int *nptr); +__device__ double hypot(double x, double y); +__device__ double ilogb(double x); +__host__ __device__ unsigned isfinite(double x); +__device__ unsigned isinf(double x); +__device__ unsigned isnan(double x); +__device__ double j0(double x); +__device__ double j1(double x); +__device__ double jn(int n, double x); +__device__ double ldexp(double x, int exp); +__device__ double lgamma(double x); +__device__ long long llrint(double x); +__device__ long long llround(double x); +__device__ double log(double x); +__device__ double log10(double x); +__device__ double log1p(double x); +__device__ double log2(double x); +__device__ double logb(double x); +__device__ long int lrint(double x); +__device__ long int lround(double x); +__device__ double modf(double x, double *iptr); +__device__ double nan(const char* tagp); +__device__ double nearbyint(double x); +__device__ double nextafter(double x, double y); +__device__ double norm(int dim, const double* t); +__device__ double norm3d(double a, double b, double c); +__host__ double norm3d(double a, double b, double c); +__device__ double norm4d(double a, double b, double c, double d); +__host__ double norm4d(double a, double b, double c, double d); +__device__ double normcdf(double y); +__host__ double normcdf(double y); +__device__ double normcdfinv(double y); +__host__ double normcdfinv(double y); +__device__ double pow(double x, double y); +__device__ double rcbrt(double x); +__host__ double rcbrt(double x); +__device__ double remainder(double x, double y); +__device__ double remquo(double x, double y, int *quo); +__device__ double rhypot(double x, double y); +__host__ double rhypot(double x, double y); +__device__ double rint(double x); +__device__ double rnorm(int dim, const double* t); +__host__ double rnorm(int dim, const double* t); +__device__ double rnorm3d(double a, double b, double c); +__host__ double rnorm3d(double a, double b, double c); +__device__ double rnorm4d(double a, double b, double c, double d); +__host__ double rnorm4d(double a, double b, double c, double d); +__device__ double round(double x); +__host__ __device__ double rsqrt(double x); +__device__ double scalbln(double x, long int n); +__device__ double scalbn(double x, int n); +__host__ __device__ unsigned signbit(double a); +__device__ double sin(double a); +__device__ void sincos(double x, double *sptr, double *cptr); +__device__ void sincospi(double x, double *sptr, double *cptr); +__host__ void sincospi(double x, double *sptr, double *cptr); +__device__ double sinh(double x); +__host__ __device__ double sinpi(double x); +__device__ double sqrt(double x); +__device__ double tan(double x); +__device__ double tanh(double x); +__device__ double tgamma(double x); +__device__ double trunc(double x); +__device__ double y0(double x); +__device__ double y1(double y); +__device__ double yn(int n, double x); + +__host__ double erfcinv(double y); +__host__ double erfcx(double x); +__host__ double erfinv(double y); +__host__ double fdivide(double x, double y); +__host__ double norm(double x, const double *t); + +#ifdef HIP_FAST_MATH +// Single Precision Precise Math when enabled + +__device__ inline float cosf(float x) { + return __hip_fast_cosf(x); +} + +__device__ inline float exp10f(float x) { + return __hip_fast_exp10f(x); +} + +__device__ inline float expf(float x) { + return __hip_fast_expf(x); +} + +__device__ inline float log10f(float x) { + return __hip_fast_log10f(x); +} + +__device__ inline float log2f(float x) { + return __hip_fast_log2f(x); +} + +__device__ inline float logf(float x) { + return __hip_fast_logf(x); +} + +__device__ inline float powf(float base, float exponent) { + return __hip_fast_powf(base, exponent); +} + +__device__ inline void sincosf(float x, float *s, float *c) { + return __hip_fast_sincosf(x, s, c); +} + +__device__ inline float sinf(float x) { + return __hip_fast_sinf(x); +} + +__device__ inline float tanf(float x) { + return __hip_fast_tanf(x); +} + +#else + +__device__ float sinf(float); +__device__ float cosf(float); +__device__ float tanf(float); +__device__ void sincosf(float, float*, float*); +__device__ float logf(float); +__device__ float log2f(float); +__device__ float log10f(float); +__device__ float expf(float); +__device__ float exp10f(float); +__device__ float powf(float, float); + +#endif + + +#endif diff --git a/include/hip/math_functions.h b/include/hip/math_functions.h new file mode 100644 index 0000000000..d33f7a2e90 --- /dev/null +++ b/include/hip/math_functions.h @@ -0,0 +1,49 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//! HIP = Heterogeneous-compute Interface for Portability +//! +//! Define a extremely thin runtime layer that allows source code to be compiled unmodified +//! through either AMD HCC or NVCC. Key features tend to be in the spirit +//! and terminology of CUDA, but with a portable path to other accelerators as well: +// +//! Both paths support rich C++ features including classes, templates, lambdas, etc. +//! Runtime API is C +//! Memory management is based on pure pointers and resembles malloc/free/copy. +// +//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch macros. +//! hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ features. + +#pragma once + +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appears +// on NVCC path: + + +#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined (__HIP_PLATFORM_HCC__) +#include +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif diff --git a/src/device_functions.cpp b/src/device_functions.cpp index 4b0eb9a5ff..abc9db570e 100644 --- a/src/device_functions.cpp +++ b/src/device_functions.cpp @@ -523,3 +523,71 @@ __device__ unsigned long long __umul64hi(unsigned long long int x, unsigned long uHold1.ul = uHold1.ui[1] * uHold2.ui[1]; return uHold1.ul; } + +/* +HIP specific device functions +*/ + +__device__ unsigned __hip_ds_bpermute(int index, unsigned src) { + return hc::__amdgcn_ds_bpermute(index, src); +} + +__device__ float __hip_ds_bpermutef(int index, float src) { + return hc::__amdgcn_ds_bpermute(index, src); +} + +__device__ unsigned __hip_ds_permute(int index, unsigned src) { + return hc::__amdgcn_ds_permute(index, src); +} + +__device__ float __hip_ds_permutef(int index, float src) { + return hc::__amdgcn_ds_permute(index, src); +} + +__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern) { + return hc::__amdgcn_ds_swizzle(src, pattern); +} + +__device__ float __hip_ds_swizzlef(float src, int pattern) { + return hc::__amdgcn_ds_swizzle(src, pattern); +} + +__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl) { + return hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); +} + +#define MASK1 0x00ff00ff +#define MASK2 0xff00ff00 + +__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.a & MASK1; + unsigned one2 = in2.a & MASK1; + out.a = (one1 + one2) & MASK1; + one1 = in1.a & MASK2; + one2 = in2.a & MASK2; + out.a = out.a | ((one1 + one2) & MASK2); + return out; +} + +__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.a & MASK1; + unsigned one2 = in2.a & MASK1; + out.a = (one1 - one2) & MASK1; + one1 = in1.a & MASK2; + one2 = in2.a & MASK2; + out.a = out.a | ((one1 - one2) & MASK2); + return out; +} + +__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.a & MASK1; + unsigned one2 = in2.a & MASK1; + out.a = (one1 * one2) & MASK1; + one1 = in1.a & MASK2; + one2 = in2.a & MASK2; + out.a = out.a | ((one1 * one2) & MASK2); + return out; +} diff --git a/src/device_util.cpp b/src/device_util.cpp index e875db1cf9..d80d9e7ef5 100644 --- a/src/device_util.cpp +++ b/src/device_util.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. #include #include #include "device_util.h" - +#include "hip/hcc_detail/device_functions.h" #include "hip/hip_runtime.h" //================================================================================================= @@ -96,69 +96,7 @@ __device__ void* __hip_hc_free(void *ptr) return nullptr; } -__device__ unsigned __hip_ds_bpermute(int index, unsigned src) { - return hc::__amdgcn_ds_bpermute(index, src); -} -__device__ float __hip_ds_bpermutef(int index, float src) { - return hc::__amdgcn_ds_bpermute(index, src); -} - -__device__ unsigned __hip_ds_permute(int index, unsigned src) { - return hc::__amdgcn_ds_permute(index, src); -} - -__device__ float __hip_ds_permutef(int index, float src) { - return hc::__amdgcn_ds_permute(index, src); -} - -__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern) { - return hc::__amdgcn_ds_swizzle(src, pattern); -} - -__device__ float __hip_ds_swizzlef(float src, int pattern) { - return hc::__amdgcn_ds_swizzle(src, pattern); -} - -__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl) { - return hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); -} - -#define MASK1 0x00ff00ff -#define MASK2 0xff00ff00 - -__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) { - char4 out; - unsigned one1 = in1.a & MASK1; - unsigned one2 = in2.a & MASK1; - out.a = (one1 + one2) & MASK1; - one1 = in1.a & MASK2; - one2 = in2.a & MASK2; - out.a = out.a | ((one1 + one2) & MASK2); - return out; -} - -__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) { - char4 out; - unsigned one1 = in1.a & MASK1; - unsigned one2 = in2.a & MASK1; - out.a = (one1 - one2) & MASK1; - one1 = in1.a & MASK2; - one2 = in2.a & MASK2; - out.a = out.a | ((one1 - one2) & MASK2); - return out; -} - -__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) { - char4 out; - unsigned one1 = in1.a & MASK1; - unsigned one2 = in2.a & MASK1; - out.a = (one1 * one2) & MASK1; - one1 = in1.a & MASK2; - one2 = in2.a & MASK2; - out.a = out.a | ((one1 * one2) & MASK2); - return out; -} // loop unrolling __device__ void* memcpy(void* dst, void* src, size_t size) @@ -192,39 +130,6 @@ __device__ void* free(void *ptr) return __hip_hc_free(ptr); } -//================================================================================================= - -// TODO: Choose whether default is precise math or fast math based on compilation flag. -#ifdef __HCC_ACCELERATOR__ -using namespace hc::precise_math; -#endif - - -#define HIP_SQRT_2 1.41421356237 -#define HIP_SQRT_PI 1.77245385091 - -#define __hip_erfinva3 -0.140543331 -#define __hip_erfinva2 0.914624893 -#define __hip_erfinva1 -1.645349621 -#define __hip_erfinva0 0.886226899 - -#define __hip_erfinvb4 0.012229801 -#define __hip_erfinvb3 -0.329097515 -#define __hip_erfinvb2 1.442710462 -#define __hip_erfinvb1 -2.118377725 -#define __hip_erfinvb0 1 - -#define __hip_erfinvc3 1.641345311 -#define __hip_erfinvc2 3.429567803 -#define __hip_erfinvc1 -1.62490649 -#define __hip_erfinvc0 -1.970840454 - -#define __hip_erfinvd2 1.637067800 -#define __hip_erfinvd1 3.543889200 -#define __hip_erfinvd0 1 - -#define HIP_PI 3.14159265358979323846 - __device__ float __hip_erfinvf(float x){ float ret; int sign; @@ -942,735 +847,6 @@ __device__ float __hip_ynf(int n, float x) -__device__ float acosf(float x) -{ - return hc::precise_math::acosf(x); -} -__device__ float acoshf(float x) -{ - return hc::precise_math::acoshf(x); -} -__device__ float asinf(float x) -{ - return hc::precise_math::asinf(x); -} -__device__ float asinhf(float x) -{ - return hc::precise_math::asinhf(x); -} -__device__ float atan2f(float y, float x) -{ - return hc::precise_math::atan2f(x, y); -} -__device__ float atanf(float x) -{ - return hc::precise_math::atanf(x); -} -__device__ float atanhf(float x) -{ - return hc::precise_math::atanhf(x); -} -__device__ float cbrtf(float x) -{ - return hc::precise_math::cbrtf(x); -} -__device__ float ceilf(float x) -{ - return hc::precise_math::ceilf(x); -} -__device__ float copysignf(float x, float y) -{ - return hc::precise_math::copysignf(x, y); -} -__device__ float cosf(float x) -{ - return hc::precise_math::cosf(x); -} -__device__ float coshf(float x) -{ - return hc::precise_math::coshf(x); -} -__device__ float cyl_bessel_i0f(float x); -__device__ float cyl_bessel_i1f(float x); -__device__ float erfcf(float x) -{ - return hc::precise_math::erfcf(x); -} -__device__ float erfcinvf(float y) -{ - return __hip_erfinvf(1 - y); -} -__device__ float erfcxf(float x) -{ - return hc::precise_math::expf(x*x)*hc::precise_math::erfcf(x); -} -__device__ float erff(float x) -{ - return hc::precise_math::erff(x); -} -__device__ float erfinvf(float y) -{ - return __hip_erfinvf(y); -} -__device__ float exp10f(float x) -{ - return hc::precise_math::exp10f(x); -} -__device__ float exp2f(float x) -{ - return hc::precise_math::exp2f(x); -} -__device__ float expf(float x) -{ - return hc::precise_math::expf(x); -} -__device__ float expm1f(float x) -{ - return hc::precise_math::expm1f(x); -} -__device__ float fabsf(float x) -{ - return hc::precise_math::fabsf(x); -} -__device__ float fdimf(float x, float y) -{ - return hc::precise_math::fdimf(x, y); -} -__device__ float fdividef(float x, float y) -{ - return x/y; -} -__device__ float floorf(float x) -{ - return hc::precise_math::floorf(x); -} -__device__ float fmaf(float x, float y, float z) -{ - return hc::precise_math::fmaf(x, y, z); -} -__device__ float fmaxf(float x, float y) -{ - return hc::precise_math::fmaxf(x, y); -} -__device__ float fminf(float x, float y) -{ - return hc::precise_math::fminf(x, y); -} -__device__ float fmodf(float x, float y) -{ - return hc::precise_math::fmodf(x, y); -} -__device__ float frexpf(float x, int *nptr) -{ - return hc::precise_math::frexpf(x, nptr); -} -__device__ float hypotf(float x, float y) -{ - return hc::precise_math::hypotf(x, y); -} -__device__ float ilogbf(float x) -{ - return hc::precise_math::ilogbf(x); -} -__device__ unsigned isfinite(float a) -{ - return hc::precise_math::isfinite(a); -} -__device__ unsigned isinf(float a) -{ - return hc::precise_math::isinf(a); -} -__device__ unsigned isnan(float a) -{ - return hc::precise_math::isnan(a); -} -__device__ float j0f(float x) -{ - return __hip_j0f(x); -} -__device__ float j1f(float x) -{ - return __hip_j1f(x); -} -__device__ float jnf(int n, float x) -{ - return __hip_jnf(n, x); -} -__device__ float ldexpf(float x, int exp) -{ - return hc::precise_math::ldexpf(x, exp); -} -__device__ float lgammaf(float x, int *sign) -{ - return hc::precise_math::lgammaf(x, sign); -} -__device__ long long int llrintf(float x) -{ - int y = hc::precise_math::roundf(x); - long long int z = y; - return z; -} -__device__ long long int llroundf(float x) -{ - int y = hc::precise_math::roundf(x); - long long int z = y; - return z; -}__device__ float log10f(float x) -{ - return hc::precise_math::log10f(x); -} -__device__ float log1pf(float x) -{ - return hc::precise_math::log1pf(x); -} -__device__ float log2f(float x) -{ - return hc::precise_math::log2f(x); -} -__device__ float logbf(float x) -{ - return hc::precise_math::logbf(x); -} -__device__ float logf(float x) -{ - return hc::precise_math::logf(x); -} -__device__ long int lrintf(float x) -{ - int y = hc::precise_math::roundf(x); - long int z = y; - return z; -} -__device__ long int lroundf(float x) -{ - long int y = hc::precise_math::roundf(x); - return y; -} -__device__ float modff(float x, float *iptr) -{ - return hc::precise_math::modff(x, iptr); -} -__device__ float nanf(const char* tagp) -{ - return hc::precise_math::nanf((int)*tagp); -} -__device__ float nearbyintf(float x) -{ - return hc::precise_math::nearbyintf(x); -} -__device__ float nextafterf(float x, float y) -{ - return hc::precise_math::nextafter(x, y); -} -__device__ float norm3df(float a, float b, float c) -{ - float x = a*a + b*b + c*c; - return hc::precise_math::sqrtf(x); -} -__device__ float norm4df(float a, float b, float c, float d) -{ - float x = a*a + b*b; - float y = c*c + d*d; - return hc::precise_math::sqrtf(x+y); -} - -__device__ float normcdff(float y) -{ - return ((hc::precise_math::erff(y)/1.41421356237) + 1)/2; -} -__device__ float normcdfinvf(float y) -{ - return HIP_SQRT_2 * __hip_erfinvf(2*y-1); -} -__device__ float normf(int dim, const float *a) -{ - float x = 0.0f; - for(int i=0;i + /* Heap size computation for malloc and free device functions. */ @@ -35,4 +37,119 @@ THE SOFTWARE. #define SIZE_MALLOC NUM_PAGES * SIZE_OF_PAGE #define SIZE_OF_HEAP SIZE_MALLOC +#define HIP_SQRT_2 1.41421356237 +#define HIP_SQRT_PI 1.77245385091 + +#define __hip_erfinva3 -0.140543331 +#define __hip_erfinva2 0.914624893 +#define __hip_erfinva1 -1.645349621 +#define __hip_erfinva0 0.886226899 + +#define __hip_erfinvb4 0.012229801 +#define __hip_erfinvb3 -0.329097515 +#define __hip_erfinvb2 1.442710462 +#define __hip_erfinvb1 -2.118377725 +#define __hip_erfinvb0 1 + +#define __hip_erfinvc3 1.641345311 +#define __hip_erfinvc2 3.429567803 +#define __hip_erfinvc1 -1.62490649 +#define __hip_erfinvc0 -1.970840454 + +#define __hip_erfinvd2 1.637067800 +#define __hip_erfinvd1 3.543889200 +#define __hip_erfinvd0 1 + +#define HIP_PI 3.14159265358979323846 + +__device__ void* __hip_hc_malloc(size_t size); +__device__ void* __hip_hc_free(void* ptr); + +__device__ float __hip_erfinvf(float x); +__device__ double __hip_erfinv(double x); + +__device__ float __hip_j0f(float x); +__device__ double __hip_j0(double x); + +__device__ float __hip_j1f(float x); +__device__ double __hip_j1(double x); + +__device__ float __hip_y0f(float x); +__device__ double __hip_y0(double x); + +__device__ float __hip_y1f(float x); +__device__ double __hip_y1(double x); + +__device__ float __hip_jnf(int n, float x); +__device__ double __hip_jn(int n, double x); + +__device__ float __hip_ynf(int n, float x); +__device__ double __hip_yn(int n, double x); + +__device__ float __hip_precise_cosf(float x); +__device__ float __hip_precise_exp10f(float x); +__device__ float __hip_precise_expf(float x); +__device__ float __hip_precise_frsqrt_rn(float x); +__device__ float __hip_precise_fsqrt_rd(float x); +__device__ float __hip_precise_fsqrt_rn(float x); +__device__ float __hip_precise_fsqrt_ru(float x); +__device__ float __hip_precise_fsqrt_rz(float x); +__device__ float __hip_precise_log10f(float x); +__device__ float __hip_precise_log2f(float x); +__device__ float __hip_precise_logf(float x); +__device__ float __hip_precise_powf(float base, float exponent); +__device__ void __hip_precise_sincosf(float x, float *s, float *c); +__device__ float __hip_precise_sinf(float x); +__device__ float __hip_precise_tanf(float x); +// Double Precision Math +__device__ double __hip_precise_dsqrt_rd(double x); +__device__ double __hip_precise_dsqrt_rn(double x); +__device__ double __hip_precise_dsqrt_ru(double x); +__device__ double __hip_precise_dsqrt_rz(double x); + + + +// Float Fast Math +__device__ float __hip_fast_exp10f(float x); +__device__ float __hip_fast_expf(float x); +__device__ float __hip_fast_frsqrt_rn(float x); +__device__ float __hip_fast_fsqrt_rn(float x); +__device__ float __hip_fast_fsqrt_ru(float x); +__device__ float __hip_fast_fsqrt_rz(float x); +__device__ float __hip_fast_log10f(float x); +__device__ float __hip_fast_logf(float x); +__device__ float __hip_fast_powf(float base, float exponent); +__device__ void __hip_fast_sincosf(float x, float *s, float *c); +__device__ float __hip_fast_tanf(float x); +// Double Precision Math +__device__ double __hip_fast_dsqrt_rd(double x); +__device__ double __hip_fast_dsqrt_rn(double x); +__device__ double __hip_fast_dsqrt_ru(double x); +__device__ double __hip_fast_dsqrt_rz(double x); +__device__ void __threadfence_system(void); + +float __hip_host_erfinvf(float x); +double __hip_host_erfinv(double x); + +float __hip_host_erfcinvf(float y); +double __hip_host_erfcinv(double y); + +float __hip_host_j0f(float x); +double __hip_host_j0(double x); + +float __hip_host_j1f(float x); +double __hip_host_j1(double x); + +float __hip_host_y0f(float x); +double __hip_host_y1(double x); + +float __hip_host_y1f(float x); +double __hip_host_y1(double x); + +float __hip_host_jnf(int n, float x); +double __hip_host_jn(int n, double x); + +float __hip_host_ynf(int n, float x); +double __hip_host_yn(int n, double x); + #endif diff --git a/src/math_functions.cpp b/src/math_functions.cpp new file mode 100644 index 0000000000..34a80448db --- /dev/null +++ b/src/math_functions.cpp @@ -0,0 +1,971 @@ + +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include "device_util.h" +#include "hip/hcc_detail/device_functions.h" +#include "hip/hip_runtime.h" + +__device__ float acosf(float x) +{ + return hc::precise_math::acosf(x); +} +__device__ float acoshf(float x) +{ + return hc::precise_math::acoshf(x); +} +__device__ float asinf(float x) +{ + return hc::precise_math::asinf(x); +} +__device__ float asinhf(float x) +{ + return hc::precise_math::asinhf(x); +} +__device__ float atan2f(float y, float x) +{ + return hc::precise_math::atan2f(x, y); +} +__device__ float atanf(float x) +{ + return hc::precise_math::atanf(x); +} +__device__ float atanhf(float x) +{ + return hc::precise_math::atanhf(x); +} +__device__ float cbrtf(float x) +{ + return hc::precise_math::cbrtf(x); +} +__device__ float ceilf(float x) +{ + return hc::precise_math::ceilf(x); +} +__device__ float copysignf(float x, float y) +{ + return hc::precise_math::copysignf(x, y); +} +__device__ float cosf(float x) +{ + return hc::precise_math::cosf(x); +} +__device__ float coshf(float x) +{ + return hc::precise_math::coshf(x); +} +__device__ float cyl_bessel_i0f(float x); +__device__ float cyl_bessel_i1f(float x); +__device__ float erfcf(float x) +{ + return hc::precise_math::erfcf(x); +} +__device__ float erfcinvf(float y) +{ + return __hip_erfinvf(1 - y); +} +__device__ float erfcxf(float x) +{ + return hc::precise_math::expf(x*x)*hc::precise_math::erfcf(x); +} +__device__ float erff(float x) +{ + return hc::precise_math::erff(x); +} +__device__ float erfinvf(float y) +{ + return __hip_erfinvf(y); +} +__device__ float exp10f(float x) +{ + return hc::precise_math::exp10f(x); +} +__device__ float exp2f(float x) +{ + return hc::precise_math::exp2f(x); +} +__device__ float expf(float x) +{ + return hc::precise_math::expf(x); +} +__device__ float expm1f(float x) +{ + return hc::precise_math::expm1f(x); +} +__device__ float fabsf(float x) +{ + return hc::precise_math::fabsf(x); +} +__device__ float fdimf(float x, float y) +{ + return hc::precise_math::fdimf(x, y); +} +__device__ float fdividef(float x, float y) +{ + return x/y; +} +__device__ float floorf(float x) +{ + return hc::precise_math::floorf(x); +} +__device__ float fmaf(float x, float y, float z) +{ + return hc::precise_math::fmaf(x, y, z); +} +__device__ float fmaxf(float x, float y) +{ + return hc::precise_math::fmaxf(x, y); +} +__device__ float fminf(float x, float y) +{ + return hc::precise_math::fminf(x, y); +} +__device__ float fmodf(float x, float y) +{ + return hc::precise_math::fmodf(x, y); +} +__device__ float frexpf(float x, int *nptr) +{ + return hc::precise_math::frexpf(x, nptr); +} +__device__ float hypotf(float x, float y) +{ + return hc::precise_math::hypotf(x, y); +} +__device__ float ilogbf(float x) +{ + return hc::precise_math::ilogbf(x); +} +__device__ unsigned isfinite(float a) +{ + return hc::precise_math::isfinite(a); +} +__device__ unsigned isinf(float a) +{ + return hc::precise_math::isinf(a); +} +__device__ unsigned isnan(float a) +{ + return hc::precise_math::isnan(a); +} +__device__ float j0f(float x) +{ + return __hip_j0f(x); +} +__device__ float j1f(float x) +{ + return __hip_j1f(x); +} +__device__ float jnf(int n, float x) +{ + return __hip_jnf(n, x); +} +__device__ float ldexpf(float x, int exp) +{ + return hc::precise_math::ldexpf(x, exp); +} +__device__ float lgammaf(float x, int *sign) +{ + return hc::precise_math::lgammaf(x, sign); +} +__device__ long long int llrintf(float x) +{ + int y = hc::precise_math::roundf(x); + long long int z = y; + return z; +} +__device__ long long int llroundf(float x) +{ + int y = hc::precise_math::roundf(x); + long long int z = y; + return z; +}__device__ float log10f(float x) +{ + return hc::precise_math::log10f(x); +} +__device__ float log1pf(float x) +{ + return hc::precise_math::log1pf(x); +} +__device__ float log2f(float x) +{ + return hc::precise_math::log2f(x); +} +__device__ float logbf(float x) +{ + return hc::precise_math::logbf(x); +} +__device__ float logf(float x) +{ + return hc::precise_math::logf(x); +} +__device__ long int lrintf(float x) +{ + int y = hc::precise_math::roundf(x); + long int z = y; + return z; +} +__device__ long int lroundf(float x) +{ + long int y = hc::precise_math::roundf(x); + return y; +} +__device__ float modff(float x, float *iptr) +{ + return hc::precise_math::modff(x, iptr); +} +__device__ float nanf(const char* tagp) +{ + return hc::precise_math::nanf((int)*tagp); +} +__device__ float nearbyintf(float x) +{ + return hc::precise_math::nearbyintf(x); +} +__device__ float nextafterf(float x, float y) +{ + return hc::precise_math::nextafter(x, y); +} +__device__ float norm3df(float a, float b, float c) +{ + float x = a*a + b*b + c*c; + return hc::precise_math::sqrtf(x); +} +__device__ float norm4df(float a, float b, float c, float d) +{ + float x = a*a + b*b; + float y = c*c + d*d; + return hc::precise_math::sqrtf(x+y); +} + +__device__ float normcdff(float y) +{ + return ((hc::precise_math::erff(y)/1.41421356237) + 1)/2; +} +__device__ float normcdfinvf(float y) +{ + return HIP_SQRT_2 * __hip_erfinvf(2*y-1); +} +__device__ float normf(int dim, const float *a) +{ + float x = 0.0f; + for(int i=0;i +#include #include "test_common.h" #pragma GCC diagnostic ignored "-Wall" @@ -27,18 +28,18 @@ THE SOFTWARE. __device__ void double_precision_intrinsics() { - //__dadd_rd(0.0, 1.0); - //__dadd_rn(0.0, 1.0); - //__dadd_ru(0.0, 1.0); - //__dadd_rz(0.0, 1.0); - //__ddiv_rd(0.0, 1.0); - //__ddiv_rn(0.0, 1.0); - //__ddiv_ru(0.0, 1.0); - //__ddiv_rz(0.0, 1.0); - //__dmul_rd(1.0, 2.0); - //__dmul_rn(1.0, 2.0); - //__dmul_ru(1.0, 2.0); - //__dmul_rz(1.0, 2.0); + __dadd_rd(0.0, 1.0); + __dadd_rn(0.0, 1.0); + __dadd_ru(0.0, 1.0); + __dadd_rz(0.0, 1.0); + __ddiv_rd(0.0, 1.0); + __ddiv_rn(0.0, 1.0); + __ddiv_ru(0.0, 1.0); + __ddiv_rz(0.0, 1.0); + __dmul_rd(1.0, 2.0); + __dmul_rn(1.0, 2.0); + __dmul_ru(1.0, 2.0); + __dmul_rz(1.0, 2.0); __drcp_rd(2.0); __drcp_rn(2.0); __drcp_ru(2.0); @@ -47,10 +48,10 @@ __device__ void double_precision_intrinsics() __dsqrt_rn(4.0); __dsqrt_ru(4.0); __dsqrt_rz(4.0); - //__dsub_rd(2.0, 1.0); - //__dsub_rn(2.0, 1.0); - //__dsub_ru(2.0, 1.0); - //__dsub_rz(2.0, 1.0); + __dsub_rd(2.0, 1.0); + __dsub_rn(2.0, 1.0); + __dsub_ru(2.0, 1.0); + __dsub_rz(2.0, 1.0); __fma_rd(1.0, 2.0, 3.0); __fma_rn(1.0, 2.0, 3.0); __fma_ru(1.0, 2.0, 3.0); diff --git a/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp b/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp index 996577e840..537fcbbba8 100644 --- a/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp +++ b/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp @@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "hip/hip_runtime.h" +#include +#include #include "test_common.h" #pragma GCC diagnostic ignored "-Wall" @@ -43,8 +44,8 @@ __device__ void double_precision_math_functions() cos(0.0); cosh(0.0); cospi(0.0); - //cyl_bessel_i0(0.0); - //cyl_bessel_i1(0.0); + cyl_bessel_i0(0.0); + cyl_bessel_i1(0.0); erf(0.0); erfc(0.0); erfcinv(2.0); @@ -61,7 +62,7 @@ __device__ void double_precision_math_functions() fmax(0.0, 0.0); fmin(0.0, 0.0); fmod(0.0, 1.0); - //frexp(0.0, &iX); + frexp(0.0, &iX); hypot(1.0, 0.0); ilogb(1.0); isfinite(0.0); @@ -71,7 +72,7 @@ __device__ void double_precision_math_functions() j1(0.0); jn(-1.0, 1.0); ldexp(0.0, 0); - //lgamma(1.0); + lgamma(1.0); llrint(0.0); llround(0.0); log(1.0); @@ -81,19 +82,19 @@ __device__ void double_precision_math_functions() logb(1.0); lrint(0.0); lround(0.0); - //modf(0.0, &fX); + modf(0.0, &fX); nan("1"); nearbyint(0.0); - //nextafter(0.0); - //fX = 1.0; norm(1, &fX); + nextafter(0.0, 0.0); + fX = 1.0; norm(1, &fX); norm3d(1.0, 0.0, 0.0); norm4d(1.0, 0.0, 0.0, 0.0); normcdf(0.0); - //normcdfinv(1.0); + normcdfinv(1.0); pow(1.0, 0.0); rcbrt(1.0); remainder(2.0, 1.0); - //remquo(1.0, 2.0, &iX); + remquo(1.0, 2.0, &iX); rhypot(0.0, 1.0); rint(1.0); fX = 1.0; rnorm(1, &fX); diff --git a/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp b/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp index 9980dad277..eff39102c6 100644 --- a/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp +++ b/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp @@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "hip/hip_runtime.h" +#include +#include #include "test_common.h" #pragma GCC diagnostic ignored "-Wall" @@ -85,7 +86,7 @@ __host__ void double_precision_math_functions() nan("1"); nearbyint(0.0); //nextafter(0.0); - //fX = 1.0; norm(1, &fX); + fX = 1.0; norm(1, &fX); #if defined(__HIP_PLATFORM_HCC__) norm3d(1.0, 0.0, 0.0); norm4d(1.0, 0.0, 0.0, 0.0); diff --git a/tests/src/deviceLib/hipFloatMath.cpp b/tests/src/deviceLib/hipFloatMath.cpp index f137ca2602..7a96b5cd0d 100644 --- a/tests/src/deviceLib/hipFloatMath.cpp +++ b/tests/src/deviceLib/hipFloatMath.cpp @@ -27,6 +27,7 @@ THE SOFTWARE. */ #include "test_common.h" +#include #define LEN 512 #define SIZE LEN<<2 diff --git a/tests/src/deviceLib/hipFloatMathPrecise.cpp b/tests/src/deviceLib/hipFloatMathPrecise.cpp index 4f6c2cd44a..12f7875949 100644 --- a/tests/src/deviceLib/hipFloatMathPrecise.cpp +++ b/tests/src/deviceLib/hipFloatMathPrecise.cpp @@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "hip/hip_runtime.h" +#include +#include #include "test_common.h" __global__ void FloatMathPrecise(hipLaunchParm lp) diff --git a/tests/src/deviceLib/hipIntegerIntrinsics.cpp b/tests/src/deviceLib/hipIntegerIntrinsics.cpp index 63530574d8..d712c5a93b 100644 --- a/tests/src/deviceLib/hipIntegerIntrinsics.cpp +++ b/tests/src/deviceLib/hipIntegerIntrinsics.cpp @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "hip/hip_runtime.h" -#include "hip/device_functions.h" +#include +#include #include "test_common.h" #pragma GCC diagnostic ignored "-Wall" diff --git a/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp b/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp index caddcc0149..6737c6ee9d 100644 --- a/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp +++ b/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp @@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "hip/hip_runtime.h" +#include +#include #include "test_common.h" #pragma GCC diagnostic ignored "-Wall" @@ -30,44 +31,44 @@ __device__ void single_precision_intrinsics() float fX, fY; __cosf(0.0f); - //__exp10f(0.0f); + __exp10f(0.0f); __expf(0.0f); - //__fadd_rd(0.0f, 1.0f); - //__fadd_rn(0.0f, 1.0f); - //__fadd_ru(0.0f, 1.0f); - //__fadd_rz(0.0f, 1.0f); - //__fdiv_rd(4.0f, 2.0f); - //__fdiv_rn(4.0f, 2.0f); - //__fdiv_ru(4.0f, 2.0f); - //__fdiv_rz(4.0f, 2.0f); - //__fdividef(4.0f, 2.0f); - //__fmaf_rd(1.0f, 2.0f, 3.0f); - //__fmaf_rn(1.0f, 2.0f, 3.0f); - //__fmaf_ru(1.0f, 2.0f, 3.0f); - //__fmaf_rz(1.0f, 2.0f, 3.0f); - //__fmul_rd(1.0f, 2.0f); - //__fmul_rn(1.0f, 2.0f); - //__fmul_ru(1.0f, 2.0f); - //__fmul_rz(1.0f, 2.0f); - //__frcp_rd(2.0f); - //__frcp_rn(2.0f); - //__frcp_ru(2.0f); - //__frcp_rz(2.0f); + __fadd_rd(0.0f, 1.0f); + __fadd_rn(0.0f, 1.0f); + __fadd_ru(0.0f, 1.0f); + __fadd_rz(0.0f, 1.0f); + __fdiv_rd(4.0f, 2.0f); + __fdiv_rn(4.0f, 2.0f); + __fdiv_ru(4.0f, 2.0f); + __fdiv_rz(4.0f, 2.0f); + __fdividef(4.0f, 2.0f); + __fmaf_rd(1.0f, 2.0f, 3.0f); + __fmaf_rn(1.0f, 2.0f, 3.0f); + __fmaf_ru(1.0f, 2.0f, 3.0f); + __fmaf_rz(1.0f, 2.0f, 3.0f); + __fmul_rd(1.0f, 2.0f); + __fmul_rn(1.0f, 2.0f); + __fmul_ru(1.0f, 2.0f); + __fmul_rz(1.0f, 2.0f); + __frcp_rd(2.0f); + __frcp_rn(2.0f); + __frcp_ru(2.0f); + __frcp_rz(2.0f); __frsqrt_rn(4.0f); __fsqrt_rd(4.0f); __fsqrt_rn(4.0f); __fsqrt_ru(4.0f); __fsqrt_rz(4.0f); - //__fsub_rd(2.0f, 1.0f); - //__fsub_rn(2.0f, 1.0f); - //__fsub_ru(2.0f, 1.0f); - //__fsub_rz(2.0f, 1.0f); + __fsub_rd(2.0f, 1.0f); + __fsub_rn(2.0f, 1.0f); + __fsub_ru(2.0f, 1.0f); + __fsub_rz(2.0f, 1.0f); __log10f(1.0f); __log2f(1.0f); __logf(1.0f); __powf(1.0f, 0.0f); - //__saturatef(0.1f); - //__sincosf(0.0f, &fX, &fY); + __saturatef(0.1f); + __sincosf(0.0f, &fX, &fY); __sinf(0.0f); __tanf(0.0f); } diff --git a/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp b/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp index a8c1194aab..4576faed93 100644 --- a/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp +++ b/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp @@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "hip/hip_runtime.h" +#include +#include #include "test_common.h" #pragma GCC diagnostic ignored "-Wall" diff --git a/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp b/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp index 36aa852d81..d48cea5ff6 100644 --- a/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp +++ b/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp @@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "hip/hip_runtime.h" +#include +#include #include "test_common.h" #pragma GCC diagnostic ignored "-Wall" diff --git a/tests/src/deviceLib/hipTestDevice.cpp b/tests/src/deviceLib/hipTestDevice.cpp index 9d90eb7de0..2c7488671b 100644 --- a/tests/src/deviceLib/hipTestDevice.cpp +++ b/tests/src/deviceLib/hipTestDevice.cpp @@ -24,8 +24,9 @@ THE SOFTWARE. */ #include"test_common.h" -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" +#include +#include +#include #define N 512 #define SIZE N*sizeof(float) diff --git a/tests/src/deviceLib/hipTestDeviceDouble.cpp b/tests/src/deviceLib/hipTestDeviceDouble.cpp index c401a44cbd..f4e8ee20b8 100644 --- a/tests/src/deviceLib/hipTestDeviceDouble.cpp +++ b/tests/src/deviceLib/hipTestDeviceDouble.cpp @@ -24,8 +24,9 @@ THE SOFTWARE. */ #include"test_common.h" -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" +#include +#include +#include #define N 512 #define SIZE N*sizeof(double) diff --git a/tests/src/deviceLib/hip_anyall.cpp b/tests/src/deviceLib/hip_anyall.cpp index a562b7810e..bba7915052 100644 --- a/tests/src/deviceLib/hip_anyall.cpp +++ b/tests/src/deviceLib/hip_anyall.cpp @@ -29,7 +29,8 @@ THE SOFTWARE. #include #include -#include "hip/hip_runtime.h" +#include +#include #define HIP_ASSERT(x) (assert((x)==hipSuccess)) __global__ void diff --git a/tests/src/deviceLib/hip_ballot.cpp b/tests/src/deviceLib/hip_ballot.cpp index 629e676bc7..742c47a065 100644 --- a/tests/src/deviceLib/hip_ballot.cpp +++ b/tests/src/deviceLib/hip_ballot.cpp @@ -25,8 +25,8 @@ THE SOFTWARE. #include -#include "hip/hip_runtime.h" -#include "hip/device_functions.h" +#include +#include #define HIP_ASSERT(x) (assert((x)==hipSuccess)) diff --git a/tests/src/deviceLib/hip_brev.cpp b/tests/src/deviceLib/hip_brev.cpp index d9228fe23c..855a8bec47 100644 --- a/tests/src/deviceLib/hip_brev.cpp +++ b/tests/src/deviceLib/hip_brev.cpp @@ -32,7 +32,7 @@ THE SOFTWARE. #include #include #include "hip/hip_runtime.h" -#include "hip/device_functions.h" +#include #define HIP_ASSERT(x) (assert((x)==hipSuccess)) diff --git a/tests/src/deviceLib/hip_clz.cpp b/tests/src/deviceLib/hip_clz.cpp index 869f4406f5..bdb31f3e8d 100644 --- a/tests/src/deviceLib/hip_clz.cpp +++ b/tests/src/deviceLib/hip_clz.cpp @@ -32,7 +32,7 @@ THE SOFTWARE. #include #include #include "hip/hip_runtime.h" -#include "hip/device_functions.h" +#include #define HIP_ASSERT(x) (assert((x)==hipSuccess)) #define WIDTH 8 diff --git a/tests/src/deviceLib/hip_ffs.cpp b/tests/src/deviceLib/hip_ffs.cpp index ba9bd7b9a0..c855ede060 100644 --- a/tests/src/deviceLib/hip_ffs.cpp +++ b/tests/src/deviceLib/hip_ffs.cpp @@ -31,8 +31,8 @@ THE SOFTWARE. #include #include #include -#include "hip/hip_runtime.h" -#include "hip/device_functions.h" +#include +#include #define HIP_ASSERT(x) (assert((x)==hipSuccess)) diff --git a/tests/src/deviceLib/hip_popc.cpp b/tests/src/deviceLib/hip_popc.cpp index 6fe214c7fa..e503e55b42 100644 --- a/tests/src/deviceLib/hip_popc.cpp +++ b/tests/src/deviceLib/hip_popc.cpp @@ -31,8 +31,8 @@ THE SOFTWARE. #include #include #include -#include "hip/hip_runtime.h" -#include "hip/device_functions.h" +#include +#include #define HIP_ASSERT(x) (assert((x)==hipSuccess)) diff --git a/tests/src/deviceLib/hip_trig.cpp b/tests/src/deviceLib/hip_trig.cpp index 7f9b5d60b0..5ec28101f3 100644 --- a/tests/src/deviceLib/hip_trig.cpp +++ b/tests/src/deviceLib/hip_trig.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s + * BUILD: %t %s * RUN: %t * HIT_END */ @@ -30,6 +30,7 @@ THE SOFTWARE. #include #include #include"test_common.h" +#include #define LEN 512 #define SIZE LEN<<2 diff --git a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp index a7cace0ebe..4e343121ed 100644 --- a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp +++ b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp @@ -24,6 +24,7 @@ THE SOFTWARE. #include #include"test_common.h" +#include"hip/math_functions.h" const int NN = 1 << 21; @@ -31,7 +32,7 @@ __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){ int tid = hipThreadIdx_x; if(tid < 1){ for(int i=0;i #include"test_common.h" +#include"hip/math_functions.h" const int NN = 1 << 21; @@ -33,7 +34,7 @@ __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){ int tid = hipThreadIdx_x; if(tid < 1){ for(int i=0;i