diff --git a/projects/clr/hipamd/CMakeLists.txt b/projects/clr/hipamd/CMakeLists.txt index 075b916a4d..46355b5a85 100644 --- a/projects/clr/hipamd/CMakeLists.txt +++ b/projects/clr/hipamd/CMakeLists.txt @@ -196,8 +196,7 @@ if(HIP_PLATFORM STREQUAL "hcc") src/device_util.cpp src/hip_ldg.cpp src/hip_fp16.cpp - src/device_functions.cpp - src/math_functions.cpp) + src/device_functions.cpp) execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic") diff --git a/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h b/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h index 28d874b27a..ca236a1125 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h @@ -26,263 +26,6 @@ THE SOFTWARE. #include #include - -// Single Precision Fast Math -__device__ float __cosf(float x); -__device__ float __exp10f(float x); -__device__ float __expf(float x); -__device__ static float __fadd_rd(float x, float y); -__device__ static float __fadd_rn(float x, float y); -__device__ static float __fadd_ru(float x, float y); -__device__ static float __fadd_rz(float x, float y); -__device__ static float __fdiv_rd(float x, float y); -__device__ static float __fdiv_rn(float x, float y); -__device__ static float __fdiv_ru(float x, float y); -__device__ static float __fdiv_rz(float x, float y); -__device__ static float __fdividef(float x, float y); -__device__ float __fmaf_rd(float x, float y, float z); -__device__ float __fmaf_rn(float x, float y, float z); -__device__ float __fmaf_ru(float x, float y, float z); -__device__ float __fmaf_rz(float x, float y, float z); -__device__ static float __fmul_rd(float x, float y); -__device__ static float __fmul_rn(float x, float y); -__device__ static float __fmul_ru(float x, float y); -__device__ static float __fmul_rz(float x, float y); -__device__ float __frcp_rd(float x); -__device__ float __frcp_rn(float x); -__device__ float __frcp_ru(float x); -__device__ float __frcp_rz(float x); -__device__ float __frsqrt_rn(float x); -__device__ float __fsqrt_rd(float x); -__device__ float __fsqrt_rn(float x); -__device__ float __fsqrt_ru(float x); -__device__ float __fsqrt_rz(float x); -__device__ static float __fsub_rd(float x, float y); -__device__ static float __fsub_rn(float x, float y); -__device__ static float __fsub_ru(float x, float y); -__device__ float __log10f(float x); -__device__ float __log2f(float x); -__device__ float __logf(float x); -__device__ float __powf(float base, float exponent); -__device__ static float __saturatef(float x); -__device__ void __sincosf(float x, float* s, float* c); -__device__ float __sinf(float x); -__device__ float __tanf(float x); - - -/* -Double Precision Intrinsics -*/ - -__device__ static double __dadd_rd(double x, double y); -__device__ static double __dadd_rn(double x, double y); -__device__ static double __dadd_ru(double x, double y); -__device__ static double __dadd_rz(double x, double y); -__device__ static double __ddiv_rd(double x, double y); -__device__ static double __ddiv_rn(double x, double y); -__device__ static double __ddiv_ru(double x, double y); -__device__ static double __ddiv_rz(double x, double y); -__device__ static double __dmul_rd(double x, double y); -__device__ static double __dmul_rn(double x, double y); -__device__ static double __dmul_ru(double x, double y); -__device__ static double __dmul_rz(double x, double y); -__device__ double __drcp_rd(double x); -__device__ double __drcp_rn(double x); -__device__ double __drcp_ru(double x); -__device__ double __drcp_rz(double x); -__device__ double __dsqrt_rd(double x); -__device__ double __dsqrt_rn(double x); -__device__ double __dsqrt_ru(double x); -__device__ double __dsqrt_rz(double x); -__device__ static double __dsub_rd(double x, double y); -__device__ static double __dsub_rn(double x, double y); -__device__ static double __dsub_ru(double x, double y); -__device__ static double __dsub_rz(double x, double y); -__device__ double __fma_rd(double x, double y, double z); -__device__ double __fma_rn(double x, double y, double z); -__device__ double __fma_ru(double x, double y, double z); -__device__ double __fma_rz(double x, double y, double z); - -// Single Precision Fast Math -extern __device__ __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32"); -extern __device__ __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32"); -__device__ float __hip_fast_exp10f(float); -__device__ float __hip_fast_expf(float); -__device__ float __hip_fast_frsqrt_rn(float); -extern __device__ __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32"); -__device__ float __hip_fast_fsqrt_rn(float); -__device__ float __hip_fast_fsqrt_ru(float); -__device__ float __hip_fast_fsqrt_rz(float); -__device__ float __hip_fast_log10f(float); -extern __device__ __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32"); -__device__ float __hip_fast_logf(float); -__device__ float __hip_fast_powf(float, float); -__device__ void __hip_fast_sincosf(float, float*, float*); -extern __device__ __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32"); -__device__ float __hip_fast_tanf(float); -extern __device__ __attribute__((const)) float __hip_fast_fmaf(float, float, float) __asm("llvm.fma.f32"); -extern __device__ __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32"); - -extern __device__ __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64"); -extern __device__ __attribute__((const)) double __hip_fast_fma(double, double, double) __asm("llvm.fma.f64"); -extern __device__ __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64"); - - -// Single Precision Fast Math -__device__ inline float __cosf(float x) { return __hip_fast_cosf(x); } - -__device__ inline float __exp10f(float x) { return __hip_fast_exp10f(x); } - -__device__ inline float __expf(float x) { return __hip_fast_expf(x); } - -__device__ static inline float __fadd_rd(float x, float y) { return x + y; } - -__device__ static inline float __fadd_rn(float x, float y) { return x + y; } - -__device__ static inline float __fadd_ru(float x, float y) { return x + y; } - -__device__ static inline float __fadd_rz(float x, float y) { return x + y; } - -__device__ static inline float __fdiv_rd(float x, float y) { return x / y; } - -__device__ static inline float __fdiv_rn(float x, float y) { return x / y; } - -__device__ static inline float __fdiv_ru(float x, float y) { return x / y; } - -__device__ static inline float __fdiv_rz(float x, float y) { return x / y; } - -__device__ static inline float __fdividef(float x, float y) { return x / y; } - -__device__ inline float __fmaf_rd(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); } - -__device__ inline float __fmaf_rn(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); } - -__device__ inline float __fmaf_ru(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); } - -__device__ inline float __fmaf_rz(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); } - -__device__ static inline float __fmul_rd(float x, float y) { return x * y; } - -__device__ static inline float __fmul_rn(float x, float y) { return x * y; } - -__device__ static inline float __fmul_ru(float x, float y) { return x * y; } - -__device__ static inline float __fmul_rz(float x, float y) { return x * y; } - -__device__ inline float __frcp_rd(float x) { return __hip_fast_frcp(x); } - -__device__ inline float __frcp_rn(float x) { return __hip_fast_frcp(x); } - -__device__ inline float __frcp_ru(float x) { return __hip_fast_frcp(x); } - -__device__ inline float __frcp_rz(float x) { return __hip_fast_frcp(x); } - -__device__ inline float __frsqrt_rn(float x) { return __hip_fast_frsqrt_rn(x); } - -__device__ inline float __fsqrt_rd(float x) { return __hip_fast_fsqrt_rd(x); } - -__device__ inline float __fsqrt_rn(float x) { return __hip_fast_fsqrt_rn(x); } - -__device__ inline float __fsqrt_ru(float x) { return __hip_fast_fsqrt_ru(x); } - -__device__ inline float __fsqrt_rz(float x) { return __hip_fast_fsqrt_rz(x); } - -__device__ static inline float __fsub_rd(float x, float y) { return x - y; } - -__device__ static inline float __fsub_rn(float x, float y) { return x - y; } - -__device__ static inline float __fsub_ru(float x, float y) { return x - y; } - -__device__ static inline float __fsub_rz(float x, float y) { return x - y; } - - -__device__ inline float __log10f(float x) { return __hip_fast_log10f(x); } - -__device__ inline float __log2f(float x) { return __hip_fast_log2f(x); } - -__device__ inline float __logf(float x) { return __hip_fast_logf(x); } - -__device__ inline float __powf(float base, float exponent) { - return __hip_fast_powf(base, exponent); -} - -__device__ static inline float __saturatef(float x) { - x = x > 1.0f ? 1.0f : x; - x = x < 0.0f ? 0.0f : x; - return x; -} - -__device__ inline void __sincosf(float x, float* s, float* c) { - return __hip_fast_sincosf(x, s, c); -} - -__device__ inline float __sinf(float x) { return __hip_fast_sinf(x); } - -__device__ inline float __tanf(float x) { return __hip_fast_tanf(x); } - - -/* -Double Precision Intrinsics -*/ - -__device__ static inline double __dadd_rd(double x, double y) { return x + y; } - -__device__ static inline double __dadd_rn(double x, double y) { return x + y; } - -__device__ static inline double __dadd_ru(double x, double y) { return x + y; } - -__device__ static inline double __dadd_rz(double x, double y) { return x + y; } - -__device__ static inline double __ddiv_rd(double x, double y) { return x / y; } - -__device__ static inline double __ddiv_rn(double x, double y) { return x / y; } - -__device__ static inline double __ddiv_ru(double x, double y) { return x / y; } - -__device__ static inline double __ddiv_rz(double x, double y) { return x / y; } - -__device__ static inline double __dmul_rd(double x, double y) { return x * y; } - -__device__ static inline double __dmul_rn(double x, double y) { return x * y; } - -__device__ static inline double __dmul_ru(double x, double y) { return x * y; } - -__device__ static inline double __dmul_rz(double x, double y) { return x * y; } - -__device__ inline double __drcp_rd(double x) { return __hip_fast_drcp(x); } - -__device__ inline double __drcp_rn(double x) { return __hip_fast_drcp(x); } - -__device__ inline double __drcp_ru(double x) { return __hip_fast_drcp(x); } - -__device__ inline double __drcp_rz(double x) { return __hip_fast_drcp(x); } - - -__device__ inline double __dsqrt_rd(double x) { return __hip_fast_dsqrt(x); } - -__device__ inline double __dsqrt_rn(double x) { return __hip_fast_dsqrt(x); } - -__device__ inline double __dsqrt_ru(double x) { return __hip_fast_dsqrt(x); } - -__device__ inline double __dsqrt_rz(double x) { return __hip_fast_dsqrt(x); } - -__device__ static inline double __dsub_rd(double x, double y) { return x - y; } - -__device__ static inline double __dsub_rn(double x, double y) { return x - y; } - -__device__ static inline double __dsub_ru(double x, double y) { return x - y; } - -__device__ static inline double __dsub_rz(double x, double y) { return x - y; } - -__device__ inline double __fma_rd(double x, double y, double z) { return __hip_fast_fma(x, y, z); } - -__device__ inline double __fma_rn(double x, double y, double z) { return __hip_fast_fma(x, y, z); } - -__device__ inline double __fma_ru(double x, double y, double z) { return __hip_fast_fma(x, y, z); } - -__device__ inline double __fma_rz(double x, double y, double z) { return __hip_fast_fma(x, y, z); } - - extern "C" __device__ unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int); extern "C" __device__ signed int __hip_hc_ir_mul24_int(signed int, signed int); extern "C" __device__ signed int __hip_hc_ir_mulhi_int(signed int, signed int); diff --git a/projects/clr/hipamd/include/hip/hcc_detail/math_functions.h b/projects/clr/hipamd/include/hip/hcc_detail/math_functions.h index e717df07c1..c11112b1dc 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/math_functions.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/math_functions.h @@ -20,234 +20,1085 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_MATH_FUNCTIONS_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_MATH_FUNCTIONS_H - -#if defined(__HCC__) -#include -#endif +#pragma once +#include "ocml_math_fwd.h" #include -#include -#include -__device__ float acosf(float x); -__device__ float acoshf(float x); -__device__ float asinf(float x); -__device__ float asinhf(float x); -__device__ float atan2f(float y, float x); -__device__ float atanf(float x); -__device__ float atanhf(float x); -__device__ float cbrtf(float x); -__device__ float ceilf(float x); -__device__ float copysignf(float x, float y); -__device__ float cosf(float x); -__device__ float coshf(float x); -__device__ float cospif(float x); -//__device__ float cyl_bessel_i0f(float x); -//__device__ float cyl_bessel_i1f(float x); -__device__ float erfcf(float x); -__device__ float erfcinvf(float y); -__device__ float erfcxf(float x); -__device__ float erff(float x); -__device__ float erfinvf(float y); -__device__ float exp10f(float x); -__device__ float exp2f(float x); -__device__ float expf(float x); -__device__ float expm1f(float x); -__device__ int abs(int x); -__device__ long long abs(long long x); -__device__ float fabsf(float x); -__device__ float fdimf(float x, float y); -__device__ float fdividef(float x, float y); -__device__ float floorf(float x); -__device__ float fmaf(float x, float y, float z); -__device__ float fmaxf(float x, float y); -__device__ float fminf(float x, float y); -__device__ float fmodf(float x, float y); -__device__ float frexpf(float x, int* nptr); -__device__ float hypotf(float x, float y); -__device__ int ilogbf(float x); -__device__ int isfinite(float a); -__device__ unsigned isinf(float a); -__device__ unsigned isnan(float a); -__device__ float j0f(float x); -__device__ float j1f(float x); -__device__ float jnf(int n, float x); -__device__ float ldexpf(float x, int exp); -__device__ float lgammaf(float x); -__device__ long long int llrintf(float x); -__device__ long long int llroundf(float x); -__device__ float log10f(float x); -__device__ float log1pf(float x); -__device__ float logbf(float x); -__device__ long int lrintf(float x); -__device__ long int lroundf(float x); -//__device__ float modff(float x, float *iptr); -__device__ float nanf(const char* tagp); -__device__ float nearbyintf(float x); -//__device__ float nextafterf(float x, float y); -__device__ float norm3df(float a, float b, float c); -__device__ float norm4df(float a, float b, float c, float d); -__device__ float normcdff(float y); -__device__ float normcdfinvf(float y); -__device__ float normf(int dim, const float* a); -__device__ float powf(float x, float y); -__device__ float rcbrtf(float x); -__device__ float remainderf(float x, float y); -__device__ float remquof(float x, float y, int* quo); -__device__ float rhypotf(float x, float y); -__device__ float rintf(float x); -__device__ float rnorm3df(float a, float b, float c); -__device__ float rnorm4df(float a, float b, float c, float d); -__device__ float rnormf(int dim, const float* a); -__device__ float roundf(float x); -__device__ float rsqrtf(float x); -__device__ float scalblnf(float x, long int n); -__device__ float scalbnf(float x, int n); -__device__ int signbit(float a); -__device__ void sincosf(float x, float* sptr, float* cptr); -__device__ void sincospif(float x, float* sptr, float* cptr); -__device__ float sinf(float x); -__device__ float sinhf(float x); -__device__ float sinpif(float x); -__device__ float sqrtf(float x); -__device__ float tanf(float x); -__device__ float tanhf(float x); -__device__ float tgammaf(float x); -__device__ float truncf(float x); -__device__ float y0f(float x); -__device__ float y1f(float x); -__device__ float ynf(int n, float x); +#include +#include -__device__ double acos(double x); -__device__ double acosh(double x); -__device__ double asin(double x); -__device__ double asinh(double x); -__device__ double atan(double x); -__device__ double atan2(double y, double x); -__device__ double atanh(double x); -__device__ double cbrt(double x); -__device__ double ceil(double x); -__device__ double copysign(double x, double y); -__device__ double cos(double x); -__device__ double cosh(double x); -__device__ double cospi(double x); -//__device__ double cyl_bessel_i0(double x); -//__device__ double cyl_bessel_i1(double x); -__device__ double erf(double x); -__device__ double erfc(double x); -__device__ double erfcinv(double y); -__device__ double erfcx(double x); -__device__ double erfinv(double x); -__device__ double exp(double x); -__device__ double exp10(double x); -__device__ double exp2(double x); -__device__ double expm1(double x); -__device__ double fabs(double x); -__device__ double fdim(double x, double y); -__device__ double floor(double x); -__device__ double fma(double x, double y, double z); -__device__ double fmax(double x, double y); -__device__ double fmin(double x, double y); -__device__ double fmod(double x, double y); -__device__ double frexp(double x, int* nptr); -__device__ double hypot(double x, double y); -__device__ int ilogb(double x); -__device__ int isfinite(double x); -__device__ unsigned isinf(double x); -__device__ unsigned isnan(double x); -__device__ double j0(double x); -__device__ double j1(double x); -__device__ double jn(int n, double x); -__device__ double ldexp(double x, int exp); -__device__ double lgamma(double x); -__device__ long long llrint(double x); -__device__ long long llround(double x); -__device__ double log(double x); -__device__ double log10(double x); -__device__ double log1p(double x); -__device__ double log2(double x); -__device__ double logb(double x); -__device__ long int lrint(double x); -__device__ long int lround(double x); -//__device__ double modf(double x, double *iptr); -__device__ double nan(const char* tagp); -__device__ double nearbyint(double x); -__device__ double nextafter(double x, double y); -__device__ double norm(int dim, const double* t); -__device__ double norm3d(double a, double b, double c); -__device__ double norm4d(double a, double b, double c, double d); -__device__ double normcdf(double y); -__device__ double normcdfinv(double y); -__device__ double pow(double x, double y); -__device__ double rcbrt(double x); -__device__ double remainder(double x, double y); -//__device__ double remquo(double x, double y, int *quo); -__device__ double rhypot(double x, double y); -__device__ double rint(double x); -__device__ double rnorm(int dim, const double* t); -__device__ double rnorm3d(double a, double b, double c); -__device__ double rnorm4d(double a, double b, double c, double d); -__device__ double round(double x); -__device__ double rsqrt(double x); -__device__ double scalbln(double x, long int n); -__device__ double scalbn(double x, int n); -__device__ int signbit(double a); -__device__ double sin(double a); -__device__ void sincos(double x, double* sptr, double* cptr); -__device__ void sincospi(double x, double* sptr, double* cptr); -__device__ double sinh(double x); -__device__ double sinpi(double x); -__device__ double sqrt(double x); -__device__ double tan(double x); -__device__ double tanh(double x); -__device__ double tgamma(double x); -__device__ double trunc(double x); -__device__ double y0(double x); -__device__ double y1(double y); -__device__ double yn(int n, double x); +__device__ +inline +uint64_t __make_mantissa_base8(const char* tagp) +{ + uint64_t r = 0; + while (tagp) { + char tmp = *tagp; -// ENDPARSER + if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0'; + else return 0; -#ifdef HIP_FAST_MATH -// Single Precision Precise Math when enabled + ++tagp; + } -__device__ inline float cosf(float x) { return __hip_fast_cosf(x); } + return r; +} -__device__ inline float exp10f(float x) { return __hip_fast_exp10f(x); } +__device__ +inline +uint64_t __make_mantissa_base10(const char* tagp) +{ + uint64_t r = 0; + while (tagp) { + char tmp = *tagp; -__device__ inline float expf(float x) { return __hip_fast_expf(x); } + if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0'; + else return 0; -__device__ inline float log10f(float x) { return __hip_fast_log10f(x); } + ++tagp; + } -__device__ inline float log2f(float x) { return __hip_fast_log2f(x); } + return r; +} -__device__ inline float logf(float x) { return __hip_fast_logf(x); } +__device__ +inline +uint64_t __make_mantissa_base16(const char* tagp) +{ + uint64_t r = 0; + while (tagp) { + char tmp = *tagp; -__device__ inline float powf(float base, float exponent) { return __hip_fast_powf(base, exponent); } + if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0'; + else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10; + else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10; + else return 0; -__device__ inline void sincosf(float x, float* s, float* c) { return __hip_fast_sincosf(x, s, c); } + ++tagp; + } -__device__ inline float sinf(float x) { return __hip_fast_sinf(x); } + return r; +} -__device__ inline float tanf(float x) { return __hip_fast_tanf(x); } +__device__ +inline +uint64_t __make_mantissa(const char* tagp) +{ + if (!tagp) return 0u; -#else + if (*tagp == '0') { + ++tagp; -__device__ float sinf(float); -__device__ float cosf(float); -__device__ float tanf(float); -__device__ void sincosf(float, float*, float*); -__device__ float logf(float); -__device__ float log2f(float); -__device__ float log10f(float); -__device__ float expf(float); -__device__ float exp10f(float); -__device__ float powf(float, float); + if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp); + else return __make_mantissa_base8(tagp); + } -#endif + return __make_mantissa_base10(tagp); +} +// BEGIN FLOAT +__device__ +inline +float acosf(float x) { return __ocml_acos_f32(x); } +__device__ +inline +float acoshf(float x) { return __ocml_acosh_f32(x); } +__device__ +inline +float asinf(float x) { return __ocml_asin_f32(x); } +__device__ +inline +float asinhf(float x) { return __ocml_asinh_f32(x); } +__device__ +inline +float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); } +__device__ +inline +float atanf(float x) { return __ocml_atan_f32(x); } +__device__ +inline +float atanhf(float x) { return __ocml_atanh_f32(x); } +__device__ +inline +float cbrtf(float x) { return __ocml_cbrt_f32(x); } +__device__ +inline +float ceilf(float x) { return __ocml_ceil_f32(x); } +__device__ +inline +float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); } +__device__ +inline +float cosf(float x) { return __ocml_cos_f32(x); } +__device__ +inline +float coshf(float x) { return __ocml_cosh_f32(x); } +__device__ +inline +float cospif(float x) { return __ocml_cospi_f32(x); } +__device__ +inline +float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); } +__device__ +inline +float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); } +__device__ +inline +float erfcf(float x) { return __ocml_erfc_f32(x); } +__device__ +inline +float erfcinvf(float x) { return __ocml_erfcinv_f32(x); } +__device__ +inline +float erfcxf(float x) { return __ocml_erfcx_f32(x); } +__device__ +inline +float erff(float x) { return __ocml_erf_f32(x); } +__device__ +inline +float erfinvf(float x) { return __ocml_erfinv_f32(x); } +__device__ +inline +float exp10f(float x) { return __ocml_exp10_f32(x); } +__device__ +inline +float exp2f(float x) { return __ocml_exp2_f32(x); } +__device__ +inline +float expf(float x) { return __ocml_exp_f32(x); } +__device__ +inline +float expm1f(float x) { return __ocml_expm1_f32(x); } +__device__ +inline +float fabsf(float x) { return __ocml_fabs_f32(x); } +__device__ +inline +float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); } +__device__ +inline +float fdividef(float x, float y) { return x / y; } +__device__ +inline +float floorf(float x) { return __ocml_floor_f32(x); } +__device__ +inline +float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); } +__device__ +inline +float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); } +__device__ +inline +float fminf(float x, float y) { return __ocml_fmin_f32(x, y); } +__device__ +inline +float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); } +__device__ +inline +float frexpf(float x, int* nptr) +{ + int tmp; + float r = + __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp); + *nptr = tmp; -#endif + return r; +} +__device__ +inline +float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); } +__device__ +inline +int ilogbf(float x) { return __ocml_ilogb_f32(x); } +__device__ +inline +int isfinite(float x) { return __ocml_isfinite_f32(x); } +__device__ +inline +int isinf(float x) { return __ocml_isinf_f32(x); } +__device__ +inline +int isnan(float x) { return __ocml_isnan_f32(x); } +__device__ +inline +float j0f(float x) { return __ocml_j0_f32(x); } +__device__ +inline +float j1f(float x) { return __ocml_j1_f32(x); } +__device__ +inline +float jnf(int n, float x) +{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. + if (n == 0) return j0f(x); + if (n == 1) return j1f(x); + + float x0 = j0f(x); + float x1 = j1f(x); + for (int i = 1; i < n; ++i) { + float x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} +__device__ +inline +float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); } +__device__ +inline +float lgammaf(float x) { return __ocml_lgamma_f32(x); } +__device__ +inline +long long int llrintf(float x) { return __ocml_rint_f32(x); } +__device__ +inline +long long int llroundf(float x) { return __ocml_round_f32(x); } +__device__ +inline +float log10f(float x) { return __ocml_log10_f32(x); } +__device__ +inline +float log1pf(float x) { return __ocml_log1p_f32(x); } +__device__ +inline +float log2f(float x) { return __ocml_log2_f32(x); } +__device__ +inline +float logbf(float x) { return __ocml_logb_f32(x); } +__device__ +inline +float logf(float x) { return __ocml_log_f32(x); } +__device__ +inline +long int lrintf(float x) { return __ocml_rint_f32(x); } +__device__ +inline +long int lroundf(float x) { return __ocml_round_f32(x); } +__device__ +inline +float modff(float x, float* iptr) +{ + float tmp; + float r = + __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp); + *iptr = tmp; + + return r; +} +__device__ +inline +float nanf(const char* tagp) +{ + union { + float val; + struct ieee_float { + uint32_t mantissa : 22; + uint32_t quiet : 1; + uint32_t exponent : 8; + uint32_t sign : 1; + } bits; + + static_assert(sizeof(float) == sizeof(ieee_float), ""); + } tmp; + + tmp.bits.sign = 0u; + tmp.bits.exponent = ~0u; + tmp.bits.quiet = 1u; + tmp.bits.mantissa = __make_mantissa(tagp); + + return tmp.val; +} +__device__ +inline +float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } +__device__ +inline +float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); } +__device__ +inline +float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); } +__device__ +inline +float norm4df(float x, float y, float z, float w) +{ + return __ocml_len4_f32(x, y, z, w); +} +__device__ +inline +float normcdff(float x) { return __ocml_ncdf_f32(x); } +__device__ +inline +float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); } +__device__ +inline +float normf(int dim, const float* a) +{ + float r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_sqrt_f32(r); +} +__device__ +inline +float powf(float x, float y) { return __ocml_pow_f32(x, y); } +__device__ +inline +float rcbrtf(float x) { return __ocml_rcbrt_f32(x); } +__device__ +inline +float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); } +__device__ +inline +float remquof(float x, float y, int* quo) +{ + int tmp; + float r = + __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp); + *quo = tmp; + + return r; +} +__device__ +inline +float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); } +__device__ +inline +float rintf(float x) { return __ocml_rint_f32(x); } +__device__ +inline +float rnorm3df(float x, float y, float z) +{ + return __ocml_rlen3_f32(x, y, z); +} + +__device__ +inline +float rnorm4df(float x, float y, float z, float w) +{ + return __ocml_rlen4_f32(x, y, z, w); +} +__device__ +inline +float rnormf(int dim, const float* a) +{ + float r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_rsqrt_f32(r); +} +__device__ +inline +float roundf(float x) { return __ocml_round_f32(x); } +__device__ +inline +float rsqrtf(float x) { return __ocml_rsqrt_f32(x); } +__device__ +inline +float scalblnf(float x, long int n) +{ + return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n); +} +__device__ +inline +float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); } +__device__ +inline +int signbit(float x) { return __ocml_signbit_f32(x); } +__device__ +inline +void sincosf(float x, float* sptr, float* cptr) +{ + float tmp; + + *sptr = + __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp); + *cptr = tmp; +} +__device__ +inline +void sincospif(float x, float* sptr, float* cptr) +{ + float tmp; + + *sptr = + __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp); + *cptr = tmp; +} +__device__ +inline +float sinf(float x) { return __ocml_sin_f32(x); } +__device__ +inline +float sinhf(float x) { return __ocml_sinh_f32(x); } +__device__ +inline +float sinpif(float x) { return __ocml_sinpi_f32(x); } +__device__ +inline +float sqrtf(float x) { return __ocml_sqrt_f32(x); } +__device__ +inline +float tanf(float x) { return __ocml_tan_f32(x); } +__device__ +inline +float tanhf(float x) { return __ocml_tanh_f32(x); } +__device__ +inline +float tgammaf(float x) { return __ocml_tgamma_f32(x); } +__device__ +inline +float truncf(float x) { return __ocml_trunc_f32(x); } +__device__ +inline +float y0f(float x) { return __ocml_y0_f32(x); } +__device__ +inline +float y1f(float x) { return __ocml_y1_f32(x); } +__device__ +inline +float ynf(int n, float x) +{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. + if (n == 0) return y0f(x); + if (n == 1) return y1f(x); + + float x0 = y0f(x); + float x1 = y1f(x); + for (int i = 1; i < n; ++i) { + float x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} + +// BEGIN INTRINSICS +__device__ +inline +float __cosf(float x) { return __llvm_amdgcn_cos_f32(x); } +__device__ +inline +float __exp10f(float x) { return __ocml_exp10_f32(x); } +__device__ +inline +float __expf(float x) { return __ocml_exp_f32(x); } +__device__ +inline +float __fadd_rd(float x, float y) { return __llvm_add_rte_f32(x, y); } +__device__ +inline +float __fadd_rn(float x, float y) { return __llvm_add_rtn_f32(x, y); } +__device__ +inline +float __fadd_ru(float x, float y) { return __llvm_add_rtp_f32(x, y); } +__device__ +inline +float __fadd_rz(float x, float y) { return __llvm_add_rtz_f32(x, y); } +__device__ +inline +float __fdiv_rd(float x, float y) { return __llvm_div_rte_f32(x, y); } +__device__ +inline +float __fdiv_rn(float x, float y) { return __llvm_div_rtn_f32(x, y); } +__device__ +inline +float __fdiv_ru(float x, float y) { return __llvm_div_rtp_f32(x, y); } +__device__ +inline +float __fdiv_rz(float x, float y) { return __llvm_div_rtz_f32(x, y); } +__device__ +inline +float __fdividef(float x, float y) { return __llvm_div_rte_f32(x, y); } +__device__ +inline +float __fmaf_rd(float x, float y, float z) +{ + return __llvm_fma_rte_f32(x, y, z); +} +__device__ +inline +float __fmaf_rn(float x, float y, float z) +{ + return __llvm_fma_rtn_f32(x, y, z); +} +__device__ +inline +float __fmaf_ru(float x, float y, float z) +{ + return __llvm_fma_rtp_f32(x, y, z); +} +__device__ +inline +float __fmaf_rz(float x, float y, float z) +{ + return __llvm_fma_rtz_f32(x, y, z); +} +__device__ +inline +float __fmul_rd(float x, float y) { return __llvm_mul_rte_f32(x, y); } +__device__ +inline +float __fmul_rn(float x, float y) { return __llvm_mul_rtn_f32(x, y); } +__device__ +inline +float __fmul_ru(float x, float y) { return __llvm_mul_rtp_f32(x, y); } +__device__ +inline +float __fmul_rz(float x, float y) { return __llvm_mul_rtz_f32(x, y); } +__device__ +inline +float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); } +__device__ +inline +float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); } +__device__ +inline +float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); } +__device__ +inline +float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); } +__device__ +inline +float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); } +__device__ +inline +float __fsqrt_rd(float x) { return __llvm_sqrt_rte_f32(x); } +__device__ +inline +float __fsqrt_rn(float x) { return __llvm_sqrt_rtn_f32(x); } +__device__ +inline +float __fsqrt_ru(float x) { return __llvm_sqrt_rtp_f32(x); } +__device__ +inline +float __fsqrt_rz(float x) { return __llvm_sqrt_rtz_f32(x); } +__device__ +inline +float __fsub_rd(float x, float y) { return __llvm_sub_rte_f32(x, y); } +__device__ +inline +float __fsub_rn(float x, float y) { return __llvm_sub_rtn_f32(x, y); } +__device__ +inline +float __fsub_ru(float x, float y) { return __llvm_sub_rtp_f32(x, y); } +__device__ +inline +float __fsub_rz(float x, float y) { return __llvm_sub_rtz_f32(x, y); } +__device__ +inline +float __log10f(float x) { return __ocml_log10_f32(x); } +__device__ +inline +float __log2f(float x) { return __ocml_log2_f32(x); } +__device__ +inline +float __logf(float x) { return __ocml_log_f32(x); } +__device__ +inline +float __powf(float x, float y) { return __ocml_pow_f32(x, y); } +__device__ +inline +float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); } +__device__ +inline +void __sincosf(float x, float* sptr, float* cptr) +{ + float tmp; + + *sptr = + __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp); + *cptr = tmp; +} +__device__ +inline +float __sinf(float x) { return __llvm_amdgcn_sin_f32(x); } +__device__ +inline +float __tanf(float x) { return __ocml_tan_f32(x); } +// END INTRINSICS +// END FLOAT + +// BEGIN DOUBLE +__device__ +inline +double acos(double x) { return __ocml_acos_f64(x); } +__device__ +inline +double acosh(double x) { return __ocml_acosh_f64(x); } +__device__ +inline +double asin(double x) { return __ocml_asin_f64(x); } +__device__ +inline +double asinh(double x) { return __ocml_asinh_f64(x); } +__device__ +inline +double atan(double x) { return __ocml_atan_f64(x); } +__device__ +inline +double atan2(double x, double y) { return __ocml_atan2_f64(x, y); } +__device__ +inline +double atanh(double x) { return __ocml_atanh_f64(x); } +__device__ +inline +double cbrt(double x) { return __ocml_cbrt_f64(x); } +__device__ +inline +double ceil(double x) { return __ocml_ceil_f64(x); } +__device__ +inline +double copysign(double x, double y) { return __ocml_copysign_f64(x, y); } +__device__ +inline +double cos(double x) { return __ocml_cos_f64(x); } +__device__ +inline +double cosh(double x) { return __ocml_cosh_f64(x); } +__device__ +inline +double cospi(double x) { return __ocml_cospi_f64(x); } +__device__ +inline +double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); } +__device__ +inline +double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); } +__device__ +inline +double erf(double x) { return __ocml_erf_f64(x); } +__device__ +inline +double erfc(double x) { return __ocml_erfc_f64(x); } +__device__ +inline +double erfcinv(double x) { return __ocml_erfcinv_f64(x); } +__device__ +inline +double erfcx(double x) { return __ocml_erfcx_f64(x); } +__device__ +inline +double erfinv(double x) { return __ocml_erfinv_f64(x); } +__device__ +inline +double exp(double x) { return __ocml_exp_f64(x); } +__device__ +inline +double exp10(double x) { return __ocml_exp10_f64(x); } +__device__ +inline +double exp2(double x) { return __ocml_exp2_f64(x); } +__device__ +inline +double expm1(double x) { return __ocml_expm1_f64(x); } +__device__ +inline +double fabs(double x) { return __ocml_fabs_f64(x); } +__device__ +inline +double fdim(double x, double y) { return __ocml_fdim_f64(x, y); } +__device__ +inline +double floor(double x) { return __ocml_floor_f64(x); } +__device__ +inline +double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); } +__device__ +inline +double fmax(double x, double y) { return __ocml_fmax_f64(x, y); } +__device__ +inline +double fmin(double x, double y) { return __ocml_fmin_f64(x, y); } +__device__ +inline +double fmod(double x, double y) { return __ocml_fmod_f64(x, y); } +__device__ +inline +double frexp(double x, int* nptr) +{ + int tmp; + double r = + __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp); + *nptr = tmp; + + return r; +} +__device__ +inline +double hypot(double x, double y) { return __ocml_hypot_f64(x, y); } +__device__ +inline +int ilogb(double x) { return __ocml_ilogb_f64(x); } +__device__ +inline +int isfinite(double x) { return __ocml_isfinite_f64(x); } +__device__ +inline +int isinf(double x) { return __ocml_isinf_f64(x); } +__device__ +inline +int isnan(double x) { return __ocml_isnan_f64(x); } +__device__ +inline +double j0(double x) { return __ocml_j0_f64(x); } +__device__ +inline +double j1(double x) { return __ocml_j1_f64(x); } +__device__ +inline +double jn(int n, double x) +{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. + if (n == 0) return j0f(x); + if (n == 1) return j1f(x); + + double x0 = j0f(x); + double x1 = j1f(x); + for (int i = 1; i < n; ++i) { + double x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} +__device__ +inline +double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); } +__device__ +inline +double lgamma(double x) { return __ocml_lgamma_f64(x); } +__device__ +inline +long long int llrint(double x) { return __ocml_rint_f64(x); } +__device__ +inline +long long int llround(double x) { return __ocml_round_f64(x); } +__device__ +inline +double log(double x) { return __ocml_log_f64(x); } +__device__ +inline +double log10(double x) { return __ocml_log10_f64(x); } +__device__ +inline +double log1p(double x) { return __ocml_log1p_f64(x); } +__device__ +inline +double log2(double x) { return __ocml_log2_f64(x); } +__device__ +inline +double logb(double x) { return __ocml_logb_f64(x); } +__device__ +inline +long int lrint(double x) { return __ocml_rint_f64(x); } +__device__ +inline +long int lround(double x) { return __ocml_round_f64(x); } +__device__ +inline +double modf(double x, double* iptr) +{ + double tmp; + double r = + __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp); + *iptr = tmp; + + return r; +} +__device__ +inline +double nan(const char* tagp) +{ + union { + double val; + struct ieee_double { + uint64_t mantissa : 51; + uint32_t quiet : 1; + uint32_t exponent : 11; + uint32_t sign : 1; + } bits; + + static_assert(sizeof(double) == sizeof(ieee_double), ""); + } tmp; + + tmp.bits.sign = 0u; + tmp.bits.exponent = ~0u; + tmp.bits.quiet = 1u; + tmp.bits.mantissa = __make_mantissa(tagp); + + return tmp.val; +} +__device__ +inline +double nearbyint(double x) { return __ocml_nearbyint_f64(x); } +__device__ +inline +double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); } +__device__ +inline +double norm(int dim, const double* a) +{ + double r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_sqrt_f64(r); +} +__device__ +inline +double norm3d(double x, double y, double z) +{ + return __ocml_len3_f64(x, y, z); +} +__device__ +inline +double norm4d(double x, double y, double z, double w) +{ + return __ocml_len4_f64(x, y, z, w); +} +__device__ +inline +double normcdf(double x) { return __ocml_ncdf_f64(x); } +__device__ +inline +double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); } +__device__ +inline +double pow(double x, double y) { return __ocml_pow_f64(x, y); } +__device__ +inline +double rcbrt(double x) { return __ocml_rcbrt_f64(x); } +__device__ +inline +double remainder(double x, double y) { return __ocml_remainder_f64(x, y); } +__device__ +inline +double remquo(double x, double y, int* quo) +{ + int tmp; + double r = + __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp); + *quo = tmp; + + return r; +} +__device__ +inline +double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); } +__device__ +inline +double rint(double x) { return __ocml_rint_f64(x); } +__device__ +inline +double rnorm(int dim, const double* a) +{ + double r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_rsqrt_f64(r); +} +__device__ +inline +double rnorm3d(double x, double y, double z) +{ + return __ocml_rlen3_f64(x, y, z); +} +__device__ +inline +double rnorm4d(double x, double y, double z, double w) +{ + return __ocml_rlen4_f64(x, y, z, w); +} +__device__ +inline +double round(double x) { return __ocml_round_f64(x); } +__device__ +inline +double rsqrt(double x) { return __ocml_rsqrt_f64(x); } +__device__ +inline +double scalbln(double x, long int n) +{ + return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n); +} +__device__ +inline +double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); } +__device__ +inline +int signbit(double x) { return __ocml_signbit_f64(x); } +__device__ +inline +double sin(double x) { return __ocml_sin_f64(x); } +__device__ +inline +void sincos(double x, double* sptr, double* cptr) +{ + double tmp; + *sptr = + __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp); + *cptr = tmp; +} +__device__ +inline +void sincospi(double x, double* sptr, double* cptr) +{ + double tmp; + *sptr = __ocml_sincospi_f64( + x, (__attribute__((address_space(5))) double*) &tmp); + *cptr = tmp; +} +__device__ +inline +double sinh(double x) { return __ocml_sinh_f64(x); } +__device__ +inline +double sinpi(double x) { return __ocml_sinpi_f64(x); } +__device__ +inline +double sqrt(double x) { return __ocml_sqrt_f64(x); } +__device__ +inline +double tan(double x) { return __ocml_tan_f64(x); } +__device__ +inline +double tanh(double x) { return __ocml_tanh_f64(x); } +__device__ +inline +double tgamma(double x) { return __ocml_tgamma_f64(x); } +__device__ +inline +double trunc(double x) { return __ocml_trunc_f64(x); } +__device__ +inline +double y0(double x) { return __ocml_y0_f64(x); } +__device__ +inline +double y1(double x) { return __ocml_y1_f64(x); } +__device__ +inline +double yn(int n, double x) +{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. + if (n == 0) return j0f(x); + if (n == 1) return j1f(x); + + double x0 = j0f(x); + double x1 = j1f(x); + for (int i = 1; i < n; ++i) { + double x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} + +// BEGIN INTRINSICS +__device__ +inline +double __dadd_rd(double x, double y) { return __llvm_add_rtp_f64(x, y); } +__device__ +inline +double __dadd_rn(double x, double y) { return __llvm_add_rte_f64(x, y); } +__device__ +inline +double __dadd_ru(double x, double y) { return __llvm_add_rtn_f64(x, y); } +__device__ +inline +double __dadd_rz(double x, double y) { return __llvm_add_rtz_f64(x, y); } +__device__ +inline +double __ddiv_rd(double x, double y) { return __llvm_div_rtp_f64(x, y); } +__device__ +inline +double __ddiv_rn(double x, double y) { return __llvm_div_rte_f64(x, y); } +__device__ +inline +double __ddiv_ru(double x, double y) { return __llvm_div_rtn_f64(x, y); } +__device__ +inline +double __ddiv_rz(double x, double y) { return __llvm_div_rtz_f64(x, y); } +__device__ +inline +double __dmul_rd(double x, double y) { return __llvm_mul_rtp_f64(x, y); } +__device__ +inline +double __dmul_rn(double x, double y) { return __llvm_mul_rte_f64(x, y); } +__device__ +inline +double __dmul_ru(double x, double y) { return __llvm_mul_rtn_f64(x, y); } +__device__ +inline +double __dmul_rz(double x, double y) { return __llvm_mul_rtz_f64(x, y); } +__device__ +inline +double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); } +__device__ +inline +double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); } +__device__ +inline +double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); } +__device__ +inline +double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); } +__device__ +inline +double __dsqrt_rd(double x) { return __llvm_sqrt_rtp_f64(x); } +__device__ +inline +double __dsqrt_rn(double x) { return __llvm_sqrt_rte_f64(x); } +__device__ +inline +double __dsqrt_ru(double x) { return __llvm_sqrt_rtn_f64(x); } +__device__ +inline +double __dsqrt_rz(double x) { return __llvm_sqrt_rtz_f64(x); } +__device__ +inline +double __dsub_rd(double x, double y) { return __llvm_sub_rtp_f64(x, y); } +__device__ +inline +double __dsub_rn(double x, double y) { return __llvm_sub_rte_f64(x, y); } +__device__ +inline +double __dsub_ru(double x, double y) { return __llvm_sub_rtn_f64(x, y); } +__device__ +inline +double __dsub_rz(double x, double y) { return __llvm_sub_rtz_f64(x, y); } +__device__ +inline +double __fma_rd(double x, double y, double z) +{ + return __llvm_fma_rtp_f64(x, y, z); +} +__device__ +inline +double __fma_rn(double x, double y, double z) +{ + return __llvm_fma_rte_f64(x, y, z); +} +__device__ +inline +double __fma_ru(double x, double y, double z) +{ + return __llvm_fma_rtn_f64(x, y, z); +} +__device__ +inline +double __fma_rz(double x, double y, double z) +{ + return __llvm_fma_rtz_f64(x, y, z); +} +// END INTRINSICS +// END DOUBLE \ No newline at end of file diff --git a/projects/clr/hipamd/include/hip/hcc_detail/ocml_math_fwd.h b/projects/clr/hipamd/include/hip/hcc_detail/ocml_math_fwd.h new file mode 100644 index 0000000000..773fe0bf9b --- /dev/null +++ b/projects/clr/hipamd/include/hip/hcc_detail/ocml_math_fwd.h @@ -0,0 +1,431 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#if defined(__cplusplus) + extern "C" { +#endif + +// BEGIN FLOAT +__attribute__((const)) +float __ocml_acos_f32(float); +__attribute__((pure)) +float __ocml_acosh_f32(float); +__attribute__((const)) +float __ocml_asin_f32(float); +__attribute__((pure)) +float __ocml_asinh_f32(float); +__attribute__((const)) +float __ocml_atan2_f32(float, float); +__attribute__((const)) +float __ocml_atan_f32(float); +__attribute__((pure)) +float __ocml_atanh_f32(float); +__attribute__((pure)) +float __ocml_cbrt_f32(float); +__attribute__((const)) +float __ocml_ceil_f32(float); +__attribute__((const)) +float __ocml_copysign_f32(float, float); +float __ocml_cos_f32(float); +__attribute__((pure)) +float __ocml_cosh_f32(float); +float __ocml_cospi_f32(float); +float __ocml_div_rtz_f32(float, float); +float __ocml_i0_f32(float); +float __ocml_i1_f32(float); +__attribute__((pure)) +float __ocml_erfc_f32(float); +__attribute__((pure)) +float __ocml_erfcinv_f32(float); +__attribute__((pure)) +float __ocml_erfcx_f32(float); +__attribute__((pure)) +float __ocml_erf_f32(float); +__attribute__((pure)) +float __ocml_erfinv_f32(float); +__attribute__((pure)) +float __ocml_exp10_f32(float); +__attribute__((pure)) +float __ocml_exp2_f32(float); +__attribute__((pure)) +float __ocml_exp_f32(float); +__attribute__((pure)) +float __ocml_expm1_f32(float); +__attribute__((const)) +float __ocml_fabs_f32(float); +__attribute__((const)) +float __ocml_fdim_f32(float, float); +__attribute__((const)) +float __ocml_floor_f32(float); +__attribute__((const)) +float __ocml_fma_f32(float, float, float); +__attribute__((const)) +float __ocml_fmax_f32(float, float); +__attribute__((const)) +float __ocml_fmin_f32(float, float); +__attribute__((const)) +float __ocml_fmod_f32(float, float); +float __ocml_frexp_f32(float, __attribute__((address_space(5))) int*); +__attribute__((const)) +float __ocml_hypot_f32(float, float); +__attribute__((const)) +int __ocml_ilogb_f32(float); +__attribute__((const)) +int __ocml_isfinite_f32(float); +__attribute__((const)) +int __ocml_isinf_f32(float); +__attribute__((const)) +int __ocml_isnan_f32(float); +float __ocml_j0_f32(float); +float __ocml_j1_f32(float); +__attribute__((const)) +float __ocml_ldexp_f32(float, int); +float __ocml_lgamma_f32(float); +__attribute__((pure)) +float __ocml_log10_f32(float); +__attribute__((pure)) +float __ocml_log1p_f32(float); +__attribute__((pure)) +float __ocml_log2_f32(float); +__attribute__((const)) +float __ocml_logb_f32(float); +__attribute__((pure)) +float __ocml_log_f32(float); +float __ocml_modf_f32(float, __attribute__((address_space(5))) float*); +__attribute__((const)) +float __ocml_nearbyint_f32(float); +__attribute__((const)) +float __ocml_nextafter_f32(float, float); +__attribute__((const)) +float __ocml_len3_f32(float, float, float); +__attribute__((const)) +float __ocml_len4_f32(float, float, float, float); +__attribute__((pure)) +float __ocml_ncdf_f32(float); +__attribute__((pure)) +float __ocml_ncdfinv_f32(float); +__attribute__((pure)) +float __ocml_pow_f32(float, float); +__attribute__((pure)) +float __ocml_rcbrt_f32(float); +__attribute__((const)) +float __ocml_remainder_f32(float, float); +float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int*); +__attribute__((const)) +float __ocml_rhypot_f32(float, float); +__attribute__((const)) +float __ocml_rint_f32(float); +__attribute__((const)) +float __ocml_rlen3_f32(float, float, float); +__attribute__((const)) +float __ocml_rlen4_f32(float, float, float, float); +__attribute__((const)) +float __ocml_round_f32(float); +__attribute__((pure)) +float __ocml_rsqrt_f32(float); +__attribute__((const)) +float __ocml_scalb_f32(float, float); +__attribute__((const)) +float __ocml_scalbn_f32(float, int); +__attribute__((const)) +int __ocml_signbit_f32(float); +float __ocml_sincos_f32(float, __attribute__((address_space(5))) float*); +float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float*); +float __ocml_sin_f32(float); +__attribute__((pure)) +float __ocml_sinh_f32(float); +float __ocml_sinpi_f32(float); +__attribute__((const)) +float __ocml_sqrt_f32(float); +float __ocml_tan_f32(float); +__attribute__((pure)) +float __ocml_tanh_f32(float); +float __ocml_tgamma_f32(float); +__attribute__((const)) +float __ocml_trunc_f32(float); +float __ocml_y0_f32(float); +float __ocml_y1_f32(float); + +// BEGIN INTRINSICS +__attribute__((const)) +float __llvm_add_rte_f32(float, float); +__attribute__((const)) +float __llvm_add_rtn_f32(float, float); +__attribute__((const)) +float __llvm_add_rtp_f32(float, float); +__attribute__((const)) +float __llvm_add_rtz_f32(float, float); +__attribute__((const)) +float __llvm_sub_rte_f32(float, float); +__attribute__((const)) +float __llvm_sub_rtn_f32(float, float); +__attribute__((const)) +float __llvm_sub_rtp_f32(float, float); +__attribute__((const)) +float __llvm_sub_rtz_f32(float, float); +__attribute__((const)) +float __llvm_mul_rte_f32(float, float); +__attribute__((const)) +float __llvm_mul_rtn_f32(float, float); +__attribute__((const)) +float __llvm_mul_rtp_f32(float, float); +__attribute__((const)) +float __llvm_mul_rtz_f32(float, float); +__attribute__((const)) +float __llvm_div_rte_f32(float, float); +__attribute__((const)) +float __llvm_div_rtn_f32(float, float); +__attribute__((const)) +float __llvm_div_rtp_f32(float, float); +__attribute__((const)) +float __llvm_div_rtz_f32(float, float); +__attribute__((const)) +float __llvm_sqrt_rte_f32(float); +__attribute__((const)) +float __llvm_sqrt_rtn_f32(float); +__attribute__((const)) +float __llvm_sqrt_rtp_f32(float); +__attribute__((const)) +float __llvm_sqrt_rtz_f32(float); +__attribute__((const)) +float __llvm_fma_rte_f32(float, float, float); +__attribute__((const)) +float __llvm_fma_rtn_f32(float, float, float); +__attribute__((const)) +float __llvm_fma_rtp_f32(float, float, float); +__attribute__((const)) +float __llvm_fma_rtz_f32(float, float, float); + +__attribute__((const)) +float __llvm_amdgcn_cos_f32(float) __asm("llvm.amdgcn.cos.f32"); +__attribute__((const)) +float __llvm_amdgcn_rcp_f32(float) __asm("llvm.amdgcn.rcp.f32"); +__attribute__((const)) +float __llvm_amdgcn_rsq_f32(float) __asm("llvm.amdgcn.rsq.f32"); +__attribute__((const)) +float __llvm_amdgcn_sin_f32(float) __asm("llvm.amdgcn.sin.f32"); +// END INTRINSICS +// END FLOAT + +// BEGIN DOUBLE +__attribute__((const)) +double __ocml_acos_f64(double); +__attribute__((pure)) +double __ocml_acosh_f64(double); +__attribute__((const)) +double __ocml_asin_f64(double); +__attribute__((pure)) +double __ocml_asinh_f64(double); +__attribute__((const)) +double __ocml_atan2_f64(double, double); +__attribute__((const)) +double __ocml_atan_f64(double); +__attribute__((pure)) +double __ocml_atanh_f64(double); +__attribute__((pure)) +double __ocml_cbrt_f64(double); +__attribute__((const)) +double __ocml_ceil_f64(double); +__attribute__((const)) +double __ocml_copysign_f64(double, double); +double __ocml_cos_f64(double); +__attribute__((pure)) +double __ocml_cosh_f64(double); +double __ocml_cospi_f64(double); +double __ocml_i0_f64(double); +double __ocml_i1_f64(double); +__attribute__((pure)) +double __ocml_erfc_f64(double); +__attribute__((pure)) +double __ocml_erfcinv_f64(double); +__attribute__((pure)) +double __ocml_erfcx_f64(double); +__attribute__((pure)) +double __ocml_erf_f64(double); +__attribute__((pure)) +double __ocml_erfinv_f64(double); +__attribute__((pure)) +double __ocml_exp10_f64(double); +__attribute__((pure)) +double __ocml_exp2_f64(double); +__attribute__((pure)) +double __ocml_exp_f64(double); +__attribute__((pure)) +double __ocml_expm1_f64(double); +__attribute__((const)) +double __ocml_fabs_f64(double); +__attribute__((const)) +double __ocml_fdim_f64(double, double); +__attribute__((const)) +double __ocml_floor_f64(double); +__attribute__((const)) +double __ocml_fma_f64(double, double, double); +__attribute__((const)) +double __ocml_fmax_f64(double, double); +__attribute__((const)) +double __ocml_fmin_f64(double, double); +__attribute__((const)) +double __ocml_fmod_f64(double, double); +double __ocml_frexp_f64(double, __attribute__((address_space(5))) int*); +__attribute__((const)) +double __ocml_hypot_f64(double, double); +__attribute__((const)) +int __ocml_ilogb_f64(double); +__attribute__((const)) +int __ocml_isfinite_f64(double); +__attribute__((const)) +int __ocml_isinf_f64(double); +__attribute__((const)) +int __ocml_isnan_f64(double); +double __ocml_j0_f64(double); +double __ocml_j1_f64(double); +__attribute__((const)) +double __ocml_ldexp_f64(double, int); +double __ocml_lgamma_f64(double); +__attribute__((pure)) +double __ocml_log10_f64(double); +__attribute__((pure)) +double __ocml_log1p_f64(double); +__attribute__((pure)) +double __ocml_log2_f64(double); +__attribute__((const)) +double __ocml_logb_f64(double); +__attribute__((pure)) +double __ocml_log_f64(double); +double __ocml_modf_f64(double, __attribute__((address_space(5))) double*); +__attribute__((const)) +double __ocml_nearbyint_f64(double); +__attribute__((const)) +double __ocml_nextafter_f64(double, double); +__attribute__((const)) +double __ocml_len3_f64(double, double, double); +__attribute__((const)) +double __ocml_len4_f64(double, double, double, double); +__attribute__((pure)) +double __ocml_ncdf_f64(double); +__attribute__((pure)) +double __ocml_ncdfinv_f64(double); +__attribute__((pure)) +double __ocml_pow_f64(double, double); +__attribute__((pure)) +double __ocml_rcbrt_f64(double); +__attribute__((const)) +double __ocml_remainder_f64(double, double); +double __ocml_remquo_f64( + double, double, __attribute__((address_space(5))) int*); +__attribute__((const)) +double __ocml_rhypot_f64(double, double); +__attribute__((const)) +double __ocml_rint_f64(double); +__attribute__((const)) +double __ocml_rlen3_f64(double, double, double); +__attribute__((const)) +double __ocml_rlen4_f64(double, double, double, double); +__attribute__((const)) +double __ocml_round_f64(double); +__attribute__((pure)) +double __ocml_rsqrt_f64(double); +__attribute__((const)) +double __ocml_scalb_f64(double, double); +__attribute__((const)) +double __ocml_scalbn_f64(double, int); +__attribute__((const)) +int __ocml_signbit_f64(double); +double __ocml_sincos_f64(double, __attribute__((address_space(5))) double*); +double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double*); +double __ocml_sin_f64(double); +__attribute__((pure)) +double __ocml_sinh_f64(double); +double __ocml_sinpi_f64(double); +__attribute__((const)) +double __ocml_sqrt_f64(double); +double __ocml_tan_f64(double); +__attribute__((pure)) +double __ocml_tanh_f64(double); +double __ocml_tgamma_f64(double); +__attribute__((const)) +double __ocml_trunc_f64(double); +double __ocml_y0_f64(double); +double __ocml_y1_f64(double); + +// BEGIN INTRINSICS +__attribute__((const)) +double __llvm_add_rte_f64(double, double); +__attribute__((const)) +double __llvm_add_rtn_f64(double, double); +__attribute__((const)) +double __llvm_add_rtp_f64(double, double); +__attribute__((const)) +double __llvm_add_rtz_f64(double, double); +__attribute__((const)) +double __llvm_sub_rte_f64(double, double); +__attribute__((const)) +double __llvm_sub_rtn_f64(double, double); +__attribute__((const)) +double __llvm_sub_rtp_f64(double, double); +__attribute__((const)) +double __llvm_sub_rtz_f64(double, double); +__attribute__((const)) +double __llvm_mul_rte_f64(double, double); +__attribute__((const)) +double __llvm_mul_rtn_f64(double, double); +__attribute__((const)) +double __llvm_mul_rtp_f64(double, double); +__attribute__((const)) +double __llvm_mul_rtz_f64(double, double); +__attribute__((const)) +double __llvm_div_rte_f64(double, double); +__attribute__((const)) +double __llvm_div_rtn_f64(double, double); +__attribute__((const)) +double __llvm_div_rtp_f64(double, double); +__attribute__((const)) +double __llvm_div_rtz_f64(double, double); +__attribute__((const)) +double __llvm_sqrt_rte_f64(double); +__attribute__((const)) +double __llvm_sqrt_rtn_f64(double); +__attribute__((const)) +double __llvm_sqrt_rtp_f64(double); +__attribute__((const)) +double __llvm_sqrt_rtz_f64(double); +__attribute__((const)) +double __llvm_fma_rte_f64(float, float, float); +__attribute__((const)) +double __llvm_fma_rtn_f64(float, float, float); +__attribute__((const)) +double __llvm_fma_rtp_f64(float, float, float); +__attribute__((const)) +double __llvm_fma_rtz_f64(float, float, float); + +__attribute__((const)) +double __llvm_amdgcn_rcp_f64(double); +__attribute__((const)) +double __llvm_amdgcn_rsq_f64(double); +// END INTRINSICS +// END DOUBLE + +#if defined(__cplusplus) + } // extern "C" +#endif \ No newline at end of file diff --git a/projects/clr/hipamd/src/device_functions.cpp b/projects/clr/hipamd/src/device_functions.cpp index 86d0530817..1af66f97e5 100644 --- a/projects/clr/hipamd/src/device_functions.cpp +++ b/projects/clr/hipamd/src/device_functions.cpp @@ -23,11 +23,6 @@ THE SOFTWARE. #include #include "device_util.h" -extern "C" float __ocml_floor_f32(float); -extern "C" float __ocml_rint_f32(float); -extern "C" float __ocml_ceil_f32(float); -extern "C" float __ocml_trunc_f32(float); - __device__ float __double2float_rd(double x) { return (double)x; } __device__ float __double2float_rn(double x) { return (double)x; } __device__ float __double2float_ru(double x) { return (double)x; } diff --git a/projects/clr/hipamd/src/device_util.cpp b/projects/clr/hipamd/src/device_util.cpp index 5ce014b2b9..50dfb48c8a 100644 --- a/projects/clr/hipamd/src/device_util.cpp +++ b/projects/clr/hipamd/src/device_util.cpp @@ -943,97 +943,6 @@ __device__ void* __get_dynamicgroupbaseptr() { __host__ void* __get_dynamicgroupbaseptr() { return nullptr; } -// Precise Math Functions -__device__ float __hip_precise_cosf(float x) { return hc::precise_math::cosf(x); } - -__device__ float __hip_precise_exp10f(float x) { return hc::precise_math::exp10f(x); } - -__device__ float __hip_precise_expf(float x) { return hc::precise_math::expf(x); } - -__device__ float __hip_precise_frsqrt_rn(float x) { return hc::precise_math::rsqrt(x); } - -__device__ float __hip_precise_fsqrt_rd(float x) { return hc::precise_math::sqrt(x); } - -__device__ float __hip_precise_fsqrt_rn(float x) { return hc::precise_math::sqrt(x); } - -__device__ float __hip_precise_fsqrt_ru(float x) { return hc::precise_math::sqrt(x); } - -__device__ float __hip_precise_fsqrt_rz(float x) { return hc::precise_math::sqrt(x); } - -__device__ float __hip_precise_log10f(float x) { return hc::precise_math::log10(x); } - -__device__ float __hip_precise_log2f(float x) { return hc::precise_math::log2(x); } - -__device__ float __hip_precise_logf(float x) { return hc::precise_math::logf(x); } - -__device__ float __hip_precise_powf(float base, float exponent) { - return hc::precise_math::powf(base, exponent); -} - -__device__ void __hip_precise_sincosf(float x, float* s, float* c) { - hc::precise_math::sincosf(x, s, c); -} - -__device__ float __hip_precise_sinf(float x) { return hc::precise_math::sinf(x); } - -__device__ float __hip_precise_tanf(float x) { return hc::precise_math::tanf(x); } - -// Double Precision Math -__device__ double __hip_precise_dsqrt_rd(double x) { return hc::precise_math::sqrt(x); } - -__device__ double __hip_precise_dsqrt_rn(double x) { return hc::precise_math::sqrt(x); } - -__device__ double __hip_precise_dsqrt_ru(double x) { return hc::precise_math::sqrt(x); } - -__device__ double __hip_precise_dsqrt_rz(double x) { return hc::precise_math::sqrt(x); } - -#define LOG_BASE2_E 1.4426950408889634 -#define LOG_BASE2_10 3.32192809488736 -#define ONE_DIV_LOG_BASE2_E 0.69314718056 -#define ONE_DIV_LOG_BASE2_10 0.30102999566 - -// Fast Math Intrinsics -__device__ float __hip_fast_exp10f(float x) { return __hip_fast_exp2f(x * LOG_BASE2_E); } - -__device__ float __hip_fast_expf(float x) { return __hip_fast_exp2f(x * LOG_BASE2_10); } - -__device__ float __hip_fast_frsqrt_rn(float x) { - return 1 / __hip_fast_fsqrt_rd(x); - ; -} - -__device__ float __hip_fast_fsqrt_rn(float x) { return __hip_fast_fsqrt_rd(x); } - -__device__ float __hip_fast_fsqrt_ru(float x) { return __hip_fast_fsqrt_rd(x); } - -__device__ float __hip_fast_fsqrt_rz(float x) { return __hip_fast_fsqrt_rd(x); } - -__device__ float __hip_fast_log10f(float x) { return ONE_DIV_LOG_BASE2_E * __hip_fast_log2f(x); } - -__device__ float __hip_fast_logf(float x) { return ONE_DIV_LOG_BASE2_10 * __hip_fast_log2f(x); } - -__device__ float __hip_fast_powf(float base, float exponent) { - return hc::fast_math::powf(base, exponent); -} - -__device__ void __hip_fast_sincosf(float x, float* s, float* c) { - *s = __hip_fast_sinf(x); - *c = __hip_fast_cosf(x); -} - -__device__ float __hip_fast_tanf(float x) { return hc::fast_math::tanf(x); } - -// Double Precision Math -// FIXME - HCC doesn't have a fast_math version double FP sqrt -// Another issue is that these intrinsics call for a specific rounding mode; -// however, their implementation all map to the same sqrt builtin -__device__ double __hip_fast_dsqrt_rd(double x) { return hc::precise_math::sqrt(x); } - -__device__ double __hip_fast_dsqrt_rn(double x) { return hc::precise_math::sqrt(x); } - -__device__ double __hip_fast_dsqrt_ru(double x) { return hc::precise_math::sqrt(x); } - -__device__ double __hip_fast_dsqrt_rz(double x) { return hc::precise_math::sqrt(x); } __device__ void __threadfence_system(void) { std::atomic_thread_fence(std::memory_order_seq_cst); } diff --git a/projects/clr/hipamd/src/math_functions.cpp b/projects/clr/hipamd/src/math_functions.cpp deleted file mode 100644 index dedc40f2ae..0000000000 --- a/projects/clr/hipamd/src/math_functions.cpp +++ /dev/null @@ -1,429 +0,0 @@ - -/* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include -#include "device_util.h" -#include "hip/hcc_detail/device_functions.h" -#include "hip/hip_runtime.h" - -__device__ float acosf(float x) { return hc::precise_math::acosf(x); } -__device__ float acoshf(float x) { return hc::precise_math::acoshf(x); } -__device__ float asinf(float x) { return hc::precise_math::asinf(x); } -__device__ float asinhf(float x) { return hc::precise_math::asinhf(x); } -__device__ float atan2f(float y, float x) { return hc::precise_math::atan2f(y, x); } -__device__ float atanf(float x) { return hc::precise_math::atanf(x); } -__device__ float atanhf(float x) { return hc::precise_math::atanhf(x); } -__device__ float cbrtf(float x) { return hc::precise_math::cbrtf(x); } -__device__ float ceilf(float x) { return hc::precise_math::ceilf(x); } -__device__ float copysignf(float x, float y) { return hc::precise_math::copysignf(x, y); } -__device__ float cosf(float x) { return hc::precise_math::cosf(x); } -__device__ float coshf(float x) { return hc::precise_math::coshf(x); } -__device__ float cyl_bessel_i0f(float x); -__device__ float cyl_bessel_i1f(float x); -__device__ float erfcf(float x) { return hc::precise_math::erfcf(x); } -__device__ float erfcinvf(float y) { return hc::precise_math::erfcinvf(y); } -__device__ float erfcxf(float x) { - return hc::precise_math::expf(x * x) * hc::precise_math::erfcf(x); -} -__device__ float erff(float x) { return hc::precise_math::erff(x); } -__device__ float erfinvf(float y) { return hc::precise_math::erfinvf(y); } -__device__ float exp10f(float x) { return hc::precise_math::exp10f(x); } -__device__ float exp2f(float x) { return hc::precise_math::exp2f(x); } -__device__ float expf(float x) { return hc::precise_math::expf(x); } -__device__ float expm1f(float x) { return hc::precise_math::expm1f(x); } -__device__ int abs(int x) { - return x >= 0 ? x : -x; // TODO - optimize with OCML -} -__device__ long long abs(long long x) { - return x >= 0 ? x : -x; -} -__device__ float fabsf(float x) { return hc::precise_math::fabsf(x); } -__device__ float fdimf(float x, float y) { return hc::precise_math::fdimf(x, y); } -__device__ float fdividef(float x, float y) { return x / y; } -__device__ float floorf(float x) { return hc::precise_math::floorf(x); } -__device__ float fmaf(float x, float y, float z) { return hc::precise_math::fmaf(x, y, z); } -__device__ float fmaxf(float x, float y) { return hc::precise_math::fmaxf(x, y); } -__device__ float fminf(float x, float y) { return hc::precise_math::fminf(x, y); } -__device__ float fmodf(float x, float y) { return hc::precise_math::fmodf(x, y); } -__device__ float frexpf(float x, int* nptr) { return hc::precise_math::frexpf(x, nptr); } -__device__ float hypotf(float x, float y) { return hc::precise_math::hypotf(x, y); } -__device__ int ilogbf(float x) { return hc::precise_math::ilogbf(x); } -__device__ int isfinite(float a) { return hc::precise_math::isfinite(a); } -__device__ unsigned isinf(float a) { return hc::precise_math::isinf(a); } -__device__ unsigned isnan(float a) { return hc::precise_math::isnan(a); } -__device__ float j0f(float x) { return __hip_j0f(x); } -__device__ float j1f(float x) { return __hip_j1f(x); } -__device__ float jnf(int n, float x) { return __hip_jnf(n, x); } -__device__ float ldexpf(float x, int exp) { return hc::precise_math::ldexpf(x, exp); } -__device__ float lgammaf(float x) { return hc::precise_math::lgammaf(x); } -__device__ long long int llrintf(float x) { - int y = hc::precise_math::roundf(x); - long long int z = y; - return z; -} -__device__ long long int llroundf(float x) { - int y = hc::precise_math::roundf(x); - long long int z = y; - return z; -} -__device__ float log10f(float x) { return hc::precise_math::log10f(x); } -__device__ float log1pf(float x) { return hc::precise_math::log1pf(x); } -__device__ float log2f(float x) { return hc::precise_math::log2f(x); } -__device__ float logbf(float x) { return hc::precise_math::logbf(x); } -__device__ float logf(float x) { return hc::precise_math::logf(x); } -__device__ long int lrintf(float x) { - int y = hc::precise_math::roundf(x); - long int z = y; - return z; -} -__device__ long int lroundf(float x) { - long int y = hc::precise_math::roundf(x); - return y; -} -__device__ float modff(float x, float* iptr) { return hc::precise_math::modff(x, iptr); } -__device__ float nanf(const char* tagp) { return hc::precise_math::nanf((int)*tagp); } -__device__ float nearbyintf(float x) { return hc::precise_math::nearbyintf(x); } -__device__ float nextafterf(float x, float y) { return hc::precise_math::nextafter(x, y); } -__device__ float norm3df(float a, float b, float c) { - float x = a * a + b * b + c * c; - return hc::precise_math::sqrtf(x); -} -__device__ float norm4df(float a, float b, float c, float d) { - float x = a * a + b * b; - float y = c * c + d * d; - return hc::precise_math::sqrtf(x + y); -} - -__device__ float normcdff(float y) { return ((hc::precise_math::erff(y) / 1.41421356237) + 1) / 2; } -__device__ float normcdfinvf(float y) { return HIP_SQRT_2 * __hip_erfinvf(2 * y - 1); } -__device__ float normf(int dim, const float* a) { - float x = 0.0f; - for (int i = 0; i < dim; i++) { - x = hc::precise_math::fmaf(a[i], a[i], x); - } - return hc::precise_math::sqrtf(x); -} -__device__ float powf(float x, float y) { return hc::precise_math::powf(x, y); } -__device__ float rcbrtf(float x) { return hc::precise_math::rcbrtf(x); } -__device__ float remainderf(float x, float y) { return hc::precise_math::remainderf(x, y); } -__device__ float remquof(float x, float y, int* quo) { - return hc::precise_math::remquof(x, y, quo); -} -__device__ float rhypotf(float x, float y) { return 1 / hc::precise_math::hypotf(x, y); } -__device__ float rintf(float x) { return hc::precise_math::roundf(x); } -__device__ float rnorm3df(float a, float b, float c) { - float x = a * a + b * b + c * c; - return 1 / hc::precise_math::sqrtf(x); -} -__device__ float rnorm4df(float a, float b, float c, float d) { - float x = a * a + b * b; - float y = c * c + d * d; - return 1 / hc::precise_math::sqrtf(x + y); -} -__device__ float rnormf(int dim, const float* a) { - float x = 0.0f; - for (int i = 0; i < dim; i++) { - x = hc::precise_math::fmaf(a[i], a[i], x); - } - return 1 / hc::precise_math::sqrtf(x); -} -__device__ float roundf(float x) { return hc::precise_math::roundf(x); } -__device__ float scalblnf(float x, long int n) { return hc::precise_math::scalb(x, n); } -__device__ float scalbnf(float x, int n) { return hc::precise_math::scalbnf(x, n); } -__device__ int signbit(float a) { return hc::precise_math::signbit(a); } -__device__ void sincosf(float x, float* sptr, float* cptr) { - *sptr = hc::precise_math::sinf(x); - *cptr = hc::precise_math::cosf(x); -} -__device__ void sincospif(float x, float* sptr, float* cptr) { - *sptr = hc::precise_math::sinpif(x); - *cptr = hc::precise_math::cospif(x); -} -__device__ float sinf(float x) { return hc::precise_math::sinf(x); } -__device__ float sinhf(float x) { return hc::precise_math::sinhf(x); } -__device__ float tanf(float x) { return hc::precise_math::tanf(x); } -__device__ float tanhf(float x) { return hc::precise_math::tanhf(x); } -__device__ float tgammaf(float x) { return hc::precise_math::tgammaf(x); } -__device__ float truncf(float x) { return hc::precise_math::truncf(x); } -__device__ float y0f(float x) { return __hip_y0f(x); } -__device__ float y1f(float x) { return __hip_y1f(x); } -__device__ float ynf(int n, float x) { return __hip_ynf(n, x); } -__device__ float cospif(float x) { return hc::precise_math::cospif(x); } -__device__ float sinpif(float x) { return hc::precise_math::sinpif(x); } -__device__ float sqrtf(float x) { return hc::precise_math::sqrtf(x); } -__device__ float rsqrtf(float x) { return hc::precise_math::rsqrtf(x); } - -/* - * Double precision device math functions - */ - -__device__ double acos(double x) { return hc::precise_math::acos(x); } -__device__ double acosh(double x) { return hc::precise_math::acosh(x); } -__device__ double asin(double x) { return hc::precise_math::asin(x); } -__device__ double asinh(double x) { return hc::precise_math::asinh(x); } -__device__ double atan(double x) { return hc::precise_math::atan(x); } -__device__ double atan2(double y, double x) { return hc::precise_math::atan2(y, x); } -__device__ double atanh(double x) { return hc::precise_math::atanh(x); } -__device__ double cbrt(double x) { return hc::precise_math::cbrt(x); } -__device__ double ceil(double x) { return hc::precise_math::ceil(x); } -__device__ double copysign(double x, double y) { return hc::precise_math::copysign(x, y); } -__device__ double cos(double x) { return hc::precise_math::cos(x); } -__device__ double cosh(double x) { return hc::precise_math::cosh(x); } -__device__ double cospi(double x) { return hc::precise_math::cospi(x); } -__device__ double cyl_bessel_i0(double x); -__device__ double cyl_bessel_i1(double x); -__device__ double erf(double x) { return hc::precise_math::erf(x); } -__device__ double erfc(double x) { return hc::precise_math::erfc(x); } -__device__ double erfcinv(double x) { return __hip_erfinv(1 - x); } -__device__ double erfcx(double x) { - return hc::precise_math::exp(x * x) * hc::precise_math::erf(x); -} -__device__ double erfinv(double x) { return __hip_erfinv(x); } -__device__ double exp(double x) { return hc::precise_math::exp(x); } -__device__ double exp10(double x) { return hc::precise_math::exp10(x); } -__device__ double exp2(double x) { return hc::precise_math::exp2(x); } -__device__ double expm1(double x) { return hc::precise_math::expm1(x); } -__device__ double fabs(double x) { return hc::precise_math::fabs(x); } -__device__ double fdim(double x, double y) { return hc::precise_math::fdim(x, y); } -__device__ double fdivide(double x, double y) { return x / y; } -__device__ double floor(double x) { return hc::precise_math::floor(x); } -__device__ double fma(double x, double y, double z) { return hc::precise_math::fma(x, y, z); } -__device__ double fmax(double x, double y) { return hc::precise_math::fmax(x, y); } -__device__ double fmin(double x, double y) { return hc::precise_math::fmin(x, y); } -__device__ double fmod(double x, double y) { return hc::precise_math::fmod(x, y); } -__device__ double frexp(double x, int* y) { return hc::precise_math::frexp(x, y); } -__device__ double hypot(double x, double y) { return hc::precise_math::hypot(x, y); } -__device__ int ilogb(double x) { return hc::precise_math::ilogb(x); } -__device__ int isfinite(double x) { return hc::precise_math::isfinite(x); } -__device__ unsigned isinf(double x) { return hc::precise_math::isinf(x); } -__device__ unsigned isnan(double x) { return hc::precise_math::isnan(x); } -__device__ double j0(double x) { return __hip_j0(x); } -__device__ double j1(double x) { return __hip_j1(x); } -__device__ double jn(int n, double x) { return __hip_jn(n, x); } -__device__ double ldexp(double x, int exp) { return hc::precise_math::ldexp(x, exp); } -__device__ double lgamma(double x) { return hc::precise_math::lgamma(x); } -__device__ long long int llrint(double x) { - long long int y = hc::precise_math::round(x); - return y; -} -__device__ long long int llround(double x) { - long long int y = hc::precise_math::round(x); - return y; -} -__device__ double log(double x) { return hc::precise_math::log(x); } -__device__ double log10(double x) { return hc::precise_math::log10(x); } -__device__ double log1p(double x) { return hc::precise_math::log1p(x); } -__device__ double log2(double x) { return hc::precise_math::log2(x); } -__device__ double logb(double x) { return hc::precise_math::logb(x); } -__device__ long int lrint(double x) { - long int y = hc::precise_math::round(x); - return y; -} -__device__ long int lround(double x) { - long int y = hc::precise_math::round(x); - return y; -} -__device__ double modf(double x, double* iptr) { return hc::precise_math::modf(x, iptr); } -__device__ double nan(const char* tagp) { return hc::precise_math::nan((int)*tagp); } -__device__ double nearbyint(double x) { return hc::precise_math::nearbyint(x); } -__device__ double nextafter(double x, double y) { return hc::precise_math::nextafter(x, y); } -__device__ double norm(int x, const double* d) { - double val = 0; - for (int i = 0; i < x; i++) { - val += d[i] * d[i]; - } - return hc::precise_math::sqrt(val); -} -__device__ double norm3d(double a, double b, double c) { - double x = a * a + b * b + c * c; - return hc::precise_math::sqrt(x); -} -__device__ double norm4d(double a, double b, double c, double d) { - double x = a * a + b * b; - double y = c * c + d * d; - return hc::precise_math::sqrt(x + y); -} -__device__ double normcdf(double y) { return ((hc::precise_math::erf(y) / HIP_SQRT_2) + 1) / 2; } -__device__ double normcdfinv(double y) { return HIP_SQRT_2 * __hip_erfinv(2 * y - 1); } -__device__ double pow(double x, double y) { return hc::precise_math::pow(x, y); } -__device__ double rcbrt(double x) { return hc::precise_math::rcbrt(x); } -__device__ double remainder(double x, double y) { return hc::precise_math::remainder(x, y); } -__device__ double remquo(double x, double y, int* quo) { - return hc::precise_math::remquo(x, y, quo); -} -__device__ double rhypot(double x, double y) { return 1 / hc::precise_math::sqrt(x * x + y * y); } -__device__ double rint(double x) { return hc::precise_math::round(x); } -__device__ double rnorm3d(double a, double b, double c) { - return hc::precise_math::rsqrt(a * a + b * b + c * c); -} -__device__ double rnorm4d(double a, double b, double c, double d) { - return hc::precise_math::rsqrt(a * a + b * b + c * c + d * d); -} -__device__ double rnorm(int dim, const double* t) { - double x = 0.0; - for (int i = 0; i < dim; i++) { - x = hc::precise_math::fma(t[i], t[i], x); - } - return 1 / x; -} -__device__ double round(double x) { return hc::precise_math::round(x); } -__device__ double rsqrt(double x) { return hc::precise_math::rsqrt(x); } -__device__ double scalbln(double x, long int n) { return hc::precise_math::scalb(x, n); } -__device__ double scalbn(double x, int n) { return hc::precise_math::scalbn(x, n); } -__device__ int signbit(double x) { return hc::precise_math::signbit(x); } -__device__ double sin(double x) { return hc::precise_math::sin(x); } -__device__ void sincos(double x, double* sptr, double* cptr) { - *sptr = hc::precise_math::sin(x); - *cptr = hc::precise_math::cos(x); -} -__device__ void sincospi(double x, double* sptr, double* cptr) { - *sptr = hc::precise_math::sinpi(x); - *cptr = hc::precise_math::cospi(x); -} -__device__ double sinh(double x) { return hc::precise_math::sinh(x); } -__device__ double sinpi(double x) { return hc::precise_math::sinpi(x); } -__device__ double sqrt(double x) { return hc::precise_math::sqrt(x); } -__device__ double tan(double x) { return hc::precise_math::tan(x); } -__device__ double tanh(double x) { return hc::precise_math::tanh(x); } -__device__ double tgamma(double x) { return hc::precise_math::tgamma(x); } -__device__ double trunc(double x) { return hc::precise_math::trunc(x); } -__device__ double y0(double x) { return __hip_y0(x); } -__device__ double y1(double x) { return __hip_y1(x); } -__device__ double yn(int n, double x) { return __hip_yn(n, x); } - - -__host__ float cospif(float x) { return std::cos(x * HIP_PI); } - -__host__ float fdividef(float x, float y) { return x / y; } - -__host__ int isfinite(float x) { return std::isfinite(x); } - -__host__ int signbit(float x) { return std::signbit(x); } - -__host__ float sinpif(float x) { return std::sin(x * HIP_PI); } - -__host__ float rsqrtf(float x) { return 1 / std::sqrt(x); } - -__host__ float modff(float x, float* iptr) { return std::modf(x, iptr); } - -__host__ double fdivide(double x, double y) { return x / y; } - -__host__ float normcdff(float t) { return (1 - std::erf(-t / std::sqrt(2))) / 2; } - -__host__ double normcdf(double x) { return (1 - std::erf(-x / std::sqrt(2))) / 2; } - -__host__ float erfcxf(float x) { return std::exp(x * x) * std::erfc(x); } - -__host__ double erfcx(double x) { return std::exp(x * x) * std::erfc(x); } - -__host__ float rhypotf(float x, float y) { return 1 / std::sqrt(x * x + y * y); } - -__host__ double rhypot(double x, double y) { return 1 / std::sqrt(x * x + y * y); } - -__host__ float rcbrtf(float a) { return 1 / std::cbrt(a); } - -__host__ double rcbrt(double a) { return 1 / std::cbrt(a); } - -__host__ float normf(int dim, const float* a) { - float val = 0.0f; - for (int i = 0; i < dim; i++) { - val = val + a[i] * a[i]; - } - return val; -} - -__host__ float rnormf(int dim, const float* t) { - float val = 0.0f; - for (int i = 0; i < dim; i++) { - val = val + t[i] * t[i]; - } - return 1 / std::sqrt(val); -} - -__host__ double rnorm(int dim, const double* t) { - double val = 0.0; - for (int i = 0; i < dim; i++) { - val = val + t[i] * t[i]; - } - return 1 / std::sqrt(val); -} - -__host__ float rnorm4df(float a, float b, float c, float d) { - return 1 / std::sqrt(a * a + b * b + c * c + d * d); -} - -__host__ double rnorm4d(double a, double b, double c, double d) { - return 1 / std::sqrt(a * a + b * b + c * c + d * d); -} - -__host__ float rnorm3df(float a, float b, float c) { return 1 / std::sqrt(a * a + b * b + c * c); } - -__host__ double rnorm3d(double a, double b, double c) { - return 1 / std::sqrt(a * a + b * b + c * c); -} - -__host__ void sincospif(float x, float* sptr, float* cptr) { - *sptr = std::sin(HIP_PI * x); - *cptr = std::cos(HIP_PI * x); -} - -__host__ void sincospi(double x, double* sptr, double* cptr) { - *sptr = std::sin(HIP_PI * x); - *cptr = std::cos(HIP_PI * x); -} - -__host__ float nextafterf(float x, float y) { return std::nextafter(x, y); } - -__host__ double nextafter(double x, double y) { return std::nextafter(x, y); } - -__host__ float norm3df(float a, float b, float c) { return std::sqrt(a * a + b * b + c * c); } - -__host__ float norm4df(float a, float b, float c, float d) { - return std::sqrt(a * a + b * b + c * c + d * d); -} - -__host__ double norm3d(double a, double b, double c) { return std::sqrt(a * a + b * b + c * c); } - -__host__ double norm4d(double a, double b, double c, double d) { - return std::sqrt(a * a + b * b + c * c + d * d); -} - -__host__ double sinpi(double a) { return std::sin(HIP_PI * a); } - -__host__ double cospi(double a) { return std::cos(HIP_PI * a); } - -__host__ int isfinite(double a) { return std::isfinite(a); } - -__host__ double norm(int dim, const double* t) { - double val = 0; - for (int i = 0; i < dim; i++) { - val += t[i] * t[i]; - } - return std::sqrt(val); -} - -__host__ double rsqrt(double x) { return 1 / std::sqrt(x); } - -__host__ int signbit(double x) { return std::signbit(x); } diff --git a/projects/clr/hipamd/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp b/projects/clr/hipamd/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp index 739660c04d..2620a3cbde 100644 --- a/projects/clr/hipamd/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp +++ b/projects/clr/hipamd/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp @@ -50,8 +50,8 @@ __device__ void double_precision_math_functions() { cos(0.0); cosh(0.0); cospi(0.0); - // cyl_bessel_i0(0.0); - // cyl_bessel_i1(0.0); + cyl_bessel_i0(0.0); + cyl_bessel_i1(0.0); erf(0.0); erfc(0.0); erfcinv(2.0); @@ -78,7 +78,7 @@ __device__ void double_precision_math_functions() { j1(0.0); jn(-1.0, 1.0); ldexp(0.0, 0); - // lgamma(1.0); + lgamma(1.0); llrint(0.0); llround(0.0); log(1.0); @@ -88,7 +88,7 @@ __device__ void double_precision_math_functions() { logb(1.0); lrint(0.0); lround(0.0); - // modf(0.0, &fX); + modf(0.0, &fX); nan("1"); nearbyint(0.0); nextafter(0.0, 0.0); @@ -99,9 +99,9 @@ __device__ void double_precision_math_functions() { normcdf(0.0); normcdfinv(1.0); pow(1.0, 0.0); - // rcbrt(1.0); + rcbrt(1.0); remainder(2.0, 1.0); - // remquo(1.0, 2.0, &iX); + remquo(1.0, 2.0, &iX); rhypot(0.0, 1.0); rint(1.0); fX = 1.0; @@ -110,8 +110,8 @@ __device__ void double_precision_math_functions() { rnorm4d(0.0, 0.0, 0.0, 1.0); round(0.0); rsqrt(1.0); - // scalbln(0.0, 1); - // scalbn(0.0, 1); + scalbln(0.0, 1); + scalbn(0.0, 1); signbit(1.0); sin(0.0); sincos(0.0, &fX, &fY); @@ -128,11 +128,17 @@ __device__ void double_precision_math_functions() { yn(1, 1.0); } -__global__ void compileDoublePrecisionMathOnDevice(hipLaunchParm lp, int ignored) { +__global__ void compileDoublePrecisionMathOnDevice(int) { double_precision_math_functions(); } int main() { - hipLaunchKernel(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, 1); + hipLaunchKernelGGL( + compileDoublePrecisionMathOnDevice, + dim3(1, 1, 1), + dim3(1, 1, 1), + 0, + 0, + 1); passed(); } diff --git a/projects/clr/hipamd/tests/src/deviceLib/hipFloatMathPrecise.cpp b/projects/clr/hipamd/tests/src/deviceLib/hipFloatMathPrecise.cpp index f9f38efa96..6a0c91e890 100644 --- a/projects/clr/hipamd/tests/src/deviceLib/hipFloatMathPrecise.cpp +++ b/projects/clr/hipamd/tests/src/deviceLib/hipFloatMathPrecise.cpp @@ -30,8 +30,8 @@ THE SOFTWARE. #include #include "test_common.h" -__global__ void FloatMathPrecise(hipLaunchParm lp) { - // int iX; //uncomment this when remqouf() is enabled again +__global__ void FloatMathPrecise() { + int iX; float fX, fY; acosf(1.0f); @@ -47,8 +47,8 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) { cosf(0.0f); coshf(0.0f); cospif(0.0f); - // cyl_bessel_i0f(0.0f); - // cyl_bessel_i1f(0.0f); + cyl_bessel_i0f(0.0f); + cyl_bessel_i1f(0.0f); erfcf(0.0f); erfcinvf(2.0f); erfcxf(0.0f); @@ -66,7 +66,7 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) { fX = fmaxf(0.0f, 0.0f); fX = fminf(0.0f, 0.0f); fmodf(0.0f, 1.0f); - // frexpf(0.0f, &iX); + frexpf(0.0f, &iX); hypotf(1.0f, 0.0f); ilogbf(1.0f); isfinite(0.0f); @@ -76,7 +76,7 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) { j1f(0.0f); jnf(-1.0f, 1.0f); ldexpf(0.0f, 0); - // lgammaf(1.0f); + lgammaf(1.0f); llrintf(0.0f); llroundf(0.0f); log10f(1.0f); @@ -86,10 +86,10 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) { logf(1.0f); lrintf(0.0f); lroundf(0.0f); - // modff(0.0f, &fX); + modff(0.0f, &fX); fX = nanf("1"); fX = nearbyintf(0.0f); - // nextafterf(0.0f); + nextafterf(0.0f, 0.0f); norm3df(1.0f, 0.0f, 0.0f); norm4df(1.0f, 0.0f, 0.0f, 0.0f); normcdff(0.0f); @@ -99,7 +99,7 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) { powf(1.0f, 0.0f); rcbrtf(1.0f); remainderf(2.0f, 1.0f); - // remquof(1.0f, 2.0f, &iX); + remquof(1.0f, 2.0f, &iX); rhypotf(0.0f, 1.0f); fY = rintf(1.0f); rnorm3df(0.0f, 0.0f, 1.0f); @@ -127,6 +127,6 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) { } int main() { - hipLaunchKernel(FloatMathPrecise, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0); + hipLaunchKernelGGL(FloatMathPrecise, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0); passed(); } diff --git a/projects/clr/hipamd/tests/src/deviceLib/hipTestDevice.cpp b/projects/clr/hipamd/tests/src/deviceLib/hipTestDevice.cpp index 395e63a330..2bb5163dc9 100644 --- a/projects/clr/hipamd/tests/src/deviceLib/hipTestDevice.cpp +++ b/projects/clr/hipamd/tests/src/deviceLib/hipTestDevice.cpp @@ -31,82 +31,82 @@ THE SOFTWARE. #define N 512 #define SIZE N * sizeof(float) -__global__ void test_sincosf(hipLaunchParm lp, float* a, float* b, float* c) { +__global__ void test_sincosf(float* a, float* b, float* c) { int tid = threadIdx.x; sincosf(a[tid], b + tid, c + tid); } -__global__ void test_sincospif(hipLaunchParm lp, float* a, float* b, float* c) { +__global__ void test_sincospif(float* a, float* b, float* c) { int tid = threadIdx.x; sincospif(a[tid], b + tid, c + tid); } -__global__ void test_fdividef(hipLaunchParm lp, float* a, float* b, float* c) { +__global__ void test_fdividef(float* a, float* b, float* c) { int tid = threadIdx.x; c[tid] = fdividef(a[tid], b[tid]); } -__global__ void test_llrintf(hipLaunchParm lp, float* a, long long int* b) { +__global__ void test_llrintf(float* a, long long int* b) { int tid = threadIdx.x; b[tid] = llrintf(a[tid]); } -__global__ void test_lrintf(hipLaunchParm lp, float* a, long int* b) { +__global__ void test_lrintf(float* a, long int* b) { int tid = threadIdx.x; b[tid] = lrintf(a[tid]); } -__global__ void test_rintf(hipLaunchParm lp, float* a, float* b) { +__global__ void test_rintf(float* a, float* b) { int tid = threadIdx.x; b[tid] = rintf(a[tid]); } -__global__ void test_llroundf(hipLaunchParm lp, float* a, long long int* b) { +__global__ void test_llroundf(float* a, long long int* b) { int tid = threadIdx.x; b[tid] = llroundf(a[tid]); } -__global__ void test_lroundf(hipLaunchParm lp, float* a, long int* b) { +__global__ void test_lroundf(float* a, long int* b) { int tid = threadIdx.x; b[tid] = lroundf(a[tid]); } -__global__ void test_rhypotf(hipLaunchParm lp, float* a, float* b, float* c) { +__global__ void test_rhypotf(float* a, float* b, float* c) { int tid = threadIdx.x; c[tid] = rhypotf(a[tid], b[tid]); } -__global__ void test_norm3df(hipLaunchParm lp, float* a, float* b, float* c, float* d) { +__global__ void test_norm3df(float* a, float* b, float* c, float* d) { int tid = threadIdx.x; d[tid] = norm3df(a[tid], b[tid], c[tid]); } -__global__ void test_norm4df(hipLaunchParm lp, float* a, float* b, float* c, float* d, float* e) { +__global__ void test_norm4df(float* a, float* b, float* c, float* d, float* e) { int tid = threadIdx.x; e[tid] = norm4df(a[tid], b[tid], c[tid], d[tid]); } -__global__ void test_normf(hipLaunchParm lp, float* a, float* b) { +__global__ void test_normf(float* a, float* b) { int tid = threadIdx.x; b[tid] = normf(N, a); } -__global__ void test_rnorm3df(hipLaunchParm lp, float* a, float* b, float* c, float* d) { +__global__ void test_rnorm3df(float* a, float* b, float* c, float* d) { int tid = threadIdx.x; d[tid] = rnorm3df(a[tid], b[tid], c[tid]); } -__global__ void test_rnorm4df(hipLaunchParm lp, float* a, float* b, float* c, float* d, float* e) { +__global__ void test_rnorm4df(float* a, float* b, float* c, float* d, float* e) { int tid = threadIdx.x; e[tid] = rnorm4df(a[tid], b[tid], c[tid], d[tid]); } -__global__ void test_rnormf(hipLaunchParm lp, float* a, float* b) { +__global__ void test_rnormf(float* a, float* b) { int tid = threadIdx.x; b[tid] = rnormf(N, a); } -__global__ void test_erfinvf(hipLaunchParm lp, float* a, float* b) { +__global__ void test_erfinvf(float* a, float* b) { int tid = threadIdx.x; b[tid] = erff(erfinvf(a[tid])); } @@ -124,7 +124,7 @@ bool run_sincosf() { hipMalloc((void**)&Bd, SIZE); hipMalloc((void**)&Cd, SIZE); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_sincosf, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); + hipLaunchKernelGGL(test_sincosf, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); int passed = 0; @@ -166,7 +166,7 @@ bool run_sincospif() { hipMalloc((void**)&Bd, SIZE); hipMalloc((void**)&Cd, SIZE); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_sincospif, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); + hipLaunchKernelGGL(test_sincospif, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); int passed = 0; @@ -210,7 +210,7 @@ bool run_fdividef() { hipMalloc((void**)&Cd, SIZE); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_fdividef, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); + hipLaunchKernelGGL(test_fdividef, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -244,7 +244,7 @@ bool run_llrintf() { hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, N * sizeof(long long int)); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_llrintf, dim3(1), dim3(N), 0, 0, Ad, Bd); + hipLaunchKernelGGL(test_llrintf, dim3(1), dim3(N), 0, 0, Ad, Bd); hipMemcpy(B, Bd, N * sizeof(long long int), hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -277,7 +277,7 @@ bool run_lrintf() { hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, N * sizeof(long int)); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_lrintf, dim3(1), dim3(N), 0, 0, Ad, Bd); + hipLaunchKernelGGL(test_lrintf, dim3(1), dim3(N), 0, 0, Ad, Bd); hipMemcpy(B, Bd, N * sizeof(long int), hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -310,7 +310,7 @@ bool run_rintf() { hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, SIZE); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_rintf, dim3(1), dim3(N), 0, 0, Ad, Bd); + hipLaunchKernelGGL(test_rintf, dim3(1), dim3(N), 0, 0, Ad, Bd); hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -344,7 +344,7 @@ bool run_llroundf() { hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, N * sizeof(long long int)); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_llroundf, dim3(1), dim3(N), 0, 0, Ad, Bd); + hipLaunchKernelGGL(test_llroundf, dim3(1), dim3(N), 0, 0, Ad, Bd); hipMemcpy(B, Bd, N * sizeof(long long int), hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -377,7 +377,7 @@ bool run_lroundf() { hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, N * sizeof(long int)); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_lroundf, dim3(1), dim3(N), 0, 0, Ad, Bd); + hipLaunchKernelGGL(test_lroundf, dim3(1), dim3(N), 0, 0, Ad, Bd); hipMemcpy(B, Bd, N * sizeof(long int), hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -420,7 +420,7 @@ bool run_norm3df() { hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_norm3df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd); + hipLaunchKernelGGL(test_norm3df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd); hipMemcpy(D, Dd, SIZE, hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -469,7 +469,7 @@ bool run_norm4df() { hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); hipMemcpy(Dd, D, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_norm4df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed); + hipLaunchKernelGGL(test_norm4df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed); hipMemcpy(E, Ed, SIZE, hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -510,7 +510,7 @@ bool run_normf() { hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, SIZE); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_normf, dim3(1), dim3(N), 0, 0, Ad, Bd); + hipLaunchKernelGGL(test_normf, dim3(1), dim3(N), 0, 0, Ad, Bd); hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -547,7 +547,7 @@ bool run_rhypotf() { hipMalloc((void**)&Cd, SIZE); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_rhypotf, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); + hipLaunchKernelGGL(test_rhypotf, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -590,7 +590,7 @@ bool run_rnorm3df() { hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_rnorm3df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd); + hipLaunchKernelGGL(test_rnorm3df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd); hipMemcpy(D, Dd, SIZE, hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -639,7 +639,7 @@ bool run_rnorm4df() { hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); hipMemcpy(Dd, D, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_rnorm4df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed); + hipLaunchKernelGGL(test_rnorm4df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed); hipMemcpy(E, Ed, SIZE, hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -680,7 +680,7 @@ bool run_rnormf() { hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, SIZE); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_rnormf, dim3(1), dim3(N), 0, 0, Ad, Bd); + hipLaunchKernelGGL(test_rnormf, dim3(1), dim3(N), 0, 0, Ad, Bd); hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) { @@ -712,7 +712,7 @@ bool run_erfinvf() { hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, SIZE); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(test_erfinvf, dim3(1), dim3(N), 0, 0, Ad, Bd); + hipLaunchKernelGGL(test_erfinvf, dim3(1), dim3(N), 0, 0, Ad, Bd); hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); int passed = 0; for (int i = 0; i < 512; i++) {