Moved device code to mimic cuda header behavior
1. All fp32 and fp64 math device/host functions should be in math_functions.h/.cpp. 2. All fp32 and fp64 fast-math intrinsics for device/host functions should be in device_functions.h/.cpp. 3. All device code implementations should be in device_util.h/.cpp. 4. Hence, moved code and created new header files accordingly. 5. Added math_functions.cpp/.h. 6. Changed the #ifndef include-guard names so that headers with the same name (e.g. hip/hip_runtime.h and hip/hcc_detail/hip_runtime.h) do not conflict. 7. Updated the tests to match the code changes by including the appropriate headers. 8. Added math_functions.cpp to CMakeLists.txt. 9. Some of the tests are still broken, mostly those for host math functions; they will be fixed in the next commit. 10. TODO: fix compilation issues for host math functions. Change-Id: I7a17637d7e294a7d224ffba932c1a08668febd26
Этот коммит содержится в:
@@ -181,7 +181,8 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
src/device_util.cpp
|
||||
src/hip_ldg.cpp
|
||||
src/hip_fp16.cpp
|
||||
src/device_functions.cpp)
|
||||
src/device_functions.cpp
|
||||
src/math_functions.cpp)
|
||||
|
||||
# Append HCC-specific options to the shared-library linker flags: add ${HCC_HOME}/lib
# as a library search path, link against mcwamp, pass -Bsymbolic to the linker, and
# add an -rpath option followed by ${HCC_HOME}/lib.
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${HCC_HOME}/lib -lmcwamp -Wl,-Bsymbolic -Wl,-rpath ${HCC_HOME}/lib")
|
||||
# Build the hip_hcc shared library from the files listed in SOURCE_FILES_RUNTIME.
add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME})
|
||||
|
||||
@@ -23,6 +23,173 @@ THE SOFTWARE.
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hip_vector_types.h>
|
||||
|
||||
// Single Precision Fast Math
|
||||
extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32");
|
||||
extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32");
|
||||
__device__ float __hip_fast_exp10f(float);
|
||||
__device__ float __hip_fast_expf(float);
|
||||
__device__ float __hip_fast_frsqrt_rn(float);
|
||||
extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32");
|
||||
__device__ float __hip_fast_fsqrt_rn(float);
|
||||
__device__ float __hip_fast_fsqrt_ru(float);
|
||||
__device__ float __hip_fast_fsqrt_rz(float);
|
||||
__device__ float __hip_fast_log10f(float);
|
||||
extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32");
|
||||
__device__ float __hip_fast_logf(float);
|
||||
__device__ float __hip_fast_powf(float, float);
|
||||
__device__ void __hip_fast_sincosf(float,float*,float*);
|
||||
extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
|
||||
__device__ float __hip_fast_tanf(float);
|
||||
extern __attribute__((const)) float __hip_fast_fmaf(float,float,float) __asm("llvm.fma.f32");
|
||||
extern __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32");
|
||||
|
||||
extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");
|
||||
extern __attribute__((const)) double __hip_fast_fma(double,double,double) __asm("llvm.fma.f64");
|
||||
extern __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64");
|
||||
|
||||
|
||||
// Single Precision Fast Math
|
||||
// CUDA-style fast cosine intrinsic; forwards to __hip_fast_cosf.
__device__ inline float __cosf(float x)
{
    return __hip_fast_cosf(x);
}

// CUDA-style fast exp10 intrinsic; forwards to __hip_fast_exp10f.
__device__ inline float __exp10f(float x)
{
    return __hip_fast_exp10f(x);
}

// CUDA-style fast exp intrinsic; forwards to __hip_fast_expf.
__device__ inline float __expf(float x)
{
    return __hip_fast_expf(x);
}

// CUDA-style __frsqrt_rn intrinsic; forwards to __hip_fast_frsqrt_rn.
__device__ inline float __frsqrt_rn(float x)
{
    return __hip_fast_frsqrt_rn(x);
}
|
||||
|
||||
// Single-precision square-root intrinsics, one per rounding-mode suffix.
// Each forwards to the __hip_fast_fsqrt_* primitive with the same suffix.
// Only the _rd primitive is declared with an asm label ("llvm.sqrt.f32");
// the _rn/_ru/_rz primitives are plain device functions defined elsewhere.

__device__ inline float __fsqrt_rd(float x)
{
    return __hip_fast_fsqrt_rd(x);
}

__device__ inline float __fsqrt_rn(float x)
{
    return __hip_fast_fsqrt_rn(x);
}

__device__ inline float __fsqrt_ru(float x)
{
    return __hip_fast_fsqrt_ru(x);
}

__device__ inline float __fsqrt_rz(float x)
{
    return __hip_fast_fsqrt_rz(x);
}
|
||||
|
||||
// CUDA-style fast log10 intrinsic; forwards to __hip_fast_log10f.
__device__ inline float __log10f(float x)
{
    return __hip_fast_log10f(x);
}

// CUDA-style fast log2 intrinsic; forwards to __hip_fast_log2f
// (declared with the asm label "llvm.log2.f32").
__device__ inline float __log2f(float x)
{
    return __hip_fast_log2f(x);
}

// CUDA-style fast natural-log intrinsic; forwards to __hip_fast_logf.
__device__ inline float __logf(float x)
{
    return __hip_fast_logf(x);
}

// CUDA-style fast power intrinsic; forwards (base, exponent) to __hip_fast_powf.
__device__ inline float __powf(float base, float exponent)
{
    return __hip_fast_powf(base, exponent);
}
|
||||
|
||||
// CUDA-style fast sincos intrinsic; hands x and both output pointers to
// __hip_fast_sincosf, which writes through s and c. Returns nothing.
__device__ inline void __sincosf(float x, float *s, float *c)
{
    __hip_fast_sincosf(x, s, c);
}

// CUDA-style fast sine intrinsic; forwards to __hip_fast_sinf
// (declared with the asm label "llvm.sin.f32").
__device__ inline float __sinf(float x)
{
    return __hip_fast_sinf(x);
}

// CUDA-style fast tangent intrinsic; forwards to __hip_fast_tanf.
__device__ inline float __tanf(float x)
{
    return __hip_fast_tanf(x);
}
|
||||
|
||||
// Single-precision fused multiply-add intrinsics, one per rounding-mode suffix.
// All four forward to the same primitive, __hip_fast_fmaf (asm label
// "llvm.fma.f32"), so the _rd/_rn/_ru/_rz suffix is not differentiated here.

__device__ inline float __fmaf_rd(float x, float y, float z)
{
    return __hip_fast_fmaf(x, y, z);
}

__device__ inline float __fmaf_rn(float x, float y, float z)
{
    return __hip_fast_fmaf(x, y, z);
}

__device__ inline float __fmaf_ru(float x, float y, float z)
{
    return __hip_fast_fmaf(x, y, z);
}

__device__ inline float __fmaf_rz(float x, float y, float z)
{
    return __hip_fast_fmaf(x, y, z);
}
|
||||
|
||||
// Single-precision reciprocal intrinsics, one per rounding-mode suffix.
// All four forward to the same primitive, __hip_fast_frcp (asm label
// "llvm.amdgcn.rcp.f32"), so the _rd/_rn/_ru/_rz suffix is not differentiated here.

__device__ inline float __frcp_rd(float x)
{
    return __hip_fast_frcp(x);
}

__device__ inline float __frcp_rn(float x)
{
    return __hip_fast_frcp(x);
}

__device__ inline float __frcp_ru(float x)
{
    return __hip_fast_frcp(x);
}

__device__ inline float __frcp_rz(float x)
{
    return __hip_fast_frcp(x);
}
|
||||
|
||||
// Double-precision square-root intrinsics, one per rounding-mode suffix.
// All four forward to the same primitive, __hip_fast_dsqrt (asm label
// "llvm.sqrt.f64"), so the _rd/_rn/_ru/_rz suffix is not differentiated here.

__device__ inline double __dsqrt_rd(double x)
{
    return __hip_fast_dsqrt(x);
}

__device__ inline double __dsqrt_rn(double x)
{
    return __hip_fast_dsqrt(x);
}

__device__ inline double __dsqrt_ru(double x)
{
    return __hip_fast_dsqrt(x);
}

__device__ inline double __dsqrt_rz(double x)
{
    return __hip_fast_dsqrt(x);
}
|
||||
|
||||
// Double-precision fused multiply-add intrinsics, one per rounding-mode suffix.
// All four forward to the same primitive, __hip_fast_fma (asm label
// "llvm.fma.f64"), so the _rd/_rn/_ru/_rz suffix is not differentiated here.

__device__ inline double __fma_rd(double x, double y, double z)
{
    return __hip_fast_fma(x, y, z);
}

__device__ inline double __fma_rn(double x, double y, double z)
{
    return __hip_fast_fma(x, y, z);
}

__device__ inline double __fma_ru(double x, double y, double z)
{
    return __hip_fast_fma(x, y, z);
}

__device__ inline double __fma_rz(double x, double y, double z)
{
    return __hip_fast_fma(x, y, z);
}
|
||||
|
||||
// Double-precision reciprocal intrinsics, one per rounding-mode suffix.
// All four forward to the same primitive, __hip_fast_drcp (asm label
// "llvm.amdgcn.rcp.f64"), so the _rd/_rn/_ru/_rz suffix is not differentiated here.

__device__ inline double __drcp_rd(double x)
{
    return __hip_fast_drcp(x);
}

__device__ inline double __drcp_rn(double x)
{
    return __hip_fast_drcp(x);
}

__device__ inline double __drcp_ru(double x)
{
    return __hip_fast_drcp(x);
}

__device__ inline double __drcp_rz(double x)
{
    return __hip_fast_drcp(x);
}
|
||||
|
||||
|
||||
// Integer multiply helpers with C linkage; their definitions are not in this header.
// NOTE(review): the names suggest 24-bit multiply and multiply-high semantics —
// confirm against the definitions.
extern "C" {
unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int);
signed int __hip_hc_ir_mul24_int(signed int, signed int);
signed int __hip_hc_ir_mulhi_int(signed int, signed int);
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_FP16_H
|
||||
#define HIP_FP16_H
|
||||
#ifndef HIP_HCC_DETAIL_FP16_H
|
||||
#define HIP_HCC_DETAIL_FP16_H
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
@@ -452,8 +452,6 @@ typedef struct __attribute__((aligned(4))){
|
||||
} __half2;
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -121,208 +121,6 @@ extern int HIP_TRACE_API;
|
||||
#define __HCC_C__
|
||||
#endif
|
||||
|
||||
__device__ float acosf(float x);
|
||||
__device__ float acoshf(float x);
|
||||
__device__ float asinf(float x);
|
||||
__device__ float asinhf(float x);
|
||||
__device__ float atan2f(float y, float x);
|
||||
__device__ float atanf(float x);
|
||||
__device__ float atanhf(float x);
|
||||
__device__ float cbrtf(float x);
|
||||
__device__ float ceilf(float x);
|
||||
__device__ float copysignf(float x, float y);
|
||||
__device__ float coshf(float x);
|
||||
__device__ float cyl_bessel_i0f(float x);
|
||||
__device__ float cyl_bessel_i1f(float x);
|
||||
__device__ float erfcf(float x);
|
||||
__device__ float erfcinvf(float y);
|
||||
|
||||
__device__ float erfcxf(float x);
|
||||
__device__ float erff(float x);
|
||||
__device__ float erfinvf(float y);
|
||||
__device__ float exp2f(float x);
|
||||
__device__ float expm1f(float x);
|
||||
__device__ float fabsf(float x);
|
||||
__device__ float fdimf(float x, float y);
|
||||
__device__ __host__ float fdividef(float x, float y);
|
||||
__device__ float floorf(float x);
|
||||
__device__ float fmaf(float x, float y, float z);
|
||||
__device__ float fmaxf(float x, float y);
|
||||
__device__ float fminf(float x, float y);
|
||||
__device__ float fmodf(float x, float y);
|
||||
__device__ float frexpf(float x, float y);
|
||||
__device__ float hypotf(float x, float y);
|
||||
__device__ float ilogbf(float x);
|
||||
__host__ __device__ unsigned isfinite(float a);
|
||||
__device__ unsigned isinf(float a);
|
||||
__device__ unsigned isnan(float a);
|
||||
__device__ float j0f(float x);
|
||||
__device__ float j1f(float x);
|
||||
__device__ float jnf(int n, float x);
|
||||
__device__ float ldexpf(float x, int exp);
|
||||
__device__ float lgammaf(float x);
|
||||
__device__ long long int llrintf(float x);
|
||||
__device__ long long int llroundf(float x);
|
||||
__device__ float log1pf(float x);
|
||||
__device__ float logbf(float x);
|
||||
__device__ long int lrintf(float x);
|
||||
__device__ long int lroundf(float x);
|
||||
__device__ float modff(float x, float *iptr);
|
||||
__device__ float nanf(const char* tagp);
|
||||
__device__ float nearbyintf(float x);
|
||||
__device__ float nextafterf(float x, float y);
|
||||
__device__ float norm3df(float a, float b, float c);
|
||||
__device__ float norm4df(float a, float b, float c, float d);
|
||||
__device__ float normcdff(float y);
|
||||
__device__ float normcdfinvf(float y);
|
||||
__device__ float normf(int dim, const float *a);
|
||||
__device__ float rcbrtf(float x);
|
||||
__device__ float remainderf(float x, float y);
|
||||
__device__ float remquof(float x, float y, int *quo);
|
||||
__device__ float rhypotf(float x, float y);
|
||||
__device__ float rintf(float x);
|
||||
__device__ float rnorm3df(float a, float b, float c);
|
||||
__device__ float rnorm4df(float a, float b, float c, float d);
|
||||
__device__ float rnormf(int dim, const float* a);
|
||||
__device__ float roundf(float x);
|
||||
__device__ float rsqrtf(float x);
|
||||
__device__ float scalblnf(float x, long int n);
|
||||
__device__ float scalbnf(float x, int n);
|
||||
__host__ __device__ unsigned signbit(float a);
|
||||
__device__ void sincospif(float x, float *sptr, float *cptr);
|
||||
__device__ float sinhf(float x);
|
||||
__device__ float sinpif(float x);
|
||||
__device__ float sqrtf(float x);
|
||||
__device__ float tanhf(float x);
|
||||
__device__ float tgammaf(float x);
|
||||
__device__ float truncf(float x);
|
||||
__device__ float y0f(float x);
|
||||
__device__ float y1f(float x);
|
||||
__device__ float ynf(int n, float x);
|
||||
|
||||
// Single-precision functions declared for both host and device.
__host__ __device__ float cospif(float x);
__host__ __device__ float sinpif(float x);
// __device__ float sqrtf(float x);   (left disabled in the original)
__host__ __device__ float rsqrtf(float x);

// Host-side declarations of single-precision functions that are also
// declared with __device__ earlier in this header.
__host__ float normcdff(float y);

__host__ float erfcinvf(float y);
__host__ float erfcxf(float x);
__host__ float erfinvf(float y);
__host__ float norm3df(float a, float b, float c);
__host__ float normcdfinvf(float y);
__host__ float norm4df(float a, float b, float c, float d);
__host__ float rcbrtf(float x);
__host__ float rhypotf(float x, float y);
__host__ float rnorm3df(float a, float b, float c);
__host__ float rnormf(int dim, const float* a);
__host__ float rnorm4df(float a, float b, float c, float d);
__host__ void sincospif(float x, float *sptr, float *cptr);
|
||||
|
||||
__device__ double acos(double x);
|
||||
__device__ double acosh(double x);
|
||||
__device__ double asin(double x);
|
||||
__device__ double asinh(double x);
|
||||
__device__ double atan(double x);
|
||||
__device__ double atan2(double y, double x);
|
||||
__device__ double atanh(double x);
|
||||
__device__ double cbrt(double x);
|
||||
__device__ double ceil(double x);
|
||||
__device__ double copysign(double x, double y);
|
||||
__device__ double cos(double x);
|
||||
__device__ double cosh(double x);
|
||||
__host__ __device__ double cospi(double x);
|
||||
__device__ double cyl_bessel_i0(double x);
|
||||
__device__ double cyl_bessel_i1(double x);
|
||||
__device__ double erf(double x);
|
||||
__device__ double erfc(double x);
|
||||
__device__ double erfcinv(double y);
|
||||
__device__ double erfcx(double x);
|
||||
__device__ double erfinv(double x);
|
||||
__device__ double exp(double x);
|
||||
__device__ double exp10(double x);
|
||||
__device__ double exp2(double x);
|
||||
__device__ double expm1(double x);
|
||||
__device__ double fabs(double x);
|
||||
__device__ double fdim(double x, double y);
|
||||
__device__ double fdivide(double x, double y);
|
||||
__device__ double floor(double x);
|
||||
__device__ double fma(double x, double y, double z);
|
||||
__device__ double fmax(double x, double y);
|
||||
__device__ double fmin(double x, double y);
|
||||
__device__ double fmod(double x, double y);
|
||||
__device__ double frexp(double x, int *nptr);
|
||||
__device__ double hypot(double x, double y);
|
||||
__device__ double ilogb(double x);
|
||||
__host__ __device__ unsigned isfinite(double x);
|
||||
__device__ unsigned isinf(double x);
|
||||
__device__ unsigned isnan(double x);
|
||||
__device__ double j0(double x);
|
||||
__device__ double j1(double x);
|
||||
__device__ double jn(int n, double x);
|
||||
__device__ double ldexp(double x, int exp);
|
||||
__device__ double lgamma(double x);
|
||||
__device__ long long llrint(double x);
|
||||
__device__ long long llround(double x);
|
||||
__device__ double log(double x);
|
||||
__device__ double log10(double x);
|
||||
__device__ double log1p(double x);
|
||||
__device__ double log2(double x);
|
||||
__device__ double logb(double x);
|
||||
__device__ long int lrint(double x);
|
||||
__device__ long int lround(double x);
|
||||
__device__ double modf(double x, double *iptr);
|
||||
__device__ double nan(const char* tagp);
|
||||
__device__ double nearbyint(double x);
|
||||
__device__ double nextafter(double x, double y);
|
||||
__device__ double norm(int dim, const double* t);
|
||||
__device__ double norm3d(double a, double b, double c);
|
||||
__host__ double norm3d(double a, double b, double c);
|
||||
__device__ double norm4d(double a, double b, double c, double d);
|
||||
__host__ double norm4d(double a, double b, double c, double d);
|
||||
__device__ double normcdf(double y);
|
||||
__host__ double normcdf(double y);
|
||||
__device__ double normcdfinv(double y);
|
||||
__host__ double normcdfinv(double y);
|
||||
__device__ double pow(double x, double y);
|
||||
__device__ double rcbrt(double x);
|
||||
__host__ double rcbrt(double x);
|
||||
__device__ double remainder(double x, double y);
|
||||
__device__ double remquo(double x, double y, int *quo);
|
||||
__device__ double rhypot(double x, double y);
|
||||
__host__ double rhypot(double x, double y);
|
||||
__device__ double rint(double x);
|
||||
__device__ double rnorm(int dim, const double* t);
|
||||
__host__ double rnorm(int dim, const double* t);
|
||||
__device__ double rnorm3d(double a, double b, double c);
|
||||
__host__ double rnorm3d(double a, double b, double c);
|
||||
__device__ double rnorm4d(double a, double b, double c, double d);
|
||||
__host__ double rnorm4d(double a, double b, double c, double d);
|
||||
__device__ double round(double x);
|
||||
__host__ __device__ double rsqrt(double x);
|
||||
__device__ double scalbln(double x, long int n);
|
||||
__device__ double scalbn(double x, int n);
|
||||
__host__ __device__ unsigned signbit(double a);
|
||||
__device__ double sin(double a);
|
||||
__device__ void sincos(double x, double *sptr, double *cptr);
|
||||
__device__ void sincospi(double x, double *sptr, double *cptr);
|
||||
__host__ void sincospi(double x, double *sptr, double *cptr);
|
||||
__device__ double sinh(double x);
|
||||
__host__ __device__ double sinpi(double x);
|
||||
__device__ double sqrt(double x);
|
||||
__device__ double tan(double x);
|
||||
__device__ double tanh(double x);
|
||||
__device__ double tgamma(double x);
|
||||
__device__ double trunc(double x);
|
||||
__device__ double y0(double x);
|
||||
__device__ double y1(double y);
|
||||
__device__ double yn(int n, double x);
|
||||
|
||||
__host__ double erfcinv(double y);
|
||||
__host__ double erfcx(double x);
|
||||
__host__ double erfinv(double y);
|
||||
__host__ double fdivide(double x, double y);
|
||||
|
||||
// TODO - hipify-clang - change to use the function call.
|
||||
//#define warpSize hc::__wavesize()
|
||||
// Declaration only: warpSize is a constant int whose definition is not in this header.
extern const int warpSize;
|
||||
@@ -451,252 +249,6 @@ __host__ __device__ int max(int arg1, int arg2);
|
||||
|
||||
// Device function declaration returning a void pointer annotated with address_space(3).
// NOTE(review): presumably the base of the dynamically sized group (shared) memory
// region — confirm against the definition.
__device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr();
|
||||
|
||||
//TODO - add a couple fast math operations here, the set here will grow :
|
||||
|
||||
// Single Precision Precise Math
|
||||
__device__ float __hip_precise_cosf(float);
|
||||
__device__ float __hip_precise_exp10f(float);
|
||||
__device__ float __hip_precise_expf(float);
|
||||
__device__ float __hip_precise_frsqrt_rn(float);
|
||||
__device__ float __hip_precise_fsqrt_rd(float);
|
||||
__device__ float __hip_precise_fsqrt_rn(float);
|
||||
__device__ float __hip_precise_fsqrt_ru(float);
|
||||
__device__ float __hip_precise_fsqrt_rz(float);
|
||||
__device__ float __hip_precise_log10f(float);
|
||||
__device__ float __hip_precise_log2f(float);
|
||||
__device__ float __hip_precise_logf(float);
|
||||
__device__ float __hip_precise_powf(float, float);
|
||||
__device__ void __hip_precise_sincosf(float,float*,float*);
|
||||
__device__ float __hip_precise_sinf(float);
|
||||
__device__ float __hip_precise_tanf(float);
|
||||
|
||||
// Double Precision Precise Math
|
||||
__device__ double __hip_precise_dsqrt_rd(double);
|
||||
__device__ double __hip_precise_dsqrt_rn(double);
|
||||
__device__ double __hip_precise_dsqrt_ru(double);
|
||||
__device__ double __hip_precise_dsqrt_rz(double);
|
||||
|
||||
// Single Precision Fast Math
|
||||
extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32");
|
||||
extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32");
|
||||
__device__ float __hip_fast_exp10f(float);
|
||||
__device__ float __hip_fast_expf(float);
|
||||
__device__ float __hip_fast_frsqrt_rn(float);
|
||||
extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32");
|
||||
__device__ float __hip_fast_fsqrt_rn(float);
|
||||
__device__ float __hip_fast_fsqrt_ru(float);
|
||||
__device__ float __hip_fast_fsqrt_rz(float);
|
||||
__device__ float __hip_fast_log10f(float);
|
||||
extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32");
|
||||
__device__ float __hip_fast_logf(float);
|
||||
__device__ float __hip_fast_powf(float, float);
|
||||
__device__ void __hip_fast_sincosf(float,float*,float*);
|
||||
extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
|
||||
__device__ float __hip_fast_tanf(float);
|
||||
extern __attribute__((const)) float __hip_fast_fmaf(float,float,float) __asm("llvm.fma.f32");
|
||||
extern __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32");
|
||||
|
||||
extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");
|
||||
extern __attribute__((const)) double __hip_fast_fma(double,double,double) __asm("llvm.fma.f64");
|
||||
extern __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64");
|
||||
|
||||
#ifdef HIP_FAST_MATH
|
||||
// Single Precision Precise Math when enabled
|
||||
|
||||
__device__ inline float cosf(float x) {
|
||||
return __hip_fast_cosf(x);
|
||||
}
|
||||
|
||||
__device__ inline float exp10f(float x) {
|
||||
return __hip_fast_exp10f(x);
|
||||
}
|
||||
|
||||
__device__ inline float expf(float x) {
|
||||
return __hip_fast_expf(x);
|
||||
}
|
||||
|
||||
__device__ inline float log10f(float x) {
|
||||
return __hip_fast_log10f(x);
|
||||
}
|
||||
|
||||
__device__ inline float log2f(float x) {
|
||||
return __hip_fast_log2f(x);
|
||||
}
|
||||
|
||||
__device__ inline float logf(float x) {
|
||||
return __hip_fast_logf(x);
|
||||
}
|
||||
|
||||
__device__ inline float powf(float base, float exponent) {
|
||||
return __hip_fast_powf(base, exponent);
|
||||
}
|
||||
|
||||
__device__ inline void sincosf(float x, float *s, float *c) {
|
||||
return __hip_fast_sincosf(x, s, c);
|
||||
}
|
||||
|
||||
__device__ inline float sinf(float x) {
|
||||
return __hip_fast_sinf(x);
|
||||
}
|
||||
|
||||
__device__ inline float tanf(float x) {
|
||||
return __hip_fast_tanf(x);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
__device__ float sinf(float);
|
||||
__device__ float cosf(float);
|
||||
__device__ float tanf(float);
|
||||
__device__ void sincosf(float, float*, float*);
|
||||
__device__ float logf(float);
|
||||
__device__ float log2f(float);
|
||||
__device__ float log10f(float);
|
||||
__device__ float expf(float);
|
||||
__device__ float exp10f(float);
|
||||
__device__ float powf(float, float);
|
||||
|
||||
#endif
|
||||
// Single Precision Fast Math
|
||||
// CUDA-style fast cosine intrinsic; forwards to __hip_fast_cosf.
__device__ inline float __cosf(float x)
{
    return __hip_fast_cosf(x);
}

// CUDA-style fast exp10 intrinsic; forwards to __hip_fast_exp10f.
__device__ inline float __exp10f(float x)
{
    return __hip_fast_exp10f(x);
}

// CUDA-style fast exp intrinsic; forwards to __hip_fast_expf.
__device__ inline float __expf(float x)
{
    return __hip_fast_expf(x);
}

// CUDA-style __frsqrt_rn intrinsic; forwards to __hip_fast_frsqrt_rn.
__device__ inline float __frsqrt_rn(float x)
{
    return __hip_fast_frsqrt_rn(x);
}
|
||||
|
||||
// Single-precision square-root intrinsics, one per rounding-mode suffix.
// Each forwards to the __hip_fast_fsqrt_* primitive with the same suffix.
// Only the _rd primitive is declared with an asm label ("llvm.sqrt.f32");
// the _rn/_ru/_rz primitives are plain device functions defined elsewhere.

__device__ inline float __fsqrt_rd(float x)
{
    return __hip_fast_fsqrt_rd(x);
}

__device__ inline float __fsqrt_rn(float x)
{
    return __hip_fast_fsqrt_rn(x);
}

__device__ inline float __fsqrt_ru(float x)
{
    return __hip_fast_fsqrt_ru(x);
}

__device__ inline float __fsqrt_rz(float x)
{
    return __hip_fast_fsqrt_rz(x);
}
|
||||
|
||||
// CUDA-style fast log10 intrinsic; forwards to __hip_fast_log10f.
__device__ inline float __log10f(float x)
{
    return __hip_fast_log10f(x);
}

// CUDA-style fast log2 intrinsic; forwards to __hip_fast_log2f
// (declared with the asm label "llvm.log2.f32").
__device__ inline float __log2f(float x)
{
    return __hip_fast_log2f(x);
}

// CUDA-style fast natural-log intrinsic; forwards to __hip_fast_logf.
__device__ inline float __logf(float x)
{
    return __hip_fast_logf(x);
}

// CUDA-style fast power intrinsic; forwards (base, exponent) to __hip_fast_powf.
__device__ inline float __powf(float base, float exponent)
{
    return __hip_fast_powf(base, exponent);
}
|
||||
|
||||
// CUDA-style fast sincos intrinsic; hands x and both output pointers to
// __hip_fast_sincosf, which writes through s and c. Returns nothing.
__device__ inline void __sincosf(float x, float *s, float *c)
{
    __hip_fast_sincosf(x, s, c);
}

// CUDA-style fast sine intrinsic; forwards to __hip_fast_sinf
// (declared with the asm label "llvm.sin.f32").
__device__ inline float __sinf(float x)
{
    return __hip_fast_sinf(x);
}

// CUDA-style fast tangent intrinsic; forwards to __hip_fast_tanf.
__device__ inline float __tanf(float x)
{
    return __hip_fast_tanf(x);
}
|
||||
|
||||
// Single-precision fused multiply-add intrinsics, one per rounding-mode suffix.
// All four forward to the same primitive, __hip_fast_fmaf (asm label
// "llvm.fma.f32"), so the _rd/_rn/_ru/_rz suffix is not differentiated here.

__device__ inline float __fmaf_rd(float x, float y, float z)
{
    return __hip_fast_fmaf(x, y, z);
}

__device__ inline float __fmaf_rn(float x, float y, float z)
{
    return __hip_fast_fmaf(x, y, z);
}

__device__ inline float __fmaf_ru(float x, float y, float z)
{
    return __hip_fast_fmaf(x, y, z);
}

__device__ inline float __fmaf_rz(float x, float y, float z)
{
    return __hip_fast_fmaf(x, y, z);
}
|
||||
|
||||
// Single-precision reciprocal intrinsics, one per rounding-mode suffix.
// All four forward to the same primitive, __hip_fast_frcp (asm label
// "llvm.amdgcn.rcp.f32"), so the _rd/_rn/_ru/_rz suffix is not differentiated here.

__device__ inline float __frcp_rd(float x)
{
    return __hip_fast_frcp(x);
}

__device__ inline float __frcp_rn(float x)
{
    return __hip_fast_frcp(x);
}

__device__ inline float __frcp_ru(float x)
{
    return __hip_fast_frcp(x);
}

__device__ inline float __frcp_rz(float x)
{
    return __hip_fast_frcp(x);
}
|
||||
|
||||
// Double-precision square-root intrinsics, one per rounding-mode suffix.
// All four forward to the same primitive, __hip_fast_dsqrt (asm label
// "llvm.sqrt.f64"), so the _rd/_rn/_ru/_rz suffix is not differentiated here.

__device__ inline double __dsqrt_rd(double x)
{
    return __hip_fast_dsqrt(x);
}

__device__ inline double __dsqrt_rn(double x)
{
    return __hip_fast_dsqrt(x);
}

__device__ inline double __dsqrt_ru(double x)
{
    return __hip_fast_dsqrt(x);
}

__device__ inline double __dsqrt_rz(double x)
{
    return __hip_fast_dsqrt(x);
}
|
||||
|
||||
// Double-precision fused multiply-add intrinsics, one per rounding-mode suffix.
// All four forward to the same primitive, __hip_fast_fma (asm label
// "llvm.fma.f64"), so the _rd/_rn/_ru/_rz suffix is not differentiated here.

__device__ inline double __fma_rd(double x, double y, double z)
{
    return __hip_fast_fma(x, y, z);
}

__device__ inline double __fma_rn(double x, double y, double z)
{
    return __hip_fast_fma(x, y, z);
}

__device__ inline double __fma_ru(double x, double y, double z)
{
    return __hip_fast_fma(x, y, z);
}

__device__ inline double __fma_rz(double x, double y, double z)
{
    return __hip_fast_fma(x, y, z);
}
|
||||
|
||||
// Double-precision reciprocal intrinsics, one per rounding-mode suffix.
// All four forward to the same primitive, __hip_fast_drcp (asm label
// "llvm.amdgcn.rcp.f64"), so the _rd/_rn/_ru/_rz suffix is not differentiated here.

__device__ inline double __drcp_rd(double x)
{
    return __hip_fast_drcp(x);
}

__device__ inline double __drcp_rn(double x)
{
    return __hip_fast_drcp(x);
}

__device__ inline double __drcp_ru(double x)
{
    return __hip_fast_drcp(x);
}

__device__ inline double __drcp_rz(double x)
{
    return __hip_fast_drcp(x);
}
|
||||
|
||||
/**
|
||||
* CUDA 8 device function features
|
||||
|
||||
@@ -0,0 +1,288 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_HCC_DETAIL_MATH_FUNCTIONS_H
|
||||
#define HIP_HCC_DETAIL_MATH_FUNCTIONS_H
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hip_vector_types.h>
|
||||
#include <hip/hcc_detail/device_functions.h>
|
||||
|
||||
__device__ float acosf(float x);
|
||||
__device__ float acoshf(float x);
|
||||
__device__ float asinf(float x);
|
||||
__device__ float asinhf(float x);
|
||||
__device__ float atan2f(float y, float x);
|
||||
__device__ float atanf(float x);
|
||||
__device__ float atanhf(float x);
|
||||
__device__ float cbrtf(float x);
|
||||
__device__ float ceilf(float x);
|
||||
__device__ float copysignf(float x, float y);
|
||||
__device__ float coshf(float x);
|
||||
__device__ float cyl_bessel_i0f(float x);
|
||||
__device__ float cyl_bessel_i1f(float x);
|
||||
__device__ float erfcf(float x);
|
||||
__device__ float erfcinvf(float y);
|
||||
|
||||
__device__ float erfcxf(float x);
|
||||
__device__ float erff(float x);
|
||||
__device__ float erfinvf(float y);
|
||||
__device__ float exp2f(float x);
|
||||
__device__ float expm1f(float x);
|
||||
__device__ float fabsf(float x);
|
||||
__device__ float fdimf(float x, float y);
|
||||
__device__ __host__ float fdividef(float x, float y);
|
||||
__device__ float floorf(float x);
|
||||
__device__ float fmaf(float x, float y, float z);
|
||||
__device__ float fmaxf(float x, float y);
|
||||
__device__ float fminf(float x, float y);
|
||||
__device__ float fmodf(float x, float y);
|
||||
__device__ float frexpf(float x, float y);
|
||||
__device__ float hypotf(float x, float y);
|
||||
__device__ float ilogbf(float x);
|
||||
__host__ __device__ int isfinite(float a);
|
||||
__device__ unsigned isinf(float a);
|
||||
__device__ unsigned isnan(float a);
|
||||
__device__ float j0f(float x);
|
||||
__device__ float j1f(float x);
|
||||
__device__ float jnf(int n, float x);
|
||||
__device__ float ldexpf(float x, int exp);
|
||||
__device__ float lgammaf(float x);
|
||||
__device__ long long int llrintf(float x);
|
||||
__device__ long long int llroundf(float x);
|
||||
__device__ float log1pf(float x);
|
||||
__device__ float logbf(float x);
|
||||
__device__ long int lrintf(float x);
|
||||
__device__ long int lroundf(float x);
|
||||
__device__ float modff(float x, float *iptr);
|
||||
__device__ float nanf(const char* tagp);
|
||||
__device__ float nearbyintf(float x);
|
||||
__device__ float nextafterf(float x, float y);
|
||||
__device__ float norm3df(float a, float b, float c);
|
||||
__device__ float norm4df(float a, float b, float c, float d);
|
||||
__device__ float normcdff(float y);
|
||||
__device__ float normcdfinvf(float y);
|
||||
__device__ float normf(int dim, const float *a);
|
||||
__device__ float rcbrtf(float x);
|
||||
__device__ float remainderf(float x, float y);
|
||||
__device__ float remquof(float x, float y, int *quo);
|
||||
__device__ float rhypotf(float x, float y);
|
||||
__device__ float rintf(float x);
|
||||
__device__ float rnorm3df(float a, float b, float c);
|
||||
__device__ float rnorm4df(float a, float b, float c, float d);
|
||||
__device__ float rnormf(int dim, const float* a);
|
||||
__device__ float roundf(float x);
|
||||
__device__ float rsqrtf(float x);
|
||||
__device__ float scalblnf(float x, long int n);
|
||||
__device__ float scalbnf(float x, int n);
|
||||
__host__ __device__ unsigned signbit(float a);
|
||||
__device__ void sincospif(float x, float *sptr, float *cptr);
|
||||
__device__ float sinhf(float x);
|
||||
__device__ float sinpif(float x);
|
||||
__device__ float sqrtf(float x);
|
||||
__device__ float tanhf(float x);
|
||||
__device__ float tgammaf(float x);
|
||||
__device__ float truncf(float x);
|
||||
__device__ float y0f(float x);
|
||||
__device__ float y1f(float x);
|
||||
__device__ float ynf(int n, float x);
|
||||
|
||||
// Single-precision functions declared for both host and device.
__host__ __device__ float cospif(float x);
__host__ __device__ float sinpif(float x);
// __device__ float sqrtf(float x);   (left disabled in the original)
__host__ __device__ float rsqrtf(float x);

// Host-side declarations of single-precision functions that are also
// declared with __device__ earlier in this header.
__host__ float normcdff(float y);

__host__ float erfcinvf(float y);
__host__ float erfcxf(float x);
__host__ float erfinvf(float y);
__host__ float norm3df(float a, float b, float c);
__host__ float normcdfinvf(float y);
__host__ float norm4df(float a, float b, float c, float d);
__host__ float rcbrtf(float x);
__host__ float rhypotf(float x, float y);
__host__ float rnorm3df(float a, float b, float c);
__host__ float rnormf(int dim, const float* a);
__host__ float rnorm4df(float a, float b, float c, float d);
__host__ void sincospif(float x, float *sptr, float *cptr);
|
||||
|
||||
__device__ double acos(double x);
|
||||
__device__ double acosh(double x);
|
||||
__device__ double asin(double x);
|
||||
__device__ double asinh(double x);
|
||||
__device__ double atan(double x);
|
||||
__device__ double atan2(double y, double x);
|
||||
__device__ double atanh(double x);
|
||||
__device__ double cbrt(double x);
|
||||
__device__ double ceil(double x);
|
||||
__device__ double copysign(double x, double y);
|
||||
__device__ double cos(double x);
|
||||
__device__ double cosh(double x);
|
||||
__host__ __device__ double cospi(double x);
|
||||
__device__ double cyl_bessel_i0(double x);
|
||||
__device__ double cyl_bessel_i1(double x);
|
||||
__device__ double erf(double x);
|
||||
__device__ double erfc(double x);
|
||||
__device__ double erfcinv(double y);
|
||||
__device__ double erfcx(double x);
|
||||
__device__ double erfinv(double x);
|
||||
__device__ double exp(double x);
|
||||
__device__ double exp10(double x);
|
||||
__device__ double exp2(double x);
|
||||
__device__ double expm1(double x);
|
||||
__device__ double fabs(double x);
|
||||
__device__ double fdim(double x, double y);
|
||||
__device__ double floor(double x);
|
||||
__device__ double fma(double x, double y, double z);
|
||||
__device__ double fmax(double x, double y);
|
||||
__device__ double fmin(double x, double y);
|
||||
__device__ double fmod(double x, double y);
|
||||
__device__ double frexp(double x, int *nptr);
|
||||
__device__ double hypot(double x, double y);
|
||||
__device__ double ilogb(double x);
|
||||
__host__ __device__ unsigned isfinite(double x);
|
||||
__device__ unsigned isinf(double x);
|
||||
__device__ unsigned isnan(double x);
|
||||
__device__ double j0(double x);
|
||||
__device__ double j1(double x);
|
||||
__device__ double jn(int n, double x);
|
||||
__device__ double ldexp(double x, int exp);
|
||||
__device__ double lgamma(double x);
|
||||
__device__ long long llrint(double x);
|
||||
__device__ long long llround(double x);
|
||||
__device__ double log(double x);
|
||||
__device__ double log10(double x);
|
||||
__device__ double log1p(double x);
|
||||
__device__ double log2(double x);
|
||||
__device__ double logb(double x);
|
||||
__device__ long int lrint(double x);
|
||||
__device__ long int lround(double x);
|
||||
__device__ double modf(double x, double *iptr);
|
||||
__device__ double nan(const char* tagp);
|
||||
__device__ double nearbyint(double x);
|
||||
__device__ double nextafter(double x, double y);
|
||||
__device__ double norm(int dim, const double* t);
|
||||
__device__ double norm3d(double a, double b, double c);
|
||||
__host__ double norm3d(double a, double b, double c);
|
||||
__device__ double norm4d(double a, double b, double c, double d);
|
||||
__host__ double norm4d(double a, double b, double c, double d);
|
||||
__device__ double normcdf(double y);
|
||||
__host__ double normcdf(double y);
|
||||
__device__ double normcdfinv(double y);
|
||||
__host__ double normcdfinv(double y);
|
||||
__device__ double pow(double x, double y);
|
||||
__device__ double rcbrt(double x);
|
||||
__host__ double rcbrt(double x);
|
||||
__device__ double remainder(double x, double y);
|
||||
__device__ double remquo(double x, double y, int *quo);
|
||||
__device__ double rhypot(double x, double y);
|
||||
__host__ double rhypot(double x, double y);
|
||||
__device__ double rint(double x);
|
||||
__device__ double rnorm(int dim, const double* t);
|
||||
__host__ double rnorm(int dim, const double* t);
|
||||
__device__ double rnorm3d(double a, double b, double c);
|
||||
__host__ double rnorm3d(double a, double b, double c);
|
||||
__device__ double rnorm4d(double a, double b, double c, double d);
|
||||
__host__ double rnorm4d(double a, double b, double c, double d);
|
||||
__device__ double round(double x);
|
||||
__host__ __device__ double rsqrt(double x);
|
||||
__device__ double scalbln(double x, long int n);
|
||||
__device__ double scalbn(double x, int n);
|
||||
__host__ __device__ unsigned signbit(double a);
|
||||
__device__ double sin(double a);
|
||||
__device__ void sincos(double x, double *sptr, double *cptr);
|
||||
__device__ void sincospi(double x, double *sptr, double *cptr);
|
||||
__host__ void sincospi(double x, double *sptr, double *cptr);
|
||||
__device__ double sinh(double x);
|
||||
__host__ __device__ double sinpi(double x);
|
||||
__device__ double sqrt(double x);
|
||||
__device__ double tan(double x);
|
||||
__device__ double tanh(double x);
|
||||
__device__ double tgamma(double x);
|
||||
__device__ double trunc(double x);
|
||||
__device__ double y0(double x);
|
||||
__device__ double y1(double y);
|
||||
__device__ double yn(int n, double x);
|
||||
|
||||
__host__ double erfcinv(double y);
|
||||
__host__ double erfcx(double x);
|
||||
__host__ double erfinv(double y);
|
||||
__host__ double fdivide(double x, double y);
|
||||
// NOTE(review): the __device__ overload of norm declared earlier takes `int dim` as its
// first parameter; `double x` here looks like a typo — confirm against the definition.
__host__ double norm(double x, const double *t);
|
||||
|
||||
#ifdef HIP_FAST_MATH
|
||||
// Single precision math functions mapped to the fast-math intrinsics when HIP_FAST_MATH is defined
|
||||
|
||||
// Fast-math build: cosf resolves to the __hip_fast_cosf intrinsic.
__device__ inline float cosf(float x) {
    const float fastResult = __hip_fast_cosf(x);
    return fastResult;
}
|
||||
|
||||
// Fast-math build: exp10f resolves to __hip_fast_exp10f.
__device__ inline float exp10f(float x) {
    const float fastResult = __hip_fast_exp10f(x);
    return fastResult;
}
|
||||
|
||||
// Fast-math build: expf resolves to __hip_fast_expf.
__device__ inline float expf(float x) {
    const float fastResult = __hip_fast_expf(x);
    return fastResult;
}
|
||||
|
||||
// Fast-math build: log10f resolves to __hip_fast_log10f.
__device__ inline float log10f(float x) {
    const float fastResult = __hip_fast_log10f(x);
    return fastResult;
}
|
||||
|
||||
// Fast-math build: log2f resolves to __hip_fast_log2f.
__device__ inline float log2f(float x) {
    const float fastResult = __hip_fast_log2f(x);
    return fastResult;
}
|
||||
|
||||
// Fast-math build: logf resolves to __hip_fast_logf.
__device__ inline float logf(float x) {
    const float fastResult = __hip_fast_logf(x);
    return fastResult;
}
|
||||
|
||||
// Fast-math build: powf resolves to __hip_fast_powf(base, exponent).
__device__ inline float powf(float base, float exponent) {
    const float fastResult = __hip_fast_powf(base, exponent);
    return fastResult;
}
|
||||
|
||||
// Fast-math build: sincosf hands x and both output pointers to __hip_fast_sincosf.
__device__ inline void sincosf(float x, float *s, float *c) {
    __hip_fast_sincosf(x, s, c);
}
|
||||
|
||||
// Fast-math build: sinf resolves to the __hip_fast_sinf intrinsic.
__device__ inline float sinf(float x) {
    const float fastResult = __hip_fast_sinf(x);
    return fastResult;
}
|
||||
|
||||
// Fast-math build: tanf resolves to __hip_fast_tanf.
__device__ inline float tanf(float x) {
    const float fastResult = __hip_fast_tanf(x);
    return fastResult;
}
|
||||
|
||||
#else
|
||||
|
||||
__device__ float sinf(float);
|
||||
__device__ float cosf(float);
|
||||
__device__ float tanf(float);
|
||||
__device__ void sincosf(float, float*, float*);
|
||||
__device__ float logf(float);
|
||||
__device__ float log2f(float);
|
||||
__device__ float log10f(float);
|
||||
__device__ float expf(float);
|
||||
__device__ float exp10f(float);
|
||||
__device__ float powf(float, float);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//! HIP = Heterogeneous-compute Interface for Portability
|
||||
//!
|
||||
//! Defines an extremely thin runtime layer that allows source code to be compiled unmodified
|
||||
//! through either AMD HCC or NVCC. Key features tend to be in the spirit
|
||||
//! and terminology of CUDA, but with a portable path to other accelerators as well:
|
||||
//
|
||||
//! Both paths support rich C++ features including classes, templates, lambdas, etc.
|
||||
//! Runtime API is C
|
||||
//! Memory management is based on pure pointers and resembles malloc/free/copy.
|
||||
//
|
||||
//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch macros.
|
||||
//! hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ features.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Some standard header files, these are included by hc.hpp and so want to make them avail on both
|
||||
// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
|
||||
// on NVCC path:
|
||||
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__)
|
||||
#include <hip/hcc_detail/math_functions.h>
|
||||
#elif defined(__HIP_PLATFORM_NVCC__) && !defined (__HIP_PLATFORM_HCC__)
|
||||
#include <hip/nvcc_detail/math_functions.h>
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
@@ -523,3 +523,71 @@ __device__ unsigned long long __umul64hi(unsigned long long int x, unsigned long
|
||||
uHold1.ul = uHold1.ui[1] * uHold2.ui[1];
|
||||
return uHold1.ul;
|
||||
}
|
||||
|
||||
/*
|
||||
HIP specific device functions
|
||||
*/
|
||||
|
||||
// Thin wrapper: forwards (index, src) to hc::__amdgcn_ds_bpermute for unsigned data.
__device__ unsigned __hip_ds_bpermute(int index, unsigned src) {
    const unsigned permuted = hc::__amdgcn_ds_bpermute(index, src);
    return permuted;
}
|
||||
|
||||
// Thin wrapper: forwards (index, src) to hc::__amdgcn_ds_bpermute for float data.
__device__ float __hip_ds_bpermutef(int index, float src) {
    const float permuted = hc::__amdgcn_ds_bpermute(index, src);
    return permuted;
}
|
||||
|
||||
// Thin wrapper: forwards (index, src) to hc::__amdgcn_ds_permute for unsigned data.
__device__ unsigned __hip_ds_permute(int index, unsigned src) {
    const unsigned permuted = hc::__amdgcn_ds_permute(index, src);
    return permuted;
}
|
||||
|
||||
// Thin wrapper: forwards (index, src) to hc::__amdgcn_ds_permute for float data.
__device__ float __hip_ds_permutef(int index, float src) {
    const float permuted = hc::__amdgcn_ds_permute(index, src);
    return permuted;
}
|
||||
|
||||
// Thin wrapper: forwards (src, pattern) to hc::__amdgcn_ds_swizzle for unsigned data.
__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern) {
    const unsigned swizzled = hc::__amdgcn_ds_swizzle(src, pattern);
    return swizzled;
}
|
||||
|
||||
// Thin wrapper: forwards (src, pattern) to hc::__amdgcn_ds_swizzle for float data.
__device__ float __hip_ds_swizzlef(float src, int pattern) {
    const float swizzled = hc::__amdgcn_ds_swizzle(src, pattern);
    return swizzled;
}
|
||||
|
||||
// Thin wrapper: passes all five arguments, in order, to hc::__amdgcn_move_dpp.
__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl) {
    const int moved = hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
    return moved;
}
|
||||
|
||||
#define MASK1 0x00ff00ff
|
||||
#define MASK2 0xff00ff00
|
||||
|
||||
// Adds the packed word `a` of two char4 values in two passes: first the bytes
// selected by MASK1 (0x00ff00ff), then the bytes selected by MASK2 (0xff00ff00).
// Each partial sum is masked again so carries out of a byte are discarded.
__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) {
    const unsigned lhsLow = in1.a & MASK1;
    const unsigned rhsLow = in2.a & MASK1;
    const unsigned lhsHigh = in1.a & MASK2;
    const unsigned rhsHigh = in2.a & MASK2;

    char4 result;
    result.a = (lhsLow + rhsLow) & MASK1;
    result.a = result.a | ((lhsHigh + rhsHigh) & MASK2);
    return result;
}
|
||||
|
||||
// Subtracts the packed word `a` of two char4 values byte lane by byte lane
// (each lane wraps modulo 256), processing the MASK1 lanes and then the MASK2 lanes.
//
// A plain masked subtraction lets a borrow from the lower lane ripple through the
// zeroed bits into the upper lane of the same pass, making that lane off by one.
// A guard bit is therefore set just above the lower lane of each pass so the
// borrow is absorbed there; the guard position is removed by the final mask.
__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
    char4 out;

    // Lanes at bits 0-7 and 16-23; guard bit 8 absorbs a borrow out of bits 0-7.
    unsigned one1 = (in1.a & MASK1) | 0x00000100u;
    unsigned one2 = in2.a & MASK1;
    out.a = (one1 - one2) & MASK1;

    // Lanes at bits 8-15 and 24-31; guard bit 16 absorbs a borrow out of bits 8-15.
    one1 = (in1.a & MASK2) | 0x00010000u;
    one2 = in2.a & MASK2;
    out.a = out.a | ((one1 - one2) & MASK2);

    return out;
}
|
||||
|
||||
// Multiplies the packed word `a` of two char4 values byte lane by byte lane,
// keeping the low 8 bits of each product.
//
// Multiplying the masked words directly (as was done before) mixes cross terms
// between lanes and shifts the products of the upper lanes out of position, so
// only the lowest lane came out right. Each lane is now extracted, multiplied and
// re-inserted on its own.
// NOTE(review): assumes the intended semantics are four independent 8-bit
// wrapping multiplies, by analogy with the add/sub variants — confirm.
__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
    const unsigned lhs = in1.a;
    const unsigned rhs = in2.a;

    unsigned packed = 0u;
    for (int shift = 0; shift < 32; shift += 8) {
        const unsigned a = (lhs >> shift) & 0xffu;
        const unsigned b = (rhs >> shift) & 0xffu;
        packed |= ((a * b) & 0xffu) << shift;
    }

    char4 out;
    out.a = packed;
    return out;
}
|
||||
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -23,6 +23,8 @@ THE SOFTWARE.
|
||||
#ifndef DEVICE_UTIL_H
|
||||
#define DEVICE_UTIL_H
|
||||
|
||||
#include<hip/hcc_detail/hip_runtime.h>
|
||||
|
||||
/*
|
||||
Heap size computation for malloc and free device functions.
|
||||
*/
|
||||
@@ -35,4 +37,119 @@ THE SOFTWARE.
|
||||
#define SIZE_MALLOC NUM_PAGES * SIZE_OF_PAGE
|
||||
#define SIZE_OF_HEAP SIZE_MALLOC
|
||||
|
||||
#define HIP_SQRT_2 1.41421356237
|
||||
#define HIP_SQRT_PI 1.77245385091
|
||||
|
||||
#define __hip_erfinva3 -0.140543331
|
||||
#define __hip_erfinva2 0.914624893
|
||||
#define __hip_erfinva1 -1.645349621
|
||||
#define __hip_erfinva0 0.886226899
|
||||
|
||||
#define __hip_erfinvb4 0.012229801
|
||||
#define __hip_erfinvb3 -0.329097515
|
||||
#define __hip_erfinvb2 1.442710462
|
||||
#define __hip_erfinvb1 -2.118377725
|
||||
#define __hip_erfinvb0 1
|
||||
|
||||
#define __hip_erfinvc3 1.641345311
|
||||
#define __hip_erfinvc2 3.429567803
|
||||
#define __hip_erfinvc1 -1.62490649
|
||||
#define __hip_erfinvc0 -1.970840454
|
||||
|
||||
#define __hip_erfinvd2 1.637067800
|
||||
#define __hip_erfinvd1 3.543889200
|
||||
#define __hip_erfinvd0 1
|
||||
|
||||
#define HIP_PI 3.14159265358979323846
|
||||
|
||||
__device__ void* __hip_hc_malloc(size_t size);
|
||||
__device__ void* __hip_hc_free(void* ptr);
|
||||
|
||||
__device__ float __hip_erfinvf(float x);
|
||||
__device__ double __hip_erfinv(double x);
|
||||
|
||||
__device__ float __hip_j0f(float x);
|
||||
__device__ double __hip_j0(double x);
|
||||
|
||||
__device__ float __hip_j1f(float x);
|
||||
__device__ double __hip_j1(double x);
|
||||
|
||||
__device__ float __hip_y0f(float x);
|
||||
__device__ double __hip_y0(double x);
|
||||
|
||||
__device__ float __hip_y1f(float x);
|
||||
__device__ double __hip_y1(double x);
|
||||
|
||||
__device__ float __hip_jnf(int n, float x);
|
||||
__device__ double __hip_jn(int n, double x);
|
||||
|
||||
__device__ float __hip_ynf(int n, float x);
|
||||
__device__ double __hip_yn(int n, double x);
|
||||
|
||||
__device__ float __hip_precise_cosf(float x);
|
||||
__device__ float __hip_precise_exp10f(float x);
|
||||
__device__ float __hip_precise_expf(float x);
|
||||
__device__ float __hip_precise_frsqrt_rn(float x);
|
||||
__device__ float __hip_precise_fsqrt_rd(float x);
|
||||
__device__ float __hip_precise_fsqrt_rn(float x);
|
||||
__device__ float __hip_precise_fsqrt_ru(float x);
|
||||
__device__ float __hip_precise_fsqrt_rz(float x);
|
||||
__device__ float __hip_precise_log10f(float x);
|
||||
__device__ float __hip_precise_log2f(float x);
|
||||
__device__ float __hip_precise_logf(float x);
|
||||
__device__ float __hip_precise_powf(float base, float exponent);
|
||||
__device__ void __hip_precise_sincosf(float x, float *s, float *c);
|
||||
__device__ float __hip_precise_sinf(float x);
|
||||
__device__ float __hip_precise_tanf(float x);
|
||||
// Double Precision Math
|
||||
__device__ double __hip_precise_dsqrt_rd(double x);
|
||||
__device__ double __hip_precise_dsqrt_rn(double x);
|
||||
__device__ double __hip_precise_dsqrt_ru(double x);
|
||||
__device__ double __hip_precise_dsqrt_rz(double x);
|
||||
|
||||
|
||||
|
||||
// Float Fast Math
|
||||
__device__ float __hip_fast_exp10f(float x);
|
||||
__device__ float __hip_fast_expf(float x);
|
||||
__device__ float __hip_fast_frsqrt_rn(float x);
|
||||
__device__ float __hip_fast_fsqrt_rn(float x);
|
||||
__device__ float __hip_fast_fsqrt_ru(float x);
|
||||
__device__ float __hip_fast_fsqrt_rz(float x);
|
||||
__device__ float __hip_fast_log10f(float x);
|
||||
__device__ float __hip_fast_logf(float x);
|
||||
__device__ float __hip_fast_powf(float base, float exponent);
|
||||
__device__ void __hip_fast_sincosf(float x, float *s, float *c);
|
||||
__device__ float __hip_fast_tanf(float x);
|
||||
// Double Precision Math
|
||||
__device__ double __hip_fast_dsqrt_rd(double x);
|
||||
__device__ double __hip_fast_dsqrt_rn(double x);
|
||||
__device__ double __hip_fast_dsqrt_ru(double x);
|
||||
__device__ double __hip_fast_dsqrt_rz(double x);
|
||||
__device__ void __threadfence_system(void);
|
||||
|
||||
float __hip_host_erfinvf(float x);
|
||||
double __hip_host_erfinv(double x);
|
||||
|
||||
float __hip_host_erfcinvf(float y);
|
||||
double __hip_host_erfcinv(double y);
|
||||
|
||||
float __hip_host_j0f(float x);
|
||||
double __hip_host_j0(double x);
|
||||
|
||||
float __hip_host_j1f(float x);
|
||||
double __hip_host_j1(double x);
|
||||
|
||||
float __hip_host_y0f(float x);
|
||||
// Double-precision companion of __hip_host_y0f (the y1 pair is declared right below).
double __hip_host_y0(double x);
|
||||
|
||||
float __hip_host_y1f(float x);
|
||||
double __hip_host_y1(double x);
|
||||
|
||||
float __hip_host_jnf(int n, float x);
|
||||
double __hip_host_jn(int n, double x);
|
||||
|
||||
float __hip_host_ynf(int n, float x);
|
||||
double __hip_host_yn(int n, double x);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,971 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hc.hpp>
|
||||
#include <grid_launch.h>
|
||||
#include <hc_math.hpp>
|
||||
#include "device_util.h"
|
||||
#include "hip/hcc_detail/device_functions.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
__device__ float acosf(float x)
|
||||
{
|
||||
return hc::precise_math::acosf(x);
|
||||
}
|
||||
__device__ float acoshf(float x)
|
||||
{
|
||||
return hc::precise_math::acoshf(x);
|
||||
}
|
||||
__device__ float asinf(float x)
|
||||
{
|
||||
return hc::precise_math::asinf(x);
|
||||
}
|
||||
__device__ float asinhf(float x)
|
||||
{
|
||||
return hc::precise_math::asinhf(x);
|
||||
}
|
||||
// Single-precision two-argument arc tangent.
// The arguments are forwarded in (y, x) order, the same order the double-precision
// atan2 wrapper in this file uses; they were previously passed swapped.
__device__ float atan2f(float y, float x)
{
    return hc::precise_math::atan2f(y, x);
}
|
||||
__device__ float atanf(float x)
|
||||
{
|
||||
return hc::precise_math::atanf(x);
|
||||
}
|
||||
__device__ float atanhf(float x)
|
||||
{
|
||||
return hc::precise_math::atanhf(x);
|
||||
}
|
||||
__device__ float cbrtf(float x)
|
||||
{
|
||||
return hc::precise_math::cbrtf(x);
|
||||
}
|
||||
__device__ float ceilf(float x)
|
||||
{
|
||||
return hc::precise_math::ceilf(x);
|
||||
}
|
||||
__device__ float copysignf(float x, float y)
|
||||
{
|
||||
return hc::precise_math::copysignf(x, y);
|
||||
}
|
||||
__device__ float cosf(float x)
|
||||
{
|
||||
return hc::precise_math::cosf(x);
|
||||
}
|
||||
__device__ float coshf(float x)
|
||||
{
|
||||
return hc::precise_math::coshf(x);
|
||||
}
|
||||
__device__ float cyl_bessel_i0f(float x);
|
||||
__device__ float cyl_bessel_i1f(float x);
|
||||
__device__ float erfcf(float x)
|
||||
{
|
||||
return hc::precise_math::erfcf(x);
|
||||
}
|
||||
// Inverse complementary error function, computed as __hip_erfinvf(1 - y).
__device__ float erfcinvf(float y)
{
    const float complement = 1 - y;
    return __hip_erfinvf(complement);
}
|
||||
// Scaled complementary error function: expf(x*x) * erfcf(x).
__device__ float erfcxf(float x)
{
    const float scale = hc::precise_math::expf(x*x);
    const float tail = hc::precise_math::erfcf(x);
    return scale*tail;
}
|
||||
__device__ float erff(float x)
|
||||
{
|
||||
return hc::precise_math::erff(x);
|
||||
}
|
||||
__device__ float erfinvf(float y)
|
||||
{
|
||||
return __hip_erfinvf(y);
|
||||
}
|
||||
__device__ float exp10f(float x)
|
||||
{
|
||||
return hc::precise_math::exp10f(x);
|
||||
}
|
||||
__device__ float exp2f(float x)
|
||||
{
|
||||
return hc::precise_math::exp2f(x);
|
||||
}
|
||||
__device__ float expf(float x)
|
||||
{
|
||||
return hc::precise_math::expf(x);
|
||||
}
|
||||
__device__ float expm1f(float x)
|
||||
{
|
||||
return hc::precise_math::expm1f(x);
|
||||
}
|
||||
__device__ float fabsf(float x)
|
||||
{
|
||||
return hc::precise_math::fabsf(x);
|
||||
}
|
||||
__device__ float fdimf(float x, float y)
|
||||
{
|
||||
return hc::precise_math::fdimf(x, y);
|
||||
}
|
||||
// Plain single-precision division x / y.
__device__ float fdividef(float x, float y)
{
    const float quotient = x/y;
    return quotient;
}
|
||||
__device__ float floorf(float x)
|
||||
{
|
||||
return hc::precise_math::floorf(x);
|
||||
}
|
||||
__device__ float fmaf(float x, float y, float z)
|
||||
{
|
||||
return hc::precise_math::fmaf(x, y, z);
|
||||
}
|
||||
__device__ float fmaxf(float x, float y)
|
||||
{
|
||||
return hc::precise_math::fmaxf(x, y);
|
||||
}
|
||||
__device__ float fminf(float x, float y)
|
||||
{
|
||||
return hc::precise_math::fminf(x, y);
|
||||
}
|
||||
__device__ float fmodf(float x, float y)
|
||||
{
|
||||
return hc::precise_math::fmodf(x, y);
|
||||
}
|
||||
__device__ float frexpf(float x, int *nptr)
|
||||
{
|
||||
return hc::precise_math::frexpf(x, nptr);
|
||||
}
|
||||
__device__ float hypotf(float x, float y)
|
||||
{
|
||||
return hc::precise_math::hypotf(x, y);
|
||||
}
|
||||
__device__ float ilogbf(float x)
|
||||
{
|
||||
return hc::precise_math::ilogbf(x);
|
||||
}
|
||||
__device__ unsigned isfinite(float a)
|
||||
{
|
||||
return hc::precise_math::isfinite(a);
|
||||
}
|
||||
__device__ unsigned isinf(float a)
|
||||
{
|
||||
return hc::precise_math::isinf(a);
|
||||
}
|
||||
__device__ unsigned isnan(float a)
|
||||
{
|
||||
return hc::precise_math::isnan(a);
|
||||
}
|
||||
__device__ float j0f(float x)
|
||||
{
|
||||
return __hip_j0f(x);
|
||||
}
|
||||
__device__ float j1f(float x)
|
||||
{
|
||||
return __hip_j1f(x);
|
||||
}
|
||||
__device__ float jnf(int n, float x)
|
||||
{
|
||||
return __hip_jnf(n, x);
|
||||
}
|
||||
__device__ float ldexpf(float x, int exp)
|
||||
{
|
||||
return hc::precise_math::ldexpf(x, exp);
|
||||
}
|
||||
// Forwards x and the caller's `sign` pointer to hc::precise_math::lgammaf.
// NOTE(review): the header in this change declares lgammaf with a single float
// parameter — confirm which signature is intended.
__device__ float lgammaf(float x, int *sign)
{
    const float logGamma = hc::precise_math::lgammaf(x, sign);
    return logGamma;
}
|
||||
// Rounds x to an integral value and returns it as long long.
// Two fixes over the previous version:
//  * the rounded value no longer passes through a 32-bit `int`, which truncated
//    anything outside the int range;
//  * nearbyintf is used instead of roundf, because roundf sends halfway cases away
//    from zero whereas the rint family is expected to send them to the nearest even
//    integer.
__device__ long long int llrintf(float x)
{
    return static_cast<long long int>(hc::precise_math::nearbyintf(x));
}
|
||||
// Rounds x with hc::precise_math::roundf and returns the result as long long.
// The value is converted straight to long long; it used to be stored in a 32-bit
// `int` first, which truncated results outside the int range.
__device__ long long int llroundf(float x)
{
    return static_cast<long long int>(hc::precise_math::roundf(x));
}

// Base-10 logarithm; forwards to hc::precise_math::log10f.
// (This definition used to start on the same line as the closing brace above.)
__device__ float log10f(float x)
{
    return hc::precise_math::log10f(x);
}
|
||||
__device__ float log1pf(float x)
|
||||
{
|
||||
return hc::precise_math::log1pf(x);
|
||||
}
|
||||
__device__ float log2f(float x)
|
||||
{
|
||||
return hc::precise_math::log2f(x);
|
||||
}
|
||||
__device__ float logbf(float x)
|
||||
{
|
||||
return hc::precise_math::logbf(x);
|
||||
}
|
||||
__device__ float logf(float x)
|
||||
{
|
||||
return hc::precise_math::logf(x);
|
||||
}
|
||||
// Rounds x to an integral value and returns it as long.
// The rounded value is converted directly to long instead of through a 32-bit
// `int`, and nearbyintf replaces roundf so halfway cases go to the nearest even
// integer rather than away from zero.
__device__ long int lrintf(float x)
{
    return static_cast<long int>(hc::precise_math::nearbyintf(x));
}
|
||||
// Rounds x with hc::precise_math::roundf and converts the result to long.
__device__ long int lroundf(float x)
{
    return static_cast<long int>(hc::precise_math::roundf(x));
}
|
||||
__device__ float modff(float x, float *iptr)
|
||||
{
|
||||
return hc::precise_math::modff(x, iptr);
|
||||
}
|
||||
// Passes the first character of tagp, converted to int, to hc::precise_math::nanf.
// tagp is dereferenced unconditionally.
__device__ float nanf(const char* tagp)
{
    const int tag = static_cast<int>(*tagp);
    return hc::precise_math::nanf(tag);
}
|
||||
__device__ float nearbyintf(float x)
|
||||
{
|
||||
return hc::precise_math::nearbyintf(x);
|
||||
}
|
||||
__device__ float nextafterf(float x, float y)
|
||||
{
|
||||
return hc::precise_math::nextafter(x, y);
|
||||
}
|
||||
// Square root of a*a + b*b + c*c.
__device__ float norm3df(float a, float b, float c)
{
    const float sumOfSquares = a*a + b*b + c*c;
    return hc::precise_math::sqrtf(sumOfSquares);
}
|
||||
// Square root of (a*a + b*b) + (c*c + d*d); the two pair sums are formed separately.
__device__ float norm4df(float a, float b, float c, float d)
{
    const float firstPair = a*a + b*b;
    const float secondPair = c*c + d*d;
    return hc::precise_math::sqrtf(firstPair+secondPair);
}
|
||||
|
||||
// Standard normal cumulative distribution function:
//     Phi(y) = 0.5 * (1 + erf(y / sqrt(2))) = 0.5 * erfc(-y / sqrt(2))
// The previous code divided erf(y) by sqrt(2) instead of scaling the argument, which
// is a different function. The erfc form is used here, and the constant is a float
// literal so the computation stays in single precision.
__device__ float normcdff(float y)
{
    const float invSqrt2 = 0.70710678f;  // 1 / sqrt(2)
    return 0.5f * hc::precise_math::erfcf(-y * invSqrt2);
}
|
||||
// Inverse of the standard normal CDF, computed as HIP_SQRT_2 * __hip_erfinvf(2*y - 1).
__device__ float normcdfinvf(float y)
{
    const float inverseErf = __hip_erfinvf(2*y-1);
    return HIP_SQRT_2 * inverseErf;
}
|
||||
// Square root of the sum of squares of the first `dim` elements of `a`.
// The sum is accumulated with fmaf, one element at a time, starting from 0.
__device__ float normf(int dim, const float *a)
{
    float sumOfSquares = 0.0f;
    int idx = 0;
    while (idx < dim)
    {
        sumOfSquares = hc::precise_math::fmaf(a[idx], a[idx], sumOfSquares);
        ++idx;
    }
    return hc::precise_math::sqrtf(sumOfSquares);
}
|
||||
__device__ float powf(float x, float y)
|
||||
{
|
||||
return hc::precise_math::powf(x, y);
|
||||
}
|
||||
__device__ float rcbrtf(float x)
|
||||
{
|
||||
return hc::precise_math::rcbrtf(x);
|
||||
}
|
||||
__device__ float remainderf(float x, float y)
|
||||
{
|
||||
return hc::precise_math::remainderf(x, y);
|
||||
}
|
||||
__device__ float remquof(float x, float y, int *quo)
|
||||
{
|
||||
return hc::precise_math::remquof(x, y, quo);
|
||||
}
|
||||
// Reciprocal of hypotf(x, y).
__device__ float rhypotf(float x, float y)
{
    const float hypotenuse = hc::precise_math::hypotf(x, y);
    return 1/hypotenuse;
}
|
||||
// Rounds x to an integral value in floating-point format.
// Uses nearbyintf rather than roundf: roundf sends halfway cases away from zero
// (2.5 -> 3), while rintf is expected to send them to the nearest even integer
// (2.5 -> 2).
__device__ float rintf(float x)
{
    return hc::precise_math::nearbyintf(x);
}
|
||||
// Reciprocal of sqrtf(a*a + b*b + c*c).
__device__ float rnorm3df(float a, float b, float c)
{
    const float sumOfSquares = a*a + b*b + c*c;
    const float length = hc::precise_math::sqrtf(sumOfSquares);
    return 1/length;
}
|
||||
// Reciprocal of sqrtf((a*a + b*b) + (c*c + d*d)).
__device__ float rnorm4df(float a, float b, float c, float d)
{
    const float firstPair = a*a + b*b;
    const float secondPair = c*c + d*d;
    const float length = hc::precise_math::sqrtf(firstPair+secondPair);
    return 1/length;
}
|
||||
// Reciprocal of the square root of the sum of squares of the first `dim`
// elements of `a`; the sum is accumulated with fmaf starting from 0.
__device__ float rnormf(int dim, const float* a)
{
    float sumOfSquares = 0.0f;
    int idx = 0;
    while (idx < dim)
    {
        sumOfSquares = hc::precise_math::fmaf(a[idx], a[idx], sumOfSquares);
        ++idx;
    }
    const float length = hc::precise_math::sqrtf(sumOfSquares);
    return 1/length;
}
|
||||
__device__ float roundf(float x)
|
||||
{
|
||||
return hc::precise_math::roundf(x);
|
||||
}
|
||||
__device__ float scalblnf(float x, long int n)
|
||||
{
|
||||
return hc::precise_math::scalb(x, n);
|
||||
}
|
||||
__device__ float scalbnf(float x, int n)
|
||||
{
|
||||
return hc::precise_math::scalbnf(x, n);
|
||||
}
|
||||
__device__ unsigned signbit(float a)
|
||||
{
|
||||
return hc::precise_math::signbit(a);
|
||||
}
|
||||
// Writes sinf(x) to *sptr and then cosf(x) to *cptr.
__device__ void sincosf(float x, float *sptr, float *cptr)
{
    const float sine = hc::precise_math::sinf(x);
    *sptr = sine;
    const float cosine = hc::precise_math::cosf(x);
    *cptr = cosine;
}
|
||||
// Writes sinpif(x) to *sptr and then cospif(x) to *cptr.
__device__ void sincospif(float x, float *sptr, float *cptr)
{
    const float sine = hc::precise_math::sinpif(x);
    *sptr = sine;
    const float cosine = hc::precise_math::cospif(x);
    *cptr = cosine;
}
|
||||
__device__ float sinf(float x)
|
||||
{
|
||||
return hc::precise_math::sinf(x);
|
||||
}
|
||||
__device__ float sinhf(float x)
|
||||
{
|
||||
return hc::precise_math::sinhf(x);
|
||||
}
|
||||
__device__ float tanf(float x)
|
||||
{
|
||||
return hc::precise_math::tanf(x);
|
||||
}
|
||||
__device__ float tanhf(float x)
|
||||
{
|
||||
return hc::precise_math::tanhf(x);
|
||||
}
|
||||
__device__ float tgammaf(float x)
|
||||
{
|
||||
return hc::precise_math::tgammaf(x);
|
||||
}
|
||||
__device__ float truncf(float x)
|
||||
{
|
||||
return hc::precise_math::truncf(x);
|
||||
}
|
||||
__device__ float y0f(float x)
|
||||
{
|
||||
return __hip_y0f(x);
|
||||
}
|
||||
__device__ float y1f(float x)
|
||||
{
|
||||
return __hip_y1f(x);
|
||||
}
|
||||
__device__ float ynf(int n, float x)
|
||||
{
|
||||
return __hip_ynf(n, x);
|
||||
}
|
||||
__device__ float cospif(float x)
|
||||
{
|
||||
return hc::precise_math::cospif(x);
|
||||
}
|
||||
__device__ float sinpif(float x)
|
||||
{
|
||||
return hc::precise_math::sinpif(x);
|
||||
}
|
||||
__device__ float sqrtf(float x)
|
||||
{
|
||||
return hc::precise_math::sqrtf(x);
|
||||
}
|
||||
__device__ float rsqrtf(float x)
|
||||
{
|
||||
return hc::precise_math::rsqrtf(x);
|
||||
}
|
||||
|
||||
/*
|
||||
* Double precision device math functions
|
||||
*/
|
||||
|
||||
__device__ double acos(double x)
|
||||
{
|
||||
return hc::precise_math::acos(x);
|
||||
}
|
||||
__device__ double acosh(double x)
|
||||
{
|
||||
return hc::precise_math::acosh(x);
|
||||
}
|
||||
__device__ double asin(double x)
|
||||
{
|
||||
return hc::precise_math::asin(x);
|
||||
}
|
||||
__device__ double asinh(double x)
|
||||
{
|
||||
return hc::precise_math::asinh(x);
|
||||
}
|
||||
__device__ double atan(double x)
|
||||
{
|
||||
return hc::precise_math::atan(x);
|
||||
}
|
||||
__device__ double atan2(double y, double x)
|
||||
{
|
||||
return hc::precise_math::atan2(y, x);
|
||||
}
|
||||
__device__ double atanh(double x)
|
||||
{
|
||||
return hc::precise_math::atanh(x);
|
||||
}
|
||||
__device__ double cbrt(double x)
|
||||
{
|
||||
return hc::precise_math::cbrt(x);
|
||||
}
|
||||
__device__ double ceil(double x)
|
||||
{
|
||||
return hc::precise_math::ceil(x);
|
||||
}
|
||||
__device__ double copysign(double x, double y)
|
||||
{
|
||||
return hc::precise_math::copysign(x, y);
|
||||
}
|
||||
__device__ double cos(double x)
|
||||
{
|
||||
return hc::precise_math::cos(x);
|
||||
}
|
||||
__device__ double cosh(double x)
|
||||
{
|
||||
return hc::precise_math::cosh(x);
|
||||
}
|
||||
__device__ double cospi(double x)
|
||||
{
|
||||
return hc::precise_math::cospi(x);
|
||||
}
|
||||
__device__ double cyl_bessel_i0(double x);
|
||||
__device__ double cyl_bessel_i1(double x);
|
||||
__device__ double erf(double x)
|
||||
{
|
||||
return hc::precise_math::erf(x);
|
||||
}
|
||||
__device__ double erfc(double x)
|
||||
{
|
||||
return hc::precise_math::erfc(x);
|
||||
}
|
||||
// Inverse complementary error function, computed as __hip_erfinv(1 - x).
__device__ double erfcinv(double x)
{
    const double complement = 1 - x;
    return __hip_erfinv(complement);
}
|
||||
// Scaled complementary error function: exp(x*x) * erfc(x).
// The second factor must be erfc (as in the single-precision erfcxf in this file);
// it was previously erf, which computes a different function.
__device__ double erfcx(double x)
{
    return hc::precise_math::exp(x*x)*hc::precise_math::erfc(x);
}
|
||||
__device__ double erfinv(double x)
|
||||
{
|
||||
return __hip_erfinv(x);
|
||||
}
|
||||
__device__ double exp(double x)
|
||||
{
|
||||
return hc::precise_math::exp(x);
|
||||
}
|
||||
__device__ double exp10(double x)
|
||||
{
|
||||
return hc::precise_math::exp10(x);
|
||||
}
|
||||
__device__ double exp2(double x)
|
||||
{
|
||||
return hc::precise_math::exp2(x);
|
||||
}
|
||||
__device__ double expm1(double x)
|
||||
{
|
||||
return hc::precise_math::expm1(x);
|
||||
}
|
||||
__device__ double fabs(double x)
|
||||
{
|
||||
return hc::precise_math::fabs(x);
|
||||
}
|
||||
__device__ double fdim(double x, double y)
|
||||
{
|
||||
return hc::precise_math::fdim(x, y);
|
||||
}
|
||||
// Plain double-precision division x / y.
__device__ double fdivide(double x, double y)
{
    const double quotient = x/y;
    return quotient;
}
|
||||
__device__ double floor(double x)
|
||||
{
|
||||
return hc::precise_math::floor(x);
|
||||
}
|
||||
__device__ double fma(double x, double y, double z)
|
||||
{
|
||||
return hc::precise_math::fma(x, y, z);
|
||||
}
|
||||
__device__ double fmax(double x, double y)
|
||||
{
|
||||
return hc::precise_math::fmax(x, y);
|
||||
}
|
||||
__device__ double fmin(double x, double y)
|
||||
{
|
||||
return hc::precise_math::fmin(x, y);
|
||||
}
|
||||
__device__ double fmod(double x, double y)
|
||||
{
|
||||
return hc::precise_math::fmod(x, y);
|
||||
}
|
||||
__device__ double frexp(double x, int *y)
|
||||
{
|
||||
return hc::precise_math::frexp(x, y);
|
||||
}
|
||||
__device__ double hypot(double x, double y)
|
||||
{
|
||||
return hc::precise_math::hypot(x, y);
|
||||
}
|
||||
__device__ double ilogb(double x)
|
||||
{
|
||||
return hc::precise_math::ilogb(x);
|
||||
}
|
||||
__device__ unsigned isfinite(double x)
|
||||
{
|
||||
return hc::precise_math::isfinite(x);
|
||||
}
|
||||
__device__ unsigned isinf(double x)
|
||||
{
|
||||
return hc::precise_math::isinf(x);
|
||||
}
|
||||
__device__ unsigned isnan(double x)
|
||||
{
|
||||
return hc::precise_math::isnan(x);
|
||||
}
|
||||
__device__ double j0(double x)
|
||||
{
|
||||
return __hip_j0(x);
|
||||
}
|
||||
__device__ double j1(double x)
|
||||
{
|
||||
return __hip_j1(x);
|
||||
}
|
||||
__device__ double jn(int n, double x)
|
||||
{
|
||||
return __hip_jn(n, x);
|
||||
}
|
||||
__device__ double ldexp(double x, int exp)
|
||||
{
|
||||
return hc::precise_math::ldexp(x, exp);
|
||||
}
|
||||
// Forwards x and the caller's `sign` pointer to hc::precise_math::lgamma.
// NOTE(review): the header in this change declares lgamma with a single double
// parameter — confirm which signature is intended.
__device__ double lgamma(double x, int *sign)
{
    const double logGamma = hc::precise_math::lgamma(x, sign);
    return logGamma;
}
|
||||
// Rounds x to an integral value and returns it as long long.
// nearbyint replaces round: round sends halfway cases away from zero, whereas the
// rint family is expected to send them to the nearest even integer.
__device__ long long int llrint(double x)
{
    return static_cast<long long int>(hc::precise_math::nearbyint(x));
}
|
||||
// Rounds x with hc::precise_math::round and converts the result to long long.
__device__ long long int llround(double x)
{
    return static_cast<long long int>(hc::precise_math::round(x));
}
|
||||
__device__ double log(double x)
|
||||
{
|
||||
return hc::precise_math::log(x);
|
||||
}
|
||||
__device__ double log10(double x)
|
||||
{
|
||||
return hc::precise_math::log10(x);
|
||||
}
|
||||
__device__ double log1p(double x)
|
||||
{
|
||||
return hc::precise_math::log1p(x);
|
||||
}
|
||||
__device__ double log2(double x)
|
||||
{
|
||||
return hc::precise_math::log2(x);
|
||||
}
|
||||
__device__ double logb(double x)
|
||||
{
|
||||
return hc::precise_math::logb(x);
|
||||
}
|
||||
// Rounds x to an integral value and returns it as long.
// nearbyint replaces round so that halfway cases go to the nearest even integer
// instead of away from zero.
__device__ long int lrint(double x)
{
    return static_cast<long int>(hc::precise_math::nearbyint(x));
}
|
||||
// Rounds x with hc::precise_math::round and converts the result to long.
__device__ long int lround(double x)
{
    return static_cast<long int>(hc::precise_math::round(x));
}
|
||||
__device__ double modf(double x, double *iptr)
|
||||
{
|
||||
return hc::precise_math::modf(x, iptr);
|
||||
}
|
||||
// Passes the first character of tagp, converted to int, to hc::precise_math::nan.
// tagp is dereferenced unconditionally.
__device__ double nan(const char *tagp)
{
    const int tag = static_cast<int>(*tagp);
    return hc::precise_math::nan(tag);
}
|
||||
__device__ double nearbyint(double x)
|
||||
{
|
||||
return hc::precise_math::nearbyint(x);
|
||||
}
|
||||
__device__ double nextafter(double x, double y)
|
||||
{
|
||||
return hc::precise_math::nextafter(x, y);
|
||||
}
|
||||
// Square root of a*a + b*b + c*c.
__device__ double norm3d(double a, double b, double c)
{
    const double sumOfSquares = a*a + b*b + c*c;
    return hc::precise_math::sqrt(sumOfSquares);
}
|
||||
// Device norm4d: returns sqrt((a*a + b*b) + (c*c + d*d)), with the square
// root taken by hc::precise_math::sqrt. The two pair sums are formed first
// and then added, as in the original grouping.
__device__ double norm4d(double a, double b, double c, double d)
{
    const double sumSquares = (a*a + b*b) + (c*c + d*d);
    return hc::precise_math::sqrt(sumSquares);
}
|
||||
// Device normcdf: standard normal cumulative distribution function,
//   Phi(y) = (1 + erf(y / sqrt(2))) / 2.
//
// The previous code divided the *result* of erf by sqrt(2) instead of its
// argument, which gives wrong values everywhere except y == 0 (for example it
// tended to about 0.854 rather than 1 for large y). This now matches the host
// implementation of normcdf in this file.
// NOTE(review): assumes HIP_SQRT_2 is the constant sqrt(2) - confirm against
// its definition.
__device__ double normcdf(double y)
{
    return (1 + hc::precise_math::erf(y / HIP_SQRT_2)) / 2;
}
|
||||
// Device pow: forwards (x, y) to hc::precise_math::pow.
__device__ double pow(double x, double y)
{
    const double result = hc::precise_math::pow(x, y);
    return result;
}
|
||||
// Device rcbrt: forwards x to hc::precise_math::rcbrt.
__device__ double rcbrt(double x)
{
    const double result = hc::precise_math::rcbrt(x);
    return result;
}
|
||||
// Device remainder: forwards (x, y) to hc::precise_math::remainder.
__device__ double remainder(double x, double y)
{
    const double result = hc::precise_math::remainder(x, y);
    return result;
}
|
||||
// Device remquo: forwards (x, y) and the output pointer quo to
// hc::precise_math::remquo and returns its result.
__device__ double remquo(double x, double y, int *quo)
{
    const double result = hc::precise_math::remquo(x, y, quo);
    return result;
}
|
||||
// Device rhypot: returns 1 / sqrt(x*x + y*y), with the square root taken by
// hc::precise_math::sqrt.
__device__ double rhypot(double x, double y)
{
    const double hyp = hc::precise_math::sqrt(x*x + y*y);
    return 1/hyp;
}
|
||||
// Device rint: rounds x to an integral value in floating-point format.
//
// The previous implementation called round(), which sends halfway cases away
// from zero (2.5 -> 3.0). rint rounds halfway cases to the nearest even
// integer (2.5 -> 2.0), so nearbyint is used instead.
// NOTE(review): assumes hc::precise_math::nearbyint follows the C nearbyint
// semantics (current rounding mode, round-to-nearest-even by default) - confirm.
__device__ double rint(double x)
{
    return hc::precise_math::nearbyint(x);
}
|
||||
// Device rnorm3d: passes a*a + b*b + c*c to hc::precise_math::rsqrt and
// returns the result.
__device__ double rnorm3d(double a, double b, double c)
{
    const double sumSquares = a*a + b*b + c*c;
    return hc::precise_math::rsqrt(sumSquares);
}
|
||||
// Device rnorm4d: passes a*a + b*b + c*c + d*d to hc::precise_math::rsqrt and
// returns the result.
__device__ double rnorm4d(double a, double b, double c, double d)
{
    const double sumSquares = a*a + b*b + c*c + d*d;
    return hc::precise_math::rsqrt(sumSquares);
}
|
||||
// Device rnorm: reciprocal of the Euclidean norm of the first `dim` elements
// of t, i.e. 1 / sqrt(t[0]^2 + ... + t[dim-1]^2).
//
// dim - number of elements read from t; a non-positive dim leaves the sum at 0.
// t   - pointer to at least `dim` doubles.
//
// The previous code returned 1 / (sum of squares), omitting the square root.
// The host rnorm and the device rnorm3d/rnorm4d in this file all use the
// reciprocal square root, so this now does the same.
__device__ double rnorm(int dim, const double* t)
{
    double sumSquares = 0.0;
    for (int i = 0; i < dim; i++)
    {
        // Accumulate t[i]^2 with a fused multiply-add.
        sumSquares = hc::precise_math::fma(t[i], t[i], sumSquares);
    }
    return hc::precise_math::rsqrt(sumSquares);
}
|
||||
// Device round: forwards x to hc::precise_math::round.
__device__ double round(double x)
{
    const double result = hc::precise_math::round(x);
    return result;
}
|
||||
// Device rsqrt: forwards x to hc::precise_math::rsqrt.
__device__ double rsqrt(double x)
{
    const double result = hc::precise_math::rsqrt(x);
    return result;
}
|
||||
// Device scalbln: forwards (x, n) to hc::precise_math::scalb; n is passed as
// given and converted to whatever parameter type scalb declares.
__device__ double scalbln(double x, long int n)
{
    const double scaled = hc::precise_math::scalb(x, n);
    return scaled;
}
|
||||
// Device scalbn: forwards (x, n) to hc::precise_math::scalbn.
__device__ double scalbn(double x, int n)
{
    const double scaled = hc::precise_math::scalbn(x, n);
    return scaled;
}
|
||||
// Device signbit: returns the value of hc::precise_math::signbit(x) converted
// to unsigned.
__device__ unsigned signbit(double x)
{
    const unsigned bit = hc::precise_math::signbit(x);
    return bit;
}
|
||||
// Device sin: forwards x to hc::precise_math::sin.
__device__ double sin(double x)
{
    const double result = hc::precise_math::sin(x);
    return result;
}
|
||||
// Device sincos: writes hc::precise_math::sin(x) to *sptr and
// hc::precise_math::cos(x) to *cptr, in that order.
__device__ void sincos(double x, double *sptr, double *cptr)
{
    const double sineValue = hc::precise_math::sin(x);
    const double cosineValue = hc::precise_math::cos(x);
    *sptr = sineValue;
    *cptr = cosineValue;
}
|
||||
// Device sincospi: writes hc::precise_math::sinpi(x) to *sptr and
// hc::precise_math::cospi(x) to *cptr, in that order.
__device__ void sincospi(double x, double *sptr, double *cptr)
{
    const double sineValue = hc::precise_math::sinpi(x);
    const double cosineValue = hc::precise_math::cospi(x);
    *sptr = sineValue;
    *cptr = cosineValue;
}
|
||||
// Device sinh: forwards x to hc::precise_math::sinh.
__device__ double sinh(double x)
{
    const double result = hc::precise_math::sinh(x);
    return result;
}
|
||||
// Device sinpi: forwards x to hc::precise_math::sinpi.
__device__ double sinpi(double x)
{
    const double result = hc::precise_math::sinpi(x);
    return result;
}
|
||||
// Device sqrt: forwards x to hc::precise_math::sqrt.
__device__ double sqrt(double x)
{
    const double root = hc::precise_math::sqrt(x);
    return root;
}
|
||||
// Device tan: forwards x to hc::precise_math::tan.
__device__ double tan(double x)
{
    const double result = hc::precise_math::tan(x);
    return result;
}
|
||||
// Device tanh: forwards x to hc::precise_math::tanh.
__device__ double tanh(double x)
{
    const double result = hc::precise_math::tanh(x);
    return result;
}
|
||||
// Device tgamma: forwards x to hc::precise_math::tgamma.
__device__ double tgamma(double x)
{
    const double result = hc::precise_math::tgamma(x);
    return result;
}
|
||||
// Device trunc: forwards x to hc::precise_math::trunc.
__device__ double trunc(double x)
{
    const double result = hc::precise_math::trunc(x);
    return result;
}
|
||||
// Device y0: delegates to the internal helper __hip_y0 and returns its result.
__device__ double y0(double x)
{
    const double result = __hip_y0(x);
    return result;
}
|
||||
// Device y1: delegates to the internal helper __hip_y1 and returns its result.
__device__ double y1(double x)
{
    const double result = __hip_y1(x);
    return result;
}
|
||||
// Device yn: delegates to the internal helper __hip_yn with the same (n, x) arguments.
__device__ double yn(int n, double x)
{
    const double result = __hip_yn(n, x);
    return result;
}
|
||||
|
||||
|
||||
// Host cospif: returns std::cos(x * HIP_PI), converted to float.
__host__ float cospif(float x)
{
    const float cosine = std::cos(x*HIP_PI);
    return cosine;
}
|
||||
|
||||
// Host fdividef: plain single-precision division, x / y.
__host__ float fdividef(float x, float y)
{
    const float quotient = x / y;
    return quotient;
}
|
||||
|
||||
// Host isfinite: returns 1 when std::isfinite(x) is true, otherwise 0.
__host__ int isfinite(float x)
{
    return std::isfinite(x) ? 1 : 0;
}
|
||||
|
||||
// Host signbit: returns 1 when std::signbit(x) is true, otherwise 0.
__host__ int signbit(float x)
{
    return std::signbit(x) ? 1 : 0;
}
|
||||
|
||||
// Host sinpif: returns sin(pi * x) in single precision.
//
// The return type was previously `int`, which truncated the sine to -1, 0 or
// 1. It is now `float`, matching the sibling cospif and the float argument.
// Callers that stored the result in a float are unaffected.
// NOTE(review): the header declaration of sinpif is not visible here; make
// sure it also declares a float return type.
__host__ float sinpif(float x)
{
    return std::sin(x*HIP_PI);
}
|
||||
|
||||
// Host rsqrtf: reciprocal square root, 1 / std::sqrt(x).
__host__ float rsqrtf(float x)
{
    const float root = std::sqrt(x);
    return 1 / root;
}
|
||||
|
||||
// Host modff: forwards to std::modf, which stores the integral part of x in
// *iptr; the value std::modf returns is passed back to the caller.
__host__ float modff(float x, float *iptr)
{
    const float fractional = std::modf(x, iptr);
    return fractional;
}
|
||||
|
||||
// Host erfcinvf: delegates to the internal helper __hip_host_erfcinvf.
__host__ float erfcinvf(float y)
{
    const float result = __hip_host_erfcinvf(y);
    return result;
}
|
||||
|
||||
// Host erfcinv: delegates to the internal helper __hip_host_erfcinv.
__host__ double erfcinv(double y)
{
    const double result = __hip_host_erfcinv(y);
    return result;
}
|
||||
|
||||
// Host erfinvf: delegates to the internal helper __hip_host_erfinvf.
__host__ float erfinvf(float x)
{
    const float result = __hip_host_erfinvf(x);
    return result;
}
|
||||
|
||||
// Host erfinv: delegates to the internal helper __hip_host_erfinv.
__host__ double erfinv(double x)
{
    const double result = __hip_host_erfinv(x);
    return result;
}
|
||||
|
||||
// Host fdivide: plain double-precision division, x / y.
__host__ double fdivide(double x, double y)
{
    const double quotient = x / y;
    return quotient;
}
|
||||
|
||||
// Host normcdff: evaluates (1 - erf(-t / sqrt(2))) / 2 in double precision
// and returns the result converted to float.
__host__ float normcdff(float t)
{
    const double scaled = -t/std::sqrt(2);
    return (1 - std::erf(scaled))/2;
}
|
||||
|
||||
// Host normcdf: evaluates (1 - erf(-x / sqrt(2))) / 2.
__host__ double normcdf(double x)
{
    const double scaled = -x/std::sqrt(2);
    return (1 - std::erf(scaled))/2;
}
|
||||
|
||||
// Host erfcxf: returns exp(x*x) * erfc(x) computed with the std:: float overloads.
// NOTE(review): for large x the exponential overflows while erfc underflows,
// so this direct product can yield NaN or infinity.
__host__ float erfcxf(float x)
{
    const float expTerm = std::exp(x*x);
    const float erfcTerm = std::erfc(x);
    return expTerm * erfcTerm;
}
|
||||
|
||||
// Host erfcx: returns exp(x*x) * erfc(x) in double precision.
// NOTE(review): for large x the exponential overflows while erfc underflows,
// so this direct product can yield NaN or infinity.
__host__ double erfcx(double x)
{
    const double expTerm = std::exp(x*x);
    const double erfcTerm = std::erfc(x);
    return expTerm * erfcTerm;
}
|
||||
|
||||
// Host rhypotf: returns 1 / sqrt(x*x + y*y) in single precision.
__host__ float rhypotf(float x, float y)
{
    const float hyp = std::sqrt(x*x + y*y);
    return 1 / hyp;
}
|
||||
|
||||
// Host rhypot: returns 1 / sqrt(x*x + y*y) in double precision.
__host__ double rhypot(double x, double y)
{
    const double hyp = std::sqrt(x*x + y*y);
    return 1 / hyp;
}
|
||||
|
||||
// Host rcbrtf: returns 1 / std::cbrt(a) in single precision.
__host__ float rcbrtf(float a)
{
    const float cubeRoot = std::cbrt(a);
    return 1 / cubeRoot;
}
|
||||
|
||||
// Host rcbrt: returns 1 / std::cbrt(a) in double precision.
__host__ double rcbrt(double a)
{
    const double cubeRoot = std::cbrt(a);
    return 1 / cubeRoot;
}
|
||||
|
||||
// Host normf: Euclidean norm of the first `dim` elements of a, i.e.
// sqrt(a[0]^2 + ... + a[dim-1]^2).
//
// dim - number of elements read from a; a non-positive dim yields 0.
// a   - pointer to at least `dim` floats.
//
// The previous code returned the raw sum of squares and never took the square
// root, which is inconsistent with norm3df/norm4df and with rnormf (which
// returns 1 / sqrt of the same sum) in this file.
__host__ float normf(int dim, const float *a)
{
    float sumSquares = 0.0f;
    for (int i = 0; i < dim; i++)
    {
        sumSquares = sumSquares + a[i] * a[i];
    }
    return std::sqrt(sumSquares);
}
|
||||
|
||||
// Host norm: Euclidean norm of the first `dim` elements of a, i.e.
// sqrt(a[0]^2 + ... + a[dim-1]^2).
//
// dim - number of elements read from a; a non-positive dim yields 0.
// a   - pointer to at least `dim` doubles.
//
// The previous code returned the raw sum of squares and never took the square
// root, which is inconsistent with norm3d/norm4d and with rnorm (which
// returns 1 / sqrt of the same sum) in this file.
__host__ double norm(int dim, const double *a)
{
    double sumSquares = 0.0;
    for (int i = 0; i < dim; i++)
    {
        sumSquares = sumSquares + a[i] * a[i];
    }
    return std::sqrt(sumSquares);
}
|
||||
|
||||
// Host rnormf: sums t[i]*t[i] over the first `dim` elements and returns
// 1 / sqrt of that sum. A non-positive dim leaves the sum at 0.
__host__ float rnormf(int dim, const float *t)
{
    float sumSquares = 0.0f;
    int i = 0;
    while (i < dim)
    {
        sumSquares = sumSquares + t[i] * t[i];
        ++i;
    }
    return 1 / std::sqrt(sumSquares);
}
|
||||
|
||||
// Host rnorm: sums t[i]*t[i] over the first `dim` elements and returns
// 1 / sqrt of that sum. A non-positive dim leaves the sum at 0.
__host__ double rnorm(int dim, const double *t)
{
    double sumSquares = 0.0;
    int i = 0;
    while (i < dim)
    {
        sumSquares = sumSquares + t[i] * t[i];
        ++i;
    }
    return 1 / std::sqrt(sumSquares);
}
|
||||
|
||||
// Host rnorm4df: returns 1 / sqrt(a*a + b*b + c*c + d*d) in single precision.
__host__ float rnorm4df(float a, float b, float c, float d)
{
    const float length = std::sqrt(a*a + b*b + c*c + d*d);
    return 1 / length;
}
|
||||
|
||||
// Host rnorm4d: returns 1 / sqrt(a*a + b*b + c*c + d*d) in double precision.
__host__ double rnorm4d(double a, double b, double c, double d)
{
    const double length = std::sqrt(a*a + b*b + c*c + d*d);
    return 1 / length;
}
|
||||
|
||||
// Host rnorm3df: returns 1 / sqrt(a*a + b*b + c*c) in single precision.
__host__ float rnorm3df(float a, float b, float c)
{
    const float length = std::sqrt(a*a + b*b + c*c);
    return 1 / length;
}
|
||||
|
||||
// Host rnorm3d: returns 1 / sqrt(a*a + b*b + c*c) in double precision.
__host__ double rnorm3d(double a, double b, double c)
{
    const double length = std::sqrt(a*a + b*b + c*c);
    return 1 / length;
}
|
||||
|
||||
// Host sincospif: writes std::sin(HIP_PI*x) to *sptr and std::cos(HIP_PI*x)
// to *cptr, in that order, each converted to float.
__host__ void sincospif(float x, float *sptr, float *cptr)
{
    const float sineValue = std::sin(HIP_PI*x);
    const float cosineValue = std::cos(HIP_PI*x);
    *sptr = sineValue;
    *cptr = cosineValue;
}
|
||||
|
||||
// Host sincospi: writes std::sin(HIP_PI*x) to *sptr and std::cos(HIP_PI*x)
// to *cptr, in that order.
__host__ void sincospi(double x, double *sptr, double *cptr)
{
    const double sineValue = std::sin(HIP_PI*x);
    const double cosineValue = std::cos(HIP_PI*x);
    *sptr = sineValue;
    *cptr = cosineValue;
}
|
||||
|
||||
// Host normcdfinvf: returns sqrt(2) * erfinv(2*x - 1), converted to float.
// The call resolves to the erfinv overload selected for a float argument.
__host__ float normcdfinvf(float x)
{
    const float shifted = 2*x-1;
    return std::sqrt(2) * erfinv(shifted);
}
|
||||
|
||||
// Host normcdfinv: returns sqrt(2) * erfinv(2*x - 1).
__host__ double normcdfinv(double x)
{
    const double shifted = 2*x-1;
    return std::sqrt(2) * erfinv(shifted);
}
|
||||
|
||||
// Host nextafterf: forwards (x, y) to std::nextafter.
__host__ float nextafterf(float x, float y)
{
    const float next = std::nextafter(x, y);
    return next;
}
|
||||
|
||||
// Host nextafter: forwards (x, y) to std::nextafter.
__host__ double nextafter(double x, double y)
{
    const double next = std::nextafter(x, y);
    return next;
}
|
||||
|
||||
// Host norm3df: returns sqrt(a*a + b*b + c*c) in single precision.
__host__ float norm3df(float a, float b, float c)
{
    const float sumSquares = a*a + b*b + c*c;
    return std::sqrt(sumSquares);
}
|
||||
|
||||
// Host norm4df: returns sqrt(a*a + b*b + c*c + d*d) in single precision.
__host__ float norm4df(float a, float b, float c, float d)
{
    const float sumSquares = a*a + b*b + c*c + d*d;
    return std::sqrt(sumSquares);
}
|
||||
|
||||
// Host norm3d: returns sqrt(a*a + b*b + c*c) in double precision.
__host__ double norm3d(double a, double b, double c)
{
    const double sumSquares = a*a + b*b + c*c;
    return std::sqrt(sumSquares);
}
|
||||
|
||||
// Host norm4d: returns sqrt(a*a + b*b + c*c + d*d) in double precision.
__host__ double norm4d(double a, double b, double c, double d)
{
    const double sumSquares = a*a + b*b + c*c + d*d;
    return std::sqrt(sumSquares);
}
|
||||
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/device_functions.h>
|
||||
#include "test_common.h"
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
@@ -27,18 +28,18 @@ THE SOFTWARE.
|
||||
|
||||
__device__ void double_precision_intrinsics()
|
||||
{
|
||||
//__dadd_rd(0.0, 1.0);
|
||||
//__dadd_rn(0.0, 1.0);
|
||||
//__dadd_ru(0.0, 1.0);
|
||||
//__dadd_rz(0.0, 1.0);
|
||||
//__ddiv_rd(0.0, 1.0);
|
||||
//__ddiv_rn(0.0, 1.0);
|
||||
//__ddiv_ru(0.0, 1.0);
|
||||
//__ddiv_rz(0.0, 1.0);
|
||||
//__dmul_rd(1.0, 2.0);
|
||||
//__dmul_rn(1.0, 2.0);
|
||||
//__dmul_ru(1.0, 2.0);
|
||||
//__dmul_rz(1.0, 2.0);
|
||||
__dadd_rd(0.0, 1.0);
|
||||
__dadd_rn(0.0, 1.0);
|
||||
__dadd_ru(0.0, 1.0);
|
||||
__dadd_rz(0.0, 1.0);
|
||||
__ddiv_rd(0.0, 1.0);
|
||||
__ddiv_rn(0.0, 1.0);
|
||||
__ddiv_ru(0.0, 1.0);
|
||||
__ddiv_rz(0.0, 1.0);
|
||||
__dmul_rd(1.0, 2.0);
|
||||
__dmul_rn(1.0, 2.0);
|
||||
__dmul_ru(1.0, 2.0);
|
||||
__dmul_rz(1.0, 2.0);
|
||||
__drcp_rd(2.0);
|
||||
__drcp_rn(2.0);
|
||||
__drcp_ru(2.0);
|
||||
@@ -47,10 +48,10 @@ __device__ void double_precision_intrinsics()
|
||||
__dsqrt_rn(4.0);
|
||||
__dsqrt_ru(4.0);
|
||||
__dsqrt_rz(4.0);
|
||||
//__dsub_rd(2.0, 1.0);
|
||||
//__dsub_rn(2.0, 1.0);
|
||||
//__dsub_ru(2.0, 1.0);
|
||||
//__dsub_rz(2.0, 1.0);
|
||||
__dsub_rd(2.0, 1.0);
|
||||
__dsub_rn(2.0, 1.0);
|
||||
__dsub_ru(2.0, 1.0);
|
||||
__dsub_rz(2.0, 1.0);
|
||||
__fma_rd(1.0, 2.0, 3.0);
|
||||
__fma_rn(1.0, 2.0, 3.0);
|
||||
__fma_ru(1.0, 2.0, 3.0);
|
||||
|
||||
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/math_functions.h>
|
||||
#include "test_common.h"
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
@@ -43,8 +44,8 @@ __device__ void double_precision_math_functions()
|
||||
cos(0.0);
|
||||
cosh(0.0);
|
||||
cospi(0.0);
|
||||
//cyl_bessel_i0(0.0);
|
||||
//cyl_bessel_i1(0.0);
|
||||
cyl_bessel_i0(0.0);
|
||||
cyl_bessel_i1(0.0);
|
||||
erf(0.0);
|
||||
erfc(0.0);
|
||||
erfcinv(2.0);
|
||||
@@ -61,7 +62,7 @@ __device__ void double_precision_math_functions()
|
||||
fmax(0.0, 0.0);
|
||||
fmin(0.0, 0.0);
|
||||
fmod(0.0, 1.0);
|
||||
//frexp(0.0, &iX);
|
||||
frexp(0.0, &iX);
|
||||
hypot(1.0, 0.0);
|
||||
ilogb(1.0);
|
||||
isfinite(0.0);
|
||||
@@ -71,7 +72,7 @@ __device__ void double_precision_math_functions()
|
||||
j1(0.0);
|
||||
jn(-1.0, 1.0);
|
||||
ldexp(0.0, 0);
|
||||
//lgamma(1.0);
|
||||
lgamma(1.0);
|
||||
llrint(0.0);
|
||||
llround(0.0);
|
||||
log(1.0);
|
||||
@@ -81,19 +82,19 @@ __device__ void double_precision_math_functions()
|
||||
logb(1.0);
|
||||
lrint(0.0);
|
||||
lround(0.0);
|
||||
//modf(0.0, &fX);
|
||||
modf(0.0, &fX);
|
||||
nan("1");
|
||||
nearbyint(0.0);
|
||||
//nextafter(0.0);
|
||||
//fX = 1.0; norm(1, &fX);
|
||||
nextafter(0.0, 0.0);
|
||||
fX = 1.0; norm(1, &fX);
|
||||
norm3d(1.0, 0.0, 0.0);
|
||||
norm4d(1.0, 0.0, 0.0, 0.0);
|
||||
normcdf(0.0);
|
||||
//normcdfinv(1.0);
|
||||
normcdfinv(1.0);
|
||||
pow(1.0, 0.0);
|
||||
rcbrt(1.0);
|
||||
remainder(2.0, 1.0);
|
||||
//remquo(1.0, 2.0, &iX);
|
||||
remquo(1.0, 2.0, &iX);
|
||||
rhypot(0.0, 1.0);
|
||||
rint(1.0);
|
||||
fX = 1.0; rnorm(1, &fX);
|
||||
|
||||
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/math_functions.h>
|
||||
#include "test_common.h"
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
@@ -85,7 +86,7 @@ __host__ void double_precision_math_functions()
|
||||
nan("1");
|
||||
nearbyint(0.0);
|
||||
//nextafter(0.0);
|
||||
//fX = 1.0; norm(1, &fX);
|
||||
fX = 1.0; norm(1, &fX);
|
||||
#if defined(__HIP_PLATFORM_HCC__)
|
||||
norm3d(1.0, 0.0, 0.0);
|
||||
norm4d(1.0, 0.0, 0.0, 0.0);
|
||||
|
||||
@@ -27,6 +27,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include <hip/device_functions.h>
|
||||
|
||||
#define LEN 512
|
||||
#define SIZE LEN<<2
|
||||
|
||||
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/math_functions.h>
|
||||
#include "test_common.h"
|
||||
|
||||
__global__ void FloatMathPrecise(hipLaunchParm lp)
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/device_functions.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/device_functions.h>
|
||||
#include "test_common.h"
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
|
||||
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/device_functions.h>
|
||||
#include "test_common.h"
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
@@ -30,44 +31,44 @@ __device__ void single_precision_intrinsics()
|
||||
float fX, fY;
|
||||
|
||||
__cosf(0.0f);
|
||||
//__exp10f(0.0f);
|
||||
__exp10f(0.0f);
|
||||
__expf(0.0f);
|
||||
//__fadd_rd(0.0f, 1.0f);
|
||||
//__fadd_rn(0.0f, 1.0f);
|
||||
//__fadd_ru(0.0f, 1.0f);
|
||||
//__fadd_rz(0.0f, 1.0f);
|
||||
//__fdiv_rd(4.0f, 2.0f);
|
||||
//__fdiv_rn(4.0f, 2.0f);
|
||||
//__fdiv_ru(4.0f, 2.0f);
|
||||
//__fdiv_rz(4.0f, 2.0f);
|
||||
//__fdividef(4.0f, 2.0f);
|
||||
//__fmaf_rd(1.0f, 2.0f, 3.0f);
|
||||
//__fmaf_rn(1.0f, 2.0f, 3.0f);
|
||||
//__fmaf_ru(1.0f, 2.0f, 3.0f);
|
||||
//__fmaf_rz(1.0f, 2.0f, 3.0f);
|
||||
//__fmul_rd(1.0f, 2.0f);
|
||||
//__fmul_rn(1.0f, 2.0f);
|
||||
//__fmul_ru(1.0f, 2.0f);
|
||||
//__fmul_rz(1.0f, 2.0f);
|
||||
//__frcp_rd(2.0f);
|
||||
//__frcp_rn(2.0f);
|
||||
//__frcp_ru(2.0f);
|
||||
//__frcp_rz(2.0f);
|
||||
__fadd_rd(0.0f, 1.0f);
|
||||
__fadd_rn(0.0f, 1.0f);
|
||||
__fadd_ru(0.0f, 1.0f);
|
||||
__fadd_rz(0.0f, 1.0f);
|
||||
__fdiv_rd(4.0f, 2.0f);
|
||||
__fdiv_rn(4.0f, 2.0f);
|
||||
__fdiv_ru(4.0f, 2.0f);
|
||||
__fdiv_rz(4.0f, 2.0f);
|
||||
__fdividef(4.0f, 2.0f);
|
||||
__fmaf_rd(1.0f, 2.0f, 3.0f);
|
||||
__fmaf_rn(1.0f, 2.0f, 3.0f);
|
||||
__fmaf_ru(1.0f, 2.0f, 3.0f);
|
||||
__fmaf_rz(1.0f, 2.0f, 3.0f);
|
||||
__fmul_rd(1.0f, 2.0f);
|
||||
__fmul_rn(1.0f, 2.0f);
|
||||
__fmul_ru(1.0f, 2.0f);
|
||||
__fmul_rz(1.0f, 2.0f);
|
||||
__frcp_rd(2.0f);
|
||||
__frcp_rn(2.0f);
|
||||
__frcp_ru(2.0f);
|
||||
__frcp_rz(2.0f);
|
||||
__frsqrt_rn(4.0f);
|
||||
__fsqrt_rd(4.0f);
|
||||
__fsqrt_rn(4.0f);
|
||||
__fsqrt_ru(4.0f);
|
||||
__fsqrt_rz(4.0f);
|
||||
//__fsub_rd(2.0f, 1.0f);
|
||||
//__fsub_rn(2.0f, 1.0f);
|
||||
//__fsub_ru(2.0f, 1.0f);
|
||||
//__fsub_rz(2.0f, 1.0f);
|
||||
__fsub_rd(2.0f, 1.0f);
|
||||
__fsub_rn(2.0f, 1.0f);
|
||||
__fsub_ru(2.0f, 1.0f);
|
||||
__fsub_rz(2.0f, 1.0f);
|
||||
__log10f(1.0f);
|
||||
__log2f(1.0f);
|
||||
__logf(1.0f);
|
||||
__powf(1.0f, 0.0f);
|
||||
//__saturatef(0.1f);
|
||||
//__sincosf(0.0f, &fX, &fY);
|
||||
__saturatef(0.1f);
|
||||
__sincosf(0.0f, &fX, &fY);
|
||||
__sinf(0.0f);
|
||||
__tanf(0.0f);
|
||||
}
|
||||
|
||||
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/math_functions.h>
|
||||
#include "test_common.h"
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
|
||||
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/math_functions.h>
|
||||
#include "test_common.h"
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
|
||||
@@ -24,8 +24,9 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include"test_common.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/math_functions.h>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
#define N 512
|
||||
#define SIZE N*sizeof(float)
|
||||
|
||||
@@ -24,8 +24,9 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include"test_common.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/math_functions.h>
|
||||
|
||||
#define N 512
|
||||
#define SIZE N*sizeof(double)
|
||||
|
||||
@@ -29,7 +29,8 @@ THE SOFTWARE.
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/device_functions.h>
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
|
||||
__global__ void
|
||||
|
||||
@@ -25,8 +25,8 @@ THE SOFTWARE.
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/device_functions.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/device_functions.h>
|
||||
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ THE SOFTWARE.
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/device_functions.h"
|
||||
#include <hip/device_functions.h>
|
||||
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ THE SOFTWARE.
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/device_functions.h"
|
||||
#include <hip/device_functions.h>
|
||||
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
#define WIDTH 8
|
||||
|
||||
@@ -31,8 +31,8 @@ THE SOFTWARE.
|
||||
#include <algorithm>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/device_functions.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/device_functions.h>
|
||||
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
|
||||
|
||||
@@ -31,8 +31,8 @@ THE SOFTWARE.
|
||||
#include <algorithm>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/device_functions.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/device_functions.h>
|
||||
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s
|
||||
* BUILD: %t %s
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -30,6 +30,7 @@ THE SOFTWARE.
|
||||
#include<hip/hip_runtime.h>
|
||||
#include<iostream>
|
||||
#include"test_common.h"
|
||||
#include<hip/device_functions.h>
|
||||
|
||||
#define LEN 512
|
||||
#define SIZE LEN<<2
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
#include<iostream>
|
||||
#include"test_common.h"
|
||||
#include"hip/math_functions.h"
|
||||
|
||||
const int NN = 1 << 21;
|
||||
|
||||
@@ -31,7 +32,7 @@ __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){
|
||||
int tid = hipThreadIdx_x;
|
||||
if(tid < 1){
|
||||
for(int i=0;i<n;i++){
|
||||
x[i] = sqrt(pow(3.14159,i));
|
||||
x[i] = sqrt(powf(3.14159,i));
|
||||
}
|
||||
y[tid] = y[tid] + 1.0f;
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ THE SOFTWARE.
|
||||
|
||||
#include<iostream>
|
||||
#include"test_common.h"
|
||||
#include"hip/math_functions.h"
|
||||
|
||||
const int NN = 1 << 21;
|
||||
|
||||
@@ -33,7 +34,7 @@ __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){
|
||||
int tid = hipThreadIdx_x;
|
||||
if(tid < 1){
|
||||
for(int i=0;i<n;i++){
|
||||
x[i] = sqrt(pow(3.14159,i));
|
||||
x[i] = sqrt(powf(3.14159,i));
|
||||
}
|
||||
y[tid] = y[tid] + 1.0f;
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user