Moved device code to mimic cuda header behavior

1. All fp32, fp64 math device/host functions should be in math_functions.h/.cpp
2. All fp32, fp64 fast math intrinsics for device/host functions should be in device_functions.h/.cpp
3. All the device code implementations should be in device_util.h/.cpp
4. Hence, made changes appropriately by moving code and creating new header files
5. Added math_functions.cpp/.h
6. Changed the #ifndef include-guard names so that headers with the same name under hip/ and hip/hcc_detail/ (e.g. hip/hip_runtime.h and hip/hcc_detail/hip_runtime.h) do not conflict
7. Changed tests to fit the code changes, making them include the appropriate headers
8. Added math_functions.cpp to CMakeLists.txt
9. Some of the tests are still broken, mostly those for host math functions; they will be fixed in the next commit
10. TODO: FIX compilation issues for host math functions

Change-Id: I7a17637d7e294a7d224ffba932c1a08668febd26
Этот коммит содержится в:
Aditya Atluri
2017-01-17 14:57:51 -06:00
родитель 3f9a9d9318
Коммит d23b6b8694
30 изменённых файлов: 1759 добавлений и 1540 удалений
+2 -1
Просмотреть файл
@@ -181,7 +181,8 @@ if(HIP_PLATFORM STREQUAL "hcc")
src/device_util.cpp
src/hip_ldg.cpp
src/hip_fp16.cpp
src/device_functions.cpp)
src/device_functions.cpp
src/math_functions.cpp)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${HCC_HOME}/lib -lmcwamp -Wl,-Bsymbolic -Wl,-rpath ${HCC_HOME}/lib")
add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME})
+167
Просмотреть файл
@@ -23,6 +23,173 @@ THE SOFTWARE.
#include <hip/hip_runtime.h>
#include <hip/hip_vector_types.h>
// Fast-math backend declarations used by the intrinsic wrappers below.
//
// Two forms appear in this list:
//  * `extern __attribute__((const)) ... __asm("llvm.*")` -- the asm label makes
//    the declaration refer to the symbol named in the string (an LLVM
//    intrinsic name), so no separate definition is expected for these.
//  * plain `__device__` prototypes -- declared here only; their definitions
//    are not visible in this header.
//
// Single precision
extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32");
extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32");
__device__ float __hip_fast_exp10f(float);
__device__ float __hip_fast_expf(float);
__device__ float __hip_fast_frsqrt_rn(float);
// NOTE(review): only the _rd square root is bound to llvm.sqrt.f32; the
// _rn/_ru/_rz variants below are ordinary __device__ prototypes.
extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32");
__device__ float __hip_fast_fsqrt_rn(float);
__device__ float __hip_fast_fsqrt_ru(float);
__device__ float __hip_fast_fsqrt_rz(float);
__device__ float __hip_fast_log10f(float);
extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32");
__device__ float __hip_fast_logf(float);
__device__ float __hip_fast_powf(float, float);
__device__ void __hip_fast_sincosf(float,float*,float*);
extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
__device__ float __hip_fast_tanf(float);
extern __attribute__((const)) float __hip_fast_fmaf(float,float,float) __asm("llvm.fma.f32");
extern __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32");
// Double precision (square root, fused multiply-add, reciprocal)
extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");
extern __attribute__((const)) double __hip_fast_fma(double,double,double) __asm("llvm.fma.f64");
extern __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64");
// Single-precision fast-math intrinsics.
// Each wrapper forwards its arguments unchanged to the __hip_fast_* routine of
// the same base name and returns that routine's result.
__device__ inline float __cosf(float x) { return __hip_fast_cosf(x); }
__device__ inline float __exp10f(float x) { return __hip_fast_exp10f(x); }
__device__ inline float __expf(float x) { return __hip_fast_expf(x); }
__device__ inline float __frsqrt_rn(float x) { return __hip_fast_frsqrt_rn(x); }
__device__ inline float __fsqrt_rd(float x) { return __hip_fast_fsqrt_rd(x); }
__device__ inline float __fsqrt_rn(float x) { return __hip_fast_fsqrt_rn(x); }
__device__ inline float __fsqrt_ru(float x) { return __hip_fast_fsqrt_ru(x); }
__device__ inline float __fsqrt_rz(float x) { return __hip_fast_fsqrt_rz(x); }
__device__ inline float __log10f(float x) { return __hip_fast_log10f(x); }
__device__ inline float __log2f(float x) { return __hip_fast_log2f(x); }
__device__ inline float __logf(float x) { return __hip_fast_logf(x); }
__device__ inline float __powf(float base, float exponent) { return __hip_fast_powf(base, exponent); }
// Writes its two results through s and c; nothing is returned.
__device__ inline void __sincosf(float x, float *s, float *c) { __hip_fast_sincosf(x, s, c); }
__device__ inline float __sinf(float x) { return __hip_fast_sinf(x); }
__device__ inline float __tanf(float x) { return __hip_fast_tanf(x); }
// Single-precision fused multiply-add, one entry point per rounding suffix.
// All four forward to the same __hip_fast_fmaf routine; the suffix does not
// select a different implementation here.
__device__ inline float __fmaf_rd(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); }
__device__ inline float __fmaf_rn(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); }
__device__ inline float __fmaf_ru(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); }
__device__ inline float __fmaf_rz(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); }
// Single-precision reciprocal, one entry point per rounding suffix.
// All four forward to the same __hip_fast_frcp routine.
__device__ inline float __frcp_rd(float x) { return __hip_fast_frcp(x); }
__device__ inline float __frcp_rn(float x) { return __hip_fast_frcp(x); }
__device__ inline float __frcp_ru(float x) { return __hip_fast_frcp(x); }
__device__ inline float __frcp_rz(float x) { return __hip_fast_frcp(x); }
// Double-precision square root, one entry point per rounding suffix.
// All four forward to the same __hip_fast_dsqrt routine.
__device__ inline double __dsqrt_rd(double x) { return __hip_fast_dsqrt(x); }
__device__ inline double __dsqrt_rn(double x) { return __hip_fast_dsqrt(x); }
__device__ inline double __dsqrt_ru(double x) { return __hip_fast_dsqrt(x); }
__device__ inline double __dsqrt_rz(double x) { return __hip_fast_dsqrt(x); }
// Double-precision fused multiply-add; every suffix forwards to __hip_fast_fma.
__device__ inline double __fma_rd(double x, double y, double z) { return __hip_fast_fma(x, y, z); }
__device__ inline double __fma_rn(double x, double y, double z) { return __hip_fast_fma(x, y, z); }
__device__ inline double __fma_ru(double x, double y, double z) { return __hip_fast_fma(x, y, z); }
__device__ inline double __fma_rz(double x, double y, double z) { return __hip_fast_fma(x, y, z); }
// Double-precision reciprocal; every suffix forwards to __hip_fast_drcp.
__device__ inline double __drcp_rd(double x) { return __hip_fast_drcp(x); }
__device__ inline double __drcp_rn(double x) { return __hip_fast_drcp(x); }
__device__ inline double __drcp_ru(double x) { return __hip_fast_drcp(x); }
__device__ inline double __drcp_rz(double x) { return __hip_fast_drcp(x); }
// Integer helper routines declared with C linkage; their definitions are not
// part of this header.
// NOTE(review): presumably 24-bit multiply and multiply-high helpers, judging
// by the names -- confirm against the implementation.
extern "C" unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int);
extern "C" signed int __hip_hc_ir_mul24_int(signed int, signed int);
extern "C" signed int __hip_hc_ir_mulhi_int(signed int, signed int);
+3 -5
Просмотреть файл
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_FP16_H
#define HIP_FP16_H
#ifndef HIP_HCC_DETAIL_FP16_H
#define HIP_HCC_DETAIL_FP16_H
#include "hip/hip_runtime.h"
@@ -452,8 +452,6 @@ typedef struct __attribute__((aligned(4))){
} __half2;
#endif
-448
Просмотреть файл
@@ -121,208 +121,6 @@ extern int HIP_TRACE_API;
#define __HCC_C__
#endif
__device__ float acosf(float x);
__device__ float acoshf(float x);
__device__ float asinf(float x);
__device__ float asinhf(float x);
__device__ float atan2f(float y, float x);
__device__ float atanf(float x);
__device__ float atanhf(float x);
__device__ float cbrtf(float x);
__device__ float ceilf(float x);
__device__ float copysignf(float x, float y);
__device__ float coshf(float x);
__device__ float cyl_bessel_i0f(float x);
__device__ float cyl_bessel_i1f(float x);
__device__ float erfcf(float x);
__device__ float erfcinvf(float y);
__device__ float erfcxf(float x);
__device__ float erff(float x);
__device__ float erfinvf(float y);
__device__ float exp2f(float x);
__device__ float expm1f(float x);
__device__ float fabsf(float x);
__device__ float fdimf(float x, float y);
__device__ __host__ float fdividef(float x, float y);
__device__ float floorf(float x);
__device__ float fmaf(float x, float y, float z);
__device__ float fmaxf(float x, float y);
__device__ float fminf(float x, float y);
__device__ float fmodf(float x, float y);
__device__ float frexpf(float x, float y);
__device__ float hypotf(float x, float y);
__device__ float ilogbf(float x);
__host__ __device__ unsigned isfinite(float a);
__device__ unsigned isinf(float a);
__device__ unsigned isnan(float a);
__device__ float j0f(float x);
__device__ float j1f(float x);
__device__ float jnf(int n, float x);
__device__ float ldexpf(float x, int exp);
__device__ float lgammaf(float x);
__device__ long long int llrintf(float x);
__device__ long long int llroundf(float x);
__device__ float log1pf(float x);
__device__ float logbf(float x);
__device__ long int lrintf(float x);
__device__ long int lroundf(float x);
__device__ float modff(float x, float *iptr);
__device__ float nanf(const char* tagp);
__device__ float nearbyintf(float x);
__device__ float nextafterf(float x, float y);
__device__ float norm3df(float a, float b, float c);
__device__ float norm4df(float a, float b, float c, float d);
__device__ float normcdff(float y);
__device__ float normcdfinvf(float y);
__device__ float normf(int dim, const float *a);
__device__ float rcbrtf(float x);
__device__ float remainderf(float x, float y);
__device__ float remquof(float x, float y, int *quo);
__device__ float rhypotf(float x, float y);
__device__ float rintf(float x);
__device__ float rnorm3df(float a, float b, float c);
__device__ float rnorm4df(float a, float b, float c, float d);
__device__ float rnormf(int dim, const float* a);
__device__ float roundf(float x);
__device__ float rsqrtf(float x);
__device__ float scalblnf(float x, long int n);
__device__ float scalbnf(float x, int n);
__host__ __device__ unsigned signbit(float a);
__device__ void sincospif(float x, float *sptr, float *cptr);
__device__ float sinhf(float x);
__device__ float sinpif(float x);
__device__ float sqrtf(float x);
__device__ float tanhf(float x);
__device__ float tgammaf(float x);
__device__ float truncf(float x);
__device__ float y0f(float x);
__device__ float y1f(float x);
__device__ float ynf(int n, float x);
__host__ __device__ float cospif(float x);
__host__ __device__ float sinpif(float x);
// /__device__ float sqrtf(float x);
__host__ __device__ float rsqrtf(float x);
__host__ float normcdff(float y);
__host__ float erfcinvf(float y);
__host__ float erfcxf(float x);
__host__ float erfinvf(float y);
__host__ float norm3df(float a, float b, float c);
__host__ float normcdfinvf(float y);
__host__ float norm4df(float a, float b, float c, float d);
__host__ float rcbrtf(float x);
__host__ float rhypotf(float x, float y);
__host__ float rnorm3df(float a, float b, float c);
__host__ float rnormf(int dim, const float* a);
__host__ float rnorm4df(float a, float b, float c, float d);
__host__ void sincospif(float x, float *sptr, float *cptr);
__device__ double acos(double x);
__device__ double acosh(double x);
__device__ double asin(double x);
__device__ double asinh(double x);
__device__ double atan(double x);
__device__ double atan2(double y, double x);
__device__ double atanh(double x);
__device__ double cbrt(double x);
__device__ double ceil(double x);
__device__ double copysign(double x, double y);
__device__ double cos(double x);
__device__ double cosh(double x);
__host__ __device__ double cospi(double x);
__device__ double cyl_bessel_i0(double x);
__device__ double cyl_bessel_i1(double x);
__device__ double erf(double x);
__device__ double erfc(double x);
__device__ double erfcinv(double y);
__device__ double erfcx(double x);
__device__ double erfinv(double x);
__device__ double exp(double x);
__device__ double exp10(double x);
__device__ double exp2(double x);
__device__ double expm1(double x);
__device__ double fabs(double x);
__device__ double fdim(double x, double y);
__device__ double fdivide(double x, double y);
__device__ double floor(double x);
__device__ double fma(double x, double y, double z);
__device__ double fmax(double x, double y);
__device__ double fmin(double x, double y);
__device__ double fmod(double x, double y);
__device__ double frexp(double x, int *nptr);
__device__ double hypot(double x, double y);
__device__ double ilogb(double x);
__host__ __device__ unsigned isfinite(double x);
__device__ unsigned isinf(double x);
__device__ unsigned isnan(double x);
__device__ double j0(double x);
__device__ double j1(double x);
__device__ double jn(int n, double x);
__device__ double ldexp(double x, int exp);
__device__ double lgamma(double x);
__device__ long long llrint(double x);
__device__ long long llround(double x);
__device__ double log(double x);
__device__ double log10(double x);
__device__ double log1p(double x);
__device__ double log2(double x);
__device__ double logb(double x);
__device__ long int lrint(double x);
__device__ long int lround(double x);
__device__ double modf(double x, double *iptr);
__device__ double nan(const char* tagp);
__device__ double nearbyint(double x);
__device__ double nextafter(double x, double y);
__device__ double norm(int dim, const double* t);
__device__ double norm3d(double a, double b, double c);
__host__ double norm3d(double a, double b, double c);
__device__ double norm4d(double a, double b, double c, double d);
__host__ double norm4d(double a, double b, double c, double d);
__device__ double normcdf(double y);
__host__ double normcdf(double y);
__device__ double normcdfinv(double y);
__host__ double normcdfinv(double y);
__device__ double pow(double x, double y);
__device__ double rcbrt(double x);
__host__ double rcbrt(double x);
__device__ double remainder(double x, double y);
__device__ double remquo(double x, double y, int *quo);
__device__ double rhypot(double x, double y);
__host__ double rhypot(double x, double y);
__device__ double rint(double x);
__device__ double rnorm(int dim, const double* t);
__host__ double rnorm(int dim, const double* t);
__device__ double rnorm3d(double a, double b, double c);
__host__ double rnorm3d(double a, double b, double c);
__device__ double rnorm4d(double a, double b, double c, double d);
__host__ double rnorm4d(double a, double b, double c, double d);
__device__ double round(double x);
__host__ __device__ double rsqrt(double x);
__device__ double scalbln(double x, long int n);
__device__ double scalbn(double x, int n);
__host__ __device__ unsigned signbit(double a);
__device__ double sin(double a);
__device__ void sincos(double x, double *sptr, double *cptr);
__device__ void sincospi(double x, double *sptr, double *cptr);
__host__ void sincospi(double x, double *sptr, double *cptr);
__device__ double sinh(double x);
__host__ __device__ double sinpi(double x);
__device__ double sqrt(double x);
__device__ double tan(double x);
__device__ double tanh(double x);
__device__ double tgamma(double x);
__device__ double trunc(double x);
__device__ double y0(double x);
__device__ double y1(double y);
__device__ double yn(int n, double x);
__host__ double erfcinv(double y);
__host__ double erfcx(double x);
__host__ double erfinv(double y);
__host__ double fdivide(double x, double y);
// TODO - hipify-clang - change to use the function call.
//#define warpSize hc::__wavesize()
extern const int warpSize;
@@ -451,252 +249,6 @@ __host__ __device__ int max(int arg1, int arg2);
__device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr();
//TODO - add a couple fast math operations here, the set here will grow :
// Single Precision Precise Math
__device__ float __hip_precise_cosf(float);
__device__ float __hip_precise_exp10f(float);
__device__ float __hip_precise_expf(float);
__device__ float __hip_precise_frsqrt_rn(float);
__device__ float __hip_precise_fsqrt_rd(float);
__device__ float __hip_precise_fsqrt_rn(float);
__device__ float __hip_precise_fsqrt_ru(float);
__device__ float __hip_precise_fsqrt_rz(float);
__device__ float __hip_precise_log10f(float);
__device__ float __hip_precise_log2f(float);
__device__ float __hip_precise_logf(float);
__device__ float __hip_precise_powf(float, float);
__device__ void __hip_precise_sincosf(float,float*,float*);
__device__ float __hip_precise_sinf(float);
__device__ float __hip_precise_tanf(float);
// Double Precision Precise Math
__device__ double __hip_precise_dsqrt_rd(double);
__device__ double __hip_precise_dsqrt_rn(double);
__device__ double __hip_precise_dsqrt_ru(double);
__device__ double __hip_precise_dsqrt_rz(double);
// Single Precision Fast Math
extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32");
extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32");
__device__ float __hip_fast_exp10f(float);
__device__ float __hip_fast_expf(float);
__device__ float __hip_fast_frsqrt_rn(float);
extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32");
__device__ float __hip_fast_fsqrt_rn(float);
__device__ float __hip_fast_fsqrt_ru(float);
__device__ float __hip_fast_fsqrt_rz(float);
__device__ float __hip_fast_log10f(float);
extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32");
__device__ float __hip_fast_logf(float);
__device__ float __hip_fast_powf(float, float);
__device__ void __hip_fast_sincosf(float,float*,float*);
extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
__device__ float __hip_fast_tanf(float);
extern __attribute__((const)) float __hip_fast_fmaf(float,float,float) __asm("llvm.fma.f32");
extern __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32");
extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");
extern __attribute__((const)) double __hip_fast_fma(double,double,double) __asm("llvm.fma.f64");
extern __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64");
#ifdef HIP_FAST_MATH
// Single Precision Precise Math when enabled
__device__ inline float cosf(float x) {
return __hip_fast_cosf(x);
}
__device__ inline float exp10f(float x) {
return __hip_fast_exp10f(x);
}
__device__ inline float expf(float x) {
return __hip_fast_expf(x);
}
__device__ inline float log10f(float x) {
return __hip_fast_log10f(x);
}
__device__ inline float log2f(float x) {
return __hip_fast_log2f(x);
}
__device__ inline float logf(float x) {
return __hip_fast_logf(x);
}
__device__ inline float powf(float base, float exponent) {
return __hip_fast_powf(base, exponent);
}
__device__ inline void sincosf(float x, float *s, float *c) {
return __hip_fast_sincosf(x, s, c);
}
__device__ inline float sinf(float x) {
return __hip_fast_sinf(x);
}
__device__ inline float tanf(float x) {
return __hip_fast_tanf(x);
}
#else
__device__ float sinf(float);
__device__ float cosf(float);
__device__ float tanf(float);
__device__ void sincosf(float, float*, float*);
__device__ float logf(float);
__device__ float log2f(float);
__device__ float log10f(float);
__device__ float expf(float);
__device__ float exp10f(float);
__device__ float powf(float, float);
#endif
// Single Precision Fast Math
__device__ inline float __cosf(float x) {
return __hip_fast_cosf(x);
}
__device__ inline float __exp10f(float x) {
return __hip_fast_exp10f(x);
}
__device__ inline float __expf(float x) {
return __hip_fast_expf(x);
}
__device__ inline float __frsqrt_rn(float x) {
return __hip_fast_frsqrt_rn(x);
}
__device__ inline float __fsqrt_rd(float x) {
return __hip_fast_fsqrt_rd(x);
}
__device__ inline float __fsqrt_rn(float x) {
return __hip_fast_fsqrt_rn(x);
}
__device__ inline float __fsqrt_ru(float x) {
return __hip_fast_fsqrt_ru(x);
}
__device__ inline float __fsqrt_rz(float x) {
return __hip_fast_fsqrt_rz(x);
}
__device__ inline float __log10f(float x) {
return __hip_fast_log10f(x);
}
__device__ inline float __log2f(float x) {
return __hip_fast_log2f(x);
}
__device__ inline float __logf(float x) {
return __hip_fast_logf(x);
}
__device__ inline float __powf(float base, float exponent) {
return __hip_fast_powf(base, exponent);
}
__device__ inline void __sincosf(float x, float *s, float *c) {
return __hip_fast_sincosf(x, s, c);
}
__device__ inline float __sinf(float x) {
return __hip_fast_sinf(x);
}
__device__ inline float __tanf(float x) {
return __hip_fast_tanf(x);
}
__device__ inline float __fmaf_rd(float x, float y, float z) {
return __hip_fast_fmaf(x, y, z);
}
__device__ inline float __fmaf_rn(float x, float y, float z) {
return __hip_fast_fmaf(x, y, z);
}
__device__ inline float __fmaf_ru(float x, float y, float z) {
return __hip_fast_fmaf(x, y, z);
}
__device__ inline float __fmaf_rz(float x, float y, float z) {
return __hip_fast_fmaf(x, y, z);
}
__device__ inline float __frcp_rd(float x) {
return __hip_fast_frcp(x);
}
__device__ inline float __frcp_rn(float x) {
return __hip_fast_frcp(x);
}
__device__ inline float __frcp_ru(float x) {
return __hip_fast_frcp(x);
}
__device__ inline float __frcp_rz(float x) {
return __hip_fast_frcp(x);
}
__device__ inline double __dsqrt_rd(double x) {
return __hip_fast_dsqrt(x);
}
__device__ inline double __dsqrt_rn(double x) {
return __hip_fast_dsqrt(x);
}
__device__ inline double __dsqrt_ru(double x) {
return __hip_fast_dsqrt(x);
}
__device__ inline double __dsqrt_rz(double x) {
return __hip_fast_dsqrt(x);
}
__device__ inline double __fma_rd(double x, double y, double z) {
return __hip_fast_fma(x, y, z);
}
__device__ inline double __fma_rn(double x, double y, double z) {
return __hip_fast_fma(x, y, z);
}
__device__ inline double __fma_ru(double x, double y, double z) {
return __hip_fast_fma(x, y, z);
}
__device__ inline double __fma_rz(double x, double y, double z) {
return __hip_fast_fma(x, y, z);
}
__device__ inline double __drcp_rd(double x) {
return __hip_fast_drcp(x);
}
__device__ inline double __drcp_rn(double x) {
return __hip_fast_drcp(x);
}
__device__ inline double __drcp_ru(double x) {
return __hip_fast_drcp(x);
}
__device__ inline double __drcp_rz(double x) {
return __hip_fast_drcp(x);
}
/**
* CUDA 8 device function features
+288
Просмотреть файл
@@ -0,0 +1,288 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_HCC_DETAIL_MATH_FUNCTIONS_H
#define HIP_HCC_DETAIL_MATH_FUNCTIONS_H
#include <hip/hip_runtime.h>
#include <hip/hip_vector_types.h>
#include <hip/hcc_detail/device_functions.h>
// Single-precision math function prototypes.
// Unless marked otherwise these are device-only declarations; the definitions
// are not visible in this header.
__device__ float acosf(float x);
__device__ float acoshf(float x);
__device__ float asinf(float x);
__device__ float asinhf(float x);
__device__ float atan2f(float y, float x);
__device__ float atanf(float x);
__device__ float atanhf(float x);
__device__ float cbrtf(float x);
__device__ float ceilf(float x);
__device__ float copysignf(float x, float y);
__device__ float coshf(float x);
__device__ float cyl_bessel_i0f(float x);
__device__ float cyl_bessel_i1f(float x);
__device__ float erfcf(float x);
__device__ float erfcinvf(float y);
__device__ float erfcxf(float x);
__device__ float erff(float x);
__device__ float erfinvf(float y);
__device__ float exp2f(float x);
__device__ float expm1f(float x);
__device__ float fabsf(float x);
__device__ float fdimf(float x, float y);
__device__ __host__ float fdividef(float x, float y);
__device__ float floorf(float x);
__device__ float fmaf(float x, float y, float z);
__device__ float fmaxf(float x, float y);
__device__ float fminf(float x, float y);
__device__ float fmodf(float x, float y);
// NOTE(review): the second parameter is a float here, while the double
// overload in this header is frexp(double, int*) -- confirm the signature.
__device__ float frexpf(float x, float y);
__device__ float hypotf(float x, float y);
// NOTE(review): declared as returning float; the C library ilogbf returns
// int -- confirm this is intended.
__device__ float ilogbf(float x);
// NOTE(review): returns int, whereas isinf/isnan/signbit below and the double
// overload of isfinite in this header return unsigned -- confirm.
__host__ __device__ int isfinite(float a);
__device__ unsigned isinf(float a);
__device__ unsigned isnan(float a);
__device__ float j0f(float x);
__device__ float j1f(float x);
__device__ float jnf(int n, float x);
__device__ float ldexpf(float x, int exp);
__device__ float lgammaf(float x);
__device__ long long int llrintf(float x);
__device__ long long int llroundf(float x);
__device__ float log1pf(float x);
__device__ float logbf(float x);
__device__ long int lrintf(float x);
__device__ long int lroundf(float x);
__device__ float modff(float x, float *iptr);
__device__ float nanf(const char* tagp);
__device__ float nearbyintf(float x);
__device__ float nextafterf(float x, float y);
__device__ float norm3df(float a, float b, float c);
__device__ float norm4df(float a, float b, float c, float d);
__device__ float normcdff(float y);
__device__ float normcdfinvf(float y);
__device__ float normf(int dim, const float *a);
__device__ float rcbrtf(float x);
__device__ float remainderf(float x, float y);
__device__ float remquof(float x, float y, int *quo);
__device__ float rhypotf(float x, float y);
__device__ float rintf(float x);
__device__ float rnorm3df(float a, float b, float c);
__device__ float rnorm4df(float a, float b, float c, float d);
__device__ float rnormf(int dim, const float* a);
__device__ float roundf(float x);
__device__ float rsqrtf(float x);
__device__ float scalblnf(float x, long int n);
__device__ float scalbnf(float x, int n);
__host__ __device__ unsigned signbit(float a);
__device__ void sincospif(float x, float *sptr, float *cptr);
__device__ float sinhf(float x);
__device__ float sinpif(float x);
__device__ float sqrtf(float x);
__device__ float tanhf(float x);
__device__ float tgammaf(float x);
__device__ float truncf(float x);
__device__ float y0f(float x);
__device__ float y1f(float x);
__device__ float ynf(int n, float x);
// Single-precision prototypes that are also (or only) callable from the host.
__host__ __device__ float cospif(float x);
// NOTE(review): sinpif and rsqrtf are also declared earlier in this header as
// __device__-only; here they are re-declared as __host__ __device__ -- confirm
// which qualifier set is intended.
__host__ __device__ float sinpif(float x);
// Disabled duplicate of the sqrtf prototype:
// __device__ float sqrtf(float x);
__host__ __device__ float rsqrtf(float x);
// Host-side declarations of functions that also have a separate __device__
// declaration earlier in this header.
__host__ float normcdff(float y);
__host__ float erfcinvf(float y);
__host__ float erfcxf(float x);
__host__ float erfinvf(float y);
__host__ float norm3df(float a, float b, float c);
__host__ float normcdfinvf(float y);
__host__ float norm4df(float a, float b, float c, float d);
__host__ float rcbrtf(float x);
__host__ float rhypotf(float x, float y);
__host__ float rnorm3df(float a, float b, float c);
__host__ float rnormf(int dim, const float* a);
__host__ float rnorm4df(float a, float b, float c, float d);
__host__ void sincospif(float x, float *sptr, float *cptr);
// Double-precision math function prototypes.
// Several functions are declared twice, once as __device__ and once as
// __host__, on adjacent lines; the definitions are not visible in this header.
__device__ double acos(double x);
__device__ double acosh(double x);
__device__ double asin(double x);
__device__ double asinh(double x);
__device__ double atan(double x);
__device__ double atan2(double y, double x);
__device__ double atanh(double x);
__device__ double cbrt(double x);
__device__ double ceil(double x);
__device__ double copysign(double x, double y);
__device__ double cos(double x);
__device__ double cosh(double x);
__host__ __device__ double cospi(double x);
__device__ double cyl_bessel_i0(double x);
__device__ double cyl_bessel_i1(double x);
__device__ double erf(double x);
__device__ double erfc(double x);
__device__ double erfcinv(double y);
__device__ double erfcx(double x);
__device__ double erfinv(double x);
__device__ double exp(double x);
__device__ double exp10(double x);
__device__ double exp2(double x);
__device__ double expm1(double x);
__device__ double fabs(double x);
__device__ double fdim(double x, double y);
__device__ double floor(double x);
__device__ double fma(double x, double y, double z);
__device__ double fmax(double x, double y);
__device__ double fmin(double x, double y);
__device__ double fmod(double x, double y);
__device__ double frexp(double x, int *nptr);
__device__ double hypot(double x, double y);
// NOTE(review): declared as returning double; the C library ilogb returns
// int -- confirm this is intended.
__device__ double ilogb(double x);
__host__ __device__ unsigned isfinite(double x);
__device__ unsigned isinf(double x);
__device__ unsigned isnan(double x);
__device__ double j0(double x);
__device__ double j1(double x);
__device__ double jn(int n, double x);
__device__ double ldexp(double x, int exp);
__device__ double lgamma(double x);
__device__ long long llrint(double x);
__device__ long long llround(double x);
__device__ double log(double x);
__device__ double log10(double x);
__device__ double log1p(double x);
__device__ double log2(double x);
__device__ double logb(double x);
__device__ long int lrint(double x);
__device__ long int lround(double x);
__device__ double modf(double x, double *iptr);
__device__ double nan(const char* tagp);
__device__ double nearbyint(double x);
__device__ double nextafter(double x, double y);
__device__ double norm(int dim, const double* t);
__device__ double norm3d(double a, double b, double c);
__host__ double norm3d(double a, double b, double c);
__device__ double norm4d(double a, double b, double c, double d);
__host__ double norm4d(double a, double b, double c, double d);
__device__ double normcdf(double y);
__host__ double normcdf(double y);
__device__ double normcdfinv(double y);
__host__ double normcdfinv(double y);
__device__ double pow(double x, double y);
__device__ double rcbrt(double x);
__host__ double rcbrt(double x);
__device__ double remainder(double x, double y);
__device__ double remquo(double x, double y, int *quo);
__device__ double rhypot(double x, double y);
__host__ double rhypot(double x, double y);
__device__ double rint(double x);
__device__ double rnorm(int dim, const double* t);
__host__ double rnorm(int dim, const double* t);
__device__ double rnorm3d(double a, double b, double c);
__host__ double rnorm3d(double a, double b, double c);
__device__ double rnorm4d(double a, double b, double c, double d);
__host__ double rnorm4d(double a, double b, double c, double d);
__device__ double round(double x);
__host__ __device__ double rsqrt(double x);
__device__ double scalbln(double x, long int n);
__device__ double scalbn(double x, int n);
__host__ __device__ unsigned signbit(double a);
__device__ double sin(double a);
__device__ void sincos(double x, double *sptr, double *cptr);
__device__ void sincospi(double x, double *sptr, double *cptr);
__host__ void sincospi(double x, double *sptr, double *cptr);
__device__ double sinh(double x);
__host__ __device__ double sinpi(double x);
__device__ double sqrt(double x);
__device__ double tan(double x);
__device__ double tanh(double x);
__device__ double tgamma(double x);
__device__ double trunc(double x);
__device__ double y0(double x);
__device__ double y1(double y);
__device__ double yn(int n, double x);
// Host-only double-precision prototypes.
__host__ double erfcinv(double y);
__host__ double erfcx(double x);
__host__ double erfinv(double y);
// NOTE(review): fdivide has only this __host__ declaration among the
// double-precision prototypes in this header -- confirm whether a __device__
// declaration is expected as well.
__host__ double fdivide(double x, double y);
// NOTE(review): the first parameter is double here, while the __device__
// declaration in this header is norm(int dim, const double*); as written this
// declares a different overload -- confirm the intended signature.
__host__ double norm(double x, const double *t);
#ifdef HIP_FAST_MATH
// HIP_FAST_MATH defined: the standard single-precision names become inline
// wrappers that forward to the corresponding __hip_fast_* routines.
__device__ inline float cosf(float x) { return __hip_fast_cosf(x); }
__device__ inline float exp10f(float x) { return __hip_fast_exp10f(x); }
__device__ inline float expf(float x) { return __hip_fast_expf(x); }
__device__ inline float log10f(float x) { return __hip_fast_log10f(x); }
__device__ inline float log2f(float x) { return __hip_fast_log2f(x); }
__device__ inline float logf(float x) { return __hip_fast_logf(x); }
__device__ inline float powf(float base, float exponent) { return __hip_fast_powf(base, exponent); }
// Writes its two results through s and c; nothing is returned.
__device__ inline void sincosf(float x, float *s, float *c) { __hip_fast_sincosf(x, s, c); }
__device__ inline float sinf(float x) { return __hip_fast_sinf(x); }
__device__ inline float tanf(float x) { return __hip_fast_tanf(x); }
#else
// HIP_FAST_MATH not defined: the same names are only declared here; their
// definitions are provided outside this header.
__device__ float cosf(float);
__device__ float exp10f(float);
__device__ float expf(float);
__device__ float log10f(float);
__device__ float log2f(float);
__device__ float logf(float);
__device__ float powf(float, float);
__device__ void sincosf(float, float*, float*);
__device__ float sinf(float);
__device__ float tanf(float);
#endif
#endif
+49
Просмотреть файл
@@ -0,0 +1,49 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
//! HIP = Heterogeneous-compute Interface for Portability
//!
//! Define an extremely thin runtime layer that allows source code to be compiled unmodified
//! through either AMD HCC or NVCC. Key features tend to be in the spirit
//! and terminology of CUDA, but with a portable path to other accelerators as well:
//
//! Both paths support rich C++ features including classes, templates, lambdas, etc.
//! Runtime API is C
//! Memory management is based on pure pointers and resembles malloc/free/copy.
//
//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch macros.
//! hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ features.
#pragma once
// Some standard header files, these are included by hc.hpp and so want to make them avail on both
// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
// on NVCC path:
#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__)
#include <hip/hcc_detail/math_functions.h>
#elif defined(__HIP_PLATFORM_NVCC__) && !defined (__HIP_PLATFORM_HCC__)
#include <hip/nvcc_detail/math_functions.h>
#else
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
#endif
+68
Просмотреть файл
@@ -523,3 +523,71 @@ __device__ unsigned long long __umul64hi(unsigned long long int x, unsigned long
uHold1.ul = uHold1.ui[1] * uHold2.ui[1];
return uHold1.ul;
}
/*
HIP specific device functions
*/
__device__ unsigned __hip_ds_bpermute(int index, unsigned src) {
return hc::__amdgcn_ds_bpermute(index, src);
}
__device__ float __hip_ds_bpermutef(int index, float src) {
return hc::__amdgcn_ds_bpermute(index, src);
}
__device__ unsigned __hip_ds_permute(int index, unsigned src) {
return hc::__amdgcn_ds_permute(index, src);
}
__device__ float __hip_ds_permutef(int index, float src) {
return hc::__amdgcn_ds_permute(index, src);
}
__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern) {
return hc::__amdgcn_ds_swizzle(src, pattern);
}
__device__ float __hip_ds_swizzlef(float src, int pattern) {
return hc::__amdgcn_ds_swizzle(src, pattern);
}
__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl) {
return hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
}
#define MASK1 0x00ff00ff
#define MASK2 0xff00ff00
/*
 * Packed addition on the `.a` word of two char4 values.
 * The word is split with MASK1 (0x00ff00ff) and MASK2 (0xff00ff00); each half is
 * added on its own and re-masked, so a carry out of one selected byte is discarded
 * instead of reaching the next selected byte.
 */
__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) {
    // Bytes selected by MASK1.
    const unsigned lowA = in1.a & MASK1;
    const unsigned lowB = in2.a & MASK1;
    const unsigned lowSum = (lowA + lowB) & MASK1;

    // Bytes selected by MASK2.
    const unsigned highA = in1.a & MASK2;
    const unsigned highB = in2.a & MASK2;
    const unsigned highSum = (highA + highB) & MASK2;

    char4 result;
    result.a = lowSum;
    result.a = result.a | highSum;
    return result;
}
/*
 * Packed byte-wise subtraction on the `.a` word of two char4 values: every byte
 * of in2.a is subtracted from the matching byte of in1.a, modulo 256.
 *
 * The word is processed as two interleaved halves (MASK1 and MASK2). Subtracting
 * the masked words directly lets a borrow from the lower selected byte travel
 * through the zeroed gap and decrement the upper selected byte. To stop that, a
 * guard bit is set in the minuend just above the lower byte of each half; the
 * guard absorbs the borrow and is removed again by the final mask.
 */
__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
    char4 out;
    // MASK1 half: guard at bit 8 keeps a borrow from bits 0-7 out of bits 16-23.
    unsigned one1 = (in1.a & MASK1) | 0x00000100u;
    unsigned one2 = in2.a & MASK1;
    out.a = (one1 - one2) & MASK1;
    // MASK2 half: guard at bit 16 keeps a borrow from bits 8-15 out of bits 24-31.
    one1 = (in1.a & MASK2) | 0x00010000u;
    one2 = in2.a & MASK2;
    out.a = out.a | ((one1 - one2) & MASK2);
    return out;
}
/*
 * Packed byte-wise multiplication on the `.a` word of two char4 values: every
 * byte of in1.a is multiplied by the matching byte of in2.a and the low 8 bits
 * of each product are kept.
 *
 * The bytes are multiplied one at a time. Multiplying two masked words that each
 * carry two bytes would add the cross products (byte0 * byte2 and so on) into the
 * upper byte, so only the lowest byte of such a product would be meaningful.
 */
__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
    char4 out;
    const unsigned lhs = in1.a;
    const unsigned rhs = in2.a;
    unsigned packed = 0;
    for (int shift = 0; shift < 32; shift += 8) {
        // Isolate one byte from each operand, multiply, keep the low byte.
        const unsigned product = ((lhs >> shift) & 0xffu) * ((rhs >> shift) & 0xffu);
        packed |= (product & 0xffu) << shift;
    }
    out.a = packed;
    return out;
}
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+117
Просмотреть файл
@@ -23,6 +23,8 @@ THE SOFTWARE.
#ifndef DEVICE_UTIL_H
#define DEVICE_UTIL_H
#include<hip/hcc_detail/hip_runtime.h>
/*
Heap size computation for malloc and free device functions.
*/
@@ -35,4 +37,119 @@ THE SOFTWARE.
#define SIZE_MALLOC NUM_PAGES * SIZE_OF_PAGE
#define SIZE_OF_HEAP SIZE_MALLOC
#define HIP_SQRT_2 1.41421356237
#define HIP_SQRT_PI 1.77245385091
#define __hip_erfinva3 -0.140543331
#define __hip_erfinva2 0.914624893
#define __hip_erfinva1 -1.645349621
#define __hip_erfinva0 0.886226899
#define __hip_erfinvb4 0.012229801
#define __hip_erfinvb3 -0.329097515
#define __hip_erfinvb2 1.442710462
#define __hip_erfinvb1 -2.118377725
#define __hip_erfinvb0 1
#define __hip_erfinvc3 1.641345311
#define __hip_erfinvc2 3.429567803
#define __hip_erfinvc1 -1.62490649
#define __hip_erfinvc0 -1.970840454
#define __hip_erfinvd2 1.637067800
#define __hip_erfinvd1 3.543889200
#define __hip_erfinvd0 1
#define HIP_PI 3.14159265358979323846
__device__ void* __hip_hc_malloc(size_t size);
__device__ void* __hip_hc_free(void* ptr);
__device__ float __hip_erfinvf(float x);
__device__ double __hip_erfinv(double x);
__device__ float __hip_j0f(float x);
__device__ double __hip_j0(double x);
__device__ float __hip_j1f(float x);
__device__ double __hip_j1(double x);
__device__ float __hip_y0f(float x);
__device__ double __hip_y0(double x);
__device__ float __hip_y1f(float x);
__device__ double __hip_y1(double x);
__device__ float __hip_jnf(int n, float x);
__device__ double __hip_jn(int n, double x);
__device__ float __hip_ynf(int n, float x);
__device__ double __hip_yn(int n, double x);
__device__ float __hip_precise_cosf(float x);
__device__ float __hip_precise_exp10f(float x);
__device__ float __hip_precise_expf(float x);
__device__ float __hip_precise_frsqrt_rn(float x);
__device__ float __hip_precise_fsqrt_rd(float x);
__device__ float __hip_precise_fsqrt_rn(float x);
__device__ float __hip_precise_fsqrt_ru(float x);
__device__ float __hip_precise_fsqrt_rz(float x);
__device__ float __hip_precise_log10f(float x);
__device__ float __hip_precise_log2f(float x);
__device__ float __hip_precise_logf(float x);
__device__ float __hip_precise_powf(float base, float exponent);
__device__ void __hip_precise_sincosf(float x, float *s, float *c);
__device__ float __hip_precise_sinf(float x);
__device__ float __hip_precise_tanf(float x);
// Double Precision Math
__device__ double __hip_precise_dsqrt_rd(double x);
__device__ double __hip_precise_dsqrt_rn(double x);
__device__ double __hip_precise_dsqrt_ru(double x);
__device__ double __hip_precise_dsqrt_rz(double x);
// Float Fast Math
__device__ float __hip_fast_exp10f(float x);
__device__ float __hip_fast_expf(float x);
__device__ float __hip_fast_frsqrt_rn(float x);
__device__ float __hip_fast_fsqrt_rn(float x);
__device__ float __hip_fast_fsqrt_ru(float x);
__device__ float __hip_fast_fsqrt_rz(float x);
__device__ float __hip_fast_log10f(float x);
__device__ float __hip_fast_logf(float x);
__device__ float __hip_fast_powf(float base, float exponent);
__device__ void __hip_fast_sincosf(float x, float *s, float *c);
__device__ float __hip_fast_tanf(float x);
// Double Precision Math
__device__ double __hip_fast_dsqrt_rd(double x);
__device__ double __hip_fast_dsqrt_rn(double x);
__device__ double __hip_fast_dsqrt_ru(double x);
__device__ double __hip_fast_dsqrt_rz(double x);
__device__ void __threadfence_system(void);
// Host-side helper declarations (no __device__ qualifier). Each function comes
// as a float / double pair; the double y0 entry was previously a second copy of
// the y1 declaration, which left __hip_host_y0(double) undeclared.
// NOTE(review): the definitions are not visible here - confirm that
// __hip_host_y0(double) is implemented alongside the others.
float __hip_host_erfinvf(float x);
double __hip_host_erfinv(double x);
float __hip_host_erfcinvf(float y);
double __hip_host_erfcinv(double y);
float __hip_host_j0f(float x);
double __hip_host_j0(double x);
float __hip_host_j1f(float x);
double __hip_host_j1(double x);
float __hip_host_y0f(float x);
double __hip_host_y0(double x);
float __hip_host_y1f(float x);
double __hip_host_y1(double x);
float __hip_host_jnf(int n, float x);
double __hip_host_jn(int n, double x);
float __hip_host_ynf(int n, float x);
double __hip_host_yn(int n, double x);
#endif
+971
Просмотреть файл
@@ -0,0 +1,971 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hc.hpp>
#include <grid_launch.h>
#include <hc_math.hpp>
#include "device_util.h"
#include "hip/hcc_detail/device_functions.h"
#include "hip/hip_runtime.h"
__device__ float acosf(float x)
{
return hc::precise_math::acosf(x);
}
__device__ float acoshf(float x)
{
return hc::precise_math::acoshf(x);
}
__device__ float asinf(float x)
{
return hc::precise_math::asinf(x);
}
__device__ float asinhf(float x)
{
return hc::precise_math::asinhf(x);
}
// Two-argument arc tangent, single precision.
// y is the first argument and x the second; they are forwarded in that same
// order, matching the double precision atan2 wrapper in this file. Passing them
// swapped would compute atan2(x, y).
__device__ float atan2f(float y, float x)
{
    return hc::precise_math::atan2f(y, x);
}
__device__ float atanf(float x)
{
return hc::precise_math::atanf(x);
}
__device__ float atanhf(float x)
{
return hc::precise_math::atanhf(x);
}
__device__ float cbrtf(float x)
{
return hc::precise_math::cbrtf(x);
}
__device__ float ceilf(float x)
{
return hc::precise_math::ceilf(x);
}
__device__ float copysignf(float x, float y)
{
return hc::precise_math::copysignf(x, y);
}
__device__ float cosf(float x)
{
return hc::precise_math::cosf(x);
}
__device__ float coshf(float x)
{
return hc::precise_math::coshf(x);
}
__device__ float cyl_bessel_i0f(float x);
__device__ float cyl_bessel_i1f(float x);
__device__ float erfcf(float x)
{
return hc::precise_math::erfcf(x);
}
__device__ float erfcinvf(float y)
{
return __hip_erfinvf(1 - y);
}
__device__ float erfcxf(float x)
{
return hc::precise_math::expf(x*x)*hc::precise_math::erfcf(x);
}
__device__ float erff(float x)
{
return hc::precise_math::erff(x);
}
__device__ float erfinvf(float y)
{
return __hip_erfinvf(y);
}
__device__ float exp10f(float x)
{
return hc::precise_math::exp10f(x);
}
__device__ float exp2f(float x)
{
return hc::precise_math::exp2f(x);
}
__device__ float expf(float x)
{
return hc::precise_math::expf(x);
}
__device__ float expm1f(float x)
{
return hc::precise_math::expm1f(x);
}
__device__ float fabsf(float x)
{
return hc::precise_math::fabsf(x);
}
__device__ float fdimf(float x, float y)
{
return hc::precise_math::fdimf(x, y);
}
__device__ float fdividef(float x, float y)
{
return x/y;
}
__device__ float floorf(float x)
{
return hc::precise_math::floorf(x);
}
__device__ float fmaf(float x, float y, float z)
{
return hc::precise_math::fmaf(x, y, z);
}
__device__ float fmaxf(float x, float y)
{
return hc::precise_math::fmaxf(x, y);
}
__device__ float fminf(float x, float y)
{
return hc::precise_math::fminf(x, y);
}
__device__ float fmodf(float x, float y)
{
return hc::precise_math::fmodf(x, y);
}
__device__ float frexpf(float x, int *nptr)
{
return hc::precise_math::frexpf(x, nptr);
}
__device__ float hypotf(float x, float y)
{
return hc::precise_math::hypotf(x, y);
}
__device__ float ilogbf(float x)
{
return hc::precise_math::ilogbf(x);
}
__device__ unsigned isfinite(float a)
{
return hc::precise_math::isfinite(a);
}
__device__ unsigned isinf(float a)
{
return hc::precise_math::isinf(a);
}
__device__ unsigned isnan(float a)
{
return hc::precise_math::isnan(a);
}
__device__ float j0f(float x)
{
return __hip_j0f(x);
}
__device__ float j1f(float x)
{
return __hip_j1f(x);
}
__device__ float jnf(int n, float x)
{
return __hip_jnf(n, x);
}
__device__ float ldexpf(float x, int exp)
{
return hc::precise_math::ldexpf(x, exp);
}
__device__ float lgammaf(float x, int *sign)
{
return hc::precise_math::lgammaf(x, sign);
}
// Rounds x to an integer value and returns it as long long.
// nearbyintf is used rather than roundf so that halfway cases follow the rint
// convention (round to nearest even under the default rounding mode) instead of
// rounding away from zero. The result is converted straight to long long; an
// int intermediate would truncate values outside the int range.
__device__ long long int llrintf(float x)
{
    return static_cast<long long int>(hc::precise_math::nearbyintf(x));
}
// Rounds x with roundf and returns the result as long long.
// The conversion goes straight to long long; an int intermediate would truncate
// values outside the int range.
__device__ long long int llroundf(float x)
{
    return static_cast<long long int>(hc::precise_math::roundf(x));
}
// Base-10 logarithm, single precision; forwards to hc::precise_math::log10f.
__device__ float log10f(float x)
{
    return hc::precise_math::log10f(x);
}
__device__ float log1pf(float x)
{
return hc::precise_math::log1pf(x);
}
__device__ float log2f(float x)
{
return hc::precise_math::log2f(x);
}
__device__ float logbf(float x)
{
return hc::precise_math::logbf(x);
}
__device__ float logf(float x)
{
return hc::precise_math::logf(x);
}
// Rounds x to an integer value and returns it as long.
// nearbyintf is used rather than roundf so that halfway cases follow the rint
// convention (round to nearest even under the default rounding mode). The result
// is converted straight to long; an int intermediate would truncate values when
// long is wider than int.
__device__ long int lrintf(float x)
{
    return static_cast<long int>(hc::precise_math::nearbyintf(x));
}
__device__ long int lroundf(float x)
{
long int y = hc::precise_math::roundf(x);
return y;
}
__device__ float modff(float x, float *iptr)
{
return hc::precise_math::modff(x, iptr);
}
__device__ float nanf(const char* tagp)
{
return hc::precise_math::nanf((int)*tagp);
}
__device__ float nearbyintf(float x)
{
return hc::precise_math::nearbyintf(x);
}
__device__ float nextafterf(float x, float y)
{
return hc::precise_math::nextafter(x, y);
}
__device__ float norm3df(float a, float b, float c)
{
float x = a*a + b*b + c*c;
return hc::precise_math::sqrtf(x);
}
__device__ float norm4df(float a, float b, float c, float d)
{
float x = a*a + b*b;
float y = c*c + d*d;
return hc::precise_math::sqrtf(x+y);
}
// Standard normal cumulative distribution function, single precision:
//   normcdf(y) = (1 + erf(y / sqrt(2))) / 2
// The division by sqrt(2) applies to the argument of erf, not to its result;
// this matches the host normcdff definition, which uses erf(-t/sqrt(2)).
__device__ float normcdff(float y)
{
    const float scaled = y / static_cast<float>(HIP_SQRT_2);
    return (hc::precise_math::erff(scaled) + 1) / 2;
}
__device__ float normcdfinvf(float y)
{
return HIP_SQRT_2 * __hip_erfinvf(2*y-1);
}
// Square root of the sum of squares of a[0] .. a[dim-1].
// Each square is accumulated with a fused multiply-add; when dim <= 0 the loop
// body never runs and sqrtf(0.0f) is returned.
__device__ float normf(int dim, const float *a)
{
    float sumSq = 0.0f;
    int idx = 0;
    while (idx < dim)
    {
        sumSq = hc::precise_math::fmaf(a[idx], a[idx], sumSq);
        ++idx;
    }
    return hc::precise_math::sqrtf(sumSq);
}
__device__ float powf(float x, float y)
{
return hc::precise_math::powf(x, y);
}
__device__ float rcbrtf(float x)
{
return hc::precise_math::rcbrtf(x);
}
__device__ float remainderf(float x, float y)
{
return hc::precise_math::remainderf(x, y);
}
__device__ float remquof(float x, float y, int *quo)
{
return hc::precise_math::remquof(x, y, quo);
}
__device__ float rhypotf(float x, float y)
{
return 1/hc::precise_math::hypotf(x, y);
}
// Rounds x to an integer value in floating-point format.
// nearbyintf is used rather than roundf: roundf sends halfway cases away from
// zero (2.5f -> 3.0f), whereas rint rounds them to the nearest even value under
// the default rounding mode (2.5f -> 2.0f).
__device__ float rintf(float x)
{
    return hc::precise_math::nearbyintf(x);
}
__device__ float rnorm3df(float a, float b, float c)
{
float x = a*a + b*b + c*c;
return 1/hc::precise_math::sqrtf(x);
}
__device__ float rnorm4df(float a, float b, float c, float d)
{
float x = a*a + b*b;
float y = c*c + d*d;
return 1/hc::precise_math::sqrtf(x+y);
}
// Reciprocal of the square root of the sum of squares of a[0] .. a[dim-1].
// The sum is accumulated with fused multiply-adds and inverted with a plain
// division after sqrtf.
__device__ float rnormf(int dim, const float* a)
{
    float sumSq = 0.0f;
    int idx = 0;
    while (idx < dim)
    {
        sumSq = hc::precise_math::fmaf(a[idx], a[idx], sumSq);
        ++idx;
    }
    const float length = hc::precise_math::sqrtf(sumSq);
    return 1/length;
}
__device__ float roundf(float x)
{
return hc::precise_math::roundf(x);
}
__device__ float scalblnf(float x, long int n)
{
return hc::precise_math::scalb(x, n);
}
__device__ float scalbnf(float x, int n)
{
return hc::precise_math::scalbnf(x, n);
}
__device__ unsigned signbit(float a)
{
return hc::precise_math::signbit(a);
}
__device__ void sincosf(float x, float *sptr, float *cptr)
{
*sptr = hc::precise_math::sinf(x);
*cptr = hc::precise_math::cosf(x);
}
__device__ void sincospif(float x, float *sptr, float *cptr)
{
*sptr = hc::precise_math::sinpif(x);
*cptr = hc::precise_math::cospif(x);
}
__device__ float sinf(float x)
{
return hc::precise_math::sinf(x);
}
__device__ float sinhf(float x)
{
return hc::precise_math::sinhf(x);
}
__device__ float tanf(float x)
{
return hc::precise_math::tanf(x);
}
__device__ float tanhf(float x)
{
return hc::precise_math::tanhf(x);
}
__device__ float tgammaf(float x)
{
return hc::precise_math::tgammaf(x);
}
__device__ float truncf(float x)
{
return hc::precise_math::truncf(x);
}
__device__ float y0f(float x)
{
return __hip_y0f(x);
}
__device__ float y1f(float x)
{
return __hip_y1f(x);
}
__device__ float ynf(int n, float x)
{
return __hip_ynf(n, x);
}
__device__ float cospif(float x)
{
return hc::precise_math::cospif(x);
}
__device__ float sinpif(float x)
{
return hc::precise_math::sinpif(x);
}
__device__ float sqrtf(float x)
{
return hc::precise_math::sqrtf(x);
}
__device__ float rsqrtf(float x)
{
return hc::precise_math::rsqrtf(x);
}
/*
* Double precision device math functions
*/
__device__ double acos(double x)
{
return hc::precise_math::acos(x);
}
__device__ double acosh(double x)
{
return hc::precise_math::acosh(x);
}
__device__ double asin(double x)
{
return hc::precise_math::asin(x);
}
__device__ double asinh(double x)
{
return hc::precise_math::asinh(x);
}
__device__ double atan(double x)
{
return hc::precise_math::atan(x);
}
__device__ double atan2(double y, double x)
{
return hc::precise_math::atan2(y, x);
}
__device__ double atanh(double x)
{
return hc::precise_math::atanh(x);
}
__device__ double cbrt(double x)
{
return hc::precise_math::cbrt(x);
}
__device__ double ceil(double x)
{
return hc::precise_math::ceil(x);
}
__device__ double copysign(double x, double y)
{
return hc::precise_math::copysign(x, y);
}
__device__ double cos(double x)
{
return hc::precise_math::cos(x);
}
__device__ double cosh(double x)
{
return hc::precise_math::cosh(x);
}
__device__ double cospi(double x)
{
return hc::precise_math::cospi(x);
}
__device__ double cyl_bessel_i0(double x);
__device__ double cyl_bessel_i1(double x);
__device__ double erf(double x)
{
return hc::precise_math::erf(x);
}
__device__ double erfc(double x)
{
return hc::precise_math::erfc(x);
}
__device__ double erfcinv(double x)
{
return __hip_erfinv(1 - x);
}
// Scaled complementary error function, double precision:
//   erfcx(x) = exp(x*x) * erfc(x)
// The second factor is erfc (not erf), matching the single precision erfcxf
// wrapper and the host erfcx definition in this file.
__device__ double erfcx(double x)
{
    return hc::precise_math::exp(x*x)*hc::precise_math::erfc(x);
}
__device__ double erfinv(double x)
{
return __hip_erfinv(x);
}
__device__ double exp(double x)
{
return hc::precise_math::exp(x);
}
__device__ double exp10(double x)
{
return hc::precise_math::exp10(x);
}
__device__ double exp2(double x)
{
return hc::precise_math::exp2(x);
}
__device__ double expm1(double x)
{
return hc::precise_math::expm1(x);
}
__device__ double fabs(double x)
{
return hc::precise_math::fabs(x);
}
__device__ double fdim(double x, double y)
{
return hc::precise_math::fdim(x, y);
}
__device__ double fdivide(double x, double y)
{
return x/y;
}
__device__ double floor(double x)
{
return hc::precise_math::floor(x);
}
__device__ double fma(double x, double y, double z)
{
return hc::precise_math::fma(x, y, z);
}
__device__ double fmax(double x, double y)
{
return hc::precise_math::fmax(x, y);
}
__device__ double fmin(double x, double y)
{
return hc::precise_math::fmin(x, y);
}
__device__ double fmod(double x, double y)
{
return hc::precise_math::fmod(x, y);
}
__device__ double frexp(double x, int *y)
{
return hc::precise_math::frexp(x, y);
}
__device__ double hypot(double x, double y)
{
return hc::precise_math::hypot(x, y);
}
__device__ double ilogb(double x)
{
return hc::precise_math::ilogb(x);
}
__device__ unsigned isfinite(double x)
{
return hc::precise_math::isfinite(x);
}
__device__ unsigned isinf(double x)
{
return hc::precise_math::isinf(x);
}
__device__ unsigned isnan(double x)
{
return hc::precise_math::isnan(x);
}
__device__ double j0(double x)
{
return __hip_j0(x);
}
__device__ double j1(double x)
{
return __hip_j1(x);
}
__device__ double jn(int n, double x)
{
return __hip_jn(n, x);
}
__device__ double ldexp(double x, int exp)
{
return hc::precise_math::ldexp(x, exp);
}
__device__ double lgamma(double x, int *sign)
{
return hc::precise_math::lgamma(x, sign);
}
// Rounds x to an integer value and returns it as long long.
// nearbyint is used rather than round so that halfway cases follow the rint
// convention (round to nearest even under the default rounding mode) instead of
// rounding away from zero.
__device__ long long int llrint(double x)
{
    return static_cast<long long int>(hc::precise_math::nearbyint(x));
}
__device__ long long int llround(double x)
{
long long int y = hc::precise_math::round(x);
return y;
}
__device__ double log(double x)
{
return hc::precise_math::log(x);
}
__device__ double log10(double x)
{
return hc::precise_math::log10(x);
}
__device__ double log1p(double x)
{
return hc::precise_math::log1p(x);
}
__device__ double log2(double x)
{
return hc::precise_math::log2(x);
}
__device__ double logb(double x)
{
return hc::precise_math::logb(x);
}
// Rounds x to an integer value and returns it as long.
// nearbyint is used rather than round so that halfway cases follow the rint
// convention (round to nearest even under the default rounding mode) instead of
// rounding away from zero.
__device__ long int lrint(double x)
{
    return static_cast<long int>(hc::precise_math::nearbyint(x));
}
__device__ long int lround(double x)
{
long int y = hc::precise_math::round(x);
return y;
}
__device__ double modf(double x, double *iptr)
{
return hc::precise_math::modf(x, iptr);
}
__device__ double nan(const char *tagp)
{
return hc::precise_math::nan((int)*tagp);
}
__device__ double nearbyint(double x)
{
return hc::precise_math::nearbyint(x);
}
__device__ double nextafter(double x, double y)
{
return hc::precise_math::nextafter(x, y);
}
__device__ double norm3d(double a, double b, double c)
{
double x = a*a + b*b + c*c;
return hc::precise_math::sqrt(x);
}
__device__ double norm4d(double a, double b, double c, double d)
{
double x = a*a + b*b;
double y = c*c + d*d;
return hc::precise_math::sqrt(x+y);
}
// Standard normal cumulative distribution function, double precision:
//   normcdf(y) = (1 + erf(y / sqrt(2))) / 2
// The division by sqrt(2) applies to the argument of erf, not to its result;
// this matches the host normcdf definition, which uses erf(-x/sqrt(2)).
__device__ double normcdf(double y)
{
    return (hc::precise_math::erf(y / HIP_SQRT_2) + 1)/2;
}
__device__ double pow(double x, double y)
{
return hc::precise_math::pow(x, y);
}
__device__ double rcbrt(double x)
{
return hc::precise_math::rcbrt(x);
}
__device__ double remainder(double x, double y)
{
return hc::precise_math::remainder(x, y);
}
__device__ double remquo(double x, double y, int *quo)
{
return hc::precise_math::remquo(x, y, quo);
}
__device__ double rhypot(double x, double y)
{
return 1/hc::precise_math::sqrt(x*x + y*y);
}
// Rounds x to an integer value in floating-point format.
// nearbyint is used rather than round: round sends halfway cases away from zero
// (2.5 -> 3.0), whereas rint rounds them to the nearest even value under the
// default rounding mode (2.5 -> 2.0).
__device__ double rint(double x)
{
    return hc::precise_math::nearbyint(x);
}
__device__ double rnorm3d(double a, double b, double c)
{
return hc::precise_math::rsqrt(a*a + b*b + c*c);
}
__device__ double rnorm4d(double a, double b, double c, double d)
{
return hc::precise_math::rsqrt(a*a + b*b + c*c + d*d);
}
// Reciprocal of the Euclidean norm of t[0] .. t[dim-1]:
//   rnorm = 1 / sqrt(t[0]^2 + ... + t[dim-1]^2)
// The sum of squares is accumulated with fused multiply-adds and then passed
// through rsqrt, the same helper used by rnorm3d / rnorm4d in this file.
// Returning 1/sum would omit the square root.
__device__ double rnorm(int dim, const double* t)
{
    double x = 0.0;
    for(int i=0;i<dim;i++)
    {
        x = hc::precise_math::fma(t[i], t[i], x);
    }
    return hc::precise_math::rsqrt(x);
}
__device__ double round(double x)
{
return hc::precise_math::round(x);
}
__device__ double rsqrt(double x)
{
return hc::precise_math::rsqrt(x);
}
__device__ double scalbln(double x, long int n)
{
return hc::precise_math::scalb(x, n);
}
__device__ double scalbn(double x, int n)
{
return hc::precise_math::scalbn(x, n);
}
__device__ unsigned signbit(double x)
{
return hc::precise_math::signbit(x);
}
__device__ double sin(double x)
{
return hc::precise_math::sin(x);
}
__device__ void sincos(double x, double *sptr, double *cptr)
{
*sptr = hc::precise_math::sin(x);
*cptr = hc::precise_math::cos(x);
}
__device__ void sincospi(double x, double *sptr, double *cptr)
{
*sptr = hc::precise_math::sinpi(x);
*cptr = hc::precise_math::cospi(x);
}
__device__ double sinh(double x)
{
return hc::precise_math::sinh(x);
}
__device__ double sinpi(double x)
{
return hc::precise_math::sinpi(x);
}
__device__ double sqrt(double x)
{
return hc::precise_math::sqrt(x);
}
__device__ double tan(double x)
{
return hc::precise_math::tan(x);
}
__device__ double tanh(double x)
{
return hc::precise_math::tanh(x);
}
__device__ double tgamma(double x)
{
return hc::precise_math::tgamma(x);
}
__device__ double trunc(double x)
{
return hc::precise_math::trunc(x);
}
__device__ double y0(double x)
{
return __hip_y0(x);
}
__device__ double y1(double x)
{
return __hip_y1(x);
}
__device__ double yn(int n, double x)
{
return __hip_yn(n, x);
}
__host__ float cospif(float x)
{
return std::cos(x*HIP_PI);
}
__host__ float fdividef(float x, float y)
{
return x / y;
}
__host__ int isfinite(float x)
{
return std::isfinite(x);
}
__host__ int signbit(float x)
{
return std::signbit(x);
}
// Host version of sinpif: sine of x multiplied by HIP_PI.
// The return type is float, like the device sinpif; an int return type would
// truncate every result to -1, 0 or 1.
__host__ float sinpif(float x)
{
    return std::sin(x*HIP_PI);
}
__host__ float rsqrtf(float x)
{
return 1 / std::sqrt(x);
}
__host__ float modff(float x, float *iptr)
{
return std::modf(x, iptr);
}
__host__ float erfcinvf(float y)
{
return __hip_host_erfcinvf(y);
}
__host__ double erfcinv(double y)
{
return __hip_host_erfcinv(y);
}
__host__ float erfinvf(float x)
{
return __hip_host_erfinvf(x);
}
__host__ double erfinv(double x)
{
return __hip_host_erfinv(x);
}
__host__ double fdivide(double x, double y)
{
return x/y;
}
__host__ float normcdff(float t)
{
return (1 - std::erf(-t/std::sqrt(2)))/2;
}
__host__ double normcdf(double x)
{
return (1 - std::erf(-x/std::sqrt(2)))/2;
}
__host__ float erfcxf(float x)
{
return std::exp(x*x) * std::erfc(x);
}
__host__ double erfcx(double x)
{
return std::exp(x*x) * std::erfc(x);
}
__host__ float rhypotf(float x, float y)
{
return 1 / std::sqrt(x*x + y*y);
}
__host__ double rhypot(double x, double y)
{
return 1 / std::sqrt(x*x + y*y);
}
__host__ float rcbrtf(float a)
{
return 1 / std::cbrt(a);
}
__host__ double rcbrt(double a)
{
return 1 / std::cbrt(a);
}
// Host version of normf: Euclidean norm of a[0] .. a[dim-1], i.e. the square
// root of the sum of squares, like the device normf. Returning the bare sum of
// squares would omit the square root. When dim <= 0 the result is 0.
__host__ float normf(int dim, const float *a)
{
    float val = 0.0f;
    for(int i=0;i<dim;i++)
    {
        val = val + a[i] * a[i];
    }
    return std::sqrt(val);
}
// Host version of norm: Euclidean norm of a[0] .. a[dim-1], i.e. the square
// root of the sum of squares. Returning the bare sum of squares would omit the
// square root. When dim <= 0 the result is 0.
__host__ double norm(int dim, const double *a)
{
    double val = 0.0;
    for(int i=0;i<dim;i++)
    {
        val = val + a[i] * a[i];
    }
    return std::sqrt(val);
}
// Host version of rnormf: one over the square root of the sum of squares of
// t[0] .. t[dim-1]. No element is read when dim <= 0.
__host__ float rnormf(int dim, const float *t)
{
    float sumSq = 0.0f;
    int idx = 0;
    while (idx < dim)
    {
        sumSq = sumSq + t[idx] * t[idx];
        ++idx;
    }
    return 1 / std::sqrt(sumSq);
}
// Host version of rnorm: one over the square root of the sum of squares of
// t[0] .. t[dim-1]. No element is read when dim <= 0.
__host__ double rnorm(int dim, const double *t)
{
    double sumSq = 0.0;
    int idx = 0;
    while (idx < dim)
    {
        sumSq = sumSq + t[idx] * t[idx];
        ++idx;
    }
    return 1 / std::sqrt(sumSq);
}
__host__ float rnorm4df(float a, float b, float c, float d)
{
return 1 / std::sqrt(a*a + b*b + c*c + d*d);
}
__host__ double rnorm4d(double a, double b, double c, double d)
{
return 1 / std::sqrt(a*a + b*b + c*c + d*d);
}
__host__ float rnorm3df(float a, float b, float c)
{
return 1 / std::sqrt(a*a + b*b + c*c);
}
__host__ double rnorm3d(double a, double b, double c)
{
return 1 / std::sqrt(a*a + b*b + c*c);
}
__host__ void sincospif(float x, float *sptr, float *cptr)
{
*sptr = std::sin(HIP_PI*x);
*cptr = std::cos(HIP_PI*x);
}
__host__ void sincospi(double x, double *sptr, double *cptr)
{
*sptr = std::sin(HIP_PI*x);
*cptr = std::cos(HIP_PI*x);
}
__host__ float normcdfinvf(float x)
{
return std::sqrt(2) * erfinv(2*x-1);
}
__host__ double normcdfinv(double x)
{
return std::sqrt(2) * erfinv(2*x-1);
}
__host__ float nextafterf(float x, float y)
{
return std::nextafter(x, y);
}
__host__ double nextafter(double x, double y)
{
return std::nextafter(x, y);
}
__host__ float norm3df(float a, float b, float c)
{
return std::sqrt(a*a + b*b + c*c);
}
__host__ float norm4df(float a, float b, float c, float d)
{
return std::sqrt(a*a + b*b + c*c + d*d);
}
__host__ double norm3d(double a, double b, double c)
{
return std::sqrt(a*a + b*b + c*c);
}
__host__ double norm4d(double a, double b, double c, double d)
{
return std::sqrt(a*a + b*b + c*c + d*d);
}
+18 -17
Просмотреть файл
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include <hip/hip_runtime.h>
#include <hip/device_functions.h>
#include "test_common.h"
#pragma GCC diagnostic ignored "-Wall"
@@ -27,18 +28,18 @@ THE SOFTWARE.
__device__ void double_precision_intrinsics()
{
//__dadd_rd(0.0, 1.0);
//__dadd_rn(0.0, 1.0);
//__dadd_ru(0.0, 1.0);
//__dadd_rz(0.0, 1.0);
//__ddiv_rd(0.0, 1.0);
//__ddiv_rn(0.0, 1.0);
//__ddiv_ru(0.0, 1.0);
//__ddiv_rz(0.0, 1.0);
//__dmul_rd(1.0, 2.0);
//__dmul_rn(1.0, 2.0);
//__dmul_ru(1.0, 2.0);
//__dmul_rz(1.0, 2.0);
__dadd_rd(0.0, 1.0);
__dadd_rn(0.0, 1.0);
__dadd_ru(0.0, 1.0);
__dadd_rz(0.0, 1.0);
__ddiv_rd(0.0, 1.0);
__ddiv_rn(0.0, 1.0);
__ddiv_ru(0.0, 1.0);
__ddiv_rz(0.0, 1.0);
__dmul_rd(1.0, 2.0);
__dmul_rn(1.0, 2.0);
__dmul_ru(1.0, 2.0);
__dmul_rz(1.0, 2.0);
__drcp_rd(2.0);
__drcp_rn(2.0);
__drcp_ru(2.0);
@@ -47,10 +48,10 @@ __device__ void double_precision_intrinsics()
__dsqrt_rn(4.0);
__dsqrt_ru(4.0);
__dsqrt_rz(4.0);
//__dsub_rd(2.0, 1.0);
//__dsub_rn(2.0, 1.0);
//__dsub_ru(2.0, 1.0);
//__dsub_rz(2.0, 1.0);
__dsub_rd(2.0, 1.0);
__dsub_rn(2.0, 1.0);
__dsub_ru(2.0, 1.0);
__dsub_rz(2.0, 1.0);
__fma_rd(1.0, 2.0, 3.0);
__fma_rn(1.0, 2.0, 3.0);
__fma_ru(1.0, 2.0, 3.0);
+11 -10
Просмотреть файл
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include <hip/hip_runtime.h>
#include <hip/math_functions.h>
#include "test_common.h"
#pragma GCC diagnostic ignored "-Wall"
@@ -43,8 +44,8 @@ __device__ void double_precision_math_functions()
cos(0.0);
cosh(0.0);
cospi(0.0);
//cyl_bessel_i0(0.0);
//cyl_bessel_i1(0.0);
cyl_bessel_i0(0.0);
cyl_bessel_i1(0.0);
erf(0.0);
erfc(0.0);
erfcinv(2.0);
@@ -61,7 +62,7 @@ __device__ void double_precision_math_functions()
fmax(0.0, 0.0);
fmin(0.0, 0.0);
fmod(0.0, 1.0);
//frexp(0.0, &iX);
frexp(0.0, &iX);
hypot(1.0, 0.0);
ilogb(1.0);
isfinite(0.0);
@@ -71,7 +72,7 @@ __device__ void double_precision_math_functions()
j1(0.0);
jn(-1.0, 1.0);
ldexp(0.0, 0);
//lgamma(1.0);
lgamma(1.0);
llrint(0.0);
llround(0.0);
log(1.0);
@@ -81,19 +82,19 @@ __device__ void double_precision_math_functions()
logb(1.0);
lrint(0.0);
lround(0.0);
//modf(0.0, &fX);
modf(0.0, &fX);
nan("1");
nearbyint(0.0);
//nextafter(0.0);
//fX = 1.0; norm(1, &fX);
nextafter(0.0, 0.0);
fX = 1.0; norm(1, &fX);
norm3d(1.0, 0.0, 0.0);
norm4d(1.0, 0.0, 0.0, 0.0);
normcdf(0.0);
//normcdfinv(1.0);
normcdfinv(1.0);
pow(1.0, 0.0);
rcbrt(1.0);
remainder(2.0, 1.0);
//remquo(1.0, 2.0, &iX);
remquo(1.0, 2.0, &iX);
rhypot(0.0, 1.0);
rint(1.0);
fX = 1.0; rnorm(1, &fX);
+3 -2
Просмотреть файл
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include <hip/hip_runtime.h>
#include <hip/math_functions.h>
#include "test_common.h"
#pragma GCC diagnostic ignored "-Wall"
@@ -85,7 +86,7 @@ __host__ void double_precision_math_functions()
nan("1");
nearbyint(0.0);
//nextafter(0.0);
//fX = 1.0; norm(1, &fX);
fX = 1.0; norm(1, &fX);
#if defined(__HIP_PLATFORM_HCC__)
norm3d(1.0, 0.0, 0.0);
norm4d(1.0, 0.0, 0.0, 0.0);
+1
Просмотреть файл
@@ -27,6 +27,7 @@ THE SOFTWARE.
*/
#include "test_common.h"
#include <hip/device_functions.h>
#define LEN 512
#define SIZE LEN<<2
+2 -1
Просмотреть файл
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include <hip/hip_runtime.h>
#include <hip/math_functions.h>
#include "test_common.h"
__global__ void FloatMathPrecise(hipLaunchParm lp)
+2 -2
Просмотреть файл
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "hip/device_functions.h"
#include <hip/hip_runtime.h>
#include <hip/device_functions.h>
#include "test_common.h"
#pragma GCC diagnostic ignored "-Wall"
+30 -29
Просмотреть файл
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include <hip/hip_runtime.h>
#include <hip/device_functions.h>
#include "test_common.h"
#pragma GCC diagnostic ignored "-Wall"
@@ -30,44 +31,44 @@ __device__ void single_precision_intrinsics()
float fX, fY;
__cosf(0.0f);
//__exp10f(0.0f);
__exp10f(0.0f);
__expf(0.0f);
//__fadd_rd(0.0f, 1.0f);
//__fadd_rn(0.0f, 1.0f);
//__fadd_ru(0.0f, 1.0f);
//__fadd_rz(0.0f, 1.0f);
//__fdiv_rd(4.0f, 2.0f);
//__fdiv_rn(4.0f, 2.0f);
//__fdiv_ru(4.0f, 2.0f);
//__fdiv_rz(4.0f, 2.0f);
//__fdividef(4.0f, 2.0f);
//__fmaf_rd(1.0f, 2.0f, 3.0f);
//__fmaf_rn(1.0f, 2.0f, 3.0f);
//__fmaf_ru(1.0f, 2.0f, 3.0f);
//__fmaf_rz(1.0f, 2.0f, 3.0f);
//__fmul_rd(1.0f, 2.0f);
//__fmul_rn(1.0f, 2.0f);
//__fmul_ru(1.0f, 2.0f);
//__fmul_rz(1.0f, 2.0f);
//__frcp_rd(2.0f);
//__frcp_rn(2.0f);
//__frcp_ru(2.0f);
//__frcp_rz(2.0f);
__fadd_rd(0.0f, 1.0f);
__fadd_rn(0.0f, 1.0f);
__fadd_ru(0.0f, 1.0f);
__fadd_rz(0.0f, 1.0f);
__fdiv_rd(4.0f, 2.0f);
__fdiv_rn(4.0f, 2.0f);
__fdiv_ru(4.0f, 2.0f);
__fdiv_rz(4.0f, 2.0f);
__fdividef(4.0f, 2.0f);
__fmaf_rd(1.0f, 2.0f, 3.0f);
__fmaf_rn(1.0f, 2.0f, 3.0f);
__fmaf_ru(1.0f, 2.0f, 3.0f);
__fmaf_rz(1.0f, 2.0f, 3.0f);
__fmul_rd(1.0f, 2.0f);
__fmul_rn(1.0f, 2.0f);
__fmul_ru(1.0f, 2.0f);
__fmul_rz(1.0f, 2.0f);
__frcp_rd(2.0f);
__frcp_rn(2.0f);
__frcp_ru(2.0f);
__frcp_rz(2.0f);
__frsqrt_rn(4.0f);
__fsqrt_rd(4.0f);
__fsqrt_rn(4.0f);
__fsqrt_ru(4.0f);
__fsqrt_rz(4.0f);
//__fsub_rd(2.0f, 1.0f);
//__fsub_rn(2.0f, 1.0f);
//__fsub_ru(2.0f, 1.0f);
//__fsub_rz(2.0f, 1.0f);
__fsub_rd(2.0f, 1.0f);
__fsub_rn(2.0f, 1.0f);
__fsub_ru(2.0f, 1.0f);
__fsub_rz(2.0f, 1.0f);
__log10f(1.0f);
__log2f(1.0f);
__logf(1.0f);
__powf(1.0f, 0.0f);
//__saturatef(0.1f);
//__sincosf(0.0f, &fX, &fY);
__saturatef(0.1f);
__sincosf(0.0f, &fX, &fY);
__sinf(0.0f);
__tanf(0.0f);
}
+2 -1
Просмотреть файл
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include <hip/hip_runtime.h>
#include <hip/math_functions.h>
#include "test_common.h"
#pragma GCC diagnostic ignored "-Wall"
+2 -1
Просмотреть файл
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include <hip/hip_runtime.h>
#include <hip/math_functions.h>
#include "test_common.h"
#pragma GCC diagnostic ignored "-Wall"
+3 -2
Просмотреть файл
@@ -24,8 +24,9 @@ THE SOFTWARE.
*/
#include"test_common.h"
#include "hip/hip_runtime.h"
#include "hip/hip_runtime_api.h"
#include <hip/hip_runtime.h>
#include <hip/math_functions.h>
#include <hip/hip_runtime_api.h>
#define N 512
#define SIZE N*sizeof(float)
+3 -2
Просмотреть файл
@@ -24,8 +24,9 @@ THE SOFTWARE.
*/
#include"test_common.h"
#include "hip/hip_runtime.h"
#include "hip/hip_runtime_api.h"
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#include <hip/math_functions.h>
#define N 512
#define SIZE N*sizeof(double)
+2 -1
Просмотреть файл
@@ -29,7 +29,8 @@ THE SOFTWARE.
#include <stdio.h>
#include <iostream>
#include "hip/hip_runtime.h"
#include <hip/hip_runtime.h>
#include <hip/device_functions.h>
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
__global__ void
+2 -2
Просмотреть файл
@@ -25,8 +25,8 @@ THE SOFTWARE.
#include <iostream>
#include "hip/hip_runtime.h"
#include "hip/device_functions.h"
#include <hip/hip_runtime.h>
#include <hip/device_functions.h>
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
+1 -1
Просмотреть файл
@@ -32,7 +32,7 @@ THE SOFTWARE.
#include <stdlib.h>
#include <iostream>
#include "hip/hip_runtime.h"
#include "hip/device_functions.h"
#include <hip/device_functions.h>
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
+1 -1
Просмотреть файл
@@ -32,7 +32,7 @@ THE SOFTWARE.
#include <stdlib.h>
#include <iostream>
#include "hip/hip_runtime.h"
#include "hip/device_functions.h"
#include <hip/device_functions.h>
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
#define WIDTH 8
+2 -2
Просмотреть файл
@@ -31,8 +31,8 @@ THE SOFTWARE.
#include <algorithm>
#include <stdlib.h>
#include <iostream>
#include "hip/hip_runtime.h"
#include "hip/device_functions.h"
#include <hip/hip_runtime.h>
#include <hip/device_functions.h>
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
+2 -2
Просмотреть файл
@@ -31,8 +31,8 @@ THE SOFTWARE.
#include <algorithm>
#include <stdlib.h>
#include <iostream>
#include "hip/hip_runtime.h"
#include "hip/device_functions.h"
#include <hip/hip_runtime.h>
#include <hip/device_functions.h>
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
+2 -1
Просмотреть файл
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s
* BUILD: %t %s
* RUN: %t
* HIT_END
*/
@@ -30,6 +30,7 @@ THE SOFTWARE.
#include<hip/hip_runtime.h>
#include<iostream>
#include"test_common.h"
#include<hip/device_functions.h>
#define LEN 512
#define SIZE LEN<<2
+2 -1
Просмотреть файл
@@ -24,6 +24,7 @@ THE SOFTWARE.
#include<iostream>
#include"test_common.h"
#include"hip/math_functions.h"
const int NN = 1 << 21;
@@ -31,7 +32,7 @@ __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){
int tid = hipThreadIdx_x;
if(tid < 1){
for(int i=0;i<n;i++){
x[i] = sqrt(pow(3.14159,i));
x[i] = sqrt(powf(3.14159,i));
}
y[tid] = y[tid] + 1.0f;
}
+2 -1
Просмотреть файл
@@ -26,6 +26,7 @@ THE SOFTWARE.
#include<iostream>
#include"test_common.h"
#include"hip/math_functions.h"
const int NN = 1 << 21;
@@ -33,7 +34,7 @@ __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){
int tid = hipThreadIdx_x;
if(tid < 1){
for(int i=0;i<n;i++){
x[i] = sqrt(pow(3.14159,i));
x[i] = sqrt(powf(3.14159,i));
}
y[tid] = y[tid] + 1.0f;
}