Switch to using ROCDL directly, as opposed to via HC. Add missing bits.

[ROCm/clr commit: 14e6a04387]
This commit is contained in:
Alex Voicu
2018-05-31 03:17:26 +01:00
rodzic bcf5b45d04
commit 2924783ee4
10 zmienionych plików z 1550 dodań i 1045 usunięć
+1 -2
Wyświetl plik
@@ -196,8 +196,7 @@ if(HIP_PLATFORM STREQUAL "hcc")
src/device_util.cpp
src/hip_ldg.cpp
src/hip_fp16.cpp
src/device_functions.cpp
src/math_functions.cpp)
src/device_functions.cpp)
execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic")
@@ -26,263 +26,6 @@ THE SOFTWARE.
#include <hip/hip_runtime.h>
#include <hip/hip_vector_types.h>
// Single Precision Fast Math
__device__ float __cosf(float x);
__device__ float __exp10f(float x);
__device__ float __expf(float x);
__device__ static float __fadd_rd(float x, float y);
__device__ static float __fadd_rn(float x, float y);
__device__ static float __fadd_ru(float x, float y);
__device__ static float __fadd_rz(float x, float y);
__device__ static float __fdiv_rd(float x, float y);
__device__ static float __fdiv_rn(float x, float y);
__device__ static float __fdiv_ru(float x, float y);
__device__ static float __fdiv_rz(float x, float y);
__device__ static float __fdividef(float x, float y);
__device__ float __fmaf_rd(float x, float y, float z);
__device__ float __fmaf_rn(float x, float y, float z);
__device__ float __fmaf_ru(float x, float y, float z);
__device__ float __fmaf_rz(float x, float y, float z);
__device__ static float __fmul_rd(float x, float y);
__device__ static float __fmul_rn(float x, float y);
__device__ static float __fmul_ru(float x, float y);
__device__ static float __fmul_rz(float x, float y);
__device__ float __frcp_rd(float x);
__device__ float __frcp_rn(float x);
__device__ float __frcp_ru(float x);
__device__ float __frcp_rz(float x);
__device__ float __frsqrt_rn(float x);
__device__ float __fsqrt_rd(float x);
__device__ float __fsqrt_rn(float x);
__device__ float __fsqrt_ru(float x);
__device__ float __fsqrt_rz(float x);
__device__ static float __fsub_rd(float x, float y);
__device__ static float __fsub_rn(float x, float y);
__device__ static float __fsub_ru(float x, float y);
__device__ float __log10f(float x);
__device__ float __log2f(float x);
__device__ float __logf(float x);
__device__ float __powf(float base, float exponent);
__device__ static float __saturatef(float x);
__device__ void __sincosf(float x, float* s, float* c);
__device__ float __sinf(float x);
__device__ float __tanf(float x);
/*
Double Precision Intrinsics
*/
__device__ static double __dadd_rd(double x, double y);
__device__ static double __dadd_rn(double x, double y);
__device__ static double __dadd_ru(double x, double y);
__device__ static double __dadd_rz(double x, double y);
__device__ static double __ddiv_rd(double x, double y);
__device__ static double __ddiv_rn(double x, double y);
__device__ static double __ddiv_ru(double x, double y);
__device__ static double __ddiv_rz(double x, double y);
__device__ static double __dmul_rd(double x, double y);
__device__ static double __dmul_rn(double x, double y);
__device__ static double __dmul_ru(double x, double y);
__device__ static double __dmul_rz(double x, double y);
__device__ double __drcp_rd(double x);
__device__ double __drcp_rn(double x);
__device__ double __drcp_ru(double x);
__device__ double __drcp_rz(double x);
__device__ double __dsqrt_rd(double x);
__device__ double __dsqrt_rn(double x);
__device__ double __dsqrt_ru(double x);
__device__ double __dsqrt_rz(double x);
__device__ static double __dsub_rd(double x, double y);
__device__ static double __dsub_rn(double x, double y);
__device__ static double __dsub_ru(double x, double y);
__device__ static double __dsub_rz(double x, double y);
__device__ double __fma_rd(double x, double y, double z);
__device__ double __fma_rn(double x, double y, double z);
__device__ double __fma_ru(double x, double y, double z);
__device__ double __fma_rz(double x, double y, double z);
// Single Precision Fast Math
extern __device__ __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32");
extern __device__ __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32");
__device__ float __hip_fast_exp10f(float);
__device__ float __hip_fast_expf(float);
__device__ float __hip_fast_frsqrt_rn(float);
extern __device__ __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32");
__device__ float __hip_fast_fsqrt_rn(float);
__device__ float __hip_fast_fsqrt_ru(float);
__device__ float __hip_fast_fsqrt_rz(float);
__device__ float __hip_fast_log10f(float);
extern __device__ __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32");
__device__ float __hip_fast_logf(float);
__device__ float __hip_fast_powf(float, float);
__device__ void __hip_fast_sincosf(float, float*, float*);
extern __device__ __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
__device__ float __hip_fast_tanf(float);
extern __device__ __attribute__((const)) float __hip_fast_fmaf(float, float, float) __asm("llvm.fma.f32");
extern __device__ __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32");
extern __device__ __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");
extern __device__ __attribute__((const)) double __hip_fast_fma(double, double, double) __asm("llvm.fma.f64");
extern __device__ __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64");
// Single Precision Fast Math
__device__ inline float __cosf(float x) { return __hip_fast_cosf(x); }
__device__ inline float __exp10f(float x) { return __hip_fast_exp10f(x); }
__device__ inline float __expf(float x) { return __hip_fast_expf(x); }
__device__ static inline float __fadd_rd(float x, float y) { return x + y; }
__device__ static inline float __fadd_rn(float x, float y) { return x + y; }
__device__ static inline float __fadd_ru(float x, float y) { return x + y; }
__device__ static inline float __fadd_rz(float x, float y) { return x + y; }
__device__ static inline float __fdiv_rd(float x, float y) { return x / y; }
__device__ static inline float __fdiv_rn(float x, float y) { return x / y; }
__device__ static inline float __fdiv_ru(float x, float y) { return x / y; }
__device__ static inline float __fdiv_rz(float x, float y) { return x / y; }
__device__ static inline float __fdividef(float x, float y) { return x / y; }
__device__ inline float __fmaf_rd(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); }
__device__ inline float __fmaf_rn(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); }
__device__ inline float __fmaf_ru(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); }
__device__ inline float __fmaf_rz(float x, float y, float z) { return __hip_fast_fmaf(x, y, z); }
__device__ static inline float __fmul_rd(float x, float y) { return x * y; }
__device__ static inline float __fmul_rn(float x, float y) { return x * y; }
__device__ static inline float __fmul_ru(float x, float y) { return x * y; }
__device__ static inline float __fmul_rz(float x, float y) { return x * y; }
__device__ inline float __frcp_rd(float x) { return __hip_fast_frcp(x); }
__device__ inline float __frcp_rn(float x) { return __hip_fast_frcp(x); }
__device__ inline float __frcp_ru(float x) { return __hip_fast_frcp(x); }
__device__ inline float __frcp_rz(float x) { return __hip_fast_frcp(x); }
__device__ inline float __frsqrt_rn(float x) { return __hip_fast_frsqrt_rn(x); }
__device__ inline float __fsqrt_rd(float x) { return __hip_fast_fsqrt_rd(x); }
__device__ inline float __fsqrt_rn(float x) { return __hip_fast_fsqrt_rn(x); }
__device__ inline float __fsqrt_ru(float x) { return __hip_fast_fsqrt_ru(x); }
__device__ inline float __fsqrt_rz(float x) { return __hip_fast_fsqrt_rz(x); }
__device__ static inline float __fsub_rd(float x, float y) { return x - y; }
__device__ static inline float __fsub_rn(float x, float y) { return x - y; }
__device__ static inline float __fsub_ru(float x, float y) { return x - y; }
__device__ static inline float __fsub_rz(float x, float y) { return x - y; }
__device__ inline float __log10f(float x) { return __hip_fast_log10f(x); }
__device__ inline float __log2f(float x) { return __hip_fast_log2f(x); }
__device__ inline float __logf(float x) { return __hip_fast_logf(x); }
__device__ inline float __powf(float base, float exponent) {
return __hip_fast_powf(base, exponent);
}
__device__ static inline float __saturatef(float x) {
x = x > 1.0f ? 1.0f : x;
x = x < 0.0f ? 0.0f : x;
return x;
}
__device__ inline void __sincosf(float x, float* s, float* c) {
return __hip_fast_sincosf(x, s, c);
}
__device__ inline float __sinf(float x) { return __hip_fast_sinf(x); }
__device__ inline float __tanf(float x) { return __hip_fast_tanf(x); }
/*
Double Precision Intrinsics
*/
__device__ static inline double __dadd_rd(double x, double y) { return x + y; }
__device__ static inline double __dadd_rn(double x, double y) { return x + y; }
__device__ static inline double __dadd_ru(double x, double y) { return x + y; }
__device__ static inline double __dadd_rz(double x, double y) { return x + y; }
__device__ static inline double __ddiv_rd(double x, double y) { return x / y; }
__device__ static inline double __ddiv_rn(double x, double y) { return x / y; }
__device__ static inline double __ddiv_ru(double x, double y) { return x / y; }
__device__ static inline double __ddiv_rz(double x, double y) { return x / y; }
__device__ static inline double __dmul_rd(double x, double y) { return x * y; }
__device__ static inline double __dmul_rn(double x, double y) { return x * y; }
__device__ static inline double __dmul_ru(double x, double y) { return x * y; }
__device__ static inline double __dmul_rz(double x, double y) { return x * y; }
__device__ inline double __drcp_rd(double x) { return __hip_fast_drcp(x); }
__device__ inline double __drcp_rn(double x) { return __hip_fast_drcp(x); }
__device__ inline double __drcp_ru(double x) { return __hip_fast_drcp(x); }
__device__ inline double __drcp_rz(double x) { return __hip_fast_drcp(x); }
__device__ inline double __dsqrt_rd(double x) { return __hip_fast_dsqrt(x); }
__device__ inline double __dsqrt_rn(double x) { return __hip_fast_dsqrt(x); }
__device__ inline double __dsqrt_ru(double x) { return __hip_fast_dsqrt(x); }
__device__ inline double __dsqrt_rz(double x) { return __hip_fast_dsqrt(x); }
__device__ static inline double __dsub_rd(double x, double y) { return x - y; }
__device__ static inline double __dsub_rn(double x, double y) { return x - y; }
__device__ static inline double __dsub_ru(double x, double y) { return x - y; }
__device__ static inline double __dsub_rz(double x, double y) { return x - y; }
__device__ inline double __fma_rd(double x, double y, double z) { return __hip_fast_fma(x, y, z); }
__device__ inline double __fma_rn(double x, double y, double z) { return __hip_fast_fma(x, y, z); }
__device__ inline double __fma_ru(double x, double y, double z) { return __hip_fast_fma(x, y, z); }
__device__ inline double __fma_rz(double x, double y, double z) { return __hip_fast_fma(x, y, z); }
extern "C" __device__ unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int);
extern "C" __device__ signed int __hip_hc_ir_mul24_int(signed int, signed int);
extern "C" __device__ signed int __hip_hc_ir_mulhi_int(signed int, signed int);
@@ -0,0 +1,431 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#if defined(__cplusplus)
extern "C" {
#endif
// BEGIN FLOAT
__attribute__((const))
float __ocml_acos_f32(float);
__attribute__((pure))
float __ocml_acosh_f32(float);
__attribute__((const))
float __ocml_asin_f32(float);
__attribute__((pure))
float __ocml_asinh_f32(float);
__attribute__((const))
float __ocml_atan2_f32(float, float);
__attribute__((const))
float __ocml_atan_f32(float);
__attribute__((pure))
float __ocml_atanh_f32(float);
__attribute__((pure))
float __ocml_cbrt_f32(float);
__attribute__((const))
float __ocml_ceil_f32(float);
__attribute__((const))
float __ocml_copysign_f32(float, float);
float __ocml_cos_f32(float);
__attribute__((pure))
float __ocml_cosh_f32(float);
float __ocml_cospi_f32(float);
float __ocml_div_rtz_f32(float, float);
float __ocml_i0_f32(float);
float __ocml_i1_f32(float);
__attribute__((pure))
float __ocml_erfc_f32(float);
__attribute__((pure))
float __ocml_erfcinv_f32(float);
__attribute__((pure))
float __ocml_erfcx_f32(float);
__attribute__((pure))
float __ocml_erf_f32(float);
__attribute__((pure))
float __ocml_erfinv_f32(float);
__attribute__((pure))
float __ocml_exp10_f32(float);
__attribute__((pure))
float __ocml_exp2_f32(float);
__attribute__((pure))
float __ocml_exp_f32(float);
__attribute__((pure))
float __ocml_expm1_f32(float);
__attribute__((const))
float __ocml_fabs_f32(float);
__attribute__((const))
float __ocml_fdim_f32(float, float);
__attribute__((const))
float __ocml_floor_f32(float);
__attribute__((const))
float __ocml_fma_f32(float, float, float);
__attribute__((const))
float __ocml_fmax_f32(float, float);
__attribute__((const))
float __ocml_fmin_f32(float, float);
__attribute__((const))
float __ocml_fmod_f32(float, float);
float __ocml_frexp_f32(float, __attribute__((address_space(5))) int*);
__attribute__((const))
float __ocml_hypot_f32(float, float);
__attribute__((const))
int __ocml_ilogb_f32(float);
__attribute__((const))
int __ocml_isfinite_f32(float);
__attribute__((const))
int __ocml_isinf_f32(float);
__attribute__((const))
int __ocml_isnan_f32(float);
float __ocml_j0_f32(float);
float __ocml_j1_f32(float);
__attribute__((const))
float __ocml_ldexp_f32(float, int);
float __ocml_lgamma_f32(float);
__attribute__((pure))
float __ocml_log10_f32(float);
__attribute__((pure))
float __ocml_log1p_f32(float);
__attribute__((pure))
float __ocml_log2_f32(float);
__attribute__((const))
float __ocml_logb_f32(float);
__attribute__((pure))
float __ocml_log_f32(float);
float __ocml_modf_f32(float, __attribute__((address_space(5))) float*);
__attribute__((const))
float __ocml_nearbyint_f32(float);
__attribute__((const))
float __ocml_nextafter_f32(float, float);
__attribute__((const))
float __ocml_len3_f32(float, float, float);
__attribute__((const))
float __ocml_len4_f32(float, float, float, float);
__attribute__((pure))
float __ocml_ncdf_f32(float);
__attribute__((pure))
float __ocml_ncdfinv_f32(float);
__attribute__((pure))
float __ocml_pow_f32(float, float);
__attribute__((pure))
float __ocml_rcbrt_f32(float);
__attribute__((const))
float __ocml_remainder_f32(float, float);
float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int*);
__attribute__((const))
float __ocml_rhypot_f32(float, float);
__attribute__((const))
float __ocml_rint_f32(float);
__attribute__((const))
float __ocml_rlen3_f32(float, float, float);
__attribute__((const))
float __ocml_rlen4_f32(float, float, float, float);
__attribute__((const))
float __ocml_round_f32(float);
__attribute__((pure))
float __ocml_rsqrt_f32(float);
__attribute__((const))
float __ocml_scalb_f32(float, float);
__attribute__((const))
float __ocml_scalbn_f32(float, int);
__attribute__((const))
int __ocml_signbit_f32(float);
float __ocml_sincos_f32(float, __attribute__((address_space(5))) float*);
float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float*);
float __ocml_sin_f32(float);
__attribute__((pure))
float __ocml_sinh_f32(float);
float __ocml_sinpi_f32(float);
__attribute__((const))
float __ocml_sqrt_f32(float);
float __ocml_tan_f32(float);
__attribute__((pure))
float __ocml_tanh_f32(float);
float __ocml_tgamma_f32(float);
__attribute__((const))
float __ocml_trunc_f32(float);
float __ocml_y0_f32(float);
float __ocml_y1_f32(float);
// BEGIN INTRINSICS
__attribute__((const))
float __llvm_add_rte_f32(float, float);
__attribute__((const))
float __llvm_add_rtn_f32(float, float);
__attribute__((const))
float __llvm_add_rtp_f32(float, float);
__attribute__((const))
float __llvm_add_rtz_f32(float, float);
__attribute__((const))
float __llvm_sub_rte_f32(float, float);
__attribute__((const))
float __llvm_sub_rtn_f32(float, float);
__attribute__((const))
float __llvm_sub_rtp_f32(float, float);
__attribute__((const))
float __llvm_sub_rtz_f32(float, float);
__attribute__((const))
float __llvm_mul_rte_f32(float, float);
__attribute__((const))
float __llvm_mul_rtn_f32(float, float);
__attribute__((const))
float __llvm_mul_rtp_f32(float, float);
__attribute__((const))
float __llvm_mul_rtz_f32(float, float);
__attribute__((const))
float __llvm_div_rte_f32(float, float);
__attribute__((const))
float __llvm_div_rtn_f32(float, float);
__attribute__((const))
float __llvm_div_rtp_f32(float, float);
__attribute__((const))
float __llvm_div_rtz_f32(float, float);
__attribute__((const))
float __llvm_sqrt_rte_f32(float);
__attribute__((const))
float __llvm_sqrt_rtn_f32(float);
__attribute__((const))
float __llvm_sqrt_rtp_f32(float);
__attribute__((const))
float __llvm_sqrt_rtz_f32(float);
__attribute__((const))
float __llvm_fma_rte_f32(float, float, float);
__attribute__((const))
float __llvm_fma_rtn_f32(float, float, float);
__attribute__((const))
float __llvm_fma_rtp_f32(float, float, float);
__attribute__((const))
float __llvm_fma_rtz_f32(float, float, float);
__attribute__((const))
float __llvm_amdgcn_cos_f32(float) __asm("llvm.amdgcn.cos.f32");
__attribute__((const))
float __llvm_amdgcn_rcp_f32(float) __asm("llvm.amdgcn.rcp.f32");
__attribute__((const))
float __llvm_amdgcn_rsq_f32(float) __asm("llvm.amdgcn.rsq.f32");
__attribute__((const))
float __llvm_amdgcn_sin_f32(float) __asm("llvm.amdgcn.sin.f32");
// END INTRINSICS
// END FLOAT
// BEGIN DOUBLE
__attribute__((const))
double __ocml_acos_f64(double);
__attribute__((pure))
double __ocml_acosh_f64(double);
__attribute__((const))
double __ocml_asin_f64(double);
__attribute__((pure))
double __ocml_asinh_f64(double);
__attribute__((const))
double __ocml_atan2_f64(double, double);
__attribute__((const))
double __ocml_atan_f64(double);
__attribute__((pure))
double __ocml_atanh_f64(double);
__attribute__((pure))
double __ocml_cbrt_f64(double);
__attribute__((const))
double __ocml_ceil_f64(double);
__attribute__((const))
double __ocml_copysign_f64(double, double);
double __ocml_cos_f64(double);
__attribute__((pure))
double __ocml_cosh_f64(double);
double __ocml_cospi_f64(double);
double __ocml_i0_f64(double);
double __ocml_i1_f64(double);
__attribute__((pure))
double __ocml_erfc_f64(double);
__attribute__((pure))
double __ocml_erfcinv_f64(double);
__attribute__((pure))
double __ocml_erfcx_f64(double);
__attribute__((pure))
double __ocml_erf_f64(double);
__attribute__((pure))
double __ocml_erfinv_f64(double);
__attribute__((pure))
double __ocml_exp10_f64(double);
__attribute__((pure))
double __ocml_exp2_f64(double);
__attribute__((pure))
double __ocml_exp_f64(double);
__attribute__((pure))
double __ocml_expm1_f64(double);
__attribute__((const))
double __ocml_fabs_f64(double);
__attribute__((const))
double __ocml_fdim_f64(double, double);
__attribute__((const))
double __ocml_floor_f64(double);
__attribute__((const))
double __ocml_fma_f64(double, double, double);
__attribute__((const))
double __ocml_fmax_f64(double, double);
__attribute__((const))
double __ocml_fmin_f64(double, double);
__attribute__((const))
double __ocml_fmod_f64(double, double);
double __ocml_frexp_f64(double, __attribute__((address_space(5))) int*);
__attribute__((const))
double __ocml_hypot_f64(double, double);
__attribute__((const))
int __ocml_ilogb_f64(double);
__attribute__((const))
int __ocml_isfinite_f64(double);
__attribute__((const))
int __ocml_isinf_f64(double);
__attribute__((const))
int __ocml_isnan_f64(double);
double __ocml_j0_f64(double);
double __ocml_j1_f64(double);
__attribute__((const))
double __ocml_ldexp_f64(double, int);
double __ocml_lgamma_f64(double);
__attribute__((pure))
double __ocml_log10_f64(double);
__attribute__((pure))
double __ocml_log1p_f64(double);
__attribute__((pure))
double __ocml_log2_f64(double);
__attribute__((const))
double __ocml_logb_f64(double);
__attribute__((pure))
double __ocml_log_f64(double);
double __ocml_modf_f64(double, __attribute__((address_space(5))) double*);
__attribute__((const))
double __ocml_nearbyint_f64(double);
__attribute__((const))
double __ocml_nextafter_f64(double, double);
__attribute__((const))
double __ocml_len3_f64(double, double, double);
__attribute__((const))
double __ocml_len4_f64(double, double, double, double);
__attribute__((pure))
double __ocml_ncdf_f64(double);
__attribute__((pure))
double __ocml_ncdfinv_f64(double);
__attribute__((pure))
double __ocml_pow_f64(double, double);
__attribute__((pure))
double __ocml_rcbrt_f64(double);
__attribute__((const))
double __ocml_remainder_f64(double, double);
double __ocml_remquo_f64(
double, double, __attribute__((address_space(5))) int*);
__attribute__((const))
double __ocml_rhypot_f64(double, double);
__attribute__((const))
double __ocml_rint_f64(double);
__attribute__((const))
double __ocml_rlen3_f64(double, double, double);
__attribute__((const))
double __ocml_rlen4_f64(double, double, double, double);
__attribute__((const))
double __ocml_round_f64(double);
__attribute__((pure))
double __ocml_rsqrt_f64(double);
__attribute__((const))
double __ocml_scalb_f64(double, double);
__attribute__((const))
double __ocml_scalbn_f64(double, int);
__attribute__((const))
int __ocml_signbit_f64(double);
double __ocml_sincos_f64(double, __attribute__((address_space(5))) double*);
double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double*);
double __ocml_sin_f64(double);
__attribute__((pure))
double __ocml_sinh_f64(double);
double __ocml_sinpi_f64(double);
__attribute__((const))
double __ocml_sqrt_f64(double);
double __ocml_tan_f64(double);
__attribute__((pure))
double __ocml_tanh_f64(double);
double __ocml_tgamma_f64(double);
__attribute__((const))
double __ocml_trunc_f64(double);
double __ocml_y0_f64(double);
double __ocml_y1_f64(double);
// BEGIN INTRINSICS
__attribute__((const))
double __llvm_add_rte_f64(double, double);
__attribute__((const))
double __llvm_add_rtn_f64(double, double);
__attribute__((const))
double __llvm_add_rtp_f64(double, double);
__attribute__((const))
double __llvm_add_rtz_f64(double, double);
__attribute__((const))
double __llvm_sub_rte_f64(double, double);
__attribute__((const))
double __llvm_sub_rtn_f64(double, double);
__attribute__((const))
double __llvm_sub_rtp_f64(double, double);
__attribute__((const))
double __llvm_sub_rtz_f64(double, double);
__attribute__((const))
double __llvm_mul_rte_f64(double, double);
__attribute__((const))
double __llvm_mul_rtn_f64(double, double);
__attribute__((const))
double __llvm_mul_rtp_f64(double, double);
__attribute__((const))
double __llvm_mul_rtz_f64(double, double);
__attribute__((const))
double __llvm_div_rte_f64(double, double);
__attribute__((const))
double __llvm_div_rtn_f64(double, double);
__attribute__((const))
double __llvm_div_rtp_f64(double, double);
__attribute__((const))
double __llvm_div_rtz_f64(double, double);
__attribute__((const))
double __llvm_sqrt_rte_f64(double);
__attribute__((const))
double __llvm_sqrt_rtn_f64(double);
__attribute__((const))
double __llvm_sqrt_rtp_f64(double);
__attribute__((const))
double __llvm_sqrt_rtz_f64(double);
__attribute__((const))
double __llvm_fma_rte_f64(float, float, float);
__attribute__((const))
double __llvm_fma_rtn_f64(float, float, float);
__attribute__((const))
double __llvm_fma_rtp_f64(float, float, float);
__attribute__((const))
double __llvm_fma_rtz_f64(float, float, float);
__attribute__((const))
double __llvm_amdgcn_rcp_f64(double);
__attribute__((const))
double __llvm_amdgcn_rsq_f64(double);
// END INTRINSICS
// END DOUBLE
#if defined(__cplusplus)
} // extern "C"
#endif
@@ -23,11 +23,6 @@ THE SOFTWARE.
#include <hc_math.hpp>
#include "device_util.h"
extern "C" float __ocml_floor_f32(float);
extern "C" float __ocml_rint_f32(float);
extern "C" float __ocml_ceil_f32(float);
extern "C" float __ocml_trunc_f32(float);
__device__ float __double2float_rd(double x) { return (double)x; }
__device__ float __double2float_rn(double x) { return (double)x; }
__device__ float __double2float_ru(double x) { return (double)x; }
@@ -943,97 +943,6 @@ __device__ void* __get_dynamicgroupbaseptr() {
__host__ void* __get_dynamicgroupbaseptr() { return nullptr; }
// Precise Math Functions
__device__ float __hip_precise_cosf(float x) { return hc::precise_math::cosf(x); }
__device__ float __hip_precise_exp10f(float x) { return hc::precise_math::exp10f(x); }
__device__ float __hip_precise_expf(float x) { return hc::precise_math::expf(x); }
__device__ float __hip_precise_frsqrt_rn(float x) { return hc::precise_math::rsqrt(x); }
__device__ float __hip_precise_fsqrt_rd(float x) { return hc::precise_math::sqrt(x); }
__device__ float __hip_precise_fsqrt_rn(float x) { return hc::precise_math::sqrt(x); }
__device__ float __hip_precise_fsqrt_ru(float x) { return hc::precise_math::sqrt(x); }
__device__ float __hip_precise_fsqrt_rz(float x) { return hc::precise_math::sqrt(x); }
__device__ float __hip_precise_log10f(float x) { return hc::precise_math::log10(x); }
__device__ float __hip_precise_log2f(float x) { return hc::precise_math::log2(x); }
__device__ float __hip_precise_logf(float x) { return hc::precise_math::logf(x); }
__device__ float __hip_precise_powf(float base, float exponent) {
return hc::precise_math::powf(base, exponent);
}
__device__ void __hip_precise_sincosf(float x, float* s, float* c) {
hc::precise_math::sincosf(x, s, c);
}
__device__ float __hip_precise_sinf(float x) { return hc::precise_math::sinf(x); }
__device__ float __hip_precise_tanf(float x) { return hc::precise_math::tanf(x); }
// Double Precision Math
__device__ double __hip_precise_dsqrt_rd(double x) { return hc::precise_math::sqrt(x); }
__device__ double __hip_precise_dsqrt_rn(double x) { return hc::precise_math::sqrt(x); }
__device__ double __hip_precise_dsqrt_ru(double x) { return hc::precise_math::sqrt(x); }
__device__ double __hip_precise_dsqrt_rz(double x) { return hc::precise_math::sqrt(x); }
#define LOG_BASE2_E 1.4426950408889634
#define LOG_BASE2_10 3.32192809488736
#define ONE_DIV_LOG_BASE2_E 0.69314718056
#define ONE_DIV_LOG_BASE2_10 0.30102999566
// Fast Math Intrinsics
__device__ float __hip_fast_exp10f(float x) { return __hip_fast_exp2f(x * LOG_BASE2_E); }
__device__ float __hip_fast_expf(float x) { return __hip_fast_exp2f(x * LOG_BASE2_10); }
__device__ float __hip_fast_frsqrt_rn(float x) {
return 1 / __hip_fast_fsqrt_rd(x);
;
}
__device__ float __hip_fast_fsqrt_rn(float x) { return __hip_fast_fsqrt_rd(x); }
__device__ float __hip_fast_fsqrt_ru(float x) { return __hip_fast_fsqrt_rd(x); }
__device__ float __hip_fast_fsqrt_rz(float x) { return __hip_fast_fsqrt_rd(x); }
__device__ float __hip_fast_log10f(float x) { return ONE_DIV_LOG_BASE2_E * __hip_fast_log2f(x); }
__device__ float __hip_fast_logf(float x) { return ONE_DIV_LOG_BASE2_10 * __hip_fast_log2f(x); }
__device__ float __hip_fast_powf(float base, float exponent) {
return hc::fast_math::powf(base, exponent);
}
__device__ void __hip_fast_sincosf(float x, float* s, float* c) {
*s = __hip_fast_sinf(x);
*c = __hip_fast_cosf(x);
}
__device__ float __hip_fast_tanf(float x) { return hc::fast_math::tanf(x); }
// Double Precision Math
// FIXME - HCC doesn't have a fast_math version double FP sqrt
// Another issue is that these intrinsics call for a specific rounding mode;
// however, their implementation all map to the same sqrt builtin
__device__ double __hip_fast_dsqrt_rd(double x) { return hc::precise_math::sqrt(x); }
__device__ double __hip_fast_dsqrt_rn(double x) { return hc::precise_math::sqrt(x); }
__device__ double __hip_fast_dsqrt_ru(double x) { return hc::precise_math::sqrt(x); }
__device__ double __hip_fast_dsqrt_rz(double x) { return hc::precise_math::sqrt(x); }
__device__ void __threadfence_system(void) { std::atomic_thread_fence(std::memory_order_seq_cst); }
@@ -1,429 +0,0 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hc.hpp>
#include <grid_launch.h>
#include <hc_math.hpp>
#include "device_util.h"
#include "hip/hcc_detail/device_functions.h"
#include "hip/hip_runtime.h"
__device__ float acosf(float x) { return hc::precise_math::acosf(x); }
__device__ float acoshf(float x) { return hc::precise_math::acoshf(x); }
__device__ float asinf(float x) { return hc::precise_math::asinf(x); }
__device__ float asinhf(float x) { return hc::precise_math::asinhf(x); }
__device__ float atan2f(float y, float x) { return hc::precise_math::atan2f(y, x); }
__device__ float atanf(float x) { return hc::precise_math::atanf(x); }
__device__ float atanhf(float x) { return hc::precise_math::atanhf(x); }
__device__ float cbrtf(float x) { return hc::precise_math::cbrtf(x); }
__device__ float ceilf(float x) { return hc::precise_math::ceilf(x); }
__device__ float copysignf(float x, float y) { return hc::precise_math::copysignf(x, y); }
__device__ float cosf(float x) { return hc::precise_math::cosf(x); }
__device__ float coshf(float x) { return hc::precise_math::coshf(x); }
__device__ float cyl_bessel_i0f(float x);
__device__ float cyl_bessel_i1f(float x);
__device__ float erfcf(float x) { return hc::precise_math::erfcf(x); }
__device__ float erfcinvf(float y) { return hc::precise_math::erfcinvf(y); }
__device__ float erfcxf(float x) {
return hc::precise_math::expf(x * x) * hc::precise_math::erfcf(x);
}
__device__ float erff(float x) { return hc::precise_math::erff(x); }
__device__ float erfinvf(float y) { return hc::precise_math::erfinvf(y); }
__device__ float exp10f(float x) { return hc::precise_math::exp10f(x); }
__device__ float exp2f(float x) { return hc::precise_math::exp2f(x); }
__device__ float expf(float x) { return hc::precise_math::expf(x); }
__device__ float expm1f(float x) { return hc::precise_math::expm1f(x); }
__device__ int abs(int x) {
return x >= 0 ? x : -x; // TODO - optimize with OCML
}
__device__ long long abs(long long x) {
return x >= 0 ? x : -x;
}
__device__ float fabsf(float x) { return hc::precise_math::fabsf(x); }
__device__ float fdimf(float x, float y) { return hc::precise_math::fdimf(x, y); }
__device__ float fdividef(float x, float y) { return x / y; }
__device__ float floorf(float x) { return hc::precise_math::floorf(x); }
__device__ float fmaf(float x, float y, float z) { return hc::precise_math::fmaf(x, y, z); }
__device__ float fmaxf(float x, float y) { return hc::precise_math::fmaxf(x, y); }
__device__ float fminf(float x, float y) { return hc::precise_math::fminf(x, y); }
__device__ float fmodf(float x, float y) { return hc::precise_math::fmodf(x, y); }
__device__ float frexpf(float x, int* nptr) { return hc::precise_math::frexpf(x, nptr); }
__device__ float hypotf(float x, float y) { return hc::precise_math::hypotf(x, y); }
__device__ int ilogbf(float x) { return hc::precise_math::ilogbf(x); }
__device__ int isfinite(float a) { return hc::precise_math::isfinite(a); }
__device__ unsigned isinf(float a) { return hc::precise_math::isinf(a); }
__device__ unsigned isnan(float a) { return hc::precise_math::isnan(a); }
__device__ float j0f(float x) { return __hip_j0f(x); }
__device__ float j1f(float x) { return __hip_j1f(x); }
__device__ float jnf(int n, float x) { return __hip_jnf(n, x); }
__device__ float ldexpf(float x, int exp) { return hc::precise_math::ldexpf(x, exp); }
__device__ float lgammaf(float x) { return hc::precise_math::lgammaf(x); }
__device__ long long int llrintf(float x) {
int y = hc::precise_math::roundf(x);
long long int z = y;
return z;
}
__device__ long long int llroundf(float x) {
int y = hc::precise_math::roundf(x);
long long int z = y;
return z;
}
__device__ float log10f(float x) { return hc::precise_math::log10f(x); }
__device__ float log1pf(float x) { return hc::precise_math::log1pf(x); }
__device__ float log2f(float x) { return hc::precise_math::log2f(x); }
__device__ float logbf(float x) { return hc::precise_math::logbf(x); }
__device__ float logf(float x) { return hc::precise_math::logf(x); }
__device__ long int lrintf(float x) {
int y = hc::precise_math::roundf(x);
long int z = y;
return z;
}
__device__ long int lroundf(float x) {
long int y = hc::precise_math::roundf(x);
return y;
}
__device__ float modff(float x, float* iptr) { return hc::precise_math::modff(x, iptr); }
__device__ float nanf(const char* tagp) { return hc::precise_math::nanf((int)*tagp); }
__device__ float nearbyintf(float x) { return hc::precise_math::nearbyintf(x); }
__device__ float nextafterf(float x, float y) { return hc::precise_math::nextafter(x, y); }
__device__ float norm3df(float a, float b, float c) {
float x = a * a + b * b + c * c;
return hc::precise_math::sqrtf(x);
}
__device__ float norm4df(float a, float b, float c, float d) {
float x = a * a + b * b;
float y = c * c + d * d;
return hc::precise_math::sqrtf(x + y);
}
__device__ float normcdff(float y) { return ((hc::precise_math::erff(y) / 1.41421356237) + 1) / 2; }
__device__ float normcdfinvf(float y) { return HIP_SQRT_2 * __hip_erfinvf(2 * y - 1); }
__device__ float normf(int dim, const float* a) {
float x = 0.0f;
for (int i = 0; i < dim; i++) {
x = hc::precise_math::fmaf(a[i], a[i], x);
}
return hc::precise_math::sqrtf(x);
}
__device__ float powf(float x, float y) { return hc::precise_math::powf(x, y); }
__device__ float rcbrtf(float x) { return hc::precise_math::rcbrtf(x); }
__device__ float remainderf(float x, float y) { return hc::precise_math::remainderf(x, y); }
__device__ float remquof(float x, float y, int* quo) {
return hc::precise_math::remquof(x, y, quo);
}
__device__ float rhypotf(float x, float y) { return 1 / hc::precise_math::hypotf(x, y); }
__device__ float rintf(float x) { return hc::precise_math::roundf(x); }
__device__ float rnorm3df(float a, float b, float c) {
float x = a * a + b * b + c * c;
return 1 / hc::precise_math::sqrtf(x);
}
__device__ float rnorm4df(float a, float b, float c, float d) {
float x = a * a + b * b;
float y = c * c + d * d;
return 1 / hc::precise_math::sqrtf(x + y);
}
__device__ float rnormf(int dim, const float* a) {
float x = 0.0f;
for (int i = 0; i < dim; i++) {
x = hc::precise_math::fmaf(a[i], a[i], x);
}
return 1 / hc::precise_math::sqrtf(x);
}
__device__ float roundf(float x) { return hc::precise_math::roundf(x); }
__device__ float scalblnf(float x, long int n) { return hc::precise_math::scalb(x, n); }
__device__ float scalbnf(float x, int n) { return hc::precise_math::scalbnf(x, n); }
__device__ int signbit(float a) { return hc::precise_math::signbit(a); }
__device__ void sincosf(float x, float* sptr, float* cptr) {
*sptr = hc::precise_math::sinf(x);
*cptr = hc::precise_math::cosf(x);
}
__device__ void sincospif(float x, float* sptr, float* cptr) {
*sptr = hc::precise_math::sinpif(x);
*cptr = hc::precise_math::cospif(x);
}
__device__ float sinf(float x) { return hc::precise_math::sinf(x); }
__device__ float sinhf(float x) { return hc::precise_math::sinhf(x); }
__device__ float tanf(float x) { return hc::precise_math::tanf(x); }
__device__ float tanhf(float x) { return hc::precise_math::tanhf(x); }
__device__ float tgammaf(float x) { return hc::precise_math::tgammaf(x); }
__device__ float truncf(float x) { return hc::precise_math::truncf(x); }
__device__ float y0f(float x) { return __hip_y0f(x); }
__device__ float y1f(float x) { return __hip_y1f(x); }
__device__ float ynf(int n, float x) { return __hip_ynf(n, x); }
__device__ float cospif(float x) { return hc::precise_math::cospif(x); }
__device__ float sinpif(float x) { return hc::precise_math::sinpif(x); }
__device__ float sqrtf(float x) { return hc::precise_math::sqrtf(x); }
__device__ float rsqrtf(float x) { return hc::precise_math::rsqrtf(x); }
/*
* Double precision device math functions
*/
__device__ double acos(double x) { return hc::precise_math::acos(x); }
__device__ double acosh(double x) { return hc::precise_math::acosh(x); }
__device__ double asin(double x) { return hc::precise_math::asin(x); }
__device__ double asinh(double x) { return hc::precise_math::asinh(x); }
__device__ double atan(double x) { return hc::precise_math::atan(x); }
__device__ double atan2(double y, double x) { return hc::precise_math::atan2(y, x); }
__device__ double atanh(double x) { return hc::precise_math::atanh(x); }
__device__ double cbrt(double x) { return hc::precise_math::cbrt(x); }
__device__ double ceil(double x) { return hc::precise_math::ceil(x); }
__device__ double copysign(double x, double y) { return hc::precise_math::copysign(x, y); }
__device__ double cos(double x) { return hc::precise_math::cos(x); }
__device__ double cosh(double x) { return hc::precise_math::cosh(x); }
__device__ double cospi(double x) { return hc::precise_math::cospi(x); }
__device__ double cyl_bessel_i0(double x);
__device__ double cyl_bessel_i1(double x);
__device__ double erf(double x) { return hc::precise_math::erf(x); }
__device__ double erfc(double x) { return hc::precise_math::erfc(x); }
__device__ double erfcinv(double x) { return __hip_erfinv(1 - x); }
__device__ double erfcx(double x) {
return hc::precise_math::exp(x * x) * hc::precise_math::erf(x);
}
__device__ double erfinv(double x) { return __hip_erfinv(x); }
__device__ double exp(double x) { return hc::precise_math::exp(x); }
__device__ double exp10(double x) { return hc::precise_math::exp10(x); }
__device__ double exp2(double x) { return hc::precise_math::exp2(x); }
__device__ double expm1(double x) { return hc::precise_math::expm1(x); }
__device__ double fabs(double x) { return hc::precise_math::fabs(x); }
__device__ double fdim(double x, double y) { return hc::precise_math::fdim(x, y); }
__device__ double fdivide(double x, double y) { return x / y; }
__device__ double floor(double x) { return hc::precise_math::floor(x); }
__device__ double fma(double x, double y, double z) { return hc::precise_math::fma(x, y, z); }
__device__ double fmax(double x, double y) { return hc::precise_math::fmax(x, y); }
__device__ double fmin(double x, double y) { return hc::precise_math::fmin(x, y); }
__device__ double fmod(double x, double y) { return hc::precise_math::fmod(x, y); }
__device__ double frexp(double x, int* y) { return hc::precise_math::frexp(x, y); }
__device__ double hypot(double x, double y) { return hc::precise_math::hypot(x, y); }
__device__ int ilogb(double x) { return hc::precise_math::ilogb(x); }
__device__ int isfinite(double x) { return hc::precise_math::isfinite(x); }
__device__ unsigned isinf(double x) { return hc::precise_math::isinf(x); }
__device__ unsigned isnan(double x) { return hc::precise_math::isnan(x); }
__device__ double j0(double x) { return __hip_j0(x); }
__device__ double j1(double x) { return __hip_j1(x); }
__device__ double jn(int n, double x) { return __hip_jn(n, x); }
__device__ double ldexp(double x, int exp) { return hc::precise_math::ldexp(x, exp); }
__device__ double lgamma(double x) { return hc::precise_math::lgamma(x); }
__device__ long long int llrint(double x) {
long long int y = hc::precise_math::round(x);
return y;
}
__device__ long long int llround(double x) {
long long int y = hc::precise_math::round(x);
return y;
}
__device__ double log(double x) { return hc::precise_math::log(x); }
__device__ double log10(double x) { return hc::precise_math::log10(x); }
__device__ double log1p(double x) { return hc::precise_math::log1p(x); }
__device__ double log2(double x) { return hc::precise_math::log2(x); }
__device__ double logb(double x) { return hc::precise_math::logb(x); }
__device__ long int lrint(double x) {
long int y = hc::precise_math::round(x);
return y;
}
__device__ long int lround(double x) {
long int y = hc::precise_math::round(x);
return y;
}
__device__ double modf(double x, double* iptr) { return hc::precise_math::modf(x, iptr); }
__device__ double nan(const char* tagp) { return hc::precise_math::nan((int)*tagp); }
__device__ double nearbyint(double x) { return hc::precise_math::nearbyint(x); }
__device__ double nextafter(double x, double y) { return hc::precise_math::nextafter(x, y); }
__device__ double norm(int x, const double* d) {
double val = 0;
for (int i = 0; i < x; i++) {
val += d[i] * d[i];
}
return hc::precise_math::sqrt(val);
}
__device__ double norm3d(double a, double b, double c) {
double x = a * a + b * b + c * c;
return hc::precise_math::sqrt(x);
}
__device__ double norm4d(double a, double b, double c, double d) {
double x = a * a + b * b;
double y = c * c + d * d;
return hc::precise_math::sqrt(x + y);
}
__device__ double normcdf(double y) { return ((hc::precise_math::erf(y) / HIP_SQRT_2) + 1) / 2; }
__device__ double normcdfinv(double y) { return HIP_SQRT_2 * __hip_erfinv(2 * y - 1); }
__device__ double pow(double x, double y) { return hc::precise_math::pow(x, y); }
__device__ double rcbrt(double x) { return hc::precise_math::rcbrt(x); }
__device__ double remainder(double x, double y) { return hc::precise_math::remainder(x, y); }
__device__ double remquo(double x, double y, int* quo) {
return hc::precise_math::remquo(x, y, quo);
}
__device__ double rhypot(double x, double y) { return 1 / hc::precise_math::sqrt(x * x + y * y); }
__device__ double rint(double x) { return hc::precise_math::round(x); }
__device__ double rnorm3d(double a, double b, double c) {
return hc::precise_math::rsqrt(a * a + b * b + c * c);
}
__device__ double rnorm4d(double a, double b, double c, double d) {
return hc::precise_math::rsqrt(a * a + b * b + c * c + d * d);
}
__device__ double rnorm(int dim, const double* t) {
double x = 0.0;
for (int i = 0; i < dim; i++) {
x = hc::precise_math::fma(t[i], t[i], x);
}
return 1 / x;
}
__device__ double round(double x) { return hc::precise_math::round(x); }
__device__ double rsqrt(double x) { return hc::precise_math::rsqrt(x); }
__device__ double scalbln(double x, long int n) { return hc::precise_math::scalb(x, n); }
__device__ double scalbn(double x, int n) { return hc::precise_math::scalbn(x, n); }
__device__ int signbit(double x) { return hc::precise_math::signbit(x); }
__device__ double sin(double x) { return hc::precise_math::sin(x); }
__device__ void sincos(double x, double* sptr, double* cptr) {
*sptr = hc::precise_math::sin(x);
*cptr = hc::precise_math::cos(x);
}
__device__ void sincospi(double x, double* sptr, double* cptr) {
*sptr = hc::precise_math::sinpi(x);
*cptr = hc::precise_math::cospi(x);
}
__device__ double sinh(double x) { return hc::precise_math::sinh(x); }
__device__ double sinpi(double x) { return hc::precise_math::sinpi(x); }
__device__ double sqrt(double x) { return hc::precise_math::sqrt(x); }
__device__ double tan(double x) { return hc::precise_math::tan(x); }
__device__ double tanh(double x) { return hc::precise_math::tanh(x); }
__device__ double tgamma(double x) { return hc::precise_math::tgamma(x); }
__device__ double trunc(double x) { return hc::precise_math::trunc(x); }
__device__ double y0(double x) { return __hip_y0(x); }
__device__ double y1(double x) { return __hip_y1(x); }
__device__ double yn(int n, double x) { return __hip_yn(n, x); }
__host__ float cospif(float x) { return std::cos(x * HIP_PI); }
__host__ float fdividef(float x, float y) { return x / y; }
__host__ int isfinite(float x) { return std::isfinite(x); }
__host__ int signbit(float x) { return std::signbit(x); }
__host__ float sinpif(float x) { return std::sin(x * HIP_PI); }
__host__ float rsqrtf(float x) { return 1 / std::sqrt(x); }
__host__ float modff(float x, float* iptr) { return std::modf(x, iptr); }
__host__ double fdivide(double x, double y) { return x / y; }
__host__ float normcdff(float t) { return (1 - std::erf(-t / std::sqrt(2))) / 2; }
__host__ double normcdf(double x) { return (1 - std::erf(-x / std::sqrt(2))) / 2; }
__host__ float erfcxf(float x) { return std::exp(x * x) * std::erfc(x); }
__host__ double erfcx(double x) { return std::exp(x * x) * std::erfc(x); }
__host__ float rhypotf(float x, float y) { return 1 / std::sqrt(x * x + y * y); }
__host__ double rhypot(double x, double y) { return 1 / std::sqrt(x * x + y * y); }
__host__ float rcbrtf(float a) { return 1 / std::cbrt(a); }
__host__ double rcbrt(double a) { return 1 / std::cbrt(a); }
__host__ float normf(int dim, const float* a) {
float val = 0.0f;
for (int i = 0; i < dim; i++) {
val = val + a[i] * a[i];
}
return val;
}
__host__ float rnormf(int dim, const float* t) {
float val = 0.0f;
for (int i = 0; i < dim; i++) {
val = val + t[i] * t[i];
}
return 1 / std::sqrt(val);
}
__host__ double rnorm(int dim, const double* t) {
double val = 0.0;
for (int i = 0; i < dim; i++) {
val = val + t[i] * t[i];
}
return 1 / std::sqrt(val);
}
__host__ float rnorm4df(float a, float b, float c, float d) {
return 1 / std::sqrt(a * a + b * b + c * c + d * d);
}
__host__ double rnorm4d(double a, double b, double c, double d) {
return 1 / std::sqrt(a * a + b * b + c * c + d * d);
}
__host__ float rnorm3df(float a, float b, float c) { return 1 / std::sqrt(a * a + b * b + c * c); }
__host__ double rnorm3d(double a, double b, double c) {
return 1 / std::sqrt(a * a + b * b + c * c);
}
__host__ void sincospif(float x, float* sptr, float* cptr) {
*sptr = std::sin(HIP_PI * x);
*cptr = std::cos(HIP_PI * x);
}
__host__ void sincospi(double x, double* sptr, double* cptr) {
*sptr = std::sin(HIP_PI * x);
*cptr = std::cos(HIP_PI * x);
}
__host__ float nextafterf(float x, float y) { return std::nextafter(x, y); }
__host__ double nextafter(double x, double y) { return std::nextafter(x, y); }
__host__ float norm3df(float a, float b, float c) { return std::sqrt(a * a + b * b + c * c); }
__host__ float norm4df(float a, float b, float c, float d) {
return std::sqrt(a * a + b * b + c * c + d * d);
}
__host__ double norm3d(double a, double b, double c) { return std::sqrt(a * a + b * b + c * c); }
__host__ double norm4d(double a, double b, double c, double d) {
return std::sqrt(a * a + b * b + c * c + d * d);
}
__host__ double sinpi(double a) { return std::sin(HIP_PI * a); }
__host__ double cospi(double a) { return std::cos(HIP_PI * a); }
__host__ int isfinite(double a) { return std::isfinite(a); }
__host__ double norm(int dim, const double* t) {
double val = 0;
for (int i = 0; i < dim; i++) {
val += t[i] * t[i];
}
return std::sqrt(val);
}
__host__ double rsqrt(double x) { return 1 / std::sqrt(x); }
__host__ int signbit(double x) { return std::signbit(x); }
@@ -50,8 +50,8 @@ __device__ void double_precision_math_functions() {
cos(0.0);
cosh(0.0);
cospi(0.0);
// cyl_bessel_i0(0.0);
// cyl_bessel_i1(0.0);
cyl_bessel_i0(0.0);
cyl_bessel_i1(0.0);
erf(0.0);
erfc(0.0);
erfcinv(2.0);
@@ -78,7 +78,7 @@ __device__ void double_precision_math_functions() {
j1(0.0);
jn(-1.0, 1.0);
ldexp(0.0, 0);
// lgamma(1.0);
lgamma(1.0);
llrint(0.0);
llround(0.0);
log(1.0);
@@ -88,7 +88,7 @@ __device__ void double_precision_math_functions() {
logb(1.0);
lrint(0.0);
lround(0.0);
// modf(0.0, &fX);
modf(0.0, &fX);
nan("1");
nearbyint(0.0);
nextafter(0.0, 0.0);
@@ -99,9 +99,9 @@ __device__ void double_precision_math_functions() {
normcdf(0.0);
normcdfinv(1.0);
pow(1.0, 0.0);
// rcbrt(1.0);
rcbrt(1.0);
remainder(2.0, 1.0);
// remquo(1.0, 2.0, &iX);
remquo(1.0, 2.0, &iX);
rhypot(0.0, 1.0);
rint(1.0);
fX = 1.0;
@@ -110,8 +110,8 @@ __device__ void double_precision_math_functions() {
rnorm4d(0.0, 0.0, 0.0, 1.0);
round(0.0);
rsqrt(1.0);
// scalbln(0.0, 1);
// scalbn(0.0, 1);
scalbln(0.0, 1);
scalbn(0.0, 1);
signbit(1.0);
sin(0.0);
sincos(0.0, &fX, &fY);
@@ -128,11 +128,17 @@ __device__ void double_precision_math_functions() {
yn(1, 1.0);
}
__global__ void compileDoublePrecisionMathOnDevice(hipLaunchParm lp, int ignored) {
__global__ void compileDoublePrecisionMathOnDevice(int) {
double_precision_math_functions();
}
int main() {
hipLaunchKernel(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, 1);
hipLaunchKernelGGL(
compileDoublePrecisionMathOnDevice,
dim3(1, 1, 1),
dim3(1, 1, 1),
0,
0,
1);
passed();
}
@@ -30,8 +30,8 @@ THE SOFTWARE.
#include <hip/math_functions.h>
#include "test_common.h"
__global__ void FloatMathPrecise(hipLaunchParm lp) {
// int iX; //uncomment this when remqouf() is enabled again
__global__ void FloatMathPrecise() {
int iX;
float fX, fY;
acosf(1.0f);
@@ -47,8 +47,8 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) {
cosf(0.0f);
coshf(0.0f);
cospif(0.0f);
// cyl_bessel_i0f(0.0f);
// cyl_bessel_i1f(0.0f);
cyl_bessel_i0f(0.0f);
cyl_bessel_i1f(0.0f);
erfcf(0.0f);
erfcinvf(2.0f);
erfcxf(0.0f);
@@ -66,7 +66,7 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) {
fX = fmaxf(0.0f, 0.0f);
fX = fminf(0.0f, 0.0f);
fmodf(0.0f, 1.0f);
// frexpf(0.0f, &iX);
frexpf(0.0f, &iX);
hypotf(1.0f, 0.0f);
ilogbf(1.0f);
isfinite(0.0f);
@@ -76,7 +76,7 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) {
j1f(0.0f);
jnf(-1.0f, 1.0f);
ldexpf(0.0f, 0);
// lgammaf(1.0f);
lgammaf(1.0f);
llrintf(0.0f);
llroundf(0.0f);
log10f(1.0f);
@@ -86,10 +86,10 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) {
logf(1.0f);
lrintf(0.0f);
lroundf(0.0f);
// modff(0.0f, &fX);
modff(0.0f, &fX);
fX = nanf("1");
fX = nearbyintf(0.0f);
// nextafterf(0.0f);
nextafterf(0.0f, 0.0f);
norm3df(1.0f, 0.0f, 0.0f);
norm4df(1.0f, 0.0f, 0.0f, 0.0f);
normcdff(0.0f);
@@ -99,7 +99,7 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) {
powf(1.0f, 0.0f);
rcbrtf(1.0f);
remainderf(2.0f, 1.0f);
// remquof(1.0f, 2.0f, &iX);
remquof(1.0f, 2.0f, &iX);
rhypotf(0.0f, 1.0f);
fY = rintf(1.0f);
rnorm3df(0.0f, 0.0f, 1.0f);
@@ -127,6 +127,6 @@ __global__ void FloatMathPrecise(hipLaunchParm lp) {
}
int main() {
hipLaunchKernel(FloatMathPrecise, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0);
hipLaunchKernelGGL(FloatMathPrecise, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0);
passed();
}
@@ -31,82 +31,82 @@ THE SOFTWARE.
#define N 512
#define SIZE N * sizeof(float)
__global__ void test_sincosf(hipLaunchParm lp, float* a, float* b, float* c) {
__global__ void test_sincosf(float* a, float* b, float* c) {
int tid = threadIdx.x;
sincosf(a[tid], b + tid, c + tid);
}
__global__ void test_sincospif(hipLaunchParm lp, float* a, float* b, float* c) {
__global__ void test_sincospif(float* a, float* b, float* c) {
int tid = threadIdx.x;
sincospif(a[tid], b + tid, c + tid);
}
__global__ void test_fdividef(hipLaunchParm lp, float* a, float* b, float* c) {
__global__ void test_fdividef(float* a, float* b, float* c) {
int tid = threadIdx.x;
c[tid] = fdividef(a[tid], b[tid]);
}
__global__ void test_llrintf(hipLaunchParm lp, float* a, long long int* b) {
__global__ void test_llrintf(float* a, long long int* b) {
int tid = threadIdx.x;
b[tid] = llrintf(a[tid]);
}
__global__ void test_lrintf(hipLaunchParm lp, float* a, long int* b) {
__global__ void test_lrintf(float* a, long int* b) {
int tid = threadIdx.x;
b[tid] = lrintf(a[tid]);
}
__global__ void test_rintf(hipLaunchParm lp, float* a, float* b) {
__global__ void test_rintf(float* a, float* b) {
int tid = threadIdx.x;
b[tid] = rintf(a[tid]);
}
__global__ void test_llroundf(hipLaunchParm lp, float* a, long long int* b) {
__global__ void test_llroundf(float* a, long long int* b) {
int tid = threadIdx.x;
b[tid] = llroundf(a[tid]);
}
__global__ void test_lroundf(hipLaunchParm lp, float* a, long int* b) {
__global__ void test_lroundf(float* a, long int* b) {
int tid = threadIdx.x;
b[tid] = lroundf(a[tid]);
}
__global__ void test_rhypotf(hipLaunchParm lp, float* a, float* b, float* c) {
__global__ void test_rhypotf(float* a, float* b, float* c) {
int tid = threadIdx.x;
c[tid] = rhypotf(a[tid], b[tid]);
}
__global__ void test_norm3df(hipLaunchParm lp, float* a, float* b, float* c, float* d) {
__global__ void test_norm3df(float* a, float* b, float* c, float* d) {
int tid = threadIdx.x;
d[tid] = norm3df(a[tid], b[tid], c[tid]);
}
__global__ void test_norm4df(hipLaunchParm lp, float* a, float* b, float* c, float* d, float* e) {
__global__ void test_norm4df(float* a, float* b, float* c, float* d, float* e) {
int tid = threadIdx.x;
e[tid] = norm4df(a[tid], b[tid], c[tid], d[tid]);
}
__global__ void test_normf(hipLaunchParm lp, float* a, float* b) {
__global__ void test_normf(float* a, float* b) {
int tid = threadIdx.x;
b[tid] = normf(N, a);
}
__global__ void test_rnorm3df(hipLaunchParm lp, float* a, float* b, float* c, float* d) {
__global__ void test_rnorm3df(float* a, float* b, float* c, float* d) {
int tid = threadIdx.x;
d[tid] = rnorm3df(a[tid], b[tid], c[tid]);
}
__global__ void test_rnorm4df(hipLaunchParm lp, float* a, float* b, float* c, float* d, float* e) {
__global__ void test_rnorm4df(float* a, float* b, float* c, float* d, float* e) {
int tid = threadIdx.x;
e[tid] = rnorm4df(a[tid], b[tid], c[tid], d[tid]);
}
__global__ void test_rnormf(hipLaunchParm lp, float* a, float* b) {
__global__ void test_rnormf(float* a, float* b) {
int tid = threadIdx.x;
b[tid] = rnormf(N, a);
}
__global__ void test_erfinvf(hipLaunchParm lp, float* a, float* b) {
__global__ void test_erfinvf(float* a, float* b) {
int tid = threadIdx.x;
b[tid] = erff(erfinvf(a[tid]));
}
@@ -124,7 +124,7 @@ bool run_sincosf() {
hipMalloc((void**)&Bd, SIZE);
hipMalloc((void**)&Cd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_sincosf, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipLaunchKernelGGL(test_sincosf, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
@@ -166,7 +166,7 @@ bool run_sincospif() {
hipMalloc((void**)&Bd, SIZE);
hipMalloc((void**)&Cd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_sincospif, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipLaunchKernelGGL(test_sincospif, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
@@ -210,7 +210,7 @@ bool run_fdividef() {
hipMalloc((void**)&Cd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_fdividef, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipLaunchKernelGGL(test_fdividef, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -244,7 +244,7 @@ bool run_llrintf() {
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, N * sizeof(long long int));
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_llrintf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipLaunchKernelGGL(test_llrintf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, N * sizeof(long long int), hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -277,7 +277,7 @@ bool run_lrintf() {
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, N * sizeof(long int));
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_lrintf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipLaunchKernelGGL(test_lrintf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, N * sizeof(long int), hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -310,7 +310,7 @@ bool run_rintf() {
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_rintf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipLaunchKernelGGL(test_rintf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -344,7 +344,7 @@ bool run_llroundf() {
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, N * sizeof(long long int));
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_llroundf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipLaunchKernelGGL(test_llroundf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, N * sizeof(long long int), hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -377,7 +377,7 @@ bool run_lroundf() {
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, N * sizeof(long int));
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_lroundf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipLaunchKernelGGL(test_lroundf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, N * sizeof(long int), hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -420,7 +420,7 @@ bool run_norm3df() {
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_norm3df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd);
hipLaunchKernelGGL(test_norm3df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd);
hipMemcpy(D, Dd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -469,7 +469,7 @@ bool run_norm4df() {
hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Dd, D, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_norm4df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed);
hipLaunchKernelGGL(test_norm4df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed);
hipMemcpy(E, Ed, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -510,7 +510,7 @@ bool run_normf() {
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_normf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipLaunchKernelGGL(test_normf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -547,7 +547,7 @@ bool run_rhypotf() {
hipMalloc((void**)&Cd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_rhypotf, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipLaunchKernelGGL(test_rhypotf, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -590,7 +590,7 @@ bool run_rnorm3df() {
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_rnorm3df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd);
hipLaunchKernelGGL(test_rnorm3df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd);
hipMemcpy(D, Dd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -639,7 +639,7 @@ bool run_rnorm4df() {
hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Dd, D, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_rnorm4df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed);
hipLaunchKernelGGL(test_rnorm4df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed);
hipMemcpy(E, Ed, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -680,7 +680,7 @@ bool run_rnormf() {
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_rnormf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipLaunchKernelGGL(test_rnormf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {
@@ -712,7 +712,7 @@ bool run_erfinvf() {
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_erfinvf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipLaunchKernelGGL(test_erfinvf, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for (int i = 0; i < 512; i++) {