diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1168b65be2..cbd2050025 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -181,7 +181,8 @@ if(HIP_PLATFORM STREQUAL "hcc")
         src/device_util.cpp
         src/hip_ldg.cpp
         src/hip_fp16.cpp
-        src/device_functions.cpp)
+        src/device_functions.cpp
+        src/math_functions.cpp)
 
     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${HCC_HOME}/lib -lmcwamp -Wl,-Bsymbolic -Wl,-rpath ${HCC_HOME}/lib")
     add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME})
diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h
index 06beeb23f8..0489a72c8b 100644
--- a/include/hip/hcc_detail/device_functions.h
+++ b/include/hip/hcc_detail/device_functions.h
@@ -23,6 +23,173 @@ THE SOFTWARE.
 #include <hip/hip_runtime.h>
 #include <hip/hip_vector_types.h>
 
+// Single Precision Fast Math
+extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32");
+extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32");
+__device__ float __hip_fast_exp10f(float);
+__device__ float __hip_fast_expf(float);
+__device__ float __hip_fast_frsqrt_rn(float);
+extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32");
+__device__ float __hip_fast_fsqrt_rn(float);
+__device__ float __hip_fast_fsqrt_ru(float);
+__device__ float __hip_fast_fsqrt_rz(float);
+__device__ float __hip_fast_log10f(float);
+extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32");
+__device__ float __hip_fast_logf(float);
+__device__ float __hip_fast_powf(float, float);
+__device__ void __hip_fast_sincosf(float,float*,float*);
+extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
+__device__ float __hip_fast_tanf(float);
+extern __attribute__((const)) float __hip_fast_fmaf(float,float,float) __asm("llvm.fma.f32");
+extern __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32");
+
+extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");
+extern __attribute__((const)) double __hip_fast_fma(double,double,double) __asm("llvm.fma.f64");
+extern __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64");
+
+
+// Single Precision Fast Math
+__device__ inline float __cosf(float x) {
+  return __hip_fast_cosf(x);
+}
+
+__device__ inline float __exp10f(float x) {
+  return __hip_fast_exp10f(x);
+}
+
+__device__ inline float __expf(float x) {
+  return __hip_fast_expf(x);
+}
+
+__device__ inline float __frsqrt_rn(float x) {
+  return __hip_fast_frsqrt_rn(x);
+}
+
+__device__ inline float __fsqrt_rd(float x) {
+  return __hip_fast_fsqrt_rd(x);
+}
+
+__device__ inline float __fsqrt_rn(float x) {
+  return __hip_fast_fsqrt_rn(x);
+}
+
+__device__ inline float __fsqrt_ru(float x) {
+  return __hip_fast_fsqrt_ru(x);
+}
+
+__device__ inline float __fsqrt_rz(float x) {
+  return __hip_fast_fsqrt_rz(x);
+}
+
+__device__ inline float __log10f(float x) {
+  return __hip_fast_log10f(x);
+}
+
+__device__ inline float __log2f(float x) {
+  return __hip_fast_log2f(x);
+}
+
+__device__ inline float __logf(float x) {
+  return __hip_fast_logf(x);
+}
+
+__device__ inline float __powf(float base, float exponent) {
+  return __hip_fast_powf(base, exponent);
+}
+
+__device__ inline void __sincosf(float x, float *s, float *c) {
+  return __hip_fast_sincosf(x, s, c);
+}
+
+__device__ inline float __sinf(float x) {
+  return __hip_fast_sinf(x);
+}
+
+__device__ inline float __tanf(float x) {
+  return __hip_fast_tanf(x);
+}
+
+__device__ inline float __fmaf_rd(float x, float y, float z) {
+  return __hip_fast_fmaf(x, y, z);
+}
+
+__device__ inline float __fmaf_rn(float x, float y, float z) {
+  return __hip_fast_fmaf(x, y, z);
+}
+
+__device__ inline float __fmaf_ru(float x, float y, float z) {
+  return __hip_fast_fmaf(x, y, z);
+}
+
+__device__ inline float __fmaf_rz(float x, float y, float z) {
+  return __hip_fast_fmaf(x, y, z);
+}
+
+__device__ inline float __frcp_rd(float x) {
+  return __hip_fast_frcp(x);
+}
+
+__device__ inline float __frcp_rn(float x) {
+  return __hip_fast_frcp(x);
+}
+
+__device__ inline float __frcp_ru(float x) {
+  return __hip_fast_frcp(x);
+}
+
+__device__ inline float __frcp_rz(float x) {
+  return __hip_fast_frcp(x);
+}
+
+__device__ inline double __dsqrt_rd(double x) {
+  return __hip_fast_dsqrt(x);
+}
+
+__device__ inline double __dsqrt_rn(double x) {
+  return __hip_fast_dsqrt(x);
+}
+
+__device__ inline double __dsqrt_ru(double x) {
+  return __hip_fast_dsqrt(x);
+}
+
+__device__ inline double __dsqrt_rz(double x) {
+  return __hip_fast_dsqrt(x);
+}
+
+__device__ inline double __fma_rd(double x, double y, double z) {
+  return __hip_fast_fma(x, y, z);
+}
+
+__device__ inline double __fma_rn(double x, double y, double z) {
+  return __hip_fast_fma(x, y, z);
+}
+
+__device__ inline double __fma_ru(double x, double y, double z) {
+  return __hip_fast_fma(x, y, z);
+}
+
+__device__ inline double __fma_rz(double x, double y, double z) {
+  return __hip_fast_fma(x, y, z);
+}
+
+__device__ inline double __drcp_rd(double x) {
+  return __hip_fast_drcp(x);
+}
+
+__device__ inline double __drcp_rn(double x) {
+  return __hip_fast_drcp(x);
+}
+
+__device__ inline double __drcp_ru(double x) {
+  return __hip_fast_drcp(x);
+}
+
+__device__ inline double __drcp_rz(double x) {
+  return __hip_fast_drcp(x);
+}
+
+
 extern "C" unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int);
 extern "C" signed int __hip_hc_ir_mul24_int(signed int, signed int);
 extern "C" signed int __hip_hc_ir_mulhi_int(signed int, signed int);
diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h
index 755cb19f6d..73049eb5fb 100644
--- a/include/hip/hcc_detail/hip_fp16.h
+++ b/include/hip/hcc_detail/hip_fp16.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
 
-#ifndef HIP_FP16_H
-#define HIP_FP16_H
+#ifndef HIP_HCC_DETAIL_FP16_H
+#define HIP_HCC_DETAIL_FP16_H
 
 #include "hip/hip_runtime.h"
 
@@ -452,8 +452,6 @@ typedef struct __attribute__((aligned(4))){
 } __half2;
 
 
-
-
 #endif
 
 
diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h
index e911d17ebb..f11846a0da 100644
--- a/include/hip/hcc_detail/hip_runtime.h
+++ b/include/hip/hcc_detail/hip_runtime.h
@@ -121,208 +121,6 @@ extern int HIP_TRACE_API;
 #define __HCC_C__
 #endif
 
-__device__ float acosf(float x);
-__device__ float acoshf(float x);
-__device__ float asinf(float x);
-__device__ float asinhf(float x);
-__device__ float atan2f(float y, float x);
-__device__ float atanf(float x);
-__device__ float atanhf(float x);
-__device__ float cbrtf(float x);
-__device__ float ceilf(float x);
-__device__ float copysignf(float x, float y);
-__device__ float coshf(float x);
-__device__ float cyl_bessel_i0f(float x);
-__device__ float cyl_bessel_i1f(float x);
-__device__ float erfcf(float x);
-__device__  float erfcinvf(float y);
-
-__device__ float erfcxf(float x);
-__device__ float erff(float x);
-__device__ float erfinvf(float y);
-__device__ float exp2f(float x);
-__device__ float expm1f(float x);
-__device__ float fabsf(float x);
-__device__ float fdimf(float x, float y);
-__device__ __host__ float fdividef(float x, float y);
-__device__ float floorf(float x);
-__device__ float fmaf(float x, float y, float z);
-__device__ float fmaxf(float x, float y);
-__device__ float fminf(float x, float y);
-__device__ float fmodf(float x, float y);
-__device__ float frexpf(float x, float y);
-__device__ float hypotf(float x, float y);
-__device__ float ilogbf(float x);
-__host__ __device__ unsigned isfinite(float a);
-__device__ unsigned isinf(float a);
-__device__ unsigned isnan(float a);
-__device__ float j0f(float x);
-__device__ float j1f(float x);
-__device__ float jnf(int n, float x);
-__device__ float ldexpf(float x, int exp);
-__device__ float lgammaf(float x);
-__device__ long long int llrintf(float x);
-__device__ long long int llroundf(float x);
-__device__ float log1pf(float x);
-__device__ float logbf(float x);
-__device__ long int lrintf(float x);
-__device__ long int lroundf(float x);
-__device__ float modff(float x, float *iptr);
-__device__ float nanf(const char* tagp);
-__device__ float nearbyintf(float x);
-__device__ float nextafterf(float x, float y);
-__device__ float norm3df(float a, float b, float c);
-__device__ float norm4df(float a, float b, float c, float d);
-__device__ float normcdff(float y);
-__device__ float normcdfinvf(float y);
-__device__ float normf(int dim, const float *a);
-__device__ float rcbrtf(float x);
-__device__ float remainderf(float x, float y);
-__device__ float remquof(float x, float y, int *quo);
-__device__ float rhypotf(float x, float y);
-__device__ float rintf(float x);
-__device__ float rnorm3df(float a, float b, float c);
-__device__ float rnorm4df(float a, float b, float c, float d);
-__device__ float rnormf(int dim, const float* a);
-__device__ float roundf(float x);
-__device__ float rsqrtf(float x);
-__device__ float scalblnf(float x, long int n);
-__device__ float scalbnf(float x, int n);
-__host__ __device__ unsigned signbit(float a);
-__device__ void sincospif(float x, float *sptr, float *cptr);
-__device__ float sinhf(float x);
-__device__ float sinpif(float x);
-__device__ float sqrtf(float x);
-__device__ float tanhf(float x);
-__device__ float tgammaf(float x);
-__device__ float truncf(float x);
-__device__ float y0f(float x);
-__device__ float y1f(float x);
-__device__ float ynf(int n, float x);
-
-__host__ __device__ float cospif(float x);
-__host__ __device__ float sinpif(float x);
-// /__device__ float sqrtf(float x);
-__host__ __device__ float rsqrtf(float x);
-__host__ float normcdff(float y);
-
-__host__ float erfcinvf(float y);
-__host__ float erfcxf(float x);
-__host__ float erfinvf(float y);
-__host__ float norm3df(float a, float b, float c);
-__host__ float normcdfinvf(float y);
-__host__ float norm4df(float a, float b, float c, float d);
-__host__ float rcbrtf(float x);
-__host__ float rhypotf(float x, float y);
-__host__ float rnorm3df(float a, float b, float c);
-__host__ float rnormf(int dim, const float* a);
-__host__ float rnorm4df(float a, float b, float c, float d);
-__host__ void sincospif(float x, float *sptr, float *cptr);
-
-__device__ double acos(double x);
-__device__ double acosh(double x);
-__device__ double asin(double x);
-__device__ double asinh(double x);
-__device__ double atan(double x);
-__device__ double atan2(double y, double x);
-__device__ double atanh(double x);
-__device__ double cbrt(double x);
-__device__ double ceil(double x);
-__device__ double copysign(double x, double y);
-__device__ double cos(double x);
-__device__ double cosh(double x);
-__host__ __device__ double cospi(double x);
-__device__ double cyl_bessel_i0(double x);
-__device__ double cyl_bessel_i1(double x);
-__device__ double erf(double x);
-__device__ double erfc(double x);
-__device__ double erfcinv(double y);
-__device__ double erfcx(double x);
-__device__ double erfinv(double x);
-__device__ double exp(double x);
-__device__ double exp10(double x);
-__device__ double exp2(double x);
-__device__ double expm1(double x);
-__device__ double fabs(double x);
-__device__ double fdim(double x, double y);
-__device__ double fdivide(double x, double y);
-__device__ double floor(double x);
-__device__ double fma(double x, double y, double z);
-__device__ double fmax(double x, double y);
-__device__ double fmin(double x, double y);
-__device__ double fmod(double x, double y);
-__device__ double frexp(double x, int *nptr);
-__device__ double hypot(double x, double y);
-__device__ double ilogb(double x);
-__host__ __device__ unsigned isfinite(double x);
-__device__ unsigned isinf(double x);
-__device__ unsigned isnan(double x);
-__device__ double j0(double x);
-__device__ double j1(double x);
-__device__ double jn(int n, double x);
-__device__ double ldexp(double x, int exp);
-__device__ double lgamma(double x);
-__device__ long long llrint(double x);
-__device__ long long llround(double x);
-__device__ double log(double x);
-__device__ double log10(double x);
-__device__ double log1p(double x);
-__device__ double log2(double x);
-__device__ double logb(double x);
-__device__ long int lrint(double x);
-__device__ long int lround(double x);
-__device__ double modf(double x, double *iptr);
-__device__ double nan(const char* tagp);
-__device__ double nearbyint(double x);
-__device__ double nextafter(double x, double y);
-__device__ double norm(int dim, const double* t);
-__device__ double norm3d(double a, double b, double c);
-__host__ double norm3d(double a, double b, double c);
-__device__ double norm4d(double a, double b, double c, double d);
-__host__ double norm4d(double a, double b, double c, double d);
-__device__ double normcdf(double y);
-__host__ double normcdf(double y);
-__device__ double normcdfinv(double y);
-__host__ double normcdfinv(double y);
-__device__ double pow(double x, double y);
-__device__ double rcbrt(double x);
-__host__ double rcbrt(double x);
-__device__ double remainder(double x, double y);
-__device__ double remquo(double x, double y, int *quo);
-__device__ double rhypot(double x, double y);
-__host__ double rhypot(double x, double y);
-__device__ double rint(double x);
-__device__ double rnorm(int dim, const double* t);
-__host__ double rnorm(int dim, const double* t);
-__device__ double rnorm3d(double a, double b, double c);
-__host__ double rnorm3d(double a, double b, double c);
-__device__ double rnorm4d(double a, double b, double c, double d);
-__host__ double rnorm4d(double a, double b, double c, double d);
-__device__ double round(double x);
-__host__ __device__ double rsqrt(double x);
-__device__ double scalbln(double x, long int n);
-__device__ double scalbn(double x, int n);
-__host__ __device__ unsigned signbit(double a);
-__device__ double sin(double a);
-__device__ void sincos(double x, double *sptr, double *cptr);
-__device__ void sincospi(double x, double *sptr, double *cptr);
-__host__ void sincospi(double x, double *sptr, double *cptr);
-__device__ double sinh(double x);
-__host__ __device__ double sinpi(double x);
-__device__ double sqrt(double x);
-__device__ double tan(double x);
-__device__ double tanh(double x);
-__device__ double tgamma(double x);
-__device__ double trunc(double x);
-__device__ double y0(double x);
-__device__ double y1(double y);
-__device__ double yn(int n, double x);
-
-__host__ double erfcinv(double y);
-__host__ double erfcx(double x);
-__host__ double erfinv(double y);
-__host__ double fdivide(double x, double y);
-
 // TODO - hipify-clang - change to use the function call.
 //#define warpSize hc::__wavesize()
 extern const int warpSize;
@@ -451,252 +249,6 @@ __host__ __device__ int max(int arg1, int arg2);
 
 __device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr();
 
-//TODO - add a couple fast math operations here, the set here will grow :
-
-// Single Precision Precise Math
-__device__ float __hip_precise_cosf(float);
-__device__ float __hip_precise_exp10f(float);
-__device__ float __hip_precise_expf(float);
-__device__ float __hip_precise_frsqrt_rn(float);
-__device__ float __hip_precise_fsqrt_rd(float);
-__device__ float __hip_precise_fsqrt_rn(float);
-__device__ float __hip_precise_fsqrt_ru(float);
-__device__ float __hip_precise_fsqrt_rz(float);
-__device__ float __hip_precise_log10f(float);
-__device__ float __hip_precise_log2f(float);
-__device__ float __hip_precise_logf(float);
-__device__ float __hip_precise_powf(float, float);
-__device__ void __hip_precise_sincosf(float,float*,float*);
-__device__ float __hip_precise_sinf(float);
-__device__ float __hip_precise_tanf(float);
-
-// Double Precision Precise Math
-__device__ double __hip_precise_dsqrt_rd(double);
-__device__ double __hip_precise_dsqrt_rn(double);
-__device__ double __hip_precise_dsqrt_ru(double);
-__device__ double __hip_precise_dsqrt_rz(double);
-
-// Single Precision Fast Math
-extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32");
-extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32");
-__device__ float __hip_fast_exp10f(float);
-__device__ float __hip_fast_expf(float);
-__device__ float __hip_fast_frsqrt_rn(float);
-extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32");
-__device__ float __hip_fast_fsqrt_rn(float);
-__device__ float __hip_fast_fsqrt_ru(float);
-__device__ float __hip_fast_fsqrt_rz(float);
-__device__ float __hip_fast_log10f(float);
-extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32");
-__device__ float __hip_fast_logf(float);
-__device__ float __hip_fast_powf(float, float);
-__device__ void __hip_fast_sincosf(float,float*,float*);
-extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
-__device__ float __hip_fast_tanf(float);
-extern __attribute__((const)) float __hip_fast_fmaf(float,float,float) __asm("llvm.fma.f32");
-extern __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32");
-
-extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");
-extern __attribute__((const)) double __hip_fast_fma(double,double,double) __asm("llvm.fma.f64");
-extern __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64");
-
-#ifdef HIP_FAST_MATH
-// Single Precision Precise Math when enabled
-
-__device__ inline float cosf(float x) {
-  return __hip_fast_cosf(x);
-}
-
-__device__ inline float exp10f(float x) {
-  return __hip_fast_exp10f(x);
-}
-
-__device__ inline float expf(float x) {
-  return __hip_fast_expf(x);
-}
-
-__device__ inline float log10f(float x) {
-  return __hip_fast_log10f(x);
-}
-
-__device__ inline float log2f(float x) {
-  return __hip_fast_log2f(x);
-}
-
-__device__ inline float logf(float x) {
-  return __hip_fast_logf(x);
-}
-
-__device__ inline float powf(float base, float exponent) {
-  return __hip_fast_powf(base, exponent);
-}
-
-__device__ inline void sincosf(float x, float *s, float *c) {
-  return __hip_fast_sincosf(x, s, c);
-}
-
-__device__ inline float sinf(float x) {
-  return __hip_fast_sinf(x);
-}
-
-__device__ inline float tanf(float x) {
-  return __hip_fast_tanf(x);
-}
-
-#else
-
-__device__ float sinf(float);
-__device__ float cosf(float);
-__device__ float tanf(float);
-__device__ void sincosf(float, float*, float*);
-__device__ float logf(float);
-__device__ float log2f(float);
-__device__ float log10f(float);
-__device__ float expf(float);
-__device__ float exp10f(float);
-__device__ float powf(float, float);
-
-#endif
-// Single Precision Fast Math
-__device__ inline float __cosf(float x) {
-  return __hip_fast_cosf(x);
-}
-
-__device__ inline float __exp10f(float x) {
-  return __hip_fast_exp10f(x);
-}
-
-__device__ inline float __expf(float x) {
-  return __hip_fast_expf(x);
-}
-
-__device__ inline float __frsqrt_rn(float x) {
-  return __hip_fast_frsqrt_rn(x);
-}
-
-__device__ inline float __fsqrt_rd(float x) {
-  return __hip_fast_fsqrt_rd(x);
-}
-
-__device__ inline float __fsqrt_rn(float x) {
-  return __hip_fast_fsqrt_rn(x);
-}
-
-__device__ inline float __fsqrt_ru(float x) {
-  return __hip_fast_fsqrt_ru(x);
-}
-
-__device__ inline float __fsqrt_rz(float x) {
-  return __hip_fast_fsqrt_rz(x);
-}
-
-__device__ inline float __log10f(float x) {
-  return __hip_fast_log10f(x);
-}
-
-__device__ inline float __log2f(float x) {
-  return __hip_fast_log2f(x);
-}
-
-__device__ inline float __logf(float x) {
-  return __hip_fast_logf(x);
-}
-
-__device__ inline float __powf(float base, float exponent) {
-  return __hip_fast_powf(base, exponent);
-}
-
-__device__ inline void __sincosf(float x, float *s, float *c) {
-  return __hip_fast_sincosf(x, s, c);
-}
-
-__device__ inline float __sinf(float x) {
-  return __hip_fast_sinf(x);
-}
-
-__device__ inline float __tanf(float x) {
-  return __hip_fast_tanf(x);
-}
-
-__device__ inline float __fmaf_rd(float x, float y, float z) {
-  return __hip_fast_fmaf(x, y, z);
-}
-
-__device__ inline float __fmaf_rn(float x, float y, float z) {
-  return __hip_fast_fmaf(x, y, z);
-}
-
-__device__ inline float __fmaf_ru(float x, float y, float z) {
-  return __hip_fast_fmaf(x, y, z);
-}
-
-__device__ inline float __fmaf_rz(float x, float y, float z) {
-  return __hip_fast_fmaf(x, y, z);
-}
-
-__device__ inline float __frcp_rd(float x) {
-  return __hip_fast_frcp(x);
-}
-
-__device__ inline float __frcp_rn(float x) {
-  return __hip_fast_frcp(x);
-}
-
-__device__ inline float __frcp_ru(float x) {
-  return __hip_fast_frcp(x);
-}
-
-__device__ inline float __frcp_rz(float x) {
-  return __hip_fast_frcp(x);
-}
-
-__device__ inline double __dsqrt_rd(double x) {
-  return __hip_fast_dsqrt(x);
-}
-
-__device__ inline double __dsqrt_rn(double x) {
-  return __hip_fast_dsqrt(x);
-}
-
-__device__ inline double __dsqrt_ru(double x) {
-  return __hip_fast_dsqrt(x);
-}
-
-__device__ inline double __dsqrt_rz(double x) {
-  return __hip_fast_dsqrt(x);
-}
-
-__device__ inline double __fma_rd(double x, double y, double z) {
-  return __hip_fast_fma(x, y, z);
-}
-
-__device__ inline double __fma_rn(double x, double y, double z) {
-  return __hip_fast_fma(x, y, z);
-}
-
-__device__ inline double __fma_ru(double x, double y, double z) {
-  return __hip_fast_fma(x, y, z);
-}
-
-__device__ inline double __fma_rz(double x, double y, double z) {
-  return __hip_fast_fma(x, y, z);
-}
-
-__device__ inline double __drcp_rd(double x) {
-  return __hip_fast_drcp(x);
-}
-
-__device__ inline double __drcp_rn(double x) {
-  return __hip_fast_drcp(x);
-}
-
-__device__ inline double __drcp_ru(double x) {
-  return __hip_fast_drcp(x);
-}
-
-__device__ inline double __drcp_rz(double x) {
-  return __hip_fast_drcp(x);
-}
 
 /**
  * CUDA 8 device function features
diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h
new file mode 100644
index 0000000000..5a0e21f83c
--- /dev/null
+++ b/include/hip/hcc_detail/math_functions.h
@@ -0,0 +1,288 @@
+/*
+Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef HIP_HCC_DETAIL_MATH_FUNCTIONS_H
+#define HIP_HCC_DETAIL_MATH_FUNCTIONS_H
+
+#include <hip/hip_runtime.h>
+#include <hip/hip_vector_types.h>
+#include <hip/hcc_detail/device_functions.h>
+
+__device__ float acosf(float x);
+__device__ float acoshf(float x);
+__device__ float asinf(float x);
+__device__ float asinhf(float x);
+__device__ float atan2f(float y, float x);
+__device__ float atanf(float x);
+__device__ float atanhf(float x);
+__device__ float cbrtf(float x);
+__device__ float ceilf(float x);
+__device__ float copysignf(float x, float y);
+__device__ float coshf(float x);
+__device__ float cyl_bessel_i0f(float x);
+__device__ float cyl_bessel_i1f(float x);
+__device__ float erfcf(float x);
+__device__  float erfcinvf(float y);
+
+__device__ float erfcxf(float x);
+__device__ float erff(float x);
+__device__ float erfinvf(float y);
+__device__ float exp2f(float x);
+__device__ float expm1f(float x);
+__device__ float fabsf(float x);
+__device__ float fdimf(float x, float y);
+__device__ __host__ float fdividef(float x, float y);
+__device__ float floorf(float x);
+__device__ float fmaf(float x, float y, float z);
+__device__ float fmaxf(float x, float y);
+__device__ float fminf(float x, float y);
+__device__ float fmodf(float x, float y);
+__device__ float frexpf(float x, float y);
+__device__ float hypotf(float x, float y);
+__device__ float ilogbf(float x);
+__host__ __device__ int isfinite(float a);
+__device__ unsigned isinf(float a);
+__device__ unsigned isnan(float a);
+__device__ float j0f(float x);
+__device__ float j1f(float x);
+__device__ float jnf(int n, float x);
+__device__ float ldexpf(float x, int exp);
+__device__ float lgammaf(float x);
+__device__ long long int llrintf(float x);
+__device__ long long int llroundf(float x);
+__device__ float log1pf(float x);
+__device__ float logbf(float x);
+__device__ long int lrintf(float x);
+__device__ long int lroundf(float x);
+__device__ float modff(float x, float *iptr);
+__device__ float nanf(const char* tagp);
+__device__ float nearbyintf(float x);
+__device__ float nextafterf(float x, float y);
+__device__ float norm3df(float a, float b, float c);
+__device__ float norm4df(float a, float b, float c, float d);
+__device__ float normcdff(float y);
+__device__ float normcdfinvf(float y);
+__device__ float normf(int dim, const float *a);
+__device__ float rcbrtf(float x);
+__device__ float remainderf(float x, float y);
+__device__ float remquof(float x, float y, int *quo);
+__device__ float rhypotf(float x, float y);
+__device__ float rintf(float x);
+__device__ float rnorm3df(float a, float b, float c);
+__device__ float rnorm4df(float a, float b, float c, float d);
+__device__ float rnormf(int dim, const float* a);
+__device__ float roundf(float x);
+__device__ float rsqrtf(float x);
+__device__ float scalblnf(float x, long int n);
+__device__ float scalbnf(float x, int n);
+__host__ __device__ unsigned signbit(float a);
+__device__ void sincospif(float x, float *sptr, float *cptr);
+__device__ float sinhf(float x);
+__device__ float sinpif(float x);
+__device__ float sqrtf(float x);
+__device__ float tanhf(float x);
+__device__ float tgammaf(float x);
+__device__ float truncf(float x);
+__device__ float y0f(float x);
+__device__ float y1f(float x);
+__device__ float ynf(int n, float x);
+
+__host__ __device__ float cospif(float x);
+__host__ __device__ float sinpif(float x);
+// /__device__ float sqrtf(float x);
+__host__ __device__ float rsqrtf(float x);
+__host__ float normcdff(float y);
+
+__host__ float erfcinvf(float y);
+__host__ float erfcxf(float x);
+__host__ float erfinvf(float y);
+__host__ float norm3df(float a, float b, float c);
+__host__ float normcdfinvf(float y);
+__host__ float norm4df(float a, float b, float c, float d);
+__host__ float rcbrtf(float x);
+__host__ float rhypotf(float x, float y);
+__host__ float rnorm3df(float a, float b, float c);
+__host__ float rnormf(int dim, const float* a);
+__host__ float rnorm4df(float a, float b, float c, float d);
+__host__ void sincospif(float x, float *sptr, float *cptr);
+
+__device__ double acos(double x);
+__device__ double acosh(double x);
+__device__ double asin(double x);
+__device__ double asinh(double x);
+__device__ double atan(double x);
+__device__ double atan2(double y, double x);
+__device__ double atanh(double x);
+__device__ double cbrt(double x);
+__device__ double ceil(double x);
+__device__ double copysign(double x, double y);
+__device__ double cos(double x);
+__device__ double cosh(double x);
+__host__ __device__ double cospi(double x);
+__device__ double cyl_bessel_i0(double x);
+__device__ double cyl_bessel_i1(double x);
+__device__ double erf(double x);
+__device__ double erfc(double x);
+__device__ double erfcinv(double y);
+__device__ double erfcx(double x);
+__device__ double erfinv(double x);
+__device__ double exp(double x);
+__device__ double exp10(double x);
+__device__ double exp2(double x);
+__device__ double expm1(double x);
+__device__ double fabs(double x);
+__device__ double fdim(double x, double y);
+__device__ double floor(double x);
+__device__ double fma(double x, double y, double z);
+__device__ double fmax(double x, double y);
+__device__ double fmin(double x, double y);
+__device__ double fmod(double x, double y);
+__device__ double frexp(double x, int *nptr);
+__device__ double hypot(double x, double y);
+__device__ double ilogb(double x);
+__host__ __device__ unsigned isfinite(double x);
+__device__ unsigned isinf(double x);
+__device__ unsigned isnan(double x);
+__device__ double j0(double x);
+__device__ double j1(double x);
+__device__ double jn(int n, double x);
+__device__ double ldexp(double x, int exp);
+__device__ double lgamma(double x);
+__device__ long long llrint(double x);
+__device__ long long llround(double x);
+__device__ double log(double x);
+__device__ double log10(double x);
+__device__ double log1p(double x);
+__device__ double log2(double x);
+__device__ double logb(double x);
+__device__ long int lrint(double x);
+__device__ long int lround(double x);
+__device__ double modf(double x, double *iptr);
+__device__ double nan(const char* tagp);
+__device__ double nearbyint(double x);
+__device__ double nextafter(double x, double y);
+__device__ double norm(int dim, const double* t);
+__device__ double norm3d(double a, double b, double c);
+__host__ double norm3d(double a, double b, double c);
+__device__ double norm4d(double a, double b, double c, double d);
+__host__ double norm4d(double a, double b, double c, double d);
+__device__ double normcdf(double y);
+__host__ double normcdf(double y);
+__device__ double normcdfinv(double y);
+__host__ double normcdfinv(double y);
+__device__ double pow(double x, double y);
+__device__ double rcbrt(double x);
+__host__ double rcbrt(double x);
+__device__ double remainder(double x, double y);
+__device__ double remquo(double x, double y, int *quo);
+__device__ double rhypot(double x, double y);
+__host__ double rhypot(double x, double y);
+__device__ double rint(double x);
+__device__ double rnorm(int dim, const double* t);
+__host__ double rnorm(int dim, const double* t);
+__device__ double rnorm3d(double a, double b, double c);
+__host__ double rnorm3d(double a, double b, double c);
+__device__ double rnorm4d(double a, double b, double c, double d);
+__host__ double rnorm4d(double a, double b, double c, double d);
+__device__ double round(double x);
+__host__ __device__ double rsqrt(double x);
+__device__ double scalbln(double x, long int n);
+__device__ double scalbn(double x, int n);
+__host__ __device__ unsigned signbit(double a);
+__device__ double sin(double a);
+__device__ void sincos(double x, double *sptr, double *cptr);
+__device__ void sincospi(double x, double *sptr, double *cptr);
+__host__ void sincospi(double x, double *sptr, double *cptr);
+__device__ double sinh(double x);
+__host__ __device__ double sinpi(double x);
+__device__ double sqrt(double x);
+__device__ double tan(double x);
+__device__ double tanh(double x);
+__device__ double tgamma(double x);
+__device__ double trunc(double x);
+__device__ double y0(double x);
+__device__ double y1(double y);
+__device__ double yn(int n, double x);
+
+__host__ double erfcinv(double y);
+__host__ double erfcx(double x);
+__host__ double erfinv(double y);
+__host__ double fdivide(double x, double y);
+__host__ double norm(double x, const double *t);
+
+#ifdef HIP_FAST_MATH
+// Single Precision Precise Math when enabled
+
+__device__ inline float cosf(float x) {
+  return __hip_fast_cosf(x);
+}
+
+__device__ inline float exp10f(float x) {
+  return __hip_fast_exp10f(x);
+}
+
+__device__ inline float expf(float x) {
+  return __hip_fast_expf(x);
+}
+
+__device__ inline float log10f(float x) {
+  return __hip_fast_log10f(x);
+}
+
+__device__ inline float log2f(float x) {
+  return __hip_fast_log2f(x);
+}
+
+__device__ inline float logf(float x) {
+  return __hip_fast_logf(x);
+}
+
+__device__ inline float powf(float base, float exponent) {
+  return __hip_fast_powf(base, exponent);
+}
+
+__device__ inline void sincosf(float x, float *s, float *c) {
+  return __hip_fast_sincosf(x, s, c);
+}
+
+__device__ inline float sinf(float x) {
+  return __hip_fast_sinf(x);
+}
+
+__device__ inline float tanf(float x) {
+  return __hip_fast_tanf(x);
+}
+
+#else
+
+__device__ float sinf(float);
+__device__ float cosf(float);
+__device__ float tanf(float);
+__device__ void sincosf(float, float*, float*);
+__device__ float logf(float);
+__device__ float log2f(float);
+__device__ float log10f(float);
+__device__ float expf(float);
+__device__ float exp10f(float);
+__device__ float powf(float, float);
+
+#endif
+
+
+#endif
diff --git a/include/hip/math_functions.h b/include/hip/math_functions.h
new file mode 100644
index 0000000000..d33f7a2e90
--- /dev/null
+++ b/include/hip/math_functions.h
@@ -0,0 +1,49 @@
+/*
+Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+//! HIP = Heterogeneous-compute Interface for Portability
+//!
+//! Define a extremely thin runtime layer that allows source code to be compiled unmodified
+//! through either AMD HCC or NVCC.   Key features tend to be in the spirit
+//! and terminology of CUDA, but with a portable path to other accelerators as well:
+//
+//! Both paths support rich C++ features including classes, templates, lambdas, etc.
+//! Runtime API is C
+//! Memory management is based on pure pointers and resembles malloc/free/copy.
+//
+//! hip_runtime.h     : includes everything in hip_api.h, plus math builtins and kernel launch macros.
+//! hip_runtime_api.h : Defines HIP API.  This is a C header file and does not use any C++ features.
+
+#pragma once
+
+// Some standard header files, these are included by hc.hpp and so want to make them avail on both
+// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
+// on NVCC path:
+
+
+#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__)
+#include <hip/hcc_detail/math_functions.h>
+#elif defined(__HIP_PLATFORM_NVCC__) && !defined (__HIP_PLATFORM_HCC__)
+#include <hip/nvcc_detail/math_functions.h>
+#else
+#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
+#endif
diff --git a/src/device_functions.cpp b/src/device_functions.cpp
index 4b0eb9a5ff..abc9db570e 100644
--- a/src/device_functions.cpp
+++ b/src/device_functions.cpp
@@ -523,3 +523,71 @@ __device__ unsigned long long __umul64hi(unsigned long long int x, unsigned long
   uHold1.ul = uHold1.ui[1] * uHold2.ui[1];
   return uHold1.ul;
 }
+
+/*
+HIP specific device functions
+*/
+
+__device__ unsigned __hip_ds_bpermute(int index, unsigned src) {
+    return hc::__amdgcn_ds_bpermute(index, src);
+}
+
+__device__ float __hip_ds_bpermutef(int index, float src) {
+    return hc::__amdgcn_ds_bpermute(index, src);
+}
+
+__device__ unsigned __hip_ds_permute(int index, unsigned src) {
+    return hc::__amdgcn_ds_permute(index, src);
+}
+
+__device__ float __hip_ds_permutef(int index, float src) {
+    return hc::__amdgcn_ds_permute(index, src);
+}
+
+__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern) {
+    return hc::__amdgcn_ds_swizzle(src, pattern);
+}
+
+__device__ float __hip_ds_swizzlef(float src, int pattern) {
+    return hc::__amdgcn_ds_swizzle(src, pattern);
+}
+
+__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl) {
+    return hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
+}
+
+#define MASK1 0x00ff00ff
+#define MASK2 0xff00ff00
+
+__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) {
+    char4 out;
+    unsigned one1 = in1.a & MASK1;
+    unsigned one2 = in2.a & MASK1;
+    out.a = (one1 + one2) & MASK1;
+    one1 = in1.a & MASK2;
+    one2 = in2.a & MASK2;
+    out.a = out.a | ((one1 + one2) & MASK2);
+    return out;
+}
+
+__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
+    char4 out;
+    unsigned one1 = in1.a & MASK1;
+    unsigned one2 = in2.a & MASK1;
+    out.a = (one1 - one2) & MASK1;
+    one1 = in1.a & MASK2;
+    one2 = in2.a & MASK2;
+    out.a = out.a | ((one1 - one2) & MASK2);
+    return out;
+}
+
+__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
+    char4 out;
+    unsigned one1 = in1.a & MASK1;
+    unsigned one2 = in2.a & MASK1;
+    out.a = (one1 * one2) & MASK1;
+    one1 = in1.a & MASK2;
+    one2 = in2.a & MASK2;
+    out.a = out.a | ((one1 * one2) & MASK2);
+    return out;
+}
diff --git a/src/device_util.cpp b/src/device_util.cpp
index e875db1cf9..d80d9e7ef5 100644
--- a/src/device_util.cpp
+++ b/src/device_util.cpp
@@ -24,7 +24,7 @@ THE SOFTWARE.
 #include <grid_launch.h>
 #include <hc_math.hpp>
 #include "device_util.h"
-
+#include "hip/hcc_detail/device_functions.h"
 #include "hip/hip_runtime.h"
 
 //=================================================================================================
@@ -96,69 +96,7 @@ __device__ void* __hip_hc_free(void *ptr)
     return nullptr;
 }
 
-__device__ unsigned __hip_ds_bpermute(int index, unsigned src) {
-    return hc::__amdgcn_ds_bpermute(index, src);
-}
 
-__device__ float __hip_ds_bpermutef(int index, float src) {
-    return hc::__amdgcn_ds_bpermute(index, src);
-}
-
-__device__ unsigned __hip_ds_permute(int index, unsigned src) {
-    return hc::__amdgcn_ds_permute(index, src);
-}
-
-__device__ float __hip_ds_permutef(int index, float src) {
-    return hc::__amdgcn_ds_permute(index, src);
-}
-
-__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern) {
-    return hc::__amdgcn_ds_swizzle(src, pattern);
-}
-
-__device__ float __hip_ds_swizzlef(float src, int pattern) {
-    return hc::__amdgcn_ds_swizzle(src, pattern);
-}
-
-__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl) {
-    return hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
-}
-
-#define MASK1 0x00ff00ff
-#define MASK2 0xff00ff00
-
-__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) {
-    char4 out;
-    unsigned one1 = in1.a & MASK1;
-    unsigned one2 = in2.a & MASK1;
-    out.a = (one1 + one2) & MASK1;
-    one1 = in1.a & MASK2;
-    one2 = in2.a & MASK2;
-    out.a = out.a | ((one1 + one2) & MASK2);
-    return out;
-}
-
-__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
-    char4 out;
-    unsigned one1 = in1.a & MASK1;
-    unsigned one2 = in2.a & MASK1;
-    out.a = (one1 - one2) & MASK1;
-    one1 = in1.a & MASK2;
-    one2 = in2.a & MASK2;
-    out.a = out.a | ((one1 - one2) & MASK2);
-    return out;
-}
-
-__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
-    char4 out;
-    unsigned one1 = in1.a & MASK1;
-    unsigned one2 = in2.a & MASK1;
-    out.a = (one1 * one2) & MASK1;
-    one1 = in1.a & MASK2;
-    one2 = in2.a & MASK2;
-    out.a = out.a | ((one1 * one2) & MASK2);
-    return out;
-}
 
 // loop unrolling
 __device__ void* memcpy(void* dst, void* src, size_t size)
@@ -192,39 +130,6 @@ __device__ void* free(void *ptr)
     return __hip_hc_free(ptr);
 }
 
-//=================================================================================================
-
-// TODO: Choose whether default is precise math or fast math based on compilation flag.
-#ifdef __HCC_ACCELERATOR__
-using namespace hc::precise_math;
-#endif
-
-
-#define HIP_SQRT_2 1.41421356237
-#define HIP_SQRT_PI 1.77245385091
-
-#define __hip_erfinva3 -0.140543331
-#define __hip_erfinva2 0.914624893
-#define __hip_erfinva1 -1.645349621
-#define __hip_erfinva0 0.886226899
-
-#define __hip_erfinvb4 0.012229801
-#define __hip_erfinvb3 -0.329097515
-#define __hip_erfinvb2 1.442710462
-#define __hip_erfinvb1 -2.118377725
-#define __hip_erfinvb0 1
-
-#define __hip_erfinvc3 1.641345311
-#define __hip_erfinvc2 3.429567803
-#define __hip_erfinvc1 -1.62490649
-#define __hip_erfinvc0 -1.970840454
-
-#define __hip_erfinvd2 1.637067800
-#define __hip_erfinvd1 3.543889200
-#define __hip_erfinvd0 1
-
-#define HIP_PI 3.14159265358979323846
-
 __device__ float __hip_erfinvf(float x){
     float ret;
     int  sign;
@@ -942,735 +847,6 @@ __device__ float __hip_ynf(int n, float x)
 
 
 
-__device__ float acosf(float x)
-{
-    return hc::precise_math::acosf(x);
-}
-__device__ float acoshf(float x)
-{
-    return hc::precise_math::acoshf(x);
-}
-__device__ float asinf(float x)
-{
-    return hc::precise_math::asinf(x);
-}
-__device__ float asinhf(float x)
-{
-    return hc::precise_math::asinhf(x);
-}
-__device__ float atan2f(float y, float x)
-{
-    return hc::precise_math::atan2f(x, y);
-}
-__device__ float atanf(float x)
-{
-    return hc::precise_math::atanf(x);
-}
-__device__ float atanhf(float x)
-{
-    return hc::precise_math::atanhf(x);
-}
-__device__ float cbrtf(float x)
-{
-    return hc::precise_math::cbrtf(x);
-}
-__device__ float ceilf(float x)
-{
-    return hc::precise_math::ceilf(x);
-}
-__device__ float copysignf(float x, float y)
-{
-    return hc::precise_math::copysignf(x, y);
-}
-__device__ float cosf(float x)
-{
-    return hc::precise_math::cosf(x);
-}
-__device__ float coshf(float x)
-{
-    return hc::precise_math::coshf(x);
-}
-__device__ float cyl_bessel_i0f(float x);
-__device__ float cyl_bessel_i1f(float x);
-__device__ float erfcf(float x)
-{
-    return hc::precise_math::erfcf(x);
-}
-__device__ float erfcinvf(float y)
-{
-    return __hip_erfinvf(1 - y);
-}
-__device__ float erfcxf(float x)
-{
-    return hc::precise_math::expf(x*x)*hc::precise_math::erfcf(x);
-}
-__device__ float erff(float x)
-{
-    return hc::precise_math::erff(x);
-}
-__device__ float erfinvf(float y)
-{
-    return __hip_erfinvf(y);
-}
-__device__ float exp10f(float x)
-{
-    return hc::precise_math::exp10f(x);
-}
-__device__ float exp2f(float x)
-{
-    return hc::precise_math::exp2f(x);
-}
-__device__ float expf(float x)
-{
-    return hc::precise_math::expf(x);
-}
-__device__ float expm1f(float x)
-{
-    return hc::precise_math::expm1f(x);
-}
-__device__ float fabsf(float x)
-{
-    return hc::precise_math::fabsf(x);
-}
-__device__ float fdimf(float x, float y)
-{
-    return hc::precise_math::fdimf(x, y);
-}
-__device__ float fdividef(float x, float y)
-{
-    return x/y;
-}
-__device__ float floorf(float x)
-{
-    return hc::precise_math::floorf(x);
-}
-__device__ float fmaf(float x, float y, float z)
-{
-    return hc::precise_math::fmaf(x, y, z);
-}
-__device__ float fmaxf(float x, float y)
-{
-    return hc::precise_math::fmaxf(x, y);
-}
-__device__ float fminf(float x, float y)
-{
-    return hc::precise_math::fminf(x, y);
-}
-__device__ float fmodf(float x, float y)
-{
-    return hc::precise_math::fmodf(x, y);
-}
-__device__ float frexpf(float x, int *nptr)
-{
-    return hc::precise_math::frexpf(x, nptr);
-}
-__device__ float hypotf(float x, float y)
-{
-    return hc::precise_math::hypotf(x, y);
-}
-__device__ float ilogbf(float x)
-{
-    return hc::precise_math::ilogbf(x);
-}
-__device__ unsigned isfinite(float a)
-{
-    return hc::precise_math::isfinite(a);
-}
-__device__ unsigned isinf(float a)
-{
-    return hc::precise_math::isinf(a);
-}
-__device__ unsigned isnan(float a)
-{
-    return hc::precise_math::isnan(a);
-}
-__device__ float j0f(float x)
-{
-    return __hip_j0f(x);
-}
-__device__ float j1f(float x)
-{
-    return __hip_j1f(x);
-}
-__device__ float jnf(int n, float x)
-{
-    return __hip_jnf(n, x);
-}
-__device__ float ldexpf(float x, int exp)
-{
-    return hc::precise_math::ldexpf(x, exp);
-}
-__device__ float lgammaf(float x, int *sign)
-{
-    return hc::precise_math::lgammaf(x, sign);
-}
-__device__ long long int llrintf(float x)
-{
-    int y = hc::precise_math::roundf(x);
-    long long int z = y;
-    return z;
-}
-__device__ long long int llroundf(float x)
-{
-    int y = hc::precise_math::roundf(x);
-    long long int z = y;
-    return z;
-}__device__ float log10f(float x)
-{
-    return hc::precise_math::log10f(x);
-}
-__device__ float log1pf(float x)
-{
-    return hc::precise_math::log1pf(x);
-}
-__device__ float log2f(float x)
-{
-    return hc::precise_math::log2f(x);
-}
-__device__ float logbf(float x)
-{
-    return hc::precise_math::logbf(x);
-}
-__device__ float logf(float x)
-{
-    return hc::precise_math::logf(x);
-}
-__device__ long int lrintf(float x)
-{
-    int y = hc::precise_math::roundf(x);
-    long int z = y;
-    return z;
-}
-__device__ long int lroundf(float x)
-{
-    long int y = hc::precise_math::roundf(x);
-    return y;
-}
-__device__ float modff(float x, float *iptr)
-{
-    return hc::precise_math::modff(x, iptr);
-}
-__device__ float nanf(const char* tagp)
-{
-    return hc::precise_math::nanf((int)*tagp);
-}
-__device__ float nearbyintf(float x)
-{
-    return hc::precise_math::nearbyintf(x);
-}
-__device__ float nextafterf(float x, float y)
-{
-    return hc::precise_math::nextafter(x, y);
-}
-__device__ float norm3df(float a, float b, float c)
-{
-     float x = a*a + b*b + c*c;
-     return hc::precise_math::sqrtf(x);
-}
-__device__ float norm4df(float a, float b, float c, float d)
-{
-     float x = a*a + b*b;
-     float y = c*c + d*d;
-     return hc::precise_math::sqrtf(x+y);
-}
-
-__device__ float normcdff(float y)
-{
-     return ((hc::precise_math::erff(y)/1.41421356237) + 1)/2;
-}
-__device__ float normcdfinvf(float y)
-{
-     return HIP_SQRT_2 * __hip_erfinvf(2*y-1);
-}
-__device__ float normf(int dim, const float *a)
-{
-    float x = 0.0f;
-    for(int i=0;i<dim;i++)
-    {
-        x = hc::precise_math::fmaf(a[i], a[i], x);
-    }
-    return hc::precise_math::sqrtf(x);
-}
-__device__ float powf(float x, float y)
-{
-    return hc::precise_math::powf(x, y);
-}
-__device__ float rcbrtf(float x)
-{
-    return hc::precise_math::rcbrtf(x);
-}
-__device__ float remainderf(float x, float y)
-{
-    return hc::precise_math::remainderf(x, y);
-}
-__device__ float remquof(float x, float y, int *quo)
-{
-    return hc::precise_math::remquof(x, y, quo);
-}
-__device__ float rhypotf(float x, float y)
-{
-    return 1/hc::precise_math::hypotf(x, y);
-}
-__device__ float rintf(float x)
-{
-    return hc::precise_math::roundf(x);
-}
-__device__ float rnorm3df(float a, float b, float c)
-{
-    float x = a*a + b*b + c*c;
-    return 1/hc::precise_math::sqrtf(x);
-}
-__device__ float rnorm4df(float a, float b, float c, float d)
-{
-    float x = a*a + b*b;
-    float y = c*c + d*d;
-    return 1/hc::precise_math::sqrtf(x+y);
-}
-__device__ float rnormf(int dim, const float* a)
-{
-    float x = 0.0f;
-    for(int i=0;i<dim;i++)
-    {
-        x = hc::precise_math::fmaf(a[i], a[i], x);
-    }
-    return 1/hc::precise_math::sqrtf(x);
-}
-__device__ float roundf(float x)
-{
-    return hc::precise_math::roundf(x);
-}
-__device__ float scalblnf(float x, long int n)
-{
-    return hc::precise_math::scalb(x, n);
-}
-__device__ float scalbnf(float x, int n)
-{
-    return hc::precise_math::scalbnf(x, n);
-}
-__device__ unsigned signbit(float a)
-{
-    return hc::precise_math::signbit(a);
-}
-__device__ void sincosf(float x, float *sptr, float *cptr)
-{
-    *sptr = hc::precise_math::sinf(x);
-    *cptr = hc::precise_math::cosf(x);
-}
-__device__ void sincospif(float x, float *sptr, float *cptr)
-{
-    *sptr = hc::precise_math::sinpif(x);
-    *cptr = hc::precise_math::cospif(x);
-}
-__device__ float sinf(float x)
-{
-    return hc::precise_math::sinf(x);
-}
-__device__ float sinhf(float x)
-{
-    return hc::precise_math::sinhf(x);
-}
-__device__ float tanf(float x)
-{
-    return hc::precise_math::tanf(x);
-}
-__device__ float tanhf(float x)
-{
-    return hc::precise_math::tanhf(x);
-}
-__device__ float tgammaf(float x)
-{
-    return hc::precise_math::tgammaf(x);
-}
-__device__ float truncf(float x)
-{
-    return hc::precise_math::truncf(x);
-}
-__device__ float y0f(float x)
-{
-    return __hip_y0f(x);
-}
-__device__ float y1f(float x)
-{
-    return __hip_y1f(x);
-}
-__device__ float ynf(int n, float x)
-{
-    return __hip_ynf(n, x);
-}
-__device__ float cospif(float x)
-{
-    return hc::precise_math::cospif(x);
-}
-__device__ float sinpif(float x)
-{
-    return hc::precise_math::sinpif(x);
-}
-__device__ float sqrtf(float x)
-{
-    return hc::precise_math::sqrtf(x);
-}
-__device__ float rsqrtf(float x)
-{
-    return hc::precise_math::rsqrtf(x);
-}
-
-/*
- * Double precision device math functions
- */
-
-__device__ double acos(double x)
-{
-    return hc::precise_math::acos(x);
-}
-__device__ double acosh(double x)
-{
-    return hc::precise_math::acosh(x);
-}
-__device__ double asin(double x)
-{
-    return hc::precise_math::asin(x);
-}
-__device__ double asinh(double x)
-{
-    return hc::precise_math::asinh(x);
-}
-__device__ double atan(double x)
-{
-    return hc::precise_math::atan(x);
-}
-__device__ double atan2(double y, double x)
-{
-    return hc::precise_math::atan2(y, x);
-}
-__device__ double atanh(double x)
-{
-    return hc::precise_math::atanh(x);
-}
-__device__ double cbrt(double x)
-{
-    return hc::precise_math::cbrt(x);
-}
-__device__ double ceil(double x)
-{
-    return hc::precise_math::ceil(x);
-}
-__device__ double copysign(double x, double y)
-{
-    return hc::precise_math::copysign(x, y);
-}
-__device__ double cos(double x)
-{
-    return hc::precise_math::cos(x);
-}
-__device__ double cosh(double x)
-{
-    return hc::precise_math::cosh(x);
-}
-__device__ double cospi(double x)
-{
-    return hc::precise_math::cospi(x);
-}
-__device__ double cyl_bessel_i0(double x);
-__device__ double cyl_bessel_i1(double x);
-__device__ double erf(double x)
-{
-    return hc::precise_math::erf(x);
-}
-__device__ double erfc(double x)
-{
-    return hc::precise_math::erfc(x);
-}
-__device__ double erfcinv(double x)
-{
-    return __hip_erfinv(1 - x);
-}
-__device__ double erfcx(double x)
-{
-    return hc::precise_math::exp(x*x)*hc::precise_math::erf(x);
-}
-__device__ double erfinv(double x)
-{
-    return __hip_erfinv(x);
-}
-__device__ double exp(double x)
-{
-    return hc::precise_math::exp(x);
-}
-__device__ double exp10(double x)
-{
-    return hc::precise_math::exp10(x);
-}
-__device__ double exp2(double x)
-{
-    return hc::precise_math::exp2(x);
-}
-__device__ double expm1(double x)
-{
-    return hc::precise_math::expm1(x);
-}
-__device__ double fabs(double x)
-{
-    return hc::precise_math::fabs(x);
-}
-__device__ double fdim(double x, double y)
-{
-    return hc::precise_math::fdim(x, y);
-}
-__device__ double fdivide(double x, double y)
-{
-    return x/y;
-}
-__device__ double floor(double x)
-{
-    return hc::precise_math::floor(x);
-}
-__device__ double fma(double x, double y, double z)
-{
-    return hc::precise_math::fma(x, y, z);
-}
-__device__ double fmax(double x, double y)
-{
-    return hc::precise_math::fmax(x, y);
-}
-__device__ double fmin(double x, double y)
-{
-    return hc::precise_math::fmin(x, y);
-}
-__device__ double fmod(double x, double y)
-{
-    return hc::precise_math::fmod(x, y);
-}
-__device__ double frexp(double x, int *y)
-{
-    return hc::precise_math::frexp(x, y);
-}
-__device__ double hypot(double x, double y)
-{
-    return hc::precise_math::hypot(x, y);
-}
-__device__ double ilogb(double x)
-{
-    return hc::precise_math::ilogb(x);
-}
-__device__ unsigned isfinite(double x)
-{
-    return hc::precise_math::isfinite(x);
-}
-__device__ unsigned isinf(double x)
-{
-    return hc::precise_math::isinf(x);
-}
-__device__ unsigned isnan(double x)
-{
-    return hc::precise_math::isnan(x);
-}
-__device__ double j0(double x)
-{
-    return __hip_j0(x);
-}
-__device__ double j1(double x)
-{
-    return __hip_j1(x);
-}
-__device__ double jn(int n, double x)
-{
-    return __hip_jn(n, x);
-}
-__device__ double ldexp(double x, int exp)
-{
-    return hc::precise_math::ldexp(x, exp);
-}
-__device__ double lgamma(double x, int *sign)
-{
-    return hc::precise_math::lgamma(x, sign);
-}
-__device__ long long int llrint(double x)
-{
-    long long int y = hc::precise_math::round(x);
-    return y;
-}
-__device__ long long int llround(double x)
-{
-    long long int y = hc::precise_math::round(x);
-    return y;
-}
-__device__ double log(double x)
-{
-    return hc::precise_math::log(x);
-}
-__device__ double log10(double x)
-{
-    return hc::precise_math::log10(x);
-}
-__device__ double log1p(double x)
-{
-    return hc::precise_math::log1p(x);
-}
-__device__ double log2(double x)
-{
-    return hc::precise_math::log2(x);
-}
-__device__ double logb(double x)
-{
-    return hc::precise_math::logb(x);
-}
-__device__ long int lrint(double x)
-{
-    long int y = hc::precise_math::round(x);
-    return y;
-}
-__device__ long int lround(double x)
-{
-    long int y = hc::precise_math::round(x);
-    return y;
-}
-__device__ double modf(double x, double *iptr)
-{
-    return hc::precise_math::modf(x, iptr);
-}
-__device__ double nan(const char *tagp)
-{
-    return hc::precise_math::nan((int)*tagp);
-}
-__device__ double nearbyint(double x)
-{
-    return hc::precise_math::nearbyint(x);
-}
-__device__ double nextafter(double x, double y)
-{
-    return hc::precise_math::nextafter(x, y);
-}
-__device__ double norm3d(double a, double b, double c)
-{
-    double x = a*a + b*b + c*c;
-    return hc::precise_math::sqrt(x);
-}
-__device__ double norm4d(double a, double b, double c, double d)
-{
-    double x = a*a + b*b;
-    double y = c*c + d*d;
-    return hc::precise_math::sqrt(x+y);
-}
-__device__ double normcdf(double y)
-{
-     return ((hc::precise_math::erf(y)/HIP_SQRT_2) + 1)/2;
-}
-__device__ double pow(double x, double y)
-{
-    return hc::precise_math::pow(x, y);
-}
-__device__ double rcbrt(double x)
-{
-    return hc::precise_math::rcbrt(x);
-}
-__device__ double remainder(double x, double y)
-{
-    return hc::precise_math::remainder(x, y);
-}
-__device__ double remquo(double x, double y, int *quo)
-{
-    return hc::precise_math::remquo(x, y, quo);
-}
-__device__ double rhypot(double x, double y)
-{
-    return 1/hc::precise_math::sqrt(x*x + y*y);
-}
-__device__ double rint(double x)
-{
-    return hc::precise_math::round(x);
-}
-__device__ double rnorm3d(double a, double b, double c)
-{
-    return hc::precise_math::rsqrt(a*a + b*b + c*c);
-}
-__device__ double rnorm4d(double a, double b, double c, double d)
-{
-    return hc::precise_math::rsqrt(a*a + b*b + c*c + d*d);
-}
-__device__ double rnorm(int dim, const double* t)
-{
-    double x = 0.0;
-    for(int i=0;i<dim;i++)
-    {
-        x = hc::precise_math::fma(t[i], t[i], x);
-    }
-    return 1/x;
-}
-__device__ double round(double x)
-{
-    return hc::precise_math::round(x);
-}
-__device__ double rsqrt(double x)
-{
-    return hc::precise_math::rsqrt(x);
-}
-__device__ double scalbln(double x, long int n)
-{
-    return hc::precise_math::scalb(x, n);
-}
-__device__ double scalbn(double x, int n)
-{
-    return hc::precise_math::scalbn(x, n);
-}
-__device__ unsigned signbit(double x)
-{
-    return hc::precise_math::signbit(x);
-}
-__device__ double sin(double x)
-{
-    return hc::precise_math::sin(x);
-}
-__device__ void sincos(double x, double *sptr, double *cptr)
-{
-    *sptr = hc::precise_math::sin(x);
-    *cptr = hc::precise_math::cos(x);
-}
-__device__ void sincospi(double x, double *sptr, double *cptr)
-{
-    *sptr = hc::precise_math::sinpi(x);
-    *cptr = hc::precise_math::cospi(x);
-}
-__device__ double sinh(double x)
-{
-    return hc::precise_math::sinh(x);
-}
-__device__ double sinpi(double x)
-{
-    return hc::precise_math::sinpi(x);
-}
-__device__ double sqrt(double x)
-{
-    return hc::precise_math::sqrt(x);
-}
-__device__ double tan(double x)
-{
-    return hc::precise_math::tan(x);
-}
-__device__ double tanh(double x)
-{
-    return hc::precise_math::tanh(x);
-}
-__device__ double tgamma(double x)
-{
-    return hc::precise_math::tgamma(x);
-}
-__device__ double trunc(double x)
-{
-    return hc::precise_math::trunc(x);
-}
-__device__ double y0(double x)
-{
-    return __hip_y0(x);
-}
-__device__ double y1(double x)
-{
-    return __hip_y1(x);
-}
-__device__ double yn(int n, double x)
-{
-    return __hip_yn(n, x);
-}
-
 const int warpSize = 64;
 
 __device__ long long int clock64() { return (long long int)hc::__cycle_u64(); };
@@ -2680,185 +1856,3 @@ float __hip_host_ynf(int n, float x)
     }
     return b0;
 }
-
-__host__ float modff(float x, float *iptr)
-{
-    return std::modf(x, iptr);
-}
-
-__host__ float erfcinvf(float y)
-{
-    return __hip_host_erfcinvf(y);
-}
-
-__host__ double erfcinv(double y)
-{
-    return __hip_host_erfcinv(y);
-}
-
-__host__ float erfinvf(float x)
-{
-    return __hip_host_erfinvf(x);
-}
-
-__host__ double erfinv(double x)
-{
-    return __hip_host_erfinv(x);
-}
-
-__host__ double fdivide(double x, double y)
-{
-    return x/y;
-}
-
-__host__ float normcdff(float t)
-{
-     return (1 - std::erf(-t/std::sqrt(2)))/2;
-}
-
-__host__ double normcdf(double x)
-{
-     return (1 - std::erf(-x/std::sqrt(2)))/2;
-}
-
-__host__ float erfcxf(float x)
-{
-     return std::exp(x*x) * std::erfc(x);
-}
-
-__host__ double erfcx(double x)
-{
-     return std::exp(x*x) * std::erfc(x);
-}
-
-__host__ float rhypotf(float x, float y)
-{
-     return 1 / std::sqrt(x*x + y*y);
-}
-
-__host__ double rhypot(double x, double y)
-{
-    return 1 / std::sqrt(x*x + y*y);
-}
-
-__host__ float rcbrtf(float a)
-{
-    return 1 / std::cbrt(a);
-}
-
-__host__ double rcbrt(double a)
-{
-    return 1 / std::cbrt(a);
-}
-
-__host__ float normf(int dim, const float *a)
-{
-    float val = 0.0f;
-    for(int i=0;i<dim;i++)
-    {
-        val = val + a[i] * a[i];
-    }
-    return val;
-}
-
-__host__ double norm(int dim, const double *a)
-{
-    double val = 0.0;
-    for(int i=0;i<dim;i++)
-    {
-        val = val + a[i] * a[i];
-    }
-    return val;
-}
-
-__host__ float rnormf(int dim, const float *t)
-{
-    float val = 0.0f;
-    for(int i=0;i<dim;i++)
-    {
-        val = val + t[i] * t[i];
-    }
-    return 1 / std::sqrt(val);
-}
-
-__host__ double rnorm(int dim, const double *t)
-{
-    double val = 0.0;
-    for(int i=0;i<dim;i++)
-    {
-        val = val + t[i] * t[i];
-    }
-    return 1 / std::sqrt(val);
-}
-
-__host__ float rnorm4df(float a, float b, float c, float d)
-{
-    return 1 / std::sqrt(a*a + b*b + c*c + d*d);
-}
-
-__host__ double rnorm4d(double a, double b, double c, double d)
-{
-    return 1 / std::sqrt(a*a + b*b + c*c + d*d);
-}
-
-__host__ float rnorm3df(float a, float b, float c)
-{
-    return 1 / std::sqrt(a*a + b*b + c*c);
-}
-
-__host__ double rnorm3d(double a, double b, double c)
-{
-    return 1 / std::sqrt(a*a + b*b + c*c);
-}
-
-__host__ void sincospif(float x, float *sptr, float *cptr)
-{
-    *sptr = std::sin(HIP_PI*x);
-    *cptr = std::cos(HIP_PI*x);
-}
-
-__host__ void sincospi(double x, double *sptr, double *cptr)
-{
-    *sptr = std::sin(HIP_PI*x);
-    *cptr = std::cos(HIP_PI*x);
-}
-
-__host__ float normcdfinvf(float x)
-{
-    return std::sqrt(2) * erfinv(2*x-1);
-}
-
-__host__ double normcdfinv(double x)
-{
-    return std::sqrt(2) * erfinv(2*x-1);
-}
-
-__host__ float nextafterf(float x, float y)
-{
-    return std::nextafter(x, y);
-}
-
-__host__ double nextafter(double x, double y)
-{
-    return std::nextafter(x, y);
-}
-
-__host__ float norm3df(float a, float b, float c)
-{
-    return std::sqrt(a*a + b*b + c*c);
-}
-
-__host__ float norm4df(float a, float b, float c, float d)
-{
-    return std::sqrt(a*a + b*b + c*c + d*d);
-}
-
-__host__ double norm3d(double a, double b, double c)
-{
-    return std::sqrt(a*a + b*b + c*c);
-}
-
-__host__ double norm4d(double a, double b, double c, double d)
-{
-    return std::sqrt(a*a + b*b + c*c + d*d);
-}
diff --git a/src/device_util.h b/src/device_util.h
index 18811321d9..e302b7db8a 100644
--- a/src/device_util.h
+++ b/src/device_util.h
@@ -23,6 +23,8 @@ THE SOFTWARE.
 #ifndef DEVICE_UTIL_H
 #define DEVICE_UTIL_H
 
+#include<hip/hcc_detail/hip_runtime.h>
+
 /*
  Heap size computation for malloc and free device functions.
 */
@@ -35,4 +37,119 @@ THE SOFTWARE.
 #define SIZE_MALLOC NUM_PAGES * SIZE_OF_PAGE
 #define SIZE_OF_HEAP SIZE_MALLOC
 
+#define HIP_SQRT_2 1.41421356237
+#define HIP_SQRT_PI 1.77245385091
+
+#define __hip_erfinva3 -0.140543331
+#define __hip_erfinva2 0.914624893
+#define __hip_erfinva1 -1.645349621
+#define __hip_erfinva0 0.886226899
+
+#define __hip_erfinvb4 0.012229801
+#define __hip_erfinvb3 -0.329097515
+#define __hip_erfinvb2 1.442710462
+#define __hip_erfinvb1 -2.118377725
+#define __hip_erfinvb0 1
+
+#define __hip_erfinvc3 1.641345311
+#define __hip_erfinvc2 3.429567803
+#define __hip_erfinvc1 -1.62490649
+#define __hip_erfinvc0 -1.970840454
+
+#define __hip_erfinvd2 1.637067800
+#define __hip_erfinvd1 3.543889200
+#define __hip_erfinvd0 1
+
+#define HIP_PI 3.14159265358979323846
+
+__device__ void* __hip_hc_malloc(size_t size);
+__device__ void* __hip_hc_free(void* ptr);
+
+__device__ float __hip_erfinvf(float x);
+__device__ double __hip_erfinv(double x);
+
+__device__ float __hip_j0f(float x);
+__device__ double __hip_j0(double x);
+
+__device__ float __hip_j1f(float x);
+__device__ double __hip_j1(double x);
+
+__device__ float __hip_y0f(float x);
+__device__ double __hip_y0(double x);
+
+__device__ float __hip_y1f(float x);
+__device__ double __hip_y1(double x);
+
+__device__ float __hip_jnf(int n, float x);
+__device__ double __hip_jn(int n, double x);
+
+__device__ float __hip_ynf(int n, float x);
+__device__ double __hip_yn(int n, double x);
+
+__device__ float __hip_precise_cosf(float x);
+__device__ float __hip_precise_exp10f(float x);
+__device__ float __hip_precise_expf(float x);
+__device__ float __hip_precise_frsqrt_rn(float x);
+__device__ float __hip_precise_fsqrt_rd(float x);
+__device__ float __hip_precise_fsqrt_rn(float x);
+__device__ float __hip_precise_fsqrt_ru(float x);
+__device__ float __hip_precise_fsqrt_rz(float x);
+__device__ float __hip_precise_log10f(float x);
+__device__ float __hip_precise_log2f(float x);
+__device__ float __hip_precise_logf(float x);
+__device__ float __hip_precise_powf(float base, float exponent);
+__device__ void __hip_precise_sincosf(float x, float *s, float *c);
+__device__ float __hip_precise_sinf(float x);
+__device__ float __hip_precise_tanf(float x);
+// Double Precision Math
+__device__ double __hip_precise_dsqrt_rd(double x);
+__device__ double __hip_precise_dsqrt_rn(double x);
+__device__ double __hip_precise_dsqrt_ru(double x);
+__device__ double __hip_precise_dsqrt_rz(double x);
+
+
+
+// Float Fast Math
+__device__ float __hip_fast_exp10f(float x);
+__device__ float __hip_fast_expf(float x);
+__device__ float __hip_fast_frsqrt_rn(float x);
+__device__ float __hip_fast_fsqrt_rn(float x);
+__device__ float __hip_fast_fsqrt_ru(float x);
+__device__ float __hip_fast_fsqrt_rz(float x);
+__device__ float __hip_fast_log10f(float x);
+__device__ float __hip_fast_logf(float x);
+__device__ float __hip_fast_powf(float base, float exponent);
+__device__ void __hip_fast_sincosf(float x, float *s, float *c);
+__device__ float __hip_fast_tanf(float x);
+// Double Precision Math
+__device__ double __hip_fast_dsqrt_rd(double x);
+__device__ double __hip_fast_dsqrt_rn(double x);
+__device__ double __hip_fast_dsqrt_ru(double x);
+__device__ double __hip_fast_dsqrt_rz(double x);
+__device__ void  __threadfence_system(void);
+
+float __hip_host_erfinvf(float x);
+double __hip_host_erfinv(double x);
+
+float __hip_host_erfcinvf(float y);
+double __hip_host_erfcinv(double y);
+
+float __hip_host_j0f(float x);
+double __hip_host_j0(double x);
+
+float __hip_host_j1f(float x);
+double __hip_host_j1(double x);
+
+float __hip_host_y0f(float x);
+double __hip_host_y1(double x);
+
+float __hip_host_y1f(float x);
+double __hip_host_y1(double x);
+
+float __hip_host_jnf(int n, float x);
+double __hip_host_jn(int n, double x);
+
+float __hip_host_ynf(int n, float x);
+double __hip_host_yn(int n, double x);
+
 #endif
diff --git a/src/math_functions.cpp b/src/math_functions.cpp
new file mode 100644
index 0000000000..34a80448db
--- /dev/null
+++ b/src/math_functions.cpp
@@ -0,0 +1,971 @@
+
+/*
+Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hc.hpp>
+#include <grid_launch.h>
+#include <hc_math.hpp>
+#include "device_util.h"
+#include "hip/hcc_detail/device_functions.h"
+#include "hip/hip_runtime.h"
+
+__device__ float acosf(float x)
+{
+    return hc::precise_math::acosf(x);
+}
+__device__ float acoshf(float x)
+{
+    return hc::precise_math::acoshf(x);
+}
+__device__ float asinf(float x)
+{
+    return hc::precise_math::asinf(x);
+}
+__device__ float asinhf(float x)
+{
+    return hc::precise_math::asinhf(x);
+}
+__device__ float atan2f(float y, float x)
+{
+    return hc::precise_math::atan2f(x, y);
+}
+__device__ float atanf(float x)
+{
+    return hc::precise_math::atanf(x);
+}
+__device__ float atanhf(float x)
+{
+    return hc::precise_math::atanhf(x);
+}
+__device__ float cbrtf(float x)
+{
+    return hc::precise_math::cbrtf(x);
+}
+__device__ float ceilf(float x)
+{
+    return hc::precise_math::ceilf(x);
+}
+__device__ float copysignf(float x, float y)
+{
+    return hc::precise_math::copysignf(x, y);
+}
+__device__ float cosf(float x)
+{
+    return hc::precise_math::cosf(x);
+}
+__device__ float coshf(float x)
+{
+    return hc::precise_math::coshf(x);
+}
+__device__ float cyl_bessel_i0f(float x);
+__device__ float cyl_bessel_i1f(float x);
+__device__ float erfcf(float x)
+{
+    return hc::precise_math::erfcf(x);
+}
+__device__ float erfcinvf(float y)
+{
+    return __hip_erfinvf(1 - y);
+}
+__device__ float erfcxf(float x)
+{
+    return hc::precise_math::expf(x*x)*hc::precise_math::erfcf(x);
+}
+__device__ float erff(float x)
+{
+    return hc::precise_math::erff(x);
+}
+__device__ float erfinvf(float y)
+{
+    return __hip_erfinvf(y);
+}
+__device__ float exp10f(float x)
+{
+    return hc::precise_math::exp10f(x);
+}
+__device__ float exp2f(float x)
+{
+    return hc::precise_math::exp2f(x);
+}
+__device__ float expf(float x)
+{
+    return hc::precise_math::expf(x);
+}
+__device__ float expm1f(float x)
+{
+    return hc::precise_math::expm1f(x);
+}
+__device__ float fabsf(float x)
+{
+    return hc::precise_math::fabsf(x);
+}
+__device__ float fdimf(float x, float y)
+{
+    return hc::precise_math::fdimf(x, y);
+}
+__device__ float fdividef(float x, float y)
+{
+    return x/y;
+}
+__device__ float floorf(float x)
+{
+    return hc::precise_math::floorf(x);
+}
+__device__ float fmaf(float x, float y, float z)
+{
+    return hc::precise_math::fmaf(x, y, z);
+}
+__device__ float fmaxf(float x, float y)
+{
+    return hc::precise_math::fmaxf(x, y);
+}
+__device__ float fminf(float x, float y)
+{
+    return hc::precise_math::fminf(x, y);
+}
+__device__ float fmodf(float x, float y)
+{
+    return hc::precise_math::fmodf(x, y);
+}
+__device__ float frexpf(float x, int *nptr)
+{
+    return hc::precise_math::frexpf(x, nptr);
+}
+__device__ float hypotf(float x, float y)
+{
+    return hc::precise_math::hypotf(x, y);
+}
+__device__ float ilogbf(float x)
+{
+    return hc::precise_math::ilogbf(x);
+}
+__device__ unsigned isfinite(float a)
+{
+    return hc::precise_math::isfinite(a);
+}
+__device__ unsigned isinf(float a)
+{
+    return hc::precise_math::isinf(a);
+}
+__device__ unsigned isnan(float a)
+{
+    return hc::precise_math::isnan(a);
+}
+__device__ float j0f(float x)
+{
+    return __hip_j0f(x);
+}
+__device__ float j1f(float x)
+{
+    return __hip_j1f(x);
+}
+__device__ float jnf(int n, float x)
+{
+    return __hip_jnf(n, x);
+}
+__device__ float ldexpf(float x, int exp)
+{
+    return hc::precise_math::ldexpf(x, exp);
+}
+__device__ float lgammaf(float x, int *sign)
+{
+    return hc::precise_math::lgammaf(x, sign);
+}
+__device__ long long int llrintf(float x)
+{
+    int y = hc::precise_math::roundf(x);
+    long long int z = y;
+    return z;
+}
+__device__ long long int llroundf(float x)
+{
+    int y = hc::precise_math::roundf(x);
+    long long int z = y;
+    return z;
+}__device__ float log10f(float x)
+{
+    return hc::precise_math::log10f(x);
+}
+__device__ float log1pf(float x)
+{
+    return hc::precise_math::log1pf(x);
+}
+__device__ float log2f(float x)
+{
+    return hc::precise_math::log2f(x);
+}
+__device__ float logbf(float x)
+{
+    return hc::precise_math::logbf(x);
+}
+__device__ float logf(float x)
+{
+    return hc::precise_math::logf(x);
+}
+__device__ long int lrintf(float x)
+{
+    int y = hc::precise_math::roundf(x);
+    long int z = y;
+    return z;
+}
+__device__ long int lroundf(float x)
+{
+    long int y = hc::precise_math::roundf(x);
+    return y;
+}
+__device__ float modff(float x, float *iptr)
+{
+    return hc::precise_math::modff(x, iptr);
+}
+__device__ float nanf(const char* tagp)
+{
+    return hc::precise_math::nanf((int)*tagp);
+}
+__device__ float nearbyintf(float x)
+{
+    return hc::precise_math::nearbyintf(x);
+}
+__device__ float nextafterf(float x, float y)
+{
+    return hc::precise_math::nextafter(x, y);
+}
+__device__ float norm3df(float a, float b, float c)
+{
+     float x = a*a + b*b + c*c;
+     return hc::precise_math::sqrtf(x);
+}
+__device__ float norm4df(float a, float b, float c, float d)
+{
+     float x = a*a + b*b;
+     float y = c*c + d*d;
+     return hc::precise_math::sqrtf(x+y);
+}
+
+__device__ float normcdff(float y)
+{
+     return ((hc::precise_math::erff(y)/1.41421356237) + 1)/2;
+}
+__device__ float normcdfinvf(float y)
+{
+     return HIP_SQRT_2 * __hip_erfinvf(2*y-1);
+}
+__device__ float normf(int dim, const float *a)
+{
+    float x = 0.0f;
+    for(int i=0;i<dim;i++)
+    {
+        x = hc::precise_math::fmaf(a[i], a[i], x);
+    }
+    return hc::precise_math::sqrtf(x);
+}
+__device__ float powf(float x, float y)
+{
+    return hc::precise_math::powf(x, y);
+}
+__device__ float rcbrtf(float x)
+{
+    return hc::precise_math::rcbrtf(x);
+}
+__device__ float remainderf(float x, float y)
+{
+    return hc::precise_math::remainderf(x, y);
+}
+__device__ float remquof(float x, float y, int *quo)
+{
+    return hc::precise_math::remquof(x, y, quo);
+}
+__device__ float rhypotf(float x, float y)
+{
+    return 1/hc::precise_math::hypotf(x, y);
+}
+__device__ float rintf(float x)
+{
+    return hc::precise_math::roundf(x);
+}
+__device__ float rnorm3df(float a, float b, float c)
+{
+    float x = a*a + b*b + c*c;
+    return 1/hc::precise_math::sqrtf(x);
+}
+__device__ float rnorm4df(float a, float b, float c, float d)
+{
+    float x = a*a + b*b;
+    float y = c*c + d*d;
+    return 1/hc::precise_math::sqrtf(x+y);
+}
+__device__ float rnormf(int dim, const float* a)
+{
+    float x = 0.0f;
+    for(int i=0;i<dim;i++)
+    {
+        x = hc::precise_math::fmaf(a[i], a[i], x);
+    }
+    return 1/hc::precise_math::sqrtf(x);
+}
+__device__ float roundf(float x)
+{
+    return hc::precise_math::roundf(x);
+}
+__device__ float scalblnf(float x, long int n)
+{
+    return hc::precise_math::scalb(x, n);
+}
+__device__ float scalbnf(float x, int n)
+{
+    return hc::precise_math::scalbnf(x, n);
+}
+__device__ unsigned signbit(float a)
+{
+    return hc::precise_math::signbit(a);
+}
+__device__ void sincosf(float x, float *sptr, float *cptr)
+{
+    *sptr = hc::precise_math::sinf(x);
+    *cptr = hc::precise_math::cosf(x);
+}
+__device__ void sincospif(float x, float *sptr, float *cptr)
+{
+    *sptr = hc::precise_math::sinpif(x);
+    *cptr = hc::precise_math::cospif(x);
+}
+__device__ float sinf(float x)
+{
+    return hc::precise_math::sinf(x);
+}
+__device__ float sinhf(float x)
+{
+    return hc::precise_math::sinhf(x);
+}
+__device__ float tanf(float x)
+{
+    return hc::precise_math::tanf(x);
+}
+__device__ float tanhf(float x)
+{
+    return hc::precise_math::tanhf(x);
+}
+__device__ float tgammaf(float x)
+{
+    return hc::precise_math::tgammaf(x);
+}
+__device__ float truncf(float x)
+{
+    return hc::precise_math::truncf(x);
+}
+__device__ float y0f(float x)
+{
+    return __hip_y0f(x);
+}
+__device__ float y1f(float x)
+{
+    return __hip_y1f(x);
+}
+__device__ float ynf(int n, float x)
+{
+    return __hip_ynf(n, x);
+}
+__device__ float cospif(float x)
+{
+    return hc::precise_math::cospif(x);
+}
+__device__ float sinpif(float x)
+{
+    return hc::precise_math::sinpif(x);
+}
+__device__ float sqrtf(float x)
+{
+    return hc::precise_math::sqrtf(x);
+}
+__device__ float rsqrtf(float x)
+{
+    return hc::precise_math::rsqrtf(x);
+}
+
+/*
+ * Double precision device math functions
+ */
+
+__device__ double acos(double x)
+{
+    return hc::precise_math::acos(x);
+}
+__device__ double acosh(double x)
+{
+    return hc::precise_math::acosh(x);
+}
+__device__ double asin(double x)
+{
+    return hc::precise_math::asin(x);
+}
+__device__ double asinh(double x)
+{
+    return hc::precise_math::asinh(x);
+}
+__device__ double atan(double x)
+{
+    return hc::precise_math::atan(x);
+}
+__device__ double atan2(double y, double x)
+{
+    return hc::precise_math::atan2(y, x);
+}
+__device__ double atanh(double x)
+{
+    return hc::precise_math::atanh(x);
+}
+__device__ double cbrt(double x)
+{
+    return hc::precise_math::cbrt(x);
+}
+__device__ double ceil(double x)
+{
+    return hc::precise_math::ceil(x);
+}
+__device__ double copysign(double x, double y)
+{
+    return hc::precise_math::copysign(x, y);
+}
+__device__ double cos(double x)
+{
+    return hc::precise_math::cos(x);
+}
+__device__ double cosh(double x)
+{
+    return hc::precise_math::cosh(x);
+}
+__device__ double cospi(double x)
+{
+    return hc::precise_math::cospi(x);
+}
+__device__ double cyl_bessel_i0(double x);
+__device__ double cyl_bessel_i1(double x);
+__device__ double erf(double x)
+{
+    return hc::precise_math::erf(x);
+}
+__device__ double erfc(double x)
+{
+    return hc::precise_math::erfc(x);
+}
+__device__ double erfcinv(double x)
+{
+    return __hip_erfinv(1 - x);
+}
+__device__ double erfcx(double x)
+{
+    return hc::precise_math::exp(x*x)*hc::precise_math::erf(x);
+}
+__device__ double erfinv(double x)
+{
+    return __hip_erfinv(x);
+}
+__device__ double exp(double x)
+{
+    return hc::precise_math::exp(x);
+}
+__device__ double exp10(double x)
+{
+    return hc::precise_math::exp10(x);
+}
+__device__ double exp2(double x)
+{
+    return hc::precise_math::exp2(x);
+}
+__device__ double expm1(double x)
+{
+    return hc::precise_math::expm1(x);
+}
+__device__ double fabs(double x)
+{
+    return hc::precise_math::fabs(x);
+}
+__device__ double fdim(double x, double y)
+{
+    return hc::precise_math::fdim(x, y);
+}
+__device__ double fdivide(double x, double y)
+{
+    return x/y;
+}
+__device__ double floor(double x)
+{
+    return hc::precise_math::floor(x);
+}
+__device__ double fma(double x, double y, double z)
+{
+    return hc::precise_math::fma(x, y, z);
+}
+__device__ double fmax(double x, double y)
+{
+    return hc::precise_math::fmax(x, y);
+}
+__device__ double fmin(double x, double y)
+{
+    return hc::precise_math::fmin(x, y);
+}
+__device__ double fmod(double x, double y)
+{
+    return hc::precise_math::fmod(x, y);
+}
+__device__ double frexp(double x, int *y)
+{
+    return hc::precise_math::frexp(x, y);
+}
+__device__ double hypot(double x, double y)
+{
+    return hc::precise_math::hypot(x, y);
+}
+__device__ double ilogb(double x)
+{
+    return hc::precise_math::ilogb(x);
+}
+__device__ unsigned isfinite(double x)
+{
+    return hc::precise_math::isfinite(x);
+}
+__device__ unsigned isinf(double x)
+{
+    return hc::precise_math::isinf(x);
+}
+__device__ unsigned isnan(double x)
+{
+    return hc::precise_math::isnan(x);
+}
+__device__ double j0(double x)
+{
+    return __hip_j0(x);
+}
+__device__ double j1(double x)
+{
+    return __hip_j1(x);
+}
+__device__ double jn(int n, double x)
+{
+    return __hip_jn(n, x);
+}
+__device__ double ldexp(double x, int exp)
+{
+    return hc::precise_math::ldexp(x, exp);
+}
+__device__ double lgamma(double x, int *sign)
+{
+    return hc::precise_math::lgamma(x, sign);
+}
+__device__ long long int llrint(double x)
+{
+    long long int y = hc::precise_math::round(x);
+    return y;
+}
+__device__ long long int llround(double x)
+{
+    long long int y = hc::precise_math::round(x);
+    return y;
+}
+__device__ double log(double x)
+{
+    return hc::precise_math::log(x);
+}
+__device__ double log10(double x)
+{
+    return hc::precise_math::log10(x);
+}
+__device__ double log1p(double x)
+{
+    return hc::precise_math::log1p(x);
+}
+__device__ double log2(double x)
+{
+    return hc::precise_math::log2(x);
+}
+__device__ double logb(double x)
+{
+    return hc::precise_math::logb(x);
+}
+__device__ long int lrint(double x)
+{
+    long int y = hc::precise_math::round(x);
+    return y;
+}
+__device__ long int lround(double x)
+{
+    long int y = hc::precise_math::round(x);
+    return y;
+}
+__device__ double modf(double x, double *iptr)
+{
+    return hc::precise_math::modf(x, iptr);
+}
+__device__ double nan(const char *tagp)
+{
+    return hc::precise_math::nan((int)*tagp);
+}
+__device__ double nearbyint(double x)
+{
+    return hc::precise_math::nearbyint(x);
+}
+__device__ double nextafter(double x, double y)
+{
+    return hc::precise_math::nextafter(x, y);
+}
+__device__ double norm3d(double a, double b, double c)
+{
+    double x = a*a + b*b + c*c;
+    return hc::precise_math::sqrt(x);
+}
+__device__ double norm4d(double a, double b, double c, double d)
+{
+    double x = a*a + b*b;
+    double y = c*c + d*d;
+    return hc::precise_math::sqrt(x+y);
+}
+__device__ double normcdf(double y)
+{
+     return ((hc::precise_math::erf(y)/HIP_SQRT_2) + 1)/2;
+}
+__device__ double pow(double x, double y)
+{
+    return hc::precise_math::pow(x, y);
+}
+__device__ double rcbrt(double x)
+{
+    return hc::precise_math::rcbrt(x);
+}
+__device__ double remainder(double x, double y)
+{
+    return hc::precise_math::remainder(x, y);
+}
+__device__ double remquo(double x, double y, int *quo)
+{
+    return hc::precise_math::remquo(x, y, quo);
+}
+__device__ double rhypot(double x, double y)
+{
+    return 1/hc::precise_math::sqrt(x*x + y*y);
+}
+__device__ double rint(double x)
+{
+    return hc::precise_math::round(x);
+}
+__device__ double rnorm3d(double a, double b, double c)
+{
+    return hc::precise_math::rsqrt(a*a + b*b + c*c);
+}
+__device__ double rnorm4d(double a, double b, double c, double d)
+{
+    return hc::precise_math::rsqrt(a*a + b*b + c*c + d*d);
+}
+__device__ double rnorm(int dim, const double* t)
+{
+    double x = 0.0;
+    for(int i=0;i<dim;i++)
+    {
+        x = hc::precise_math::fma(t[i], t[i], x);
+    }
+    return 1/x;
+}
+__device__ double round(double x)
+{
+    return hc::precise_math::round(x);
+}
+__device__ double rsqrt(double x)
+{
+    return hc::precise_math::rsqrt(x);
+}
+__device__ double scalbln(double x, long int n)
+{
+    return hc::precise_math::scalb(x, n);
+}
+__device__ double scalbn(double x, int n)
+{
+    return hc::precise_math::scalbn(x, n);
+}
+__device__ unsigned signbit(double x)
+{
+    return hc::precise_math::signbit(x);
+}
+__device__ double sin(double x)
+{
+    return hc::precise_math::sin(x);
+}
+__device__ void sincos(double x, double *sptr, double *cptr)
+{
+    *sptr = hc::precise_math::sin(x);
+    *cptr = hc::precise_math::cos(x);
+}
+__device__ void sincospi(double x, double *sptr, double *cptr)
+{
+    *sptr = hc::precise_math::sinpi(x);
+    *cptr = hc::precise_math::cospi(x);
+}
+__device__ double sinh(double x)
+{
+    return hc::precise_math::sinh(x);
+}
+__device__ double sinpi(double x)
+{
+    return hc::precise_math::sinpi(x);
+}
+__device__ double sqrt(double x)
+{
+    return hc::precise_math::sqrt(x);
+}
+__device__ double tan(double x)
+{
+    return hc::precise_math::tan(x);
+}
+__device__ double tanh(double x)
+{
+    return hc::precise_math::tanh(x);
+}
+__device__ double tgamma(double x)
+{
+    return hc::precise_math::tgamma(x);
+}
+__device__ double trunc(double x)
+{
+    return hc::precise_math::trunc(x);
+}
+__device__ double y0(double x)
+{
+    return __hip_y0(x);
+}
+__device__ double y1(double x)
+{
+    return __hip_y1(x);
+}
+__device__ double yn(int n, double x)
+{
+    return __hip_yn(n, x);
+}
+
+
+__host__ float cospif(float x)
+{
+    return std::cos(x*HIP_PI);
+}
+
+__host__ float fdividef(float x, float y)
+{
+  return x / y;
+}
+
+__host__ int isfinite(float x)
+{
+  return std::isfinite(x);
+}
+
+__host__ int signbit(float x)
+{
+  return std::signbit(x);
+}
+
+__host__ int sinpif(float x)
+{
+  return std::sin(x*HIP_PI);
+}
+
+__host__ float rsqrtf(float x)
+{
+  return 1 / std::sqrt(x);
+}
+
+__host__ float modff(float x, float *iptr)
+{
+    return std::modf(x, iptr);
+}
+
+__host__ float erfcinvf(float y)
+{
+    return __hip_host_erfcinvf(y);
+}
+
+__host__ double erfcinv(double y)
+{
+    return __hip_host_erfcinv(y);
+}
+
+__host__ float erfinvf(float x)
+{
+    return __hip_host_erfinvf(x);
+}
+
+__host__ double erfinv(double x)
+{
+    return __hip_host_erfinv(x);
+}
+
+__host__ double fdivide(double x, double y)
+{
+    return x/y;
+}
+
+__host__ float normcdff(float t)
+{
+     return (1 - std::erf(-t/std::sqrt(2)))/2;
+}
+
+__host__ double normcdf(double x)
+{
+     return (1 - std::erf(-x/std::sqrt(2)))/2;
+}
+
+__host__ float erfcxf(float x)
+{
+     return std::exp(x*x) * std::erfc(x);
+}
+
+__host__ double erfcx(double x)
+{
+     return std::exp(x*x) * std::erfc(x);
+}
+
+__host__ float rhypotf(float x, float y)
+{
+     return 1 / std::sqrt(x*x + y*y);
+}
+
+__host__ double rhypot(double x, double y)
+{
+    return 1 / std::sqrt(x*x + y*y);
+}
+
+__host__ float rcbrtf(float a)
+{
+    return 1 / std::cbrt(a);
+}
+
+__host__ double rcbrt(double a)
+{
+    return 1 / std::cbrt(a);
+}
+
+__host__ float normf(int dim, const float *a)
+{
+    float val = 0.0f;
+    for(int i=0;i<dim;i++)
+    {
+        val = val + a[i] * a[i];
+    }
+    return val;
+}
+
+__host__ double norm(int dim, const double *a)
+{
+    double val = 0.0;
+    for(int i=0;i<dim;i++)
+    {
+        val = val + a[i] * a[i];
+    }
+    return val;
+}
+
+__host__ float rnormf(int dim, const float *t)
+{
+    float val = 0.0f;
+    for(int i=0;i<dim;i++)
+    {
+        val = val + t[i] * t[i];
+    }
+    return 1 / std::sqrt(val);
+}
+
+__host__ double rnorm(int dim, const double *t)
+{
+    double val = 0.0;
+    for(int i=0;i<dim;i++)
+    {
+        val = val + t[i] * t[i];
+    }
+    return 1 / std::sqrt(val);
+}
+
+__host__ float rnorm4df(float a, float b, float c, float d)
+{
+    return 1 / std::sqrt(a*a + b*b + c*c + d*d);
+}
+
+__host__ double rnorm4d(double a, double b, double c, double d)
+{
+    return 1 / std::sqrt(a*a + b*b + c*c + d*d);
+}
+
+__host__ float rnorm3df(float a, float b, float c)
+{
+    return 1 / std::sqrt(a*a + b*b + c*c);
+}
+
+__host__ double rnorm3d(double a, double b, double c)
+{
+    return 1 / std::sqrt(a*a + b*b + c*c);
+}
+
+__host__ void sincospif(float x, float *sptr, float *cptr)
+{
+    *sptr = std::sin(HIP_PI*x);
+    *cptr = std::cos(HIP_PI*x);
+}
+
+__host__ void sincospi(double x, double *sptr, double *cptr)
+{
+    *sptr = std::sin(HIP_PI*x);
+    *cptr = std::cos(HIP_PI*x);
+}
+
+__host__ float normcdfinvf(float x)
+{
+    return std::sqrt(2) * erfinv(2*x-1);
+}
+
+__host__ double normcdfinv(double x)
+{
+    return std::sqrt(2) * erfinv(2*x-1);
+}
+
+__host__ float nextafterf(float x, float y)
+{
+    return std::nextafter(x, y);
+}
+
+__host__ double nextafter(double x, double y)
+{
+    return std::nextafter(x, y);
+}
+
+__host__ float norm3df(float a, float b, float c)
+{
+    return std::sqrt(a*a + b*b + c*c);
+}
+
+__host__ float norm4df(float a, float b, float c, float d)
+{
+    return std::sqrt(a*a + b*b + c*c + d*d);
+}
+
+__host__ double norm3d(double a, double b, double c)
+{
+    return std::sqrt(a*a + b*b + c*c);
+}
+
+__host__ double norm4d(double a, double b, double c, double d)
+{
+    return std::sqrt(a*a + b*b + c*c + d*d);
+}
diff --git a/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp b/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp
index 330c4fc799..1b087118b8 100644
--- a/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp
+++ b/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
-#include "hip/hip_runtime.h"
+#include <hip/hip_runtime.h>
+#include <hip/device_functions.h>
 #include "test_common.h"
 
 #pragma GCC diagnostic ignored "-Wall"
@@ -27,18 +28,18 @@ THE SOFTWARE.
 
 __device__ void double_precision_intrinsics()
 {
-    //__dadd_rd(0.0, 1.0);
-    //__dadd_rn(0.0, 1.0);
-    //__dadd_ru(0.0, 1.0);
-    //__dadd_rz(0.0, 1.0);
-    //__ddiv_rd(0.0, 1.0);
-    //__ddiv_rn(0.0, 1.0);
-    //__ddiv_ru(0.0, 1.0);
-    //__ddiv_rz(0.0, 1.0);
-    //__dmul_rd(1.0, 2.0);
-    //__dmul_rn(1.0, 2.0);
-    //__dmul_ru(1.0, 2.0);
-    //__dmul_rz(1.0, 2.0);
+    __dadd_rd(0.0, 1.0);
+    __dadd_rn(0.0, 1.0);
+    __dadd_ru(0.0, 1.0);
+    __dadd_rz(0.0, 1.0);
+    __ddiv_rd(0.0, 1.0);
+    __ddiv_rn(0.0, 1.0);
+    __ddiv_ru(0.0, 1.0);
+    __ddiv_rz(0.0, 1.0);
+    __dmul_rd(1.0, 2.0);
+    __dmul_rn(1.0, 2.0);
+    __dmul_ru(1.0, 2.0);
+    __dmul_rz(1.0, 2.0);
     __drcp_rd(2.0);
     __drcp_rn(2.0);
     __drcp_ru(2.0);
@@ -47,10 +48,10 @@ __device__ void double_precision_intrinsics()
     __dsqrt_rn(4.0);
     __dsqrt_ru(4.0);
     __dsqrt_rz(4.0);
-    //__dsub_rd(2.0, 1.0);
-    //__dsub_rn(2.0, 1.0);
-    //__dsub_ru(2.0, 1.0);
-    //__dsub_rz(2.0, 1.0);
+    __dsub_rd(2.0, 1.0);
+    __dsub_rn(2.0, 1.0);
+    __dsub_ru(2.0, 1.0);
+    __dsub_rz(2.0, 1.0);
     __fma_rd(1.0, 2.0, 3.0);
     __fma_rn(1.0, 2.0, 3.0);
     __fma_ru(1.0, 2.0, 3.0);
diff --git a/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp b/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp
index 996577e840..537fcbbba8 100644
--- a/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp
+++ b/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
-#include "hip/hip_runtime.h"
+#include <hip/hip_runtime.h>
+#include <hip/math_functions.h>
 #include "test_common.h"
 
 #pragma GCC diagnostic ignored "-Wall"
@@ -43,8 +44,8 @@ __device__ void double_precision_math_functions()
     cos(0.0);
     cosh(0.0);
     cospi(0.0);
-    //cyl_bessel_i0(0.0);
-    //cyl_bessel_i1(0.0);
+    cyl_bessel_i0(0.0);
+    cyl_bessel_i1(0.0);
     erf(0.0);
     erfc(0.0);
     erfcinv(2.0);
@@ -61,7 +62,7 @@ __device__ void double_precision_math_functions()
     fmax(0.0, 0.0);
     fmin(0.0, 0.0);
     fmod(0.0, 1.0);
-    //frexp(0.0, &iX);
+    frexp(0.0, &iX);
     hypot(1.0, 0.0);
     ilogb(1.0);
     isfinite(0.0);
@@ -71,7 +72,7 @@ __device__ void double_precision_math_functions()
     j1(0.0);
     jn(-1.0, 1.0);
     ldexp(0.0, 0);
-    //lgamma(1.0);
+    lgamma(1.0);
     llrint(0.0);
     llround(0.0);
     log(1.0);
@@ -81,19 +82,19 @@ __device__ void double_precision_math_functions()
     logb(1.0);
     lrint(0.0);
     lround(0.0);
-    //modf(0.0, &fX);
+    modf(0.0, &fX);
     nan("1");
     nearbyint(0.0);
-    //nextafter(0.0);
-    //fX = 1.0; norm(1, &fX);
+    nextafter(0.0, 0.0);
+    fX = 1.0; norm(1, &fX);
     norm3d(1.0, 0.0, 0.0);
     norm4d(1.0, 0.0, 0.0, 0.0);
     normcdf(0.0);
-    //normcdfinv(1.0);
+    normcdfinv(1.0);
     pow(1.0, 0.0);
     rcbrt(1.0);
     remainder(2.0, 1.0);
-    //remquo(1.0, 2.0, &iX);
+    remquo(1.0, 2.0, &iX);
     rhypot(0.0, 1.0);
     rint(1.0);
     fX = 1.0; rnorm(1, &fX);
diff --git a/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp b/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp
index 9980dad277..eff39102c6 100644
--- a/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp
+++ b/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
-#include "hip/hip_runtime.h"
+#include <hip/hip_runtime.h>
+#include <hip/math_functions.h>
 #include "test_common.h"
 
 #pragma GCC diagnostic ignored "-Wall"
@@ -85,7 +86,7 @@ __host__ void double_precision_math_functions()
     nan("1");
     nearbyint(0.0);
     //nextafter(0.0);
-    //fX = 1.0; norm(1, &fX);
+    fX = 1.0; norm(1, &fX);
 #if defined(__HIP_PLATFORM_HCC__)
     norm3d(1.0, 0.0, 0.0);
     norm4d(1.0, 0.0, 0.0, 0.0);
diff --git a/tests/src/deviceLib/hipFloatMath.cpp b/tests/src/deviceLib/hipFloatMath.cpp
index f137ca2602..7a96b5cd0d 100644
--- a/tests/src/deviceLib/hipFloatMath.cpp
+++ b/tests/src/deviceLib/hipFloatMath.cpp
@@ -27,6 +27,7 @@ THE SOFTWARE.
  */
 
 #include "test_common.h"
+#include <hip/device_functions.h>
 
 #define LEN 512
 #define SIZE LEN<<2
diff --git a/tests/src/deviceLib/hipFloatMathPrecise.cpp b/tests/src/deviceLib/hipFloatMathPrecise.cpp
index 4f6c2cd44a..12f7875949 100644
--- a/tests/src/deviceLib/hipFloatMathPrecise.cpp
+++ b/tests/src/deviceLib/hipFloatMathPrecise.cpp
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
-#include "hip/hip_runtime.h"
+#include <hip/hip_runtime.h>
+#include <hip/math_functions.h>
 #include "test_common.h"
 
 __global__ void FloatMathPrecise(hipLaunchParm lp)
diff --git a/tests/src/deviceLib/hipIntegerIntrinsics.cpp b/tests/src/deviceLib/hipIntegerIntrinsics.cpp
index 63530574d8..d712c5a93b 100644
--- a/tests/src/deviceLib/hipIntegerIntrinsics.cpp
+++ b/tests/src/deviceLib/hipIntegerIntrinsics.cpp
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
-#include "hip/hip_runtime.h"
-#include "hip/device_functions.h"
+#include <hip/hip_runtime.h>
+#include <hip/device_functions.h>
 #include "test_common.h"
 
 #pragma GCC diagnostic ignored "-Wall"
diff --git a/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp b/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp
index caddcc0149..6737c6ee9d 100644
--- a/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp
+++ b/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
-#include "hip/hip_runtime.h"
+#include <hip/hip_runtime.h>
+#include <hip/device_functions.h>
 #include "test_common.h"
 
 #pragma GCC diagnostic ignored "-Wall"
@@ -30,44 +31,44 @@ __device__ void single_precision_intrinsics()
     float fX, fY;
 
     __cosf(0.0f);
-    //__exp10f(0.0f);
+    __exp10f(0.0f);
     __expf(0.0f);
-    //__fadd_rd(0.0f, 1.0f);
-    //__fadd_rn(0.0f, 1.0f);
-    //__fadd_ru(0.0f, 1.0f);
-    //__fadd_rz(0.0f, 1.0f);
-    //__fdiv_rd(4.0f, 2.0f);
-    //__fdiv_rn(4.0f, 2.0f);
-    //__fdiv_ru(4.0f, 2.0f);
-    //__fdiv_rz(4.0f, 2.0f);
-    //__fdividef(4.0f, 2.0f);
-    //__fmaf_rd(1.0f, 2.0f, 3.0f);
-    //__fmaf_rn(1.0f, 2.0f, 3.0f);
-    //__fmaf_ru(1.0f, 2.0f, 3.0f);
-    //__fmaf_rz(1.0f, 2.0f, 3.0f);
-    //__fmul_rd(1.0f, 2.0f);
-    //__fmul_rn(1.0f, 2.0f);
-    //__fmul_ru(1.0f, 2.0f);
-    //__fmul_rz(1.0f, 2.0f);
-    //__frcp_rd(2.0f);
-    //__frcp_rn(2.0f);
-    //__frcp_ru(2.0f);
-    //__frcp_rz(2.0f);
+    __fadd_rd(0.0f, 1.0f);
+    __fadd_rn(0.0f, 1.0f);
+    __fadd_ru(0.0f, 1.0f);
+    __fadd_rz(0.0f, 1.0f);
+    __fdiv_rd(4.0f, 2.0f);
+    __fdiv_rn(4.0f, 2.0f);
+    __fdiv_ru(4.0f, 2.0f);
+    __fdiv_rz(4.0f, 2.0f);
+    __fdividef(4.0f, 2.0f);
+    __fmaf_rd(1.0f, 2.0f, 3.0f);
+    __fmaf_rn(1.0f, 2.0f, 3.0f);
+    __fmaf_ru(1.0f, 2.0f, 3.0f);
+    __fmaf_rz(1.0f, 2.0f, 3.0f);
+    __fmul_rd(1.0f, 2.0f);
+    __fmul_rn(1.0f, 2.0f);
+    __fmul_ru(1.0f, 2.0f);
+    __fmul_rz(1.0f, 2.0f);
+    __frcp_rd(2.0f);
+    __frcp_rn(2.0f);
+    __frcp_ru(2.0f);
+    __frcp_rz(2.0f);
     __frsqrt_rn(4.0f);
     __fsqrt_rd(4.0f);
     __fsqrt_rn(4.0f);
     __fsqrt_ru(4.0f);
     __fsqrt_rz(4.0f);
-    //__fsub_rd(2.0f, 1.0f);
-    //__fsub_rn(2.0f, 1.0f);
-    //__fsub_ru(2.0f, 1.0f);
-    //__fsub_rz(2.0f, 1.0f);
+    __fsub_rd(2.0f, 1.0f);
+    __fsub_rn(2.0f, 1.0f);
+    __fsub_ru(2.0f, 1.0f);
+    __fsub_rz(2.0f, 1.0f);
     __log10f(1.0f);
     __log2f(1.0f);
     __logf(1.0f);
     __powf(1.0f, 0.0f);
-    //__saturatef(0.1f);
-    //__sincosf(0.0f, &fX, &fY);
+    __saturatef(0.1f);
+    __sincosf(0.0f, &fX, &fY);
     __sinf(0.0f);
     __tanf(0.0f);
 }
diff --git a/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp b/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp
index a8c1194aab..4576faed93 100644
--- a/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp
+++ b/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
-#include "hip/hip_runtime.h"
+#include <hip/hip_runtime.h>
+#include <hip/math_functions.h>
 #include "test_common.h"
 
 #pragma GCC diagnostic ignored "-Wall"
diff --git a/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp b/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp
index 36aa852d81..d48cea5ff6 100644
--- a/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp
+++ b/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp
@@ -19,7 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
-#include "hip/hip_runtime.h"
+#include <hip/hip_runtime.h>
+#include <hip/math_functions.h>
 #include "test_common.h"
 
 #pragma GCC diagnostic ignored "-Wall"
diff --git a/tests/src/deviceLib/hipTestDevice.cpp b/tests/src/deviceLib/hipTestDevice.cpp
index 9d90eb7de0..2c7488671b 100644
--- a/tests/src/deviceLib/hipTestDevice.cpp
+++ b/tests/src/deviceLib/hipTestDevice.cpp
@@ -24,8 +24,9 @@ THE SOFTWARE.
  */
 
 #include"test_common.h"
-#include "hip/hip_runtime.h"
-#include "hip/hip_runtime_api.h"
+#include <hip/hip_runtime.h>
+#include <hip/math_functions.h>
+#include <hip/hip_runtime_api.h>
 
 #define N 512
 #define SIZE N*sizeof(float)
diff --git a/tests/src/deviceLib/hipTestDeviceDouble.cpp b/tests/src/deviceLib/hipTestDeviceDouble.cpp
index c401a44cbd..f4e8ee20b8 100644
--- a/tests/src/deviceLib/hipTestDeviceDouble.cpp
+++ b/tests/src/deviceLib/hipTestDeviceDouble.cpp
@@ -24,8 +24,9 @@ THE SOFTWARE.
  */
 
 #include"test_common.h"
-#include "hip/hip_runtime.h"
-#include "hip/hip_runtime_api.h"
+#include <hip/hip_runtime.h>
+#include <hip/hip_runtime_api.h>
+#include <hip/math_functions.h>
 
 #define N 512
 #define SIZE N*sizeof(double)
diff --git a/tests/src/deviceLib/hip_anyall.cpp b/tests/src/deviceLib/hip_anyall.cpp
index a562b7810e..bba7915052 100644
--- a/tests/src/deviceLib/hip_anyall.cpp
+++ b/tests/src/deviceLib/hip_anyall.cpp
@@ -29,7 +29,8 @@ THE SOFTWARE.
 #include <stdio.h>
 #include <iostream>
 
-#include "hip/hip_runtime.h"
+#include <hip/hip_runtime.h>
+#include <hip/device_functions.h>
 #define HIP_ASSERT(x) (assert((x)==hipSuccess))
 
 __global__ void
diff --git a/tests/src/deviceLib/hip_ballot.cpp b/tests/src/deviceLib/hip_ballot.cpp
index 629e676bc7..742c47a065 100644
--- a/tests/src/deviceLib/hip_ballot.cpp
+++ b/tests/src/deviceLib/hip_ballot.cpp
@@ -25,8 +25,8 @@ THE SOFTWARE.
 
 #include <iostream>
 
-#include "hip/hip_runtime.h"
-#include "hip/device_functions.h"
+#include <hip/hip_runtime.h>
+#include <hip/device_functions.h>
 
 #define HIP_ASSERT(x) (assert((x)==hipSuccess))
 
diff --git a/tests/src/deviceLib/hip_brev.cpp b/tests/src/deviceLib/hip_brev.cpp
index d9228fe23c..855a8bec47 100644
--- a/tests/src/deviceLib/hip_brev.cpp
+++ b/tests/src/deviceLib/hip_brev.cpp
@@ -32,7 +32,7 @@ THE SOFTWARE.
 #include <stdlib.h>
 #include <iostream>
 #include "hip/hip_runtime.h"
-#include "hip/device_functions.h"
+#include <hip/device_functions.h>
 
 #define HIP_ASSERT(x) (assert((x)==hipSuccess))
 
diff --git a/tests/src/deviceLib/hip_clz.cpp b/tests/src/deviceLib/hip_clz.cpp
index 869f4406f5..bdb31f3e8d 100644
--- a/tests/src/deviceLib/hip_clz.cpp
+++ b/tests/src/deviceLib/hip_clz.cpp
@@ -32,7 +32,7 @@ THE SOFTWARE.
 #include <stdlib.h>
 #include <iostream>
 #include "hip/hip_runtime.h"
-#include "hip/device_functions.h"
+#include <hip/device_functions.h>
 
 #define HIP_ASSERT(x) (assert((x)==hipSuccess))
 #define WIDTH     8
diff --git a/tests/src/deviceLib/hip_ffs.cpp b/tests/src/deviceLib/hip_ffs.cpp
index ba9bd7b9a0..c855ede060 100644
--- a/tests/src/deviceLib/hip_ffs.cpp
+++ b/tests/src/deviceLib/hip_ffs.cpp
@@ -31,8 +31,8 @@ THE SOFTWARE.
 #include <algorithm>
 #include <stdlib.h>
 #include <iostream>
-#include "hip/hip_runtime.h"
-#include "hip/device_functions.h"
+#include <hip/hip_runtime.h>
+#include <hip/device_functions.h>
 
 #define HIP_ASSERT(x) (assert((x)==hipSuccess))
 
diff --git a/tests/src/deviceLib/hip_popc.cpp b/tests/src/deviceLib/hip_popc.cpp
index 6fe214c7fa..e503e55b42 100644
--- a/tests/src/deviceLib/hip_popc.cpp
+++ b/tests/src/deviceLib/hip_popc.cpp
@@ -31,8 +31,8 @@ THE SOFTWARE.
 #include <algorithm>
 #include <stdlib.h>
 #include <iostream>
-#include "hip/hip_runtime.h"
-#include "hip/device_functions.h"
+#include <hip/hip_runtime.h>
+#include <hip/device_functions.h>
 
 #define HIP_ASSERT(x) (assert((x)==hipSuccess))
 
diff --git a/tests/src/deviceLib/hip_trig.cpp b/tests/src/deviceLib/hip_trig.cpp
index 7f9b5d60b0..5ec28101f3 100644
--- a/tests/src/deviceLib/hip_trig.cpp
+++ b/tests/src/deviceLib/hip_trig.cpp
@@ -21,7 +21,7 @@ THE SOFTWARE.
 */
 
 /* HIT_START
- * BUILD: %t %s 
+ * BUILD: %t %s
  * RUN: %t
  * HIT_END
  */
@@ -30,6 +30,7 @@ THE SOFTWARE.
 #include<hip/hip_runtime.h>
 #include<iostream>
 #include"test_common.h"
+#include<hip/device_functions.h>
 
 #define LEN 512
 #define SIZE LEN<<2
diff --git a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp
index a7cace0ebe..4e343121ed 100644
--- a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp
+++ b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp
@@ -24,6 +24,7 @@ THE SOFTWARE.
 
 #include<iostream>
 #include"test_common.h"
+#include"hip/math_functions.h"
 
 const int NN = 1 << 21;
 
@@ -31,7 +32,7 @@ __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){
 	int tid = hipThreadIdx_x;
 	if(tid < 1){
 		for(int i=0;i<n;i++){
-			x[i] = sqrt(pow(3.14159,i));
+			x[i] = sqrt(powf(3.14159,i));
 		}
 		y[tid] = y[tid] + 1.0f;
 	}
diff --git a/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp b/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp
index 7706ed2f0d..63cde4563e 100644
--- a/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp
+++ b/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp
@@ -26,6 +26,7 @@ THE SOFTWARE.
 
 #include<iostream>
 #include"test_common.h"
+#include"hip/math_functions.h"
 
 const int NN = 1 << 21;
 
@@ -33,7 +34,7 @@ __global__ void kernel(hipLaunchParm lp, float *x, float *y, int n){
 	int tid = hipThreadIdx_x;
 	if(tid < 1){
 		for(int i=0;i<n;i++){
-			x[i] = sqrt(pow(3.14159,i));
+			x[i] = sqrt(powf(3.14159,i));
 		}
 		y[tid] = y[tid] + 1.0f;
 	}