From a04dad833b46ee9ecd5670fa89dc49730eab4c6d Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Wed, 23 Nov 2016 18:22:05 -0600 Subject: [PATCH] added fma for double and float 1. Added fma intrinsic support for double and float 2. Added test for fma Change-Id: I909fdbec34a3d12c03ba6eff3a39376a7128ee43 [ROCm/hip commit: 30674382a434d9820f7b35f88f2c1fc81e850d11] --- .../hip/include/hip/hcc_detail/hip_runtime.h | 35 +++++++++++++++++++ .../hipDoublePrecisionIntrinsics.cpp | 8 ++--- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime.h b/projects/hip/include/hip/hcc_detail/hip_runtime.h index 45dbeff5a4..63cfb2ea3c 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime.h @@ -509,7 +509,10 @@ __device__ float __hip_fast_powf(float, float); __device__ void __hip_fast_sincosf(float,float*,float*); extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32"); __device__ float __hip_fast_tanf(float); +/* Single-precision fma helper declaration; its __asm label names llvm.fma.f32. */ extern __attribute__((const)) float __hip_fast_fmaf(float,float,float) __asm("llvm.fma.f32"); + extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64"); +/* Double-precision fma helper declaration; its __asm label names llvm.fma.f64. */ extern __attribute__((const)) double __hip_fast_fma(double,double,double) __asm("llvm.fma.f64"); #ifdef HIP_FAST_MATH // Single Precision Precise Math when enabled @@ -629,6 +632,22 @@ __device__ inline float __tanf(float x) { return __hip_fast_tanf(x); } +/* __fmaf_rd, __fmaf_rn, __fmaf_ru and __fmaf_rz each pass (x, y, z) unchanged to __hip_fast_fmaf and return its result. NOTE(review): the four bodies are identical, so the rounding-mode suffix is not differentiated here - confirm this is intended. */ __device__ inline float __fmaf_rd(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ inline float __fmaf_rn(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ inline float __fmaf_ru(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + +__device__ inline float __fmaf_rz(float x, float y, float z) { + return __hip_fast_fmaf(x, y, z); +} + __device__ inline double __dsqrt_rd(double x) { return __hip_fast_dsqrt(x); } @@ -645,6 +664,22
@@ __device__ inline double __dsqrt_rz(double x) { return __hip_fast_dsqrt(x); } +/* __fma_rd, __fma_rn, __fma_ru and __fma_rz each pass (x, y, z) unchanged to __hip_fast_fma and return its result. NOTE(review): the four bodies are identical, so the rounding-mode suffix is not differentiated here - confirm this is intended. */ __device__ inline double __fma_rd(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + +__device__ inline double __fma_rn(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + +__device__ inline double __fma_ru(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + +__device__ inline double __fma_rz(double x, double y, double z) { + return __hip_fast_fma(x, y, z); +} + /** * CUDA 8 device function features diff --git a/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp b/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp index 0dab2d7106..0b4e0840a4 100644 --- a/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp +++ b/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp @@ -51,10 +51,10 @@ __device__ void double_precision_intrinsics() //__dsub_rn(2.0, 1.0); //__dsub_ru(2.0, 1.0); //__dsub_rz(2.0, 1.0); - //__fma_rd(1.0, 2.0, 3.0); - //__fma_rn(1.0, 2.0, 3.0); - //__fma_ru(1.0, 2.0, 3.0); - //__fma_rz(1.0, 2.0, 3.0); + __fma_rd(1.0, 2.0, 3.0); + __fma_rn(1.0, 2.0, 3.0); + __fma_ru(1.0, 2.0, 3.0); + __fma_rz(1.0, 2.0, 3.0); } __global__ void compileDoublePrecisionIntrinsics(hipLaunchParm lp, int ignored)