1. Added math functions for half precision
2. HRCP is not available due to device code linking errors, will be enabled once it is fixed
3. Added math functions to half test file

Change-Id: Ie317ce70ef518a4fc3f27142143d01e0327f5df3
Этот коммит содержится в:
Aditya Atluri
2017-01-13 12:05:29 -06:00
родитель 646f566bbf
Коммит fe38e9652b
2 изменённых файлов: 87 добавлений и 0 удалений
+72
Просмотреть файл
@@ -57,6 +57,18 @@ extern "C" int __hip_hc_ir_hfma2_int(int, int, int);
extern "C" int __hip_hc_ir_hmul2_int(int, int);
extern "C" int __hip_hc_ir_hsub2_int(int, int);
extern "C" __half __hip_hc_ir_hceil_half(__half) __asm("llvm.ceil.f16");
extern "C" __half __hip_hc_ir_hcos_half(__half) __asm("llvm.cos.f16");
extern "C" __half __hip_hc_ir_hexp2_half(__half) __asm("llvm.exp2.f16");
extern "C" __half __hip_hc_ir_hfloor_half(__half) __asm("llvm.floor.f16");
extern "C" __half __hip_hc_ir_hlog2_half(__half) __asm("llvm.log2.f16");
extern "C" __half __hip_hc_ir_hrcp_half(__half) __asm("llvm.amdgcn.rcp.f16");
extern "C" __half __hip_hc_ir_hrint_half(__half) __asm("llvm.rint.f16");
extern "C" __half __hip_hc_ir_hrsqrt_half(__half) __asm("llvm.sqrt.f16");
extern "C" __half __hip_hc_ir_hsin_half(__half) __asm("llvm.sin.f16");
extern "C" __half __hip_hc_ir_hsqrt_half(__half) __asm("llvm.sqrt.f16");
extern "C" __half __hip_hc_ir_htrunc_half(__half) __asm("llvm.trunc.f16");
__device__ static inline __half __hadd(const __half a, const __half b) {
return __hip_hc_ir_hadd_half(a, b);
}
@@ -610,6 +622,66 @@ __device__ static inline __half __ushort_as_half(const unsigned short int i) {
return hH.h;
}
__device__ static inline __half hceil(const __half h) {
return __hip_hc_ir_hceil_half(h);
}
__device__ static inline __half hcos(const __half h) {
return __hip_hc_ir_hcos_half(h);
}
__device__ static inline __half hexp(const __half h) {
return __hip_hc_ir_hexp2_half(__hip_hc_ir_hmul_half(h, 1.442694));
}
__device__ static inline __half hexp10(const __half h) {
return __hip_hc_ir_hexp2_half(__hip_hc_ir_hmul_half(h, 3.3219281));
}
__device__ static inline __half hexp2(const __half h) {
return __hip_hc_ir_hexp2_half(h);
}
__device__ static inline __half hfloor(const __half h) {
return __hip_hc_ir_hfloor_half(h);
}
__device__ static inline __half hlog(const __half h) {
return __hip_hc_ir_hmul_half(__hip_hc_ir_hlog2_half(h), 0.693147);
}
__device__ static inline __half hlog10(const __half h) {
return __hip_hc_ir_hmul_half(__hip_hc_ir_hlog2_half(h), 0.301029);
}
__device__ static inline __half hlog2(const __half h) {
return __hip_hc_ir_hlog2_half(h);
}
/*
__device__ static inline __half hrcp(const __half h) {
return __hip_hc_ir_hrcp_half(h);
}
*/
__device__ static inline __half hrint(const __half h) {
return __hip_hc_ir_hrint_half(h);
}
__device__ static inline __half hrsqrt(const __half h) {
return __hip_hc_ir_hrsqrt_half(h);
}
__device__ static inline __half hsin(const __half h) {
return __hip_hc_ir_hsin_half(h);
}
__device__ static inline __half hsqrt(const __half a) {
return __hip_hc_ir_hsqrt_half(a);
}
__device__ static inline __half htrunc(const __half a) {
return __hip_hc_ir_htrunc_half(a);
}
#endif
#if __clang_major__ == 3
+15
Просмотреть файл
@@ -60,6 +60,21 @@ __global__ void CheckHalf(hipLaunchParm lp, __half* In1, __half* In2, __half* In
Out[7] = __hsub(In1[7], In2[7]);
Out[8] = __hsub_sat(In1[8], In2[8]);
Out[9] = hdiv(In1[9], In2[9]);
Out[10] = hceil(In1[10]);
Out[11] = hcos(In1[11]);
Out[12] = hexp(In1[12]);
Out[13] = hexp10(In1[13]);
Out[14] = hexp2(In1[14]);
Out[15] = hfloor(In1[15]);
Out[16] = hlog(In1[16]);
Out[17] = hlog10(In1[17]);
Out[18] = hlog2(In1[18]);
// Out[19] = hrcp(In1[19]);
Out[20] = hrint(In1[20]);
Out[21] = hrsqrt(In1[21]);
Out[22] = hsin(In1[22]);
Out[23] = hsqrt(In1[23]);
Out[24] = htrunc(In1[24]);
}
__global__ void CheckHalf2(hipLaunchParm lp, __half2* In1, __half2* In2, __half2* In3, __half2* Out){