From b09ad764a1d37595aafa8fbbee0765bc797d454a Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Mon, 16 Jan 2017 14:55:29 -0600 Subject: [PATCH] v1: Working on Integer Intrinsics 1. Half way through 2. May not work 3. No test written Change-Id: I705b743a78b142ff068e2521870e73fca7ad2b1c --- include/hip/hcc_detail/device_functions.h | 53 +++++++ include/hip/hcc_detail/hip_runtime.h | 24 ---- src/device_functions.cpp | 164 ++++++++++++++++++++++ src/device_util.cpp | 110 --------------- src/hip_ir.ll | 25 ++++ 5 files changed, 242 insertions(+), 134 deletions(-) diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index e6771650a2..8eb9d6a46c 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -23,6 +23,59 @@ THE SOFTWARE. #include #include +extern "C" unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int); +extern "C" signed int __hip_hc_ir_mul24_int(signed int, signed int); +extern "C" signed int __hip_hc_ir_mulhi_int(signed int, signed int); +extern "C" unsigned int __hip_hc_ir_umulhi_int(unsigned int, unsigned int); +// integer intrinsic function __poc __clz __ffs __brev +__device__ unsigned int __brev( unsigned int x); +__device__ unsigned long long int __brevll( unsigned long long int x); +__device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s); +__device__ unsigned int __clz(int x); +__device__ unsigned int __clzll(long long int x); +__device__ unsigned int __ffs(int x); +__device__ unsigned int __ffsll(long long int x); +__device__ static inline unsigned int __hadd(int x, int y) +{ + return (x + y) >> 1; +} +__device__ static inline int __mul24(int x, int y) +{ + return __hip_hc_ir_mul24_int(x, y); +} +__device__ long long int __mul64hi(long long int x, long long int y); +__device__ int __mulhi(int x, int y) +{ + return __hip_hc_ir_mulhi_int(x, y); +} +__device__ unsigned int __popc( unsigned int x); +__device__ unsigned int __popcll( unsigned long long int x); +__device__ int __rhadd(int x, int y) +{ + return (x + y + 1) >> 1; +} +//__device__ unsigned int __sad(int x, int y, int z); +/* +Implemented signed version of sad +*/ +__device__ unsigned int __uhadd(unsigned int x, unsigned int y) +{ + return (x + y) >> 1; +} +__device__ static inline int __umul24(unsigned int x, unsigned int y) +{ + return __hip_hc_ir_umul24_int(x, y); +} +__device__ unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y); +__device__ unsigned int __umulhi(unsigned int x, unsigned int y); +__device__ unsigned int __urhadd(unsigned int x, unsigned int y); +__device__ unsigned int __usad(unsigned int x, unsigned int y, unsigned int z); + +// warp vote function __all __any __ballot +__device__ int __all( int input); +__device__ int __any( int input); +__device__ unsigned long long int __ballot( int input); + /* Rounding modes are not yet supported in HIP */ diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index f595ff1c05..f6967c3445 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -420,30 +420,6 @@ __device__ unsigned int atomicInc(unsigned int* address, __device__ unsigned int atomicDec(unsigned int* address, unsigned int val); -//__mul24 __umul24 -__device__ int __mul24(int arg1, int arg2); -__device__ unsigned int __umul24(unsigned int arg1, unsigned int arg2); - -// integer intrinsic function __poc __clz __ffs __brev -__device__ unsigned int __popc( unsigned int input); -__device__ unsigned int __popcll( unsigned long long int input); -__device__ unsigned int __clz(unsigned int input); -__device__ unsigned int __clzll(unsigned long long int input); -__device__ unsigned int __clz(int input); -__device__ unsigned int __clzll(long long int input); -__device__ unsigned int __ffs(unsigned int input); -__device__ unsigned int __ffsll(unsigned long long int input); -__device__ unsigned int __ffs(int input); -__device__ unsigned int __ffsll(long long int input); -__device__ unsigned int __brev( unsigned int input); -__device__ unsigned long long int __brevll( unsigned long long int input); - - -// warp vote function __all __any __ballot -__device__ int __all( int input); -__device__ int __any( int input); -__device__ unsigned long long int __ballot( int input); - // warp shuffle functions #ifdef __cplusplus __device__ int __shfl(int input, int lane, int width=warpSize); diff --git a/src/device_functions.cpp b/src/device_functions.cpp index 01d18d8d28..7fb67b787a 100644 --- a/src/device_functions.cpp +++ b/src/device_functions.cpp @@ -18,6 +18,10 @@ THE SOFTWARE. */ #include +#include +#include +#include +#include "device_util.h" struct holder64Bit{ union{ @@ -358,3 +362,163 @@ __device__ float __ull2float_rz(unsigned long long int x) { return (float)x; } + + + +// integer intrinsic function __poc __clz __ffs __brev +__device__ unsigned int __popc( unsigned int input) +{ + return hc::__popcount_u32_b32(input); +} + +__device__ unsigned int __popcll( unsigned long long int input) +{ + return hc::__popcount_u32_b64(input); +} + +__device__ unsigned int __clz(unsigned int input) +{ +#ifdef NVCC_COMPAT + return input == 0 ? 32 : hc::__firstbit_u32_u32( input); +#else + return hc::__firstbit_u32_u32( input); +#endif +} + +__device__ unsigned int __clzll(unsigned long long int input) +{ +#ifdef NVCC_COMPAT + return input == 0 ? 64 : hc::__firstbit_u32_u64( input); +#else + return hc::__firstbit_u32_u64( input); +#endif +} + +__device__ unsigned int __clz( int input) +{ +#ifdef NVCC_COMPAT + return input == 0 ? 32 : hc::__firstbit_u32_s32( input); +#else + return hc::__firstbit_u32_s32( input); +#endif +} + +__device__ unsigned int __clzll( long long int input) +{ +#ifdef NVCC_COMPAT + return input == 0 ? 64 : hc::__firstbit_u32_s64( input); +#else + return hc::__firstbit_u32_s64( input); +#endif +} + +__device__ unsigned int __ffs(unsigned int input) +{ +#ifdef NVCC_COMPAT + return hc::__lastbit_u32_u32( input)+1; +#else + return hc::__lastbit_u32_u32( input); +#endif +} + +__device__ unsigned int __ffsll(unsigned long long int input) +{ +#ifdef NVCC_COMPAT + return hc::__lastbit_u32_u64( input)+1; +#else + return hc::__lastbit_u32_u64( input); +#endif +} + +__device__ unsigned int __ffs( int input) +{ +#ifdef NVCC_COMPAT + return hc::__lastbit_u32_s32( input)+1; +#else + return hc::__lastbit_u32_s32( input); +#endif +} + +__device__ unsigned int __ffsll( long long int input) +{ +#ifdef NVCC_COMPAT + return hc::__lastbit_u32_s64( input)+1; +#else + return hc::__lastbit_u32_s64( input); +#endif +} + +__device__ unsigned int __brev( unsigned int input) +{ + return hc::__bitrev_b32( input); +} + +__device__ unsigned long long int __brevll( unsigned long long int input) +{ + return hc::__bitrev_b64( input); +} + +struct ucharHolder { + union { + unsigned char c[4]; + unsigned int ui; + }; +}__attribute__((aligned(4))); + +struct uchar2Holder { + union { + unsigned int ui[2]; + unsigned char c[8]; + }; +}__attribute__((aligned(8))); + +struct intHolder { + union { + signed int si[2]; + signed int long sl; + }; +}__attribute__((aligned(8))); + +struct uintHolder { + union { + signed int ui[2]; + signed int long ul; + }; +}__attribute__((aligned(8))); + +struct uchar2Holder cHoldVal; +struct ucharHolder cHoldKey; +struct ucharHolder cHoldOut; + +struct intHolder iHold1; +struct intHolder iHold2; +struct uintHolder uHold1; +struct uintHolder uHold2; + +__device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) +{ + cHoldKey.ui = s; + cHoldVal.ui[0] = x; + cHoldVal.ui[1] = y; + cHoldOut.c[0] = cHoldVal.c[cHoldKey.c[0]]; + cHoldOut.c[1] = cHoldVal.c[cHoldKey.c[1]]; + cHoldOut.c[2] = cHoldVal.c[cHoldKey.c[2]]; + cHoldOut.c[3] = cHoldVal.c[cHoldKey.c[3]]; + return cHoldOut.ui; +} + +__device__ long long __mul64hi(long long int x, long long int y) +{ + iHold1.sl = x; + iHold2.sl = y; + iHold1.sl = iHold1.si[1] * iHold2.si[1]; + return iHold1.sl; +} + +__device__ unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) +{ + uHold1.ul = x; + uHold2.ul = y; + uHold1.ul = uHold1.ui[1] * uHold2.ui[1]; + return uHold1.ul; +} diff --git a/src/device_util.cpp b/src/device_util.cpp index cf5d6ff5af..e875db1cf9 100644 --- a/src/device_util.cpp +++ b/src/device_util.cpp @@ -1843,117 +1843,7 @@ __device__ unsigned int atomicDec(unsigned int* address, return hc::__atomic_wrapdec(address,val); } -//__mul24 __umul24 -__device__ int __mul24(int arg1, - int arg2) -{ - return hc::__mul24(arg1, arg2); -} -__device__ unsigned int __umul24(unsigned int arg1, - unsigned int arg2) -{ - return hc::__mul24(arg1, arg2); -} -__device__ unsigned int test__popc(unsigned int input) -{ - return hc::__popcount_u32_b32(input); -} - -// integer intrinsic function __poc __clz __ffs __brev -__device__ unsigned int __popc( unsigned int input) -{ - return hc::__popcount_u32_b32(input); -} - -__device__ unsigned int test__popc(unsigned int input); - -__device__ unsigned int __popcll( unsigned long long int input) -{ - return hc::__popcount_u32_b64(input); -} - -__device__ unsigned int __clz(unsigned int input) -{ -#ifdef NVCC_COMPAT - return input == 0 ? 32 : hc::__firstbit_u32_u32( input); -#else - return hc::__firstbit_u32_u32( input); -#endif -} - -__device__ unsigned int __clzll(unsigned long long int input) -{ -#ifdef NVCC_COMPAT - return input == 0 ? 64 : hc::__firstbit_u32_u64( input); -#else - return hc::__firstbit_u32_u64( input); -#endif -} - -__device__ unsigned int __clz( int input) -{ -#ifdef NVCC_COMPAT - return input == 0 ? 32 : hc::__firstbit_u32_s32( input); -#else - return hc::__firstbit_u32_s32( input); -#endif -} - -__device__ unsigned int __clzll( long long int input) -{ -#ifdef NVCC_COMPAT - return input == 0 ? 64 : hc::__firstbit_u32_s64( input); -#else - return hc::__firstbit_u32_s64( input); -#endif -} - -__device__ unsigned int __ffs(unsigned int input) -{ -#ifdef NVCC_COMPAT - return hc::__lastbit_u32_u32( input)+1; -#else - return hc::__lastbit_u32_u32( input); -#endif -} - -__device__ unsigned int __ffsll(unsigned long long int input) -{ -#ifdef NVCC_COMPAT - return hc::__lastbit_u32_u64( input)+1; -#else - return hc::__lastbit_u32_u64( input); -#endif -} - -__device__ unsigned int __ffs( int input) -{ -#ifdef NVCC_COMPAT - return hc::__lastbit_u32_s32( input)+1; -#else - return hc::__lastbit_u32_s32( input); -#endif -} - -__device__ unsigned int __ffsll( long long int input) -{ -#ifdef NVCC_COMPAT - return hc::__lastbit_u32_s64( input)+1; -#else - return hc::__lastbit_u32_s64( input); -#endif -} - -__device__ unsigned int __brev( unsigned int input) -{ - return hc::__bitrev_b32( input); -} - -__device__ unsigned long long int __brevll( unsigned long long int input) -{ - return hc::__bitrev_b64( input); -} // warp vote function __all __any __ballot __device__ int __all( int input) diff --git a/src/hip_ir.ll b/src/hip_ir.ll index 739717c740..a20b57016e 100644 --- a/src/hip_ir.ll +++ b/src/hip_ir.ll @@ -146,4 +146,29 @@ define i32 @__hip_hc_ir_h2trunc_int(i32 %a) #1 { ret i32 %1 } +define i32 @__hip_hc_ir_mul24_int(i32 %a, i32 %b) #1 { + %1 = tail call i32 asm sideeffect "v_mul_i32_i24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) + ret i32 %1 +} + +define i32 @__hip_hc_ir_umul24_int(i32 %a, i32 %b) #1 { + %1 = tail call i32 asm sideeffect "v_mul_u32_u24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) + ret i32 %1 +} + +define i32 @__hip_hc_ir_mulhi_int(i32 %a, i32 %b) #1 { + %1 = tail call i32 asm sideeffect "v_mul_hi_i32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) + ret i32 %1 +} + +define i32 @__hip_hc_ir_umulhi_int(i32 %a, i32 %b) #1 { + %1 = tail call i32 asm sideeffect "v_mul_hi_u32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) + ret i32 %1 +} + +define i32 @__hip_hc_ir_usad_int(i32 %a, i32 %b, i32 %c) #1 { + %1 = tail call i32 asm sideeffect "v_sad_u32 $0, $1, $2, $3","=v,v,v,v"(i32 %a, i32 %b, i32 %c) + ret i32 %1 +} + attributes #1 = { alwaysinline nounwind }