v1: Working on Integer Intrinsics

1. Half way through
2. May not work
3. No test written

Change-Id: I705b743a78b142ff068e2521870e73fca7ad2b1c
このコミットが含まれているのは:
Aditya Atluri
2017-01-16 14:55:29 -06:00
コミット b09ad764a1
5個のファイルの変更242行の追加134行の削除
+53
ファイルの表示
@@ -23,6 +23,59 @@ THE SOFTWARE.
#include <hip/hip_runtime.h>
#include <hip/hip_vector_types.h>
extern "C" unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int);
extern "C" signed int __hip_hc_ir_mul24_int(signed int, signed int);
extern "C" signed int __hip_hc_ir_mulhi_int(signed int, signed int);
extern "C" unsigned int __hip_hc_ir_umulhi_int(unsigned int, unsigned int);
// integer intrinsic function __poc __clz __ffs __brev
__device__ unsigned int __brev( unsigned int x);
__device__ unsigned long long int __brevll( unsigned long long int x);
__device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s);
__device__ unsigned int __clz(int x);
__device__ unsigned int __clzll(long long int x);
__device__ unsigned int __ffs(int x);
__device__ unsigned int __ffsll(long long int x);
__device__ static inline unsigned int __hadd(int x, int y)
{
return (x + y) >> 1;
}
__device__ static inline int __mul24(int x, int y)
{
return __hip_hc_ir_mul24_int(x, y);
}
__device__ long long int __mul64hi(long long int x, long long int y);
__device__ int __mulhi(int x, int y)
{
return __hip_hc_ir_mulhi_int(x, y);
}
__device__ unsigned int __popc( unsigned int x);
__device__ unsigned int __popcll( unsigned long long int x);
__device__ int __rhadd(int x, int y)
{
return (x + y + 1) >> 1;
}
//__device__ unsigned int __sad(int x, int y, int z);
/*
Implemented signed version of sad
*/
__device__ unsigned int __uhadd(unsigned int x, unsigned int y)
{
return (x + y) >> 1;
}
__device__ static inline int __umul24(unsigned int x, unsigned int y)
{
return __hip_hc_ir_umul24_int(x, y);
}
__device__ unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y);
__device__ unsigned int __umulhi(unsigned int x, unsigned int y);
__device__ unsigned int __urhadd(unsigned int x, unsigned int y);
__device__ unsigned int __usad(unsigned int x, unsigned int y, unsigned int z);
// warp vote function __all __any __ballot
__device__ int __all( int input);
__device__ int __any( int input);
__device__ unsigned long long int __ballot( int input);
/*
Rounding modes are not yet supported in HIP
*/
-24
ファイルの表示
@@ -420,30 +420,6 @@ __device__ unsigned int atomicInc(unsigned int* address,
__device__ unsigned int atomicDec(unsigned int* address,
unsigned int val);
//__mul24 __umul24
__device__ int __mul24(int arg1, int arg2);
__device__ unsigned int __umul24(unsigned int arg1, unsigned int arg2);
// integer intrinsic function __poc __clz __ffs __brev
__device__ unsigned int __popc( unsigned int input);
__device__ unsigned int __popcll( unsigned long long int input);
__device__ unsigned int __clz(unsigned int input);
__device__ unsigned int __clzll(unsigned long long int input);
__device__ unsigned int __clz(int input);
__device__ unsigned int __clzll(long long int input);
__device__ unsigned int __ffs(unsigned int input);
__device__ unsigned int __ffsll(unsigned long long int input);
__device__ unsigned int __ffs(int input);
__device__ unsigned int __ffsll(long long int input);
__device__ unsigned int __brev( unsigned int input);
__device__ unsigned long long int __brevll( unsigned long long int input);
// warp vote function __all __any __ballot
__device__ int __all( int input);
__device__ int __any( int input);
__device__ unsigned long long int __ballot( int input);
// warp shuffle functions
#ifdef __cplusplus
__device__ int __shfl(int input, int lane, int width=warpSize);
+164
ファイルの表示
@@ -18,6 +18,10 @@ THE SOFTWARE.
*/
#include <hip/device_functions.h>
#include <hc.hpp>
#include <grid_launch.h>
#include <hc_math.hpp>
#include "device_util.h"
struct holder64Bit{
union{
@@ -358,3 +362,163 @@ __device__ float __ull2float_rz(unsigned long long int x)
{
return (float)x;
}
// integer intrinsic function __poc __clz __ffs __brev
__device__ unsigned int __popc( unsigned int input)
{
return hc::__popcount_u32_b32(input);
}
__device__ unsigned int __popcll( unsigned long long int input)
{
return hc::__popcount_u32_b64(input);
}
__device__ unsigned int __clz(unsigned int input)
{
#ifdef NVCC_COMPAT
return input == 0 ? 32 : hc::__firstbit_u32_u32( input);
#else
return hc::__firstbit_u32_u32( input);
#endif
}
__device__ unsigned int __clzll(unsigned long long int input)
{
#ifdef NVCC_COMPAT
return input == 0 ? 64 : hc::__firstbit_u32_u64( input);
#else
return hc::__firstbit_u32_u64( input);
#endif
}
__device__ unsigned int __clz( int input)
{
#ifdef NVCC_COMPAT
return input == 0 ? 32 : hc::__firstbit_u32_s32( input);
#else
return hc::__firstbit_u32_s32( input);
#endif
}
__device__ unsigned int __clzll( long long int input)
{
#ifdef NVCC_COMPAT
return input == 0 ? 64 : hc::__firstbit_u32_s64( input);
#else
return hc::__firstbit_u32_s64( input);
#endif
}
__device__ unsigned int __ffs(unsigned int input)
{
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_u32( input)+1;
#else
return hc::__lastbit_u32_u32( input);
#endif
}
__device__ unsigned int __ffsll(unsigned long long int input)
{
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_u64( input)+1;
#else
return hc::__lastbit_u32_u64( input);
#endif
}
__device__ unsigned int __ffs( int input)
{
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_s32( input)+1;
#else
return hc::__lastbit_u32_s32( input);
#endif
}
__device__ unsigned int __ffsll( long long int input)
{
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_s64( input)+1;
#else
return hc::__lastbit_u32_s64( input);
#endif
}
__device__ unsigned int __brev( unsigned int input)
{
return hc::__bitrev_b32( input);
}
__device__ unsigned long long int __brevll( unsigned long long int input)
{
return hc::__bitrev_b64( input);
}
struct ucharHolder {
union {
unsigned char c[4];
unsigned int ui;
};
}__attribute__((aligned(4)));
struct uchar2Holder {
union {
unsigned int ui[2];
unsigned char c[8];
};
}__attribute__((aligned(8)));
struct intHolder {
union {
signed int si[2];
signed int long sl;
};
}__attribute__((aligned(8)));
struct uintHolder {
union {
signed int ui[2];
signed int long ul;
};
}__attribute__((aligned(8)));
struct uchar2Holder cHoldVal;
struct ucharHolder cHoldKey;
struct ucharHolder cHoldOut;
struct intHolder iHold1;
struct intHolder iHold2;
struct uintHolder uHold1;
struct uintHolder uHold2;
__device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s)
{
cHoldKey.ui = s;
cHoldVal.ui[0] = x;
cHoldVal.ui[1] = y;
cHoldOut.c[0] = cHoldVal.c[cHoldKey.c[0]];
cHoldOut.c[1] = cHoldVal.c[cHoldKey.c[1]];
cHoldOut.c[2] = cHoldVal.c[cHoldKey.c[2]];
cHoldOut.c[3] = cHoldVal.c[cHoldKey.c[3]];
return cHoldOut.ui;
}
__device__ long long __mul64hi(long long int x, long long int y)
{
iHold1.sl = x;
iHold2.sl = y;
iHold1.sl = iHold1.si[1] * iHold2.si[1];
return iHold1.sl;
}
__device__ unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y)
{
uHold1.ul = x;
uHold2.ul = y;
uHold1.ul = uHold1.ui[1] * uHold2.ui[1];
return uHold1.ul;
}
-110
ファイルの表示
@@ -1843,117 +1843,7 @@ __device__ unsigned int atomicDec(unsigned int* address,
return hc::__atomic_wrapdec(address,val);
}
//__mul24 __umul24
__device__ int __mul24(int arg1,
int arg2)
{
return hc::__mul24(arg1, arg2);
}
__device__ unsigned int __umul24(unsigned int arg1,
unsigned int arg2)
{
return hc::__mul24(arg1, arg2);
}
__device__ unsigned int test__popc(unsigned int input)
{
return hc::__popcount_u32_b32(input);
}
// integer intrinsic function __poc __clz __ffs __brev
__device__ unsigned int __popc( unsigned int input)
{
return hc::__popcount_u32_b32(input);
}
__device__ unsigned int test__popc(unsigned int input);
__device__ unsigned int __popcll( unsigned long long int input)
{
return hc::__popcount_u32_b64(input);
}
__device__ unsigned int __clz(unsigned int input)
{
#ifdef NVCC_COMPAT
return input == 0 ? 32 : hc::__firstbit_u32_u32( input);
#else
return hc::__firstbit_u32_u32( input);
#endif
}
__device__ unsigned int __clzll(unsigned long long int input)
{
#ifdef NVCC_COMPAT
return input == 0 ? 64 : hc::__firstbit_u32_u64( input);
#else
return hc::__firstbit_u32_u64( input);
#endif
}
__device__ unsigned int __clz( int input)
{
#ifdef NVCC_COMPAT
return input == 0 ? 32 : hc::__firstbit_u32_s32( input);
#else
return hc::__firstbit_u32_s32( input);
#endif
}
__device__ unsigned int __clzll( long long int input)
{
#ifdef NVCC_COMPAT
return input == 0 ? 64 : hc::__firstbit_u32_s64( input);
#else
return hc::__firstbit_u32_s64( input);
#endif
}
__device__ unsigned int __ffs(unsigned int input)
{
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_u32( input)+1;
#else
return hc::__lastbit_u32_u32( input);
#endif
}
__device__ unsigned int __ffsll(unsigned long long int input)
{
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_u64( input)+1;
#else
return hc::__lastbit_u32_u64( input);
#endif
}
__device__ unsigned int __ffs( int input)
{
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_s32( input)+1;
#else
return hc::__lastbit_u32_s32( input);
#endif
}
__device__ unsigned int __ffsll( long long int input)
{
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_s64( input)+1;
#else
return hc::__lastbit_u32_s64( input);
#endif
}
__device__ unsigned int __brev( unsigned int input)
{
return hc::__bitrev_b32( input);
}
__device__ unsigned long long int __brevll( unsigned long long int input)
{
return hc::__bitrev_b64( input);
}
// warp vote function __all __any __ballot
__device__ int __all( int input)
+25
ファイルの表示
@@ -146,4 +146,29 @@ define i32 @__hip_hc_ir_h2trunc_int(i32 %a) #1 {
ret i32 %1
}
define i32 @__hip_hc_ir_mul24_int(i32 %a, i32 %b) #1 {
%1 = tail call i32 asm sideeffect "v_mul_i32_i24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
ret i32 %1
}
define i32 @__hip_hc_ir_umul24_int(i32 %a, i32 %b) #1 {
%1 = tail call i32 asm sideeffect "v_mul_u32_u24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
ret i32 %1
}
define i32 @__hip_hc_ir_mulhi_int(i32 %a, i32 %b) #1 {
%1 = tail call i32 asm sideeffect "v_mul_hi_i32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
ret i32 %1
}
define i32 @__hip_hc_ir_umulhi_int(i32 %a, i32 %b) #1 {
%1 = tail call i32 asm sideeffect "v_mul_hi_u32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
ret i32 %1
}
define i32 @__hip_hc_ir_usad_int(i32 %a, i32 %b, i32 %c) #1 {
%1 = tail call i32 asm sideeffect "v_sad_u32 $0, $1, $2, $3","=v,v,v,v"(i32 %a, i32 %b, i32 %c)
ret i32 %1
}
attributes #1 = { alwaysinline nounwind }