Added i8 packed math intrinsics

1. Added add, sub, and mul packed-math i8 intrinsics
2. Removed the C++ packed data structures included from HCC

Change-Id: I1d109c5ce10c48b7cd3ea059478b88fc1de78499
TODO: Add better packed data structures support


[ROCm/hip commit: 12dd9df88f]
Bu işleme şunda yer alıyor:
Aditya Atluri
2016-11-17 01:09:12 -06:00
ebeveyn c7f6ddd612
işleme da0a8cd032
3 değiştirilmiş dosya ile 59 ekleme ve 3 silme
+4
Dosyayı Görüntüle
@@ -627,6 +627,10 @@ __device__ static inline void* free(void *ptr)
return __hip_hc_free(ptr);
}
// Packed 8-bit arithmetic intrinsics: each takes two char4 operands and
// returns a char4 result. Declared with C linkage; the definitions are
// provided in a separate translation unit.
extern "C" __device__ char4 __hip_hc_add8pk(char4, char4);
extern "C" __device__ char4 __hip_hc_sub8pk(char4, char4);
extern "C" __device__ char4 __hip_hc_mul8pk(char4, char4);
// Expands __syncthreads() to an hc_barrier call with CLK_LOCAL_MEM_FENCE.
#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
// Expands to its arguments unchanged.
#define HIP_KERNEL_NAME(...) __VA_ARGS__
+19 -3
Dosyayı Görüntüle
@@ -32,7 +32,7 @@ THE SOFTWARE.
#error("This version of HIP requires a newer version of HCC.");
#endif
#if __HCC__
#if 0
#include <hc_short_vector.hpp>
using namespace hc::short_vector;
@@ -137,8 +137,24 @@ struct uchar3
unsigned char x, y, z;
};
struct __hip_align(char4, 4, signed char x; signed char y; signed char z; signed char w;);
struct __hip_align(uchar4, 4, unsigned char x; unsigned char y; unsigned char z; unsigned char w;);
// Four signed 8-bit components that can also be viewed as a single 32-bit
// word through `val`.
//
// The components are wrapped in an anonymous struct so that x, y, z and w
// occupy four consecutive bytes. Listing them directly as union members would
// place all four at offset 0, making them aliases of the same byte.
// NOTE(review): which byte of `val` each component maps to depends on the
// target's byte order (x is the lowest byte on little-endian targets).
struct char4
{
    union {
        struct {
            signed char x, y, z, w;
        };
        unsigned int val;
    };
};
// Four unsigned 8-bit components that can also be viewed as a single 32-bit
// word through `val`.
//
// The components are wrapped in an anonymous struct so that x, y, z and w
// occupy four consecutive bytes. Listing them directly as union members would
// place all four at offset 0, making them aliases of the same byte.
// NOTE(review): which byte of `val` each component maps to depends on the
// target's byte order (x is the lowest byte on little-endian targets).
struct uchar4
{
    union {
        struct {
            unsigned char x, y, z, w;
        };
        unsigned int val;
    };
};
//struct __hip_align(char4, 4, signed char x; signed char y; signed char z; signed char w;);
//struct __hip_align(uchar4, 4, unsigned char x; unsigned char y; unsigned char z; unsigned char w;);
struct __hip_align(short1, 2, signed short x;);
struct __hip_align(ushort1, 2, unsigned short x;);
+36
Dosyayı Görüntüle
@@ -217,6 +217,42 @@ __device__ float __hip_ds_swizzlef(float src, int pattern) {
// Forwards all five arguments, in order, to hc::__amdgcn_move_dpp and returns
// its result unchanged.
__device__ int __hip_move_dpp(int src,
                              int dpp_ctrl,
                              int row_mask,
                              int bank_mask,
                              bool bound_ctrl)
{
    return hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
}
// Selects bytes 0 and 2 (bits 0-7 and 16-23) of a 32-bit word.
#define MASK1 0x00ff00ff
// Selects bytes 1 and 3 (bits 8-15 and 24-31) of a 32-bit word.
#define MASK2 0xff00ff00
// Adds the four 8-bit lanes packed in in1.val and in2.val independently,
// each lane wrapping modulo 256.
//
// The even-numbered bytes (MASK1) and odd-numbered bytes (MASK2) are summed
// in two separate passes. Within a pass every lane has a zeroed byte above it,
// so a carry out of a lane lands in that gap (or falls off the top of the
// word) and is removed by the final mask instead of reaching another lane.
__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) {
    const unsigned lhs = in1.val;
    const unsigned rhs = in2.val;

    const unsigned evenLanes = ((lhs & MASK1) + (rhs & MASK1)) & MASK1;
    const unsigned oddLanes = ((lhs & MASK2) + (rhs & MASK2)) & MASK2;

    char4 result;
    result.val = evenLanes | oddLanes;
    return result;
}
// Subtracts the four 8-bit lanes packed in in2.val from those in in1.val
// independently, each lane wrapping modulo 256.
//
// The even-numbered bytes (MASK1) and odd-numbered bytes (MASK2) are handled
// in two separate passes. Unlike a carry, a borrow out of a lane would ripple
// through the zeroed gap byte and corrupt the next lane in the same pass
// (e.g. lanes (0, 5) - (1, 0) would yield 4 instead of 5 in the upper lane).
// To stop that, a guard bit is set directly above each lower lane of the
// minuend so any borrow is absorbed there; the guard bits are then discarded
// by the final mask. The topmost lane of the odd pass needs no guard because
// its borrow simply falls off the top of the 32-bit word.
__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
    char4 out;

    // Even pass: lanes at bits 0-7 and 16-23, guard bits at 8 and 24.
    const unsigned even =
        (((in1.val & MASK1) | 0x01000100u) - (in2.val & MASK1)) & MASK1;

    // Odd pass: lanes at bits 8-15 and 24-31, guard bit at 16.
    const unsigned odd =
        (((in1.val & MASK2) | 0x00010000u) - (in2.val & MASK2)) & MASK2;

    out.val = even | odd;
    return out;
}
// Multiplies the four 8-bit lanes packed in in1.val and in2.val independently,
// keeping the low 8 bits of each product (wrap modulo 256).
//
// Multiplication cannot be done on two lanes at once the way addition can:
// multiplying words that each hold two lanes produces cross terms between the
// lanes and places the wanted products at the wrong bit positions. Each lane
// is therefore extracted, multiplied on its own, truncated to 8 bits and
// written back to its original position. The low 8 bits of the product are
// the same whether the lane is interpreted as signed or unsigned.
__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
    char4 out;
    const unsigned a = in1.val;
    const unsigned b = in2.val;
    unsigned packed = 0u;

    for (int shift = 0; shift < 32; shift += 8) {
        const unsigned product = ((a >> shift) & 0xffu) * ((b >> shift) & 0xffu);
        packed |= (product & 0xffu) << shift;
    }

    out.val = packed;
    return out;
}
//=================================================================================================
// Thread-local storage:
//=================================================================================================