Added i8 packed math intrinsics
1. Added add, sub, mul packed math i8 intrinsics
2. Removed c++ packed data structures included from HCC
Change-Id: I1d109c5ce10c48b7cd3ea059478b88fc1de78499
TODO: Add better packed data structures support
[ROCm/hip commit: 12dd9df88f]
Bu işleme şunda yer alıyor:
@@ -627,6 +627,10 @@ __device__ static inline void* free(void *ptr)
|
||||
return __hip_hc_free(ptr);
|
||||
}
|
||||
|
||||
extern "C" __device__ char4 __hip_hc_add8pk(char4, char4);
|
||||
extern "C" __device__ char4 __hip_hc_sub8pk(char4, char4);
|
||||
extern "C" __device__ char4 __hip_hc_mul8pk(char4, char4);
|
||||
|
||||
#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
|
||||
|
||||
#define HIP_KERNEL_NAME(...) __VA_ARGS__
|
||||
|
||||
@@ -32,7 +32,7 @@ THE SOFTWARE.
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
#endif
|
||||
|
||||
#if __HCC__
|
||||
#if 0
|
||||
#include <hc_short_vector.hpp>
|
||||
|
||||
using namespace hc::short_vector;
|
||||
@@ -137,8 +137,24 @@ struct uchar3
|
||||
unsigned char x, y, z;
|
||||
};
|
||||
|
||||
struct __hip_align(char4, 4, signed char x; signed char y; signed char z; signed char w;);
|
||||
struct __hip_align(uchar4, 4, unsigned char x; unsigned char y; unsigned char z; unsigned char w;);
|
||||
struct char4
|
||||
{
|
||||
union {
|
||||
signed char x, y, z, w;
|
||||
unsigned int val;
|
||||
};
|
||||
};
|
||||
|
||||
struct uchar4
|
||||
{
|
||||
union {
|
||||
unsigned char x, y, z, w;
|
||||
unsigned int val;
|
||||
};
|
||||
};
|
||||
|
||||
//struct __hip_align(char4, 4, signed char x; signed char y; signed char z; signed char w;);
|
||||
//struct __hip_align(uchar4, 4, unsigned char x; unsigned char y; unsigned char z; unsigned char w;);
|
||||
|
||||
struct __hip_align(short1, 2, signed short x;);
|
||||
struct __hip_align(ushort1, 2, unsigned short x;);
|
||||
|
||||
@@ -217,6 +217,42 @@ __device__ float __hip_ds_swizzlef(float src, int pattern) {
|
||||
__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl) {
|
||||
return hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
|
||||
}
|
||||
|
||||
#define MASK1 0x00ff00ff
|
||||
#define MASK2 0xff00ff00
|
||||
|
||||
__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) {
|
||||
char4 out;
|
||||
unsigned one1 = in1.val & MASK1;
|
||||
unsigned one2 = in2.val & MASK1;
|
||||
out.val = (one1 + one2) & MASK1;
|
||||
one1 = in1.val & MASK2;
|
||||
one2 = in2.val & MASK2;
|
||||
out.val = out.val | ((one1 + one2) & MASK2);
|
||||
return out;
|
||||
}
|
||||
|
||||
__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
|
||||
char4 out;
|
||||
unsigned one1 = in1.val & MASK1;
|
||||
unsigned one2 = in2.val & MASK1;
|
||||
out.val = (one1 - one2) & MASK1;
|
||||
one1 = in1.val & MASK2;
|
||||
one2 = in2.val & MASK2;
|
||||
out.val = out.val | ((one1 - one2) & MASK2);
|
||||
return out;
|
||||
}
|
||||
|
||||
__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
|
||||
char4 out;
|
||||
unsigned one1 = in1.val & MASK1;
|
||||
unsigned one2 = in2.val & MASK1;
|
||||
out.val = (one1 * one2) & MASK1;
|
||||
one1 = in1.val & MASK2;
|
||||
one2 = in2.val & MASK2;
|
||||
out.val = out.val | ((one1 * one2) & MASK2);
|
||||
return out;
|
||||
}
|
||||
//=================================================================================================
|
||||
// Thread-local storage:
|
||||
//=================================================================================================
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle