diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime.h b/projects/hip/include/hip/hcc_detail/hip_runtime.h index b1edef18d7..b501f0b165 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime.h @@ -627,6 +627,10 @@ __device__ static inline void* free(void *ptr) return __hip_hc_free(ptr); } +extern "C" __device__ char4 __hip_hc_add8pk(char4, char4); +extern "C" __device__ char4 __hip_hc_sub8pk(char4, char4); +extern "C" __device__ char4 __hip_hc_mul8pk(char4, char4); + #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) #define HIP_KERNEL_NAME(...) __VA_ARGS__ diff --git a/projects/hip/include/hip/hcc_detail/hip_vector_types.h b/projects/hip/include/hip/hcc_detail/hip_vector_types.h index 5c2e48026e..932e271527 100644 --- a/projects/hip/include/hip/hcc_detail/hip_vector_types.h +++ b/projects/hip/include/hip/hcc_detail/hip_vector_types.h @@ -32,7 +32,7 @@ THE SOFTWARE. #error("This version of HIP requires a newer version of HCC."); #endif -#if __HCC__ +#if 0 #include using namespace hc::short_vector; @@ -137,8 +137,24 @@ struct uchar3 unsigned char x, y, z; }; -struct __hip_align(char4, 4, signed char x; signed char y; signed char z; signed char w;); -struct __hip_align(uchar4, 4, unsigned char x; unsigned char y; unsigned char z; unsigned char w;); +struct char4 +{ + union { + signed char x, y, z, w; + unsigned int val; + }; +}; + +struct uchar4 +{ + union { + unsigned char x, y, z, w; + unsigned int val; + }; +}; + +//struct __hip_align(char4, 4, signed char x; signed char y; signed char z; signed char w;); +//struct __hip_align(uchar4, 4, unsigned char x; unsigned char y; unsigned char z; unsigned char w;); struct __hip_align(short1, 2, signed short x;); struct __hip_align(ushort1, 2, unsigned short x;); diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index d066079fc0..3ebe9c3647 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -217,6 +217,42 @@ __device__ float __hip_ds_swizzlef(float src, int pattern) { __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl) { return hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); } + +#define MASK1 0x00ff00ff +#define MASK2 0xff00ff00 + +__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.val & MASK1; + unsigned one2 = in2.val & MASK1; + out.val = (one1 + one2) & MASK1; + one1 = in1.val & MASK2; + one2 = in2.val & MASK2; + out.val = out.val | ((one1 + one2) & MASK2); + return out; +} + +__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.val & MASK1; + unsigned one2 = in2.val & MASK1; + out.val = (one1 - one2) & MASK1; + one1 = in1.val & MASK2; + one2 = in2.val & MASK2; + out.val = out.val | ((one1 - one2) & MASK2); + return out; +} + +__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.val & MASK1; + unsigned one2 = in2.val & MASK1; + out.val = (one1 * one2) & MASK1; + one1 = in1.val & MASK2; + one2 = in2.val & MASK2; + out.val = out.val | ((one1 * one2) & MASK2); + return out; +} //================================================================================================= // Thread-local storage: //=================================================================================================