From fa370fcf19c80f10d61342e7aacdd2efd4b93369 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sat, 16 Jun 2018 22:59:36 +0100 Subject: [PATCH 01/40] Revert "Revert "Switch over to using native vector types, for better codegen. Remove noise."" This reverts commit d137271083b0723228a285c5fd68e848f2d0e521. --- .../include/hip/hcc_detail/hip_vector_types.h | 4107 +-------- .../hip/hcc_detail/texture_functions.h | 42 +- hipamd/tests/src/deviceLib/hipVectorTypes.cpp | 7924 +---------------- .../src/deviceLib/hipVectorTypesDevice.cpp | 4296 +-------- .../tests/src/deviceLib/vector_test_common.h | 105 + 5 files changed, 488 insertions(+), 15986 deletions(-) create mode 100644 hipamd/tests/src/deviceLib/vector_test_common.h diff --git a/hipamd/include/hip/hcc_detail/hip_vector_types.h b/hipamd/include/hip/hcc_detail/hip_vector_types.h index 7cd250e257..59b9c247e3 100644 --- a/hipamd/include/hip/hcc_detail/hip_vector_types.h +++ b/hipamd/include/hip/hcc_detail/hip_vector_types.h @@ -34,1132 +34,99 @@ THE SOFTWARE. 
#include "hip/hcc_detail/host_defines.h" -#define MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(type) \ - __device__ __host__ type() {} \ - __device__ __host__ type(const type& val) : x(val.x) {} \ - __device__ __host__ ~type() {} - -#define MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(type) \ - __device__ __host__ type() {} \ - __device__ __host__ type(const type& val) : x(val.x), y(val.y) {} \ - __device__ __host__ ~type() {} - -#define MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(type) \ - __device__ __host__ type() {} \ - __device__ __host__ type(const type& val) : x(val.x), y(val.y), z(val.z) {} \ - __device__ __host__ ~type() {} - -#define MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(type) \ - __device__ __host__ type() {} \ - __device__ __host__ type(const type& val) : x(val.x), y(val.y), z(val.z), w(val.w) {} \ - __device__ __host__ ~type() {} - -#define MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(type, type1) \ - __device__ __host__ type(type1 val) : x(val) {} - -#define MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(type, type1) \ - __device__ __host__ type(type1 val) : x(val), y(val) {} \ - __device__ __host__ type(type1 val1, type1 val2) : x(val1), y(val2) {} - -#define MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(type, type1) \ - __device__ __host__ type(type1 val) : x(val), y(val), z(val) {} \ - __device__ __host__ type(type1 val1, type1 val2, type1 val3) : x(val1), y(val2), z(val3) {} - -#define MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(type, type1) \ - __device__ __host__ type(type1 val) : x(val), y(val), z(val), w(val) {} \ - __device__ __host__ type(type1 val1, type1 val2, type1 val3, type1 val4) \ - : x(val1), y(val2), z(val3), w(val4) {} - -struct uchar1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(uchar1) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, signed char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, 
signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uchar1, signed long long) - +#if defined(__clang__) + #define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n))) +#elif defined(__GNUC__) // N.B.: GCC does not support .xyzw syntax. + #define __ROUND_UP_TO_NEXT_POT__(x) \ + (1 << (31 - __builtin_clz(x) + (x > (1 << (31 - __builtin_clz(x)))))) + #define __NATIVE_VECTOR__(n, T) \ + __attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T)))) #endif - unsigned char x; -} __attribute__((aligned(1))); - -struct uchar2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(uchar2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uchar2, signed long long) -#endif - union { - struct { - unsigned char x, y; - }; - unsigned short a; - }; -} __attribute__((aligned(2))); - -struct uchar3 { -#ifdef 
__cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(uchar3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uchar3, signed long long) -#endif - unsigned char x, y, z; -}; - -struct uchar4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(uchar4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uchar4, signed long long) -#endif - union { - struct { - unsigned char x, y, z, w; - }; - unsigned int a; - }; -} __attribute__((aligned(4))); - - -struct char1 { -#ifdef __cplusplus - public: - 
MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(char1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, signed char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(char1, signed long long) -#endif - signed char x; -} __attribute__((aligned(1))); - -struct char2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(char2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(char2, signed long long) -#endif - union { - struct { - signed char x, y; - }; - unsigned short a; - }; -} __attribute__((aligned(2))); - -struct char3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(char3) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, 
unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(char3, signed long long) -#endif - signed char x, y, z; -}; - -struct char4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(char4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(char4, signed long long) -#endif - union { - struct { - signed char x, y, z, w; - }; - unsigned int a; - }; -} __attribute__((aligned(4))); - - -struct ushort1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(ushort1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, signed char) 
- MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ushort1, signed long long) -#endif - unsigned short x; -} __attribute__((aligned(2))); - -struct ushort2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(ushort2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ushort2, signed long long) -#endif - union { - struct { - unsigned short x, y; - }; - unsigned int a; - }; -} __attribute__((aligned(4))); - -struct ushort3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(ushort3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, signed char) - 
MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ushort3, signed long long) -#endif - unsigned short x, y, z; -}; - -struct ushort4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(ushort4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ushort4, signed long long) -#endif - union { - struct { - unsigned short x, y, z, w; - }; - unsigned int a, b; - }; -} __attribute__((aligned(8))); - -struct short1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(short1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, signed char) - 
MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(short1, signed long long) -#endif - signed short x; -} __attribute__((aligned(2))); - -struct short2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(short2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(short2, signed long long) -#endif - union { - struct { - signed short x, y; - }; - unsigned int a; - }; - -} __attribute__((aligned(4))); - -struct short3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(short3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, unsigned short) - 
MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(short3, signed long long) -#endif - signed short x, y, z; -}; - -struct short4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(short4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(short4, signed long long) -#endif - union { - struct { - signed short x, y, z, w; - }; - unsigned int a, b; - }; -} __attribute__((aligned(8))); - - -struct uint1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(uint1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, signed char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, signed short) 
- MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(uint1, signed long long) -#endif - unsigned int x; -} __attribute__((aligned(4))); - -struct uint2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(uint2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(uint2, signed long long) -#endif - unsigned int x, y; -} __attribute__((aligned(8))); - -struct uint3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(uint3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, signed int) - 
MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(uint3, signed long long) -#endif - unsigned int x, y, z; -}; - -struct uint4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(uint4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(uint4, signed long long) -#endif - unsigned int x, y, z, w; -} __attribute__((aligned(16))); - -struct int1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(int1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, signed char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, double) - 
MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(int1, signed long long) -#endif - signed int x; -} __attribute__((aligned(4))); - -struct int2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(int2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(int2, signed long long) -#endif - signed int x, y; -} __attribute__((aligned(8))); - -struct int3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(int3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, signed long) - 
MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(int3, signed long long) -#endif - signed int x, y, z; -}; - -struct int4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(int4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(int4, signed long long) -#endif - signed int x, y, z, w; -} __attribute__((aligned(16))); - - -struct float1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(float1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, signed char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(float1, signed long long) -#endif - float x; 
-} __attribute__((aligned(4))); - -struct float2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(float2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(float2, signed long long) -#endif - float x, y; -} __attribute__((aligned(8))); - -struct float3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(float3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(float3, signed long long) -#endif - float x, y, z; -}; - -struct float4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(float4) - - 
MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(float4, signed long long) -#endif - float x, y, z, w; -} __attribute__((aligned(16))); - - -struct double1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(double1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, signed char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(double1, signed long long) -#endif - double x; -} __attribute__((aligned(8))); - -struct double2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(double2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, unsigned char) - 
MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(double2, signed long long) -#endif - double x, y; -} __attribute__((aligned(16))); - -struct double3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(double3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(double3, signed long long) -#endif - double x, y, z; -}; - -struct double4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(double4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, signed char) - 
MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(double4, signed long long) -#endif - double x, y, z, w; -} __attribute__((aligned(32))); - - -struct ulong1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(ulong1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, signed char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulong1, signed long long) -#endif - unsigned long x; -} __attribute__((aligned(8))); - -struct ulong2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(ulong2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, unsigned short) - 
MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulong2, signed long long) -#endif - unsigned long x, y; -} __attribute__((aligned(16))); - -struct ulong3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(ulong3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulong3, signed long long) -#endif - unsigned long x, y, z; -}; - -struct ulong4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(ulong4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, 
unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulong4, signed long long) -#endif - unsigned long x, y, z, w; -} __attribute__((aligned(32))); - - -struct long1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(long1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, signed char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(long1, signed long long) -#endif - signed long x; -} __attribute__((aligned(8))); - -struct long2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(long2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, 
float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(long2, signed long long) -#endif - signed long x, y; -} __attribute__((aligned(16))); - -struct long3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(long3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(long3, signed long long) -#endif - signed long x, y, z; -}; - -struct long4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(long4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, unsigned long) - 
MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(long4, signed long long) -#endif - signed long x, y, z, w; -} __attribute__((aligned(32))); - - -struct ulonglong1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, signed char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(ulonglong1, signed long long) -#endif - unsigned long long x; -} __attribute__((aligned(8))); - -struct ulonglong2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, unsigned long) - 
MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ulonglong2, signed long long) -#endif - unsigned long long x, y; -} __attribute__((aligned(16))); - -struct ulonglong3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(ulonglong3, signed long long) -#endif - unsigned long long x, y, z; -}; - -struct ulonglong4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, 
unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(ulonglong4, signed long long) -#endif - unsigned long long x, y, z, w; -} __attribute__((aligned(32))); - - -struct longlong1 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_ONE_COMPONENT(longlong1) - - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, signed char) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, signed short) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, signed int) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, float) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, double) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, signed long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_ONE_COMPONENT(longlong1, signed long long) -#endif - signed long long x; -} __attribute__((aligned(8))); - -struct longlong2 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_TWO_COMPONENT(longlong2) - - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, signed char) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, unsigned long) - 
MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(longlong2, signed long long) -#endif - signed long long x, y; -} __attribute__((aligned(16))); - -struct longlong3 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_THREE_COMPONENT(longlong3) - - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, signed char) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, signed short) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, signed int) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, float) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, double) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, signed long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_THREE_COMPONENT(longlong3, signed long long) -#endif - signed long long x, y, z; -}; - -struct longlong4 { -#ifdef __cplusplus - public: - MAKE_DEFAULT_CONSTRUCTOR_FOUR_COMPONENT(longlong4) - - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, unsigned char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, signed char) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, signed short) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, signed int) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, float) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, double) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, unsigned long) - 
MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, signed long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_FOUR_COMPONENT(longlong4, signed long long) -#endif - signed long x, y, z, w; -} __attribute__((aligned(32))); - -#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ - __device__ __host__ static inline struct type make_##type(comp x) { \ - struct type ret; \ - ret.x = x; \ - return ret; \ - } - -#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ - __device__ __host__ static inline struct type make_##type(comp x, comp y) { \ - struct type ret; \ - ret.x = x; \ - ret.y = y; \ - return ret; \ - } - -#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ - __device__ __host__ static inline struct type make_##type(comp x, comp y, comp z) { \ - struct type ret; \ - ret.x = x; \ - ret.y = y; \ - ret.z = z; \ - return ret; \ - } - -#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ - __device__ __host__ static inline struct type make_##type(comp x, comp y, comp z, comp w) { \ - struct type ret; \ - ret.x = x; \ - ret.y = y; \ - ret.z = z; \ - ret.w = w; \ - return ret; \ +typedef unsigned char uchar1 __NATIVE_VECTOR__(1, unsigned char); +typedef unsigned char uchar2 __NATIVE_VECTOR__(2, unsigned char); +typedef unsigned char uchar3 __NATIVE_VECTOR__(3, unsigned char); +typedef unsigned char uchar4 __NATIVE_VECTOR__(4, unsigned char); + +typedef char char1 __NATIVE_VECTOR__(1, char); +typedef char char2 __NATIVE_VECTOR__(2, char); +typedef char char3 __NATIVE_VECTOR__(3, char); +typedef char char4 __NATIVE_VECTOR__(4, char); + +typedef unsigned short ushort1 __NATIVE_VECTOR__(1, unsigned short); +typedef unsigned short ushort2 __NATIVE_VECTOR__(2, unsigned short); +typedef unsigned short ushort3 __NATIVE_VECTOR__(3, unsigned short); +typedef unsigned short ushort4 __NATIVE_VECTOR__(4, unsigned short); + +typedef short short1 __NATIVE_VECTOR__(1, short); +typedef short short2 __NATIVE_VECTOR__(2, short); +typedef short 
short3 __NATIVE_VECTOR__(3, short); +typedef short short4 __NATIVE_VECTOR__(4, short); + +typedef unsigned int uint1 __NATIVE_VECTOR__(1, unsigned int); +typedef unsigned int uint2 __NATIVE_VECTOR__(2, unsigned int); +typedef unsigned int uint3 __NATIVE_VECTOR__(3, unsigned int); +typedef unsigned int uint4 __NATIVE_VECTOR__(4, unsigned int); + +typedef int int1 __NATIVE_VECTOR__(1, int); +typedef int int2 __NATIVE_VECTOR__(2, int); +typedef int int3 __NATIVE_VECTOR__(3, int); +typedef int int4 __NATIVE_VECTOR__(4, int); + +typedef unsigned long ulong1 __NATIVE_VECTOR__(1, unsigned long); +typedef unsigned long ulong2 __NATIVE_VECTOR__(2, unsigned long); +typedef unsigned long ulong3 __NATIVE_VECTOR__(3, unsigned long); +typedef unsigned long ulong4 __NATIVE_VECTOR__(4, unsigned long); + +typedef long long1 __NATIVE_VECTOR__(1, long); +typedef long long2 __NATIVE_VECTOR__(2, long); +typedef long long3 __NATIVE_VECTOR__(3, long); +typedef long long4 __NATIVE_VECTOR__(4, long); + +typedef unsigned long long ulonglong1 __NATIVE_VECTOR__(1, unsigned long long); +typedef unsigned long long ulonglong2 __NATIVE_VECTOR__(2, unsigned long long); +typedef unsigned long long ulonglong3 __NATIVE_VECTOR__(3, unsigned long long); +typedef unsigned long long ulonglong4 __NATIVE_VECTOR__(4, unsigned long long); + +typedef long long longlong1 __NATIVE_VECTOR__(1, long long); +typedef long long longlong2 __NATIVE_VECTOR__(2, long long); +typedef long long longlong3 __NATIVE_VECTOR__(3, long long); +typedef long long longlong4 __NATIVE_VECTOR__(4, long long); + +typedef float float1 __NATIVE_VECTOR__(1, float); +typedef float float2 __NATIVE_VECTOR__(2, float); +typedef float float3 __NATIVE_VECTOR__(3, float); +typedef float float4 __NATIVE_VECTOR__(4, float); + +typedef double double1 __NATIVE_VECTOR__(1, double); +typedef double double2 __NATIVE_VECTOR__(2, double); +typedef double double3 __NATIVE_VECTOR__(3, double); +typedef double double4 __NATIVE_VECTOR__(4, double); + 
+#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ + __device__ __host__ \ + static \ + inline \ + type make_##type(comp x) { return type{x}; } + +#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ + __device__ __host__ \ + static \ + inline \ + type make_##type(comp x, comp y) { return type{x, y}; } + +#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ + __device__ __host__ \ + static \ + inline \ + type make_##type(comp x, comp y, comp z) { return type{x, y, z}; } + +#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ + __device__ __host__ \ + static \ + inline \ + type make_##type(comp x, comp y, comp z, comp w) { \ + return type{x, y, z, w}; \ } DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1); @@ -1222,2894 +189,4 @@ DECLOP_MAKE_TWO_COMPONENT(signed long, longlong2); DECLOP_MAKE_THREE_COMPONENT(signed long, longlong3); DECLOP_MAKE_FOUR_COMPONENT(signed long, longlong4); - -#if __cplusplus - -#define DECLOP_1VAR_2IN_1OUT(type, op) \ - __device__ __host__ static inline type operator op(const type& lhs, const type& rhs) { \ - type ret; \ - ret.x = lhs.x op rhs.x; \ - return ret; \ - } - -#define DECLOP_1VAR_SCALE_PRODUCT(type, type1) \ - __device__ __host__ static inline type operator*(const type& lhs, type1 rhs) { \ - type ret; \ - ret.x = lhs.x * rhs; \ - return ret; \ - } \ - \ - __device__ __host__ static inline type operator*(type1 lhs, const type& rhs) { \ - type ret; \ - ret.x = lhs * rhs.x; \ - return ret; \ - } - -#define DECLOP_1VAR_ASSIGN(type, op) \ - __device__ __host__ static inline type& operator op(type& lhs, const type& rhs) { \ - lhs.x op rhs.x; \ - return lhs; \ - } - -#define DECLOP_1VAR_PREOP(type, op) \ - __device__ __host__ static inline type& operator op(type& val) { \ - op val.x; \ - return val; \ - } - -#define DECLOP_1VAR_POSTOP(type, op) \ - __device__ __host__ static inline type operator op(type& val, int) { \ - type ret; \ - ret.x = val.x; \ - val.x op; \ - return ret; \ - } - -#define DECLOP_1VAR_COMP(type, op) \ - __device__ __host__ static 
inline bool operator op(type& lhs, type& rhs) { \ - return lhs.x op rhs.x; \ - } \ - __device__ __host__ static inline bool operator op(const type& lhs, type& rhs) { \ - return lhs.x op rhs.x; \ - } \ - __device__ __host__ static inline bool operator op(type& lhs, const type& rhs) { \ - return lhs.x op rhs.x; \ - } \ - __device__ __host__ static inline bool operator op(const type& lhs, const type& rhs) { \ - return lhs.x op rhs.x; \ - } - -#define DECLOP_1VAR_1IN_1OUT(type, op) \ - __device__ __host__ static inline type operator op(type& rhs) { \ - type ret; \ - ret.x = op rhs.x; \ - return ret; \ - } - -#define DECLOP_1VAR_1IN_BOOLOUT(type, op) \ - __device__ __host__ static inline bool operator op(type& rhs) { return op rhs.x; } - -/* - Two Element Access -*/ - -#define DECLOP_2VAR_2IN_1OUT(type, op) \ - __device__ __host__ static inline type operator op(const type& lhs, const type& rhs) { \ - type ret; \ - ret.x = lhs.x op rhs.x; \ - ret.y = lhs.y op rhs.y; \ - return ret; \ - } - -#define DECLOP_2VAR_SCALE_PRODUCT(type, type1) \ - __device__ __host__ static inline type operator*(const type& lhs, type1 rhs) { \ - type ret; \ - ret.x = lhs.x * rhs; \ - ret.y = lhs.y * rhs; \ - return ret; \ - } \ - \ - __device__ __host__ static inline type operator*(type1 lhs, const type& rhs) { \ - type ret; \ - ret.x = lhs * rhs.x; \ - ret.y = lhs * rhs.y; \ - return ret; \ - } - -#define DECLOP_2VAR_ASSIGN(type, op) \ - __device__ __host__ static inline type& operator op(type& lhs, const type& rhs) { \ - lhs.x op rhs.x; \ - lhs.y op rhs.y; \ - return lhs; \ - } - -#define DECLOP_2VAR_PREOP(type, op) \ - __device__ __host__ static inline type& operator op(type& val) { \ - op val.x; \ - op val.y; \ - return val; \ - } - -#define DECLOP_2VAR_POSTOP(type, op) \ - __device__ __host__ static inline type operator op(type& val, int) { \ - type ret; \ - ret.x = val.x; \ - ret.y = val.y; \ - val.x op; \ - val.y op; \ - return ret; \ - } - -#define DECLOP_2VAR_COMP(type, op) \ - 
__device__ __host__ static inline bool operator op(type& lhs, type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y); \ - } \ - __device__ __host__ static inline bool operator op(const type& lhs, type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y); \ - } \ - __device__ __host__ static inline bool operator op(type& lhs, const type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y); \ - } \ - __device__ __host__ static inline bool operator op(const type& lhs, const type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y); \ - } - -#define DECLOP_2VAR_1IN_1OUT(type, op) \ - __device__ __host__ static inline type operator op(type& rhs) { \ - type ret; \ - ret.x = op rhs.x; \ - ret.y = op rhs.y; \ - return ret; \ - } - -#define DECLOP_2VAR_1IN_BOOLOUT(type, op) \ - __device__ __host__ static inline bool operator op(type& rhs) { \ - return (op rhs.x) && (op rhs.y); \ - } - - -/* - Three Element Access -*/ - -#define DECLOP_3VAR_2IN_1OUT(type, op) \ - __device__ __host__ static inline type operator op(const type& lhs, const type& rhs) { \ - type ret; \ - ret.x = lhs.x op rhs.x; \ - ret.y = lhs.y op rhs.y; \ - ret.z = lhs.z op rhs.z; \ - return ret; \ - } - -#define DECLOP_3VAR_SCALE_PRODUCT(type, type1) \ - __device__ __host__ static inline type operator*(const type& lhs, type1 rhs) { \ - type ret; \ - ret.x = lhs.x * rhs; \ - ret.y = lhs.y * rhs; \ - ret.z = lhs.z * rhs; \ - return ret; \ - } \ - \ - __device__ __host__ static inline type operator*(type1 lhs, const type& rhs) { \ - type ret; \ - ret.x = lhs * rhs.x; \ - ret.y = lhs * rhs.y; \ - ret.z = lhs * rhs.z; \ - return ret; \ - } - -#define DECLOP_3VAR_ASSIGN(type, op) \ - __device__ __host__ static inline type& operator op(type& lhs, const type& rhs) { \ - lhs.x op rhs.x; \ - lhs.y op rhs.y; \ - lhs.z op rhs.z; \ - return lhs; \ - } - -#define DECLOP_3VAR_PREOP(type, op) \ - __device__ __host__ static inline type& operator op(type& val) { \ - op val.x; \ - op val.y; \ - op val.z; \ - 
return val; \ - } - -#define DECLOP_3VAR_POSTOP(type, op) \ - __device__ __host__ static inline type operator op(type& val, int) { \ - type ret; \ - ret.x = val.x; \ - ret.y = val.y; \ - ret.z = val.z; \ - val.x op; \ - val.y op; \ - val.z op; \ - return ret; \ - } - -#define DECLOP_3VAR_COMP(type, op) \ - __device__ __host__ static inline bool operator op(type& lhs, type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \ - } \ - __device__ __host__ static inline bool operator op(const type& lhs, type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \ - } \ - __device__ __host__ static inline bool operator op(type& lhs, const type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \ - } \ - __device__ __host__ static inline bool operator op(const type& lhs, const type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \ - } - -#define DECLOP_3VAR_1IN_1OUT(type, op) \ - __device__ __host__ static inline type operator op(type& rhs) { \ - type ret; \ - ret.x = op rhs.x; \ - ret.y = op rhs.y; \ - ret.z = op rhs.z; \ - return ret; \ - } - -#define DECLOP_3VAR_1IN_BOOLOUT(type, op) \ - __device__ __host__ static inline bool operator op(type& rhs) { \ - return (op rhs.x) && (op rhs.y) && (op rhs.z); \ - } - - -/* - Four Element Access -*/ - -#define DECLOP_4VAR_2IN_1OUT(type, op) \ - __device__ __host__ static inline type operator op(const type& lhs, const type& rhs) { \ - type ret; \ - ret.x = lhs.x op rhs.x; \ - ret.y = lhs.y op rhs.y; \ - ret.z = lhs.z op rhs.z; \ - ret.w = lhs.w op rhs.w; \ - return ret; \ - } - -#define DECLOP_4VAR_SCALE_PRODUCT(type, type1) \ - __device__ __host__ static inline type operator*(const type& lhs, type1 rhs) { \ - type ret; \ - ret.x = lhs.x * rhs; \ - ret.y = lhs.y * rhs; \ - ret.z = lhs.z * rhs; \ - ret.w = lhs.w * rhs; \ - return ret; \ - } \ - \ - __device__ __host__ static inline type operator*(type1 lhs, const type& rhs) { \ 
- type ret; \ - ret.x = lhs * rhs.x; \ - ret.y = lhs * rhs.y; \ - ret.z = lhs * rhs.z; \ - ret.w = lhs * rhs.w; \ - return ret; \ - } - -#define DECLOP_4VAR_ASSIGN(type, op) \ - __device__ __host__ static inline type& operator op(type& lhs, const type& rhs) { \ - lhs.x op rhs.x; \ - lhs.y op rhs.y; \ - lhs.z op rhs.z; \ - lhs.w op rhs.w; \ - return lhs; \ - } - -#define DECLOP_4VAR_PREOP(type, op) \ - __device__ __host__ static inline type& operator op(type& val) { \ - op val.x; \ - op val.y; \ - op val.z; \ - op val.w; \ - return val; \ - } - -#define DECLOP_4VAR_POSTOP(type, op) \ - __device__ __host__ static inline type operator op(type& val, int) { \ - type ret; \ - ret.x = val.x; \ - ret.y = val.y; \ - ret.z = val.z; \ - ret.w = val.w; \ - val.x op; \ - val.y op; \ - val.z op; \ - val.w op; \ - return ret; \ - } - -#define DECLOP_4VAR_COMP(type, op) \ - __device__ __host__ static inline bool operator op(type& lhs, type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \ - } \ - __device__ __host__ static inline bool operator op(const type& lhs, type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \ - } \ - __device__ __host__ static inline bool operator op(type& lhs, const type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \ - } \ - __device__ __host__ static inline bool operator op(const type& lhs, const type& rhs) { \ - return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \ - } - -#define DECLOP_4VAR_1IN_1OUT(type, op) \ - __device__ __host__ static inline type operator op(type& rhs) { \ - type ret; \ - ret.x = op rhs.x; \ - ret.y = op rhs.y; \ - ret.z = op rhs.z; \ - ret.w = op rhs.w; \ - return ret; \ - } - -#define DECLOP_4VAR_1IN_BOOLOUT(type, op) \ - __device__ __host__ static inline bool operator op(type& rhs) { \ - return (op rhs.x) && (op rhs.y) && (op rhs.z) && (op rhs.w); 
\ - } - - -/* -Overloading operators -*/ - -// UNSIGNED CHAR1 - -DECLOP_1VAR_2IN_1OUT(uchar1, +) -DECLOP_1VAR_2IN_1OUT(uchar1, -) -DECLOP_1VAR_2IN_1OUT(uchar1, *) -DECLOP_1VAR_2IN_1OUT(uchar1, /) -DECLOP_1VAR_2IN_1OUT(uchar1, %) -DECLOP_1VAR_2IN_1OUT(uchar1, &) -DECLOP_1VAR_2IN_1OUT(uchar1, |) -DECLOP_1VAR_2IN_1OUT(uchar1, ^) -DECLOP_1VAR_2IN_1OUT(uchar1, <<) -DECLOP_1VAR_2IN_1OUT(uchar1, >>) - - -DECLOP_1VAR_ASSIGN(uchar1, +=) -DECLOP_1VAR_ASSIGN(uchar1, -=) -DECLOP_1VAR_ASSIGN(uchar1, *=) -DECLOP_1VAR_ASSIGN(uchar1, /=) -DECLOP_1VAR_ASSIGN(uchar1, %=) -DECLOP_1VAR_ASSIGN(uchar1, &=) -DECLOP_1VAR_ASSIGN(uchar1, |=) -DECLOP_1VAR_ASSIGN(uchar1, ^=) -DECLOP_1VAR_ASSIGN(uchar1, <<=) -DECLOP_1VAR_ASSIGN(uchar1, >>=) - -DECLOP_1VAR_PREOP(uchar1, ++) -DECLOP_1VAR_PREOP(uchar1, --) - -DECLOP_1VAR_POSTOP(uchar1, ++) -DECLOP_1VAR_POSTOP(uchar1, --) - -DECLOP_1VAR_COMP(uchar1, ==) -DECLOP_1VAR_COMP(uchar1, !=) -DECLOP_1VAR_COMP(uchar1, <) -DECLOP_1VAR_COMP(uchar1, >) -DECLOP_1VAR_COMP(uchar1, <=) -DECLOP_1VAR_COMP(uchar1, >=) - -DECLOP_1VAR_COMP(uchar1, &&) -DECLOP_1VAR_COMP(uchar1, ||) - -DECLOP_1VAR_1IN_1OUT(uchar1, ~) -DECLOP_1VAR_1IN_BOOLOUT(uchar1, !) 
- -DECLOP_1VAR_SCALE_PRODUCT(uchar1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, float) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, double) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(uchar1, signed long long) - -// UNSIGNED CHAR2 - -DECLOP_2VAR_2IN_1OUT(uchar2, +) -DECLOP_2VAR_2IN_1OUT(uchar2, -) -DECLOP_2VAR_2IN_1OUT(uchar2, *) -DECLOP_2VAR_2IN_1OUT(uchar2, /) -DECLOP_2VAR_2IN_1OUT(uchar2, %) -DECLOP_2VAR_2IN_1OUT(uchar2, &) -DECLOP_2VAR_2IN_1OUT(uchar2, |) -DECLOP_2VAR_2IN_1OUT(uchar2, ^) -DECLOP_2VAR_2IN_1OUT(uchar2, <<) -DECLOP_2VAR_2IN_1OUT(uchar2, >>) - -DECLOP_2VAR_ASSIGN(uchar2, +=) -DECLOP_2VAR_ASSIGN(uchar2, -=) -DECLOP_2VAR_ASSIGN(uchar2, *=) -DECLOP_2VAR_ASSIGN(uchar2, /=) -DECLOP_2VAR_ASSIGN(uchar2, %=) -DECLOP_2VAR_ASSIGN(uchar2, &=) -DECLOP_2VAR_ASSIGN(uchar2, |=) -DECLOP_2VAR_ASSIGN(uchar2, ^=) -DECLOP_2VAR_ASSIGN(uchar2, <<=) -DECLOP_2VAR_ASSIGN(uchar2, >>=) - -DECLOP_2VAR_PREOP(uchar2, ++) -DECLOP_2VAR_PREOP(uchar2, --) - -DECLOP_2VAR_POSTOP(uchar2, ++) -DECLOP_2VAR_POSTOP(uchar2, --) - -DECLOP_2VAR_COMP(uchar2, ==) -DECLOP_2VAR_COMP(uchar2, !=) -DECLOP_2VAR_COMP(uchar2, <) -DECLOP_2VAR_COMP(uchar2, >) -DECLOP_2VAR_COMP(uchar2, <=) -DECLOP_2VAR_COMP(uchar2, >=) - -DECLOP_2VAR_COMP(uchar2, &&) -DECLOP_2VAR_COMP(uchar2, ||) - -DECLOP_2VAR_1IN_1OUT(uchar2, ~) -DECLOP_2VAR_1IN_BOOLOUT(uchar2, !) 
- -DECLOP_2VAR_SCALE_PRODUCT(uchar2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, float) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, double) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(uchar2, signed long long) - -// UNSIGNED CHAR3 - -DECLOP_3VAR_2IN_1OUT(uchar3, +) -DECLOP_3VAR_2IN_1OUT(uchar3, -) -DECLOP_3VAR_2IN_1OUT(uchar3, *) -DECLOP_3VAR_2IN_1OUT(uchar3, /) -DECLOP_3VAR_2IN_1OUT(uchar3, %) -DECLOP_3VAR_2IN_1OUT(uchar3, &) -DECLOP_3VAR_2IN_1OUT(uchar3, |) -DECLOP_3VAR_2IN_1OUT(uchar3, ^) -DECLOP_3VAR_2IN_1OUT(uchar3, <<) -DECLOP_3VAR_2IN_1OUT(uchar3, >>) - -DECLOP_3VAR_ASSIGN(uchar3, +=) -DECLOP_3VAR_ASSIGN(uchar3, -=) -DECLOP_3VAR_ASSIGN(uchar3, *=) -DECLOP_3VAR_ASSIGN(uchar3, /=) -DECLOP_3VAR_ASSIGN(uchar3, %=) -DECLOP_3VAR_ASSIGN(uchar3, &=) -DECLOP_3VAR_ASSIGN(uchar3, |=) -DECLOP_3VAR_ASSIGN(uchar3, ^=) -DECLOP_3VAR_ASSIGN(uchar3, <<=) -DECLOP_3VAR_ASSIGN(uchar3, >>=) - -DECLOP_3VAR_PREOP(uchar3, ++) -DECLOP_3VAR_PREOP(uchar3, --) - -DECLOP_3VAR_POSTOP(uchar3, ++) -DECLOP_3VAR_POSTOP(uchar3, --) - -DECLOP_3VAR_COMP(uchar3, ==) -DECLOP_3VAR_COMP(uchar3, !=) -DECLOP_3VAR_COMP(uchar3, <) -DECLOP_3VAR_COMP(uchar3, >) -DECLOP_3VAR_COMP(uchar3, <=) -DECLOP_3VAR_COMP(uchar3, >=) - -DECLOP_3VAR_COMP(uchar3, &&) -DECLOP_3VAR_COMP(uchar3, ||) - -DECLOP_3VAR_1IN_1OUT(uchar3, ~) -DECLOP_3VAR_1IN_BOOLOUT(uchar3, !) 
- -DECLOP_3VAR_SCALE_PRODUCT(uchar3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, float) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, double) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(uchar3, signed long long) - -// UNSIGNED CHAR4 - -DECLOP_4VAR_2IN_1OUT(uchar4, +) -DECLOP_4VAR_2IN_1OUT(uchar4, -) -DECLOP_4VAR_2IN_1OUT(uchar4, *) -DECLOP_4VAR_2IN_1OUT(uchar4, /) -DECLOP_4VAR_2IN_1OUT(uchar4, %) -DECLOP_4VAR_2IN_1OUT(uchar4, &) -DECLOP_4VAR_2IN_1OUT(uchar4, |) -DECLOP_4VAR_2IN_1OUT(uchar4, ^) -DECLOP_4VAR_2IN_1OUT(uchar4, <<) -DECLOP_4VAR_2IN_1OUT(uchar4, >>) - -DECLOP_4VAR_ASSIGN(uchar4, +=) -DECLOP_4VAR_ASSIGN(uchar4, -=) -DECLOP_4VAR_ASSIGN(uchar4, *=) -DECLOP_4VAR_ASSIGN(uchar4, /=) -DECLOP_4VAR_ASSIGN(uchar4, %=) -DECLOP_4VAR_ASSIGN(uchar4, &=) -DECLOP_4VAR_ASSIGN(uchar4, |=) -DECLOP_4VAR_ASSIGN(uchar4, ^=) -DECLOP_4VAR_ASSIGN(uchar4, <<=) -DECLOP_4VAR_ASSIGN(uchar4, >>=) - -DECLOP_4VAR_PREOP(uchar4, ++) -DECLOP_4VAR_PREOP(uchar4, --) - -DECLOP_4VAR_POSTOP(uchar4, ++) -DECLOP_4VAR_POSTOP(uchar4, --) - -DECLOP_4VAR_COMP(uchar4, ==) -DECLOP_4VAR_COMP(uchar4, !=) -DECLOP_4VAR_COMP(uchar4, <) -DECLOP_4VAR_COMP(uchar4, >) -DECLOP_4VAR_COMP(uchar4, <=) -DECLOP_4VAR_COMP(uchar4, >=) - -DECLOP_4VAR_COMP(uchar4, &&) -DECLOP_4VAR_COMP(uchar4, ||) - -DECLOP_4VAR_1IN_1OUT(uchar4, ~) -DECLOP_4VAR_1IN_BOOLOUT(uchar4, !) 
- -DECLOP_4VAR_SCALE_PRODUCT(uchar4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, float) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, double) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(uchar4, signed long long) - -// SIGNED CHAR1 - -DECLOP_1VAR_2IN_1OUT(char1, +) -DECLOP_1VAR_2IN_1OUT(char1, -) -DECLOP_1VAR_2IN_1OUT(char1, *) -DECLOP_1VAR_2IN_1OUT(char1, /) -DECLOP_1VAR_2IN_1OUT(char1, %) -DECLOP_1VAR_2IN_1OUT(char1, &) -DECLOP_1VAR_2IN_1OUT(char1, |) -DECLOP_1VAR_2IN_1OUT(char1, ^) -DECLOP_1VAR_2IN_1OUT(char1, <<) -DECLOP_1VAR_2IN_1OUT(char1, >>) - - -DECLOP_1VAR_ASSIGN(char1, +=) -DECLOP_1VAR_ASSIGN(char1, -=) -DECLOP_1VAR_ASSIGN(char1, *=) -DECLOP_1VAR_ASSIGN(char1, /=) -DECLOP_1VAR_ASSIGN(char1, %=) -DECLOP_1VAR_ASSIGN(char1, &=) -DECLOP_1VAR_ASSIGN(char1, |=) -DECLOP_1VAR_ASSIGN(char1, ^=) -DECLOP_1VAR_ASSIGN(char1, <<=) -DECLOP_1VAR_ASSIGN(char1, >>=) - -DECLOP_1VAR_PREOP(char1, ++) -DECLOP_1VAR_PREOP(char1, --) - -DECLOP_1VAR_POSTOP(char1, ++) -DECLOP_1VAR_POSTOP(char1, --) - -DECLOP_1VAR_COMP(char1, ==) -DECLOP_1VAR_COMP(char1, !=) -DECLOP_1VAR_COMP(char1, <) -DECLOP_1VAR_COMP(char1, >) -DECLOP_1VAR_COMP(char1, <=) -DECLOP_1VAR_COMP(char1, >=) - -DECLOP_1VAR_COMP(char1, &&) -DECLOP_1VAR_COMP(char1, ||) - -DECLOP_1VAR_1IN_1OUT(char1, ~) -DECLOP_1VAR_1IN_BOOLOUT(char1, !) 
- -DECLOP_1VAR_SCALE_PRODUCT(char1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(char1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(char1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(char1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(char1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(char1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(char1, float) -DECLOP_1VAR_SCALE_PRODUCT(char1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(char1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(char1, double) -DECLOP_1VAR_SCALE_PRODUCT(char1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(char1, signed long long) - -// SIGNED CHAR2 - -DECLOP_2VAR_2IN_1OUT(char2, +) -DECLOP_2VAR_2IN_1OUT(char2, -) -DECLOP_2VAR_2IN_1OUT(char2, *) -DECLOP_2VAR_2IN_1OUT(char2, /) -DECLOP_2VAR_2IN_1OUT(char2, %) -DECLOP_2VAR_2IN_1OUT(char2, &) -DECLOP_2VAR_2IN_1OUT(char2, |) -DECLOP_2VAR_2IN_1OUT(char2, ^) -DECLOP_2VAR_2IN_1OUT(char2, <<) -DECLOP_2VAR_2IN_1OUT(char2, >>) - -DECLOP_2VAR_ASSIGN(char2, +=) -DECLOP_2VAR_ASSIGN(char2, -=) -DECLOP_2VAR_ASSIGN(char2, *=) -DECLOP_2VAR_ASSIGN(char2, /=) -DECLOP_2VAR_ASSIGN(char2, %=) -DECLOP_2VAR_ASSIGN(char2, &=) -DECLOP_2VAR_ASSIGN(char2, |=) -DECLOP_2VAR_ASSIGN(char2, ^=) -DECLOP_2VAR_ASSIGN(char2, <<=) -DECLOP_2VAR_ASSIGN(char2, >>=) - -DECLOP_2VAR_PREOP(char2, ++) -DECLOP_2VAR_PREOP(char2, --) - -DECLOP_2VAR_POSTOP(char2, ++) -DECLOP_2VAR_POSTOP(char2, --) - -DECLOP_2VAR_COMP(char2, ==) -DECLOP_2VAR_COMP(char2, !=) -DECLOP_2VAR_COMP(char2, <) -DECLOP_2VAR_COMP(char2, >) -DECLOP_2VAR_COMP(char2, <=) -DECLOP_2VAR_COMP(char2, >=) - -DECLOP_2VAR_COMP(char2, &&) -DECLOP_2VAR_COMP(char2, ||) - -DECLOP_2VAR_1IN_1OUT(char2, ~) -DECLOP_2VAR_1IN_BOOLOUT(char2, !) 
- -DECLOP_2VAR_SCALE_PRODUCT(char2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(char2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(char2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(char2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(char2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(char2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(char2, float) -DECLOP_2VAR_SCALE_PRODUCT(char2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(char2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(char2, double) -DECLOP_2VAR_SCALE_PRODUCT(char2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(char2, signed long long) - -// SIGNED CHAR3 - -DECLOP_3VAR_2IN_1OUT(char3, +) -DECLOP_3VAR_2IN_1OUT(char3, -) -DECLOP_3VAR_2IN_1OUT(char3, *) -DECLOP_3VAR_2IN_1OUT(char3, /) -DECLOP_3VAR_2IN_1OUT(char3, %) -DECLOP_3VAR_2IN_1OUT(char3, &) -DECLOP_3VAR_2IN_1OUT(char3, |) -DECLOP_3VAR_2IN_1OUT(char3, ^) -DECLOP_3VAR_2IN_1OUT(char3, <<) -DECLOP_3VAR_2IN_1OUT(char3, >>) - -DECLOP_3VAR_ASSIGN(char3, +=) -DECLOP_3VAR_ASSIGN(char3, -=) -DECLOP_3VAR_ASSIGN(char3, *=) -DECLOP_3VAR_ASSIGN(char3, /=) -DECLOP_3VAR_ASSIGN(char3, %=) -DECLOP_3VAR_ASSIGN(char3, &=) -DECLOP_3VAR_ASSIGN(char3, |=) -DECLOP_3VAR_ASSIGN(char3, ^=) -DECLOP_3VAR_ASSIGN(char3, <<=) -DECLOP_3VAR_ASSIGN(char3, >>=) - -DECLOP_3VAR_PREOP(char3, ++) -DECLOP_3VAR_PREOP(char3, --) - -DECLOP_3VAR_POSTOP(char3, ++) -DECLOP_3VAR_POSTOP(char3, --) - -DECLOP_3VAR_COMP(char3, ==) -DECLOP_3VAR_COMP(char3, !=) -DECLOP_3VAR_COMP(char3, <) -DECLOP_3VAR_COMP(char3, >) -DECLOP_3VAR_COMP(char3, <=) -DECLOP_3VAR_COMP(char3, >=) - -DECLOP_3VAR_COMP(char3, &&) -DECLOP_3VAR_COMP(char3, ||) - -DECLOP_3VAR_1IN_1OUT(char3, ~) -DECLOP_3VAR_1IN_BOOLOUT(char3, !) 
- -DECLOP_3VAR_SCALE_PRODUCT(char3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(char3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(char3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(char3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(char3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(char3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(char3, float) -DECLOP_3VAR_SCALE_PRODUCT(char3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(char3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(char3, double) -DECLOP_3VAR_SCALE_PRODUCT(char3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(char3, signed long long) - -// SIGNED CHAR4 - -DECLOP_4VAR_2IN_1OUT(char4, +) -DECLOP_4VAR_2IN_1OUT(char4, -) -DECLOP_4VAR_2IN_1OUT(char4, *) -DECLOP_4VAR_2IN_1OUT(char4, /) -DECLOP_4VAR_2IN_1OUT(char4, %) -DECLOP_4VAR_2IN_1OUT(char4, &) -DECLOP_4VAR_2IN_1OUT(char4, |) -DECLOP_4VAR_2IN_1OUT(char4, ^) -DECLOP_4VAR_2IN_1OUT(char4, <<) -DECLOP_4VAR_2IN_1OUT(char4, >>) - -DECLOP_4VAR_ASSIGN(char4, +=) -DECLOP_4VAR_ASSIGN(char4, -=) -DECLOP_4VAR_ASSIGN(char4, *=) -DECLOP_4VAR_ASSIGN(char4, /=) -DECLOP_4VAR_ASSIGN(char4, %=) -DECLOP_4VAR_ASSIGN(char4, &=) -DECLOP_4VAR_ASSIGN(char4, |=) -DECLOP_4VAR_ASSIGN(char4, ^=) -DECLOP_4VAR_ASSIGN(char4, <<=) -DECLOP_4VAR_ASSIGN(char4, >>=) - -DECLOP_4VAR_PREOP(char4, ++) -DECLOP_4VAR_PREOP(char4, --) - -DECLOP_4VAR_POSTOP(char4, ++) -DECLOP_4VAR_POSTOP(char4, --) - -DECLOP_4VAR_COMP(char4, ==) -DECLOP_4VAR_COMP(char4, !=) -DECLOP_4VAR_COMP(char4, <) -DECLOP_4VAR_COMP(char4, >) -DECLOP_4VAR_COMP(char4, <=) -DECLOP_4VAR_COMP(char4, >=) - -DECLOP_4VAR_COMP(char4, &&) -DECLOP_4VAR_COMP(char4, ||) - -DECLOP_4VAR_1IN_1OUT(char4, ~) -DECLOP_4VAR_1IN_BOOLOUT(char4, !) 
- -DECLOP_4VAR_SCALE_PRODUCT(char4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(char4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(char4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(char4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(char4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(char4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(char4, float) -DECLOP_4VAR_SCALE_PRODUCT(char4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(char4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(char4, double) -DECLOP_4VAR_SCALE_PRODUCT(char4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(char4, signed long long) - -// UNSIGNED SHORT1 - -DECLOP_1VAR_2IN_1OUT(ushort1, +) -DECLOP_1VAR_2IN_1OUT(ushort1, -) -DECLOP_1VAR_2IN_1OUT(ushort1, *) -DECLOP_1VAR_2IN_1OUT(ushort1, /) -DECLOP_1VAR_2IN_1OUT(ushort1, %) -DECLOP_1VAR_2IN_1OUT(ushort1, &) -DECLOP_1VAR_2IN_1OUT(ushort1, |) -DECLOP_1VAR_2IN_1OUT(ushort1, ^) -DECLOP_1VAR_2IN_1OUT(ushort1, <<) -DECLOP_1VAR_2IN_1OUT(ushort1, >>) - - -DECLOP_1VAR_ASSIGN(ushort1, +=) -DECLOP_1VAR_ASSIGN(ushort1, -=) -DECLOP_1VAR_ASSIGN(ushort1, *=) -DECLOP_1VAR_ASSIGN(ushort1, /=) -DECLOP_1VAR_ASSIGN(ushort1, %=) -DECLOP_1VAR_ASSIGN(ushort1, &=) -DECLOP_1VAR_ASSIGN(ushort1, |=) -DECLOP_1VAR_ASSIGN(ushort1, ^=) -DECLOP_1VAR_ASSIGN(ushort1, <<=) -DECLOP_1VAR_ASSIGN(ushort1, >>=) - -DECLOP_1VAR_PREOP(ushort1, ++) -DECLOP_1VAR_PREOP(ushort1, --) - -DECLOP_1VAR_POSTOP(ushort1, ++) -DECLOP_1VAR_POSTOP(ushort1, --) - -DECLOP_1VAR_COMP(ushort1, ==) -DECLOP_1VAR_COMP(ushort1, !=) -DECLOP_1VAR_COMP(ushort1, <) -DECLOP_1VAR_COMP(ushort1, >) -DECLOP_1VAR_COMP(ushort1, <=) -DECLOP_1VAR_COMP(ushort1, >=) - -DECLOP_1VAR_COMP(ushort1, &&) -DECLOP_1VAR_COMP(ushort1, ||) - -DECLOP_1VAR_1IN_1OUT(ushort1, ~) -DECLOP_1VAR_1IN_BOOLOUT(ushort1, !) 
- -DECLOP_1VAR_SCALE_PRODUCT(ushort1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, float) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, double) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(ushort1, signed long long) - -// UNSIGNED SHORT2 - -DECLOP_2VAR_2IN_1OUT(ushort2, +) -DECLOP_2VAR_2IN_1OUT(ushort2, -) -DECLOP_2VAR_2IN_1OUT(ushort2, *) -DECLOP_2VAR_2IN_1OUT(ushort2, /) -DECLOP_2VAR_2IN_1OUT(ushort2, %) -DECLOP_2VAR_2IN_1OUT(ushort2, &) -DECLOP_2VAR_2IN_1OUT(ushort2, |) -DECLOP_2VAR_2IN_1OUT(ushort2, ^) -DECLOP_2VAR_2IN_1OUT(ushort2, <<) -DECLOP_2VAR_2IN_1OUT(ushort2, >>) - -DECLOP_2VAR_ASSIGN(ushort2, +=) -DECLOP_2VAR_ASSIGN(ushort2, -=) -DECLOP_2VAR_ASSIGN(ushort2, *=) -DECLOP_2VAR_ASSIGN(ushort2, /=) -DECLOP_2VAR_ASSIGN(ushort2, %=) -DECLOP_2VAR_ASSIGN(ushort2, &=) -DECLOP_2VAR_ASSIGN(ushort2, |=) -DECLOP_2VAR_ASSIGN(ushort2, ^=) -DECLOP_2VAR_ASSIGN(ushort2, <<=) -DECLOP_2VAR_ASSIGN(ushort2, >>=) - -DECLOP_2VAR_PREOP(ushort2, ++) -DECLOP_2VAR_PREOP(ushort2, --) - -DECLOP_2VAR_POSTOP(ushort2, ++) -DECLOP_2VAR_POSTOP(ushort2, --) - -DECLOP_2VAR_COMP(ushort2, ==) -DECLOP_2VAR_COMP(ushort2, !=) -DECLOP_2VAR_COMP(ushort2, <) -DECLOP_2VAR_COMP(ushort2, >) -DECLOP_2VAR_COMP(ushort2, <=) -DECLOP_2VAR_COMP(ushort2, >=) - -DECLOP_2VAR_COMP(ushort2, &&) -DECLOP_2VAR_COMP(ushort2, ||) - -DECLOP_2VAR_1IN_1OUT(ushort2, ~) -DECLOP_2VAR_1IN_BOOLOUT(ushort2, !) 
- -DECLOP_2VAR_SCALE_PRODUCT(ushort2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, float) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, double) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(ushort2, signed long long) - -// UNSIGNED SHORT3 - -DECLOP_3VAR_2IN_1OUT(ushort3, +) -DECLOP_3VAR_2IN_1OUT(ushort3, -) -DECLOP_3VAR_2IN_1OUT(ushort3, *) -DECLOP_3VAR_2IN_1OUT(ushort3, /) -DECLOP_3VAR_2IN_1OUT(ushort3, %) -DECLOP_3VAR_2IN_1OUT(ushort3, &) -DECLOP_3VAR_2IN_1OUT(ushort3, |) -DECLOP_3VAR_2IN_1OUT(ushort3, ^) -DECLOP_3VAR_2IN_1OUT(ushort3, <<) -DECLOP_3VAR_2IN_1OUT(ushort3, >>) - -DECLOP_3VAR_ASSIGN(ushort3, +=) -DECLOP_3VAR_ASSIGN(ushort3, -=) -DECLOP_3VAR_ASSIGN(ushort3, *=) -DECLOP_3VAR_ASSIGN(ushort3, /=) -DECLOP_3VAR_ASSIGN(ushort3, %=) -DECLOP_3VAR_ASSIGN(ushort3, &=) -DECLOP_3VAR_ASSIGN(ushort3, |=) -DECLOP_3VAR_ASSIGN(ushort3, ^=) -DECLOP_3VAR_ASSIGN(ushort3, <<=) -DECLOP_3VAR_ASSIGN(ushort3, >>=) - -DECLOP_3VAR_PREOP(ushort3, ++) -DECLOP_3VAR_PREOP(ushort3, --) - -DECLOP_3VAR_POSTOP(ushort3, ++) -DECLOP_3VAR_POSTOP(ushort3, --) - -DECLOP_3VAR_COMP(ushort3, ==) -DECLOP_3VAR_COMP(ushort3, !=) -DECLOP_3VAR_COMP(ushort3, <) -DECLOP_3VAR_COMP(ushort3, >) -DECLOP_3VAR_COMP(ushort3, <=) -DECLOP_3VAR_COMP(ushort3, >=) - -DECLOP_3VAR_COMP(ushort3, &&) -DECLOP_3VAR_COMP(ushort3, ||) - -DECLOP_3VAR_1IN_1OUT(ushort3, ~) -DECLOP_3VAR_1IN_BOOLOUT(ushort3, !) 
- -DECLOP_3VAR_SCALE_PRODUCT(ushort3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, float) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, double) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(ushort3, signed long long) - -// UNSIGNED SHORT4 - -DECLOP_4VAR_2IN_1OUT(ushort4, +) -DECLOP_4VAR_2IN_1OUT(ushort4, -) -DECLOP_4VAR_2IN_1OUT(ushort4, *) -DECLOP_4VAR_2IN_1OUT(ushort4, /) -DECLOP_4VAR_2IN_1OUT(ushort4, %) -DECLOP_4VAR_2IN_1OUT(ushort4, &) -DECLOP_4VAR_2IN_1OUT(ushort4, |) -DECLOP_4VAR_2IN_1OUT(ushort4, ^) -DECLOP_4VAR_2IN_1OUT(ushort4, <<) -DECLOP_4VAR_2IN_1OUT(ushort4, >>) - -DECLOP_4VAR_ASSIGN(ushort4, +=) -DECLOP_4VAR_ASSIGN(ushort4, -=) -DECLOP_4VAR_ASSIGN(ushort4, *=) -DECLOP_4VAR_ASSIGN(ushort4, /=) -DECLOP_4VAR_ASSIGN(ushort4, %=) -DECLOP_4VAR_ASSIGN(ushort4, &=) -DECLOP_4VAR_ASSIGN(ushort4, |=) -DECLOP_4VAR_ASSIGN(ushort4, ^=) -DECLOP_4VAR_ASSIGN(ushort4, <<=) -DECLOP_4VAR_ASSIGN(ushort4, >>=) - -DECLOP_4VAR_PREOP(ushort4, ++) -DECLOP_4VAR_PREOP(ushort4, --) - -DECLOP_4VAR_POSTOP(ushort4, ++) -DECLOP_4VAR_POSTOP(ushort4, --) - -DECLOP_4VAR_COMP(ushort4, ==) -DECLOP_4VAR_COMP(ushort4, !=) -DECLOP_4VAR_COMP(ushort4, <) -DECLOP_4VAR_COMP(ushort4, >) -DECLOP_4VAR_COMP(ushort4, <=) -DECLOP_4VAR_COMP(ushort4, >=) - -DECLOP_4VAR_COMP(ushort4, &&) -DECLOP_4VAR_COMP(ushort4, ||) - -DECLOP_4VAR_1IN_1OUT(ushort4, ~) -DECLOP_4VAR_1IN_BOOLOUT(ushort4, !) 
- -DECLOP_4VAR_SCALE_PRODUCT(ushort4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, float) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, double) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(ushort4, signed long long) - -// SIGNED SHORT1 - -DECLOP_1VAR_2IN_1OUT(short1, +) -DECLOP_1VAR_2IN_1OUT(short1, -) -DECLOP_1VAR_2IN_1OUT(short1, *) -DECLOP_1VAR_2IN_1OUT(short1, /) -DECLOP_1VAR_2IN_1OUT(short1, %) -DECLOP_1VAR_2IN_1OUT(short1, &) -DECLOP_1VAR_2IN_1OUT(short1, |) -DECLOP_1VAR_2IN_1OUT(short1, ^) -DECLOP_1VAR_2IN_1OUT(short1, <<) -DECLOP_1VAR_2IN_1OUT(short1, >>) - - -DECLOP_1VAR_ASSIGN(short1, +=) -DECLOP_1VAR_ASSIGN(short1, -=) -DECLOP_1VAR_ASSIGN(short1, *=) -DECLOP_1VAR_ASSIGN(short1, /=) -DECLOP_1VAR_ASSIGN(short1, %=) -DECLOP_1VAR_ASSIGN(short1, &=) -DECLOP_1VAR_ASSIGN(short1, |=) -DECLOP_1VAR_ASSIGN(short1, ^=) -DECLOP_1VAR_ASSIGN(short1, <<=) -DECLOP_1VAR_ASSIGN(short1, >>=) - -DECLOP_1VAR_PREOP(short1, ++) -DECLOP_1VAR_PREOP(short1, --) - -DECLOP_1VAR_POSTOP(short1, ++) -DECLOP_1VAR_POSTOP(short1, --) - -DECLOP_1VAR_COMP(short1, ==) -DECLOP_1VAR_COMP(short1, !=) -DECLOP_1VAR_COMP(short1, <) -DECLOP_1VAR_COMP(short1, >) -DECLOP_1VAR_COMP(short1, <=) -DECLOP_1VAR_COMP(short1, >=) - -DECLOP_1VAR_COMP(short1, &&) -DECLOP_1VAR_COMP(short1, ||) - -DECLOP_1VAR_1IN_1OUT(short1, ~) -DECLOP_1VAR_1IN_BOOLOUT(short1, !) 
- -DECLOP_1VAR_SCALE_PRODUCT(short1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(short1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(short1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(short1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(short1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(short1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(short1, float) -DECLOP_1VAR_SCALE_PRODUCT(short1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(short1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(short1, double) -DECLOP_1VAR_SCALE_PRODUCT(short1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(short1, signed long long) - -// SIGNED SHORT2 - -DECLOP_2VAR_2IN_1OUT(short2, +) -DECLOP_2VAR_2IN_1OUT(short2, -) -DECLOP_2VAR_2IN_1OUT(short2, *) -DECLOP_2VAR_2IN_1OUT(short2, /) -DECLOP_2VAR_2IN_1OUT(short2, %) -DECLOP_2VAR_2IN_1OUT(short2, &) -DECLOP_2VAR_2IN_1OUT(short2, |) -DECLOP_2VAR_2IN_1OUT(short2, ^) -DECLOP_2VAR_2IN_1OUT(short2, <<) -DECLOP_2VAR_2IN_1OUT(short2, >>) - -DECLOP_2VAR_ASSIGN(short2, +=) -DECLOP_2VAR_ASSIGN(short2, -=) -DECLOP_2VAR_ASSIGN(short2, *=) -DECLOP_2VAR_ASSIGN(short2, /=) -DECLOP_2VAR_ASSIGN(short2, %=) -DECLOP_2VAR_ASSIGN(short2, &=) -DECLOP_2VAR_ASSIGN(short2, |=) -DECLOP_2VAR_ASSIGN(short2, ^=) -DECLOP_2VAR_ASSIGN(short2, <<=) -DECLOP_2VAR_ASSIGN(short2, >>=) - -DECLOP_2VAR_PREOP(short2, ++) -DECLOP_2VAR_PREOP(short2, --) - -DECLOP_2VAR_POSTOP(short2, ++) -DECLOP_2VAR_POSTOP(short2, --) - -DECLOP_2VAR_COMP(short2, ==) -DECLOP_2VAR_COMP(short2, !=) -DECLOP_2VAR_COMP(short2, <) -DECLOP_2VAR_COMP(short2, >) -DECLOP_2VAR_COMP(short2, <=) -DECLOP_2VAR_COMP(short2, >=) - -DECLOP_2VAR_COMP(short2, &&) -DECLOP_2VAR_COMP(short2, ||) - -DECLOP_2VAR_1IN_1OUT(short2, ~) -DECLOP_2VAR_1IN_BOOLOUT(short2, !) 
- -DECLOP_2VAR_SCALE_PRODUCT(short2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(short2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(short2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(short2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(short2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(short2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(short2, float) -DECLOP_2VAR_SCALE_PRODUCT(short2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(short2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(short2, double) -DECLOP_2VAR_SCALE_PRODUCT(short2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(short2, signed long long) - -// SIGNED SHORT3 - -DECLOP_3VAR_2IN_1OUT(short3, +) -DECLOP_3VAR_2IN_1OUT(short3, -) -DECLOP_3VAR_2IN_1OUT(short3, *) -DECLOP_3VAR_2IN_1OUT(short3, /) -DECLOP_3VAR_2IN_1OUT(short3, %) -DECLOP_3VAR_2IN_1OUT(short3, &) -DECLOP_3VAR_2IN_1OUT(short3, |) -DECLOP_3VAR_2IN_1OUT(short3, ^) -DECLOP_3VAR_2IN_1OUT(short3, <<) -DECLOP_3VAR_2IN_1OUT(short3, >>) - -DECLOP_3VAR_ASSIGN(short3, +=) -DECLOP_3VAR_ASSIGN(short3, -=) -DECLOP_3VAR_ASSIGN(short3, *=) -DECLOP_3VAR_ASSIGN(short3, /=) -DECLOP_3VAR_ASSIGN(short3, %=) -DECLOP_3VAR_ASSIGN(short3, &=) -DECLOP_3VAR_ASSIGN(short3, |=) -DECLOP_3VAR_ASSIGN(short3, ^=) -DECLOP_3VAR_ASSIGN(short3, <<=) -DECLOP_3VAR_ASSIGN(short3, >>=) - -DECLOP_3VAR_PREOP(short3, ++) -DECLOP_3VAR_PREOP(short3, --) - -DECLOP_3VAR_POSTOP(short3, ++) -DECLOP_3VAR_POSTOP(short3, --) - -DECLOP_3VAR_COMP(short3, ==) -DECLOP_3VAR_COMP(short3, !=) -DECLOP_3VAR_COMP(short3, <) -DECLOP_3VAR_COMP(short3, >) -DECLOP_3VAR_COMP(short3, <=) -DECLOP_3VAR_COMP(short3, >=) - -DECLOP_3VAR_COMP(short3, &&) -DECLOP_3VAR_COMP(short3, ||) - -DECLOP_3VAR_1IN_1OUT(short3, ~) -DECLOP_3VAR_1IN_BOOLOUT(short3, !) 
- -DECLOP_3VAR_SCALE_PRODUCT(short3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(short3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(short3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(short3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(short3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(short3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(short3, float) -DECLOP_3VAR_SCALE_PRODUCT(short3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(short3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(short3, double) -DECLOP_3VAR_SCALE_PRODUCT(short3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(short3, signed long long) - -// SIGNED SHORT4 - -DECLOP_4VAR_2IN_1OUT(short4, +) -DECLOP_4VAR_2IN_1OUT(short4, -) -DECLOP_4VAR_2IN_1OUT(short4, *) -DECLOP_4VAR_2IN_1OUT(short4, /) -DECLOP_4VAR_2IN_1OUT(short4, %) -DECLOP_4VAR_2IN_1OUT(short4, &) -DECLOP_4VAR_2IN_1OUT(short4, |) -DECLOP_4VAR_2IN_1OUT(short4, ^) -DECLOP_4VAR_2IN_1OUT(short4, <<) -DECLOP_4VAR_2IN_1OUT(short4, >>) - -DECLOP_4VAR_ASSIGN(short4, +=) -DECLOP_4VAR_ASSIGN(short4, -=) -DECLOP_4VAR_ASSIGN(short4, *=) -DECLOP_4VAR_ASSIGN(short4, /=) -DECLOP_4VAR_ASSIGN(short4, %=) -DECLOP_4VAR_ASSIGN(short4, &=) -DECLOP_4VAR_ASSIGN(short4, |=) -DECLOP_4VAR_ASSIGN(short4, ^=) -DECLOP_4VAR_ASSIGN(short4, <<=) -DECLOP_4VAR_ASSIGN(short4, >>=) - -DECLOP_4VAR_PREOP(short4, ++) -DECLOP_4VAR_PREOP(short4, --) - -DECLOP_4VAR_POSTOP(short4, ++) -DECLOP_4VAR_POSTOP(short4, --) - -DECLOP_4VAR_COMP(short4, ==) -DECLOP_4VAR_COMP(short4, !=) -DECLOP_4VAR_COMP(short4, <) -DECLOP_4VAR_COMP(short4, >) -DECLOP_4VAR_COMP(short4, <=) -DECLOP_4VAR_COMP(short4, >=) - -DECLOP_4VAR_COMP(short4, &&) -DECLOP_4VAR_COMP(short4, ||) - -DECLOP_4VAR_1IN_1OUT(short4, ~) -DECLOP_4VAR_1IN_BOOLOUT(short4, !) 
- -DECLOP_4VAR_SCALE_PRODUCT(short4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(short4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(short4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(short4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(short4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(short4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(short4, float) -DECLOP_4VAR_SCALE_PRODUCT(short4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(short4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(short4, double) -DECLOP_4VAR_SCALE_PRODUCT(short4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(short4, signed long long) - -// UNSIGNED INT1 - -DECLOP_1VAR_2IN_1OUT(uint1, +) -DECLOP_1VAR_2IN_1OUT(uint1, -) -DECLOP_1VAR_2IN_1OUT(uint1, *) -DECLOP_1VAR_2IN_1OUT(uint1, /) -DECLOP_1VAR_2IN_1OUT(uint1, %) -DECLOP_1VAR_2IN_1OUT(uint1, &) -DECLOP_1VAR_2IN_1OUT(uint1, |) -DECLOP_1VAR_2IN_1OUT(uint1, ^) -DECLOP_1VAR_2IN_1OUT(uint1, <<) -DECLOP_1VAR_2IN_1OUT(uint1, >>) - - -DECLOP_1VAR_ASSIGN(uint1, +=) -DECLOP_1VAR_ASSIGN(uint1, -=) -DECLOP_1VAR_ASSIGN(uint1, *=) -DECLOP_1VAR_ASSIGN(uint1, /=) -DECLOP_1VAR_ASSIGN(uint1, %=) -DECLOP_1VAR_ASSIGN(uint1, &=) -DECLOP_1VAR_ASSIGN(uint1, |=) -DECLOP_1VAR_ASSIGN(uint1, ^=) -DECLOP_1VAR_ASSIGN(uint1, <<=) -DECLOP_1VAR_ASSIGN(uint1, >>=) - -DECLOP_1VAR_PREOP(uint1, ++) -DECLOP_1VAR_PREOP(uint1, --) - -DECLOP_1VAR_POSTOP(uint1, ++) -DECLOP_1VAR_POSTOP(uint1, --) - -DECLOP_1VAR_COMP(uint1, ==) -DECLOP_1VAR_COMP(uint1, !=) -DECLOP_1VAR_COMP(uint1, <) -DECLOP_1VAR_COMP(uint1, >) -DECLOP_1VAR_COMP(uint1, <=) -DECLOP_1VAR_COMP(uint1, >=) - -DECLOP_1VAR_COMP(uint1, &&) -DECLOP_1VAR_COMP(uint1, ||) - -DECLOP_1VAR_1IN_1OUT(uint1, ~) -DECLOP_1VAR_1IN_BOOLOUT(uint1, !) 
- -DECLOP_1VAR_SCALE_PRODUCT(uint1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(uint1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(uint1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(uint1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(uint1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(uint1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(uint1, float) -DECLOP_1VAR_SCALE_PRODUCT(uint1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(uint1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(uint1, double) -DECLOP_1VAR_SCALE_PRODUCT(uint1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(uint1, signed long long) - -// UNSIGNED INT2 - -DECLOP_2VAR_2IN_1OUT(uint2, +) -DECLOP_2VAR_2IN_1OUT(uint2, -) -DECLOP_2VAR_2IN_1OUT(uint2, *) -DECLOP_2VAR_2IN_1OUT(uint2, /) -DECLOP_2VAR_2IN_1OUT(uint2, %) -DECLOP_2VAR_2IN_1OUT(uint2, &) -DECLOP_2VAR_2IN_1OUT(uint2, |) -DECLOP_2VAR_2IN_1OUT(uint2, ^) -DECLOP_2VAR_2IN_1OUT(uint2, <<) -DECLOP_2VAR_2IN_1OUT(uint2, >>) - -DECLOP_2VAR_ASSIGN(uint2, +=) -DECLOP_2VAR_ASSIGN(uint2, -=) -DECLOP_2VAR_ASSIGN(uint2, *=) -DECLOP_2VAR_ASSIGN(uint2, /=) -DECLOP_2VAR_ASSIGN(uint2, %=) -DECLOP_2VAR_ASSIGN(uint2, &=) -DECLOP_2VAR_ASSIGN(uint2, |=) -DECLOP_2VAR_ASSIGN(uint2, ^=) -DECLOP_2VAR_ASSIGN(uint2, <<=) -DECLOP_2VAR_ASSIGN(uint2, >>=) - -DECLOP_2VAR_PREOP(uint2, ++) -DECLOP_2VAR_PREOP(uint2, --) - -DECLOP_2VAR_POSTOP(uint2, ++) -DECLOP_2VAR_POSTOP(uint2, --) - -DECLOP_2VAR_COMP(uint2, ==) -DECLOP_2VAR_COMP(uint2, !=) -DECLOP_2VAR_COMP(uint2, <) -DECLOP_2VAR_COMP(uint2, >) -DECLOP_2VAR_COMP(uint2, <=) -DECLOP_2VAR_COMP(uint2, >=) - -DECLOP_2VAR_COMP(uint2, &&) -DECLOP_2VAR_COMP(uint2, ||) - -DECLOP_2VAR_1IN_1OUT(uint2, ~) -DECLOP_2VAR_1IN_BOOLOUT(uint2, !) 
- -DECLOP_2VAR_SCALE_PRODUCT(uint2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(uint2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(uint2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(uint2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(uint2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(uint2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(uint2, float) -DECLOP_2VAR_SCALE_PRODUCT(uint2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(uint2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(uint2, double) -DECLOP_2VAR_SCALE_PRODUCT(uint2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(uint2, signed long long) - -// UNSIGNED INT3 - -DECLOP_3VAR_2IN_1OUT(uint3, +) -DECLOP_3VAR_2IN_1OUT(uint3, -) -DECLOP_3VAR_2IN_1OUT(uint3, *) -DECLOP_3VAR_2IN_1OUT(uint3, /) -DECLOP_3VAR_2IN_1OUT(uint3, %) -DECLOP_3VAR_2IN_1OUT(uint3, &) -DECLOP_3VAR_2IN_1OUT(uint3, |) -DECLOP_3VAR_2IN_1OUT(uint3, ^) -DECLOP_3VAR_2IN_1OUT(uint3, <<) -DECLOP_3VAR_2IN_1OUT(uint3, >>) - -DECLOP_3VAR_ASSIGN(uint3, +=) -DECLOP_3VAR_ASSIGN(uint3, -=) -DECLOP_3VAR_ASSIGN(uint3, *=) -DECLOP_3VAR_ASSIGN(uint3, /=) -DECLOP_3VAR_ASSIGN(uint3, %=) -DECLOP_3VAR_ASSIGN(uint3, &=) -DECLOP_3VAR_ASSIGN(uint3, |=) -DECLOP_3VAR_ASSIGN(uint3, ^=) -DECLOP_3VAR_ASSIGN(uint3, <<=) -DECLOP_3VAR_ASSIGN(uint3, >>=) - -DECLOP_3VAR_PREOP(uint3, ++) -DECLOP_3VAR_PREOP(uint3, --) - -DECLOP_3VAR_POSTOP(uint3, ++) -DECLOP_3VAR_POSTOP(uint3, --) - -DECLOP_3VAR_COMP(uint3, ==) -DECLOP_3VAR_COMP(uint3, !=) -DECLOP_3VAR_COMP(uint3, <) -DECLOP_3VAR_COMP(uint3, >) -DECLOP_3VAR_COMP(uint3, <=) -DECLOP_3VAR_COMP(uint3, >=) - -DECLOP_3VAR_COMP(uint3, &&) -DECLOP_3VAR_COMP(uint3, ||) - -DECLOP_3VAR_1IN_1OUT(uint3, ~) -DECLOP_3VAR_1IN_BOOLOUT(uint3, !) 
- -DECLOP_3VAR_SCALE_PRODUCT(uint3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(uint3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(uint3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(uint3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(uint3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(uint3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(uint3, float) -DECLOP_3VAR_SCALE_PRODUCT(uint3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(uint3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(uint3, double) -DECLOP_3VAR_SCALE_PRODUCT(uint3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(uint3, signed long long) - -// UNSIGNED INT4 - -DECLOP_4VAR_2IN_1OUT(uint4, +) -DECLOP_4VAR_2IN_1OUT(uint4, -) -DECLOP_4VAR_2IN_1OUT(uint4, *) -DECLOP_4VAR_2IN_1OUT(uint4, /) -DECLOP_4VAR_2IN_1OUT(uint4, %) -DECLOP_4VAR_2IN_1OUT(uint4, &) -DECLOP_4VAR_2IN_1OUT(uint4, |) -DECLOP_4VAR_2IN_1OUT(uint4, ^) -DECLOP_4VAR_2IN_1OUT(uint4, <<) -DECLOP_4VAR_2IN_1OUT(uint4, >>) - -DECLOP_4VAR_ASSIGN(uint4, +=) -DECLOP_4VAR_ASSIGN(uint4, -=) -DECLOP_4VAR_ASSIGN(uint4, *=) -DECLOP_4VAR_ASSIGN(uint4, /=) -DECLOP_4VAR_ASSIGN(uint4, %=) -DECLOP_4VAR_ASSIGN(uint4, &=) -DECLOP_4VAR_ASSIGN(uint4, |=) -DECLOP_4VAR_ASSIGN(uint4, ^=) -DECLOP_4VAR_ASSIGN(uint4, <<=) -DECLOP_4VAR_ASSIGN(uint4, >>=) - -DECLOP_4VAR_PREOP(uint4, ++) -DECLOP_4VAR_PREOP(uint4, --) - -DECLOP_4VAR_POSTOP(uint4, ++) -DECLOP_4VAR_POSTOP(uint4, --) - -DECLOP_4VAR_COMP(uint4, ==) -DECLOP_4VAR_COMP(uint4, !=) -DECLOP_4VAR_COMP(uint4, <) -DECLOP_4VAR_COMP(uint4, >) -DECLOP_4VAR_COMP(uint4, <=) -DECLOP_4VAR_COMP(uint4, >=) - -DECLOP_4VAR_COMP(uint4, &&) -DECLOP_4VAR_COMP(uint4, ||) - -DECLOP_4VAR_1IN_1OUT(uint4, ~) -DECLOP_4VAR_1IN_BOOLOUT(uint4, !) 
- -DECLOP_4VAR_SCALE_PRODUCT(uint4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(uint4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(uint4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(uint4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(uint4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(uint4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(uint4, float) -DECLOP_4VAR_SCALE_PRODUCT(uint4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(uint4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(uint4, double) -DECLOP_4VAR_SCALE_PRODUCT(uint4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(uint4, signed long long) - -// SIGNED INT1 - -DECLOP_1VAR_2IN_1OUT(int1, +) -DECLOP_1VAR_2IN_1OUT(int1, -) -DECLOP_1VAR_2IN_1OUT(int1, *) -DECLOP_1VAR_2IN_1OUT(int1, /) -DECLOP_1VAR_2IN_1OUT(int1, %) -DECLOP_1VAR_2IN_1OUT(int1, &) -DECLOP_1VAR_2IN_1OUT(int1, |) -DECLOP_1VAR_2IN_1OUT(int1, ^) -DECLOP_1VAR_2IN_1OUT(int1, <<) -DECLOP_1VAR_2IN_1OUT(int1, >>) - - -DECLOP_1VAR_ASSIGN(int1, +=) -DECLOP_1VAR_ASSIGN(int1, -=) -DECLOP_1VAR_ASSIGN(int1, *=) -DECLOP_1VAR_ASSIGN(int1, /=) -DECLOP_1VAR_ASSIGN(int1, %=) -DECLOP_1VAR_ASSIGN(int1, &=) -DECLOP_1VAR_ASSIGN(int1, |=) -DECLOP_1VAR_ASSIGN(int1, ^=) -DECLOP_1VAR_ASSIGN(int1, <<=) -DECLOP_1VAR_ASSIGN(int1, >>=) - -DECLOP_1VAR_PREOP(int1, ++) -DECLOP_1VAR_PREOP(int1, --) - -DECLOP_1VAR_POSTOP(int1, ++) -DECLOP_1VAR_POSTOP(int1, --) - -DECLOP_1VAR_COMP(int1, ==) -DECLOP_1VAR_COMP(int1, !=) -DECLOP_1VAR_COMP(int1, <) -DECLOP_1VAR_COMP(int1, >) -DECLOP_1VAR_COMP(int1, <=) -DECLOP_1VAR_COMP(int1, >=) - -DECLOP_1VAR_COMP(int1, &&) -DECLOP_1VAR_COMP(int1, ||) - -DECLOP_1VAR_1IN_1OUT(int1, ~) -DECLOP_1VAR_1IN_BOOLOUT(int1, !) 
- -DECLOP_1VAR_SCALE_PRODUCT(int1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(int1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(int1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(int1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(int1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(int1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(int1, float) -DECLOP_1VAR_SCALE_PRODUCT(int1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(int1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(int1, double) -DECLOP_1VAR_SCALE_PRODUCT(int1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(int1, signed long long) - -// SIGNED INT2 - -DECLOP_2VAR_2IN_1OUT(int2, +) -DECLOP_2VAR_2IN_1OUT(int2, -) -DECLOP_2VAR_2IN_1OUT(int2, *) -DECLOP_2VAR_2IN_1OUT(int2, /) -DECLOP_2VAR_2IN_1OUT(int2, %) -DECLOP_2VAR_2IN_1OUT(int2, &) -DECLOP_2VAR_2IN_1OUT(int2, |) -DECLOP_2VAR_2IN_1OUT(int2, ^) -DECLOP_2VAR_2IN_1OUT(int2, <<) -DECLOP_2VAR_2IN_1OUT(int2, >>) - -DECLOP_2VAR_ASSIGN(int2, +=) -DECLOP_2VAR_ASSIGN(int2, -=) -DECLOP_2VAR_ASSIGN(int2, *=) -DECLOP_2VAR_ASSIGN(int2, /=) -DECLOP_2VAR_ASSIGN(int2, %=) -DECLOP_2VAR_ASSIGN(int2, &=) -DECLOP_2VAR_ASSIGN(int2, |=) -DECLOP_2VAR_ASSIGN(int2, ^=) -DECLOP_2VAR_ASSIGN(int2, <<=) -DECLOP_2VAR_ASSIGN(int2, >>=) - -DECLOP_2VAR_PREOP(int2, ++) -DECLOP_2VAR_PREOP(int2, --) - -DECLOP_2VAR_POSTOP(int2, ++) -DECLOP_2VAR_POSTOP(int2, --) - -DECLOP_2VAR_COMP(int2, ==) -DECLOP_2VAR_COMP(int2, !=) -DECLOP_2VAR_COMP(int2, <) -DECLOP_2VAR_COMP(int2, >) -DECLOP_2VAR_COMP(int2, <=) -DECLOP_2VAR_COMP(int2, >=) - -DECLOP_2VAR_COMP(int2, &&) -DECLOP_2VAR_COMP(int2, ||) - -DECLOP_2VAR_1IN_1OUT(int2, ~) -DECLOP_2VAR_1IN_BOOLOUT(int2, !) 
- -DECLOP_2VAR_SCALE_PRODUCT(int2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(int2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(int2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(int2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(int2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(int2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(int2, float) -DECLOP_2VAR_SCALE_PRODUCT(int2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(int2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(int2, double) -DECLOP_2VAR_SCALE_PRODUCT(int2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(int2, signed long long) - -// SIGNED INT3 - -DECLOP_3VAR_2IN_1OUT(int3, +) -DECLOP_3VAR_2IN_1OUT(int3, -) -DECLOP_3VAR_2IN_1OUT(int3, *) -DECLOP_3VAR_2IN_1OUT(int3, /) -DECLOP_3VAR_2IN_1OUT(int3, %) -DECLOP_3VAR_2IN_1OUT(int3, &) -DECLOP_3VAR_2IN_1OUT(int3, |) -DECLOP_3VAR_2IN_1OUT(int3, ^) -DECLOP_3VAR_2IN_1OUT(int3, <<) -DECLOP_3VAR_2IN_1OUT(int3, >>) - -DECLOP_3VAR_ASSIGN(int3, +=) -DECLOP_3VAR_ASSIGN(int3, -=) -DECLOP_3VAR_ASSIGN(int3, *=) -DECLOP_3VAR_ASSIGN(int3, /=) -DECLOP_3VAR_ASSIGN(int3, %=) -DECLOP_3VAR_ASSIGN(int3, &=) -DECLOP_3VAR_ASSIGN(int3, |=) -DECLOP_3VAR_ASSIGN(int3, ^=) -DECLOP_3VAR_ASSIGN(int3, <<=) -DECLOP_3VAR_ASSIGN(int3, >>=) - -DECLOP_3VAR_PREOP(int3, ++) -DECLOP_3VAR_PREOP(int3, --) - -DECLOP_3VAR_POSTOP(int3, ++) -DECLOP_3VAR_POSTOP(int3, --) - -DECLOP_3VAR_COMP(int3, ==) -DECLOP_3VAR_COMP(int3, !=) -DECLOP_3VAR_COMP(int3, <) -DECLOP_3VAR_COMP(int3, >) -DECLOP_3VAR_COMP(int3, <=) -DECLOP_3VAR_COMP(int3, >=) - -DECLOP_3VAR_COMP(int3, &&) -DECLOP_3VAR_COMP(int3, ||) - -DECLOP_3VAR_1IN_1OUT(int3, ~) -DECLOP_3VAR_1IN_BOOLOUT(int3, !) 
- -DECLOP_3VAR_SCALE_PRODUCT(int3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(int3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(int3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(int3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(int3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(int3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(int3, float) -DECLOP_3VAR_SCALE_PRODUCT(int3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(int3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(int3, double) -DECLOP_3VAR_SCALE_PRODUCT(int3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(int3, signed long long) - -// SIGNED INT4 - -DECLOP_4VAR_2IN_1OUT(int4, +) -DECLOP_4VAR_2IN_1OUT(int4, -) -DECLOP_4VAR_2IN_1OUT(int4, *) -DECLOP_4VAR_2IN_1OUT(int4, /) -DECLOP_4VAR_2IN_1OUT(int4, %) -DECLOP_4VAR_2IN_1OUT(int4, &) -DECLOP_4VAR_2IN_1OUT(int4, |) -DECLOP_4VAR_2IN_1OUT(int4, ^) -DECLOP_4VAR_2IN_1OUT(int4, <<) -DECLOP_4VAR_2IN_1OUT(int4, >>) - -DECLOP_4VAR_ASSIGN(int4, +=) -DECLOP_4VAR_ASSIGN(int4, -=) -DECLOP_4VAR_ASSIGN(int4, *=) -DECLOP_4VAR_ASSIGN(int4, /=) -DECLOP_4VAR_ASSIGN(int4, %=) -DECLOP_4VAR_ASSIGN(int4, &=) -DECLOP_4VAR_ASSIGN(int4, |=) -DECLOP_4VAR_ASSIGN(int4, ^=) -DECLOP_4VAR_ASSIGN(int4, <<=) -DECLOP_4VAR_ASSIGN(int4, >>=) - -DECLOP_4VAR_PREOP(int4, ++) -DECLOP_4VAR_PREOP(int4, --) - -DECLOP_4VAR_POSTOP(int4, ++) -DECLOP_4VAR_POSTOP(int4, --) - -DECLOP_4VAR_COMP(int4, ==) -DECLOP_4VAR_COMP(int4, !=) -DECLOP_4VAR_COMP(int4, <) -DECLOP_4VAR_COMP(int4, >) -DECLOP_4VAR_COMP(int4, <=) -DECLOP_4VAR_COMP(int4, >=) - -DECLOP_4VAR_COMP(int4, &&) -DECLOP_4VAR_COMP(int4, ||) - -DECLOP_4VAR_1IN_1OUT(int4, ~) -DECLOP_4VAR_1IN_BOOLOUT(int4, !) 
- -DECLOP_4VAR_SCALE_PRODUCT(int4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(int4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(int4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(int4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(int4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(int4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(int4, float) -DECLOP_4VAR_SCALE_PRODUCT(int4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(int4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(int4, double) -DECLOP_4VAR_SCALE_PRODUCT(int4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(int4, signed long long) - -// FLOAT1 - -DECLOP_1VAR_2IN_1OUT(float1, +) -DECLOP_1VAR_2IN_1OUT(float1, -) -DECLOP_1VAR_2IN_1OUT(float1, *) -DECLOP_1VAR_2IN_1OUT(float1, /) - -DECLOP_1VAR_ASSIGN(float1, +=) -DECLOP_1VAR_ASSIGN(float1, -=) -DECLOP_1VAR_ASSIGN(float1, *=) -DECLOP_1VAR_ASSIGN(float1, /=) - -DECLOP_1VAR_PREOP(float1, ++) -DECLOP_1VAR_PREOP(float1, --) - -DECLOP_1VAR_POSTOP(float1, ++) -DECLOP_1VAR_POSTOP(float1, --) - -DECLOP_1VAR_COMP(float1, ==) -DECLOP_1VAR_COMP(float1, !=) -DECLOP_1VAR_COMP(float1, <) -DECLOP_1VAR_COMP(float1, >) -DECLOP_1VAR_COMP(float1, <=) -DECLOP_1VAR_COMP(float1, >=) - -DECLOP_1VAR_SCALE_PRODUCT(float1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(float1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(float1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(float1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(float1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(float1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(float1, float) -DECLOP_1VAR_SCALE_PRODUCT(float1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(float1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(float1, double) -DECLOP_1VAR_SCALE_PRODUCT(float1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(float1, signed long long) - -// FLOAT2 - -DECLOP_2VAR_2IN_1OUT(float2, +) -DECLOP_2VAR_2IN_1OUT(float2, -) -DECLOP_2VAR_2IN_1OUT(float2, *) -DECLOP_2VAR_2IN_1OUT(float2, /) - -DECLOP_2VAR_ASSIGN(float2, +=) -DECLOP_2VAR_ASSIGN(float2, -=) -DECLOP_2VAR_ASSIGN(float2, *=) 
-DECLOP_2VAR_ASSIGN(float2, /=) - -DECLOP_2VAR_PREOP(float2, ++) -DECLOP_2VAR_PREOP(float2, --) - -DECLOP_2VAR_POSTOP(float2, ++) -DECLOP_2VAR_POSTOP(float2, --) - -DECLOP_2VAR_COMP(float2, ==) -DECLOP_2VAR_COMP(float2, !=) -DECLOP_2VAR_COMP(float2, <) -DECLOP_2VAR_COMP(float2, >) -DECLOP_2VAR_COMP(float2, <=) -DECLOP_2VAR_COMP(float2, >=) - -DECLOP_2VAR_SCALE_PRODUCT(float2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(float2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(float2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(float2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(float2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(float2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(float2, float) -DECLOP_2VAR_SCALE_PRODUCT(float2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(float2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(float2, double) -DECLOP_2VAR_SCALE_PRODUCT(float2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(float2, signed long long) - -// FLOAT3 - -DECLOP_3VAR_2IN_1OUT(float3, +) -DECLOP_3VAR_2IN_1OUT(float3, -) -DECLOP_3VAR_2IN_1OUT(float3, *) -DECLOP_3VAR_2IN_1OUT(float3, /) - -DECLOP_3VAR_ASSIGN(float3, +=) -DECLOP_3VAR_ASSIGN(float3, -=) -DECLOP_3VAR_ASSIGN(float3, *=) -DECLOP_3VAR_ASSIGN(float3, /=) - -DECLOP_3VAR_PREOP(float3, ++) -DECLOP_3VAR_PREOP(float3, --) - -DECLOP_3VAR_POSTOP(float3, ++) -DECLOP_3VAR_POSTOP(float3, --) - -DECLOP_3VAR_COMP(float3, ==) -DECLOP_3VAR_COMP(float3, !=) -DECLOP_3VAR_COMP(float3, <) -DECLOP_3VAR_COMP(float3, >) -DECLOP_3VAR_COMP(float3, <=) -DECLOP_3VAR_COMP(float3, >=) - -DECLOP_3VAR_SCALE_PRODUCT(float3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(float3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(float3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(float3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(float3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(float3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(float3, float) -DECLOP_3VAR_SCALE_PRODUCT(float3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(float3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(float3, double) 
-DECLOP_3VAR_SCALE_PRODUCT(float3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(float3, signed long long) - -// FLOAT4 - -DECLOP_4VAR_2IN_1OUT(float4, +) -DECLOP_4VAR_2IN_1OUT(float4, -) -DECLOP_4VAR_2IN_1OUT(float4, *) -DECLOP_4VAR_2IN_1OUT(float4, /) - -DECLOP_4VAR_ASSIGN(float4, +=) -DECLOP_4VAR_ASSIGN(float4, -=) -DECLOP_4VAR_ASSIGN(float4, *=) -DECLOP_4VAR_ASSIGN(float4, /=) - -DECLOP_4VAR_PREOP(float4, ++) -DECLOP_4VAR_PREOP(float4, --) - -DECLOP_4VAR_POSTOP(float4, ++) -DECLOP_4VAR_POSTOP(float4, --) - -DECLOP_4VAR_COMP(float4, ==) -DECLOP_4VAR_COMP(float4, !=) -DECLOP_4VAR_COMP(float4, <) -DECLOP_4VAR_COMP(float4, >) -DECLOP_4VAR_COMP(float4, <=) -DECLOP_4VAR_COMP(float4, >=) - -DECLOP_4VAR_SCALE_PRODUCT(float4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(float4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(float4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(float4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(float4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(float4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(float4, float) -DECLOP_4VAR_SCALE_PRODUCT(float4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(float4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(float4, double) -DECLOP_4VAR_SCALE_PRODUCT(float4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(float4, signed long long) - -// DOUBLE1 - -DECLOP_1VAR_2IN_1OUT(double1, +) -DECLOP_1VAR_2IN_1OUT(double1, -) -DECLOP_1VAR_2IN_1OUT(double1, *) -DECLOP_1VAR_2IN_1OUT(double1, /) - -DECLOP_1VAR_ASSIGN(double1, +=) -DECLOP_1VAR_ASSIGN(double1, -=) -DECLOP_1VAR_ASSIGN(double1, *=) -DECLOP_1VAR_ASSIGN(double1, /=) - -DECLOP_1VAR_PREOP(double1, ++) -DECLOP_1VAR_PREOP(double1, --) - -DECLOP_1VAR_POSTOP(double1, ++) -DECLOP_1VAR_POSTOP(double1, --) - -DECLOP_1VAR_COMP(double1, ==) -DECLOP_1VAR_COMP(double1, !=) -DECLOP_1VAR_COMP(double1, <) -DECLOP_1VAR_COMP(double1, >) -DECLOP_1VAR_COMP(double1, <=) -DECLOP_1VAR_COMP(double1, >=) - -DECLOP_1VAR_SCALE_PRODUCT(double1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(double1, signed char) 
-DECLOP_1VAR_SCALE_PRODUCT(double1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(double1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(double1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(double1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(double1, float) -DECLOP_1VAR_SCALE_PRODUCT(double1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(double1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(double1, double) -DECLOP_1VAR_SCALE_PRODUCT(double1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(double1, signed long long) - -// DOUBLE2 - -DECLOP_2VAR_2IN_1OUT(double2, +) -DECLOP_2VAR_2IN_1OUT(double2, -) -DECLOP_2VAR_2IN_1OUT(double2, *) -DECLOP_2VAR_2IN_1OUT(double2, /) - -DECLOP_2VAR_ASSIGN(double2, +=) -DECLOP_2VAR_ASSIGN(double2, -=) -DECLOP_2VAR_ASSIGN(double2, *=) -DECLOP_2VAR_ASSIGN(double2, /=) - -DECLOP_2VAR_PREOP(double2, ++) -DECLOP_2VAR_PREOP(double2, --) - -DECLOP_2VAR_POSTOP(double2, ++) -DECLOP_2VAR_POSTOP(double2, --) - -DECLOP_2VAR_COMP(double2, ==) -DECLOP_2VAR_COMP(double2, !=) -DECLOP_2VAR_COMP(double2, <) -DECLOP_2VAR_COMP(double2, >) -DECLOP_2VAR_COMP(double2, <=) -DECLOP_2VAR_COMP(double2, >=) - -DECLOP_2VAR_SCALE_PRODUCT(double2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(double2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(double2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(double2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(double2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(double2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(double2, float) -DECLOP_2VAR_SCALE_PRODUCT(double2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(double2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(double2, double) -DECLOP_2VAR_SCALE_PRODUCT(double2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(double2, signed long long) - -// DOUBLE3 - -DECLOP_3VAR_2IN_1OUT(double3, +) -DECLOP_3VAR_2IN_1OUT(double3, -) -DECLOP_3VAR_2IN_1OUT(double3, *) -DECLOP_3VAR_2IN_1OUT(double3, /) - -DECLOP_3VAR_ASSIGN(double3, +=) -DECLOP_3VAR_ASSIGN(double3, -=) -DECLOP_3VAR_ASSIGN(double3, *=) -DECLOP_3VAR_ASSIGN(double3, /=) - 
-DECLOP_3VAR_PREOP(double3, ++) -DECLOP_3VAR_PREOP(double3, --) - -DECLOP_3VAR_POSTOP(double3, ++) -DECLOP_3VAR_POSTOP(double3, --) - -DECLOP_3VAR_COMP(double3, ==) -DECLOP_3VAR_COMP(double3, !=) -DECLOP_3VAR_COMP(double3, <) -DECLOP_3VAR_COMP(double3, >) -DECLOP_3VAR_COMP(double3, <=) -DECLOP_3VAR_COMP(double3, >=) - -DECLOP_3VAR_SCALE_PRODUCT(double3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(double3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(double3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(double3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(double3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(double3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(double3, float) -DECLOP_3VAR_SCALE_PRODUCT(double3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(double3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(double3, double) -DECLOP_3VAR_SCALE_PRODUCT(double3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(double3, signed long long) - -// DOUBLE4 - -DECLOP_4VAR_2IN_1OUT(double4, +) -DECLOP_4VAR_2IN_1OUT(double4, -) -DECLOP_4VAR_2IN_1OUT(double4, *) -DECLOP_4VAR_2IN_1OUT(double4, /) - -DECLOP_4VAR_ASSIGN(double4, +=) -DECLOP_4VAR_ASSIGN(double4, -=) -DECLOP_4VAR_ASSIGN(double4, *=) -DECLOP_4VAR_ASSIGN(double4, /=) - -DECLOP_4VAR_PREOP(double4, ++) -DECLOP_4VAR_PREOP(double4, --) - -DECLOP_4VAR_POSTOP(double4, ++) -DECLOP_4VAR_POSTOP(double4, --) - -DECLOP_4VAR_COMP(double4, ==) -DECLOP_4VAR_COMP(double4, !=) -DECLOP_4VAR_COMP(double4, <) -DECLOP_4VAR_COMP(double4, >) -DECLOP_4VAR_COMP(double4, <=) -DECLOP_4VAR_COMP(double4, >=) - -DECLOP_4VAR_SCALE_PRODUCT(double4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(double4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(double4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(double4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(double4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(double4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(double4, float) -DECLOP_4VAR_SCALE_PRODUCT(double4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(double4, signed long) 
-DECLOP_4VAR_SCALE_PRODUCT(double4, double) -DECLOP_4VAR_SCALE_PRODUCT(double4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(double4, signed long long) - -// UNSIGNED LONG1 - -DECLOP_1VAR_2IN_1OUT(ulong1, +) -DECLOP_1VAR_2IN_1OUT(ulong1, -) -DECLOP_1VAR_2IN_1OUT(ulong1, *) -DECLOP_1VAR_2IN_1OUT(ulong1, /) -DECLOP_1VAR_2IN_1OUT(ulong1, %) -DECLOP_1VAR_2IN_1OUT(ulong1, &) -DECLOP_1VAR_2IN_1OUT(ulong1, |) -DECLOP_1VAR_2IN_1OUT(ulong1, ^) -DECLOP_1VAR_2IN_1OUT(ulong1, <<) -DECLOP_1VAR_2IN_1OUT(ulong1, >>) - - -DECLOP_1VAR_ASSIGN(ulong1, +=) -DECLOP_1VAR_ASSIGN(ulong1, -=) -DECLOP_1VAR_ASSIGN(ulong1, *=) -DECLOP_1VAR_ASSIGN(ulong1, /=) -DECLOP_1VAR_ASSIGN(ulong1, %=) -DECLOP_1VAR_ASSIGN(ulong1, &=) -DECLOP_1VAR_ASSIGN(ulong1, |=) -DECLOP_1VAR_ASSIGN(ulong1, ^=) -DECLOP_1VAR_ASSIGN(ulong1, <<=) -DECLOP_1VAR_ASSIGN(ulong1, >>=) - -DECLOP_1VAR_PREOP(ulong1, ++) -DECLOP_1VAR_PREOP(ulong1, --) - -DECLOP_1VAR_POSTOP(ulong1, ++) -DECLOP_1VAR_POSTOP(ulong1, --) - -DECLOP_1VAR_COMP(ulong1, ==) -DECLOP_1VAR_COMP(ulong1, !=) -DECLOP_1VAR_COMP(ulong1, <) -DECLOP_1VAR_COMP(ulong1, >) -DECLOP_1VAR_COMP(ulong1, <=) -DECLOP_1VAR_COMP(ulong1, >=) - -DECLOP_1VAR_COMP(ulong1, &&) -DECLOP_1VAR_COMP(ulong1, ||) - -DECLOP_1VAR_1IN_1OUT(ulong1, ~) -DECLOP_1VAR_1IN_BOOLOUT(ulong1, !) 
- -DECLOP_1VAR_SCALE_PRODUCT(ulong1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, float) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, double) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(ulong1, signed long long) - -// UNSIGNED LONG2 - -DECLOP_2VAR_2IN_1OUT(ulong2, +) -DECLOP_2VAR_2IN_1OUT(ulong2, -) -DECLOP_2VAR_2IN_1OUT(ulong2, *) -DECLOP_2VAR_2IN_1OUT(ulong2, /) -DECLOP_2VAR_2IN_1OUT(ulong2, %) -DECLOP_2VAR_2IN_1OUT(ulong2, &) -DECLOP_2VAR_2IN_1OUT(ulong2, |) -DECLOP_2VAR_2IN_1OUT(ulong2, ^) -DECLOP_2VAR_2IN_1OUT(ulong2, <<) -DECLOP_2VAR_2IN_1OUT(ulong2, >>) - -DECLOP_2VAR_ASSIGN(ulong2, +=) -DECLOP_2VAR_ASSIGN(ulong2, -=) -DECLOP_2VAR_ASSIGN(ulong2, *=) -DECLOP_2VAR_ASSIGN(ulong2, /=) -DECLOP_2VAR_ASSIGN(ulong2, %=) -DECLOP_2VAR_ASSIGN(ulong2, &=) -DECLOP_2VAR_ASSIGN(ulong2, |=) -DECLOP_2VAR_ASSIGN(ulong2, ^=) -DECLOP_2VAR_ASSIGN(ulong2, <<=) -DECLOP_2VAR_ASSIGN(ulong2, >>=) - -DECLOP_2VAR_PREOP(ulong2, ++) -DECLOP_2VAR_PREOP(ulong2, --) - -DECLOP_2VAR_POSTOP(ulong2, ++) -DECLOP_2VAR_POSTOP(ulong2, --) - -DECLOP_2VAR_COMP(ulong2, ==) -DECLOP_2VAR_COMP(ulong2, !=) -DECLOP_2VAR_COMP(ulong2, <) -DECLOP_2VAR_COMP(ulong2, >) -DECLOP_2VAR_COMP(ulong2, <=) -DECLOP_2VAR_COMP(ulong2, >=) - -DECLOP_2VAR_COMP(ulong2, &&) -DECLOP_2VAR_COMP(ulong2, ||) - -DECLOP_2VAR_1IN_1OUT(ulong2, ~) -DECLOP_2VAR_1IN_BOOLOUT(ulong2, !) 
- -DECLOP_2VAR_SCALE_PRODUCT(ulong2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, float) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, double) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(ulong2, signed long long) - -// UNSIGNED LONG3 - -DECLOP_3VAR_2IN_1OUT(ulong3, +) -DECLOP_3VAR_2IN_1OUT(ulong3, -) -DECLOP_3VAR_2IN_1OUT(ulong3, *) -DECLOP_3VAR_2IN_1OUT(ulong3, /) -DECLOP_3VAR_2IN_1OUT(ulong3, %) -DECLOP_3VAR_2IN_1OUT(ulong3, &) -DECLOP_3VAR_2IN_1OUT(ulong3, |) -DECLOP_3VAR_2IN_1OUT(ulong3, ^) -DECLOP_3VAR_2IN_1OUT(ulong3, <<) -DECLOP_3VAR_2IN_1OUT(ulong3, >>) - -DECLOP_3VAR_ASSIGN(ulong3, +=) -DECLOP_3VAR_ASSIGN(ulong3, -=) -DECLOP_3VAR_ASSIGN(ulong3, *=) -DECLOP_3VAR_ASSIGN(ulong3, /=) -DECLOP_3VAR_ASSIGN(ulong3, %=) -DECLOP_3VAR_ASSIGN(ulong3, &=) -DECLOP_3VAR_ASSIGN(ulong3, |=) -DECLOP_3VAR_ASSIGN(ulong3, ^=) -DECLOP_3VAR_ASSIGN(ulong3, <<=) -DECLOP_3VAR_ASSIGN(ulong3, >>=) - -DECLOP_3VAR_PREOP(ulong3, ++) -DECLOP_3VAR_PREOP(ulong3, --) - -DECLOP_3VAR_POSTOP(ulong3, ++) -DECLOP_3VAR_POSTOP(ulong3, --) - -DECLOP_3VAR_COMP(ulong3, ==) -DECLOP_3VAR_COMP(ulong3, !=) -DECLOP_3VAR_COMP(ulong3, <) -DECLOP_3VAR_COMP(ulong3, >) -DECLOP_3VAR_COMP(ulong3, <=) -DECLOP_3VAR_COMP(ulong3, >=) - -DECLOP_3VAR_COMP(ulong3, &&) -DECLOP_3VAR_COMP(ulong3, ||) - -DECLOP_3VAR_1IN_1OUT(ulong3, ~) -DECLOP_3VAR_1IN_BOOLOUT(ulong3, !) 
- -DECLOP_3VAR_SCALE_PRODUCT(ulong3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, float) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, double) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(ulong3, signed long long) - -// UNSIGNED LONG4 - -DECLOP_4VAR_2IN_1OUT(ulong4, +) -DECLOP_4VAR_2IN_1OUT(ulong4, -) -DECLOP_4VAR_2IN_1OUT(ulong4, *) -DECLOP_4VAR_2IN_1OUT(ulong4, /) -DECLOP_4VAR_2IN_1OUT(ulong4, %) -DECLOP_4VAR_2IN_1OUT(ulong4, &) -DECLOP_4VAR_2IN_1OUT(ulong4, |) -DECLOP_4VAR_2IN_1OUT(ulong4, ^) -DECLOP_4VAR_2IN_1OUT(ulong4, <<) -DECLOP_4VAR_2IN_1OUT(ulong4, >>) - -DECLOP_4VAR_ASSIGN(ulong4, +=) -DECLOP_4VAR_ASSIGN(ulong4, -=) -DECLOP_4VAR_ASSIGN(ulong4, *=) -DECLOP_4VAR_ASSIGN(ulong4, /=) -DECLOP_4VAR_ASSIGN(ulong4, %=) -DECLOP_4VAR_ASSIGN(ulong4, &=) -DECLOP_4VAR_ASSIGN(ulong4, |=) -DECLOP_4VAR_ASSIGN(ulong4, ^=) -DECLOP_4VAR_ASSIGN(ulong4, <<=) -DECLOP_4VAR_ASSIGN(ulong4, >>=) - -DECLOP_4VAR_PREOP(ulong4, ++) -DECLOP_4VAR_PREOP(ulong4, --) - -DECLOP_4VAR_POSTOP(ulong4, ++) -DECLOP_4VAR_POSTOP(ulong4, --) - -DECLOP_4VAR_COMP(ulong4, ==) -DECLOP_4VAR_COMP(ulong4, !=) -DECLOP_4VAR_COMP(ulong4, <) -DECLOP_4VAR_COMP(ulong4, >) -DECLOP_4VAR_COMP(ulong4, <=) -DECLOP_4VAR_COMP(ulong4, >=) - -DECLOP_4VAR_COMP(ulong4, &&) -DECLOP_4VAR_COMP(ulong4, ||) - -DECLOP_4VAR_1IN_1OUT(ulong4, ~) -DECLOP_4VAR_1IN_BOOLOUT(ulong4, !) 
- -DECLOP_4VAR_SCALE_PRODUCT(ulong4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, float) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, double) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(ulong4, signed long long) - -// SIGNED LONG1 - -DECLOP_1VAR_2IN_1OUT(long1, +) -DECLOP_1VAR_2IN_1OUT(long1, -) -DECLOP_1VAR_2IN_1OUT(long1, *) -DECLOP_1VAR_2IN_1OUT(long1, /) -DECLOP_1VAR_2IN_1OUT(long1, %) -DECLOP_1VAR_2IN_1OUT(long1, &) -DECLOP_1VAR_2IN_1OUT(long1, |) -DECLOP_1VAR_2IN_1OUT(long1, ^) -DECLOP_1VAR_2IN_1OUT(long1, <<) -DECLOP_1VAR_2IN_1OUT(long1, >>) - - -DECLOP_1VAR_ASSIGN(long1, +=) -DECLOP_1VAR_ASSIGN(long1, -=) -DECLOP_1VAR_ASSIGN(long1, *=) -DECLOP_1VAR_ASSIGN(long1, /=) -DECLOP_1VAR_ASSIGN(long1, %=) -DECLOP_1VAR_ASSIGN(long1, &=) -DECLOP_1VAR_ASSIGN(long1, |=) -DECLOP_1VAR_ASSIGN(long1, ^=) -DECLOP_1VAR_ASSIGN(long1, <<=) -DECLOP_1VAR_ASSIGN(long1, >>=) - -DECLOP_1VAR_PREOP(long1, ++) -DECLOP_1VAR_PREOP(long1, --) - -DECLOP_1VAR_POSTOP(long1, ++) -DECLOP_1VAR_POSTOP(long1, --) - -DECLOP_1VAR_COMP(long1, ==) -DECLOP_1VAR_COMP(long1, !=) -DECLOP_1VAR_COMP(long1, <) -DECLOP_1VAR_COMP(long1, >) -DECLOP_1VAR_COMP(long1, <=) -DECLOP_1VAR_COMP(long1, >=) - -DECLOP_1VAR_COMP(long1, &&) -DECLOP_1VAR_COMP(long1, ||) - -DECLOP_1VAR_1IN_1OUT(long1, ~) -DECLOP_1VAR_1IN_BOOLOUT(long1, !) 
- -DECLOP_1VAR_SCALE_PRODUCT(long1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(long1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(long1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(long1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(long1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(long1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(long1, float) -DECLOP_1VAR_SCALE_PRODUCT(long1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(long1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(long1, double) -DECLOP_1VAR_SCALE_PRODUCT(long1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(long1, signed long long) - -// SIGNED LONG2 - -DECLOP_2VAR_2IN_1OUT(long2, +) -DECLOP_2VAR_2IN_1OUT(long2, -) -DECLOP_2VAR_2IN_1OUT(long2, *) -DECLOP_2VAR_2IN_1OUT(long2, /) -DECLOP_2VAR_2IN_1OUT(long2, %) -DECLOP_2VAR_2IN_1OUT(long2, &) -DECLOP_2VAR_2IN_1OUT(long2, |) -DECLOP_2VAR_2IN_1OUT(long2, ^) -DECLOP_2VAR_2IN_1OUT(long2, <<) -DECLOP_2VAR_2IN_1OUT(long2, >>) - -DECLOP_2VAR_ASSIGN(long2, +=) -DECLOP_2VAR_ASSIGN(long2, -=) -DECLOP_2VAR_ASSIGN(long2, *=) -DECLOP_2VAR_ASSIGN(long2, /=) -DECLOP_2VAR_ASSIGN(long2, %=) -DECLOP_2VAR_ASSIGN(long2, &=) -DECLOP_2VAR_ASSIGN(long2, |=) -DECLOP_2VAR_ASSIGN(long2, ^=) -DECLOP_2VAR_ASSIGN(long2, <<=) -DECLOP_2VAR_ASSIGN(long2, >>=) - -DECLOP_2VAR_PREOP(long2, ++) -DECLOP_2VAR_PREOP(long2, --) - -DECLOP_2VAR_POSTOP(long2, ++) -DECLOP_2VAR_POSTOP(long2, --) - -DECLOP_2VAR_COMP(long2, ==) -DECLOP_2VAR_COMP(long2, !=) -DECLOP_2VAR_COMP(long2, <) -DECLOP_2VAR_COMP(long2, >) -DECLOP_2VAR_COMP(long2, <=) -DECLOP_2VAR_COMP(long2, >=) - -DECLOP_2VAR_COMP(long2, &&) -DECLOP_2VAR_COMP(long2, ||) - -DECLOP_2VAR_1IN_1OUT(long2, ~) -DECLOP_2VAR_1IN_BOOLOUT(long2, !) 
- -DECLOP_2VAR_SCALE_PRODUCT(long2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(long2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(long2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(long2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(long2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(long2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(long2, float) -DECLOP_2VAR_SCALE_PRODUCT(long2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(long2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(long2, double) -DECLOP_2VAR_SCALE_PRODUCT(long2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(long2, signed long long) - -// SIGNED LONG3 - -DECLOP_3VAR_2IN_1OUT(long3, +) -DECLOP_3VAR_2IN_1OUT(long3, -) -DECLOP_3VAR_2IN_1OUT(long3, *) -DECLOP_3VAR_2IN_1OUT(long3, /) -DECLOP_3VAR_2IN_1OUT(long3, %) -DECLOP_3VAR_2IN_1OUT(long3, &) -DECLOP_3VAR_2IN_1OUT(long3, |) -DECLOP_3VAR_2IN_1OUT(long3, ^) -DECLOP_3VAR_2IN_1OUT(long3, <<) -DECLOP_3VAR_2IN_1OUT(long3, >>) - -DECLOP_3VAR_ASSIGN(long3, +=) -DECLOP_3VAR_ASSIGN(long3, -=) -DECLOP_3VAR_ASSIGN(long3, *=) -DECLOP_3VAR_ASSIGN(long3, /=) -DECLOP_3VAR_ASSIGN(long3, %=) -DECLOP_3VAR_ASSIGN(long3, &=) -DECLOP_3VAR_ASSIGN(long3, |=) -DECLOP_3VAR_ASSIGN(long3, ^=) -DECLOP_3VAR_ASSIGN(long3, <<=) -DECLOP_3VAR_ASSIGN(long3, >>=) - -DECLOP_3VAR_PREOP(long3, ++) -DECLOP_3VAR_PREOP(long3, --) - -DECLOP_3VAR_POSTOP(long3, ++) -DECLOP_3VAR_POSTOP(long3, --) - -DECLOP_3VAR_COMP(long3, ==) -DECLOP_3VAR_COMP(long3, !=) -DECLOP_3VAR_COMP(long3, <) -DECLOP_3VAR_COMP(long3, >) -DECLOP_3VAR_COMP(long3, <=) -DECLOP_3VAR_COMP(long3, >=) - -DECLOP_3VAR_COMP(long3, &&) -DECLOP_3VAR_COMP(long3, ||) - -DECLOP_3VAR_1IN_1OUT(long3, ~) -DECLOP_3VAR_1IN_BOOLOUT(long3, !) 
- -DECLOP_3VAR_SCALE_PRODUCT(long3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(long3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(long3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(long3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(long3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(long3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(long3, float) -DECLOP_3VAR_SCALE_PRODUCT(long3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(long3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(long3, double) -DECLOP_3VAR_SCALE_PRODUCT(long3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(long3, signed long long) - -// SIGNED LONG4 - -DECLOP_4VAR_2IN_1OUT(long4, +) -DECLOP_4VAR_2IN_1OUT(long4, -) -DECLOP_4VAR_2IN_1OUT(long4, *) -DECLOP_4VAR_2IN_1OUT(long4, /) -DECLOP_4VAR_2IN_1OUT(long4, %) -DECLOP_4VAR_2IN_1OUT(long4, &) -DECLOP_4VAR_2IN_1OUT(long4, |) -DECLOP_4VAR_2IN_1OUT(long4, ^) -DECLOP_4VAR_2IN_1OUT(long4, <<) -DECLOP_4VAR_2IN_1OUT(long4, >>) - -DECLOP_4VAR_ASSIGN(long4, +=) -DECLOP_4VAR_ASSIGN(long4, -=) -DECLOP_4VAR_ASSIGN(long4, *=) -DECLOP_4VAR_ASSIGN(long4, /=) -DECLOP_4VAR_ASSIGN(long4, %=) -DECLOP_4VAR_ASSIGN(long4, &=) -DECLOP_4VAR_ASSIGN(long4, |=) -DECLOP_4VAR_ASSIGN(long4, ^=) -DECLOP_4VAR_ASSIGN(long4, <<=) -DECLOP_4VAR_ASSIGN(long4, >>=) - -DECLOP_4VAR_PREOP(long4, ++) -DECLOP_4VAR_PREOP(long4, --) - -DECLOP_4VAR_POSTOP(long4, ++) -DECLOP_4VAR_POSTOP(long4, --) - -DECLOP_4VAR_COMP(long4, ==) -DECLOP_4VAR_COMP(long4, !=) -DECLOP_4VAR_COMP(long4, <) -DECLOP_4VAR_COMP(long4, >) -DECLOP_4VAR_COMP(long4, <=) -DECLOP_4VAR_COMP(long4, >=) - -DECLOP_4VAR_COMP(long4, &&) -DECLOP_4VAR_COMP(long4, ||) - -DECLOP_4VAR_1IN_1OUT(long4, ~) -DECLOP_4VAR_1IN_BOOLOUT(long4, !) 
- -DECLOP_4VAR_SCALE_PRODUCT(long4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(long4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(long4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(long4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(long4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(long4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(long4, float) -DECLOP_4VAR_SCALE_PRODUCT(long4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(long4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(long4, double) -DECLOP_4VAR_SCALE_PRODUCT(long4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(long4, signed long long) - -// UNSIGNED LONGLONG1 - -DECLOP_1VAR_2IN_1OUT(ulonglong1, +) -DECLOP_1VAR_2IN_1OUT(ulonglong1, -) -DECLOP_1VAR_2IN_1OUT(ulonglong1, *) -DECLOP_1VAR_2IN_1OUT(ulonglong1, /) -DECLOP_1VAR_2IN_1OUT(ulonglong1, %) -DECLOP_1VAR_2IN_1OUT(ulonglong1, &) -DECLOP_1VAR_2IN_1OUT(ulonglong1, |) -DECLOP_1VAR_2IN_1OUT(ulonglong1, ^) -DECLOP_1VAR_2IN_1OUT(ulonglong1, <<) -DECLOP_1VAR_2IN_1OUT(ulonglong1, >>) - - -DECLOP_1VAR_ASSIGN(ulonglong1, +=) -DECLOP_1VAR_ASSIGN(ulonglong1, -=) -DECLOP_1VAR_ASSIGN(ulonglong1, *=) -DECLOP_1VAR_ASSIGN(ulonglong1, /=) -DECLOP_1VAR_ASSIGN(ulonglong1, %=) -DECLOP_1VAR_ASSIGN(ulonglong1, &=) -DECLOP_1VAR_ASSIGN(ulonglong1, |=) -DECLOP_1VAR_ASSIGN(ulonglong1, ^=) -DECLOP_1VAR_ASSIGN(ulonglong1, <<=) -DECLOP_1VAR_ASSIGN(ulonglong1, >>=) - -DECLOP_1VAR_PREOP(ulonglong1, ++) -DECLOP_1VAR_PREOP(ulonglong1, --) - -DECLOP_1VAR_POSTOP(ulonglong1, ++) -DECLOP_1VAR_POSTOP(ulonglong1, --) - -DECLOP_1VAR_COMP(ulonglong1, ==) -DECLOP_1VAR_COMP(ulonglong1, !=) -DECLOP_1VAR_COMP(ulonglong1, <) -DECLOP_1VAR_COMP(ulonglong1, >) -DECLOP_1VAR_COMP(ulonglong1, <=) -DECLOP_1VAR_COMP(ulonglong1, >=) - -DECLOP_1VAR_COMP(ulonglong1, &&) -DECLOP_1VAR_COMP(ulonglong1, ||) - -DECLOP_1VAR_1IN_1OUT(ulonglong1, ~) -DECLOP_1VAR_1IN_BOOLOUT(ulonglong1, !) 
- -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, float) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, double) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(ulonglong1, signed long long) - -// UNSIGNED LONGLONG2 - -DECLOP_2VAR_2IN_1OUT(ulonglong2, +) -DECLOP_2VAR_2IN_1OUT(ulonglong2, -) -DECLOP_2VAR_2IN_1OUT(ulonglong2, *) -DECLOP_2VAR_2IN_1OUT(ulonglong2, /) -DECLOP_2VAR_2IN_1OUT(ulonglong2, %) -DECLOP_2VAR_2IN_1OUT(ulonglong2, &) -DECLOP_2VAR_2IN_1OUT(ulonglong2, |) -DECLOP_2VAR_2IN_1OUT(ulonglong2, ^) -DECLOP_2VAR_2IN_1OUT(ulonglong2, <<) -DECLOP_2VAR_2IN_1OUT(ulonglong2, >>) - -DECLOP_2VAR_ASSIGN(ulonglong2, +=) -DECLOP_2VAR_ASSIGN(ulonglong2, -=) -DECLOP_2VAR_ASSIGN(ulonglong2, *=) -DECLOP_2VAR_ASSIGN(ulonglong2, /=) -DECLOP_2VAR_ASSIGN(ulonglong2, %=) -DECLOP_2VAR_ASSIGN(ulonglong2, &=) -DECLOP_2VAR_ASSIGN(ulonglong2, |=) -DECLOP_2VAR_ASSIGN(ulonglong2, ^=) -DECLOP_2VAR_ASSIGN(ulonglong2, <<=) -DECLOP_2VAR_ASSIGN(ulonglong2, >>=) - -DECLOP_2VAR_PREOP(ulonglong2, ++) -DECLOP_2VAR_PREOP(ulonglong2, --) - -DECLOP_2VAR_POSTOP(ulonglong2, ++) -DECLOP_2VAR_POSTOP(ulonglong2, --) - -DECLOP_2VAR_COMP(ulonglong2, ==) -DECLOP_2VAR_COMP(ulonglong2, !=) -DECLOP_2VAR_COMP(ulonglong2, <) -DECLOP_2VAR_COMP(ulonglong2, >) -DECLOP_2VAR_COMP(ulonglong2, <=) -DECLOP_2VAR_COMP(ulonglong2, >=) - -DECLOP_2VAR_COMP(ulonglong2, &&) -DECLOP_2VAR_COMP(ulonglong2, ||) - -DECLOP_2VAR_1IN_1OUT(ulonglong2, ~) -DECLOP_2VAR_1IN_BOOLOUT(ulonglong2, !) 
- -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, float) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, double) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(ulonglong2, signed long long) - -// UNSIGNED LONGLONG3 - -DECLOP_3VAR_2IN_1OUT(ulonglong3, +) -DECLOP_3VAR_2IN_1OUT(ulonglong3, -) -DECLOP_3VAR_2IN_1OUT(ulonglong3, *) -DECLOP_3VAR_2IN_1OUT(ulonglong3, /) -DECLOP_3VAR_2IN_1OUT(ulonglong3, %) -DECLOP_3VAR_2IN_1OUT(ulonglong3, &) -DECLOP_3VAR_2IN_1OUT(ulonglong3, |) -DECLOP_3VAR_2IN_1OUT(ulonglong3, ^) -DECLOP_3VAR_2IN_1OUT(ulonglong3, <<) -DECLOP_3VAR_2IN_1OUT(ulonglong3, >>) - -DECLOP_3VAR_ASSIGN(ulonglong3, +=) -DECLOP_3VAR_ASSIGN(ulonglong3, -=) -DECLOP_3VAR_ASSIGN(ulonglong3, *=) -DECLOP_3VAR_ASSIGN(ulonglong3, /=) -DECLOP_3VAR_ASSIGN(ulonglong3, %=) -DECLOP_3VAR_ASSIGN(ulonglong3, &=) -DECLOP_3VAR_ASSIGN(ulonglong3, |=) -DECLOP_3VAR_ASSIGN(ulonglong3, ^=) -DECLOP_3VAR_ASSIGN(ulonglong3, <<=) -DECLOP_3VAR_ASSIGN(ulonglong3, >>=) - -DECLOP_3VAR_PREOP(ulonglong3, ++) -DECLOP_3VAR_PREOP(ulonglong3, --) - -DECLOP_3VAR_POSTOP(ulonglong3, ++) -DECLOP_3VAR_POSTOP(ulonglong3, --) - -DECLOP_3VAR_COMP(ulonglong3, ==) -DECLOP_3VAR_COMP(ulonglong3, !=) -DECLOP_3VAR_COMP(ulonglong3, <) -DECLOP_3VAR_COMP(ulonglong3, >) -DECLOP_3VAR_COMP(ulonglong3, <=) -DECLOP_3VAR_COMP(ulonglong3, >=) - -DECLOP_3VAR_COMP(ulonglong3, &&) -DECLOP_3VAR_COMP(ulonglong3, ||) - -DECLOP_3VAR_1IN_1OUT(ulonglong3, ~) -DECLOP_3VAR_1IN_BOOLOUT(ulonglong3, !) 
- -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, float) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, double) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(ulonglong3, signed long long) - -// UNSIGNED LONGLONG4 - -DECLOP_4VAR_2IN_1OUT(ulonglong4, +) -DECLOP_4VAR_2IN_1OUT(ulonglong4, -) -DECLOP_4VAR_2IN_1OUT(ulonglong4, *) -DECLOP_4VAR_2IN_1OUT(ulonglong4, /) -DECLOP_4VAR_2IN_1OUT(ulonglong4, %) -DECLOP_4VAR_2IN_1OUT(ulonglong4, &) -DECLOP_4VAR_2IN_1OUT(ulonglong4, |) -DECLOP_4VAR_2IN_1OUT(ulonglong4, ^) -DECLOP_4VAR_2IN_1OUT(ulonglong4, <<) -DECLOP_4VAR_2IN_1OUT(ulonglong4, >>) - -DECLOP_4VAR_ASSIGN(ulonglong4, +=) -DECLOP_4VAR_ASSIGN(ulonglong4, -=) -DECLOP_4VAR_ASSIGN(ulonglong4, *=) -DECLOP_4VAR_ASSIGN(ulonglong4, /=) -DECLOP_4VAR_ASSIGN(ulonglong4, %=) -DECLOP_4VAR_ASSIGN(ulonglong4, &=) -DECLOP_4VAR_ASSIGN(ulonglong4, |=) -DECLOP_4VAR_ASSIGN(ulonglong4, ^=) -DECLOP_4VAR_ASSIGN(ulonglong4, <<=) -DECLOP_4VAR_ASSIGN(ulonglong4, >>=) - -DECLOP_4VAR_PREOP(ulonglong4, ++) -DECLOP_4VAR_PREOP(ulonglong4, --) - -DECLOP_4VAR_POSTOP(ulonglong4, ++) -DECLOP_4VAR_POSTOP(ulonglong4, --) - -DECLOP_4VAR_COMP(ulonglong4, ==) -DECLOP_4VAR_COMP(ulonglong4, !=) -DECLOP_4VAR_COMP(ulonglong4, <) -DECLOP_4VAR_COMP(ulonglong4, >) -DECLOP_4VAR_COMP(ulonglong4, <=) -DECLOP_4VAR_COMP(ulonglong4, >=) - -DECLOP_4VAR_COMP(ulonglong4, &&) -DECLOP_4VAR_COMP(ulonglong4, ||) - -DECLOP_4VAR_1IN_1OUT(ulonglong4, ~) -DECLOP_4VAR_1IN_BOOLOUT(ulonglong4, !) 
- -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, float) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, double) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(ulonglong4, signed long long) - -// SIGNED LONGLONG1 - -DECLOP_1VAR_2IN_1OUT(longlong1, +) -DECLOP_1VAR_2IN_1OUT(longlong1, -) -DECLOP_1VAR_2IN_1OUT(longlong1, *) -DECLOP_1VAR_2IN_1OUT(longlong1, /) -DECLOP_1VAR_2IN_1OUT(longlong1, %) -DECLOP_1VAR_2IN_1OUT(longlong1, &) -DECLOP_1VAR_2IN_1OUT(longlong1, |) -DECLOP_1VAR_2IN_1OUT(longlong1, ^) -DECLOP_1VAR_2IN_1OUT(longlong1, <<) -DECLOP_1VAR_2IN_1OUT(longlong1, >>) - - -DECLOP_1VAR_ASSIGN(longlong1, +=) -DECLOP_1VAR_ASSIGN(longlong1, -=) -DECLOP_1VAR_ASSIGN(longlong1, *=) -DECLOP_1VAR_ASSIGN(longlong1, /=) -DECLOP_1VAR_ASSIGN(longlong1, %=) -DECLOP_1VAR_ASSIGN(longlong1, &=) -DECLOP_1VAR_ASSIGN(longlong1, |=) -DECLOP_1VAR_ASSIGN(longlong1, ^=) -DECLOP_1VAR_ASSIGN(longlong1, <<=) -DECLOP_1VAR_ASSIGN(longlong1, >>=) - -DECLOP_1VAR_PREOP(longlong1, ++) -DECLOP_1VAR_PREOP(longlong1, --) - -DECLOP_1VAR_POSTOP(longlong1, ++) -DECLOP_1VAR_POSTOP(longlong1, --) - -DECLOP_1VAR_COMP(longlong1, ==) -DECLOP_1VAR_COMP(longlong1, !=) -DECLOP_1VAR_COMP(longlong1, <) -DECLOP_1VAR_COMP(longlong1, >) -DECLOP_1VAR_COMP(longlong1, <=) -DECLOP_1VAR_COMP(longlong1, >=) - -DECLOP_1VAR_COMP(longlong1, &&) -DECLOP_1VAR_COMP(longlong1, ||) - -DECLOP_1VAR_1IN_1OUT(longlong1, ~) -DECLOP_1VAR_1IN_BOOLOUT(longlong1, !) 
- -DECLOP_1VAR_SCALE_PRODUCT(longlong1, unsigned char) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, signed char) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, unsigned short) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, signed short) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, unsigned int) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, signed int) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, float) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, unsigned long) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, signed long) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, double) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, unsigned long long) -DECLOP_1VAR_SCALE_PRODUCT(longlong1, signed long long) - -// SIGNED LONGLONG2 - -DECLOP_2VAR_2IN_1OUT(longlong2, +) -DECLOP_2VAR_2IN_1OUT(longlong2, -) -DECLOP_2VAR_2IN_1OUT(longlong2, *) -DECLOP_2VAR_2IN_1OUT(longlong2, /) -DECLOP_2VAR_2IN_1OUT(longlong2, %) -DECLOP_2VAR_2IN_1OUT(longlong2, &) -DECLOP_2VAR_2IN_1OUT(longlong2, |) -DECLOP_2VAR_2IN_1OUT(longlong2, ^) -DECLOP_2VAR_2IN_1OUT(longlong2, <<) -DECLOP_2VAR_2IN_1OUT(longlong2, >>) - -DECLOP_2VAR_ASSIGN(longlong2, +=) -DECLOP_2VAR_ASSIGN(longlong2, -=) -DECLOP_2VAR_ASSIGN(longlong2, *=) -DECLOP_2VAR_ASSIGN(longlong2, /=) -DECLOP_2VAR_ASSIGN(longlong2, %=) -DECLOP_2VAR_ASSIGN(longlong2, &=) -DECLOP_2VAR_ASSIGN(longlong2, |=) -DECLOP_2VAR_ASSIGN(longlong2, ^=) -DECLOP_2VAR_ASSIGN(longlong2, <<=) -DECLOP_2VAR_ASSIGN(longlong2, >>=) - -DECLOP_2VAR_PREOP(longlong2, ++) -DECLOP_2VAR_PREOP(longlong2, --) - -DECLOP_2VAR_POSTOP(longlong2, ++) -DECLOP_2VAR_POSTOP(longlong2, --) - -DECLOP_2VAR_COMP(longlong2, ==) -DECLOP_2VAR_COMP(longlong2, !=) -DECLOP_2VAR_COMP(longlong2, <) -DECLOP_2VAR_COMP(longlong2, >) -DECLOP_2VAR_COMP(longlong2, <=) -DECLOP_2VAR_COMP(longlong2, >=) - -DECLOP_2VAR_COMP(longlong2, &&) -DECLOP_2VAR_COMP(longlong2, ||) - -DECLOP_2VAR_1IN_1OUT(longlong2, ~) -DECLOP_2VAR_1IN_BOOLOUT(longlong2, !) 
- -DECLOP_2VAR_SCALE_PRODUCT(longlong2, unsigned char) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, signed char) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, unsigned short) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, signed short) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, unsigned int) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, signed int) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, float) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, unsigned long) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, signed long) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, double) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, unsigned long long) -DECLOP_2VAR_SCALE_PRODUCT(longlong2, signed long long) - -// SIGNED LONGLONG3 - -DECLOP_3VAR_2IN_1OUT(longlong3, +) -DECLOP_3VAR_2IN_1OUT(longlong3, -) -DECLOP_3VAR_2IN_1OUT(longlong3, *) -DECLOP_3VAR_2IN_1OUT(longlong3, /) -DECLOP_3VAR_2IN_1OUT(longlong3, %) -DECLOP_3VAR_2IN_1OUT(longlong3, &) -DECLOP_3VAR_2IN_1OUT(longlong3, |) -DECLOP_3VAR_2IN_1OUT(longlong3, ^) -DECLOP_3VAR_2IN_1OUT(longlong3, <<) -DECLOP_3VAR_2IN_1OUT(longlong3, >>) - -DECLOP_3VAR_ASSIGN(longlong3, +=) -DECLOP_3VAR_ASSIGN(longlong3, -=) -DECLOP_3VAR_ASSIGN(longlong3, *=) -DECLOP_3VAR_ASSIGN(longlong3, /=) -DECLOP_3VAR_ASSIGN(longlong3, %=) -DECLOP_3VAR_ASSIGN(longlong3, &=) -DECLOP_3VAR_ASSIGN(longlong3, |=) -DECLOP_3VAR_ASSIGN(longlong3, ^=) -DECLOP_3VAR_ASSIGN(longlong3, <<=) -DECLOP_3VAR_ASSIGN(longlong3, >>=) - -DECLOP_3VAR_PREOP(longlong3, ++) -DECLOP_3VAR_PREOP(longlong3, --) - -DECLOP_3VAR_POSTOP(longlong3, ++) -DECLOP_3VAR_POSTOP(longlong3, --) - -DECLOP_3VAR_COMP(longlong3, ==) -DECLOP_3VAR_COMP(longlong3, !=) -DECLOP_3VAR_COMP(longlong3, <) -DECLOP_3VAR_COMP(longlong3, >) -DECLOP_3VAR_COMP(longlong3, <=) -DECLOP_3VAR_COMP(longlong3, >=) - -DECLOP_3VAR_COMP(longlong3, &&) -DECLOP_3VAR_COMP(longlong3, ||) - -DECLOP_3VAR_1IN_1OUT(longlong3, ~) -DECLOP_3VAR_1IN_BOOLOUT(longlong3, !) 
- -DECLOP_3VAR_SCALE_PRODUCT(longlong3, unsigned char) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, signed char) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, unsigned short) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, signed short) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, unsigned int) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, signed int) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, float) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, unsigned long) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, signed long) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, double) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, unsigned long long) -DECLOP_3VAR_SCALE_PRODUCT(longlong3, signed long long) - -// SIGNED LONGLONG4 - -DECLOP_4VAR_2IN_1OUT(longlong4, +) -DECLOP_4VAR_2IN_1OUT(longlong4, -) -DECLOP_4VAR_2IN_1OUT(longlong4, *) -DECLOP_4VAR_2IN_1OUT(longlong4, /) -DECLOP_4VAR_2IN_1OUT(longlong4, %) -DECLOP_4VAR_2IN_1OUT(longlong4, &) -DECLOP_4VAR_2IN_1OUT(longlong4, |) -DECLOP_4VAR_2IN_1OUT(longlong4, ^) -DECLOP_4VAR_2IN_1OUT(longlong4, <<) -DECLOP_4VAR_2IN_1OUT(longlong4, >>) - -DECLOP_4VAR_ASSIGN(longlong4, +=) -DECLOP_4VAR_ASSIGN(longlong4, -=) -DECLOP_4VAR_ASSIGN(longlong4, *=) -DECLOP_4VAR_ASSIGN(longlong4, /=) -DECLOP_4VAR_ASSIGN(longlong4, %=) -DECLOP_4VAR_ASSIGN(longlong4, &=) -DECLOP_4VAR_ASSIGN(longlong4, |=) -DECLOP_4VAR_ASSIGN(longlong4, ^=) -DECLOP_4VAR_ASSIGN(longlong4, <<=) -DECLOP_4VAR_ASSIGN(longlong4, >>=) - -DECLOP_4VAR_PREOP(longlong4, ++) -DECLOP_4VAR_PREOP(longlong4, --) - -DECLOP_4VAR_POSTOP(longlong4, ++) -DECLOP_4VAR_POSTOP(longlong4, --) - -DECLOP_4VAR_COMP(longlong4, ==) -DECLOP_4VAR_COMP(longlong4, !=) -DECLOP_4VAR_COMP(longlong4, <) -DECLOP_4VAR_COMP(longlong4, >) -DECLOP_4VAR_COMP(longlong4, <=) -DECLOP_4VAR_COMP(longlong4, >=) - -DECLOP_4VAR_COMP(longlong4, &&) -DECLOP_4VAR_COMP(longlong4, ||) - -DECLOP_4VAR_1IN_1OUT(longlong4, ~) -DECLOP_4VAR_1IN_BOOLOUT(longlong4, !) 
- -DECLOP_4VAR_SCALE_PRODUCT(longlong4, unsigned char) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, signed char) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, unsigned short) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, signed short) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, unsigned int) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, signed int) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, float) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, unsigned long) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, signed long) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, double) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, unsigned long long) -DECLOP_4VAR_SCALE_PRODUCT(longlong4, signed long long) - - -#endif - - -#endif +#endif \ No newline at end of file diff --git a/hipamd/include/hip/hcc_detail/texture_functions.h b/hipamd/include/hip/hcc_detail/texture_functions.h index 8a7aec9212..999f97e65e 100644 --- a/hipamd/include/hip/hcc_detail/texture_functions.h +++ b/hipamd/include/hip/hcc_detail/texture_functions.h @@ -110,47 +110,47 @@ union TData { #define TEXTURE_RETURN_UNSIGNED return texel.u.x; -#define TEXTURE_RETURN_CHAR_X return char1(texel.i.x); +#define TEXTURE_RETURN_CHAR_X return make_char1(texel.i.x); -#define TEXTURE_RETURN_UCHAR_X return uchar1(texel.u.x); +#define TEXTURE_RETURN_UCHAR_X return make_uchar1(texel.u.x); -#define TEXTURE_RETURN_SHORT_X return short1(texel.i.x); +#define TEXTURE_RETURN_SHORT_X return make_short1(texel.i.x); -#define TEXTURE_RETURN_USHORT_X return ushort1(texel.u.x); +#define TEXTURE_RETURN_USHORT_X return make_ushort1(texel.u.x); -#define TEXTURE_RETURN_INT_X return int1(texel.i.x); +#define TEXTURE_RETURN_INT_X return make_int1(texel.i.x); -#define TEXTURE_RETURN_UINT_X return uint1(texel.u.x); +#define TEXTURE_RETURN_UINT_X return make_uint1(texel.u.x); -#define TEXTURE_RETURN_FLOAT_X return float1(texel.f.x); +#define TEXTURE_RETURN_FLOAT_X return make_float1(texel.f.x); -#define TEXTURE_RETURN_CHAR_XY return char2(texel.i.x, texel.i.y); +#define TEXTURE_RETURN_CHAR_XY return make_char2(texel.i.x, 
texel.i.y); -#define TEXTURE_RETURN_UCHAR_XY return uchar2(texel.u.x, texel.u.y); +#define TEXTURE_RETURN_UCHAR_XY return make_uchar2(texel.u.x, texel.u.y); -#define TEXTURE_RETURN_SHORT_XY return short2(texel.i.x, texel.i.y); +#define TEXTURE_RETURN_SHORT_XY return make_short2(texel.i.x, texel.i.y); -#define TEXTURE_RETURN_USHORT_XY return ushort2(texel.u.x, texel.u.y); +#define TEXTURE_RETURN_USHORT_XY return make_ushort2(texel.u.x, texel.u.y); -#define TEXTURE_RETURN_INT_XY return int2(texel.i.x, texel.i.y); +#define TEXTURE_RETURN_INT_XY return make_int2(texel.i.x, texel.i.y); -#define TEXTURE_RETURN_UINT_XY return uint2(texel.u.x, texel.u.y); +#define TEXTURE_RETURN_UINT_XY return make_uint2(texel.u.x, texel.u.y); -#define TEXTURE_RETURN_FLOAT_XY return float2(texel.f.x, texel.f.y); +#define TEXTURE_RETURN_FLOAT_XY return make_float2(texel.f.x, texel.f.y); -#define TEXTURE_RETURN_CHAR_XYZW return char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); +#define TEXTURE_RETURN_CHAR_XYZW return make_char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); -#define TEXTURE_RETURN_UCHAR_XYZW return uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); +#define TEXTURE_RETURN_UCHAR_XYZW return make_uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); -#define TEXTURE_RETURN_SHORT_XYZW return short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); +#define TEXTURE_RETURN_SHORT_XYZW return make_short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); -#define TEXTURE_RETURN_USHORT_XYZW return ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); +#define TEXTURE_RETURN_USHORT_XYZW return make_ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); -#define TEXTURE_RETURN_INT_XYZW return int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); +#define TEXTURE_RETURN_INT_XYZW return make_int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); -#define TEXTURE_RETURN_UINT_XYZW return uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); +#define TEXTURE_RETURN_UINT_XYZW return make_uint4(texel.u.x, texel.u.y, 
texel.u.z, texel.u.w); -#define TEXTURE_RETURN_FLOAT_XYZW return float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w); +#define TEXTURE_RETURN_FLOAT_XYZW return make_float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w); extern "C" { hc::short_vector::float4::vector_value_type __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT* i, diff --git a/hipamd/tests/src/deviceLib/hipVectorTypes.cpp b/hipamd/tests/src/deviceLib/hipVectorTypes.cpp index da9e24d079..3c36fb5d2e 100644 --- a/hipamd/tests/src/deviceLib/hipVectorTypes.cpp +++ b/hipamd/tests/src/deviceLib/hipVectorTypes.cpp @@ -26,7832 +26,182 @@ THE SOFTWARE. * HIT_END */ -#include -#include #include + +#include "vector_test_common.h" #include "test_common.h" -#define cmpVal1(in, exp) \ - if (in.x != exp) { \ - std::cout << "Failed at: " << __LINE__ << " in func: " << __func__ \ - << " expected output: " << exp << " but got: " << in.x << std::endl; \ - assert(-1); \ - } -#define cmpVal2(in, exp) \ - if (in.x != exp || in.y != exp) { \ - std::cout << "Failed at: " << __LINE__ << " in func: " << __func__ \ - << " expected output: " << exp << " but got: " << in.x << "," << in.y \ - << std::endl; \ - assert(-1); \ - } +#include +#include +#include +#include -#define cmpVal3(in, exp) \ - if (in.x != exp || in.y != exp || in.z != exp) { \ - std::cout << "Failed at: " << __LINE__ << " in func: " << __func__ \ - << " expected output: " << exp << " but got: " << in.x << "," << in.y << "," \ - << in.z << std::endl; \ - assert(-1); \ - } +using namespace std; -#define cmpVal4(in, exp) \ - if (in.x != exp || in.y != exp || in.z != exp || in.w != exp) { \ - std::cout << "Failed at: " << __LINE__ << " in func: " << __func__ \ - << " expected output: " << exp << " but got: " << in.x << "," << in.y << "," \ - << in.z << "," << in.w << std::endl; \ - assert(-1); \ - } +bool integer_unary_tests(...) { + return true; +} -bool TestUChar1() { - uchar1 f1, f2, f3; +bool integer_binary_tests(...) 
{ + return true; +} + +template< + typename V, + Enable_if_t().x)>{}>* = nullptr> +__device__ +bool integer_unary_tests(V& f1, V& f2) { + f1 %= f2; + if (!cmp(f1, 0)) return false; + f1 &= f2; + if (!cmp(f1, 0)) return false; + f1 |= f2; + if (!cmp(f1, 1)) return false; + f1 ^= f2; + if (!cmp(f1, 0)) return false; f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); + f1 <<= f2; + if (!cmp(f1, 2)) return false; + f1 >>= f2; + if (!cmp(f1, 1)) return false; + f2 = ~f1; + return cmp(f2, ~1); +} + +template< + typename V, + Enable_if_t().x)>{}>* = nullptr> +__device__ +bool integer_binary_tests(V& f1, V& f2, V& f3) { f3 = f1 % f2; - cmpVal1(f3, 0); + if (!cmp(f3, 0)) return false; f1 = f3 & f2; - cmpVal1(f1, 0); + if (!cmp(f1, 0)) return false; f2 = f1 ^ f3; - cmpVal1(f2, 0); + if (!cmp(f2, 0)) return false; f1.x = 1; f2.x = 2; f3 = f1 << f2; - cmpVal1(f3, 4); + if (!cmp(f3, 4)) return false; f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, 253); - assert(!f1 == false); - f1.x = 3; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - 
f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (double)1; - cmpVal1(f1, 3); - f1 = (double)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3); - - uchar1 fa((unsigned char)1); - uchar1 fb((signed char)1); - uchar1 fc((unsigned short)1); - uchar1 fd((signed short)1); - uchar1 fe((unsigned int)1); - uchar1 fg((signed int)1); - uchar1 fh((float)1); - uchar1 fi((double)1); - uchar1 fj((unsigned long)1); - uchar1 fk((signed long)1); - uchar1 fl((unsigned long long)1); - uchar1 fm((signed long long)1); - - - f1.x = 3; - f2.x = 4; - f3.x = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; + if (!cmp(f2, 2)) return false; } -bool TestUChar2() { - uchar2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 
%= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, 253); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (float)1; - cmpVal2(f1, 3); - f1 = (float)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (double)1; - cmpVal2(f1, 3); - f1 = (double)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3); - - uchar2 fa1((unsigned char)1); - uchar2 fa2((unsigned char)1, (unsigned char)1); - uchar2 fb1((signed char)1); - uchar2 fb2((signed char)1, (signed char)1); - uchar2 fc1((unsigned short)1); - uchar2 fc2((unsigned short)1, (unsigned short)1); - uchar2 fd1((signed short)1); - uchar2 fd2((signed short)1, (signed short)1); - uchar2 fe1((unsigned int)1); - uchar2 fe2((unsigned int)1, (unsigned int)1); - uchar2 fg1((signed int)1); - uchar2 fg2((signed int)1, (signed 
int)1); - uchar2 fh1((float)1); - uchar2 fh2((float)1, (float)1); - uchar2 fi1((double)1); - uchar2 fi2((double)1, (double)1); - uchar2 fj1((unsigned long)1); - uchar2 fj2((unsigned long)1, (unsigned long)1); - uchar2 fk1((signed long)1); - uchar2 fk2((signed long)1, (signed long)1); - uchar2 fl1((unsigned long long)1); - uchar2 fl2((unsigned long long)1, (unsigned long long)1); - uchar2 fm1((signed long long)1); - uchar2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestUChar3() { - uchar3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, 253); - 
assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = (float)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - uchar3 fa1((unsigned char)1); - uchar3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - uchar3 fb1((signed char)1); - uchar3 fb2((signed char)1, (signed char)1, (signed char)1); - uchar3 fc1((unsigned short)1); - uchar3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - uchar3 fd1((signed short)1); - uchar3 fd2((signed short)1, (signed short)1, (signed short)1); - uchar3 fe1((unsigned int)1); - uchar3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - uchar3 fg1((signed int)1); - uchar3 fg2((signed int)1, (signed int)1, (signed int)1); - uchar3 fh1((float)1); - uchar3 fh2((float)1, (float)1, (float)1); - uchar3 fi1((double)1); - uchar3 fi2((double)1, (double)1, (double)1); - uchar3 fj1((unsigned long)1); - uchar3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - uchar3 fk1((signed long)1); - uchar3 fk2((signed long)1, (signed long)1, (signed long)1); - uchar3 
fl1((unsigned long long)1); - uchar3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - uchar3 fm1((signed long long)1); - uchar3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestUChar4() { - uchar4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, 253); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3); - f1 = 
(unsigned char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (float)1; - cmpVal4(f1, 3); - f1 = (float)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (double)1; - cmpVal4(f1, 3); - f1 = (double)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3); - - - uchar4 fa1((unsigned char)1); - uchar4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - uchar4 fb1((signed char)1); - uchar4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - uchar4 fc1((unsigned short)1); - uchar4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - uchar4 fd1((signed short)1); - uchar4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - uchar4 fe1((unsigned int)1); - uchar4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - uchar4 fg1((signed int)1); - uchar4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - uchar4 fh1((float)1); - uchar4 fh2((float)1, (float)1, (float)1, (float)1); - uchar4 fi1((double)1); - uchar4 fi2((double)1, (double)1, (double)1, (double)1); - uchar4 fj1((unsigned long)1); - uchar4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - uchar4 fk1((signed long)1); - uchar4 fk2((signed long)1, (signed 
long)1, (signed long)1, (signed long)1); - uchar4 fl1((unsigned long long)1); - uchar4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - uchar4 fm1((signed long long)1); - uchar4 fm2((signed long long)1, (signed long long)1, (signed long long)1, (signed long long)1); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - - -bool TestChar1() { - char1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, (char)253); - assert(!f1 == false); - - f1.x = 3; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = 
(unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (double)1; - cmpVal1(f1, 3); - f1 = (double)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3); - - char1 fa((unsigned char)1); - char1 fb((signed char)1); - char1 fc((unsigned short)1); - char1 fd((signed short)1); - char1 fe((unsigned int)1); - char1 fg((signed int)1); - char1 fh((float)1); - char1 fi((double)1); - char1 fj((unsigned long)1); - char1 fk((signed long)1); - char1 fl((unsigned long long)1); - char1 fm((signed long long)1); - - - f1.x = 3; - f2.x = 4; - f3.x = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestChar2() { - char2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - 
cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, (char)253); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (float)1; - cmpVal2(f1, 3); - f1 = (float)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (double)1; - cmpVal2(f1, 3); - f1 = (double)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3); - - char2 fa1((unsigned char)1); - char2 fa2((unsigned char)1, (unsigned char)1); - char2 fb1((signed char)1); - char2 fb2((signed char)1, (signed char)1); - char2 fc1((unsigned short)1); - char2 fc2((unsigned short)1, (unsigned short)1); - char2 fd1((signed short)1); - char2 fd2((signed short)1, (signed short)1); - char2 fe1((unsigned int)1); - char2 fe2((unsigned int)1, (unsigned int)1); - char2 fg1((signed 
int)1); - char2 fg2((signed int)1, (signed int)1); - char2 fh1((float)1); - char2 fh2((float)1, (float)1); - char2 fi1((double)1); - char2 fi2((double)1, (double)1); - char2 fj1((unsigned long)1); - char2 fj2((unsigned long)1, (unsigned long)1); - char2 fk1((signed long)1); - char2 fk2((signed long)1, (signed long)1); - char2 fl1((unsigned long long)1); - char2 fl2((unsigned long long)1, (unsigned long long)1); - char2 fm1((signed long long)1); - char2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestChar3() { - char3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - 
cmpVal3(f2, (char)253); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = (float)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - char3 fa1((unsigned char)1); - char3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - char3 fb1((signed char)1); - char3 fb2((signed char)1, (signed char)1, (signed char)1); - char3 fc1((unsigned short)1); - char3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - char3 fd1((signed short)1); - char3 fd2((signed short)1, (signed short)1, (signed short)1); - char3 fe1((unsigned int)1); - char3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - char3 fg1((signed int)1); - char3 fg2((signed int)1, (signed int)1, (signed int)1); - char3 fh1((float)1); - char3 fh2((float)1, (float)1, (float)1); - char3 fi1((double)1); - char3 fi2((double)1, (double)1, (double)1); - char3 fj1((unsigned long)1); - char3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - char3 fk1((signed long)1); - char3 fk2((signed long)1, (signed long)1, (signed long)1); - 
char3 fl1((unsigned long long)1); - char3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - char3 fm1((signed long long)1); - char3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestChar4() { - char4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, (char)253); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3); 
- f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (float)1; - cmpVal4(f1, 3); - f1 = (float)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (double)1; - cmpVal4(f1, 3); - f1 = (double)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3); - - char4 fa1((unsigned char)1); - char4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - char4 fb1((signed char)1); - char4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - char4 fc1((unsigned short)1); - char4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - char4 fd1((signed short)1); - char4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - char4 fe1((unsigned int)1); - char4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - char4 fg1((signed int)1); - char4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - char4 fh1((float)1); - char4 fh2((float)1, (float)1, (float)1, (float)1); - char4 fi1((double)1); - char4 fi2((double)1, (double)1, (double)1, (double)1); - char4 fj1((unsigned long)1); - char4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - char4 fk1((signed long)1); - char4 fk2((signed long)1, (signed long)1, (signed 
long)1, (signed long)1); - char4 fl1((unsigned long long)1); - char4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - char4 fm1((signed long long)1); - char4 fm2((signed long long)1, (signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - - -bool TestUShort1() { - ushort1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, (unsigned short)65533); - assert(!f1 == false); - - f1.x = 3; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = 
(unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (double)1; - cmpVal1(f1, 3); - f1 = (double)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3); - - ushort1 fa((unsigned char)1); - ushort1 fb((signed char)1); - ushort1 fc((unsigned short)1); - ushort1 fd((signed short)1); - ushort1 fe((unsigned int)1); - ushort1 fg((signed int)1); - ushort1 fh((float)1); - ushort1 fi((double)1); - ushort1 fj((unsigned long)1); - ushort1 fk((signed long)1); - ushort1 fl((unsigned long long)1); - ushort1 fm((signed long long)1); - - - f1.x = 3; - f2.x = 4; - f3.x = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestUShort2() { - ushort2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - 
cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, (unsigned short)65533); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (float)1; - cmpVal2(f1, 3); - f1 = (float)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (double)1; - cmpVal2(f1, 3); - f1 = (double)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3); - - ushort2 fa1((unsigned char)1); - ushort2 fa2((unsigned char)1, (unsigned char)1); - ushort2 fb1((signed char)1); - ushort2 fb2((signed char)1, (signed char)1); - ushort2 fc1((unsigned short)1); - ushort2 fc2((unsigned short)1, (unsigned short)1); - ushort2 fd1((signed short)1); - ushort2 fd2((signed short)1, (signed short)1); - ushort2 fe1((unsigned int)1); - ushort2 
fe2((unsigned int)1, (unsigned int)1); - ushort2 fg1((signed int)1); - ushort2 fg2((signed int)1, (signed int)1); - ushort2 fh1((float)1); - ushort2 fh2((float)1, (float)1); - ushort2 fi1((double)1); - ushort2 fi2((double)1, (double)1); - ushort2 fj1((unsigned long)1); - ushort2 fj2((unsigned long)1, (unsigned long)1); - ushort2 fk1((signed long)1); - ushort2 fk2((signed long)1, (signed long)1); - ushort2 fl1((unsigned long long)1); - ushort2 fl2((unsigned long long)1, (unsigned long long)1); - ushort2 fm1((signed long long)1); - ushort2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestUShort3() { - ushort3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - 
cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, (unsigned short)65533); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = (float)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - ushort3 fa1((unsigned char)1); - ushort3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - ushort3 fb1((signed char)1); - ushort3 fb2((signed char)1, (signed char)1, (signed char)1); - ushort3 fc1((unsigned short)1); - ushort3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - ushort3 fd1((signed short)1); - ushort3 fd2((signed short)1, (signed short)1, (signed short)1); - ushort3 fe1((unsigned int)1); - ushort3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - ushort3 fg1((signed int)1); - ushort3 fg2((signed int)1, (signed int)1, (signed int)1); - ushort3 fh1((float)1); - ushort3 fh2((float)1, (float)1, (float)1); - ushort3 fi1((double)1); - ushort3 fi2((double)1, (double)1, (double)1); - ushort3 fj1((unsigned long)1); - ushort3 
fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - ushort3 fk1((signed long)1); - ushort3 fk2((signed long)1, (signed long)1, (signed long)1); - ushort3 fl1((unsigned long long)1); - ushort3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - ushort3 fm1((signed long long)1); - ushort3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestUShort4() { - ushort4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 
2); - - f2 = ~f1; - cmpVal4(f2, (unsigned short)65533); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (float)1; - cmpVal4(f1, 3); - f1 = (float)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (double)1; - cmpVal4(f1, 3); - f1 = (double)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3); - - ushort4 fa1((unsigned char)1); - ushort4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - ushort4 fb1((signed char)1); - ushort4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - ushort4 fc1((unsigned short)1); - ushort4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - ushort4 fd1((signed short)1); - ushort4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - ushort4 fe1((unsigned int)1); - ushort4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - ushort4 fg1((signed int)1); - ushort4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - ushort4 fh1((float)1); - ushort4 fh2((float)1, (float)1, (float)1, (float)1); - ushort4 fi1((double)1); - ushort4 fi2((double)1, (double)1, (double)1, 
(double)1); - ushort4 fj1((unsigned long)1); - ushort4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - ushort4 fk1((signed long)1); - ushort4 fk2((signed long)1, (signed long)1, (signed long)1, (signed long)1); - ushort4 fl1((unsigned long long)1); - ushort4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - ushort4 fm1((signed long long)1); - ushort4 fm2((signed long long)1, (signed long long)1, (signed long long)1, (signed long long)1); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - - -bool TestShort1() { - short1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, (signed short)65533); - assert(!f1 == false); - - f1.x = 3; - f1 = f1 * (unsigned 
char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (double)1; - cmpVal1(f1, 3); - f1 = (double)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3); - - short1 fa((unsigned char)1); - short1 fb((signed char)1); - short1 fc((unsigned short)1); - short1 fd((signed short)1); - short1 fe((unsigned int)1); - short1 fg((signed int)1); - short1 fh((float)1); - short1 fi((double)1); - short1 fj((unsigned long)1); - short1 fk((signed long)1); - short1 fl((unsigned long long)1); - short1 fm((signed long long)1); - - - f1.x = 3; - f2.x = 4; - f3.x = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestShort2() { - short2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 
1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, (signed short)65533); - assert(!f1 == false); - - cmpVal2(f1, 3); - f1.x = 3; - f1.y = 3; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (float)1; - cmpVal2(f1, 3); - f1 = (float)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (double)1; - cmpVal2(f1, 3); - f1 = (double)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3); - - short2 fa1((unsigned char)1); - short2 fa2((unsigned char)1, (unsigned char)1); - short2 fb1((signed char)1); - short2 fb2((signed char)1, (signed 
char)1); - short2 fc1((unsigned short)1); - short2 fc2((unsigned short)1, (unsigned short)1); - short2 fd1((signed short)1); - short2 fd2((signed short)1, (signed short)1); - short2 fe1((unsigned int)1); - short2 fe2((unsigned int)1, (unsigned int)1); - short2 fg1((signed int)1); - short2 fg2((signed int)1, (signed int)1); - short2 fh1((float)1); - short2 fh2((float)1, (float)1); - short2 fi1((double)1); - short2 fi2((double)1, (double)1); - short2 fj1((unsigned long)1); - short2 fj2((unsigned long)1, (unsigned long)1); - short2 fk1((signed long)1); - short2 fk2((signed long)1, (signed long)1); - short2 fl1((unsigned long long)1); - short2 fl2((unsigned long long)1, (unsigned long long)1); - short2 fm1((signed long long)1); - short2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestShort3() { - short3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - 
cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, (signed short)65533); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = (float)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - short3 fa1((unsigned char)1); - short3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - short3 fb1((signed char)1); - short3 fb2((signed char)1, (signed char)1, (signed char)1); - short3 fc1((unsigned short)1); - short3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - short3 fd1((signed short)1); - short3 fd2((signed short)1, (signed short)1, (signed short)1); - short3 fe1((unsigned int)1); - short3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - short3 fg1((signed int)1); - short3 fg2((signed int)1, (signed int)1, (signed int)1); - short3 
fh1((float)1); - short3 fh2((float)1, (float)1, (float)1); - short3 fi1((double)1); - short3 fi2((double)1, (double)1, (double)1); - short3 fj1((unsigned long)1); - short3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - short3 fk1((signed long)1); - short3 fk2((signed long)1, (signed long)1, (signed long)1); - short3 fl1((unsigned long long)1); - short3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - short3 fm1((signed long long)1); - short3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestShort4() { - short4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 
3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, (signed short)65533); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (float)1; - cmpVal4(f1, 3); - f1 = (float)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (double)1; - cmpVal4(f1, 3); - f1 = (double)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3); - - short4 fa1((unsigned char)1); - short4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - short4 fb1((signed char)1); - short4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - short4 fc1((unsigned short)1); - short4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - short4 fd1((signed short)1); - short4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - short4 fe1((unsigned int)1); - short4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - short4 fg1((signed int)1); - short4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - 
short4 fh1((float)1); - short4 fh2((float)1, (float)1, (float)1, (float)1); - short4 fi1((double)1); - short4 fi2((double)1, (double)1, (double)1, (double)1); - short4 fj1((unsigned long)1); - short4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - short4 fk1((signed long)1); - short4 fk2((signed long)1, (signed long)1, (signed long)1, (signed long)1); - short4 fl1((unsigned long long)1); - short4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - short4 fm1((signed long long)1); - short4 fm2((signed long long)1, (signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - - -bool TestUInt1() { - uint1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - 
cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, (unsigned int)4294967293); - assert(!f1 == false); - - f1.x = 3; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (double)1; - cmpVal1(f1, 3); - f1 = (double)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3); - - uint1 fa((unsigned char)1); - uint1 fb((signed char)1); - uint1 fc((unsigned short)1); - uint1 fd((signed short)1); - uint1 fe((unsigned int)1); - uint1 fg((signed int)1); - uint1 fh((float)1); - uint1 fi((double)1); - uint1 fj((unsigned long)1); - uint1 fk((signed long)1); - uint1 fl((unsigned long long)1); - uint1 fm((signed long long)1); - - - f1.x = 3; - f2.x = 4; - f3.x = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestUInt2() { - uint2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / 
f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, (unsigned int)4294967293); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (float)1; - cmpVal2(f1, 3); - f1 = (float)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (double)1; - cmpVal2(f1, 3); - f1 = (double)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3); - - uint2 fa1((unsigned 
char)1); - uint2 fa2((unsigned char)1, (unsigned char)1); - uint2 fb1((signed char)1); - uint2 fb2((signed char)1, (signed char)1); - uint2 fc1((unsigned short)1); - uint2 fc2((unsigned short)1, (unsigned short)1); - uint2 fd1((signed short)1); - uint2 fd2((signed short)1, (signed short)1); - uint2 fe1((unsigned int)1); - uint2 fe2((unsigned int)1, (unsigned int)1); - uint2 fg1((signed int)1); - uint2 fg2((signed int)1, (signed int)1); - uint2 fh1((float)1); - uint2 fh2((float)1, (float)1); - uint2 fi1((double)1); - uint2 fi2((double)1, (double)1); - uint2 fj1((unsigned long)1); - uint2 fj2((unsigned long)1, (unsigned long)1); - uint2 fk1((signed long)1); - uint2 fk2((signed long)1, (signed long)1); - uint2 fl1((unsigned long long)1); - uint2 fl2((unsigned long long)1, (unsigned long long)1); - uint2 fm1((signed long long)1); - uint2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestUInt3() { - uint3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - 
cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, (unsigned int)4294967293); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = (float)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - uint3 fa1((unsigned char)1); - uint3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - uint3 fb1((signed char)1); - uint3 fb2((signed char)1, (signed char)1, (signed char)1); - uint3 fc1((unsigned short)1); - uint3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - uint3 fd1((signed short)1); - uint3 fd2((signed short)1, (signed short)1, (signed short)1); - uint3 fe1((unsigned int)1); - uint3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - 
uint3 fg1((signed int)1); - uint3 fg2((signed int)1, (signed int)1, (signed int)1); - uint3 fh1((float)1); - uint3 fh2((float)1, (float)1, (float)1); - uint3 fi1((double)1); - uint3 fi2((double)1, (double)1, (double)1); - uint3 fj1((unsigned long)1); - uint3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - uint3 fk1((signed long)1); - uint3 fk2((signed long)1, (signed long)1, (signed long)1); - uint3 fl1((unsigned long long)1); - uint3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - uint3 fm1((signed long long)1); - uint3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestUInt4() { - uint4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 
1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, (unsigned int)4294967293); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (float)1; - cmpVal4(f1, 3); - f1 = (float)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (double)1; - cmpVal4(f1, 3); - f1 = (double)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3); - - uint4 fa1((unsigned char)1); - uint4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - uint4 fb1((signed char)1); - uint4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - uint4 fc1((unsigned short)1); - uint4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - uint4 fd1((signed short)1); - uint4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - uint4 fe1((unsigned int)1); - uint4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - uint4 fg1((signed int)1); - 
uint4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - uint4 fh1((float)1); - uint4 fh2((float)1, (float)1, (float)1, (float)1); - uint4 fi1((double)1); - uint4 fi2((double)1, (double)1, (double)1, (double)1); - uint4 fj1((unsigned long)1); - uint4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - uint4 fk1((signed long)1); - uint4 fk2((signed long)1, (signed long)1, (signed long)1, (signed long)1); - uint4 fl1((unsigned long long)1); - uint4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - uint4 fm1((signed long long)1); - uint4 fm2((signed long long)1, (signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - - -bool TestInt1() { - int1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - 
cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, (signed int)4294967293); - assert(!f1 == false); - - f1.x = 3; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (double)1; - cmpVal1(f1, 3); - f1 = (double)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3); - - int1 fa((unsigned char)1); - int1 fb((signed char)1); - int1 fc((unsigned short)1); - int1 fd((signed short)1); - int1 fe((unsigned int)1); - int1 fg((signed int)1); - int1 fh((float)1); - int1 fi((double)1); - int1 fj((unsigned long)1); - int1 fk((signed long)1); - int1 fl((unsigned long long)1); - int1 fm((signed long long)1); - - - f1.x = 3; - f2.x = 4; - f3.x = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestInt2() { - int2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; 
- cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, (signed int)4294967293); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (float)1; - cmpVal2(f1, 3); - f1 = (float)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (double)1; - cmpVal2(f1, 3); - f1 = (double)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3); - 
- int2 fa1((unsigned char)1); - int2 fa2((unsigned char)1, (unsigned char)1); - int2 fb1((signed char)1); - int2 fb2((signed char)1, (signed char)1); - int2 fc1((unsigned short)1); - int2 fc2((unsigned short)1, (unsigned short)1); - int2 fd1((signed short)1); - int2 fd2((signed short)1, (signed short)1); - int2 fe1((unsigned int)1); - int2 fe2((unsigned int)1, (unsigned int)1); - int2 fg1((signed int)1); - int2 fg2((signed int)1, (signed int)1); - int2 fh1((float)1); - int2 fh2((float)1, (float)1); - int2 fi1((double)1); - int2 fi2((double)1, (double)1); - int2 fj1((unsigned long)1); - int2 fj2((unsigned long)1, (unsigned long)1); - int2 fk1((signed long)1); - int2 fk2((signed long)1, (signed long)1); - int2 fl1((unsigned long long)1); - int2 fl2((unsigned long long)1, (unsigned long long)1); - int2 fm1((signed long long)1); - int2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestInt3() { - int3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - 
cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, (signed int)4294967293); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = (float)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - int3 fa1((unsigned char)1); - int3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - int3 fb1((signed char)1); - int3 fb2((signed char)1, (signed char)1, (signed char)1); - int3 fc1((unsigned short)1); - int3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - int3 fd1((signed short)1); - int3 fd2((signed short)1, (signed short)1, (signed short)1); - int3 fe1((unsigned int)1); - int3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - int3 
fg1((signed int)1); - int3 fg2((signed int)1, (signed int)1, (signed int)1); - int3 fh1((float)1); - int3 fh2((float)1, (float)1, (float)1); - int3 fi1((double)1); - int3 fi2((double)1, (double)1, (double)1); - int3 fj1((unsigned long)1); - int3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - int3 fk1((signed long)1); - int3 fk2((signed long)1, (signed long)1, (signed long)1); - int3 fl1((unsigned long long)1); - int3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - int3 fm1((signed long long)1); - int3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestInt4() { - int4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - 
f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, (signed int)4294967293); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (float)1; - cmpVal4(f1, 3); - f1 = (float)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (double)1; - cmpVal4(f1, 3); - f1 = (double)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3); - - int4 fa1((unsigned char)1); - int4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - int4 fb1((signed char)1); - int4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - int4 fc1((unsigned short)1); - int4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - int4 fd1((signed short)1); - int4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - int4 fe1((unsigned int)1); - int4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - int4 fg1((signed int)1); - int4 fg2((signed int)1, (signed 
int)1, (signed int)1, (signed int)1); - int4 fh1((float)1); - int4 fh2((float)1, (float)1, (float)1, (float)1); - int4 fi1((double)1); - int4 fi2((double)1, (double)1, (double)1, (double)1); - int4 fj1((unsigned long)1); - int4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - int4 fk1((signed long)1); - int4 fk2((signed long)1, (signed long)1, (signed long)1, (signed long)1); - int4 fl1((unsigned long long)1); - int4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - int4 fm1((signed long long)1); - int4 fm2((signed long long)1, (signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - - -bool TestFloat1() { - float1 f1, f2, f3; - f1.x = 1.0f; - f2.x = 1.0f; - f3 = f1 + f2; - cmpVal1(f3, 2.0f); - f2 = f3 - f1; - cmpVal1(f2, 1.0f); - f1 = f2 * f3; - cmpVal1(f1, 2.0f); - f2 = f1 / f3; - cmpVal1(f2, 2.0f / 2.0f); - f1 += f2; - cmpVal1(f1, 3.0f); - f1 -= f2; - cmpVal1(f1, 2.0f); - f1 *= f2; - cmpVal1(f1, 2.0f); - f1 /= f2; - cmpVal1(f1, 2.0f); - f2 = f1++; - cmpVal1(f1, 3.0f); - cmpVal1(f2, 2.0f); - f2 = f1--; - cmpVal1(f2, 3.0f); - cmpVal1(f1, 2.0f); - f2 = ++f1; - cmpVal1(f1, 3.0f); - cmpVal1(f2, 3.0f); - f2 = --f1; - cmpVal1(f1, 2.0f); - cmpVal1(f1, 2.0f); - - f1.x = 3.0f; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3.0f); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3.0f); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3.0f); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3.0f); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3.0f); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3.0f); - f1 = f1 
* (signed short)1; - cmpVal1(f1, 3.0f); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3.0f); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3.0f); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3.0f); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3.0f); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3.0f); - f1 = f1 * (float)1; - cmpVal1(f1, 3.0f); - f1 = (float)1 * f1; - cmpVal1(f1, 3.0f); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3.0f); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3.0f); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3.0f); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3.0f); - f1 = f1 * (double)1; - cmpVal1(f1, 3.0f); - f1 = (double)1 * f1; - cmpVal1(f1, 3.0f); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3.0f); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3.0f); - - float1 fa((unsigned char)1); - float1 fb((signed char)1); - float1 fc((unsigned short)1); - float1 fd((signed short)1); - float1 fe((unsigned int)1); - float1 fg((signed int)1); - float1 fh((float)1); - float1 fi((double)1); - float1 fj((unsigned long)1); - float1 fk((signed long)1); - float1 fl((unsigned long long)1); - float1 fm((signed long long)1); - - - f1.x = 3.0f; - f2.x = 4.0f; - f3.x = 3.0f; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); +template +bool constructor_tests() { + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); + static_assert(is_constructible{}, ""); return true; } -bool TestFloat2() { - float2 f1, f2, f3; - f1.x = 1.0f; - f1.y = 1.0f; - f2.x = 1.0f; - f2.y = 
1.0f; - f3 = f1 + f2; - cmpVal2(f3, 2.0f); +template +bool TestVectorType() { + V f1(1); + V f2(1); + V f3 = f1 + f2; + if (!cmp(f3, 2)) return false; f2 = f3 - f1; - cmpVal2(f2, 1.0f); + if (!cmp(f2, 1)) return false; f1 = f2 * f3; - cmpVal2(f1, 2.0f); + if (!cmp(f1, 2)) return false; f2 = f1 / f3; - cmpVal2(f2, 2.0f / 2.0f); + if (!cmp(f2, 2 / 2)) return false; + if (!integer_binary_tests(f1, f2, f3)) return false; + + f1 = V(2); + f2 = V(1); f1 += f2; - cmpVal2(f1, 3.0f); + if (!cmp(f1, 3)) return false; f1 -= f2; - cmpVal2(f1, 2.0f); + if (!cmp(f1, 2)) return false; f1 *= f2; - cmpVal2(f1, 2.0f); + if (!cmp(f1, 2)) return false; f1 /= f2; - cmpVal2(f1, 2.0f); + if (!cmp(f1, 2)) return false; + if (!integer_unary_tests(f1, f2)) return false; - f2 = f1++; - cmpVal2(f1, 3.0f); - cmpVal2(f2, 2.0f); - f2 = f1--; - cmpVal2(f2, 3.0f); - cmpVal2(f1, 2.0f); - f2 = ++f1; - cmpVal2(f1, 3.0f); - cmpVal2(f2, 3.0f); - f2 = --f1; - cmpVal2(f1, 2.0f); - cmpVal2(f1, 2.0f); + #if false // We do not enable nullary increment / decrement yet. 
+ f1 = V(2); + f2 = f1++; + if (!cmp(f1, 3)) return false; + if (!cmp(f2, 2)) return false; + f2 = f1--; + if (!cmp(f2, 3)) return false; + if (!cmp(f1, 2)) return false; + f2 = ++f1; + if (!cmp(f1, 3)) return false; + if (!cmp(f2, 3)) return false; + f2 = --f1; + if (!cmp(f1, 2)) return false; + if (!cmp(f2, 2)) return false; + #endif - f1.x = 3.0f; - f1.y = 3.0f; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3.0f); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3.0f); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3.0f); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3.0f); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3.0f); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3.0f); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3.0f); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3.0f); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3.0f); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3.0f); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3.0f); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3.0f); - f1 = f1 * (float)1; - cmpVal2(f1, 3.0f); - f1 = (float)1 * f1; - cmpVal2(f1, 3.0f); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3.0f); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3.0f); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3.0f); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3.0f); - f1 = f1 * (double)1; - cmpVal2(f1, 3.0f); - f1 = (double)1 * f1; - cmpVal2(f1, 3.0f); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3.0f); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3.0f); - - float2 fa1((unsigned char)1); - float2 fa2((unsigned char)1, (unsigned char)1); - float2 fb1((signed char)1); - float2 fb2((signed char)1, (signed char)1); - float2 fc1((unsigned short)1); - float2 fc2((unsigned short)1, (unsigned short)1); - float2 fd1((signed short)1); - float2 fd2((signed short)1, (signed short)1); - float2 fe1((unsigned int)1); - float2 fe2((unsigned int)1, (unsigned int)1); - float2 fg1((signed int)1); - float2 fg2((signed int)1, (signed int)1); - float2 fh1((float)1); - float2 fh2((float)1, (float)1); - float2 
fi1((double)1); - float2 fi2((double)1, (double)1); - float2 fj1((unsigned long)1); - float2 fj2((unsigned long)1, (unsigned long)1); - float2 fk1((signed long)1); - float2 fk2((signed long)1, (signed long)1); - float2 fl1((unsigned long long)1); - float2 fl2((unsigned long long)1, (unsigned long long)1); - float2 fm1((signed long long)1); - float2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3.0f; - f1.y = 3.0f; - f2.x = 4.0f; - f2.y = 4.0f; - f3.x = 3.0f; - f3.y = 3.0f; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); + if (!constructor_tests()) return false; + f1 = V(3); + f2 = V(4); + f3 = V(3); + if (cmp(f1 == f2, true)) return false; + if (cmp(f1 != f2, false)) return false; + if (cmp(f1 < f2, false)) return false; + if (cmp(f2 > f1, false)) return false; + if (cmp(f1 >= f3, false)) return false; + if (cmp(f1 <= f3, false)) return false; + if (cmp(f1 && f2, false)) return false; + if (cmp(f1 || f2, false)) return false; return true; } -bool TestFloat3() { - float3 f1, f2, f3; - f1.x = 1.0f; - f1.y = 1.0f; - f1.z = 1.0f; - f2.x = 1.0f; - f2.y = 1.0f; - f2.z = 1.0f; - f3 = f1 + f2; - cmpVal3(f3, 2.0f); - f2 = f3 - f1; - cmpVal3(f2, 1.0f); - f1 = f2 * f3; - cmpVal3(f1, 2.0f); - f2 = f1 / f3; - cmpVal3(f2, 2.0f / 2.0f); - f1 += f2; - cmpVal3(f1, 3.0f); - f1 -= f2; - cmpVal3(f1, 2.0f); - f1 *= f2; - cmpVal3(f1, 2.0f); - f1 /= f2; - f2 = f1++; - cmpVal3(f1, 3.0f); - cmpVal3(f2, 2.0f); - f2 = f1--; - cmpVal3(f2, 3.0f); - cmpVal3(f1, 2.0f); - f2 = ++f1; - cmpVal3(f1, 3.0f); - cmpVal3(f2, 3.0f); - f2 = --f1; - cmpVal3(f1, 2.0f); - cmpVal3(f1, 2.0f); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = 
(unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = (float)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - float3 fa1((unsigned char)1); - float3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - float3 fb1((signed char)1); - float3 fb2((signed char)1, (signed char)1, (signed char)1); - float3 fc1((unsigned short)1); - float3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - float3 fd1((signed short)1); - float3 fd2((signed short)1, (signed short)1, (signed short)1); - float3 fe1((unsigned int)1); - float3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - float3 fg1((signed int)1); - float3 fg2((signed int)1, (signed int)1, (signed int)1); - float3 fh1((float)1); - float3 fh2((float)1, (float)1, (float)1); - float3 fi1((double)1); - float3 fi2((double)1, (double)1, (double)1); - float3 fj1((unsigned long)1); - float3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - float3 fk1((signed long)1); - float3 fk2((signed long)1, (signed long)1, (signed long)1); - float3 fl1((unsigned long long)1); - float3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - float3 fm1((signed long long)1); - float3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3.0f; - f1.y = 3.0f; - f1.z = 3.0f; - f2.x = 4.0f; - f2.y = 4.0f; - 
f2.z = 4.0f; - f3.x = 3.0f; - f3.y = 3.0f; - f3.z = 3.0f; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - +template* = nullptr> +bool TestVectorTypes() { return true; } -bool TestFloat4() { - float4 f1, f2, f3; - f1.x = 1.0f; - f1.y = 1.0f; - f1.z = 1.0f; - f1.w = 1.0f; - f2.x = 1.0f; - f2.y = 1.0f; - f2.z = 1.0f; - f2.w = 1.0f; - f3 = f1 + f2; - cmpVal4(f3, 2.0f); - f2 = f3 - f1; - cmpVal4(f2, 1.0f); - f1 = f2 * f3; - cmpVal4(f1, 2.0f); - f2 = f1 / f3; - cmpVal4(f2, 2.0f / 2.0f); - f1 += f2; - cmpVal4(f1, 3.0f); - f1 -= f2; - cmpVal4(f1, 2.0f); - f1 *= f2; - cmpVal4(f1, 2.0f); - f1 /= f2; - f2 = f1++; - cmpVal4(f1, 3.0f); - cmpVal4(f2, 2.0f); - f2 = f1--; - cmpVal4(f2, 3.0f); - cmpVal4(f1, 2.0f); - f2 = ++f1; - cmpVal4(f1, 3.0f); - cmpVal4(f2, 3.0f); - f2 = --f1; - cmpVal4(f1, 2.0f); - cmpVal4(f1, 2.0f); - - f1.x = 3.0f; - f1.y = 3.0f; - f1.z = 3.0f; - f1.w = 3.0f; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3.0f); - f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3.0f); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3.0f); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3.0f); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3.0f); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3.0f); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3.0f); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3.0f); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3.0f); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3.0f); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3.0f); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3.0f); - f1 = f1 * (float)1; - cmpVal4(f1, 3.0f); - f1 = (float)1 * f1; - cmpVal4(f1, 3.0f); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3.0f); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3.0f); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3.0f); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3.0f); - f1 = f1 * (double)1; - cmpVal4(f1, 3.0f); - f1 = (double)1 * f1; - cmpVal4(f1, 3.0f); - f1 = f1 * 
(unsigned long long)1; - cmpVal4(f1, 3.0f); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3.0f); - - float4 fa1((unsigned char)1); - float4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - float4 fb1((signed char)1); - float4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - float4 fc1((unsigned short)1); - float4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - float4 fd1((signed short)1); - float4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - float4 fe1((unsigned int)1); - float4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - float4 fg1((signed int)1); - float4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - float4 fh1((float)1); - float4 fh2((float)1, (float)1, (float)1, (float)1); - float4 fi1((double)1); - float4 fi2((double)1, (double)1, (double)1, (double)1); - float4 fj1((unsigned long)1); - float4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - float4 fk1((signed long)1); - float4 fk2((signed long)1, (signed long)1, (signed long)1, (signed long)1); - float4 fl1((unsigned long long)1); - float4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - float4 fm1((signed long long)1); - float4 fm2((signed long long)1, (signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3.0f; - f1.y = 3.0f; - f1.z = 3.0f; - f1.w = 3.0f; - f2.x = 4.0f; - f2.y = 4.0f; - f2.z = 4.0f; - f2.w = 4.0f; - f3.x = 3.0f; - f3.y = 3.0f; - f3.z = 3.0f; - f3.w = 3.0f; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - return true; +template +bool TestVectorTypes() { + if (!TestVectorType()) return false; + return TestVectorTypes(); } - -bool TestULong1() { - ulong1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - 
f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, 18446744073709551613UL); - assert(!f1 == false); - - f1.x = 3; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (double)1; - cmpVal1(f1, 3); - f1 = (double)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3); - - 
ulong1 fa((unsigned char)1); - ulong1 fb((signed char)1); - ulong1 fc((unsigned short)1); - ulong1 fd((signed short)1); - ulong1 fe((unsigned int)1); - ulong1 fg((signed int)1); - ulong1 fh((float)1); - ulong1 fi((double)1); - ulong1 fj((unsigned long)1); - ulong1 fk((signed long)1); - ulong1 fl((unsigned long long)1); - ulong1 fm((signed long long)1); - - - f1.x = 3; - f2.x = 4; - f3.x = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestULong2() { - ulong2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, 18446744073709551613UL); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3); - f1 
= f1 * (unsigned short)1; - cmpVal2(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (float)1; - cmpVal2(f1, 3); - f1 = (float)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (double)1; - cmpVal2(f1, 3); - f1 = (double)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3); - - ulong2 fa1((unsigned char)1); - ulong2 fa2((unsigned char)1, (unsigned char)1); - ulong2 fb1((signed char)1); - ulong2 fb2((signed char)1, (signed char)1); - ulong2 fc1((unsigned short)1); - ulong2 fc2((unsigned short)1, (unsigned short)1); - ulong2 fd1((signed short)1); - ulong2 fd2((signed short)1, (signed short)1); - ulong2 fe1((unsigned int)1); - ulong2 fe2((unsigned int)1, (unsigned int)1); - ulong2 fg1((signed int)1); - ulong2 fg2((signed int)1, (signed int)1); - ulong2 fh1((float)1); - ulong2 fh2((float)1, (float)1); - ulong2 fi1((double)1); - ulong2 fi2((double)1, (double)1); - ulong2 fj1((unsigned long)1); - ulong2 fj2((unsigned long)1, (unsigned long)1); - ulong2 fk1((signed long)1); - ulong2 fk2((signed long)1, (signed long)1); - ulong2 fl1((unsigned long long)1); - ulong2 fl2((unsigned long long)1, (unsigned long long)1); - ulong2 fm1((signed long long)1); - ulong2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - 
assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestULong3() { - ulong3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, 18446744073709551613UL); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = (float)1 * f1; - 
cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - ulong3 fa1((unsigned char)1); - ulong3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - ulong3 fb1((signed char)1); - ulong3 fb2((signed char)1, (signed char)1, (signed char)1); - ulong3 fc1((unsigned short)1); - ulong3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - ulong3 fd1((signed short)1); - ulong3 fd2((signed short)1, (signed short)1, (signed short)1); - ulong3 fe1((unsigned int)1); - ulong3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - ulong3 fg1((signed int)1); - ulong3 fg2((signed int)1, (signed int)1, (signed int)1); - ulong3 fh1((float)1); - ulong3 fh2((float)1, (float)1, (float)1); - ulong3 fi1((double)1); - ulong3 fi2((double)1, (double)1, (double)1); - ulong3 fj1((unsigned long)1); - ulong3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - ulong3 fk1((signed long)1); - ulong3 fk2((signed long)1, (signed long)1, (signed long)1); - ulong3 fl1((unsigned long long)1); - ulong3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - ulong3 fm1((signed long long)1); - ulong3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestULong4() { - ulong4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - 
f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, 18446744073709551613UL); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (float)1; - cmpVal4(f1, 3); - f1 = (float)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3); - f1 = (unsigned long)1 * f1; - 
cmpVal4(f1, 3); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (double)1; - cmpVal4(f1, 3); - f1 = (double)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3); - - ulong4 fa1((unsigned char)1); - ulong4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - ulong4 fb1((signed char)1); - ulong4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - ulong4 fc1((unsigned short)1); - ulong4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - ulong4 fd1((signed short)1); - ulong4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - ulong4 fe1((unsigned int)1); - ulong4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - ulong4 fg1((signed int)1); - ulong4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - ulong4 fh1((float)1); - ulong4 fh2((float)1, (float)1, (float)1, (float)1); - ulong4 fi1((double)1); - ulong4 fi2((double)1, (double)1, (double)1, (double)1); - ulong4 fj1((unsigned long)1); - ulong4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - ulong4 fk1((signed long)1); - ulong4 fk2((signed long)1, (signed long)1, (signed long)1, (signed long)1); - ulong4 fl1((unsigned long long)1); - ulong4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - ulong4 fm1((signed long long)1); - ulong4 fm2((signed long long)1, (signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); 
- assert((f1 || f2) == true); - return true; -} - - -bool TestLong1() { - long1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (double)1; - cmpVal1(f1, 3); - f1 = (double)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * 
(unsigned long long)1; - cmpVal1(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3); - - long1 fa((unsigned char)1); - long1 fb((signed char)1); - long1 fc((unsigned short)1); - long1 fd((signed short)1); - long1 fe((unsigned int)1); - long1 fg((signed int)1); - long1 fh((float)1); - long1 fi((double)1); - long1 fj((unsigned long)1); - long1 fk((signed long)1); - long1 fl((unsigned long long)1); - long1 fm((signed long long)1); - - - f1.x = 3; - f2.x = 4; - f3.x = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestLong2() { - long2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed char)1; - 
cmpVal2(f1, 3); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (float)1; - cmpVal2(f1, 3); - f1 = (float)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (double)1; - cmpVal2(f1, 3); - f1 = (double)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3); - - long2 fa1((unsigned char)1); - long2 fa2((unsigned char)1, (unsigned char)1); - long2 fb1((signed char)1); - long2 fb2((signed char)1, (signed char)1); - long2 fc1((unsigned short)1); - long2 fc2((unsigned short)1, (unsigned short)1); - long2 fd1((signed short)1); - long2 fd2((signed short)1, (signed short)1); - long2 fe1((unsigned int)1); - long2 fe2((unsigned int)1, (unsigned int)1); - long2 fg1((signed int)1); - long2 fg2((signed int)1, (signed int)1); - long2 fh1((float)1); - long2 fh2((float)1, (float)1); - long2 fi1((double)1); - long2 fi2((double)1, (double)1); - long2 fj1((unsigned long)1); - long2 fj2((unsigned long)1, (unsigned long)1); - long2 fk1((signed long)1); - long2 fk2((signed long)1, (signed long)1); - long2 fl1((unsigned long long)1); - long2 fl2((unsigned long long)1, (unsigned long long)1); - long2 fm1((signed long long)1); - long2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == 
true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestLong3() { - long3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = 
(float)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - - long3 fa1((unsigned char)1); - long3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - long3 fb1((signed char)1); - long3 fb2((signed char)1, (signed char)1, (signed char)1); - long3 fc1((unsigned short)1); - long3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - long3 fd1((signed short)1); - long3 fd2((signed short)1, (signed short)1, (signed short)1); - long3 fe1((unsigned int)1); - long3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - long3 fg1((signed int)1); - long3 fg2((signed int)1, (signed int)1, (signed int)1); - long3 fh1((float)1); - long3 fh2((float)1, (float)1, (float)1); - long3 fi1((double)1); - long3 fi2((double)1, (double)1, (double)1); - long3 fj1((unsigned long)1); - long3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - long3 fk1((signed long)1); - long3 fk2((signed long)1, (signed long)1, (signed long)1); - long3 fl1((unsigned long long)1); - long3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - long3 fm1((signed long long)1); - long3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestLong4() { - long4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; 
- f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (float)1; - cmpVal4(f1, 3); - f1 = (float)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * 
(signed long)1; - cmpVal4(f1, 3); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (double)1; - cmpVal4(f1, 3); - f1 = (double)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3); - - long4 fa1((unsigned char)1); - long4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - long4 fb1((signed char)1); - long4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - long4 fc1((unsigned short)1); - long4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - long4 fd1((signed short)1); - long4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - long4 fe1((unsigned int)1); - long4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - long4 fg1((signed int)1); - long4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - long4 fh1((float)1); - long4 fh2((float)1, (float)1, (float)1, (float)1); - long4 fi1((double)1); - long4 fi2((double)1, (double)1, (double)1, (double)1); - long4 fj1((unsigned long)1); - long4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - long4 fk1((signed long)1); - long4 fk2((signed long)1, (signed long)1, (signed long)1, (signed long)1); - long4 fl1((unsigned long long)1); - long4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - long4 fm1((signed long long)1); - long4 fm2((signed long long)1, (signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - - 
-bool TestDouble1() { - double1 f1, f2, f3; - f1.x = 1.0; - f2.x = 1.0; - f3 = f1 + f2; - cmpVal1(f3, 2.0); - f2 = f3 - f1; - cmpVal1(f2, 1.0); - f1 = f2 * f3; - cmpVal1(f1, 2.0); - f2 = f1 / f3; - cmpVal1(f2, 2.0 / 2.0); - f1 += f2; - cmpVal1(f1, 3.0); - f1 -= f2; - cmpVal1(f1, 2.0); - f1 *= f2; - cmpVal1(f1, 2.0); - f1 /= f2; - cmpVal1(f1, 2.0); - f2 = f1++; - cmpVal1(f1, 3.0); - cmpVal1(f2, 2.0); - f2 = f1--; - cmpVal1(f2, 3.0); - cmpVal1(f1, 2.0); - f2 = ++f1; - cmpVal1(f1, 3.0); - cmpVal1(f2, 3.0); - f2 = --f1; - cmpVal1(f1, 2.0); - cmpVal1(f1, 2.0); - - f1.x = 3.0; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3.0); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3.0); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3.0); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3.0); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3.0); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3.0); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3.0); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3.0); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3.0); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3.0); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3.0); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3.0); - f1 = f1 * (float)1; - cmpVal1(f1, 3.0); - f1 = (float)1 * f1; - cmpVal1(f1, 3.0); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3.0); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3.0); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3.0); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3.0); - f1 = f1 * (double)1; - cmpVal1(f1, 3.0); - f1 = (double)1 * f1; - cmpVal1(f1, 3.0); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3.0); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3.0); - - double1 fa((unsigned char)1); - double1 fb((signed char)1); - double1 fc((unsigned short)1); - double1 fd((signed short)1); - double1 fe((unsigned int)1); - double1 fg((signed int)1); - double1 fh((float)1); - double1 fi((double)1); - double1 fj((unsigned long)1); - double1 fk((signed long)1); - double1 fl((unsigned long long)1); - double1 fm((signed 
long long)1); - - - f1.x = 3.0; - f2.x = 4.0; - f3.x = 3.0; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - return true; -} - -bool TestDouble2() { - double2 f1, f2, f3; - f1.x = 1.0; - f1.y = 1.0; - f2.x = 1.0; - f2.y = 1.0; - f3 = f1 + f2; - cmpVal2(f3, 2.0); - f2 = f3 - f1; - cmpVal2(f2, 1.0); - f1 = f2 * f3; - cmpVal2(f1, 2.0); - f2 = f1 / f3; - cmpVal2(f2, 2.0f / 2.0); - f1 += f2; - cmpVal2(f1, 3.0); - f1 -= f2; - cmpVal2(f1, 2.0); - f1 *= f2; - cmpVal2(f1, 2.0); - f1 /= f2; - cmpVal2(f1, 2.0); - - f2 = f1++; - cmpVal2(f1, 3.0); - cmpVal2(f2, 2.0); - f2 = f1--; - cmpVal2(f2, 3.0); - cmpVal2(f1, 2.0); - f2 = ++f1; - cmpVal2(f1, 3.0); - cmpVal2(f2, 3.0); - f2 = --f1; - cmpVal2(f1, 2.0); - cmpVal2(f1, 2.0); - - f1.x = 3.0; - f1.y = 3.0; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3.0); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3.0); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3.0); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3.0); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3.0); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3.0); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3.0); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3.0); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3.0); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3.0); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3.0); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3.0); - f1 = f1 * (float)1; - cmpVal2(f1, 3.0); - f1 = (float)1 * f1; - cmpVal2(f1, 3.0); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3.0); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3.0); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3.0); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3.0); - f1 = f1 * (double)1; - cmpVal2(f1, 3.0); - f1 = (double)1 * f1; - cmpVal2(f1, 3.0); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3.0); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3.0); - - double2 fa1((unsigned char)1); - double2 
fa2((unsigned char)1, (unsigned char)1); - double2 fb1((signed char)1); - double2 fb2((signed char)1, (signed char)1); - double2 fc1((unsigned short)1); - double2 fc2((unsigned short)1, (unsigned short)1); - double2 fd1((signed short)1); - double2 fd2((signed short)1, (signed short)1); - double2 fe1((unsigned int)1); - double2 fe2((unsigned int)1, (unsigned int)1); - double2 fg1((signed int)1); - double2 fg2((signed int)1, (signed int)1); - double2 fh1((float)1); - double2 fh2((float)1, (float)1); - double2 fi1((double)1); - double2 fi2((double)1, (double)1); - double2 fj1((unsigned long)1); - double2 fj2((unsigned long)1, (unsigned long)1); - double2 fk1((signed long)1); - double2 fk2((signed long)1, (signed long)1); - double2 fl1((unsigned long long)1); - double2 fl2((unsigned long long)1, (unsigned long long)1); - double2 fm1((signed long long)1); - double2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3.0; - f1.y = 3.0; - f2.x = 4.0; - f2.y = 4.0; - f3.x = 3.0; - f3.y = 3.0; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - - return true; -} - -bool TestDouble3() { - double3 f1, f2, f3; - f1.x = 1.0; - f1.y = 1.0; - f1.z = 1.0; - f2.x = 1.0; - f2.y = 1.0; - f2.z = 1.0; - f3 = f1 + f2; - cmpVal3(f3, 2.0); - f2 = f3 - f1; - cmpVal3(f2, 1.0); - f1 = f2 * f3; - cmpVal3(f1, 2.0); - f2 = f1 / f3; - cmpVal3(f2, 2.0f / 2.0); - f1 += f2; - cmpVal3(f1, 3.0); - f1 -= f2; - cmpVal3(f1, 2.0); - f1 *= f2; - cmpVal3(f1, 2.0); - f1 /= f2; - f2 = f1++; - cmpVal3(f1, 3.0); - cmpVal3(f2, 2.0); - f2 = f1--; - cmpVal3(f2, 3.0); - cmpVal3(f1, 2.0); - f2 = ++f1; - cmpVal3(f1, 3.0); - cmpVal3(f2, 3.0); - f2 = --f1; - cmpVal3(f1, 2.0); - cmpVal3(f1, 2.0); - - f1.x = 3.0; - f1.y = 3.0; - f1.z = 3.0; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3.0); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3.0); - f1 = f1 * (signed char)1; - cmpVal3(f1, 
3.0); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3.0); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3.0); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3.0); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3.0); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3.0); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3.0); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3.0); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3.0); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3.0); - f1 = f1 * (float)1; - cmpVal3(f1, 3.0); - f1 = (float)1 * f1; - cmpVal3(f1, 3.0); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3.0); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3.0); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3.0); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3.0); - f1 = f1 * (double)1; - cmpVal3(f1, 3.0); - f1 = (double)1 * f1; - cmpVal3(f1, 3.0); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3.0); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3.0); - - double3 fa1((unsigned char)1); - double3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - double3 fb1((signed char)1); - double3 fb2((signed char)1, (signed char)1, (signed char)1); - double3 fc1((unsigned short)1); - double3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - double3 fd1((signed short)1); - double3 fd2((signed short)1, (signed short)1, (signed short)1); - double3 fe1((unsigned int)1); - double3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - double3 fg1((signed int)1); - double3 fg2((signed int)1, (signed int)1, (signed int)1); - double3 fh1((float)1); - double3 fh2((float)1, (float)1, (float)1); - double3 fi1((double)1); - double3 fi2((double)1, (double)1, (double)1); - double3 fj1((unsigned long)1); - double3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - double3 fk1((signed long)1); - double3 fk2((signed long)1, (signed long)1, (signed long)1); - double3 fl1((unsigned long long)1); - double3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - double3 fm1((signed 
long long)1); - double3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3.0; - f1.y = 3.0; - f1.z = 3.0; - f2.x = 4.0; - f2.y = 4.0; - f2.z = 4.0; - f3.x = 3.0; - f3.y = 3.0; - f3.z = 3.0; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - - return true; -} - -bool TestDouble4() { - double4 f1, f2, f3; - f1.x = 1.0; - f1.y = 1.0; - f1.z = 1.0; - f1.w = 1.0; - f2.x = 1.0; - f2.y = 1.0; - f2.z = 1.0; - f2.w = 1.0; - f3 = f1 + f2; - cmpVal4(f3, 2.0); - f2 = f3 - f1; - cmpVal4(f2, 1.0); - f1 = f2 * f3; - cmpVal4(f1, 2.0); - f2 = f1 / f3; - cmpVal4(f2, 2.0f / 2.0); - f1 += f2; - cmpVal4(f1, 3.0); - f1 -= f2; - cmpVal4(f1, 2.0); - f1 *= f2; - cmpVal4(f1, 2.0); - f1 /= f2; - f2 = f1++; - cmpVal4(f1, 3.0); - cmpVal4(f2, 2.0); - f2 = f1--; - cmpVal4(f2, 3.0); - cmpVal4(f1, 2.0); - f2 = ++f1; - cmpVal4(f1, 3.0); - cmpVal4(f2, 3.0); - f2 = --f1; - cmpVal4(f1, 2.0); - cmpVal4(f1, 2.0); - - f1.x = 3.0; - f1.y = 3.0; - f1.z = 3.0; - f1.w = 3.0; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3.0); - f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3.0); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3.0); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3.0); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3.0); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3.0); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3.0); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3.0); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3.0); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3.0); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3.0); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3.0); - f1 = f1 * (float)1; - cmpVal4(f1, 3.0); - f1 = (float)1 * f1; - cmpVal4(f1, 3.0); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3.0); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3.0); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3.0); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3.0); - f1 = f1 * 
(double)1; - cmpVal4(f1, 3.0); - f1 = (double)1 * f1; - cmpVal4(f1, 3.0); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3.0); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3.0); - - double4 fa1((unsigned char)1); - double4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - double4 fb1((signed char)1); - double4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - double4 fc1((unsigned short)1); - double4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - double4 fd1((signed short)1); - double4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - double4 fe1((unsigned int)1); - double4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - double4 fg1((signed int)1); - double4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - double4 fh1((float)1); - double4 fh2((float)1, (float)1, (float)1, (float)1); - double4 fi1((double)1); - double4 fi2((double)1, (double)1, (double)1, (double)1); - double4 fj1((unsigned long)1); - double4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - double4 fk1((signed long)1); - double4 fk2((signed long)1, (signed long)1, (signed long)1, (signed long)1); - double4 fl1((unsigned long long)1); - double4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - double4 fm1((signed long long)1); - double4 fm2((signed long long)1, (signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3.0; - f1.y = 3.0; - f1.z = 3.0; - f1.w = 3.0; - f2.x = 4.0; - f2.y = 4.0; - f2.z = 4.0; - f2.w = 4.0; - f3.x = 3.0; - f3.y = 3.0; - f3.z = 3.0; - f3.w = 3.0; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - return true; -} - - -bool TestULongLong1() { - long1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - 
f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (double)1; - cmpVal1(f1, 3); - f1 = (double)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3); - - ulonglong1 
fa((unsigned char)1); - ulonglong1 fb((signed char)1); - ulonglong1 fc((unsigned short)1); - ulonglong1 fd((signed short)1); - ulonglong1 fe((unsigned int)1); - ulonglong1 fg((signed int)1); - ulonglong1 fh((float)1); - ulonglong1 fi((double)1); - ulonglong1 fj((unsigned long)1); - ulonglong1 fk((signed long)1); - ulonglong1 fl((unsigned long long)1); - ulonglong1 fm((signed long long)1); - - - f1.x = 3; - f2.x = 4; - f3.x = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestULongLong2() { - long2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3); - f1 = (signed char)1 * f1; - 
cmpVal2(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (float)1; - cmpVal2(f1, 3); - f1 = (float)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (double)1; - cmpVal2(f1, 3); - f1 = (double)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3); - - ulonglong2 fa1((unsigned char)1); - ulonglong2 fa2((unsigned char)1, (unsigned char)1); - ulonglong2 fb1((signed char)1); - ulonglong2 fb2((signed char)1, (signed char)1); - ulonglong2 fc1((unsigned short)1); - ulonglong2 fc2((unsigned short)1, (unsigned short)1); - ulonglong2 fd1((signed short)1); - ulonglong2 fd2((signed short)1, (signed short)1); - ulonglong2 fe1((unsigned int)1); - ulonglong2 fe2((unsigned int)1, (unsigned int)1); - ulonglong2 fg1((signed int)1); - ulonglong2 fg2((signed int)1, (signed int)1); - ulonglong2 fh1((float)1); - ulonglong2 fh2((float)1, (float)1); - ulonglong2 fi1((double)1); - ulonglong2 fi2((double)1, (double)1); - ulonglong2 fj1((unsigned long)1); - ulonglong2 fj2((unsigned long)1, (unsigned long)1); - ulonglong2 fk1((signed long)1); - ulonglong2 fk2((signed long)1, (signed long)1); - ulonglong2 fl1((unsigned long long)1); - ulonglong2 fl2((unsigned long long)1, (unsigned long long)1); - ulonglong2 fm1((signed long long)1); - ulonglong2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - assert((f1 == f2) == false); - 
assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestULongLong3() { - long3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = 
(signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = (float)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - ulonglong3 fa1((unsigned char)1); - ulonglong3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - ulonglong3 fb1((signed char)1); - ulonglong3 fb2((signed char)1, (signed char)1, (signed char)1); - ulonglong3 fc1((unsigned short)1); - ulonglong3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - ulonglong3 fd1((signed short)1); - ulonglong3 fd2((signed short)1, (signed short)1, (signed short)1); - ulonglong3 fe1((unsigned int)1); - ulonglong3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - ulonglong3 fg1((signed int)1); - ulonglong3 fg2((signed int)1, (signed int)1, (signed int)1); - ulonglong3 fh1((float)1); - ulonglong3 fh2((float)1, (float)1, (float)1); - ulonglong3 fi1((double)1); - ulonglong3 fi2((double)1, (double)1, (double)1); - ulonglong3 fj1((unsigned long)1); - ulonglong3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - ulonglong3 fk1((signed long)1); - ulonglong3 fk2((signed long)1, (signed long)1, (signed long)1); - ulonglong3 fl1((unsigned long long)1); - ulonglong3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - ulonglong3 fm1((signed long long)1); - ulonglong3 fm2((signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - 
assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestULongLong4() { - long4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal4(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3); - f1 = (signed int)1 
* f1; - cmpVal4(f1, 3); - f1 = f1 * (float)1; - cmpVal4(f1, 3); - f1 = (float)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (double)1; - cmpVal4(f1, 3); - f1 = (double)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3); - - ulonglong4 fa1((unsigned char)1); - ulonglong4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - ulonglong4 fb1((signed char)1); - ulonglong4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - ulonglong4 fc1((unsigned short)1); - ulonglong4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - ulonglong4 fd1((signed short)1); - ulonglong4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - ulonglong4 fe1((unsigned int)1); - ulonglong4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - ulonglong4 fg1((signed int)1); - ulonglong4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - ulonglong4 fh1((float)1); - ulonglong4 fh2((float)1, (float)1, (float)1, (float)1); - ulonglong4 fi1((double)1); - ulonglong4 fi2((double)1, (double)1, (double)1, (double)1); - ulonglong4 fj1((unsigned long)1); - ulonglong4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - ulonglong4 fk1((signed long)1); - ulonglong4 fk2((signed long)1, (signed long)1, (signed long)1, (signed long)1); - ulonglong4 fl1((unsigned long long)1); - ulonglong4 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - ulonglong4 fm1((signed long long)1); - ulonglong4 fm2((signed long long)1, (signed long long)1, (signed long long)1, - (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; 
- f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - - -bool TestLongLong1() { - long1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 
3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 = (signed long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (double)1; - cmpVal1(f1, 3); - f1 = (double)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal1(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal1(f1, 3); - - longlong1 fa((unsigned char)1); - longlong1 fb((signed char)1); - longlong1 fc((unsigned short)1); - longlong1 fd((signed short)1); - longlong1 fe((unsigned int)1); - longlong1 fg((signed int)1); - longlong1 fh((float)1); - longlong1 fi((double)1); - longlong1 fj((unsigned long)1); - longlong1 fk((signed long)1); - longlong1 fl((unsigned long long)1); - longlong1 fm((signed long long)1); - - - f1.x = 3; - f2.x = 4; - f3.x = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestLongLong2() { - long2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - 
cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1 = f1 * (unsigned char)1; - cmpVal2(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed char)1; - cmpVal2(f1, 3); - f1 = (signed char)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal2(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed short)1; - cmpVal2(f1, 3); - f1 = (signed short)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal2(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed int)1; - cmpVal2(f1, 3); - f1 = (signed int)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (float)1; - cmpVal2(f1, 3); - f1 = (float)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal2(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (signed long)1; - cmpVal2(f1, 3); - f1 = (signed long)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (double)1; - cmpVal2(f1, 3); - f1 = (double)1 * f1; - cmpVal2(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal2(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal2(f1, 3); - - longlong2 fa1((unsigned char)1); - longlong2 fa2((unsigned char)1, (unsigned char)1); - longlong2 fb1((signed char)1); - longlong2 fb2((signed char)1, (signed char)1); - longlong2 fc1((unsigned short)1); - longlong2 fc2((unsigned short)1, (unsigned short)1); - longlong2 fd1((signed short)1); - longlong2 fd2((signed short)1, (signed short)1); - longlong2 fe1((unsigned int)1); - longlong2 fe2((unsigned int)1, (unsigned int)1); - longlong2 fg1((signed int)1); - longlong2 fg2((signed int)1, (signed int)1); - longlong2 fh1((float)1); - longlong2 fh2((float)1, (float)1); - longlong2 fi1((double)1); - longlong2 fi2((double)1, (double)1); - longlong2 fj1((unsigned long)1); - longlong2 fj2((unsigned long)1, (unsigned long)1); - longlong2 fk1((signed long)1); 
- longlong2 fk2((signed long)1, (signed long)1); - longlong2 fl1((unsigned long long)1); - longlong2 fl2((unsigned long long)1, (unsigned long long)1); - longlong2 fm1((signed long long)1); - longlong2 fm2((signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestLongLong3() { - long3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1 = f1 * (unsigned char)1; - cmpVal3(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed char)1; - cmpVal3(f1, 3); - f1 = (signed char)1 * f1; - 
cmpVal3(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal3(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed short)1; - cmpVal3(f1, 3); - f1 = (signed short)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal3(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed int)1; - cmpVal3(f1, 3); - f1 = (signed int)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (float)1; - cmpVal3(f1, 3); - f1 = (float)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal3(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (signed long)1; - cmpVal3(f1, 3); - f1 = (signed long)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (double)1; - cmpVal3(f1, 3); - f1 = (double)1 * f1; - cmpVal3(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal3(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal3(f1, 3); - - longlong3 fa1((unsigned char)1); - longlong3 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1); - longlong3 fb1((signed char)1); - longlong3 fb2((signed char)1, (signed char)1, (signed char)1); - longlong3 fc1((unsigned short)1); - longlong3 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1); - longlong3 fd1((signed short)1); - longlong3 fd2((signed short)1, (signed short)1, (signed short)1); - longlong3 fe1((unsigned int)1); - longlong3 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1); - longlong3 fg1((signed int)1); - longlong3 fg2((signed int)1, (signed int)1, (signed int)1); - longlong3 fh1((float)1); - longlong3 fh2((float)1, (float)1, (float)1); - longlong3 fi1((double)1); - longlong3 fi2((double)1, (double)1, (double)1); - longlong3 fj1((unsigned long)1); - longlong3 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1); - longlong3 fk1((signed long)1); - longlong3 fk2((signed long)1, (signed long)1, (signed long)1); - longlong3 fl1((unsigned long long)1); - longlong3 fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1); - longlong3 fm1((signed long long)1); - longlong3 
fm2((signed long long)1, (signed long long)1, (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; -} - -bool TestLongLong4() { - long4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, -3); - assert(!f1 == false); - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f1 = f1 * (unsigned char)1; - cmpVal4(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed char)1; - cmpVal4(f1, 3); - f1 = (signed char)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned 
short)1; - cmpVal4(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed short)1; - cmpVal4(f1, 3); - f1 = (signed short)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal4(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed int)1; - cmpVal4(f1, 3); - f1 = (signed int)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (float)1; - cmpVal4(f1, 3); - f1 = (float)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal4(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (signed long)1; - cmpVal4(f1, 3); - f1 = (signed long)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (double)1; - cmpVal4(f1, 3); - f1 = (double)1 * f1; - cmpVal4(f1, 3); - f1 = f1 * (unsigned long long)1; - cmpVal4(f1, 3); - f1 = (unsigned long long)1 * f1; - cmpVal4(f1, 3); - - longlong4 fa1((unsigned char)1); - longlong4 fa2((unsigned char)1, (unsigned char)1, (unsigned char)1, (unsigned char)1); - longlong4 fb1((signed char)1); - longlong4 fb2((signed char)1, (signed char)1, (signed char)1, (signed char)1); - longlong4 fc1((unsigned short)1); - longlong4 fc2((unsigned short)1, (unsigned short)1, (unsigned short)1, (unsigned short)1); - longlong4 fd1((signed short)1); - longlong4 fd2((signed short)1, (signed short)1, (signed short)1, (signed short)1); - longlong4 fe1((unsigned int)1); - longlong4 fe2((unsigned int)1, (unsigned int)1, (unsigned int)1, (unsigned int)1); - longlong4 fg1((signed int)1); - longlong4 fg2((signed int)1, (signed int)1, (signed int)1, (signed int)1); - longlong4 fh1((float)1); - longlong4 fh2((float)1, (float)1, (float)1, (float)1); - longlong4 fi1((double)1); - longlong4 fi2((double)1, (double)1, (double)1, (double)1); - longlong4 fj1((unsigned long)1); - longlong4 fj2((unsigned long)1, (unsigned long)1, (unsigned long)1, (unsigned long)1); - longlong4 fk1((signed long)1); - longlong4 fk2((signed long)1, (signed long)1, (signed long)1, (signed long)1); - longlong4 fl1((unsigned long long)1); - longlong4 
fl2((unsigned long long)1, (unsigned long long)1, (unsigned long long)1, - (unsigned long long)1); - longlong4 fm1((signed long long)1); - longlong4 fm2((signed long long)1, (signed long long)1, (signed long long)1, - (signed long long)1); - - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - assert((f1 == f2) == false); - assert((f1 != f2) == true); - assert((f1 < f2) == true); - assert((f2 > f1) == true); - assert((f1 >= f3) == true); - assert((f1 <= f3) == true); - - assert((f1 && f2) == true); - assert((f1 || f2) == true); - return true; +bool CheckVectorTypes() { + return TestVectorTypes< + char1, char2, char3, char4, + uchar1, uchar2, uchar3, uchar4, + short1, short2, short3, short4, + ushort1, ushort2, ushort3, ushort4, + int1, int2, int3, int4, + uint1, uint2, uint3, uint4, + long1, long2, long3, long4, + ulong1, ulong2, ulong3, ulong4, + longlong1, longlong2, longlong3, longlong4, + ulonglong1, ulonglong2, ulonglong3, ulonglong4, + float1, float2, float3, float4, + double1, double2, double3, double4>(); } int main() { - assert(sizeof(float1) == 4); - assert(sizeof(float2) == 8); - assert(sizeof(float3) == 12); - assert(sizeof(float4) == 16); - assert(TestFloat1() && TestFloat2() && TestFloat3() && TestFloat4() && TestDouble1() && - TestDouble2() && TestDouble3() && TestDouble4() && TestUChar1() && TestUChar2() && - TestUChar3() && TestUChar4() && TestChar1() && TestChar2() && TestChar3() && - TestChar4() && TestUShort1() && TestUShort2() && TestUShort3() && TestUShort4() && - TestShort1() && TestShort2() && TestShort3() && TestShort4() && TestUInt1() && - TestUInt2() && TestUInt3() && TestUInt4() && TestInt1() && TestInt2() && TestInt3() && - TestInt4() && TestULong1() && TestULong2() && TestULong3() && TestULong4() && - TestLong1() && TestLong2() && TestLong3() && TestLong4() && TestULongLong1() && - TestULongLong2() && TestULongLong3() && TestULongLong4() && 
TestLongLong1() && - TestLongLong2() && TestLongLong3() && TestLongLong4() == true); - passed(); - float1 f1 = make_float1(1.0f); -} + static_assert(sizeof(float1) == 4, ""); + static_assert(sizeof(float2) >= 8, ""); + static_assert(sizeof(float3) >= 12, ""); + static_assert(sizeof(float4) >= 16, ""); + + if (CheckVectorTypes()) { + float1 f1 = make_float1(1.0f); + passed(); + } + else { + failed("Failed some vector test on the host side."); + } +} \ No newline at end of file diff --git a/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp b/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp index 24ac3f4a02..edb817ced1 100644 --- a/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp +++ b/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp @@ -26,4212 +26,182 @@ THE SOFTWARE. * HIT_END */ -#include #include + +#include "vector_test_common.h" #include "test_common.h" -#define cmpVal1(in, exp) \ - if (in.x != exp) { \ - } -#define cmpVal2(in, exp) \ - if (in.x != exp || in.y != exp) { \ - } +#include +#include +#include -#define cmpVal3(in, exp) \ - if (in.x != exp || in.y != exp || in.z != exp) { \ - } +using namespace std; -#define cmpVal4(in, exp) \ - if (in.x != exp || in.y != exp || in.z != exp || in.w != exp) { \ - } +template< + typename V, + Enable_if_t().x)>{}>* = nullptr> +constexpr +bool integer_unary_tests(const V&, const V&) { + return true; +} -__device__ bool TestUChar1() { - uchar1 f1, f2, f3; +template< + typename V, + Enable_if_t().x)>{}>* = nullptr> +__device__ +bool integer_unary_tests(V& f1, V& f2) { + f1 %= f2; + if (!cmp(f1, 0)) return false; + f1 &= f2; + if (!cmp(f1, 0)) return false; + f1 |= f2; + if (!cmp(f1, 1)) return false; + f1 ^= f2; + if (!cmp(f1, 0)) return false; f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); + f1 <<= f2; + if (!cmp(f1, 2)) return false; + f1 >>= f2; + if (!cmp(f1, 1)) return false; + f2 = ~f1; + 
return cmp(f2, ~1); +} + +template< + typename V, + Enable_if_t().x)>{}>* = nullptr> +constexpr +bool integer_binary_tests(const V&, const V&, const V&) { + return true; +} + +template< + typename V, + Enable_if_t().x)>{}>* = nullptr> +__device__ +bool integer_binary_tests(V& f1, V& f2, V& f3) { f3 = f1 % f2; - cmpVal1(f3, 0); + if (!cmp(f3, 0)) return false; f1 = f3 & f2; - cmpVal1(f1, 0); + if (!cmp(f1, 0)) return false; f2 = f1 ^ f3; - cmpVal1(f2, 0); + if (!cmp(f2, 0)) return false; f1.x = 1; f2.x = 2; f3 = f1 << f2; - cmpVal1(f3, 4); + if (!cmp(f3, 4)) return false; f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, 253); - - f1.x = 3; - f1 = f1 * (unsigned char)1; - cmpVal1(f1, 3); - f1 = (unsigned char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed char)1; - cmpVal1(f1, 3); - f1 = (signed char)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned short)1; - cmpVal1(f1, 3); - f1 = (unsigned short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed short)1; - cmpVal1(f1, 3); - f1 = (signed short)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned int)1; - cmpVal1(f1, 3); - f1 = (unsigned int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed int)1; - cmpVal1(f1, 3); - f1 = (signed int)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (float)1; - cmpVal1(f1, 3); - f1 = (float)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (unsigned long)1; - cmpVal1(f1, 3); - f1 = (unsigned long)1 * f1; - cmpVal1(f1, 3); - f1 = f1 * (signed long)1; - cmpVal1(f1, 3); - f1 
= (signed long)1 * f1; - cmpVal1(f1, 3); - - // signed char sc = 1; - - f1.x = 3; - f2.x = 4; - f3.x = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; + if (!cmp(f2, 2)) return false; } -__device__ bool TestUChar2() { - uchar2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); +template +__device__ +bool TestVectorType() { + V f1(1); + V f2(1); + V f3 = f1 + f2; + if (!cmp(f3, 2)) return false; f2 = f3 - f1; - cmpVal2(f2, 1); + if (!cmp(f2, 1)) return false; f1 = f2 * f3; - cmpVal2(f1, 2); + if (!cmp(f1, 2)) return false; f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); + if (!cmp(f2, 2 / 2)) return false; + if (!integer_binary_tests(f1, f2, f3)) return false; - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; + f1 = V(2); + f2 = V(1); f1 += f2; - cmpVal2(f1, 3); + if (!cmp(f1, 3)) return false; f1 -= f2; - cmpVal2(f1, 2); + if (!cmp(f1, 2)) return false; f1 *= f2; - cmpVal2(f1, 2); + if (!cmp(f1, 2)) return false; f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); + if (!cmp(f1, 2)) return false; + if (!integer_unary_tests(f1, f2)) return false; - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); + #if false // We do not enable nullary increment / 
decrement yet. + f1 = V(2); + f2 = f1++; + if (!cmp(f1, 3)) return false; + if (!cmp(f2, 2)) return false; + f2 = f1--; + if (!cmp(f2, 3)) return false; + if (!cmp(f1, 2)) return false; + f2 = ++f1; + if (!cmp(f1, 3)) return false; + if (!cmp(f2, 3)) return false; + f2 = --f1; + if (!cmp(f1, 2)) return false; + if (!cmp(f2, 2)) return false; + #endif - f2 = ~f1; - cmpVal2(f2, 253); - if (!f1 == false) { - } + f1 = V(3); + f2 = V(4); + f3 = V(3); + if (cmp(f1 == f2, true)) return false; + if (cmp(f1 != f2, false)) return false; + if (cmp(f1 < f2, false)) return false; + if (cmp(f2 > f1, false)) return false; + if (cmp(f1 >= f3, false)) return false; + if (cmp(f1 <= f3, false)) return false; - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } + if (cmp(f1 && f2, false)) return false; + if (cmp(f1 || f2, false)) return false; return true; } -__device__ bool TestUChar3() { - uchar3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; 
- cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, 253); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } +template* = nullptr> +__device__ +bool TestVectorTypes() { return true; } -__device__ bool TestUChar4() { - uchar4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; 
- cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, 253); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; +template +__device__ +bool TestVectorTypes() { + if (!TestVectorType()) return false; + return TestVectorTypes(); } -__device__ bool TestChar1() { - char1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, (char)253); - if (!f1 == false) { - } - - f1.x = 3; - f2.x = 4; - f3.x = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool 
TestChar2() { - char2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, (char)253); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestChar3() { - char3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 
2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, (char)253); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestChar4() { - char4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w 
= 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, (char)253); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestUShort1() { - ushort1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, (unsigned short)65533); - if (!f1 == false) { - } - - f1.x = 3; - f2.x = 4; - f3.x = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == 
true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestUShort2() { - ushort2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, (unsigned short)65533); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestUShort3() { - ushort3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ 
f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, (unsigned short)65533); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestUShort4() { - ushort4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= 
f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, (unsigned short)65533); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestShort1() { - short1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 
= ~f1; - cmpVal1(f2, (signed short)65533); - if (!f1 == false) { - } - - f1.x = 3; - f2.x = 4; - f3.x = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestShort2() { - short2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, (signed short)65533); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestShort3() { - short3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 
= f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, (signed short)65533); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestShort4() { - short4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - 
cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, (signed short)65533); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - - -__device__ bool TestUInt1() { - uint1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - 
- f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, (unsigned int)4294967293); - if (!f1 == false) { - } - - f1.x = 3; - f2.x = 4; - f3.x = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestUInt2() { - uint2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, (unsigned int)4294967293); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == 
true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestUInt3() { - uint3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, (unsigned int)4294967293); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestUInt4() { - uint4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 
/ 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, (unsigned int)4294967293); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestInt1() { - int1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - 
f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, (signed int)4294967293); - if (!f1 == false) { - } - - f1.x = 3; - f2.x = 4; - f3.x = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestInt2() { - int2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, (signed int)4294967293); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f2.x = 4; - 
f2.y = 4; - f3.x = 3; - f3.y = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestInt3() { - int3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, (signed int)4294967293); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestInt4() { - int4 f1, f2, f3; - f1.x = 1; - f1.y = 
1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, (signed int)4294967293); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestULong1() { - ulong1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - 
f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, 18446744073709551613UL); - if (!f1 == false) { - } - - f1.x = 3; - f2.x = 4; - f3.x = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestULong2() { - ulong2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - 
f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, 18446744073709551613UL); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestULong3() { - ulong3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, 18446744073709551613UL); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - 
if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestULong4() { - ulong4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, 18446744073709551613UL); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestLong1() { - long1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - 
cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, -3); - if (!f1 == false) { - } - - f1.x = 3; - f2.x = 4; - f3.x = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestLong2() { - long2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 
1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, -3); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestLong3() { - long3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, -3); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - 
f3.x = 3; - f3.y = 3; - f3.z = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestLong4() { - long4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, -3); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 
|| f2) == true) { - } - return true; -} - - -__device__ bool TestFloat1() { - float1 f1, f2, f3; - // float1 f4(1); - // cmpVal1(f4, 1.0f); - // float1 f5(2.0f); - // cmpVal1(f5, 2.0f); - f1.x = 1.0f; - f2.x = 1.0f; - f3 = f1 + f2; - cmpVal1(f3, 2.0f); - f2 = f3 - f1; - cmpVal1(f2, 1.0f); - f1 = f2 * f3; - cmpVal1(f1, 2.0f); - f2 = f1 / f3; - cmpVal1(f2, 2.0f / 2.0f); - f1 += f2; - cmpVal1(f1, 3.0f); - f1 -= f2; - cmpVal1(f1, 2.0f); - f1 *= f2; - cmpVal1(f1, 2.0f); - f1 /= f2; - cmpVal1(f1, 2.0f); - f2 = f1++; - cmpVal1(f1, 3.0f); - cmpVal1(f2, 2.0f); - f2 = f1--; - cmpVal1(f2, 3.0f); - cmpVal1(f1, 2.0f); - f2 = ++f1; - cmpVal1(f1, 3.0f); - cmpVal1(f2, 3.0f); - f2 = --f1; - cmpVal1(f1, 2.0f); - cmpVal1(f1, 2.0f); - - f1.x = 3.0f; - f2.x = 4.0f; - f3.x = 3.0f; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - return true; -} - -__device__ bool TestFloat2() { - float2 f1, f2, f3; - f1.x = 1.0f; - f1.y = 1.0f; - f2.x = 1.0f; - f2.y = 1.0f; - f3 = f1 + f2; - cmpVal2(f3, 2.0f); - f2 = f3 - f1; - cmpVal2(f2, 1.0f); - f1 = f2 * f3; - cmpVal2(f1, 2.0f); - f2 = f1 / f3; - cmpVal2(f2, 2.0f / 2.0f); - f1 += f2; - cmpVal2(f1, 3.0f); - f1 -= f2; - cmpVal2(f1, 2.0f); - f1 *= f2; - cmpVal2(f1, 2.0f); - f1 /= f2; - cmpVal2(f1, 2.0f); - - f2 = f1++; - cmpVal2(f1, 3.0f); - cmpVal2(f2, 2.0f); - f2 = f1--; - cmpVal2(f2, 3.0f); - cmpVal2(f1, 2.0f); - f2 = ++f1; - cmpVal2(f1, 3.0f); - cmpVal2(f2, 3.0f); - f2 = --f1; - cmpVal2(f1, 2.0f); - cmpVal2(f1, 2.0f); - - f1.x = 3.0f; - f1.y = 3.0f; - f2.x = 4.0f; - f2.y = 4.0f; - f3.x = 3.0f; - f3.y = 3.0f; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - - return true; -} - -__device__ bool TestFloat3() { - float3 f1, f2, f3; - f1.x = 1.0f; - f1.y = 1.0f; - 
f1.z = 1.0f; - f2.x = 1.0f; - f2.y = 1.0f; - f2.z = 1.0f; - f3 = f1 + f2; - cmpVal3(f3, 2.0f); - f2 = f3 - f1; - cmpVal3(f2, 1.0f); - f1 = f2 * f3; - cmpVal3(f1, 2.0f); - f2 = f1 / f3; - cmpVal3(f2, 2.0f / 2.0f); - f1 += f2; - cmpVal3(f1, 3.0f); - f1 -= f2; - cmpVal3(f1, 2.0f); - f1 *= f2; - cmpVal3(f1, 2.0f); - f1 /= f2; - f2 = f1++; - cmpVal3(f1, 3.0f); - cmpVal3(f2, 2.0f); - f2 = f1--; - cmpVal3(f2, 3.0f); - cmpVal3(f1, 2.0f); - f2 = ++f1; - cmpVal3(f1, 3.0f); - cmpVal3(f2, 3.0f); - f2 = --f1; - cmpVal3(f1, 2.0f); - cmpVal3(f1, 2.0f); - - f1.x = 3.0f; - f1.y = 3.0f; - f1.z = 3.0f; - f2.x = 4.0f; - f2.y = 4.0f; - f2.z = 4.0f; - f3.x = 3.0f; - f3.y = 3.0f; - f3.z = 3.0f; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - - return true; -} - - -__device__ bool TestFloat4() { - float4 f1, f2, f3; - f1.x = 1.0f; - f1.y = 1.0f; - f1.z = 1.0f; - f1.w = 1.0f; - f2.x = 1.0f; - f2.y = 1.0f; - f2.z = 1.0f; - f2.w = 1.0f; - f3 = f1 + f2; - cmpVal4(f3, 2.0f); - f2 = f3 - f1; - cmpVal4(f2, 1.0f); - f1 = f2 * f3; - cmpVal4(f1, 2.0f); - f2 = f1 / f3; - cmpVal4(f2, 2.0f / 2.0f); - f1 += f2; - cmpVal4(f1, 3.0f); - f1 -= f2; - cmpVal4(f1, 2.0f); - f1 *= f2; - cmpVal4(f1, 2.0f); - f1 /= f2; - f2 = f1++; - cmpVal4(f1, 3.0f); - cmpVal4(f2, 2.0f); - f2 = f1--; - cmpVal4(f2, 3.0f); - cmpVal4(f1, 2.0f); - f2 = ++f1; - cmpVal4(f1, 3.0f); - cmpVal4(f2, 3.0f); - f2 = --f1; - cmpVal4(f1, 2.0f); - cmpVal4(f1, 2.0f); - - f1.x = 3.0f; - f1.y = 3.0f; - f1.z = 3.0f; - f1.w = 3.0f; - f2.x = 4.0f; - f2.y = 4.0f; - f2.z = 4.0f; - f2.w = 4.0f; - f3.x = 3.0f; - f3.y = 3.0f; - f3.z = 3.0f; - f3.w = 3.0f; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - return true; -} - -__device__ bool TestULongLong1() { - 
ulonglong1 f1, f2, f3; - f1.x = 1; - f2.x = 1; - f3 = f1 + f2; - cmpVal1(f3, 2); - f2 = f3 - f1; - cmpVal1(f2, 1); - f1 = f2 * f3; - cmpVal1(f1, 2); - f2 = f1 / f3; - cmpVal1(f2, 2 / 2); - f3 = f1 % f2; - cmpVal1(f3, 0); - f1 = f3 & f2; - cmpVal1(f1, 0); - f2 = f1 ^ f3; - cmpVal1(f2, 0); - f1.x = 1; - f2.x = 2; - f3 = f1 << f2; - cmpVal1(f3, 4); - f2 = f3 >> f1; - cmpVal1(f2, 2); - - f1.x = 2; - f2.x = 1; - f1 += f2; - cmpVal1(f1, 3); - f1 -= f2; - cmpVal1(f1, 2); - f1 *= f2; - cmpVal1(f1, 2); - f1 /= f2; - cmpVal1(f1, 2); - f1 %= f2; - cmpVal1(f1, 0); - f1 &= f2; - cmpVal1(f1, 0); - f1 |= f2; - cmpVal1(f1, 1); - f1 ^= f2; - cmpVal1(f1, 0); - f1.x = 1; - f1 <<= f2; - cmpVal1(f1, 2); - f1 >>= f2; - cmpVal1(f1, 1); - - f1.x = 2; - f2 = f1++; - cmpVal1(f1, 3); - cmpVal1(f2, 2); - f2 = f1--; - cmpVal1(f2, 3); - cmpVal1(f1, 2); - f2 = ++f1; - cmpVal1(f1, 3); - cmpVal1(f2, 3); - f2 = --f1; - cmpVal1(f1, 2); - cmpVal1(f2, 2); - - f2 = ~f1; - cmpVal1(f2, -3); - if (!f1 == false) { - } - - f1.x = 3; - f2.x = 4; - f3.x = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - - -__device__ bool TestULongLong2() { - ulonglong2 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f2.x = 1; - f2.y = 1; - f3 = f1 + f2; - cmpVal2(f3, 2); - f2 = f3 - f1; - cmpVal2(f2, 1); - f1 = f2 * f3; - cmpVal2(f1, 2); - f2 = f1 / f3; - cmpVal2(f2, 2 / 2); - f3 = f1 % f2; - cmpVal2(f3, 0); - f1 = f3 & f2; - cmpVal2(f1, 0); - f2 = f1 ^ f3; - cmpVal2(f2, 0); - f1.x = 1; - f1.y = 1; - f2.x = 2; - f2.y = 2; - f3 = f1 << f2; - cmpVal2(f3, 4); - f2 = f3 >> f1; - cmpVal2(f2, 2); - - f1.x = 2; - f1.y = 2; - f2.x = 1; - f2.y = 1; - f1 += f2; - cmpVal2(f1, 3); - f1 -= f2; - cmpVal2(f1, 2); - f1 *= f2; - cmpVal2(f1, 2); - f1 /= f2; - cmpVal2(f1, 2); - f1 %= f2; - cmpVal2(f1, 0); - f1 &= f2; - 
cmpVal2(f1, 0); - f1 |= f2; - cmpVal2(f1, 1); - f1 ^= f2; - cmpVal2(f1, 0); - f1.x = 1; - f1.y = 1; - f1 <<= f2; - cmpVal2(f1, 2); - f1 >>= f2; - cmpVal2(f1, 1); - - f1.x = 2; - f1.y = 2; - f2 = f1++; - cmpVal2(f1, 3); - cmpVal2(f2, 2); - f2 = f1--; - cmpVal2(f2, 3); - cmpVal2(f1, 2); - f2 = ++f1; - cmpVal2(f1, 3); - cmpVal2(f2, 3); - f2 = --f1; - cmpVal2(f1, 2); - cmpVal2(f2, 2); - - f2 = ~f1; - cmpVal2(f2, -3); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f2.x = 4; - f2.y = 4; - f3.x = 3; - f3.y = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestULongLong3() { - ulonglong3 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f3 = f1 + f2; - cmpVal3(f3, 2); - f2 = f3 - f1; - cmpVal3(f2, 1); - f1 = f2 * f3; - cmpVal3(f1, 2); - f2 = f1 / f3; - cmpVal3(f2, 2 / 2); - f3 = f1 % f2; - cmpVal3(f3, 0); - f1 = f3 & f2; - cmpVal3(f1, 0); - f2 = f1 ^ f3; - cmpVal3(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f3 = f1 << f2; - cmpVal3(f3, 4); - f2 = f3 >> f1; - cmpVal3(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f1 += f2; - cmpVal3(f1, 3); - f1 -= f2; - cmpVal3(f1, 2); - f1 *= f2; - cmpVal3(f1, 2); - f1 /= f2; - cmpVal3(f1, 2); - f1 %= f2; - cmpVal3(f1, 0); - f1 &= f2; - cmpVal3(f1, 0); - f1 |= f2; - cmpVal3(f1, 1); - f1 ^= f2; - cmpVal3(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1 <<= f2; - cmpVal3(f1, 2); - f1 >>= f2; - cmpVal3(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f2 = f1++; - cmpVal3(f1, 3); - cmpVal3(f2, 2); - f2 = f1--; - cmpVal3(f2, 3); - cmpVal3(f1, 2); - f2 = ++f1; - cmpVal3(f1, 3); - cmpVal3(f2, 3); - f2 = --f1; - cmpVal3(f1, 2); - cmpVal3(f2, 2); - - f2 = ~f1; - cmpVal3(f2, -3); - if (!f1 
== false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if ((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - -__device__ bool TestULongLong4() { - ulonglong4 f1, f2, f3; - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f3 = f1 + f2; - cmpVal4(f3, 2); - f2 = f3 - f1; - cmpVal4(f2, 1); - f1 = f2 * f3; - cmpVal4(f1, 2); - f2 = f1 / f3; - cmpVal4(f2, 2 / 2); - f3 = f1 % f2; - cmpVal4(f3, 0); - f1 = f3 & f2; - cmpVal4(f1, 0); - f2 = f1 ^ f3; - cmpVal4(f2, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f2.x = 2; - f2.y = 2; - f2.z = 2; - f2.w = 2; - f3 = f1 << f2; - cmpVal4(f3, 4); - f2 = f3 >> f1; - cmpVal4(f2, 2); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2.x = 1; - f2.y = 1; - f2.z = 1; - f2.w = 1; - f1 += f2; - cmpVal4(f1, 3); - f1 -= f2; - cmpVal4(f1, 2); - f1 *= f2; - cmpVal4(f1, 2); - f1 /= f2; - cmpVal4(f1, 2); - f1 %= f2; - cmpVal4(f1, 0); - f1 &= f2; - cmpVal4(f1, 0); - f1 |= f2; - cmpVal4(f1, 1); - f1 ^= f2; - cmpVal4(f1, 0); - f1.x = 1; - f1.y = 1; - f1.z = 1; - f1.w = 1; - f1 <<= f2; - cmpVal4(f1, 2); - f1 >>= f2; - cmpVal4(f1, 1); - - f1.x = 2; - f1.y = 2; - f1.z = 2; - f1.w = 2; - f2 = f1++; - cmpVal4(f1, 3); - cmpVal4(f2, 2); - f2 = f1--; - cmpVal4(f2, 3); - cmpVal4(f1, 2); - f2 = ++f1; - cmpVal4(f1, 3); - cmpVal4(f2, 3); - f2 = --f1; - cmpVal4(f1, 2); - cmpVal4(f2, 2); - - f2 = ~f1; - cmpVal4(f2, -3); - if (!f1 == false) { - } - - f1.x = 3; - f1.y = 3; - f1.z = 3; - f1.w = 3; - f2.x = 4; - f2.y = 4; - f2.z = 4; - f2.w = 4; - f3.x = 3; - f3.y = 3; - f3.z = 3; - f3.w = 3; - if ((f1 == f2) == false) { - } - if ((f1 != f2) == true) { - } - if ((f1 < f2) == true) { - } - if ((f2 > f1) == true) { - } - if 
((f1 >= f3) == true) { - } - if ((f1 <= f3) == true) { - } - - if ((f1 && f2) == true) { - } - if ((f1 || f2) == true) { - } - return true; -} - - -__global__ void CheckVectorTypes(hipLaunchParm lp, bool* ptr) { - if (TestFloat1() && TestFloat2() && TestFloat3() && TestFloat4() && TestUChar1() && - TestUChar2() && TestUChar3() && TestUChar4() && TestChar1() && TestChar2() && TestChar3() && - TestChar4() && TestUShort1() && TestUShort2() && TestUShort3() && TestUShort4() && - TestShort1() && TestShort2() && TestShort3() && TestShort4() && TestUInt1() && - TestUInt2() && TestUInt3() && TestUInt4() && TestInt1() && TestInt2() && TestInt3() && - TestInt4() && TestULong1() && TestULong2() && TestULong3() && TestULong4() && TestLong1() && - TestLong2() && TestLong3() && TestLong4() && TestULongLong1() && TestULongLong2() && - TestULongLong3() && TestULongLong4() == true) { - ptr[0] = true; - } +__global__ +void CheckVectorTypes(bool* ptr) { + ptr[0] = TestVectorTypes< + char1, char2, char3, char4, + uchar1, uchar2, uchar3, uchar4, + short1, short2, short3, short4, + ushort1, ushort2, ushort3, ushort4, + int1, int2, int3, int4, + uint1, uint2, uint3, uint4, + long1, long2, long3, long4, + ulong1, ulong2, ulong3, ulong4, + longlong1, longlong2, longlong3, longlong4, + ulonglong1, ulonglong2, ulonglong3, ulonglong4, + float1, float2, float3, float4, + double1, double2, double3, double4>(); } int main() { - assert(sizeof(float1) == 4); - assert(sizeof(float2) == 8); - assert(sizeof(float3) == 12); - assert(sizeof(float4) == 16); + static_assert(sizeof(float1) == 4, ""); + static_assert(sizeof(float2) >= 8, ""); + static_assert(sizeof(float3) >= 12, ""); + static_assert(sizeof(float4) >= 16, ""); bool* ptr = nullptr; if (hipMalloc(&ptr, sizeof(bool)) != HIP_SUCCESS) return EXIT_FAILURE; - std::unique_ptr correct{ptr, hipFree}; - hipLaunchKernel(CheckVectorTypes, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, correct.get()); - bool passed = false; + unique_ptr correct{ptr, hipFree}; + 
hipLaunchKernelGGL( + CheckVectorTypes, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, correct.get()); + bool passed = true; if (hipMemcpyDtoH(&passed, correct.get(), sizeof(bool)) != HIP_SUCCESS) { return EXIT_FAILURE; } if (passed == true) { - std::cout << "PASSED" << std::endl; - return 0; - } else - return EXIT_FAILURE; -} + passed(); + } + else { + failed("Failed some vector test."); + } +} \ No newline at end of file diff --git a/hipamd/tests/src/deviceLib/vector_test_common.h b/hipamd/tests/src/deviceLib/vector_test_common.h new file mode 100644 index 0000000000..d5bc4c57a2 --- /dev/null +++ b/hipamd/tests/src/deviceLib/vector_test_common.h @@ -0,0 +1,105 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#pragma once + +#include + +template +using Enable_if_t = typename std::enable_if::type; + +__host__ __device__ +std::false_type is_vec4(...); +__host__ __device__ +std::false_type is_vec3(...); +__host__ __device__ +std::false_type is_vec2(...); +__host__ __device__ +std::false_type is_vec1(...); + +template +__host__ __device__ +auto is_vec4(const T&) -> decltype(std::declval().xyzw, std::true_type{}); +template< + typename T, Enable_if_t())){}>* = nullptr> +__host__ __device__ +auto is_vec3(const T&) -> decltype(std::declval().xyz, std::true_type{}); +template< + typename T, + Enable_if_t< + !decltype(is_vec4(std::declval())){} && + !decltype(is_vec3(std::declval())){}>* = nullptr> +__host__ __device__ +auto is_vec2(const T&) -> decltype(std::declval().xy, std::true_type{}); +template< + typename T, + Enable_if_t< + !decltype(is_vec4(std::declval())){} && + !decltype(is_vec3(std::declval())){} && + !decltype(is_vec2(std::declval())){}>* = nullptr> +__host__ __device__ +auto is_vec1(const T&) -> decltype(std::declval().x, std::true_type{}); + +template +__host__ __device__ +constexpr +bool is_vec() { + return (dimension == 1) ? decltype(is_vec1(std::declval())){} : + ((dimension == 2) ? decltype(is_vec2(std::declval())){} : + ((dimension == 3) ? 
decltype(is_vec3(std::declval())){} : + decltype(is_vec4(std::declval())){})); +} + +template()>* = nullptr> +__host__ __device__ +inline +bool cmp(const T& x, U expected) { + const auto r = x == T(expected); + + return r.x != 0; +} + +template()>* = nullptr> +__host__ __device__ +inline +bool cmp(const T& x, U expected) { + const auto r = x == T(expected); + + return r.x != 0 && r.y != 0; +} + +template()>* = nullptr> +__host__ __device__ +inline +bool cmp(const T& x, U expected) { + const auto r = x == T(expected); + + return r.x != 0 && r.y != 0 && r.z != 0; +} + +template()>* = nullptr> +__host__ __device__ +inline +bool cmp(const T& x, U expected) { + const auto r = x == T(expected); + + return r.x != 0 && r.y != 0 && r.z != 0 && r.w != 0; +} \ No newline at end of file From f4399d57a368ca8f0914097f41aecfd269a83bd5 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 12 Jun 2018 22:05:59 +0000 Subject: [PATCH 02/40] Add hipclang amdgcn functions These are moving from hipclang in device library to hip headers. These are required for the functionality of HIPclang project. 
--- hipamd/include/hip/hcc_detail/hip_runtime.h | 197 +++++++++++++++++++- hipamd/src/device_util.cpp | 9 +- 2 files changed, 192 insertions(+), 14 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index c62c85df64..34650d728a 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -189,8 +189,16 @@ extern int HIP_TRACE_API; static constexpr int warpSize = 64; #define clock_t long long int -__device__ long long int clock64(); -__device__ clock_t clock(); +__device__ +unsigned long __llvm_amdgcn_s_memrealtime(void) __asm("llvm.amdgcn.s.memrealtime"); + +__device__ +inline +long long int __clock64() { return (long long int)__llvm_amdgcn_s_memrealtime(); } + +__device__ +inline +clock_t __clock() { return (clock_t)__llvm_amdgcn_s_memrealtime(); } // abort __device__ void abort(); @@ -200,6 +208,45 @@ __device__ int __all(int input); __device__ int __any(int input); __device__ unsigned long long int __ballot(int input); +__device__ +inline +int64_t __ballot64(int a) { + int64_t s; + // define i64 @__ballot64(i32 %a) #0 { + // %b = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %a) #1 + // ret i64 %b + // } + __asm("v_cmp_ne_i32_e64 $0, 0, $1" : "=s"(s) : "v"(a)); + return s; +} + +// hip.amdgcn.bc - lanemask +extern "C" __device__ int32_t __ockl_activelane_u32(void); + +__device__ +inline +int64_t __lanemask_gt() +{ + int32_t activelane = __ockl_activelane_u32(); + int64_t ballot = __ballot64(1); + if (activelane != 63) { + int64_t tmp = (~0UL) << (activelane + 1); + return tmp & ballot; + } + return 0; +} + +__device__ +inline +int64_t __lanemask_lt() +{ + int32_t activelane = __ockl_activelane_u32(); + int64_t ballot = __ballot64(1); + if (activelane == 0) + return 0; + return ballot; +} + #if __HIP_ARCH_GFX701__ == 0 // warp shuffle functions @@ -238,8 +285,11 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int 
bank_mask __host__ __device__ int min(int arg1, int arg2); __host__ __device__ int max(int arg1, int arg2); -__device__ void* __get_dynamicgroupbaseptr(); +extern "C" __device__ void* get_dynamic_group_segment_base_pointer(); +__device__ +inline +void* __get_dynamicgroupbaseptr() { return get_dynamic_group_segment_base_pointer(); } /** * CUDA 8 device function features @@ -315,6 +365,11 @@ __device__ void __threadfence_system(void); * @} */ +// hip.amdgcn.bc - named sync +__device__ void __llvm_amdgcn_s_barrier() __asm("llvm.amdgcn.s.barrier"); + +__device__ inline void __named_sync(int a, int b) { __llvm_amdgcn_s_barrier(); } + #endif // __HCC_OR_HIP_CLANG__ #if defined __HCC__ @@ -572,7 +627,9 @@ extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim; __DEVICE__ void __device_trap() __asm("llvm.trap"); -__DEVICE__ void inline __assert_fail(const char * __assertion, +__DEVICE__ +inline +void __assert_fail(const char * __assertion, const char *__file, unsigned int __line, const char *__function) @@ -581,8 +638,136 @@ __DEVICE__ void inline __assert_fail(const char * __assertion, __device_trap(); } -extern "C" __device__ __attribute__((noduplicate)) void __syncthreads(); -extern "C" __device__ void *__amdgcn_get_dynamicgroupbaseptr(); +__DEVICE__ +inline +void __assertfail(const char * __assertion, + const char *__file, + unsigned int __line, + const char *__function, + size_t charsize) +{ + // ignore all the args for now. 
+ __device_trap(); +} + +// hip.amdgcn.bc - sync threads +// extern "C" __device__ __attribute__((noduplicate)) void __syncthreads(); +#define CLK_LOCAL_MEM_FENCE 0x01 +#define local __attribute__((address_space(3))) + +typedef unsigned cl_mem_fence_flags; + +typedef enum memory_scope { + memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, + memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +} memory_scope; + +// enum values aligned with what clang uses in EmitAtomicExpr() +typedef enum memory_order +{ + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} memory_order; + +extern "C" __device__ __attribute__((overloadable)) +void atomic_work_item_fence(cl_mem_fence_flags, memory_order, memory_scope); + +__device__ +inline +static void hc_work_group_barrier(cl_mem_fence_flags flags, memory_scope scope) +{ + if (flags) { + atomic_work_item_fence(flags, memory_order_release, scope); + __builtin_amdgcn_s_barrier(); + atomic_work_item_fence(flags, memory_order_acquire, scope); + } else { + __builtin_amdgcn_s_barrier(); + } +} + +__device__ +inline +static void hc_barrier(int n) +{ + hc_work_group_barrier((cl_mem_fence_flags)n, memory_scope_work_group); +} + +__device__ +inline +__attribute__((noduplicate)) +void __syncthreads() +{ + hc_barrier(CLK_LOCAL_MEM_FENCE); +} + + +__device__ unsigned __llvm_amdgcn_s_getreg(unsigned) __asm("llvm.amdgcn.s.getreg"); + +__device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); + +__device__ inline static local char* __to_local(unsigned x) { return (local char*)x; } + +__device__ inline void *__amdgcn_get_dynamicgroupbaseptr() { +#if 0 + // Get 
group segment base pointer. + char* base = __llvm_amdgcn_s_getreg(14342) << 8); + base += __llvm_amdgcn_groupstaticsize(); + return base; +#endif + return __get_dynamicgroupbaseptr(); +} + +// hip.amdgcn.bc - device routine +/* + HW_ID Register bit structure + WAVE_ID 3:0 Wave buffer slot number. 0-9. + SIMD_ID 5:4 SIMD which the wave is assigned to within the CU. + PIPE_ID 7:6 Pipeline from which the wave was dispatched. + CU_ID 11:8 Compute Unit the wave is assigned to. + SH_ID 12 Shader Array (within an SE) the wave is assigned to. + SE_ID 14:13 Shader Engine the wave is assigned to. + TG_ID 19:16 Thread-group ID + VM_ID 23:20 Virtual Memory ID + QUEUE_ID 26:24 Queue from which this wave was dispatched. + STATE_ID 29:27 State ID (graphics only, not compute). + ME_ID 31:30 Micro-engine ID. + */ + +#define HW_ID 4 + +#define HW_ID_CU_ID_SIZE 4 +#define HW_ID_CU_ID_OFFSET 8 + +#define HW_ID_SE_ID_SIZE 2 +#define HW_ID_SE_ID_OFFSET 13 + +/* + Encoding of parameter bitmask + HW_ID 5:0 HW_ID + OFFSET 10:6 Range: 0..31 + SIZE 15:11 Range: 1..32 + */ + +#define GETREG_IMMED(SZ,OFF,REG) (SZ << 11) | (OFF << 6) | REG + +__device__ +inline +unsigned __smid(void) +{ + unsigned cu_id = __llvm_amdgcn_s_getreg( + GETREG_IMMED(HW_ID_CU_ID_SIZE, HW_ID_CU_ID_OFFSET, HW_ID)); + unsigned se_id = __llvm_amdgcn_s_getreg( + GETREG_IMMED(HW_ID_SE_ID_SIZE, HW_ID_SE_ID_OFFSET, HW_ID)); + + /* Each shader engine has 16 CU */ + return (se_id << HW_ID_CU_ID_SIZE) + cu_id; +} // Macro to replace extern __shared__ declarations // to local variable definitions diff --git a/hipamd/src/device_util.cpp b/hipamd/src/device_util.cpp index 613e35f0cc..a3386ba14d 100644 --- a/hipamd/src/device_util.cpp +++ b/hipamd/src/device_util.cpp @@ -144,9 +144,6 @@ __device__ void* __hip_hc_memset(void* dst, uint8_t val, size_t size) { return dst; } -__device__ long long int clock64() { return (long long int)hc::__cycle_u64(); }; -__device__ clock_t clock() { return (clock_t)hc::__cycle_u64(); }; - // abort 
__device__ void abort() { return hc::abort(); } @@ -203,11 +200,7 @@ __host__ __device__ int max(int arg1, int arg2) { return (int)(hc::precise_math::fmax((float)arg1, (float)arg2)); } -__device__ void* __get_dynamicgroupbaseptr() { - return hc::get_dynamic_group_segment_base_pointer(); -} - __host__ void* __get_dynamicgroupbaseptr() { return nullptr; } -__device__ void __threadfence_system(void) { std::atomic_thread_fence(std::memory_order_seq_cst); } \ No newline at end of file +__device__ void __threadfence_system(void) { std::atomic_thread_fence(std::memory_order_seq_cst); } From 4298ed308e7564f28554e9022291e598db3e06b4 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 13 Jun 2018 15:59:45 +0000 Subject: [PATCH 03/40] Add __llvm_fence funcs and __ prefixes --- hipamd/include/hip/hcc_detail/hip_runtime.h | 52 +++++++++++++++------ 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 34650d728a..b9c7ea603c 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -652,40 +652,62 @@ void __assertfail(const char * __assertion, // hip.amdgcn.bc - sync threads // extern "C" __device__ __attribute__((noduplicate)) void __syncthreads(); -#define CLK_LOCAL_MEM_FENCE 0x01 -#define local __attribute__((address_space(3))) +#define __CLK_LOCAL_MEM_FENCE 0x01 +#define __local __attribute__((address_space(3))) -typedef unsigned cl_mem_fence_flags; +typedef unsigned __cl_mem_fence_flags; -typedef enum memory_scope { +typedef enum __memory_scope { memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP -} memory_scope; +} __memory_scope; // enum values aligned with what clang 
uses in EmitAtomicExpr() -typedef enum memory_order +typedef enum __memory_order { memory_order_relaxed = __ATOMIC_RELAXED, memory_order_acquire = __ATOMIC_ACQUIRE, memory_order_release = __ATOMIC_RELEASE, memory_order_acq_rel = __ATOMIC_ACQ_REL, memory_order_seq_cst = __ATOMIC_SEQ_CST -} memory_order; +} __memory_order; -extern "C" __device__ __attribute__((overloadable)) -void atomic_work_item_fence(cl_mem_fence_flags, memory_order, memory_scope); +// __llvm_fence* functions from device-libs/irif/src/fence.ll +extern "C" __device__ void __llvm_fence_acq_sg(void); +extern "C" __device__ void __llvm_fence_acq_wg(void); +extern "C" __device__ void __llvm_fence_acq_dev(void); +extern "C" __device__ void __llvm_fence_acq_sys(void); + +extern "C" __device__ void __llvm_fence_rel_sg(void); +extern "C" __device__ void __llvm_fence_rel_wg(void); +extern "C" __device__ void __llvm_fence_rel_dev(void); +extern "C" __device__ void __llvm_fence_rel_sys(void); __device__ inline -static void hc_work_group_barrier(cl_mem_fence_flags flags, memory_scope scope) +static void hc_work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope) { if (flags) { - atomic_work_item_fence(flags, memory_order_release, scope); + switch(scope) { + case memory_scope_work_item: break; + case memory_scope_sub_group: __llvm_fence_rel_sg(); break; + case memory_scope_work_group: __llvm_fence_rel_wg(); break; + case memory_scope_device: __llvm_fence_rel_dev(); break; + case memory_scope_all_svm_devices: __llvm_fence_rel_sys(); break; + } + //atomic_work_item_fence(flags, memory_order_release, scope); __builtin_amdgcn_s_barrier(); - atomic_work_item_fence(flags, memory_order_acquire, scope); + //atomic_work_item_fence(flags, memory_order_acquire, scope); + switch(scope) { + case memory_scope_work_item: break; + case memory_scope_sub_group: __llvm_fence_acq_sg(); break; + case memory_scope_work_group: __llvm_fence_acq_wg(); break; + case memory_scope_device: __llvm_fence_acq_dev(); break; + case 
memory_scope_all_svm_devices: __llvm_fence_acq_sys(); break; + } } else { __builtin_amdgcn_s_barrier(); } @@ -695,7 +717,7 @@ __device__ inline static void hc_barrier(int n) { - hc_work_group_barrier((cl_mem_fence_flags)n, memory_scope_work_group); + hc_work_group_barrier((__cl_mem_fence_flags)n, memory_scope_work_group); } __device__ @@ -703,7 +725,7 @@ inline __attribute__((noduplicate)) void __syncthreads() { - hc_barrier(CLK_LOCAL_MEM_FENCE); + hc_barrier(__CLK_LOCAL_MEM_FENCE); } @@ -711,7 +733,7 @@ __device__ unsigned __llvm_amdgcn_s_getreg(unsigned) __asm("llvm.amdgcn.s.getreg __device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); -__device__ inline static local char* __to_local(unsigned x) { return (local char*)x; } +__device__ inline static __local char* __to_local(unsigned x) { return (__local char*)x; } __device__ inline void *__amdgcn_get_dynamicgroupbaseptr() { #if 0 From 7cade79d981b48580da442f66ce9663710d77f0f Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 13 Jun 2018 16:06:58 +0000 Subject: [PATCH 04/40] Add prefix __ to memory scope and order --- hipamd/include/hip/hcc_detail/hip_runtime.h | 42 ++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index b9c7ea603c..58bbc1a91d 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -658,21 +658,21 @@ void __assertfail(const char * __assertion, typedef unsigned __cl_mem_fence_flags; typedef enum __memory_scope { - memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, - memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, - memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, - memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, - memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP + __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + 
__memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, + __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP } __memory_scope; // enum values aligned with what clang uses in EmitAtomicExpr() typedef enum __memory_order { - memory_order_relaxed = __ATOMIC_RELAXED, - memory_order_acquire = __ATOMIC_ACQUIRE, - memory_order_release = __ATOMIC_RELEASE, - memory_order_acq_rel = __ATOMIC_ACQ_REL, - memory_order_seq_cst = __ATOMIC_SEQ_CST + __memory_order_relaxed = __ATOMIC_RELAXED, + __memory_order_acquire = __ATOMIC_ACQUIRE, + __memory_order_release = __ATOMIC_RELEASE, + __memory_order_acq_rel = __ATOMIC_ACQ_REL, + __memory_order_seq_cst = __ATOMIC_SEQ_CST } __memory_order; // __llvm_fence* functions from device-libs/irif/src/fence.ll @@ -692,21 +692,21 @@ static void hc_work_group_barrier(__cl_mem_fence_flags flags, __memory_scope sco { if (flags) { switch(scope) { - case memory_scope_work_item: break; - case memory_scope_sub_group: __llvm_fence_rel_sg(); break; - case memory_scope_work_group: __llvm_fence_rel_wg(); break; - case memory_scope_device: __llvm_fence_rel_dev(); break; - case memory_scope_all_svm_devices: __llvm_fence_rel_sys(); break; + case __memory_scope_work_item: break; + case __memory_scope_sub_group: __llvm_fence_rel_sg(); break; + case __memory_scope_work_group: __llvm_fence_rel_wg(); break; + case __memory_scope_device: __llvm_fence_rel_dev(); break; + case __memory_scope_all_svm_devices: __llvm_fence_rel_sys(); break; } //atomic_work_item_fence(flags, memory_order_release, scope); __builtin_amdgcn_s_barrier(); //atomic_work_item_fence(flags, memory_order_acquire, scope); switch(scope) { - case memory_scope_work_item: break; - case memory_scope_sub_group: __llvm_fence_acq_sg(); break; - case memory_scope_work_group: __llvm_fence_acq_wg(); break; - case memory_scope_device: __llvm_fence_acq_dev(); break; - 
case memory_scope_all_svm_devices: __llvm_fence_acq_sys(); break; + case __memory_scope_work_item: break; + case __memory_scope_sub_group: __llvm_fence_acq_sg(); break; + case __memory_scope_work_group: __llvm_fence_acq_wg(); break; + case __memory_scope_device: __llvm_fence_acq_dev(); break; + case __memory_scope_all_svm_devices: __llvm_fence_acq_sys(); break; } } else { __builtin_amdgcn_s_barrier(); @@ -717,7 +717,7 @@ __device__ inline static void hc_barrier(int n) { - hc_work_group_barrier((__cl_mem_fence_flags)n, memory_scope_work_group); + hc_work_group_barrier((__cl_mem_fence_flags)n, __memory_scope_work_group); } __device__ From 0694d0300acec84c3b9ccb936bd7bd6fa765e1ed Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 13 Jun 2018 20:40:10 +0000 Subject: [PATCH 05/40] Add get_dynamicgroupbaseptr def and remove hc_ --- hipamd/include/hip/hcc_detail/hip_runtime.h | 49 ++++++++++----------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 58bbc1a91d..1589f19395 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -285,11 +285,29 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask __host__ __device__ int min(int arg1, int arg2); __host__ __device__ int max(int arg1, int arg2); -extern "C" __device__ void* get_dynamic_group_segment_base_pointer(); +// Introduce local address space +#define __local __attribute__((address_space(3))) +__device__ inline static __local char* __to_local(unsigned x) { return (__local char*)x; } +extern "C" __device__ void* __local_to_generic(__local void* p); + +__device__ unsigned __llvm_amdgcn_s_getreg(unsigned) __asm("llvm.amdgcn.s.getreg"); + +__device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); __device__ inline -void* __get_dynamicgroupbaseptr() { return 
get_dynamic_group_segment_base_pointer(); } +void* __get_dynamicgroupbaseptr() +{ + // Get group segment base pointer. + unsigned lds_base = __llvm_amdgcn_s_getreg(14342) << 8; + __local char* base = __to_local(lds_base); + unsigned long long group_static_size = __llvm_amdgcn_groupstaticsize(); + return (char*)__local_to_generic(base + group_static_size); +} + +__device__ inline void *__amdgcn_get_dynamicgroupbaseptr() { + return __get_dynamicgroupbaseptr(); +} /** * CUDA 8 device function features @@ -653,8 +671,6 @@ void __assertfail(const char * __assertion, // hip.amdgcn.bc - sync threads // extern "C" __device__ __attribute__((noduplicate)) void __syncthreads(); #define __CLK_LOCAL_MEM_FENCE 0x01 -#define __local __attribute__((address_space(3))) - typedef unsigned __cl_mem_fence_flags; typedef enum __memory_scope { @@ -688,7 +704,7 @@ extern "C" __device__ void __llvm_fence_rel_sys(void); __device__ inline -static void hc_work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope) +static void __work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope) { if (flags) { switch(scope) { @@ -715,9 +731,9 @@ static void hc_work_group_barrier(__cl_mem_fence_flags flags, __memory_scope sco __device__ inline -static void hc_barrier(int n) +static void __barrier(int n) { - hc_work_group_barrier((__cl_mem_fence_flags)n, __memory_scope_work_group); + __work_group_barrier((__cl_mem_fence_flags)n, __memory_scope_work_group); } __device__ @@ -725,24 +741,7 @@ inline __attribute__((noduplicate)) void __syncthreads() { - hc_barrier(__CLK_LOCAL_MEM_FENCE); -} - - -__device__ unsigned __llvm_amdgcn_s_getreg(unsigned) __asm("llvm.amdgcn.s.getreg"); - -__device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); - -__device__ inline static __local char* __to_local(unsigned x) { return (__local char*)x; } - -__device__ inline void *__amdgcn_get_dynamicgroupbaseptr() { -#if 0 - // Get group segment base pointer. 
- char* base = __llvm_amdgcn_s_getreg(14342) << 8); - base += __llvm_amdgcn_groupstaticsize(); - return base; -#endif - return __get_dynamicgroupbaseptr(); + __barrier(__CLK_LOCAL_MEM_FENCE); } // hip.amdgcn.bc - device routine From 366207f8affa6b3d1b495661a5d13fb826f630d8 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Thu, 14 Jun 2018 17:49:35 +0000 Subject: [PATCH 06/40] Split __llvm and device lib funcs into new headers --- .../hip/hcc_detail/device_library_decls.h | 51 +++++++++++++++++++ hipamd/include/hip/hcc_detail/hip_runtime.h | 38 +++----------- .../include/hip/hcc_detail/llvm_intrinsics.h | 46 +++++++++++++++++ 3 files changed, 103 insertions(+), 32 deletions(-) create mode 100644 hipamd/include/hip/hcc_detail/device_library_decls.h create mode 100644 hipamd/include/hip/hcc_detail/llvm_intrinsics.h diff --git a/hipamd/include/hip/hcc_detail/device_library_decls.h b/hipamd/include/hip/hcc_detail/device_library_decls.h new file mode 100644 index 0000000000..b3fa556bd4 --- /dev/null +++ b/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -0,0 +1,51 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hcc_detail/device_library_decls.h + * @brief Contains declarations for types and functions in device library. + */ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H + +#include "hip/hcc_detail/host_defines.h" + +extern "C" __device__ int32_t __ockl_activelane_u32(void); + +// Introduce local address space +#define __local __attribute__((address_space(3))) +__device__ inline static __local char* __to_local(unsigned x) { return (__local char*)x; } +extern "C" __device__ void* __local_to_generic(__local void* p); + +// __llvm_fence* functions from device-libs/irif/src/fence.ll +extern "C" __device__ void __llvm_fence_acq_sg(void); +extern "C" __device__ void __llvm_fence_acq_wg(void); +extern "C" __device__ void __llvm_fence_acq_dev(void); +extern "C" __device__ void __llvm_fence_acq_sys(void); + +extern "C" __device__ void __llvm_fence_rel_sg(void); +extern "C" __device__ void __llvm_fence_rel_wg(void); +extern "C" __device__ void __llvm_fence_rel_dev(void); +extern "C" __device__ void __llvm_fence_rel_sys(void); + +#endif diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 1589f19395..4cd41a0c86 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -62,6 +62,8 @@ THE SOFTWARE. 
#define CUDA_SUCCESS hipSuccess #include +#include +#include #endif // __HCC_OR_HIP_CLANG__ #if __HCC__ @@ -189,9 +191,6 @@ extern int HIP_TRACE_API; static constexpr int warpSize = 64; #define clock_t long long int -__device__ -unsigned long __llvm_amdgcn_s_memrealtime(void) __asm("llvm.amdgcn.s.memrealtime"); - __device__ inline long long int __clock64() { return (long long int)__llvm_amdgcn_s_memrealtime(); } @@ -221,8 +220,6 @@ int64_t __ballot64(int a) { } // hip.amdgcn.bc - lanemask -extern "C" __device__ int32_t __ockl_activelane_u32(void); - __device__ inline int64_t __lanemask_gt() @@ -285,27 +282,18 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask __host__ __device__ int min(int arg1, int arg2); __host__ __device__ int max(int arg1, int arg2); -// Introduce local address space -#define __local __attribute__((address_space(3))) -__device__ inline static __local char* __to_local(unsigned x) { return (__local char*)x; } -extern "C" __device__ void* __local_to_generic(__local void* p); - -__device__ unsigned __llvm_amdgcn_s_getreg(unsigned) __asm("llvm.amdgcn.s.getreg"); - -__device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); __device__ inline void* __get_dynamicgroupbaseptr() { // Get group segment base pointer. 
- unsigned lds_base = __llvm_amdgcn_s_getreg(14342) << 8; - __local char* base = __to_local(lds_base); - unsigned long long group_static_size = __llvm_amdgcn_groupstaticsize(); - return (char*)__local_to_generic(base + group_static_size); + return (char*)__local_to_generic(__to_local(__llvm_amdgcn_groupstaticsize())); } -__device__ inline void *__amdgcn_get_dynamicgroupbaseptr() { +__device__ +inline +void *__amdgcn_get_dynamicgroupbaseptr() { return __get_dynamicgroupbaseptr(); } @@ -384,8 +372,6 @@ __device__ void __threadfence_system(void); */ // hip.amdgcn.bc - named sync -__device__ void __llvm_amdgcn_s_barrier() __asm("llvm.amdgcn.s.barrier"); - __device__ inline void __named_sync(int a, int b) { __llvm_amdgcn_s_barrier(); } #endif // __HCC_OR_HIP_CLANG__ @@ -669,7 +655,6 @@ void __assertfail(const char * __assertion, } // hip.amdgcn.bc - sync threads -// extern "C" __device__ __attribute__((noduplicate)) void __syncthreads(); #define __CLK_LOCAL_MEM_FENCE 0x01 typedef unsigned __cl_mem_fence_flags; @@ -691,17 +676,6 @@ typedef enum __memory_order __memory_order_seq_cst = __ATOMIC_SEQ_CST } __memory_order; -// __llvm_fence* functions from device-libs/irif/src/fence.ll -extern "C" __device__ void __llvm_fence_acq_sg(void); -extern "C" __device__ void __llvm_fence_acq_wg(void); -extern "C" __device__ void __llvm_fence_acq_dev(void); -extern "C" __device__ void __llvm_fence_acq_sys(void); - -extern "C" __device__ void __llvm_fence_rel_sg(void); -extern "C" __device__ void __llvm_fence_rel_wg(void); -extern "C" __device__ void __llvm_fence_rel_dev(void); -extern "C" __device__ void __llvm_fence_rel_sys(void); - __device__ inline static void __work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope) diff --git a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h new file mode 100644 index 0000000000..b608ad6819 --- /dev/null +++ b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h @@ -0,0 +1,46 @@ +/* 
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hcc_detail/llvm_intrinsics.h + * @brief Contains declarations for wrapper functions for llvm intrinsics + * like llvm.amdgcn.s.barrier. 
+ */ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H + +#include "hip/hcc_detail/host_defines.h" + +__device__ +unsigned long __llvm_amdgcn_s_memrealtime(void) __asm("llvm.amdgcn.s.memrealtime"); + +__device__ +unsigned __llvm_amdgcn_s_getreg(unsigned) __asm("llvm.amdgcn.s.getreg"); + +__device__ +unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); + +__device__ +void __llvm_amdgcn_s_barrier() __asm("llvm.amdgcn.s.barrier"); + +#endif From e8a18a54731b0e6d3ed4f3bac5a3f011587ef480 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Fri, 15 Jun 2018 23:06:40 +0000 Subject: [PATCH 07/40] Move hipclang funcs into corresponding headers --- .../include/hip/hcc_detail/device_functions.h | 256 ++++++++++++++++++ hipamd/include/hip/hcc_detail/hip_runtime.h | 229 ---------------- .../include/hip/hcc_detail/llvm_intrinsics.h | 9 - 3 files changed, 256 insertions(+), 238 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index ae1b96c979..8bae5325fd 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -24,8 +24,12 @@ THE SOFTWARE. 
#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H #include "host_defines.h" +#include "math_fwd.h" +#include #include +#include +#include extern "C" __device__ unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int); extern "C" __device__ signed int __hip_hc_ir_mul24_int(signed int, signed int); @@ -209,5 +213,257 @@ __device__ char4 __hip_hc_add8pk(char4, char4); __device__ char4 __hip_hc_sub8pk(char4, char4); __device__ char4 __hip_hc_mul8pk(char4, char4); +#if defined(__HCC__) +#define __HCC_OR_HIP_CLANG__ 1 +#elif defined(__clang__) && defined(__HIP__) +#define __HCC_OR_HIP_CLANG__ 1 +#else +#define __HCC_OR_HIP_CLANG__ 0 +#endif + +#ifdef __HCC_OR_HIP_CLANG__ + +#ifdef __HIP_DEVICE_COMPILE__ + +// Clock functions +__device__ +inline +long long int __clock64() { return (long long int) __builtin_amdgcn_s_memrealtime(); } + +__device__ +inline +long long int __clock() { return (long long int) __builtin_amdgcn_s_memrealtime(); } + +// hip.amdgcn.bc - named sync +__device__ +inline +void __named_sync(int a, int b) { __builtin_amdgcn_s_barrier(); } + +#endif // __HIP_DEVICE_COMPILE__ + +// warp vote function __all __any __ballot +__device__ +int __all(int input); +__device__ +int __any(int input); +__device__ +unsigned long long int __ballot(int input); + +__device__ +inline +uint64_t __ballot64(int a) { + int64_t s; + // define i64 @__ballot64(i32 %a) #0 { + // %b = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %a) #1 + // ret i64 %b + // } + __asm("v_cmp_ne_i32_e64 $0, 0, $1" : "=s"(s) : "v"(a)); + return s; +} + +// hip.amdgcn.bc - lanemask +__device__ +inline +int64_t __lanemask_gt() +{ + int32_t activelane = __ockl_activelane_u32(); + int64_t ballot = __ballot64(1); + if (activelane != 63) { + int64_t tmp = (~0UL) << (activelane + 1); + return tmp & ballot; + } + return 0; +} + +__device__ +inline +int64_t __lanemask_lt() +{ + int32_t activelane = __ockl_activelane_u32(); + int64_t ballot = __ballot64(1); + if (activelane == 0) + return 
0; + return ballot; +} + +__device__ +inline +void* __get_dynamicgroupbaseptr() +{ + // Get group segment base pointer. + return (char*)__local_to_generic(__to_local(__llvm_amdgcn_groupstaticsize())); +} + +__device__ +inline +void *__amdgcn_get_dynamicgroupbaseptr() { + return __get_dynamicgroupbaseptr(); +} + +#endif // __HCC_OR_HIP_CLANG__ + +#ifdef __HCC__ + +/** + * extern __shared__ + */ + +// Macro to replace extern __shared__ declarations +// to local variable definitions +#define HIP_DYNAMIC_SHARED(type, var) type* var = (type*)__get_dynamicgroupbaseptr(); + +#define HIP_DYNAMIC_SHARED_ATTRIBUTE + + +#elif defined(__clang__) && defined(__HIP__) + +#pragma push_macro("__DEVICE__") +#define __DEVICE__ extern "C" __device__ __attribute__((always_inline)) \ + __attribute__((weak)) + +__DEVICE__ +inline +void __assert_fail(const char * __assertion, + const char *__file, + unsigned int __line, + const char *__function) +{ + // Ignore all the args for now. + __builtin_trap(); +} + +__DEVICE__ +inline +void __assertfail(const char * __assertion, + const char *__file, + unsigned int __line, + const char *__function, + size_t charsize) +{ + // ignore all the args for now. 
+ __builtin_trap(); +} + +// hip.amdgcn.bc - sync threads +#define __CLK_LOCAL_MEM_FENCE 0x01 +typedef unsigned __cl_mem_fence_flags; + +typedef enum __memory_scope { + __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, + __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +} __memory_scope; + +// enum values aligned with what clang uses in EmitAtomicExpr() +typedef enum __memory_order +{ + __memory_order_relaxed = __ATOMIC_RELAXED, + __memory_order_acquire = __ATOMIC_ACQUIRE, + __memory_order_release = __ATOMIC_RELEASE, + __memory_order_acq_rel = __ATOMIC_ACQ_REL, + __memory_order_seq_cst = __ATOMIC_SEQ_CST +} __memory_order; + +__device__ +inline +static void __work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope) +{ + if (flags) { + switch(scope) { + case __memory_scope_work_item: break; + case __memory_scope_sub_group: __llvm_fence_rel_sg(); break; + case __memory_scope_work_group: __llvm_fence_rel_wg(); break; + case __memory_scope_device: __llvm_fence_rel_dev(); break; + case __memory_scope_all_svm_devices: __llvm_fence_rel_sys(); break; + } + //atomic_work_item_fence(flags, memory_order_release, scope); + __builtin_amdgcn_s_barrier(); + //atomic_work_item_fence(flags, memory_order_acquire, scope); + switch(scope) { + case __memory_scope_work_item: break; + case __memory_scope_sub_group: __llvm_fence_acq_sg(); break; + case __memory_scope_work_group: __llvm_fence_acq_wg(); break; + case __memory_scope_device: __llvm_fence_acq_dev(); break; + case __memory_scope_all_svm_devices: __llvm_fence_acq_sys(); break; + } + } else { + __builtin_amdgcn_s_barrier(); + } +} + +__device__ +inline +static void __barrier(int n) +{ + __work_group_barrier((__cl_mem_fence_flags)n, __memory_scope_work_group); +} + +__device__ +inline 
+__attribute__((noduplicate)) +void __syncthreads() +{ + __barrier(__CLK_LOCAL_MEM_FENCE); +} + +// hip.amdgcn.bc - device routine +/* + HW_ID Register bit structure + WAVE_ID 3:0 Wave buffer slot number. 0-9. + SIMD_ID 5:4 SIMD which the wave is assigned to within the CU. + PIPE_ID 7:6 Pipeline from which the wave was dispatched. + CU_ID 11:8 Compute Unit the wave is assigned to. + SH_ID 12 Shader Array (within an SE) the wave is assigned to. + SE_ID 14:13 Shader Engine the wave is assigned to. + TG_ID 19:16 Thread-group ID + VM_ID 23:20 Virtual Memory ID + QUEUE_ID 26:24 Queue from which this wave was dispatched. + STATE_ID 29:27 State ID (graphics only, not compute). + ME_ID 31:30 Micro-engine ID. + */ + +#define HW_ID 4 + +#define HW_ID_CU_ID_SIZE 4 +#define HW_ID_CU_ID_OFFSET 8 + +#define HW_ID_SE_ID_SIZE 2 +#define HW_ID_SE_ID_OFFSET 13 + +/* + Encoding of parameter bitmask + HW_ID 5:0 HW_ID + OFFSET 10:6 Range: 0..31 + SIZE 15:11 Range: 1..32 + */ + +#define GETREG_IMMED(SZ,OFF,REG) (SZ << 11) | (OFF << 6) | REG + +__device__ +inline +unsigned __smid(void) +{ + unsigned cu_id = __builtin_amdgcn_s_getreg( + GETREG_IMMED(HW_ID_CU_ID_SIZE, HW_ID_CU_ID_OFFSET, HW_ID)); + unsigned se_id = __builtin_amdgcn_s_getreg( + GETREG_IMMED(HW_ID_SE_ID_SIZE, HW_ID_SE_ID_OFFSET, HW_ID)); + + /* Each shader engine has 16 CU */ + return (se_id << HW_ID_CU_ID_SIZE) + cu_id; +} + +#pragma push_macro("__DEVICE__") + +// Macro to replace extern __shared__ declarations +// to local variable definitions +#define HIP_DYNAMIC_SHARED(type, var) \ + type* var = (type*)__amdgcn_get_dynamicgroupbaseptr(); + +#define HIP_DYNAMIC_SHARED_ATTRIBUTE + + +#endif //defined(__clang__) && defined(__HIP__) #endif diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 4cd41a0c86..18b04daf77 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -62,8 +62,6 @@ THE SOFTWARE. 
#define CUDA_SUCCESS hipSuccess #include -#include -#include #endif // __HCC_OR_HIP_CLANG__ #if __HCC__ @@ -190,60 +188,9 @@ extern int HIP_TRACE_API; //#define warpSize hc::__wavesize() static constexpr int warpSize = 64; -#define clock_t long long int -__device__ -inline -long long int __clock64() { return (long long int)__llvm_amdgcn_s_memrealtime(); } - -__device__ -inline -clock_t __clock() { return (clock_t)__llvm_amdgcn_s_memrealtime(); } - // abort __device__ void abort(); -// warp vote function __all __any __ballot -__device__ int __all(int input); -__device__ int __any(int input); -__device__ unsigned long long int __ballot(int input); - -__device__ -inline -int64_t __ballot64(int a) { - int64_t s; - // define i64 @__ballot64(i32 %a) #0 { - // %b = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %a) #1 - // ret i64 %b - // } - __asm("v_cmp_ne_i32_e64 $0, 0, $1" : "=s"(s) : "v"(a)); - return s; -} - -// hip.amdgcn.bc - lanemask -__device__ -inline -int64_t __lanemask_gt() -{ - int32_t activelane = __ockl_activelane_u32(); - int64_t ballot = __ballot64(1); - if (activelane != 63) { - int64_t tmp = (~0UL) << (activelane + 1); - return tmp & ballot; - } - return 0; -} - -__device__ -inline -int64_t __lanemask_lt() -{ - int32_t activelane = __ockl_activelane_u32(); - int64_t ballot = __ballot64(1); - if (activelane == 0) - return 0; - return ballot; -} - #if __HIP_ARCH_GFX701__ == 0 // warp shuffle functions @@ -283,20 +230,6 @@ __host__ __device__ int min(int arg1, int arg2); __host__ __device__ int max(int arg1, int arg2); -__device__ -inline -void* __get_dynamicgroupbaseptr() -{ - // Get group segment base pointer. 
- return (char*)__local_to_generic(__to_local(__llvm_amdgcn_groupstaticsize())); -} - -__device__ -inline -void *__amdgcn_get_dynamicgroupbaseptr() { - return __get_dynamicgroupbaseptr(); -} - /** * CUDA 8 device function features @@ -371,9 +304,6 @@ __device__ void __threadfence_system(void); * @} */ -// hip.amdgcn.bc - named sync -__device__ inline void __named_sync(int a, int b) { __llvm_amdgcn_s_barrier(); } - #endif // __HCC_OR_HIP_CLANG__ #if defined __HCC__ @@ -496,17 +426,6 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri #endif //__HCC_CPP__ -/** - * extern __shared__ - */ - -// Macro to replace extern __shared__ declarations -// to local variable definitions -#define HIP_DYNAMIC_SHARED(type, var) type* var = (type*)__get_dynamicgroupbaseptr(); - -#define HIP_DYNAMIC_SHARED_ATTRIBUTE - - /** * @defgroup HIP-ENV HIP Environment Variables * @{ @@ -625,154 +544,6 @@ extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim; #define hipGridDim_y gridDim.y #define hipGridDim_z gridDim.z -#pragma push_macro("__DEVICE__") -#define __DEVICE__ extern "C" __device__ __attribute__((always_inline)) \ - __attribute__((weak)) - -__DEVICE__ void __device_trap() __asm("llvm.trap"); - -__DEVICE__ -inline -void __assert_fail(const char * __assertion, - const char *__file, - unsigned int __line, - const char *__function) -{ - // Ignore all the args for now. - __device_trap(); -} - -__DEVICE__ -inline -void __assertfail(const char * __assertion, - const char *__file, - unsigned int __line, - const char *__function, - size_t charsize) -{ - // ignore all the args for now. 
- __device_trap(); -} - -// hip.amdgcn.bc - sync threads -#define __CLK_LOCAL_MEM_FENCE 0x01 -typedef unsigned __cl_mem_fence_flags; - -typedef enum __memory_scope { - __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, - __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, - __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, - __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, - __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP -} __memory_scope; - -// enum values aligned with what clang uses in EmitAtomicExpr() -typedef enum __memory_order -{ - __memory_order_relaxed = __ATOMIC_RELAXED, - __memory_order_acquire = __ATOMIC_ACQUIRE, - __memory_order_release = __ATOMIC_RELEASE, - __memory_order_acq_rel = __ATOMIC_ACQ_REL, - __memory_order_seq_cst = __ATOMIC_SEQ_CST -} __memory_order; - -__device__ -inline -static void __work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope) -{ - if (flags) { - switch(scope) { - case __memory_scope_work_item: break; - case __memory_scope_sub_group: __llvm_fence_rel_sg(); break; - case __memory_scope_work_group: __llvm_fence_rel_wg(); break; - case __memory_scope_device: __llvm_fence_rel_dev(); break; - case __memory_scope_all_svm_devices: __llvm_fence_rel_sys(); break; - } - //atomic_work_item_fence(flags, memory_order_release, scope); - __builtin_amdgcn_s_barrier(); - //atomic_work_item_fence(flags, memory_order_acquire, scope); - switch(scope) { - case __memory_scope_work_item: break; - case __memory_scope_sub_group: __llvm_fence_acq_sg(); break; - case __memory_scope_work_group: __llvm_fence_acq_wg(); break; - case __memory_scope_device: __llvm_fence_acq_dev(); break; - case __memory_scope_all_svm_devices: __llvm_fence_acq_sys(); break; - } - } else { - __builtin_amdgcn_s_barrier(); - } -} - -__device__ -inline -static void __barrier(int n) -{ - __work_group_barrier((__cl_mem_fence_flags)n, __memory_scope_work_group); -} - -__device__ -inline 
-__attribute__((noduplicate)) -void __syncthreads() -{ - __barrier(__CLK_LOCAL_MEM_FENCE); -} - -// hip.amdgcn.bc - device routine -/* - HW_ID Register bit structure - WAVE_ID 3:0 Wave buffer slot number. 0-9. - SIMD_ID 5:4 SIMD which the wave is assigned to within the CU. - PIPE_ID 7:6 Pipeline from which the wave was dispatched. - CU_ID 11:8 Compute Unit the wave is assigned to. - SH_ID 12 Shader Array (within an SE) the wave is assigned to. - SE_ID 14:13 Shader Engine the wave is assigned to. - TG_ID 19:16 Thread-group ID - VM_ID 23:20 Virtual Memory ID - QUEUE_ID 26:24 Queue from which this wave was dispatched. - STATE_ID 29:27 State ID (graphics only, not compute). - ME_ID 31:30 Micro-engine ID. - */ - -#define HW_ID 4 - -#define HW_ID_CU_ID_SIZE 4 -#define HW_ID_CU_ID_OFFSET 8 - -#define HW_ID_SE_ID_SIZE 2 -#define HW_ID_SE_ID_OFFSET 13 - -/* - Encoding of parameter bitmask - HW_ID 5:0 HW_ID - OFFSET 10:6 Range: 0..31 - SIZE 15:11 Range: 1..32 - */ - -#define GETREG_IMMED(SZ,OFF,REG) (SZ << 11) | (OFF << 6) | REG - -__device__ -inline -unsigned __smid(void) -{ - unsigned cu_id = __llvm_amdgcn_s_getreg( - GETREG_IMMED(HW_ID_CU_ID_SIZE, HW_ID_CU_ID_OFFSET, HW_ID)); - unsigned se_id = __llvm_amdgcn_s_getreg( - GETREG_IMMED(HW_ID_SE_ID_SIZE, HW_ID_SE_ID_OFFSET, HW_ID)); - - /* Each shader engine has 16 CU */ - return (se_id << HW_ID_CU_ID_SIZE) + cu_id; -} - -// Macro to replace extern __shared__ declarations -// to local variable definitions -#define HIP_DYNAMIC_SHARED(type, var) \ - type* var = (type*)__amdgcn_get_dynamicgroupbaseptr(); - -#define HIP_DYNAMIC_SHARED_ATTRIBUTE - -#pragma push_macro("__DEVICE__") - #include #endif diff --git a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h index b608ad6819..2c7819b535 100644 --- a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h +++ b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h @@ -31,16 +31,7 @@ THE SOFTWARE. 
#include "hip/hcc_detail/host_defines.h" -__device__ -unsigned long __llvm_amdgcn_s_memrealtime(void) __asm("llvm.amdgcn.s.memrealtime"); - -__device__ -unsigned __llvm_amdgcn_s_getreg(unsigned) __asm("llvm.amdgcn.s.getreg"); - __device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); -__device__ -void __llvm_amdgcn_s_barrier() __asm("llvm.amdgcn.s.barrier"); - #endif From ad5f1dcbf218274f692cf2fa1a87495649c1038c Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 19 Jun 2018 19:22:31 +0000 Subject: [PATCH 08/40] Implement device_functions.cpp into HIP headers Move all Integer Intrinsics, device_functions.cpp definitions and HIP specific device functions into HIP headers. Implement the device functions using llvm_intrinsics and device-libs functions instead of calling hc::__* functions. Remove device_functions.cpp since everything is now defined in header. --- hipamd/CMakeLists.txt | 3 +- .../include/hip/hcc_detail/device_functions.h | 475 ++++++++++++++---- .../hip/hcc_detail/device_library_decls.h | 5 + .../include/hip/hcc_detail/llvm_intrinsics.h | 29 ++ hipamd/src/device_functions.cpp | 387 -------------- 5 files changed, 422 insertions(+), 477 deletions(-) delete mode 100644 hipamd/src/device_functions.cpp diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index 537764548f..b649b20c21 100644 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -194,8 +194,7 @@ if(HIP_PLATFORM STREQUAL "hcc") set(SOURCE_FILES_DEVICE src/device_util.cpp - src/hip_ldg.cpp - src/device_functions.cpp) + src/hip_ldg.cpp) execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic") diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index 8bae5325fd..b147cd9b80 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ 
b/hipamd/include/hip/hcc_detail/device_functions.h @@ -31,35 +31,142 @@ THE SOFTWARE. #include #include +typedef unsigned long ulong; +typedef unsigned int uint; + extern "C" __device__ unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int); extern "C" __device__ signed int __hip_hc_ir_mul24_int(signed int, signed int); extern "C" __device__ signed int __hip_hc_ir_mulhi_int(signed int, signed int); extern "C" __device__ unsigned int __hip_hc_ir_umulhi_int(unsigned int, unsigned int); extern "C" __device__ unsigned int __hip_hc_ir_usad_int(unsigned int, unsigned int, unsigned int); +/* +Integer Intrinsics +*/ + // integer intrinsic function __poc __clz __ffs __brev -__device__ unsigned int __brev(unsigned int x); -__device__ unsigned long long int __brevll(unsigned long long int x); -__device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s); -__device__ unsigned int __clz(int x); -__device__ unsigned int __clzll(long long int x); -__device__ unsigned int __ffs(int x); -__device__ unsigned int __ffsll(long long int x); +__device__ static inline unsigned int __popc(unsigned int input) { + return __builtin_popcount(input); +} +__device__ static inline unsigned int __popcll(unsigned long long int input) { + return __builtin_popcountl(input); +} + +__device__ static inline unsigned int __clz(unsigned int input) { +#ifdef NVCC_COMPAT + return input == 0 ? 32 : __builtin_clz(input); +#else + return input == 0 ? -1 : __builtin_clz(input); +#endif +} + +__device__ static inline unsigned int __clzll(unsigned long long int input) { +#ifdef NVCC_COMPAT + return input == 0 ? 64 : ( input == 0 ? -1 : __builtin_clzl(input) ); +#else + return input == 0 ? -1 : __builtin_clzl(input); +#endif +} + +__device__ static inline unsigned int __clz(int input) { +#ifdef NVCC_COMPAT + return input == 0 ? 32 : ( input > 0 ? __builtin_clz(input) : __builtin_clz(~input) ); +#else + if (input == 0) return -1; + return input > 0 ? 
__builtin_clz(input) : __builtin_clz(~input); +#endif +} + +__device__ static inline unsigned int __clzll(long long int input) { +#ifdef NVCC_COMPAT + return input == 0 ? 64 : input > 0 ? __builtin_clzl(input) : __builtin_clzl(~input); +#else + if (input == 0) return -1; + return input > 0 ? __builtin_clzl(input) : __builtin_clzl(~input); +#endif +} + +__device__ static inline unsigned int __ffs(unsigned int input) { +#ifdef NVCC_COMPAT + return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1; +#else + return input == 0 ? -1 : __builtin_ctz(input); +#endif +} + +__device__ static inline unsigned int __ffsll(unsigned long long int input) { +#ifdef NVCC_COMPAT + return ( input == 0 ? -1 : __builtin_ctzl(input) ) + 1; +#else + return input == 0 ? -1 : __builtin_ctzl(input); +#endif +} + +__device__ static inline unsigned int __ffs(int input) { +#ifdef NVCC_COMPAT + return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1; +#else + return input == 0 ? -1 : __builtin_ctz(input); +#endif +} + +__device__ static inline unsigned int __ffsll(long long int input) { +#ifdef NVCC_COMPAT + return ( input == 0 ? -1 : __builtin_ctzl(input) ) + 1; +#else + return input == 0 ? 
-1 : __builtin_ctzl(input); +#endif +} + +__device__ static inline unsigned int __brev(unsigned int input) { return __llvm_bitrev_b32(input); } + +__device__ static inline unsigned long long int __brevll(unsigned long long int input) { + return __llvm_bitrev_b64(input); +} + +__device__ static unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s); __device__ static unsigned int __hadd(int x, int y); __device__ static int __mul24(int x, int y); -__device__ long long int __mul64hi(long long int x, long long int y); +__device__ static long long int __mul64hi(long long int x, long long int y); __device__ static int __mulhi(int x, int y); -__device__ unsigned int __popc(unsigned int x); -__device__ unsigned int __popcll(unsigned long long int x); __device__ static int __rhadd(int x, int y); __device__ static unsigned int __sad(int x, int y, int z); __device__ static unsigned int __uhadd(unsigned int x, unsigned int y); __device__ static int __umul24(unsigned int x, unsigned int y); -__device__ unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y); +__device__ static unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y); __device__ static unsigned int __umulhi(unsigned int x, unsigned int y); __device__ static unsigned int __urhadd(unsigned int x, unsigned int y); __device__ static unsigned int __usad(unsigned int x, unsigned int y, unsigned int z); +struct ucharHolder { + union { + unsigned char c[4]; + unsigned int ui; + }; +} __attribute__((aligned(4))); + +struct uchar2Holder { + union { + unsigned int ui[2]; + unsigned char c[8]; + }; +} __attribute__((aligned(8))); + +__device__ +static inline unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) { + struct uchar2Holder cHoldVal; + struct ucharHolder cHoldKey; + struct ucharHolder cHoldOut; + cHoldKey.ui = s; + cHoldVal.ui[0] = x; + cHoldVal.ui[1] = y; + cHoldOut.c[0] = cHoldVal.c[cHoldKey.c[0]]; + cHoldOut.c[1] = 
cHoldVal.c[cHoldKey.c[1]]; + cHoldOut.c[2] = cHoldVal.c[cHoldKey.c[2]]; + cHoldOut.c[3] = cHoldVal.c[cHoldKey.c[3]]; + return cHoldOut.ui; +} + __device__ static inline unsigned int __hadd(int x, int y) { int z = x + y; int sign = z & 0x8000000; @@ -67,6 +174,20 @@ __device__ static inline unsigned int __hadd(int x, int y) { return ((value) >> 1 || sign); } __device__ static inline int __mul24(int x, int y) { return __hip_hc_ir_mul24_int(x, y); } + +__device__ static inline long long __mul64hi(long long int x, long long int y) { + ulong x0 = (ulong)x & 0xffffffffUL; + long x1 = x >> 32; + ulong y0 = (ulong)y & 0xffffffffUL; + long y1 = y >> 32; + ulong z0 = x0*y0; + long t = x1*y0 + (z0 >> 32); + long z1 = t & 0xffffffffL; + long z2 = t >> 32; + z1 = x0*y1 + z1; + return x1*y1 + z2 + (z1 >> 32); +} + __device__ static inline int __mulhi(int x, int y) { return __hip_hc_ir_mulhi_int(x, y); } __device__ static inline int __rhadd(int x, int y) { int z = x + y + 1; @@ -83,6 +204,21 @@ __device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) { __device__ static inline int __umul24(unsigned int x, unsigned int y) { return __hip_hc_ir_umul24_int(x, y); } + +__device__ +static inline unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) { + ulong x0 = x & 0xffffffffUL; + ulong x1 = x >> 32; + ulong y0 = y & 0xffffffffUL; + ulong y1 = y >> 32; + ulong z0 = x0*y0; + ulong t = x1*y0 + (z0 >> 32); + ulong z1 = t & 0xffffffffUL; + ulong z2 = t >> 32; + z1 = x0*y1 + z1; + return x1*y1 + z2 + (z1 >> 32); +} + __device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) { return __hip_hc_ir_umulhi_int(x, y); } @@ -93,45 +229,158 @@ __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, uns return __hip_hc_ir_usad_int(x, y, z); } -extern __device__ __attribute__((const)) unsigned int __mbcnt_lo(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.lo"); -extern __device__ 
__attribute__((const)) unsigned int __mbcnt_hi(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.hi"); - __device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); } +/* +HIP specific device functions +*/ + +// utility union type +union __u { + int i; + unsigned int u; + float f; +}; + +__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) { + __u tmp; tmp.u = src; + tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i); + return tmp.u; +} + +__device__ static inline float __hip_ds_bpermutef(int index, float src) { + __u tmp; tmp.f = src; + tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i); + return tmp.f; +} + +__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) { + __u tmp; tmp.u = src; + tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i); + return tmp.u; +} + +__device__ static inline float __hip_ds_permutef(int index, float src) { + __u tmp; tmp.u = src; + tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i); + return tmp.u; +} + +__device__ static inline unsigned __hip_ds_swizzle(unsigned int src, int pattern) { + __u tmp; tmp.u = src; + tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern); + return tmp.u; +} +__device__ static inline float __hip_ds_swizzlef(float src, int pattern) { + __u tmp; tmp.f = src; + tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern); + return tmp.f; +} + +__device__ static inline int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, + int bank_mask, bool bound_ctrl) { + return __llvm_amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); +} + +#define MASK1 0x00ff00ff +#define MASK2 0xff00ff00 + +__device__ static inline char4 __hip_hc_add8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.a & MASK1; + unsigned one2 = in2.a & MASK1; + out.a = (one1 + one2) & MASK1; + one1 = in1.a & MASK2; + one2 = in2.a & MASK2; + out.a = out.a | ((one1 + one2) & MASK2); + return out; +} + +__device__ static inline char4 __hip_hc_sub8pk(char4 in1, char4 in2) { + 
char4 out; + unsigned one1 = in1.a & MASK1; + unsigned one2 = in2.a & MASK1; + out.a = (one1 - one2) & MASK1; + one1 = in1.a & MASK2; + one2 = in2.a & MASK2; + out.a = out.a | ((one1 - one2) & MASK2); + return out; +} + +__device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.a & MASK1; + unsigned one2 = in2.a & MASK1; + out.a = (one1 * one2) & MASK1; + one1 = in1.a & MASK2; + one2 = in2.a & MASK2; + out.a = out.a | ((one1 * one2) & MASK2); + return out; +} + /* Rounding modes are not yet supported in HIP */ -__device__ float __double2float_rd(double x); -__device__ float __double2float_rn(double x); -__device__ float __double2float_ru(double x); -__device__ float __double2float_rz(double x); +__device__ static inline float __double2float_rd(double x) { return (double)x; } +__device__ static inline float __double2float_rn(double x) { return (double)x; } +__device__ static inline float __double2float_ru(double x) { return (double)x; } +__device__ static inline float __double2float_rz(double x) { return (double)x; } -__device__ int __double2hiint(double x); +__device__ static inline int __double2hiint(double x) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); -__device__ int __double2int_rd(double x); -__device__ int __double2int_rn(double x); -__device__ int __double2int_ru(double x); -__device__ int __double2int_rz(double x); + int tmp[2]; + __builtin_memcpy(tmp, &x, sizeof(tmp)); -__device__ long long int __double2ll_rd(double x); -__device__ long long int __double2ll_rn(double x); -__device__ long long int __double2ll_ru(double x); -__device__ long long int __double2ll_rz(double x); + return tmp[1]; +} +__device__ static inline int __double2loint(double x) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); -__device__ int __double2loint(double x); + int tmp[2]; + __builtin_memcpy(tmp, &x, sizeof(tmp)); -__device__ unsigned int __double2uint_rd(double x); -__device__ unsigned int 
__double2uint_rn(double x); -__device__ unsigned int __double2uint_ru(double x); -__device__ unsigned int __double2uint_rz(double x); + return tmp[0]; +} -__device__ unsigned long long int __double2ull_rd(double x); -__device__ unsigned long long int __double2ull_rn(double x); -__device__ unsigned long long int __double2ull_ru(double x); -__device__ unsigned long long int __double2ull_rz(double x); +__device__ static inline int __double2int_rd(double x) { return (int)x; } +__device__ static inline int __double2int_rn(double x) { return (int)x; } +__device__ static inline int __double2int_ru(double x) { return (int)x; } +__device__ static inline int __double2int_rz(double x) { return (int)x; } + +__device__ static inline long long int __double2ll_rd(double x) { return (long long int)x; } +__device__ static inline long long int __double2ll_rn(double x) { return (long long int)x; } +__device__ static inline long long int __double2ll_ru(double x) { return (long long int)x; } +__device__ static inline long long int __double2ll_rz(double x) { return (long long int)x; } + +__device__ static inline unsigned int __double2uint_rd(double x) { return (unsigned int)x; } +__device__ static inline unsigned int __double2uint_rn(double x) { return (unsigned int)x; } +__device__ static inline unsigned int __double2uint_ru(double x) { return (unsigned int)x; } +__device__ static inline unsigned int __double2uint_rz(double x) { return (unsigned int)x; } + +__device__ static inline unsigned long long int __double2ull_rd(double x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __double2ull_rn(double x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __double2ull_ru(double x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __double2ull_rz(double x) { + return (unsigned long long int)x; +} + +__device__ static inline long long int __double_as_longlong(double x) { 
+ static_assert(sizeof(long long) == sizeof(double), ""); + + long long tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} -__device__ long long int __double_as_longlong(double x); /* __device__ unsigned short __float2half_rn(float x); __device__ float __half2float(unsigned short); @@ -146,72 +395,122 @@ CUDA implements half as unsigned short whereas, HIP doesn't. */ -__device__ int __float2int_rd(float x); -__device__ int __float2int_rn(float x); -__device__ int __float2int_ru(float x); -__device__ int __float2int_rz(float x); +__device__ static inline int __float2int_rd(float x) { return (int)__ocml_floor_f32(x); } +__device__ static inline int __float2int_rn(float x) { return (int)__ocml_rint_f32(x); } +__device__ static inline int __float2int_ru(float x) { return (int)__ocml_ceil_f32(x); } +__device__ static inline int __float2int_rz(float x) { return (int)__ocml_trunc_f32(x); } -__device__ long long int __float2ll_rd(float x); -__device__ long long int __float2ll_rn(float x); -__device__ long long int __float2ll_ru(float x); -__device__ long long int __float2ll_rz(float x); +__device__ static inline long long int __float2ll_rd(float x) { return (long long int)x; } +__device__ static inline long long int __float2ll_rn(float x) { return (long long int)x; } +__device__ static inline long long int __float2ll_ru(float x) { return (long long int)x; } +__device__ static inline long long int __float2ll_rz(float x) { return (long long int)x; } -__device__ unsigned int __float2uint_rd(float x); -__device__ unsigned int __float2uint_rn(float x); -__device__ unsigned int __float2uint_ru(float x); -__device__ unsigned int __float2uint_rz(float x); +__device__ static inline unsigned int __float2uint_rd(float x) { return (unsigned int)x; } +__device__ static inline unsigned int __float2uint_rn(float x) { return (unsigned int)x; } +__device__ static inline unsigned int __float2uint_ru(float x) { return (unsigned int)x; } +__device__ static inline unsigned int 
__float2uint_rz(float x) { return (unsigned int)x; } -__device__ unsigned long long int __float2ull_rd(float x); -__device__ unsigned long long int __float2ull_rn(float x); -__device__ unsigned long long int __float2ull_ru(float x); -__device__ unsigned long long int __float2ull_rz(float x); +__device__ static inline unsigned long long int __float2ull_rd(float x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __float2ull_rn(float x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __float2ull_ru(float x) { + return (unsigned long long int)x; +} +__device__ static inline unsigned long long int __float2ull_rz(float x) { + return (unsigned long long int)x; +} -__device__ int __float_as_int(float x); -__device__ unsigned int __float_as_uint(float x); -__device__ double __hiloint2double(int hi, int lo); -__device__ double __int2double_rn(int x); +__device__ static inline int __float_as_int(float x) { + static_assert(sizeof(int) == sizeof(float), ""); -__device__ float __int2float_rd(int x); -__device__ float __int2float_rn(int x); -__device__ float __int2float_ru(int x); -__device__ float __int2float_rz(int x); + int tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); -__device__ float __int_as_float(int x); + return tmp; +} -__device__ double __ll2double_rd(long long int x); -__device__ double __ll2double_rn(long long int x); -__device__ double __ll2double_ru(long long int x); -__device__ double __ll2double_rz(long long int x); +__device__ static inline unsigned int __float_as_uint(float x) { + static_assert(sizeof(unsigned int) == sizeof(float), ""); -__device__ float __ll2float_rd(long long int x); -__device__ float __ll2float_rn(long long int x); -__device__ float __ll2float_ru(long long int x); -__device__ float __ll2float_rz(long long int x); + unsigned int tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); -__device__ double __longlong_as_double(long long int x); + return tmp; +} 
-__device__ double __uint2double_rn(int x); +__device__ static inline double __hiloint2double(int hi, int lo) { + static_assert(sizeof(double) == sizeof(uint64_t), ""); -__device__ float __uint2float_rd(unsigned int x); -__device__ float __uint2float_rn(unsigned int x); -__device__ float __uint2float_ru(unsigned int x); -__device__ float __uint2float_rz(unsigned int x); + uint64_t tmp0 = (static_cast(hi) << 32ull) | static_cast(lo); + double tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); -__device__ float __uint_as_float(unsigned int x); + return tmp1; +} -__device__ double __ull2double_rd(unsigned long long int x); -__device__ double __ull2double_rn(unsigned long long int x); -__device__ double __ull2double_ru(unsigned long long int x); -__device__ double __ull2double_rz(unsigned long long int x); +__device__ static inline double __int2double_rn(int x) { return (double)x; } -__device__ float __ull2float_rd(unsigned long long int x); -__device__ float __ull2float_rn(unsigned long long int x); -__device__ float __ull2float_ru(unsigned long long int x); -__device__ float __ull2float_rz(unsigned long long int x); +__device__ static inline float __int2float_rd(int x) { return (float)x; } +__device__ static inline float __int2float_rn(int x) { return (float)x; } +__device__ static inline float __int2float_ru(int x) { return (float)x; } +__device__ static inline float __int2float_rz(int x) { return (float)x; } -__device__ char4 __hip_hc_add8pk(char4, char4); -__device__ char4 __hip_hc_sub8pk(char4, char4); -__device__ char4 __hip_hc_mul8pk(char4, char4); +__device__ static inline float __int_as_float(int x) { + static_assert(sizeof(float) == sizeof(int), ""); + + float tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline double __ll2double_rd(long long int x) { return (double)x; } +__device__ static inline double __ll2double_rn(long long int x) { return (double)x; } +__device__ static inline double __ll2double_ru(long long 
int x) { return (double)x; } +__device__ static inline double __ll2double_rz(long long int x) { return (double)x; } + +__device__ static inline float __ll2float_rd(long long int x) { return (float)x; } +__device__ static inline float __ll2float_rn(long long int x) { return (float)x; } +__device__ static inline float __ll2float_ru(long long int x) { return (float)x; } +__device__ static inline float __ll2float_rz(long long int x) { return (float)x; } + +__device__ static inline double __longlong_as_double(long long int x) { + static_assert(sizeof(double) == sizeof(long long), ""); + + double tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return x; +} + +__device__ static inline double __uint2double_rn(int x) { return (double)x; } + +__device__ static inline float __uint2float_rd(unsigned int x) { return (float)x; } +__device__ static inline float __uint2float_rn(unsigned int x) { return (float)x; } +__device__ static inline float __uint2float_ru(unsigned int x) { return (float)x; } +__device__ static inline float __uint2float_rz(unsigned int x) { return (float)x; } + +__device__ static inline float __uint_as_float(unsigned int x) { + static_assert(sizeof(float) == sizeof(unsigned int), ""); + + float tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline double __ull2double_rd(unsigned long long int x) { return (double)x; } +__device__ static inline double __ull2double_rn(unsigned long long int x) { return (double)x; } +__device__ static inline double __ull2double_ru(unsigned long long int x) { return (double)x; } +__device__ static inline double __ull2double_rz(unsigned long long int x) { return (double)x; } + +__device__ static inline float __ull2float_rd(unsigned long long int x) { return (float)x; } +__device__ static inline float __ull2float_rn(unsigned long long int x) { return (float)x; } +__device__ static inline float __ull2float_ru(unsigned long long int x) { return (float)x; } +__device__ static inline float 
__ull2float_rz(unsigned long long int x) { return (float)x; } #if defined(__HCC__) #define __HCC_OR_HIP_CLANG__ 1 diff --git a/hipamd/include/hip/hcc_detail/device_library_decls.h b/hipamd/include/hip/hcc_detail/device_library_decls.h index b3fa556bd4..8bf3ce3a19 100644 --- a/hipamd/include/hip/hcc_detail/device_library_decls.h +++ b/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -32,6 +32,11 @@ THE SOFTWARE. extern "C" __device__ int32_t __ockl_activelane_u32(void); +extern "C" __device__ float __ocml_floor_f32(float); +extern "C" __device__ float __ocml_rint_f32(float); +extern "C" __device__ float __ocml_ceil_f32(float); +extern "C" __device__ float __ocml_trunc_f32(float); + // Introduce local address space #define __local __attribute__((address_space(3))) __device__ inline static __local char* __to_local(unsigned x) { return (__local char*)x; } diff --git a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h index 2c7819b535..02df3c2fbe 100644 --- a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h +++ b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h @@ -34,4 +34,33 @@ THE SOFTWARE. 
__device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); +__device__ +unsigned int __llvm_bitrev_b32(unsigned int src0) __asm("llvm.bitreverse.i32"); + +__device__ +uint64_t __llvm_bitrev_b64(uint64_t src0) __asm("llvm.bitreverse.i64"); + +extern +__device__ +__attribute__((const)) +unsigned int __mbcnt_lo(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.lo"); + +extern +__device__ +__attribute__((const)) +unsigned int __mbcnt_hi(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.hi"); + +__device__ +int __llvm_amdgcn_ds_bpermute(int index, int src) __asm("llvm.amdgcn.ds.bpermute"); + +__device__ +int __llvm_amdgcn_ds_permute(int index, int src) __asm("llvm.amdgcn.ds.permute"); + +__device__ +int __llvm_amdgcn_ds_swizzle(int index, int pattern) __asm("llvm.amdgcn.ds.swizzle"); + +__device__ +int __llvm_amdgcn_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, + bool bound_ctrl) __asm("llvm.amdgcn.mov.dpp.i32"); + #endif diff --git a/hipamd/src/device_functions.cpp b/hipamd/src/device_functions.cpp deleted file mode 100644 index 8ef19bab3f..0000000000 --- a/hipamd/src/device_functions.cpp +++ /dev/null @@ -1,387 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include -#include -#include "device_util.h" - -__device__ float __double2float_rd(double x) { return (double)x; } -__device__ float __double2float_rn(double x) { return (double)x; } -__device__ float __double2float_ru(double x) { return (double)x; } -__device__ float __double2float_rz(double x) { return (double)x; } - - -__device__ int __double2hiint(double x) { - static_assert(sizeof(double) == 2 * sizeof(int), ""); - - int tmp[2]; - __builtin_memcpy(tmp, &x, sizeof(tmp)); - - return tmp[1]; -} -__device__ int __double2loint(double x) { - static_assert(sizeof(double) == 2 * sizeof(int), ""); - - int tmp[2]; - __builtin_memcpy(tmp, &x, sizeof(tmp)); - - return tmp[0]; -} - - -__device__ int __double2int_rd(double x) { return (int)x; } -__device__ int __double2int_rn(double x) { return (int)x; } -__device__ int __double2int_ru(double x) { return (int)x; } -__device__ int __double2int_rz(double x) { return (int)x; } - -__device__ long long int __double2ll_rd(double x) { return (long long int)x; } -__device__ long long int __double2ll_rn(double x) { return (long long int)x; } -__device__ long long int __double2ll_ru(double x) { return (long long int)x; } -__device__ long long int __double2ll_rz(double x) { return (long long int)x; } - - -__device__ unsigned int __double2uint_rd(double x) { return (unsigned int)x; } -__device__ unsigned int __double2uint_rn(double x) { return (unsigned int)x; } -__device__ unsigned int __double2uint_ru(double x) { return (unsigned int)x; } 
-__device__ unsigned int __double2uint_rz(double x) { return (unsigned int)x; } - -__device__ unsigned long long int __double2ull_rd(double x) { return (unsigned long long int)x; } -__device__ unsigned long long int __double2ull_rn(double x) { return (unsigned long long int)x; } -__device__ unsigned long long int __double2ull_ru(double x) { return (unsigned long long int)x; } -__device__ unsigned long long int __double2ull_rz(double x) { return (unsigned long long int)x; } - -__device__ long long int __double_as_longlong(double x) { - static_assert(sizeof(long long) == sizeof(double), ""); - - long long tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} - -__device__ int __float2int_rd(float x) { return (int)__ocml_floor_f32(x); } -__device__ int __float2int_rn(float x) { return (int)__ocml_rint_f32(x); } -__device__ int __float2int_ru(float x) { return (int)__ocml_ceil_f32(x); } -__device__ int __float2int_rz(float x) { return (int)__ocml_trunc_f32(x); } - -__device__ long long int __float2ll_rd(float x) { return (long long int)x; } -__device__ long long int __float2ll_rn(float x) { return (long long int)x; } -__device__ long long int __float2ll_ru(float x) { return (long long int)x; } -__device__ long long int __float2ll_rz(float x) { return (long long int)x; } - -__device__ unsigned int __float2uint_rd(float x) { return (unsigned int)x; } -__device__ unsigned int __float2uint_rn(float x) { return (unsigned int)x; } -__device__ unsigned int __float2uint_ru(float x) { return (unsigned int)x; } -__device__ unsigned int __float2uint_rz(float x) { return (unsigned int)x; } - -__device__ unsigned long long int __float2ull_rd(float x) { return (unsigned long long int)x; } -__device__ unsigned long long int __float2ull_rn(float x) { return (unsigned long long int)x; } -__device__ unsigned long long int __float2ull_ru(float x) { return (unsigned long long int)x; } -__device__ unsigned long long int __float2ull_rz(float x) { return (unsigned long long 
int)x; } - -__device__ int __float_as_int(float x) { - static_assert(sizeof(int) == sizeof(float), ""); - - int tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} -__device__ unsigned int __float_as_uint(float x) { - static_assert(sizeof(unsigned int) == sizeof(float), ""); - - unsigned int tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} -__device__ double __hiloint2double(int32_t hi, int32_t lo) { - static_assert(sizeof(double) == sizeof(uint64_t), ""); - - uint64_t tmp0 = (static_cast(hi) << 32ull) | static_cast(lo); - double tmp1; - __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); - - return tmp1; -} -__device__ double __int2double_rn(int x) { return (double)x; } - -__device__ float __int2float_rd(int x) { return (float)x; } -__device__ float __int2float_rn(int x) { return (float)x; } -__device__ float __int2float_ru(int x) { return (float)x; } -__device__ float __int2float_rz(int x) { return (float)x; } - -__device__ float __int_as_float(int x) { - static_assert(sizeof(float) == sizeof(int), ""); - - float tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} - -__device__ double __ll2double_rd(long long int x) { return (double)x; } -__device__ double __ll2double_rn(long long int x) { return (double)x; } -__device__ double __ll2double_ru(long long int x) { return (double)x; } -__device__ double __ll2double_rz(long long int x) { return (double)x; } - -__device__ float __ll2float_rd(long long int x) { return (float)x; } -__device__ float __ll2float_rn(long long int x) { return (float)x; } -__device__ float __ll2float_ru(long long int x) { return (float)x; } -__device__ float __ll2float_rz(long long int x) { return (float)x; } - -__device__ double __longlong_as_double(long long int x) { - static_assert(sizeof(double) == sizeof(long long), ""); - - double tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return x; -} - -__device__ double __uint2double_rn(int x) { return (double)x; } - -__device__ float 
__uint2float_rd(unsigned int x) { return (float)x; } -__device__ float __uint2float_rn(unsigned int x) { return (float)x; } -__device__ float __uint2float_ru(unsigned int x) { return (float)x; } -__device__ float __uint2float_rz(unsigned int x) { return (float)x; } - -__device__ float __uint_as_float(unsigned int x) { - static_assert(sizeof(float) == sizeof(unsigned int), ""); - - float tmp; - __builtin_memcpy(&tmp, &x, sizeof(tmp)); - - return tmp; -} - -__device__ double __ull2double_rd(unsigned long long int x) { return (double)x; } -__device__ double __ull2double_rn(unsigned long long int x) { return (double)x; } -__device__ double __ull2double_ru(unsigned long long int x) { return (double)x; } -__device__ double __ull2double_rz(unsigned long long int x) { return (double)x; } - -__device__ float __ull2float_rd(unsigned long long int x) { return (float)x; } -__device__ float __ull2float_rn(unsigned long long int x) { return (float)x; } -__device__ float __ull2float_ru(unsigned long long int x) { return (float)x; } -__device__ float __ull2float_rz(unsigned long long int x) { return (float)x; } - -/* -Integer Intrinsics -*/ - -// integer intrinsic function __poc __clz __ffs __brev -__device__ unsigned int __popc(unsigned int input) { return hc::__popcount_u32_b32(input); } - -__device__ unsigned int __popcll(unsigned long long int input) { - return hc::__popcount_u32_b64(input); -} - -__device__ unsigned int __clz(unsigned int input) { -#ifdef NVCC_COMPAT - return input == 0 ? 32 : hc::__firstbit_u32_u32(input); -#else - return hc::__firstbit_u32_u32(input); -#endif -} - -__device__ unsigned int __clzll(unsigned long long int input) { -#ifdef NVCC_COMPAT - return input == 0 ? 64 : hc::__firstbit_u32_u64(input); -#else - return hc::__firstbit_u32_u64(input); -#endif -} - -__device__ unsigned int __clz(int input) { -#ifdef NVCC_COMPAT - return input == 0 ? 
32 : hc::__firstbit_u32_s32(input); -#else - return hc::__firstbit_u32_s32(input); -#endif -} - -__device__ unsigned int __clzll(long long int input) { -#ifdef NVCC_COMPAT - return input == 0 ? 64 : hc::__firstbit_u32_s64(input); -#else - return hc::__firstbit_u32_s64(input); -#endif -} - -__device__ unsigned int __ffs(unsigned int input) { -#ifdef NVCC_COMPAT - return hc::__lastbit_u32_u32(input) + 1; -#else - return hc::__lastbit_u32_u32(input); -#endif -} - -__device__ unsigned int __ffsll(unsigned long long int input) { -#ifdef NVCC_COMPAT - return hc::__lastbit_u32_u64(input) + 1; -#else - return hc::__lastbit_u32_u64(input); -#endif -} - -__device__ unsigned int __ffs(int input) { -#ifdef NVCC_COMPAT - return hc::__lastbit_u32_s32(input) + 1; -#else - return hc::__lastbit_u32_s32(input); -#endif -} - -__device__ unsigned int __ffsll(long long int input) { -#ifdef NVCC_COMPAT - return hc::__lastbit_u32_s64(input) + 1; -#else - return hc::__lastbit_u32_s64(input); -#endif -} - -__device__ unsigned int __brev(unsigned int input) { return hc::__bitrev_b32(input); } - -__device__ unsigned long long int __brevll(unsigned long long int input) { - return hc::__bitrev_b64(input); -} - -struct ucharHolder { - union { - unsigned char c[4]; - unsigned int ui; - }; -} __attribute__((aligned(4))); - -struct uchar2Holder { - union { - unsigned int ui[2]; - unsigned char c[8]; - }; -} __attribute__((aligned(8))); - -__device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) { - struct uchar2Holder cHoldVal; - struct ucharHolder cHoldKey; - struct ucharHolder cHoldOut; - cHoldKey.ui = s; - cHoldVal.ui[0] = x; - cHoldVal.ui[1] = y; - cHoldOut.c[0] = cHoldVal.c[cHoldKey.c[0]]; - cHoldOut.c[1] = cHoldVal.c[cHoldKey.c[1]]; - cHoldOut.c[2] = cHoldVal.c[cHoldKey.c[2]]; - cHoldOut.c[3] = cHoldVal.c[cHoldKey.c[3]]; - return cHoldOut.ui; -} - -__device__ long long __mul64hi(long long int x, long long int y) { - ulong x0 = (ulong)x & 0xffffffffUL; - long x1 = x 
>> 32; - ulong y0 = (ulong)y & 0xffffffffUL; - long y1 = y >> 32; - ulong z0 = x0*y0; - long t = x1*y0 + (z0 >> 32); - long z1 = t & 0xffffffffL; - long z2 = t >> 32; - z1 = x0*y1 + z1; - return x1*y1 + z2 + (z1 >> 32); -} - -__device__ unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) { - ulong x0 = x & 0xffffffffUL; - ulong x1 = x >> 32; - ulong y0 = y & 0xffffffffUL; - ulong y1 = y >> 32; - ulong z0 = x0*y0; - ulong t = x1*y0 + (z0 >> 32); - ulong z1 = t & 0xffffffffUL; - ulong z2 = t >> 32; - z1 = x0*y1 + z1; - return x1*y1 + z2 + (z1 >> 32); -} - -/* -HIP specific device functions -*/ - -__device__ unsigned __hip_ds_bpermute(int index, unsigned src) { - return hc::__amdgcn_ds_bpermute(index, src); -} - -__device__ float __hip_ds_bpermutef(int index, float src) { - return hc::__amdgcn_ds_bpermute(index, src); -} - -__device__ unsigned __hip_ds_permute(int index, unsigned src) { - return hc::__amdgcn_ds_permute(index, src); -} - -__device__ float __hip_ds_permutef(int index, float src) { - return hc::__amdgcn_ds_permute(index, src); -} - -__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern) { - return hc::__amdgcn_ds_swizzle(src, pattern); -} - -__device__ float __hip_ds_swizzlef(float src, int pattern) { - return hc::__amdgcn_ds_swizzle(src, pattern); -} - -__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl) { - return hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); -} - -#define MASK1 0x00ff00ff -#define MASK2 0xff00ff00 - -__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) { - char4 out; - unsigned one1 = in1.a & MASK1; - unsigned one2 = in2.a & MASK1; - out.a = (one1 + one2) & MASK1; - one1 = in1.a & MASK2; - one2 = in2.a & MASK2; - out.a = out.a | ((one1 + one2) & MASK2); - return out; -} - -__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) { - char4 out; - unsigned one1 = in1.a & MASK1; - unsigned one2 = in2.a & MASK1; - out.a = 
(one1 - one2) & MASK1; - one1 = in1.a & MASK2; - one2 = in2.a & MASK2; - out.a = out.a | ((one1 - one2) & MASK2); - return out; -} - -__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) { - char4 out; - unsigned one1 = in1.a & MASK1; - unsigned one2 = in2.a & MASK1; - out.a = (one1 * one2) & MASK1; - one1 = in1.a & MASK2; - one2 = in2.a & MASK2; - out.a = out.a | ((one1 * one2) & MASK2); - return out; -} From 175c87f2bfbc3ae5399f519313ef1be69a506ed1 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 19 Jun 2018 21:09:44 +0000 Subject: [PATCH 09/40] Implement hip_hc.ll into HIP headers Move all __hip_hc_ir_* functions from hip_hc.ll into HIP header as inline asm. Remove hip_hc.ll and build dependencies from HIP. --- hipamd/CMakeLists.txt | 1 - hipamd/bin/hipcc | 5 +- .../include/hip/hcc_detail/device_functions.h | 56 +++++++++++++++++-- hipamd/packaging/hip_hcc.txt | 1 - hipamd/src/hip_hc.ll | 30 ---------- 5 files changed, 52 insertions(+), 41 deletions(-) delete mode 100644 hipamd/src/hip_hc.ll diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index b649b20c21..9095ff6531 100644 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -237,7 +237,6 @@ endif() # Install hip_hcc if platform is hcc if(HIP_PLATFORM STREQUAL "hcc") install(TARGETS hip_hcc_static hip_hcc hip_device DESTINATION lib) - install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_hc.ll DESTINATION lib) # Install .hipInfo install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index 03f35b27fc..4f56ffd875 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -254,8 +254,7 @@ if($HIP_PLATFORM eq "hcc"){ } if(($HIP_PLATFORM eq "hcc")){ - $ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n"; - $ENV{HIP_HC_IR_FILE}=""; + $ENV{HCC_EXTRA_LIBRARIES}="\n"; } if($HIP_PLATFORM eq "nvcc"){ @@ -508,7 +507,7 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ print "No valid AMD GPU target was either specified or found. 
Please specify a valid target using --amdgpu-target=" and die(); } - $ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n"; + $ENV{HCC_EXTRA_LIBRARIES}="\n"; if($HIP_PLATFORM eq "hcc") { $GPU_ARCH_OPT = " --amdgpu-target="; diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index b147cd9b80..cb9dd82c0d 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -34,11 +34,55 @@ THE SOFTWARE. typedef unsigned long ulong; typedef unsigned int uint; -extern "C" __device__ unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int); -extern "C" __device__ signed int __hip_hc_ir_mul24_int(signed int, signed int); -extern "C" __device__ signed int __hip_hc_ir_mulhi_int(signed int, signed int); -extern "C" __device__ unsigned int __hip_hc_ir_umulhi_int(unsigned int, unsigned int); -extern "C" __device__ unsigned int __hip_hc_ir_usad_int(unsigned int, unsigned int, unsigned int); +extern "C" __device__ inline uint __hip_hc_ir_umul24_int(uint a, uint b) { + // define i32 @__hip_hc_ir_umul24_int(i32 %a, i32 %b) #1 { + // %1 = tail call i32 asm sideeffect "v_mul_u32_u24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) + // ret i32 %1 + // } + uint out; + __asm volatile("v_mul_u32_u24 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b)); + return out; +} + +extern "C" __device__ inline int __hip_hc_ir_mul24_int(int a, int b) { + // define i32 @__hip_hc_ir_mul24_int(i32 %a, i32 %b) #1 { + // %1 = tail call i32 asm sideeffect "v_mul_i32_i24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) + // ret i32 %1 + // } + int out; + __asm volatile("v_mul_i32_i24 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b)); + return out; +} + +extern "C" __device__ inline int __hip_hc_ir_mulhi_int(int a, int b) { + // define i32 @__hip_hc_ir_mulhi_int(i32 %a, i32 %b) #1 { + // %1 = tail call i32 asm sideeffect "v_mul_hi_i32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) + // ret i32 %1 + // } + int out; + __asm 
volatile("v_mul_hi_i32 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b)); + return out; +} + +extern "C" __device__ inline uint __hip_hc_ir_umulhi_int(uint a, uint b) { + // define i32 @__hip_hc_ir_umulhi_int(i32 %a, i32 %b) #1 { + // %1 = tail call i32 asm sideeffect "v_mul_hi_u32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) + // ret i32 %1 + // } + uint out; + __asm volatile("v_mul_hi_u32 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b)); + return out; +} + +extern "C" __device__ inline uint __hip_hc_ir_usad_int(uint a, uint b, uint c) { + // define i32 @__hip_hc_ir_usad_int(i32 %a, i32 %b, i32 %c) #1 { + // %1 = tail call i32 asm sideeffect "v_sad_u32 $0, $1, $2, $3","=v,v,v,v"(i32 %a, i32 %b, i32 %c) + // ret i32 %1 + // } + uint out; + __asm volatile("v_sad_u32 %0, %1, %2, %3" : "=v"(out) : "v"(a), "v"(b), "v"(c)); + return out; +} /* Integer Intrinsics @@ -556,7 +600,7 @@ uint64_t __ballot64(int a) { // %b = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %a) #1 // ret i64 %b // } - __asm("v_cmp_ne_i32_e64 $0, 0, $1" : "=s"(s) : "v"(a)); + __asm("v_cmp_ne_i32_e64 %0, 0, %1" : "=s"(s) : "v"(a)); return s; } diff --git a/hipamd/packaging/hip_hcc.txt b/hipamd/packaging/hip_hcc.txt index 04293f2044..9d4b96761d 100644 --- a/hipamd/packaging/hip_hcc.txt +++ b/hipamd/packaging/hip_hcc.txt @@ -5,7 +5,6 @@ install(FILES @PROJECT_BINARY_DIR@/libhip_hcc.so DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/libhip_hcc_static.a DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/libhip_device.a DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) -install(FILES @hip_SOURCE_DIR@/src/hip_hc.ll DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip) diff --git a/hipamd/src/hip_hc.ll b/hipamd/src/hip_hc.ll deleted file mode 100644 index 
aba9205912..0000000000 --- a/hipamd/src/hip_hc.ll +++ /dev/null @@ -1,30 +0,0 @@ -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" -target triple = "amdgcn--amdhsa" - -define i32 @__hip_hc_ir_mul24_int(i32 %a, i32 %b) #1 { - %1 = tail call i32 asm sideeffect "v_mul_i32_i24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) - ret i32 %1 -} - -define i32 @__hip_hc_ir_umul24_int(i32 %a, i32 %b) #1 { - %1 = tail call i32 asm sideeffect "v_mul_u32_u24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) - ret i32 %1 -} - -define i32 @__hip_hc_ir_mulhi_int(i32 %a, i32 %b) #1 { - %1 = tail call i32 asm sideeffect "v_mul_hi_i32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) - ret i32 %1 -} - -define i32 @__hip_hc_ir_umulhi_int(i32 %a, i32 %b) #1 { - %1 = tail call i32 asm sideeffect "v_mul_hi_u32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) - ret i32 %1 -} - -define i32 @__hip_hc_ir_usad_int(i32 %a, i32 %b, i32 %c) #1 { - %1 = tail call i32 asm sideeffect "v_sad_u32 $0, $1, $2, $3","=v,v,v,v"(i32 %a, i32 %b, i32 %c) - ret i32 %1 -} - -attributes #1 = { alwaysinline nounwind } - From 9a295a732d60428cc4790b9416c43b98f5992e37 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 19 Jun 2018 22:07:26 +0000 Subject: [PATCH 10/40] Implement __ballot, __any, __all into HIP headers --- .../include/hip/hcc_detail/device_functions.h | 56 ++++++++++++++----- hipamd/src/device_util.cpp | 17 ------ 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index cb9dd82c0d..71963a99e0 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -585,23 +585,51 @@ void __named_sync(int a, int b) { __builtin_amdgcn_s_barrier(); } #endif // __HIP_DEVICE_COMPILE__ // warp vote function __all __any __ballot -__device__ -int __all(int input); 
-__device__ -int __any(int input); -__device__ -unsigned long long int __ballot(int input); +extern "C" __device__ inline uint64_t __activelanemask_v4_b64_b1(unsigned int input) { + uint64_t output; + // define i64 @__activelanemask_v4_b64_b1(i32 %input) #5 { + // %a = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %input) #9 + // ret i64 %a + // } + __asm("v_cmp_ne_i32_e64 %0, 0, %1" : "=s"(output) : "v"(input)); + return output; +} __device__ inline -uint64_t __ballot64(int a) { - int64_t s; - // define i64 @__ballot64(i32 %a) #0 { - // %b = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %a) #1 - // ret i64 %b - // } - __asm("v_cmp_ne_i32_e64 %0, 0, %1" : "=s"(s) : "v"(a)); - return s; +unsigned int __activelanecount_u32_b1(unsigned int input) { + return __popcll(__activelanemask_v4_b64_b1(input)); +} + +__device__ +inline +int __all(int predicate) { + return __popcll(__activelanemask_v4_b64_b1(predicate)) == __activelanecount_u32_b1(1); +} + +__device__ +inline +int __any(int predicate) { +#ifdef NVCC_COMPAT + if (__popcll(__activelanemask_v4_b64_b1(predicate)) != 0) + return 1; + else + return 0; +#else + return __popcll(__activelanemask_v4_b64_b1(predicate)); +#endif +} + +__device__ +inline +unsigned long long int __ballot(int predicate) { + return __activelanemask_v4_b64_b1(predicate); +} + +__device__ +inline +unsigned long long int __ballot64(int predicate) { + return __activelanemask_v4_b64_b1(predicate); } // hip.amdgcn.bc - lanemask diff --git a/hipamd/src/device_util.cpp b/hipamd/src/device_util.cpp index a3386ba14d..853ca71c09 100644 --- a/hipamd/src/device_util.cpp +++ b/hipamd/src/device_util.cpp @@ -147,23 +147,6 @@ __device__ void* __hip_hc_memset(void* dst, uint8_t val, size_t size) { // abort __device__ void abort() { return hc::abort(); } -// warp vote function __all __any __ballot -__device__ int __all(int input) { return hc::__all(input); } - - -__device__ int __any(int input) { -#ifdef NVCC_COMPAT - if 
(hc::__any(input) != 0) - return 1; - else - return 0; -#else - return hc::__any(input); -#endif -} - -__device__ unsigned long long int __ballot(int input) { return hc::__ballot(input); } - // warp shuffle functions __device__ int __shfl(int input, int lane, int width) { return hc::__shfl(input, lane, width); } From 5d4986d4703015e02cccc7e65c4fece839a03e30 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 20 Jun 2018 20:39:30 +0000 Subject: [PATCH 11/40] Replace __hip_hc_ir_ inline asm with __ockl_* functions --- .../include/hip/hcc_detail/device_functions.h | 66 ++++--------------- .../hip/hcc_detail/device_library_decls.h | 6 ++ 2 files changed, 17 insertions(+), 55 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index 71963a99e0..8ea6632ffc 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -34,56 +34,6 @@ THE SOFTWARE. typedef unsigned long ulong; typedef unsigned int uint; -extern "C" __device__ inline uint __hip_hc_ir_umul24_int(uint a, uint b) { - // define i32 @__hip_hc_ir_umul24_int(i32 %a, i32 %b) #1 { - // %1 = tail call i32 asm sideeffect "v_mul_u32_u24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) - // ret i32 %1 - // } - uint out; - __asm volatile("v_mul_u32_u24 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b)); - return out; -} - -extern "C" __device__ inline int __hip_hc_ir_mul24_int(int a, int b) { - // define i32 @__hip_hc_ir_mul24_int(i32 %a, i32 %b) #1 { - // %1 = tail call i32 asm sideeffect "v_mul_i32_i24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) - // ret i32 %1 - // } - int out; - __asm volatile("v_mul_i32_i24 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b)); - return out; -} - -extern "C" __device__ inline int __hip_hc_ir_mulhi_int(int a, int b) { - // define i32 @__hip_hc_ir_mulhi_int(i32 %a, i32 %b) #1 { - // %1 = tail call i32 asm sideeffect "v_mul_hi_i32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) - // ret i32 %1 - // } 
- int out; - __asm volatile("v_mul_hi_i32 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b)); - return out; -} - -extern "C" __device__ inline uint __hip_hc_ir_umulhi_int(uint a, uint b) { - // define i32 @__hip_hc_ir_umulhi_int(i32 %a, i32 %b) #1 { - // %1 = tail call i32 asm sideeffect "v_mul_hi_u32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b) - // ret i32 %1 - // } - uint out; - __asm volatile("v_mul_hi_u32 %0, %1, %2" : "=v"(out) : "v"(a), "v"(b)); - return out; -} - -extern "C" __device__ inline uint __hip_hc_ir_usad_int(uint a, uint b, uint c) { - // define i32 @__hip_hc_ir_usad_int(i32 %a, i32 %b, i32 %c) #1 { - // %1 = tail call i32 asm sideeffect "v_sad_u32 $0, $1, $2, $3","=v,v,v,v"(i32 %a, i32 %b, i32 %c) - // ret i32 %1 - // } - uint out; - __asm volatile("v_sad_u32 %0, %1, %2, %3" : "=v"(out) : "v"(a), "v"(b), "v"(c)); - return out; -} - /* Integer Intrinsics */ @@ -217,7 +167,10 @@ __device__ static inline unsigned int __hadd(int x, int y) { int value = z & 0x7FFFFFFF; return ((value) >> 1 || sign); } -__device__ static inline int __mul24(int x, int y) { return __hip_hc_ir_mul24_int(x, y); } + +__device__ static inline int __mul24(int x, int y) { + return __ockl_mul24_i32(x, y); +} __device__ static inline long long __mul64hi(long long int x, long long int y) { ulong x0 = (ulong)x & 0xffffffffUL; @@ -232,7 +185,10 @@ __device__ static inline long long __mul64hi(long long int x, long long int y) { return x1*y1 + z2 + (z1 >> 32); } -__device__ static inline int __mulhi(int x, int y) { return __hip_hc_ir_mulhi_int(x, y); } +__device__ static inline int __mulhi(int x, int y) { + return __ockl_mul_hi_i32(x, y); +} + __device__ static inline int __rhadd(int x, int y) { int z = x + y + 1; int sign = z & 0x8000000; @@ -246,7 +202,7 @@ __device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) { return (x + y) >> 1; } __device__ static inline int __umul24(unsigned int x, unsigned int y) { - return __hip_hc_ir_umul24_int(x, y); + return __ockl_mul24_u32(x, 
y); } __device__ @@ -264,13 +220,13 @@ static inline unsigned long long __umul64hi(unsigned long long int x, unsigned l } __device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) { - return __hip_hc_ir_umulhi_int(x, y); + return __ockl_mul_hi_u32(x, y); } __device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) { return (x + y + 1) >> 1; } __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) { - return __hip_hc_ir_usad_int(x, y, z); + return __ockl_sad_u32(x, y, z); } __device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); } diff --git a/hipamd/include/hip/hcc_detail/device_library_decls.h b/hipamd/include/hip/hcc_detail/device_library_decls.h index 8bf3ce3a19..fba2d53e2e 100644 --- a/hipamd/include/hip/hcc_detail/device_library_decls.h +++ b/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -32,6 +32,12 @@ THE SOFTWARE. extern "C" __device__ int32_t __ockl_activelane_u32(void); +extern "C" __device__ uint __ockl_mul24_u32(uint, uint); +extern "C" __device__ int __ockl_mul24_i32(int, int); +extern "C" __device__ uint __ockl_mul_hi_u32(uint, uint); +extern "C" __device__ int __ockl_mul_hi_i32(int, int); +extern "C" __device__ uint __ockl_sad_u32(uint, uint, uint); + extern "C" __device__ float __ocml_floor_f32(float); extern "C" __device__ float __ocml_rint_f32(float); extern "C" __device__ float __ocml_ceil_f32(float); From 3ff1d4081e10a6b4be5348b36f70558d954c29fa Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Mon, 25 Jun 2018 17:49:50 +0100 Subject: [PATCH 12/40] Let's try this again... 
--- .../include/hip/hcc_detail/hip_vector_types.h | 571 ++++++++++++++++-- hipamd/src/device_functions.cpp | 36 +- hipamd/tests/src/deviceLib/hipVectorTypes.cpp | 104 ++-- .../src/deviceLib/hipVectorTypesDevice.cpp | 107 ++-- .../tests/src/deviceLib/vector_test_common.h | 36 -- 5 files changed, 646 insertions(+), 208 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_vector_types.h b/hipamd/include/hip/hcc_detail/hip_vector_types.h index 59b9c247e3..704ff7e550 100644 --- a/hipamd/include/hip/hcc_detail/hip_vector_types.h +++ b/hipamd/include/hip/hcc_detail/hip_vector_types.h @@ -34,6 +34,8 @@ THE SOFTWARE. #include "hip/hcc_detail/host_defines.h" +#include + #if defined(__clang__) #define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n))) #elif defined(__GNUC__) // N.B.: GCC does not support .xyzw syntax. @@ -43,65 +45,538 @@ THE SOFTWARE. __attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T)))) #endif -typedef unsigned char uchar1 __NATIVE_VECTOR__(1, unsigned char); -typedef unsigned char uchar2 __NATIVE_VECTOR__(2, unsigned char); -typedef unsigned char uchar3 __NATIVE_VECTOR__(3, unsigned char); -typedef unsigned char uchar4 __NATIVE_VECTOR__(4, unsigned char); +#if defined(__cplusplus) + template struct HIP_vector_base; -typedef char char1 __NATIVE_VECTOR__(1, char); -typedef char char2 __NATIVE_VECTOR__(2, char); -typedef char char3 __NATIVE_VECTOR__(3, char); -typedef char char4 __NATIVE_VECTOR__(4, char); + template + struct HIP_vector_base { + typedef T Native_vec_ __NATIVE_VECTOR__(1, T); -typedef unsigned short ushort1 __NATIVE_VECTOR__(1, unsigned short); -typedef unsigned short ushort2 __NATIVE_VECTOR__(2, unsigned short); -typedef unsigned short ushort3 __NATIVE_VECTOR__(3, unsigned short); -typedef unsigned short ushort4 __NATIVE_VECTOR__(4, unsigned short); + union { + Native_vec_ data; + struct { + typename std::decay< + decltype(std::declval().x)>::type x; + }; + }; + }; -typedef short short1 __NATIVE_VECTOR__(1, 
short); -typedef short short2 __NATIVE_VECTOR__(2, short); -typedef short short3 __NATIVE_VECTOR__(3, short); -typedef short short4 __NATIVE_VECTOR__(4, short); + template + struct HIP_vector_base { + typedef T Native_vec_ __NATIVE_VECTOR__(2, T); -typedef unsigned int uint1 __NATIVE_VECTOR__(1, unsigned int); -typedef unsigned int uint2 __NATIVE_VECTOR__(2, unsigned int); -typedef unsigned int uint3 __NATIVE_VECTOR__(3, unsigned int); -typedef unsigned int uint4 __NATIVE_VECTOR__(4, unsigned int); + union { + Native_vec_ data; + struct { + typename std::decay< + decltype(std::declval().x)>::type x; + typename std::decay< + decltype(std::declval().y)>::type y; + }; + }; + }; -typedef int int1 __NATIVE_VECTOR__(1, int); -typedef int int2 __NATIVE_VECTOR__(2, int); -typedef int int3 __NATIVE_VECTOR__(3, int); -typedef int int4 __NATIVE_VECTOR__(4, int); + template + struct HIP_vector_base { + typedef T Native_vec_ __NATIVE_VECTOR__(3, T); -typedef unsigned long ulong1 __NATIVE_VECTOR__(1, unsigned long); -typedef unsigned long ulong2 __NATIVE_VECTOR__(2, unsigned long); -typedef unsigned long ulong3 __NATIVE_VECTOR__(3, unsigned long); -typedef unsigned long ulong4 __NATIVE_VECTOR__(4, unsigned long); + union { + Native_vec_ data; + struct { + typename std::decay< + decltype(std::declval().x)>::type x; + typename std::decay< + decltype(std::declval().y)>::type y; + typename std::decay< + decltype(std::declval().z)>::type z; + }; + }; + }; -typedef long long1 __NATIVE_VECTOR__(1, long); -typedef long long2 __NATIVE_VECTOR__(2, long); -typedef long long3 __NATIVE_VECTOR__(3, long); -typedef long long4 __NATIVE_VECTOR__(4, long); + template + struct HIP_vector_base { + typedef T Native_vec_ __NATIVE_VECTOR__(4, T); -typedef unsigned long long ulonglong1 __NATIVE_VECTOR__(1, unsigned long long); -typedef unsigned long long ulonglong2 __NATIVE_VECTOR__(2, unsigned long long); -typedef unsigned long long ulonglong3 __NATIVE_VECTOR__(3, unsigned long long); -typedef 
unsigned long long ulonglong4 __NATIVE_VECTOR__(4, unsigned long long); + union { + Native_vec_ data; + struct { + typename std::decay< + decltype(std::declval().x)>::type x; + typename std::decay< + decltype(std::declval().y)>::type y; + typename std::decay< + decltype(std::declval().z)>::type z; + typename std::decay< + decltype(std::declval().w)>::type w; + }; + }; + }; -typedef long long longlong1 __NATIVE_VECTOR__(1, long long); -typedef long long longlong2 __NATIVE_VECTOR__(2, long long); -typedef long long longlong3 __NATIVE_VECTOR__(3, long long); -typedef long long longlong4 __NATIVE_VECTOR__(4, long long); + template + struct HIP_vector_type : public HIP_vector_base { + using HIP_vector_base::data; + using typename HIP_vector_base::Native_vec_; -typedef float float1 __NATIVE_VECTOR__(1, float); -typedef float float2 __NATIVE_VECTOR__(2, float); -typedef float float3 __NATIVE_VECTOR__(3, float); -typedef float float4 __NATIVE_VECTOR__(4, float); + __host__ __device__ + HIP_vector_type() = default; + template< + typename U, + typename std::enable_if< + std::is_convertible{}>::type* = nullptr> + __host__ __device__ + explicit + HIP_vector_type(U x) noexcept { data = Native_vec_(x); } + template< // TODO: constrain based on type as well. + typename... Us, + typename std::enable_if::type* = nullptr> + __host__ __device__ + HIP_vector_type(Us... 
xs) noexcept { data = Native_vec_{xs...}; } + __host__ __device__ + HIP_vector_type(const HIP_vector_type&) = default; + __host__ __device__ + HIP_vector_type(HIP_vector_type&&) = default; + __host__ __device__ + ~HIP_vector_type() = default; -typedef double double1 __NATIVE_VECTOR__(1, double); -typedef double double2 __NATIVE_VECTOR__(2, double); -typedef double double3 __NATIVE_VECTOR__(3, double); -typedef double double4 __NATIVE_VECTOR__(4, double); + __host__ __device__ + HIP_vector_type& operator=(const HIP_vector_type&) = default; + __host__ __device__ + HIP_vector_type& operator=(HIP_vector_type&&) = default; + + // Operators + __host__ __device__ + HIP_vector_type& operator++() noexcept + { + data += Native_vec_(1); + return *this; + } + __host__ __device__ + HIP_vector_type operator++(int) noexcept + { + auto tmp(*this); + ++*this; + return tmp; + } + __host__ __device__ + HIP_vector_type& operator--() noexcept + { + data -= Native_vec_(1); + return *this; + } + __host__ __device__ + HIP_vector_type operator--(int) noexcept + { + auto tmp(*this); + --*this; + return tmp; + } + __host__ __device__ + HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept + { + data += x.data; + return *this; + } + __host__ __device__ + HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept + { + data -= x.data; + return *this; + } + __host__ __device__ + HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept + { + data *= x.data; + return *this; + } + __host__ __device__ + HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept + { + data /= x.data; + return *this; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type operator-() noexcept + { + auto tmp(*this); + tmp.data = -tmp.data; + return tmp; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type operator~() noexcept + { + HIP_vector_type r{*this}; + 
r.data = ~r.data; + return r; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept + { + data %= x.data; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept + { + data ^= x.data; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept + { + data |= x.data; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept + { + data &= x.data; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept + { + data >>= x.data; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept + { + data <<= x.data; + return *this; + } + }; + + + template + __host__ __device__ + inline + HIP_vector_type operator+( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} += y; + } + + template + __host__ __device__ + inline + HIP_vector_type operator-( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} -= y; + } + + template + __host__ __device__ + inline + HIP_vector_type operator*( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} *= y; + } + + template + __host__ __device__ + inline + HIP_vector_type operator/( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} /= y; + } + + 
template + __host__ __device__ + inline + bool operator==( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + auto tmp = x.data == y.data; + for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false; + return true; + } + + template + __host__ __device__ + inline + bool operator!=( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return !(x == y); + } + + template + __host__ __device__ + inline + bool operator<( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + auto tmp = x.data < y.data; + for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false; + return true; + } + + template + __host__ __device__ + inline + bool operator>( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return y < x; + } + + template + __host__ __device__ + inline + bool operator<=( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return !(y < x); + } + + template + __host__ __device__ + inline + bool operator>=( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return !(x < y); + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator%( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} %= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator^( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} ^= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator|( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} |= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator&( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return 
HIP_vector_type{x} &= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator>>( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} >>= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator<<( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} <<= y; + } + + // TODO: the following are rather dubious in terms of general utility. + template + inline + bool operator||( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + auto tmp = x.data || y.data; + for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false; + return true; + } + + template + inline + bool operator&&( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + auto tmp = x.data && y.data; + for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false; + return true; + } + + #define __MAKE_VECTOR_TYPE__(CUDA_name, T, n) \ + using CUDA_name = HIP_vector_type; +#else + typedef unsigned char uchar1 __NATIVE_VECTOR__(1, unsigned char); + typedef unsigned char uchar2 __NATIVE_VECTOR__(2, unsigned char); + typedef unsigned char uchar3 __NATIVE_VECTOR__(3, unsigned char); + typedef unsigned char uchar4 __NATIVE_VECTOR__(4, unsigned char); + + typedef char char1 __NATIVE_VECTOR__(1, char); + typedef char char2 __NATIVE_VECTOR__(2, char); + typedef char char3 __NATIVE_VECTOR__(3, char); + typedef char char4 __NATIVE_VECTOR__(4, char); + + typedef unsigned short ushort1 __NATIVE_VECTOR__(1, unsigned short); + typedef unsigned short ushort2 __NATIVE_VECTOR__(2, unsigned short); + typedef unsigned short ushort3 __NATIVE_VECTOR__(3, unsigned short); + typedef unsigned short ushort4 __NATIVE_VECTOR__(4, unsigned short); + + typedef short short1 __NATIVE_VECTOR__(1, short); + typedef short short2 __NATIVE_VECTOR__(2, short); + typedef short short3 
__NATIVE_VECTOR__(3, short); + typedef short short4 __NATIVE_VECTOR__(4, short); + + typedef unsigned int uint1 __NATIVE_VECTOR__(1, unsigned int); + typedef unsigned int uint2 __NATIVE_VECTOR__(2, unsigned int); + typedef unsigned int uint3 __NATIVE_VECTOR__(3, unsigned int); + typedef unsigned int uint4 __NATIVE_VECTOR__(4, unsigned int); + + typedef int int1 __NATIVE_VECTOR__(1, int); + typedef int int2 __NATIVE_VECTOR__(2, int); + typedef int int3 __NATIVE_VECTOR__(3, int); + typedef int int4 __NATIVE_VECTOR__(4, int); + + typedef unsigned long ulong1 __NATIVE_VECTOR__(1, unsigned long); + typedef unsigned long ulong2 __NATIVE_VECTOR__(2, unsigned long); + typedef unsigned long ulong3 __NATIVE_VECTOR__(3, unsigned long); + typedef unsigned long ulong4 __NATIVE_VECTOR__(4, unsigned long); + + typedef long long1 __NATIVE_VECTOR__(1, long); + typedef long long2 __NATIVE_VECTOR__(2, long); + typedef long long3 __NATIVE_VECTOR__(3, long); + typedef long long4 __NATIVE_VECTOR__(4, long); + + typedef unsigned long long ulonglong1 __NATIVE_VECTOR__(1, unsigned long long); + typedef unsigned long long ulonglong2 __NATIVE_VECTOR__(2, unsigned long long); + typedef unsigned long long ulonglong3 __NATIVE_VECTOR__(3, unsigned long long); + typedef unsigned long long ulonglong4 __NATIVE_VECTOR__(4, unsigned long long); + + typedef long long longlong1 __NATIVE_VECTOR__(1, long long); + typedef long long longlong2 __NATIVE_VECTOR__(2, long long); + typedef long long longlong3 __NATIVE_VECTOR__(3, long long); + typedef long long longlong4 __NATIVE_VECTOR__(4, long long); + + typedef float float1 __NATIVE_VECTOR__(1, float); + typedef float float2 __NATIVE_VECTOR__(2, float); + typedef float float3 __NATIVE_VECTOR__(3, float); + typedef float float4 __NATIVE_VECTOR__(4, float); + + typedef double double1 __NATIVE_VECTOR__(1, double); + typedef double double2 __NATIVE_VECTOR__(2, double); + typedef double double3 __NATIVE_VECTOR__(3, double); + typedef double double4 
__NATIVE_VECTOR__(4, double); +#endif + +__MAKE_VECTOR_TYPE__(uchar1, unsigned char, 1); +__MAKE_VECTOR_TYPE__(uchar2, unsigned char, 2); +__MAKE_VECTOR_TYPE__(uchar3, unsigned char, 3); +__MAKE_VECTOR_TYPE__(uchar4, unsigned char, 4); + +__MAKE_VECTOR_TYPE__(char1, char, 1); +__MAKE_VECTOR_TYPE__(char2, char, 2); +__MAKE_VECTOR_TYPE__(char3, char, 3); +__MAKE_VECTOR_TYPE__(char4, char, 4); + +__MAKE_VECTOR_TYPE__(ushort1, unsigned short, 1); +__MAKE_VECTOR_TYPE__(ushort2, unsigned short, 2); +__MAKE_VECTOR_TYPE__(ushort3, unsigned short, 3); +__MAKE_VECTOR_TYPE__(ushort4, unsigned short, 4); + +__MAKE_VECTOR_TYPE__(short1, short, 1); +__MAKE_VECTOR_TYPE__(short2, short, 2); +__MAKE_VECTOR_TYPE__(short3, short, 3); +__MAKE_VECTOR_TYPE__(short4, short, 4); + +__MAKE_VECTOR_TYPE__(uint1, unsigned int, 1); +__MAKE_VECTOR_TYPE__(uint2, unsigned int, 2); +__MAKE_VECTOR_TYPE__(uint3, unsigned int, 3); +__MAKE_VECTOR_TYPE__(uint4, unsigned int, 4); + +__MAKE_VECTOR_TYPE__(int1, int, 1); +__MAKE_VECTOR_TYPE__(int2, int, 2); +__MAKE_VECTOR_TYPE__(int3, int, 3); +__MAKE_VECTOR_TYPE__(int4, int, 4); + +__MAKE_VECTOR_TYPE__(ulong1, unsigned long, 1); +__MAKE_VECTOR_TYPE__(ulong2, unsigned long, 2); +__MAKE_VECTOR_TYPE__(ulong3, unsigned long, 3); +__MAKE_VECTOR_TYPE__(ulong4, unsigned long, 4); + +__MAKE_VECTOR_TYPE__(long1, long, 1); +__MAKE_VECTOR_TYPE__(long2, long, 2); +__MAKE_VECTOR_TYPE__(long3, long, 3); +__MAKE_VECTOR_TYPE__(long4, long, 4); + +__MAKE_VECTOR_TYPE__(ulonglong1, unsigned long long, 1); +__MAKE_VECTOR_TYPE__(ulonglong2, unsigned long long, 2); +__MAKE_VECTOR_TYPE__(ulonglong3, unsigned long long, 3); +__MAKE_VECTOR_TYPE__(ulonglong4, unsigned long long, 4); + +__MAKE_VECTOR_TYPE__(longlong1, long long, 1); +__MAKE_VECTOR_TYPE__(longlong2, long long, 2); +__MAKE_VECTOR_TYPE__(longlong3, long long, 3); +__MAKE_VECTOR_TYPE__(longlong4, long long, 4); + +__MAKE_VECTOR_TYPE__(float1, float, 1); +__MAKE_VECTOR_TYPE__(float2, float, 2); 
+__MAKE_VECTOR_TYPE__(float3, float, 3); +__MAKE_VECTOR_TYPE__(float4, float, 4); + +__MAKE_VECTOR_TYPE__(double1, double, 1); +__MAKE_VECTOR_TYPE__(double2, double, 2); +__MAKE_VECTOR_TYPE__(double3, double, 3); +__MAKE_VECTOR_TYPE__(double4, double, 4); #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ __device__ __host__ \ diff --git a/hipamd/src/device_functions.cpp b/hipamd/src/device_functions.cpp index 8ef19bab3f..8dfd5a07c4 100644 --- a/hipamd/src/device_functions.cpp +++ b/hipamd/src/device_functions.cpp @@ -355,33 +355,33 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask __device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) { char4 out; - unsigned one1 = in1.a & MASK1; - unsigned one2 = in2.a & MASK1; - out.a = (one1 + one2) & MASK1; - one1 = in1.a & MASK2; - one2 = in2.a & MASK2; - out.a = out.a | ((one1 + one2) & MASK2); + unsigned one1 = in1.w & MASK1; + unsigned one2 = in2.w & MASK1; + out.w = (one1 + one2) & MASK1; + one1 = in1.w & MASK2; + one2 = in2.w & MASK2; + out.w = out.w | ((one1 + one2) & MASK2); return out; } __device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) { char4 out; - unsigned one1 = in1.a & MASK1; - unsigned one2 = in2.a & MASK1; - out.a = (one1 - one2) & MASK1; - one1 = in1.a & MASK2; - one2 = in2.a & MASK2; - out.a = out.a | ((one1 - one2) & MASK2); + unsigned one1 = in1.w & MASK1; + unsigned one2 = in2.w & MASK1; + out.w = (one1 - one2) & MASK1; + one1 = in1.w & MASK2; + one2 = in2.w & MASK2; + out.w = out.w | ((one1 - one2) & MASK2); return out; } __device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) { char4 out; - unsigned one1 = in1.a & MASK1; - unsigned one2 = in2.a & MASK1; - out.a = (one1 * one2) & MASK1; - one1 = in1.a & MASK2; - one2 = in2.a & MASK2; - out.a = out.a | ((one1 * one2) & MASK2); + unsigned one1 = in1.w & MASK1; + unsigned one2 = in2.w & MASK1; + out.w = (one1 * one2) & MASK1; + one1 = in1.w & MASK2; + one2 = in2.w & MASK2; + out.w = out.w | ((one1 * one2) & MASK2); return 
out; } diff --git a/hipamd/tests/src/deviceLib/hipVectorTypes.cpp b/hipamd/tests/src/deviceLib/hipVectorTypes.cpp index 3c36fb5d2e..bc7e4eb356 100644 --- a/hipamd/tests/src/deviceLib/hipVectorTypes.cpp +++ b/hipamd/tests/src/deviceLib/hipVectorTypes.cpp @@ -52,20 +52,20 @@ template< __device__ bool integer_unary_tests(V& f1, V& f2) { f1 %= f2; - if (!cmp(f1, 0)) return false; + if (f1 != V{0}) return false; f1 &= f2; - if (!cmp(f1, 0)) return false; + if (f1 != V{0}) return false; f1 |= f2; - if (!cmp(f1, 1)) return false; + if (f1 != V{1}) return false; f1 ^= f2; - if (!cmp(f1, 0)) return false; - f1.x = 1; + if (f1 != V{0}) return false; + f1 = V{1}; f1 <<= f2; - if (!cmp(f1, 2)) return false; + if (f1 != V{2}) return false; f1 >>= f2; - if (!cmp(f1, 1)) return false; + if (f1 != V{1}) return false; f2 = ~f1; - return cmp(f2, ~1); + return f2 == V{~1}; } template< @@ -74,17 +74,17 @@ template< __device__ bool integer_binary_tests(V& f1, V& f2, V& f3) { f3 = f1 % f2; - if (!cmp(f3, 0)) return false; + if (f3 != V{0}) return false; f1 = f3 & f2; - if (!cmp(f1, 0)) return false; + if (f1 != V{0}) return false; f2 = f1 ^ f3; - if (!cmp(f2, 0)) return false; - f1.x = 1; - f2.x = 2; + if (f2 != V{0}) return false; + f1 = V{1}; + f2 = V{2}; f3 = f1 << f2; - if (!cmp(f3, 4)) return false; + if (f3 != V{4}) return false; f2 = f3 >> f1; - if (!cmp(f2, 2)) return false; + return f2 == V{2}; } template @@ -107,60 +107,58 @@ bool constructor_tests() { template bool TestVectorType() { - V f1(1); - V f2(1); + V f1{1}; + V f2{1}; V f3 = f1 + f2; - if (!cmp(f3, 2)) return false; + if (f3 != V{2}) return false; f2 = f3 - f1; - if (!cmp(f2, 1)) return false; + if (f2 != V{1}) return false; f1 = f2 * f3; - if (!cmp(f1, 2)) return false; + if (f1 != V{2}) return false; f2 = f1 / f3; - if (!cmp(f2, 2 / 2)) return false; + if (f2 != V{1}) return false; if (!integer_binary_tests(f1, f2, f3)) return false; - f1 = V(2); - f2 = V(1); + f1 = V{2}; + f2 = V{1}; f1 += f2; - if (!cmp(f1, 3)) 
return false; + if (f1 != V{3}) return false; f1 -= f2; - if (!cmp(f1, 2)) return false; + if (f1 != V{2}) return false; f1 *= f2; - if (!cmp(f1, 2)) return false; + if (f1 != V{2}) return false; f1 /= f2; - if (!cmp(f1, 2)) return false; + if (f1 != V{2}) return false; if (!integer_unary_tests(f1, f2)) return false; - #if false // We do not enable nullary increment / decrement yet. - f1 = V(2); - f2 = f1++; - if (!cmp(f1, 3)) return false; - if (!cmp(f2, 2)) return false; - f2 = f1--; - if (!cmp(f2, 3)) return false; - if (!cmp(f1, 2)) return false; - f2 = ++f1; - if (!cmp(f1, 3)) return false; - if (!cmp(f2, 3)) return false; - f2 = --f1; - if (!cmp(f1, 2)) return false; - if (!cmp(f2, 2)) return false; - #endif + f1 = V{2}; + f2 = f1++; + if (f1 != V{3}) return false; + if (f2 != V{2}) return false; + f2 = f1--; + if (f2 != V{3}) return false; + if (f1 != V{2}) return false; + f2 = ++f1; + if (f1 != V{3}) return false; + if (f2 != V{3}) return false; + f2 = --f1; + if (f1 != V{2}) return false; + if (f2 != V{2}) return false; if (!constructor_tests()) return false; - f1 = V(3); - f2 = V(4); - f3 = V(3); - if (cmp(f1 == f2, true)) return false; - if (cmp(f1 != f2, false)) return false; - if (cmp(f1 < f2, false)) return false; - if (cmp(f2 > f1, false)) return false; - if (cmp(f1 >= f3, false)) return false; - if (cmp(f1 <= f3, false)) return false; + f1 = V{3}; + f2 = V{4}; + f3 = V{3}; + if (f1 == f2) return false; + if (!(f1 != f2)) return false; + if (!(f1 < f2)) return false; + if (!(f2 > f1)) return false; + if (!(f1 >= f3)) return false; + if (!(f1 <= f3)) return false; - if (cmp(f1 && f2, false)) return false; - if (cmp(f1 || f2, false)) return false; + if (!(f1 && f2)) return false; + if (!(f1 || f2)) return false; return true; } diff --git a/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp b/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp index edb817ced1..21dc1f1a75 100644 --- a/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp +++ 
b/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp @@ -51,20 +51,23 @@ template< __device__ bool integer_unary_tests(V& f1, V& f2) { f1 %= f2; - if (!cmp(f1, 0)) return false; + if (f1 != V{0}) return false; + f1 &= f2; - if (!cmp(f1, 0)) return false; + if (f1 != V{0}) return false; f1 |= f2; - if (!cmp(f1, 1)) return false; + if (f1 != V{1}) return false; f1 ^= f2; - if (!cmp(f1, 0)) return false; - f1.x = 1; + if (f1 != V{0}) return false; + f1 = V{1}; f1 <<= f2; - if (!cmp(f1, 2)) return false; + if (f1 != V{2}) return false; f1 >>= f2; - if (!cmp(f1, 1)) return false; + if (f1 != V{1}) return false; f2 = ~f1; - return cmp(f2, ~1); + return f2 == V{~1}; + + return true; } template< @@ -81,74 +84,72 @@ template< __device__ bool integer_binary_tests(V& f1, V& f2, V& f3) { f3 = f1 % f2; - if (!cmp(f3, 0)) return false; + if (f3 != V{0}) return false; f1 = f3 & f2; - if (!cmp(f1, 0)) return false; + if (f1 != V{0}) return false; f2 = f1 ^ f3; - if (!cmp(f2, 0)) return false; - f1.x = 1; - f2.x = 2; + if (f2 != V{0}) return false; + f1 = V{1}; + f2 = V{2}; f3 = f1 << f2; - if (!cmp(f3, 4)) return false; + if (f3 != V{4}) return false; f2 = f3 >> f1; - if (!cmp(f2, 2)) return false; + return f2 == V{2}; } template __device__ bool TestVectorType() { - V f1(1); - V f2(1); + V f1{1}; + V f2{1}; V f3 = f1 + f2; - if (!cmp(f3, 2)) return false; + if (f3 != V{2}) return false; f2 = f3 - f1; - if (!cmp(f2, 1)) return false; + if (f2 != V{1}) return false; f1 = f2 * f3; - if (!cmp(f1, 2)) return false; + if (f1 != V{2}) return false; f2 = f1 / f3; - if (!cmp(f2, 2 / 2)) return false; + if (f2 != V{1}) return false; if (!integer_binary_tests(f1, f2, f3)) return false; - f1 = V(2); - f2 = V(1); + f1 = V{2}; + f2 = V{1}; f1 += f2; - if (!cmp(f1, 3)) return false; + if (f1 != V{3}) return false; f1 -= f2; - if (!cmp(f1, 2)) return false; + if (f1 != V{2}) return false; f1 *= f2; - if (!cmp(f1, 2)) return false; + if (f1 != V{2}) return false; f1 /= f2; - if (!cmp(f1, 2)) 
return false; + if (f1 != V{2}) return false; if (!integer_unary_tests(f1, f2)) return false; - #if false // We do not enable nullary increment / decrement yet. - f1 = V(2); - f2 = f1++; - if (!cmp(f1, 3)) return false; - if (!cmp(f2, 2)) return false; - f2 = f1--; - if (!cmp(f2, 3)) return false; - if (!cmp(f1, 2)) return false; - f2 = ++f1; - if (!cmp(f1, 3)) return false; - if (!cmp(f2, 3)) return false; - f2 = --f1; - if (!cmp(f1, 2)) return false; - if (!cmp(f2, 2)) return false; - #endif + f1 = V{2}; + f2 = f1++; + if (f1 != V{3}) return false; + if (f2 != V{2}) return false; + f2 = f1--; + if (f2 != V{3}) return false; + if (f1 != V{2}) return false; + f2 = ++f1; + if (f1 != V{3}) return false; + if (f2 != V{3}) return false; + f2 = --f1; + if (f1 != V{2}) return false; + if (f2 != V{2}) return false; - f1 = V(3); - f2 = V(4); - f3 = V(3); - if (cmp(f1 == f2, true)) return false; - if (cmp(f1 != f2, false)) return false; - if (cmp(f1 < f2, false)) return false; - if (cmp(f2 > f1, false)) return false; - if (cmp(f1 >= f3, false)) return false; - if (cmp(f1 <= f3, false)) return false; + f1 = V{3}; + f2 = V{4}; + f3 = V{3}; + if (f1 == f2) return false; + if (!(f1 != f2)) return false; + if (!(f1 < f2)) return false; + if (!(f2 > f1)) return false; + if (!(f1 >= f3)) return false; + if (!(f1 <= f3)) return false; - if (cmp(f1 && f2, false)) return false; - if (cmp(f1 || f2, false)) return false; + if (!(f1 && f2)) return false; + if (!(f1 || f2)) return false; return true; } diff --git a/hipamd/tests/src/deviceLib/vector_test_common.h b/hipamd/tests/src/deviceLib/vector_test_common.h index d5bc4c57a2..fac5ab84a1 100644 --- a/hipamd/tests/src/deviceLib/vector_test_common.h +++ b/hipamd/tests/src/deviceLib/vector_test_common.h @@ -66,40 +66,4 @@ bool is_vec() { ((dimension == 2) ? decltype(is_vec2(std::declval())){} : ((dimension == 3) ? 
decltype(is_vec3(std::declval())){} : decltype(is_vec4(std::declval())){})); -} - -template()>* = nullptr> -__host__ __device__ -inline -bool cmp(const T& x, U expected) { - const auto r = x == T(expected); - - return r.x != 0; -} - -template()>* = nullptr> -__host__ __device__ -inline -bool cmp(const T& x, U expected) { - const auto r = x == T(expected); - - return r.x != 0 && r.y != 0; -} - -template()>* = nullptr> -__host__ __device__ -inline -bool cmp(const T& x, U expected) { - const auto r = x == T(expected); - - return r.x != 0 && r.y != 0 && r.z != 0; -} - -template()>* = nullptr> -__host__ __device__ -inline -bool cmp(const T& x, U expected) { - const auto r = x == T(expected); - - return r.x != 0 && r.y != 0 && r.z != 0 && r.w != 0; } \ No newline at end of file From b9ead384783afc2e857ce87af9692fb6644968d5 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Mon, 25 Jun 2018 22:59:07 +0100 Subject: [PATCH 13/40] Be nice to GCC, it is old and worthy of respect. --- .../include/hip/hcc_detail/hip_vector_types.h | 41 ++++++++----------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_vector_types.h b/hipamd/include/hip/hcc_detail/hip_vector_types.h index 704ff7e550..7c12bda0a5 100644 --- a/hipamd/include/hip/hcc_detail/hip_vector_types.h +++ b/hipamd/include/hip/hcc_detail/hip_vector_types.h @@ -55,8 +55,7 @@ THE SOFTWARE. union { Native_vec_ data; struct { - typename std::decay< - decltype(std::declval().x)>::type x; + T x; }; }; }; @@ -68,10 +67,8 @@ THE SOFTWARE. union { Native_vec_ data; struct { - typename std::decay< - decltype(std::declval().x)>::type x; - typename std::decay< - decltype(std::declval().y)>::type y; + T x; + T y; }; }; }; @@ -83,12 +80,9 @@ THE SOFTWARE. 
union { Native_vec_ data; struct { - typename std::decay< - decltype(std::declval().x)>::type x; - typename std::decay< - decltype(std::declval().y)>::type y; - typename std::decay< - decltype(std::declval().z)>::type z; + T x; + T y; + T z; }; }; }; @@ -100,14 +94,10 @@ THE SOFTWARE. union { Native_vec_ data; struct { - typename std::decay< - decltype(std::declval().x)>::type x; - typename std::decay< - decltype(std::declval().y)>::type y; - typename std::decay< - decltype(std::declval().z)>::type z; - typename std::decay< - decltype(std::declval().w)>::type w; + T x; + T y; + T z; + T w; }; }; }; @@ -125,7 +115,10 @@ THE SOFTWARE. std::is_convertible{}>::type* = nullptr> __host__ __device__ explicit - HIP_vector_type(U x) noexcept { data = Native_vec_(x); } + HIP_vector_type(U x) noexcept + { + for (auto i = 0u; i != rank; ++i) data[i] = x; + } template< // TODO: constrain based on type as well. typename... Us, typename std::enable_if::type* = nullptr> @@ -147,8 +140,7 @@ THE SOFTWARE. __host__ __device__ HIP_vector_type& operator++() noexcept { - data += Native_vec_(1); - return *this; + return *this += HIP_vector_type{1}; } __host__ __device__ HIP_vector_type operator++(int) noexcept @@ -160,8 +152,7 @@ THE SOFTWARE. __host__ __device__ HIP_vector_type& operator--() noexcept { - data -= Native_vec_(1); - return *this; + return *this -= HIP_vector_type{1}; } __host__ __device__ HIP_vector_type operator--(int) noexcept From 2acf7f033c37f390648df3a52870a36db24a0d1f Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 26 Jun 2018 00:41:35 +0100 Subject: [PATCH 14/40] Existence is a complex affair. --- hipamd/include/hip/hcc_detail/hip_complex.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hipamd/include/hip/hcc_detail/hip_complex.h b/hipamd/include/hip/hcc_detail/hip_complex.h index 973d5f564b..5ed3b04b55 100644 --- a/hipamd/include/hip/hcc_detail/hip_complex.h +++ b/hipamd/include/hip/hcc_detail/hip_complex.h @@ -27,6 +27,10 @@ THE SOFTWARE. 
#include #if __cplusplus +#define MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(type, type1) \ + __device__ __host__ type(type1 val) : x(val), y(val) {} \ + __device__ __host__ type(type1 val1, type1 val2) : x(val1), y(val2) {} + #define COMPLEX_ADD_OP_OVERLOAD(type) \ __device__ __host__ static inline type operator+(const type& lhs, const type& rhs) { \ type ret; \ From e44eaa1a1e19adbdc040e7b55f5a8b1b68da3427 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Fri, 22 Jun 2018 19:11:35 +0000 Subject: [PATCH 15/40] Implement __shfl_* funcs into HIP headers --- .../include/hip/hcc_detail/device_functions.h | 121 ++++++++++++++++++ hipamd/include/hip/hcc_detail/hip_runtime.h | 25 ---- hipamd/src/device_util.cpp | 29 ----- 3 files changed, 121 insertions(+), 54 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index 8ea6632ffc..aae0706033 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -282,6 +282,127 @@ __device__ static inline int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, return __llvm_amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); } +static constexpr int warpSize = 64; + + __device__ +inline +int __shfl(int var, int src_lane, int width = warpSize) { + int self = __lane_id(); + int index = src_lane + (self & ~(width-1)); + return __llvm_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int __shfl(unsigned int var, int src_lane, int width = warpSize) { + __u tmp; tmp.u = var; + tmp.i = __shfl(tmp.i, src_lane, width); + return tmp.u; +} +__device__ +inline +float __shfl(float var, int src_lane, int width = warpSize) { + __u tmp; tmp.f = var; + tmp.i = __shfl(tmp.i, src_lane, width); + return tmp.f; +} +__device__ +inline +double __shfl(double var, int src_lane, int width = warpSize) { + __u tmp; tmp.f = (float) var; + tmp.i = __shfl(tmp.i, src_lane, width); + return (double) tmp.f; 
+} + + __device__ +inline +int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) { + int self = __lane_id(); + int index = self - lane_delta; + index = (index < (self & ~(width-1)))?self:index; + return __llvm_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int __shfl_up(unsigned int var, unsigned int lane_delta, int width = warpSize) { + __u tmp; tmp.u = var; + tmp.i = __shfl_up(tmp.i, lane_delta, width); + return tmp.u; +} +__device__ +inline +float __shfl_up(float var, unsigned int lane_delta, int width = warpSize) { + __u tmp; tmp.f = var; + tmp.i = __shfl_up(tmp.i, lane_delta, width); + return tmp.f; +} +__device__ +inline +double __shfl_up(double var, unsigned int lane_delta, int width = warpSize) { + __u tmp; tmp.f = (float) var; + tmp.i = __shfl_up(tmp.i, lane_delta, width); + return (double) tmp.f; +} + +__device__ +inline +int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) { + int self = __lane_id(); + int index = self + lane_delta; + index = (int)((self&(width-1))+lane_delta) >= width?self:index; + return __llvm_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int __shfl_down(unsigned int var, unsigned int lane_delta, int width = warpSize) { + __u tmp; tmp.u = var; + tmp.i = __shfl_down(tmp.i, lane_delta, width); + return tmp.u; +} +__device__ +inline +float __shfl_down(float var, unsigned int lane_delta, int width = warpSize) { + __u tmp; tmp.f = var; + tmp.i = __shfl_down(tmp.i, lane_delta, width); + return tmp.f; +} +__device__ +inline +double __shfl_down(double var, unsigned int lane_delta, int width = warpSize) { + __u tmp; tmp.f = (float) var; + tmp.i = __shfl_down(tmp.i, lane_delta, width); + return (double) tmp.f; +} + +__device__ +inline +int __shfl_xor(int var, int lane_mask, int width = warpSize) { + int self = __lane_id(); + int index = self^lane_mask; + index = index >= ((self+width)&~(width-1))?self:index; + return __llvm_amdgcn_ds_bpermute(index<<2, var); +} 
+__device__ +inline +unsigned int __shfl_xor(unsigned int var, int lane_mask, int width = warpSize) { + __u tmp; tmp.u = var; + tmp.i = __shfl_xor(tmp.i, lane_mask, width); + return tmp.u; +} +__device__ +inline +float __shfl_xor(float var, int lane_mask, int width = warpSize) { + __u tmp; tmp.f = var; + tmp.i = __shfl_xor(tmp.i, lane_mask, width); + return tmp.f; +} +__device__ +inline +double __shfl_xor(double var, int lane_mask, int width = warpSize) { + __u tmp; tmp.f = (float) var; + tmp.i = __shfl_xor(tmp.i, lane_mask, width); + return (double) tmp.f; +} + #define MASK1 0x00ff00ff #define MASK2 0xff00ff00 diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 18b04daf77..8107f00a4e 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -184,36 +184,11 @@ extern int HIP_TRACE_API; #if __HCC_OR_HIP_CLANG__ -// TODO - hipify-clang - change to use the function call. -//#define warpSize hc::__wavesize() -static constexpr int warpSize = 64; - // abort __device__ void abort(); #if __HIP_ARCH_GFX701__ == 0 -// warp shuffle functions -#ifdef __cplusplus -__device__ int __shfl(int input, int lane, int width = warpSize); -__device__ int __shfl_up(int input, unsigned int lane_delta, int width = warpSize); -__device__ int __shfl_down(int input, unsigned int lane_delta, int width = warpSize); -__device__ int __shfl_xor(int input, int lane_mask, int width = warpSize); -__device__ float __shfl(float input, int lane, int width = warpSize); -__device__ float __shfl_up(float input, unsigned int lane_delta, int width = warpSize); -__device__ float __shfl_down(float input, unsigned int lane_delta, int width = warpSize); -__device__ float __shfl_xor(float input, int lane_mask, int width = warpSize); -#else -__device__ int __shfl(int input, int lane, int width); -__device__ int __shfl_up(int input, unsigned int lane_delta, int width); -__device__ int __shfl_down(int input, 
unsigned int lane_delta, int width); -__device__ int __shfl_xor(int input, int lane_mask, int width); -__device__ float __shfl(float input, int lane, int width); -__device__ float __shfl_up(float input, unsigned int lane_delta, int width); -__device__ float __shfl_down(float input, unsigned int lane_delta, int width); -__device__ float __shfl_xor(float input, int lane_mask, int width); -#endif //__cplusplus - __device__ unsigned __hip_ds_bpermute(int index, unsigned src); __device__ float __hip_ds_bpermutef(int index, float src); __device__ unsigned __hip_ds_permute(int index, unsigned src); diff --git a/hipamd/src/device_util.cpp b/hipamd/src/device_util.cpp index 853ca71c09..65ee5f4368 100644 --- a/hipamd/src/device_util.cpp +++ b/hipamd/src/device_util.cpp @@ -147,35 +147,6 @@ __device__ void* __hip_hc_memset(void* dst, uint8_t val, size_t size) { // abort __device__ void abort() { return hc::abort(); } -// warp shuffle functions -__device__ int __shfl(int input, int lane, int width) { return hc::__shfl(input, lane, width); } - -__device__ int __shfl_up(int input, unsigned int lane_delta, int width) { - return hc::__shfl_up(input, lane_delta, width); -} - -__device__ int __shfl_down(int input, unsigned int lane_delta, int width) { - return hc::__shfl_down(input, lane_delta, width); -} - -__device__ int __shfl_xor(int input, int lane_mask, int width) { - return hc::__shfl_xor(input, lane_mask, width); -} - -__device__ float __shfl(float input, int lane, int width) { return hc::__shfl(input, lane, width); } - -__device__ float __shfl_up(float input, unsigned int lane_delta, int width) { - return hc::__shfl_up(input, lane_delta, width); -} - -__device__ float __shfl_down(float input, unsigned int lane_delta, int width) { - return hc::__shfl_down(input, lane_delta, width); -} - -__device__ float __shfl_xor(float input, int lane_mask, int width) { - return hc::__shfl_xor(input, lane_mask, width); -} - __host__ __device__ int min(int arg1, int arg2) { return 
(int)(hc::precise_math::fmin((float)arg1, (float)arg2)); } From f931980a6ae94a4d0b02e92f065dabf55950b489 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 29 Jun 2018 05:23:49 +0100 Subject: [PATCH 16/40] Add scalar operands and fix C implementation. --- .../include/hip/hcc_detail/hip_vector_types.h | 464 +++++++++++------- hipamd/tests/src/deviceLib/hipVectorTypes.cpp | 6 - .../src/deviceLib/hipVectorTypesDevice.cpp | 6 - 3 files changed, 289 insertions(+), 187 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_vector_types.h b/hipamd/include/hip/hcc_detail/hip_vector_types.h index 7c12bda0a5..cf7058af2b 100644 --- a/hipamd/include/hip/hcc_detail/hip_vector_types.h +++ b/hipamd/include/hip/hcc_detail/hip_vector_types.h @@ -34,8 +34,6 @@ THE SOFTWARE. #include "hip/hcc_detail/host_defines.h" -#include - #if defined(__clang__) #define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n))) #elif defined(__GNUC__) // N.B.: GCC does not support .xyzw syntax. @@ -46,6 +44,8 @@ THE SOFTWARE. #endif #if defined(__cplusplus) + #include + template struct HIP_vector_base; template @@ -114,7 +114,6 @@ THE SOFTWARE. typename std::enable_if< std::is_convertible{}>::type* = nullptr> __host__ __device__ - explicit HIP_vector_type(U x) noexcept { for (auto i = 0u; i != rank; ++i) data[i] = x; @@ -173,6 +172,15 @@ THE SOFTWARE. data -= x.data; return *this; } + template< + typename U, + typename std::enable_if< + std::is_convertible{}>::type* = nullptr> + __host__ __device__ + HIP_vector_type& operator-=(U x) noexcept + { + return *this -= HIP_vector_type{x}; + } __host__ __device__ HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept { @@ -272,6 +280,22 @@ THE SOFTWARE. 
{ return HIP_vector_type{x} += y; } + template + __host__ __device__ + inline + HIP_vector_type operator+( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} += y; + } + template + __host__ __device__ + inline + HIP_vector_type operator+( + U x, const HIP_vector_type& y) noexcept + { + return y + x; + } template __host__ __device__ @@ -281,6 +305,22 @@ THE SOFTWARE. { return HIP_vector_type{x} -= y; } + template + __host__ __device__ + inline + HIP_vector_type operator-( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} -= y; + } + template + __host__ __device__ + inline + HIP_vector_type operator-( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} -= y; + } template __host__ __device__ @@ -290,6 +330,22 @@ THE SOFTWARE. { return HIP_vector_type{x} *= y; } + template + __host__ __device__ + inline + HIP_vector_type operator*( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} *= y; + } + template + __host__ __device__ + inline + HIP_vector_type operator*( + U x, const HIP_vector_type& y) noexcept + { + return y * x; + } template __host__ __device__ @@ -299,6 +355,22 @@ THE SOFTWARE. { return HIP_vector_type{x} /= y; } + template + __host__ __device__ + inline + HIP_vector_type operator/( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} /= y; + } + template + __host__ __device__ + inline + HIP_vector_type operator/( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} /= y; + } template __host__ __device__ @@ -310,6 +382,20 @@ THE SOFTWARE. 
for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false; return true; } + template + __host__ __device__ + inline + bool operator==(const HIP_vector_type& x, U y) noexcept + { + return x == HIP_vector_type{y}; + } + template + __host__ __device__ + inline + bool operator==(U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} == y; + } template __host__ __device__ @@ -319,43 +405,19 @@ THE SOFTWARE. { return !(x == y); } - - template + template __host__ __device__ inline - bool operator<( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept + bool operator!=(const HIP_vector_type& x, U y) noexcept { - auto tmp = x.data < y.data; - for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false; - return true; + return !(x == y); } - - template + template __host__ __device__ inline - bool operator>( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept + bool operator!=(U x, const HIP_vector_type& y) noexcept { - return y < x; - } - - template - __host__ __device__ - inline - bool operator<=( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return !(y < x); - } - - template - __host__ __device__ - inline - bool operator>=( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - return !(x < y); + return !(x == y); } template< @@ -368,6 +430,28 @@ THE SOFTWARE. { return HIP_vector_type{x} %= y; } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator%( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} %= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator%( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} %= y; + } template< typename T, @@ -379,6 +463,28 @@ THE SOFTWARE. 
{ return HIP_vector_type{x} ^= y; } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator^( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} ^= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator^( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} ^= y; + } template< typename T, @@ -390,6 +496,28 @@ THE SOFTWARE. { return HIP_vector_type{x} |= y; } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator|( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} |= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator|( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} |= y; + } template< typename T, @@ -401,6 +529,28 @@ THE SOFTWARE. { return HIP_vector_type{x} &= y; } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator&( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} &= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator&( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} &= y; + } template< typename T, @@ -412,6 +562,28 @@ THE SOFTWARE. 
{ return HIP_vector_type{x} >>= y; } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator>>( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} >>= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator>>( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} >>= y; + } template< typename T, @@ -423,176 +595,118 @@ THE SOFTWARE. { return HIP_vector_type{x} <<= y; } - - // TODO: the following are rather dubious in terms of general utility. - template + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> inline - bool operator||( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept + HIP_vector_type operator<<( + const HIP_vector_type& x, U y) noexcept { - auto tmp = x.data || y.data; - for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false; - return true; + return HIP_vector_type{x} <<= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + inline + HIP_vector_type operator<<( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} <<= y; } - template - inline - bool operator&&( - const HIP_vector_type& x, const HIP_vector_type& y) noexcept - { - auto tmp = x.data && y.data; - for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false; - return true; - } - - #define __MAKE_VECTOR_TYPE__(CUDA_name, T, n) \ - using CUDA_name = HIP_vector_type; + #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ + using CUDA_name##1 = HIP_vector_type;\ + using CUDA_name##2 = HIP_vector_type;\ + using CUDA_name##3 = HIP_vector_type;\ + using CUDA_name##4 = HIP_vector_type; #else - typedef unsigned char uchar1 __NATIVE_VECTOR__(1, unsigned char); - typedef unsigned char uchar2 __NATIVE_VECTOR__(2, unsigned char); - typedef unsigned char uchar3 
__NATIVE_VECTOR__(3, unsigned char); - typedef unsigned char uchar4 __NATIVE_VECTOR__(4, unsigned char); - - typedef char char1 __NATIVE_VECTOR__(1, char); - typedef char char2 __NATIVE_VECTOR__(2, char); - typedef char char3 __NATIVE_VECTOR__(3, char); - typedef char char4 __NATIVE_VECTOR__(4, char); - - typedef unsigned short ushort1 __NATIVE_VECTOR__(1, unsigned short); - typedef unsigned short ushort2 __NATIVE_VECTOR__(2, unsigned short); - typedef unsigned short ushort3 __NATIVE_VECTOR__(3, unsigned short); - typedef unsigned short ushort4 __NATIVE_VECTOR__(4, unsigned short); - - typedef short short1 __NATIVE_VECTOR__(1, short); - typedef short short2 __NATIVE_VECTOR__(2, short); - typedef short short3 __NATIVE_VECTOR__(3, short); - typedef short short4 __NATIVE_VECTOR__(4, short); - - typedef unsigned int uint1 __NATIVE_VECTOR__(1, unsigned int); - typedef unsigned int uint2 __NATIVE_VECTOR__(2, unsigned int); - typedef unsigned int uint3 __NATIVE_VECTOR__(3, unsigned int); - typedef unsigned int uint4 __NATIVE_VECTOR__(4, unsigned int); - - typedef int int1 __NATIVE_VECTOR__(1, int); - typedef int int2 __NATIVE_VECTOR__(2, int); - typedef int int3 __NATIVE_VECTOR__(3, int); - typedef int int4 __NATIVE_VECTOR__(4, int); - - typedef unsigned long ulong1 __NATIVE_VECTOR__(1, unsigned long); - typedef unsigned long ulong2 __NATIVE_VECTOR__(2, unsigned long); - typedef unsigned long ulong3 __NATIVE_VECTOR__(3, unsigned long); - typedef unsigned long ulong4 __NATIVE_VECTOR__(4, unsigned long); - - typedef long long1 __NATIVE_VECTOR__(1, long); - typedef long long2 __NATIVE_VECTOR__(2, long); - typedef long long3 __NATIVE_VECTOR__(3, long); - typedef long long4 __NATIVE_VECTOR__(4, long); - - typedef unsigned long long ulonglong1 __NATIVE_VECTOR__(1, unsigned long long); - typedef unsigned long long ulonglong2 __NATIVE_VECTOR__(2, unsigned long long); - typedef unsigned long long ulonglong3 __NATIVE_VECTOR__(3, unsigned long long); - typedef unsigned long long 
ulonglong4 __NATIVE_VECTOR__(4, unsigned long long); - - typedef long long longlong1 __NATIVE_VECTOR__(1, long long); - typedef long long longlong2 __NATIVE_VECTOR__(2, long long); - typedef long long longlong3 __NATIVE_VECTOR__(3, long long); - typedef long long longlong4 __NATIVE_VECTOR__(4, long long); - - typedef float float1 __NATIVE_VECTOR__(1, float); - typedef float float2 __NATIVE_VECTOR__(2, float); - typedef float float3 __NATIVE_VECTOR__(3, float); - typedef float float4 __NATIVE_VECTOR__(4, float); - - typedef double double1 __NATIVE_VECTOR__(1, double); - typedef double double2 __NATIVE_VECTOR__(2, double); - typedef double double3 __NATIVE_VECTOR__(3, double); - typedef double double4 __NATIVE_VECTOR__(4, double); + #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ + typedef T CUDA_name##_impl1 __NATIVE_VECTOR__(1, T);\ + typedef T CUDA_name##_impl2 __NATIVE_VECTOR__(2, T);\ + typedef T CUDA_name##_impl3 __NATIVE_VECTOR__(3, T);\ + typedef T CUDA_name##_impl4 __NATIVE_VECTOR__(4, T);\ + typedef struct {\ + union {\ + CUDA_name##_impl1 data;\ + struct {\ + T x;\ + };\ + };\ + } CUDA_name##1;\ + typedef struct {\ + union {\ + CUDA_name##_impl2 data;\ + struct {\ + T x;\ + T y;\ + };\ + };\ + } CUDA_name##2;\ + typedef struct {\ + union {\ + CUDA_name##_impl3 data;\ + struct {\ + T x;\ + T y;\ + T z;\ + };\ + };\ + } CUDA_name##3;\ + typedef struct {\ + union {\ + CUDA_name##_impl4 data;\ + struct {\ + T x;\ + T y;\ + T z;\ + T w;\ + };\ + };\ + } CUDA_name##4; #endif -__MAKE_VECTOR_TYPE__(uchar1, unsigned char, 1); -__MAKE_VECTOR_TYPE__(uchar2, unsigned char, 2); -__MAKE_VECTOR_TYPE__(uchar3, unsigned char, 3); -__MAKE_VECTOR_TYPE__(uchar4, unsigned char, 4); - -__MAKE_VECTOR_TYPE__(char1, char, 1); -__MAKE_VECTOR_TYPE__(char2, char, 2); -__MAKE_VECTOR_TYPE__(char3, char, 3); -__MAKE_VECTOR_TYPE__(char4, char, 4); - -__MAKE_VECTOR_TYPE__(ushort1, unsigned short, 1); -__MAKE_VECTOR_TYPE__(ushort2, unsigned short, 2); -__MAKE_VECTOR_TYPE__(ushort3, unsigned 
short, 3); -__MAKE_VECTOR_TYPE__(ushort4, unsigned short, 4); - -__MAKE_VECTOR_TYPE__(short1, short, 1); -__MAKE_VECTOR_TYPE__(short2, short, 2); -__MAKE_VECTOR_TYPE__(short3, short, 3); -__MAKE_VECTOR_TYPE__(short4, short, 4); - -__MAKE_VECTOR_TYPE__(uint1, unsigned int, 1); -__MAKE_VECTOR_TYPE__(uint2, unsigned int, 2); -__MAKE_VECTOR_TYPE__(uint3, unsigned int, 3); -__MAKE_VECTOR_TYPE__(uint4, unsigned int, 4); - -__MAKE_VECTOR_TYPE__(int1, int, 1); -__MAKE_VECTOR_TYPE__(int2, int, 2); -__MAKE_VECTOR_TYPE__(int3, int, 3); -__MAKE_VECTOR_TYPE__(int4, int, 4); - -__MAKE_VECTOR_TYPE__(ulong1, unsigned long, 1); -__MAKE_VECTOR_TYPE__(ulong2, unsigned long, 2); -__MAKE_VECTOR_TYPE__(ulong3, unsigned long, 3); -__MAKE_VECTOR_TYPE__(ulong4, unsigned long, 4); - -__MAKE_VECTOR_TYPE__(long1, long, 1); -__MAKE_VECTOR_TYPE__(long2, long, 2); -__MAKE_VECTOR_TYPE__(long3, long, 3); -__MAKE_VECTOR_TYPE__(long4, long, 4); - -__MAKE_VECTOR_TYPE__(ulonglong1, unsigned long long, 1); -__MAKE_VECTOR_TYPE__(ulonglong2, unsigned long long, 2); -__MAKE_VECTOR_TYPE__(ulonglong3, unsigned long long, 3); -__MAKE_VECTOR_TYPE__(ulonglong4, unsigned long long, 4); - -__MAKE_VECTOR_TYPE__(longlong1, long long, 1); -__MAKE_VECTOR_TYPE__(longlong2, long long, 2); -__MAKE_VECTOR_TYPE__(longlong3, long long, 3); -__MAKE_VECTOR_TYPE__(longlong4, long long, 4); - -__MAKE_VECTOR_TYPE__(float1, float, 1); -__MAKE_VECTOR_TYPE__(float2, float, 2); -__MAKE_VECTOR_TYPE__(float3, float, 3); -__MAKE_VECTOR_TYPE__(float4, float, 4); - -__MAKE_VECTOR_TYPE__(double1, double, 1); -__MAKE_VECTOR_TYPE__(double2, double, 2); -__MAKE_VECTOR_TYPE__(double3, double, 3); -__MAKE_VECTOR_TYPE__(double4, double, 4); +__MAKE_VECTOR_TYPE__(uchar, unsigned char); +__MAKE_VECTOR_TYPE__(char, char); +__MAKE_VECTOR_TYPE__(ushort, unsigned short); +__MAKE_VECTOR_TYPE__(short, short); +__MAKE_VECTOR_TYPE__(uint, unsigned int); +__MAKE_VECTOR_TYPE__(int, int); +__MAKE_VECTOR_TYPE__(ulong, unsigned long); 
+__MAKE_VECTOR_TYPE__(long, long); +__MAKE_VECTOR_TYPE__(ulonglong, unsigned long long); +__MAKE_VECTOR_TYPE__(longlong, long long); +__MAKE_VECTOR_TYPE__(float, float); +__MAKE_VECTOR_TYPE__(double, double); #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ __device__ __host__ \ static \ inline \ - type make_##type(comp x) { return type{x}; } + type make_##type(comp x) { type r = {x}; return r; } #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ __device__ __host__ \ static \ inline \ - type make_##type(comp x, comp y) { return type{x, y}; } + type make_##type(comp x, comp y) { type r = {x, y}; return r; } #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ __device__ __host__ \ static \ inline \ - type make_##type(comp x, comp y, comp z) { return type{x, y, z}; } + type make_##type(comp x, comp y, comp z) { type r = {x, y, z}; return r; } #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ __device__ __host__ \ static \ inline \ type make_##type(comp x, comp y, comp z, comp w) { \ - return type{x, y, z, w}; \ + type r = {x, y, z, w}; \ + return r; \ } DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1); diff --git a/hipamd/tests/src/deviceLib/hipVectorTypes.cpp b/hipamd/tests/src/deviceLib/hipVectorTypes.cpp index bc7e4eb356..734878b516 100644 --- a/hipamd/tests/src/deviceLib/hipVectorTypes.cpp +++ b/hipamd/tests/src/deviceLib/hipVectorTypes.cpp @@ -152,13 +152,7 @@ bool TestVectorType() { f3 = V{3}; if (f1 == f2) return false; if (!(f1 != f2)) return false; - if (!(f1 < f2)) return false; - if (!(f2 > f1)) return false; - if (!(f1 >= f3)) return false; - if (!(f1 <= f3)) return false; - if (!(f1 && f2)) return false; - if (!(f1 || f2)) return false; return true; } diff --git a/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp b/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp index 21dc1f1a75..03e8158a4e 100644 --- a/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp +++ b/hipamd/tests/src/deviceLib/hipVectorTypesDevice.cpp @@ -143,13 +143,7 @@ bool TestVectorType() { 
f3 = V{3}; if (f1 == f2) return false; if (!(f1 != f2)) return false; - if (!(f1 < f2)) return false; - if (!(f2 > f1)) return false; - if (!(f1 >= f3)) return false; - if (!(f1 <= f3)) return false; - if (!(f1 && f2)) return false; - if (!(f1 || f2)) return false; return true; } From cc14d6440f9cd6bebf6b0a4acc58c2f9079c853c Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Sat, 30 Jun 2018 11:40:32 +0530 Subject: [PATCH 17/40] Updated indentation --- .../docs/markdown/hip_deprecated_api_list.md | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/hipamd/docs/markdown/hip_deprecated_api_list.md b/hipamd/docs/markdown/hip_deprecated_api_list.md index a96f7f4d3f..6a9ed48839 100644 --- a/hipamd/docs/markdown/hip_deprecated_api_list.md +++ b/hipamd/docs/markdown/hip_deprecated_api_list.md @@ -4,19 +4,19 @@ CUDA supports cuCtx API, the Driver API that defines "Context" and "Devices" as separate entities. Contexts contain a single device, and a device can theoretically have multiple contexts. HIP initially added limited support for these API to facilitate easy porting from existing driver codes. These API are marked as deprecated now since there are better alternate interface (such as hipSetDevice or the stream API) to achieve the required functions. 
-###hipCtxCreate -###hipCtxDestroy -###hipCtxPopCurrent -###hipCtxPushCurrent -###hipCtxSetCurrent -###hipCtxGetCurrent -###hipCtxGetDevice -###hipCtxGetApiVersion -###hipCtxGetCacheConfig -###hipCtxSetCacheConfig -###hipCtxSetSharedMemConfig -###hipCtxGetSharedMemConfig -###hipCtxSynchronize -###hipCtxGetFlags -###hipCtxEnablePeerAccess -###hipCtxDisablePeerAccess +### hipCtxCreate +### hipCtxDestroy +### hipCtxPopCurrent +### hipCtxPushCurrent +### hipCtxSetCurrent +### hipCtxGetCurrent +### hipCtxGetDevice +### hipCtxGetApiVersion +### hipCtxGetCacheConfig +### hipCtxSetCacheConfig +### hipCtxSetSharedMemConfig +### hipCtxGetSharedMemConfig +### hipCtxSynchronize +### hipCtxGetFlags +### hipCtxEnablePeerAccess +### hipCtxDisablePeerAccess From 805906d2ad19c5d58b02818193826459b3a88c96 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Sat, 30 Jun 2018 11:42:17 +0530 Subject: [PATCH 18/40] Updated heading --- hipamd/docs/markdown/hip_deprecated_api_list.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/docs/markdown/hip_deprecated_api_list.md b/hipamd/docs/markdown/hip_deprecated_api_list.md index 6a9ed48839..dfb202c8ee 100644 --- a/hipamd/docs/markdown/hip_deprecated_api_list.md +++ b/hipamd/docs/markdown/hip_deprecated_api_list.md @@ -1,4 +1,4 @@ -# HIP Deprecated API List +# HIP Deprecated APIs ## HIP Context API From f554e48db35e6e1bd4b2ee44a756526755e13609 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Mon, 2 Jul 2018 14:32:11 +0530 Subject: [PATCH 19/40] Revert "Use memcpy kernel for all pinned memory cases in hipMemcpy2DAsync" --- hipamd/src/hip_memory.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 787e49683b..d6c04ae98c 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -1680,12 +1680,9 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp actualDest = pinnedPtr; } } -#if 0 if((width == dpitch) 
&& (width == spitch)) { hip_internal::memcpyAsync(dst, src, width*height, kind, stream); - } else -#endif - { + } else { try { if(!isLocked){ for (int i = 0; i < height; ++i) From 77cbd4476de20270d577fdc54f2f7abd9b3c19b9 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Tue, 3 Jul 2018 08:54:17 +0530 Subject: [PATCH 20/40] Fixed offset null check in bind texture functions --- hipamd/src/hip_texture.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hipamd/src/hip_texture.cpp b/hipamd/src/hip_texture.cpp index 24c6eef3af..d6caf853de 100644 --- a/hipamd/src/hip_texture.cpp +++ b/hipamd/src/hip_texture.cpp @@ -389,7 +389,8 @@ hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t enum hipTextureFilterMode filterMode = tex->filterMode; int normalizedCoords = tex->normalized; hipTextureObject_t& textureObject = tex->textureObject; - *offset = 0; + if(offset != nullptr) + *offset = 0; auto ctx = ihipGetTlsDefaultCtx(); if (ctx) { hc::accelerator acc = ctx->getDevice()->_acc; @@ -459,7 +460,8 @@ hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size enum hipTextureFilterMode filterMode = tex->filterMode; int normalizedCoords = tex->normalized; hipTextureObject_t& textureObject = tex->textureObject; - *offset = 0; + if(offset != nullptr) + *offset = 0; auto ctx = ihipGetTlsDefaultCtx(); if (ctx) { hc::accelerator acc = ctx->getDevice()->_acc; From ce9ca744dc8d4cae0c0e94065e6442c18c010df8 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 4 Jul 2018 17:56:50 +0000 Subject: [PATCH 21/40] Workaround cast warning of smaller integer type for __to_local For now, guard the __to_local function for device compile only since a local pointer should be same size as unsigned int on GPU compile. Also change to void* instead of char*. 
--- hipamd/include/hip/hcc_detail/device_functions.h | 4 ++++ hipamd/include/hip/hcc_detail/device_library_decls.h | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index aae0706033..32509ffffd 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -734,6 +734,8 @@ int64_t __lanemask_lt() return ballot; } +#ifdef __HIP_DEVICE_COMPILE__ + __device__ inline void* __get_dynamicgroupbaseptr() @@ -748,6 +750,8 @@ void *__amdgcn_get_dynamicgroupbaseptr() { return __get_dynamicgroupbaseptr(); } +#endif // __HIP_DEVICE_COMPILE__ + #endif // __HCC_OR_HIP_CLANG__ #ifdef __HCC__ diff --git a/hipamd/include/hip/hcc_detail/device_library_decls.h b/hipamd/include/hip/hcc_detail/device_library_decls.h index fba2d53e2e..a636c2c950 100644 --- a/hipamd/include/hip/hcc_detail/device_library_decls.h +++ b/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -45,7 +45,11 @@ extern "C" __device__ float __ocml_trunc_f32(float); // Introduce local address space #define __local __attribute__((address_space(3))) -__device__ inline static __local char* __to_local(unsigned x) { return (__local char*)x; } + +#ifdef __HIP_DEVICE_COMPILE__ +__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; } +#endif //__HIP_DEVICE_COMPILE__ + extern "C" __device__ void* __local_to_generic(__local void* p); // __llvm_fence* functions from device-libs/irif/src/fence.ll From 96ab7c7b254c8612733bba08c2cf85dbeffb7ebe Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 4 Jul 2018 23:13:35 +0000 Subject: [PATCH 22/40] Implement Memory Fence Functions in header Enabled __llvm_fence_* functions for seq_cst. 
--- .../include/hip/hcc_detail/device_functions.h | 136 +++++++++++++----- .../hip/hcc_detail/device_library_decls.h | 11 ++ hipamd/include/hip/hcc_detail/hip_runtime.h | 75 ---------- hipamd/src/device_util.cpp | 3 - hipamd/src/device_util.h | 1 - 5 files changed, 109 insertions(+), 117 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index 32509ffffd..716c51a887 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -752,6 +752,101 @@ void *__amdgcn_get_dynamicgroupbaseptr() { #endif // __HIP_DEVICE_COMPILE__ + +// hip.amdgcn.bc - sync threads +#define __CLK_LOCAL_MEM_FENCE 0x01 +typedef unsigned __cl_mem_fence_flags; + +typedef enum __memory_scope { + __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, + __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +} __memory_scope; + +// enum values aligned with what clang uses in EmitAtomicExpr() +typedef enum __memory_order +{ + __memory_order_relaxed = __ATOMIC_RELAXED, + __memory_order_acquire = __ATOMIC_ACQUIRE, + __memory_order_release = __ATOMIC_RELEASE, + __memory_order_acq_rel = __ATOMIC_ACQ_REL, + __memory_order_seq_cst = __ATOMIC_SEQ_CST +} __memory_order; + +__device__ +inline +static void +__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope) +{ + // We're tying global-happens-before and local-happens-before together as does HSA + if (order != __memory_order_relaxed) { + switch (scope) { + case __memory_scope_work_item: + break; + case __memory_scope_sub_group: + switch (order) { + case __memory_order_relaxed: break; + case __memory_order_acquire: __llvm_fence_acq_sg(); break; + case __memory_order_release: 
__llvm_fence_rel_sg(); break; + case __memory_order_acq_rel: __llvm_fence_ar_sg(); break; + case __memory_order_seq_cst: __llvm_fence_sc_sg(); break; + } + break; + case __memory_scope_work_group: + switch (order) { + case __memory_order_relaxed: break; + case __memory_order_acquire: __llvm_fence_acq_wg(); break; + case __memory_order_release: __llvm_fence_rel_wg(); break; + case __memory_order_acq_rel: __llvm_fence_ar_wg(); break; + case __memory_order_seq_cst: __llvm_fence_sc_wg(); break; + } + break; + case __memory_scope_device: + switch (order) { + case __memory_order_relaxed: break; + case __memory_order_acquire: __llvm_fence_acq_dev(); break; + case __memory_order_release: __llvm_fence_rel_dev(); break; + case __memory_order_acq_rel: __llvm_fence_ar_dev(); break; + case __memory_order_seq_cst: __llvm_fence_sc_dev(); break; + } + break; + case __memory_scope_all_svm_devices: + switch (order) { + case __memory_order_relaxed: break; + case __memory_order_acquire: __llvm_fence_acq_sys(); break; + case __memory_order_release: __llvm_fence_rel_sys(); break; + case __memory_order_acq_rel: __llvm_fence_ar_sys(); break; + case __memory_order_seq_cst: __llvm_fence_sc_sys(); break; + } + break; + } + } +} + +// Memory Fence Functions +__device__ +inline +static void __threadfence() +{ + __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_device); +} + +__device__ +inline +static void __threadfence_block() +{ + __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_work_group); +} + +__device__ +inline +static void __threadfence_system() +{ + __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_all_svm_devices); +} + #endif // __HCC_OR_HIP_CLANG__ #ifdef __HCC__ @@ -796,50 +891,14 @@ void __assertfail(const char * __assertion, __builtin_trap(); } -// hip.amdgcn.bc - sync threads -#define __CLK_LOCAL_MEM_FENCE 0x01 -typedef unsigned __cl_mem_fence_flags; - -typedef enum __memory_scope { - __memory_scope_work_item = 
__OPENCL_MEMORY_SCOPE_WORK_ITEM, - __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, - __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, - __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, - __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP -} __memory_scope; - -// enum values aligned with what clang uses in EmitAtomicExpr() -typedef enum __memory_order -{ - __memory_order_relaxed = __ATOMIC_RELAXED, - __memory_order_acquire = __ATOMIC_ACQUIRE, - __memory_order_release = __ATOMIC_RELEASE, - __memory_order_acq_rel = __ATOMIC_ACQ_REL, - __memory_order_seq_cst = __ATOMIC_SEQ_CST -} __memory_order; - __device__ inline static void __work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope) { if (flags) { - switch(scope) { - case __memory_scope_work_item: break; - case __memory_scope_sub_group: __llvm_fence_rel_sg(); break; - case __memory_scope_work_group: __llvm_fence_rel_wg(); break; - case __memory_scope_device: __llvm_fence_rel_dev(); break; - case __memory_scope_all_svm_devices: __llvm_fence_rel_sys(); break; - } - //atomic_work_item_fence(flags, memory_order_release, scope); + __atomic_work_item_fence(flags, __memory_order_release, scope); __builtin_amdgcn_s_barrier(); - //atomic_work_item_fence(flags, memory_order_acquire, scope); - switch(scope) { - case __memory_scope_work_item: break; - case __memory_scope_sub_group: __llvm_fence_acq_sg(); break; - case __memory_scope_work_group: __llvm_fence_acq_wg(); break; - case __memory_scope_device: __llvm_fence_acq_dev(); break; - case __memory_scope_all_svm_devices: __llvm_fence_acq_sys(); break; - } + __atomic_work_item_fence(flags, __memory_order_acquire, scope); } else { __builtin_amdgcn_s_barrier(); } @@ -918,4 +977,5 @@ unsigned __smid(void) #endif //defined(__clang__) && defined(__HIP__) + #endif diff --git a/hipamd/include/hip/hcc_detail/device_library_decls.h b/hipamd/include/hip/hcc_detail/device_library_decls.h index a636c2c950..82c39b24f0 100644 --- 
a/hipamd/include/hip/hcc_detail/device_library_decls.h +++ b/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -63,4 +63,15 @@ extern "C" __device__ void __llvm_fence_rel_wg(void); extern "C" __device__ void __llvm_fence_rel_dev(void); extern "C" __device__ void __llvm_fence_rel_sys(void); +extern "C" __device__ void __llvm_fence_ar_sg(void); +extern "C" __device__ void __llvm_fence_ar_wg(void); +extern "C" __device__ void __llvm_fence_ar_dev(void); +extern "C" __device__ void __llvm_fence_ar_sys(void); + + +extern "C" __device__ void __llvm_fence_sc_sg(void); +extern "C" __device__ void __llvm_fence_sc_wg(void); +extern "C" __device__ void __llvm_fence_sc_dev(void); +extern "C" __device__ void __llvm_fence_sc_sys(void); + #endif diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 68f2244014..3e0f2e27c5 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -204,81 +204,6 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask __host__ __device__ int min(int arg1, int arg2); __host__ __device__ int max(int arg1, int arg2); - -/** - * CUDA 8 device function features - - */ - - -/** - * Kernel launching - */ - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Fence Fence Functions - * @{ - * - * - * @warning The HIP memory fence functions are currently not supported yet. - * If any of those threadfence stubs are reached by the application, you should set "export - *HSA_DISABLE_CACHE=1" to disable L1 and L2 caches. - * - * - * On AMD platforms, the threadfence* routines are currently empty stubs. 
- */ - -extern __attribute__((const)) __device__ void __hip_hc_threadfence() __asm("__llvm_fence_sc_dev"); -extern __attribute__((const)) __device__ void __hip_hc_threadfence_block() __asm( - "__llvm_fence_sc_wg"); - - -/** - * @brief threadfence_block makes writes visible to threads running in same block. - * - * @Returns void - * - * @param void - * - * @warning __threadfence_block is a stub and map to no-op. - */ -// __device__ void __threadfence_block(void); -__device__ static inline void __threadfence_block(void) { return __hip_hc_threadfence_block(); } - -/** - * @brief threadfence makes wirtes visible to other threads running on same GPU. - * - * @Returns void - * - * @param void - * - * @warning __threadfence is a stub and map to no-op, application should set "export - * HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches. - */ -// __device__ void __threadfence(void) __attribute__((deprecated("Provided for compile-time -// compatibility, not yet functional"))); -__device__ static inline void __threadfence(void) { return __hip_hc_threadfence(); } - -/** - * @brief threadfence_system makes writes to pinned system memory visible on host CPU. - * - * @Returns void - * - * @param void - * - * @warning __threadfence_system is a stub and map to no-op. 
- */ -//__device__ void __threadfence_system(void) __attribute__((deprecated("Provided with workaround -//configuration, see hip_kernel_language.md for details"))); -__device__ void __threadfence_system(void); - -// doxygen end Fence Fence -/** - * @} - */ - #endif // __HCC_OR_HIP_CLANG__ #if defined __HCC__ diff --git a/hipamd/src/device_util.cpp b/hipamd/src/device_util.cpp index 65ee5f4368..5107acd8c6 100644 --- a/hipamd/src/device_util.cpp +++ b/hipamd/src/device_util.cpp @@ -155,6 +155,3 @@ __host__ __device__ int max(int arg1, int arg2) { } __host__ void* __get_dynamicgroupbaseptr() { return nullptr; } - - -__device__ void __threadfence_system(void) { std::atomic_thread_fence(std::memory_order_seq_cst); } diff --git a/hipamd/src/device_util.h b/hipamd/src/device_util.h index 6603689d82..8fa96da9d9 100644 --- a/hipamd/src/device_util.h +++ b/hipamd/src/device_util.h @@ -125,7 +125,6 @@ __device__ double __hip_fast_dsqrt_rd(double x); __device__ double __hip_fast_dsqrt_rn(double x); __device__ double __hip_fast_dsqrt_ru(double x); __device__ double __hip_fast_dsqrt_rz(double x); -__device__ void __threadfence_system(void); float __hip_host_j0f(float x); double __hip_host_j0(double x); From f080abe5a5b00f4f124ae10c1d13406d0eaeb72f Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 5 Jul 2018 23:00:41 +0530 Subject: [PATCH 23/40] Added tex2dlayered mapping for HIP/NVCC --- hipamd/include/hip/nvcc_detail/hip_runtime_api.h | 1 + 1 file changed, 1 insertion(+) diff --git a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 3b8a3661f7..afe7f11f52 100644 --- a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -163,6 +163,7 @@ typedef cudaSurfaceObject_t hipSurfaceObject_t; #define hipTextureType1D cudaTextureType1D #define hipTextureType1DLayered cudaTextureType1DLayered #define hipTextureType2D cudaTextureType2D +#define hipTextureType2DLayered 
cudaTextureType2DLayered #define hipTextureType3D cudaTextureType3D #define hipDeviceMapHost cudaDeviceMapHost From b15b08d0dba9a28ae8aa755236703691cad028f2 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 5 Jul 2018 23:11:39 +0530 Subject: [PATCH 24/40] Corrected enum type --- hipamd/include/hip/nvcc_detail/hip_runtime_api.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 3b8a3661f7..557b15e9a2 100644 --- a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -1168,20 +1168,20 @@ inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int return hipCUDAErrorTohipError(cerror); } -template +template inline static hipError_t hipBindTexture(size_t* offset, const struct texture& tex, const void* devPtr, size_t size = UINT_MAX) { return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size)); } -template +template inline static hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); } -template +template inline static hipError_t hipUnbindTexture(struct texture* tex) { return hipCUDAErrorTohipError(cudaUnbindTexture(tex)); } @@ -1198,7 +1198,7 @@ inline static hipError_t hipBindTextureToArray(struct texture& return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); } -template +template inline static hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array) { return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array)); From 017f668a7235c973c5a63ec34827a3dd0e6ee7b1 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 5 Jul 2018 23:41:31 +0530 Subject: [PATCH 25/40] Added another variant of bindtextoarray for direct porting --- 
hipamd/include/hip/hcc_detail/hip_runtime_api.h | 7 +++++++ hipamd/include/hip/nvcc_detail/hip_runtime_api.h | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/hipamd/include/hip/hcc_detail/hip_runtime_api.h index 0a80a583c7..573ae39af9 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -2621,6 +2621,13 @@ hipError_t hipBindTextureToArray(struct texture& tex, hipArray return ihipBindTextureToArrayImpl(dim, readMode, array, desc, &tex); } +template +inline static hipError_t hipBindTextureToArray(struct texture *tex, + hipArray_const_t array, + const struct hipChannelFormatDesc* desc) { + return ihipBindTextureToArrayImpl(dim, readMode, array, *desc, tex); +} + // C API hipError_t hipBindTextureToMipmappedArray(const textureReference* tex, hipMipmappedArray_const_t mipmappedArray, diff --git a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 3b8a3661f7..64404574f0 100644 --- a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -1198,6 +1198,13 @@ inline static hipError_t hipBindTextureToArray(struct texture& return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); } +template +inline static hipError_t hipBindTextureToArray(struct texture *tex, + hipArray_const_t array, + const struct hipChannelFormatDesc* desc) { + return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); +} + template inline static hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array) { From b362c5347594ea94550cbb30d3295d7e291d3479 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Thu, 5 Jul 2018 20:15:41 +0000 Subject: [PATCH 26/40] Implement min/max functions in HIP header Remove using hc::precise_math min and max. Instead we can use ocml directly for device and std:: for host. 
--- hipamd/include/hip/hcc_detail/device_library_decls.h | 3 +++ hipamd/include/hip/hcc_detail/hip_runtime.h | 11 +++++++++-- hipamd/src/device_util.cpp | 7 ------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_library_decls.h b/hipamd/include/hip/hcc_detail/device_library_decls.h index 82c39b24f0..a7e81a1968 100644 --- a/hipamd/include/hip/hcc_detail/device_library_decls.h +++ b/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -43,6 +43,9 @@ extern "C" __device__ float __ocml_rint_f32(float); extern "C" __device__ float __ocml_ceil_f32(float); extern "C" __device__ float __ocml_trunc_f32(float); +extern "C" __device__ float __ocml_fmin_f32(float, float); +extern "C" __device__ float __ocml_fmax_f32(float, float); + // Introduce local address space #define __local __attribute__((address_space(3))) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 3e0f2e27c5..48818fb15d 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -201,8 +201,15 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask #endif //__HIP_ARCH_GFX803__ == 1 -__host__ __device__ int min(int arg1, int arg2); -__host__ __device__ int max(int arg1, int arg2); +__device__ inline static int min(int arg1, int arg2) { + return (int)(__ocml_fmin_f32((float) arg1, (float) arg2)); +} +__device__ inline static int max(int arg1, int arg2) { + return (int)(__ocml_fmax_f32((float) arg1, (float) arg2)); +} + +__host__ inline static int min(int arg1, int arg2) { return std::min(arg1, arg2); } +__host__ inline static int max(int arg1, int arg2) { return std::max(arg1, arg2); } #endif // __HCC_OR_HIP_CLANG__ diff --git a/hipamd/src/device_util.cpp b/hipamd/src/device_util.cpp index 5107acd8c6..34e198c61d 100644 --- a/hipamd/src/device_util.cpp +++ b/hipamd/src/device_util.cpp @@ -147,11 +147,4 @@ __device__ void* 
__hip_hc_memset(void* dst, uint8_t val, size_t size) { // abort __device__ void abort() { return hc::abort(); } -__host__ __device__ int min(int arg1, int arg2) { - return (int)(hc::precise_math::fmin((float)arg1, (float)arg2)); -} -__host__ __device__ int max(int arg1, int arg2) { - return (int)(hc::precise_math::fmax((float)arg1, (float)arg2)); -} - __host__ void* __get_dynamicgroupbaseptr() { return nullptr; } From d39508b4fde5fb83b721c3c0a297ab67926d01b6 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Thu, 5 Jul 2018 20:38:46 +0000 Subject: [PATCH 27/40] Implement hip_ldg Functions into HIP header Move all the function definitions for hip_ldg.cpp into hip_ldg.h header and enable for HIP clang path. --- hipamd/CMakeLists.txt | 3 +- hipamd/include/hip/hcc_detail/hip_ldg.h | 93 +++++++++++++-------- hipamd/src/hip_ldg.cpp | 83 ------------------ hipamd/tests/src/deviceLib/hip_test_ldg.cpp | 2 +- 4 files changed, 62 insertions(+), 119 deletions(-) delete mode 100644 hipamd/src/hip_ldg.cpp diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index 9095ff6531..a4da3b1920 100644 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -193,8 +193,7 @@ if(HIP_PLATFORM STREQUAL "hcc") src/program_state.cpp) set(SOURCE_FILES_DEVICE - src/device_util.cpp - src/hip_ldg.cpp) + src/device_util.cpp) execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic") diff --git a/hipamd/include/hip/hcc_detail/hip_ldg.h b/hipamd/include/hip/hcc_detail/hip_ldg.h index 281550cd4c..a5b80b0a1b 100644 --- a/hipamd/include/hip/hcc_detail/hip_ldg.h +++ b/hipamd/include/hip/hcc_detail/hip_ldg.h @@ -23,54 +23,81 @@ THE SOFTWARE. 
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H -#if defined __HCC__ -#if __hcc_workweek__ >= 16164 +#if defined(__HCC_OR_HIP_CLANG__) +#if __hcc_workweek__ >= 16164 || defined(__HIP_CLANG_ONLY__) #include "hip_vector_types.h" #include "host_defines.h" -__device__ char __ldg(const char*); -__device__ char2 __ldg(const char2*); -__device__ char4 __ldg(const char4*); -__device__ signed char __ldg(const signed char*); -__device__ unsigned char __ldg(const unsigned char*); +__device__ inline static char __ldg(const char* ptr) { return *ptr; } -__device__ short __ldg(const short*); -__device__ short2 __ldg(const short2*); -__device__ short4 __ldg(const short4*); -__device__ unsigned short __ldg(const unsigned short*); +__device__ inline static char2 __ldg(const char2* ptr) { return *ptr; } -__device__ int __ldg(const int*); -__device__ int2 __ldg(const int2*); -__device__ int4 __ldg(const int4*); -__device__ unsigned int __ldg(const unsigned int*); +__device__ inline static char4 __ldg(const char4* ptr) { return *ptr; } + +__device__ inline static signed char __ldg(const signed char* ptr) { return ptr[0]; } + +__device__ inline static unsigned char __ldg(const unsigned char* ptr) { return ptr[0]; } -__device__ long __ldg(const long*); -__device__ unsigned long __ldg(const unsigned long*); +__device__ inline static short __ldg(const short* ptr) { return ptr[0]; } -__device__ long long __ldg(const long long*); -__device__ longlong2 __ldg(const longlong2*); -__device__ unsigned long long __ldg(const unsigned long long*); +__device__ inline static short2 __ldg(const short2* ptr) { return ptr[0]; } -__device__ uchar2 __ldg(const uchar2*); -__device__ uchar4 __ldg(const uchar4*); +__device__ inline static short4 __ldg(const short4* ptr) { return ptr[0]; } -__device__ ushort2 __ldg(const ushort2*); +__device__ inline static unsigned short __ldg(const unsigned short* ptr) { return ptr[0]; } -__device__ uint2 __ldg(const uint2*); 
-__device__ uint4 __ldg(const uint4*); -__device__ ulonglong2 __ldg(const ulonglong2*); +__device__ inline static int __ldg(const int* ptr) { return ptr[0]; } -__device__ float __ldg(const float*); -__device__ float2 __ldg(const float2*); -__device__ float4 __ldg(const float4*); +__device__ inline static int2 __ldg(const int2* ptr) { return ptr[0]; } -__device__ double __ldg(const double*); -__device__ double2 __ldg(const double2*); +__device__ inline static int4 __ldg(const int4* ptr) { return ptr[0]; } -#endif // __hcc_workweek__ +__device__ inline static unsigned int __ldg(const unsigned int* ptr) { return ptr[0]; } -#endif // __HCC__ + +__device__ inline static long __ldg(const long* ptr) { return ptr[0]; } + +__device__ inline static unsigned long __ldg(const unsigned long* ptr) { return ptr[0]; } + + +__device__ inline static long long __ldg(const long long* ptr) { return ptr[0]; } + +__device__ inline static longlong2 __ldg(const longlong2* ptr) { return ptr[0]; } + +__device__ inline static unsigned long long __ldg(const unsigned long long* ptr) { return ptr[0]; } + + +__device__ inline static uchar2 __ldg(const uchar2* ptr) { return ptr[0]; } + +__device__ inline static uchar4 __ldg(const uchar4* ptr) { return ptr[0]; } + + +__device__ inline static ushort2 __ldg(const ushort2* ptr) { return ptr[0]; } + + +__device__ inline static uint2 __ldg(const uint2* ptr) { return ptr[0]; } + +__device__ inline static uint4 __ldg(const uint4* ptr) { return ptr[0]; } + + +__device__ inline static ulonglong2 __ldg(const ulonglong2* ptr) { return ptr[0]; } + + +__device__ inline static float __ldg(const float* ptr) { return ptr[0]; } + +__device__ inline static float2 __ldg(const float2* ptr) { return ptr[0]; } + +__device__ inline static float4 __ldg(const float4* ptr) { return ptr[0]; } + + +__device__ inline static double __ldg(const double* ptr) { return ptr[0]; } + +__device__ inline static double2 __ldg(const double2* ptr) { return ptr[0]; } + +#endif // 
__hcc_workweek__ || defined(__HIP_CLANG_ONLY__) + +#endif // defined(__HCC_OR_HIP_CLANG__) #endif // HIP_LDG_H diff --git a/hipamd/src/hip_ldg.cpp b/hipamd/src/hip_ldg.cpp deleted file mode 100644 index bf94c05571..0000000000 --- a/hipamd/src/hip_ldg.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "hip/hcc_detail/hip_ldg.h" -#include "hip/hcc_detail/hip_vector_types.h" - -__device__ char __ldg(const char* ptr) { return *ptr; } - -__device__ char2 __ldg(const char2* ptr) { return *ptr; } - -__device__ char4 __ldg(const char4* ptr) { return *ptr; } - -__device__ signed char __ldg(const signed char* ptr) { return ptr[0]; } - -__device__ unsigned char __ldg(const unsigned char* ptr) { return ptr[0]; } - -__device__ short __ldg(const short* ptr) { return ptr[0]; } - -__device__ short2 __ldg(const short2* ptr) { return ptr[0]; } - -__device__ short4 __ldg(const short4* ptr) { return ptr[0]; } - -__device__ unsigned short __ldg(const unsigned short* ptr) { return ptr[0]; } - -__device__ int __ldg(const int* ptr) { return ptr[0]; } - -__device__ int2 __ldg(const int2* ptr) { return ptr[0]; } - -__device__ int4 __ldg(const int4* ptr) { return ptr[0]; } - -__device__ unsigned int __ldg(const unsigned int* ptr) { return ptr[0]; } - - -__device__ long __ldg(const long* ptr) { return ptr[0]; } - -__device__ unsigned long __ldg(const unsigned long* ptr) { return ptr[0]; } - -__device__ long long __ldg(const long long* ptr) { return ptr[0]; } - -__device__ longlong2 __ldg(const longlong2* ptr) { return ptr[0]; } - -__device__ unsigned long long __ldg(const unsigned long long* ptr) { return ptr[0]; } - -__device__ uchar2 __ldg(const uchar2* ptr) { return ptr[0]; } - -__device__ uchar4 __ldg(const uchar4* ptr) { return ptr[0]; } - -__device__ ushort2 __ldg(const ushort2* ptr) { return ptr[0]; } - -__device__ uint2 __ldg(const uint2* ptr) { return ptr[0]; } - -__device__ uint4 __ldg(const uint4* ptr) { return ptr[0]; } - -__device__ ulonglong2 __ldg(const ulonglong2* ptr) { return ptr[0]; } - -__device__ float __ldg(const float* ptr) { return ptr[0]; } - -__device__ float2 __ldg(const float2* ptr) { return ptr[0]; } - -__device__ float4 __ldg(const float4* ptr) { return ptr[0]; } - -__device__ double __ldg(const double* ptr) { return ptr[0]; } - -__device__ 
double2 __ldg(const double2* ptr) { return ptr[0]; } diff --git a/hipamd/tests/src/deviceLib/hip_test_ldg.cpp b/hipamd/tests/src/deviceLib/hip_test_ldg.cpp index 4dea81d9b9..7274baa92c 100644 --- a/hipamd/tests/src/deviceLib/hip_test_ldg.cpp +++ b/hipamd/tests/src/deviceLib/hip_test_ldg.cpp @@ -35,7 +35,7 @@ THE SOFTWARE. #include "hip/hip_vector_types.h" #include "test_common.h" -#if (__hcc_workweek__ >= 16164) || defined(__HIP_PLATFORM_NVCC__) +#if (__hcc_workweek__ >= 16164) || defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_CLANG_ONLY__) #define HIP_ASSERT(x) (assert((x) == hipSuccess)) From 5862ae4b6a8623d6e3eed272238b96d6a848499e Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Thu, 5 Jul 2018 20:49:47 +0000 Subject: [PATCH 28/40] Enable surface functions on HIP clang path Fix surface test on HIP clang path. --- hipamd/include/hip/hcc_detail/hip_runtime.h | 2 +- hipamd/include/hip/hcc_detail/surface_functions.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 48818fb15d..267f970377 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -110,9 +110,9 @@ extern int HIP_TRACE_API; #include #include #include +#include #if __HCC__ #include -#include #endif // __HCC__ // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define. diff --git a/hipamd/include/hip/hcc_detail/surface_functions.h b/hipamd/include/hip/hcc_detail/surface_functions.h index 562cc440ed..607f221901 100644 --- a/hipamd/include/hip/hcc_detail/surface_functions.h +++ b/hipamd/include/hip/hcc_detail/surface_functions.h @@ -23,8 +23,6 @@ THE SOFTWARE. 
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H #define HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H -#include -#include #include #define __SURFACE_FUNCTIONS_DECL__ static __inline__ __device__ From 757eeccdc740cd1182a96c82711a356b131bfe84 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Fri, 6 Jul 2018 11:26:48 -0400 Subject: [PATCH 29/40] Fix hip_mbcnt test typo Ctest did not catch this test failure. When running this test manually before typo, I got __mbcnt_hi() FAILED! . This fix will fix this test for HCC and HIP clang path. --- hipamd/tests/src/deviceLib/hip_mbcnt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/tests/src/deviceLib/hip_mbcnt.cpp b/hipamd/tests/src/deviceLib/hip_mbcnt.cpp index 0dd7169f51..9fdf36a1d3 100644 --- a/hipamd/tests/src/deviceLib/hip_mbcnt.cpp +++ b/hipamd/tests/src/deviceLib/hip_mbcnt.cpp @@ -88,7 +88,7 @@ int main() { for (unsigned int i = 0; i < num_threads; i++) { unsigned int this_lane_id = i % wave_size; unsigned int this_mbcnt_lo = this_lane_id >= 32 ? 32 : this_lane_id; - unsigned int this_mbcnt_hi = this_lane_id < 32 ? 0 : (this_lane_id - 22); + unsigned int this_mbcnt_hi = this_lane_id < 32 ? 
0 : (this_lane_id - 32); if (host_mbcnt_lo[i] != this_mbcnt_lo) mbcnt_lo_errors++; From 1f34993ac1e112b82bc319cca3fde2dd21316809 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Sat, 7 Jul 2018 23:30:39 -0400 Subject: [PATCH 30/40] Add declare of __get_dynamicgroupbaseptr for host compilation --- hipamd/include/hip/hcc_detail/device_functions.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index 716c51a887..0b7dd81d67 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -735,7 +735,6 @@ int64_t __lanemask_lt() } #ifdef __HIP_DEVICE_COMPILE__ - __device__ inline void* __get_dynamicgroupbaseptr() @@ -743,6 +742,10 @@ void* __get_dynamicgroupbaseptr() // Get group segment base pointer. return (char*)__local_to_generic(__to_local(__llvm_amdgcn_groupstaticsize())); } +#else +__device__ +void* __get_dynamicgroupbaseptr(); +#endif // __HIP_DEVICE_COMPILE__ __device__ inline @@ -750,7 +753,6 @@ void *__amdgcn_get_dynamicgroupbaseptr() { return __get_dynamicgroupbaseptr(); } -#endif // __HIP_DEVICE_COMPILE__ // hip.amdgcn.bc - sync threads From 9aaa7922868233fcbc87fa71d717b48af54a25dc Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Mon, 9 Jul 2018 00:18:39 -0400 Subject: [PATCH 31/40] Add workaround __local_to_generic --- hipamd/include/hip/hcc_detail/device_functions.h | 4 +++- hipamd/include/hip/hcc_detail/device_library_decls.h | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index 0b7dd81d67..1938170ce4 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -734,13 +734,15 @@ int64_t __lanemask_lt() return ballot; } +__device__ inline void* __local_to_generic(void* p) { return p; } + 
#ifdef __HIP_DEVICE_COMPILE__ __device__ inline void* __get_dynamicgroupbaseptr() { // Get group segment base pointer. - return (char*)__local_to_generic(__to_local(__llvm_amdgcn_groupstaticsize())); + return (char*)__local_to_generic((void*)__to_local(__llvm_amdgcn_groupstaticsize())); } #else __device__ diff --git a/hipamd/include/hip/hcc_detail/device_library_decls.h b/hipamd/include/hip/hcc_detail/device_library_decls.h index a7e81a1968..53ad7595fe 100644 --- a/hipamd/include/hip/hcc_detail/device_library_decls.h +++ b/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -53,8 +53,6 @@ extern "C" __device__ float __ocml_fmax_f32(float, float); __device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; } #endif //__HIP_DEVICE_COMPILE__ -extern "C" __device__ void* __local_to_generic(__local void* p); - // __llvm_fence* functions from device-libs/irif/src/fence.ll extern "C" __device__ void __llvm_fence_acq_sg(void); extern "C" __device__ void __llvm_fence_acq_wg(void); From f8746ecc64afa1d94ecd68db5fe8b01367a74207 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 10 Jul 2018 17:56:57 +0000 Subject: [PATCH 32/40] Remove activelanemask asm using ockl and llvm instrinsics Replace implementation of __any and __all functions using OCKL functions and replaced __ballot implementation to use llvm intrinsic llvm.amdgcn.icmp.i32 instead of calls to __activelanemask_v4_b64_b1 which is not convergent. 
--- .../include/hip/hcc_detail/device_functions.h | 29 +++++-------------- .../hip/hcc_detail/device_library_decls.h | 2 ++ .../include/hip/hcc_detail/llvm_intrinsics.h | 2 ++ 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index 1938170ce4..5466982878 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -662,51 +662,38 @@ void __named_sync(int a, int b) { __builtin_amdgcn_s_barrier(); } #endif // __HIP_DEVICE_COMPILE__ // warp vote function __all __any __ballot -extern "C" __device__ inline uint64_t __activelanemask_v4_b64_b1(unsigned int input) { - uint64_t output; - // define i64 @__activelanemask_v4_b64_b1(i32 %input) #5 { - // %a = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %input) #9 - // ret i64 %a - // } - __asm("v_cmp_ne_i32_e64 %0, 0, %1" : "=s"(output) : "v"(input)); - return output; -} - -__device__ -inline -unsigned int __activelanecount_u32_b1(unsigned int input) { - return __popcll(__activelanemask_v4_b64_b1(input)); -} - __device__ inline int __all(int predicate) { - return __popcll(__activelanemask_v4_b64_b1(predicate)) == __activelanecount_u32_b1(1); + return __ockl_wfall_i32(predicate); } __device__ inline int __any(int predicate) { #ifdef NVCC_COMPAT - if (__popcll(__activelanemask_v4_b64_b1(predicate)) != 0) + if (__ockl_wfany_i32(predicate) != 0) return 1; else return 0; #else - return __popcll(__activelanemask_v4_b64_b1(predicate)); + return __ockl_wfany_i32(predicate); #endif } +// XXX from llvm/include/llvm/IR/InstrTypes.h +#define ICMP_NE 33 + __device__ inline unsigned long long int __ballot(int predicate) { - return __activelanemask_v4_b64_b1(predicate); + return __llvm_amdgcn_icmp_i32(predicate, 0, ICMP_NE); } __device__ inline unsigned long long int __ballot64(int predicate) { - return __activelanemask_v4_b64_b1(predicate); + return 
__llvm_amdgcn_icmp_i32(predicate, 0, ICMP_NE); } // hip.amdgcn.bc - lanemask diff --git a/hipamd/include/hip/hcc_detail/device_library_decls.h b/hipamd/include/hip/hcc_detail/device_library_decls.h index 53ad7595fe..64e4ff8898 100644 --- a/hipamd/include/hip/hcc_detail/device_library_decls.h +++ b/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -30,6 +30,8 @@ THE SOFTWARE. #include "hip/hcc_detail/host_defines.h" +extern "C" __device__ bool __ockl_wfany_i32(int); +extern "C" __device__ bool __ockl_wfall_i32(int); extern "C" __device__ int32_t __ockl_activelane_u32(void); extern "C" __device__ uint __ockl_mul24_u32(uint, uint); diff --git a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h index 02df3c2fbe..6f2fc45626 100644 --- a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h +++ b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h @@ -31,6 +31,8 @@ THE SOFTWARE. #include "hip/hcc_detail/host_defines.h" +__device__ ulong __llvm_amdgcn_icmp_i32(uint x, uint y, uint z) __asm("llvm.amdgcn.icmp.i32"); + __device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); From bc8bc8cac45b03c08bb03d18c6374bd1fb5480cc Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 10 Jul 2018 18:27:32 +0000 Subject: [PATCH 33/40] Add func attributes to match ocml and ockl --- .../hip/hcc_detail/device_library_decls.h | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_library_decls.h b/hipamd/include/hip/hcc_detail/device_library_decls.h index 64e4ff8898..2a14b0b814 100644 --- a/hipamd/include/hip/hcc_detail/device_library_decls.h +++ b/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -30,23 +30,23 @@ THE SOFTWARE. 
#include "hip/hcc_detail/host_defines.h" -extern "C" __device__ bool __ockl_wfany_i32(int); -extern "C" __device__ bool __ockl_wfall_i32(int); -extern "C" __device__ int32_t __ockl_activelane_u32(void); +extern "C" __device__ __attribute__((const)) bool __ockl_wfany_i32(int); +extern "C" __device__ __attribute__((const)) bool __ockl_wfall_i32(int); +extern "C" __device__ uint __ockl_activelane_u32(void); -extern "C" __device__ uint __ockl_mul24_u32(uint, uint); -extern "C" __device__ int __ockl_mul24_i32(int, int); -extern "C" __device__ uint __ockl_mul_hi_u32(uint, uint); -extern "C" __device__ int __ockl_mul_hi_i32(int, int); -extern "C" __device__ uint __ockl_sad_u32(uint, uint, uint); +extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint); +extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int); +extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint); +extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int); +extern "C" __device__ __attribute__((const)) uint __ockl_sad_u32(uint, uint, uint); -extern "C" __device__ float __ocml_floor_f32(float); -extern "C" __device__ float __ocml_rint_f32(float); -extern "C" __device__ float __ocml_ceil_f32(float); -extern "C" __device__ float __ocml_trunc_f32(float); +extern "C" __device__ __attribute__((const)) float __ocml_floor_f32(float); +extern "C" __device__ __attribute__((const)) float __ocml_rint_f32(float); +extern "C" __device__ __attribute__((const)) float __ocml_ceil_f32(float); +extern "C" __device__ __attribute__((const)) float __ocml_trunc_f32(float); -extern "C" __device__ float __ocml_fmin_f32(float, float); -extern "C" __device__ float __ocml_fmax_f32(float, float); +extern "C" __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); +extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); // Introduce local address space #define __local __attribute__((address_space(3))) From 
9f3404cfea6a5fe0f18aac0ae231c54e68b8d41c Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 10 Jul 2018 18:56:48 +0000 Subject: [PATCH 34/40] Fix min/max, icmp asm and add comment for conversion functions --- hipamd/include/hip/hcc_detail/device_functions.h | 3 ++- hipamd/include/hip/hcc_detail/hip_runtime.h | 4 ++-- hipamd/include/hip/hcc_detail/llvm_intrinsics.h | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index 5466982878..47c25c87d7 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -440,7 +440,8 @@ __device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) { } /* -Rounding modes are not yet supported in HIP + * Rounding modes are not yet supported in HIP + * TODO: Conversion functions are not correct, need to fix when BE is ready */ __device__ static inline float __double2float_rd(double x) { return (double)x; } diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 267f970377..13eaf4fda4 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -202,10 +202,10 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask #endif //__HIP_ARCH_GFX803__ == 1 __device__ inline static int min(int arg1, int arg2) { - return (int)(__ocml_fmin_f32((float) arg1, (float) arg2)); + return (arg1 < arg2) ? arg1 : arg2; } __device__ inline static int max(int arg1, int arg2) { - return (int)(__ocml_fmax_f32((float) arg1, (float) arg2)); + return (arg1 > arg2) ? 
arg1 : arg2; } __host__ inline static int min(int arg1, int arg2) { return std::min(arg1, arg2); } diff --git a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h index 6f2fc45626..dc6fd05c52 100644 --- a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h +++ b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h @@ -31,7 +31,9 @@ THE SOFTWARE. #include "hip/hcc_detail/host_defines.h" -__device__ ulong __llvm_amdgcn_icmp_i32(uint x, uint y, uint z) __asm("llvm.amdgcn.icmp.i32"); +__device__ +__attribute__((convergent)) +ulong __llvm_amdgcn_icmp_i32(uint x, uint y, uint z) __asm("llvm.amdgcn.icmp.i32"); __device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); From e6d155bcd59bb70bccaaa9ec3ebb4baf6f98646b Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 10 Jul 2018 16:49:59 -0400 Subject: [PATCH 35/40] Fix build failure in code_object_bundle.cpp --- hipamd/src/code_object_bundle.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hipamd/src/code_object_bundle.cpp b/hipamd/src/code_object_bundle.cpp index ede7090a52..91258f0c75 100644 --- a/hipamd/src/code_object_bundle.cpp +++ b/hipamd/src/code_object_bundle.cpp @@ -38,7 +38,7 @@ std::string isa_name(std::string triple) hsa_isa_from_name(triple.c_str(), &tmp) != HSA_STATUS_SUCCESS}; if (is_old_rocr) { - auto tmp{triple.substr(triple.rfind('x') + 1)}; + std::string tmp{triple.substr(triple.rfind('x') + 1)}; triple.replace(0, std::string::npos, "AMD:AMDGPU"); for (auto&& x : tmp) { @@ -51,7 +51,7 @@ std::string isa_name(std::string triple) } hsa_isa_t hip_impl::triple_to_hsa_isa(const std::string& triple) { - const auto isa{isa_name(std::move(triple))}; + const std::string isa{isa_name(std::move(triple))}; if (isa.empty()) return hsa_isa_t({}); From 5b53e278a5c0e1312665bf25b843d9b1826d5377 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 11 Jul 2018 12:17:33 +0530 Subject: [PATCH 36/40] Add 
hipGetTextureAlignmentOffset on NVCC path --- hipamd/include/hip/nvcc_detail/hip_runtime_api.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 3b8a3661f7..efa0c3b7ea 100644 --- a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -1239,6 +1239,11 @@ inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDe hipTextureObject_t textureObject) { return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject)); } + +inline static hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref) +{ + return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref)); +} #endif //__CUDACC__ #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H From bab48b86a6d18c335d1812c4a58dc1bf7d467d4d Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 11 Jul 2018 12:37:07 +0530 Subject: [PATCH 37/40] Added hipGetChannelDesc for NVCC path --- hipamd/include/hip/nvcc_detail/hip_runtime_api.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index efa0c3b7ea..5706e1d097 100644 --- a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -1244,6 +1244,11 @@ inline static hipError_t hipGetTextureAlignmentOffset(size_t* offset, const text { return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref)); } + +inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) +{ + return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array)); +} #endif //__CUDACC__ #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H From 22dffde22e783acc6b4db8a8343676d769d7ce69 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 11 Jul 2018 18:21:00 +0300
Subject: [PATCH 38/40] [HIPIFY] Add DEBUG(X) macro compatibility In LLVM 7.0 DEBUG(X) was removed; LLVM_DEBUG(X) should be used instead. --- hipamd/hipify-clang/src/LLVMCompat.h | 4 ++++ hipamd/hipify-clang/src/main.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hipamd/hipify-clang/src/LLVMCompat.h b/hipamd/hipify-clang/src/LLVMCompat.h index 3e2fe1aebb..72b6832012 100644 --- a/hipamd/hipify-clang/src/LLVMCompat.h +++ b/hipamd/hipify-clang/src/LLVMCompat.h @@ -23,6 +23,10 @@ namespace llcompat { #define GET_NUM_ARGS() getNumArgs() #endif +#if LLVM_VERSION_MAJOR < 7 + #define LLVM_DEBUG(X) DEBUG(X) +#endif + void PrintStackTraceOnErrorSignal(); /** diff --git a/hipamd/hipify-clang/src/main.cpp b/hipamd/hipify-clang/src/main.cpp index ccf627b147..e420ab0681 100644 --- a/hipamd/hipify-clang/src/main.cpp +++ b/hipamd/hipify-clang/src/main.cpp @@ -132,7 +132,7 @@ int main(int argc, const char **argv) { // Hipify _all_ the things! if (Tool.runAndSave(&actionFactory)) { - DEBUG(llvm::dbgs() << "Skipped some replacements.\n"); + LLVM_DEBUG(llvm::dbgs() << "Skipped some replacements.\n"); } // Either move the tmpfile to the output, or remove it. From b9c15702d299a5f0687e963bd65146a2701f77c8 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 11 Jul 2018 20:15:49 +0300 Subject: [PATCH 39/40] [HIPIFY] Current trunk LLVM 7.0 initial support Tested with CUDA 8.0, 9.0, 9.1 and 9.2. Only CUDA 8.0 works with LLVM 7.0, due to changes in LLVM trunk since the 6.0 release, which works fine with CUDA 8.0 and 9.0. So there is nothing to do in hipify-clang; hopefully all the CUDA 9.x related issues will be fixed in the 7.0 release.
--- hipamd/hipify-clang/CMakeLists.txt | 6 +++++- hipamd/hipify-clang/src/HipifyAction.cpp | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hipamd/hipify-clang/CMakeLists.txt b/hipamd/hipify-clang/CMakeLists.txt index 8b3fa7e591..5d9070be28 100644 --- a/hipamd/hipify-clang/CMakeLists.txt +++ b/hipamd/hipify-clang/CMakeLists.txt @@ -51,6 +51,10 @@ if(WIN32) target_link_libraries(hipify-clang version) endif() +if ((LLVM_PACKAGE_VERSION VERSION_EQUAL "7") OR (LLVM_PACKAGE_VERSION VERSION_GREATER "7")) + target_link_libraries(hipify-clang clangToolingInclusions) +endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS}") if(MSVC) @@ -95,7 +99,7 @@ if (HIPIFY_CLANG_TESTS) message(STATUS "Please install clang 4.0 or higher.") elseif (CUDA_VERSION VERSION_EQUAL "9.0") message(STATUS "Please install clang 6.0 or higher.") - elseif (CUDA_VERSION VERSION_EQUAL "9.1") + elseif ((CUDA_VERSION VERSION_EQUAL "9.1") OR (CUDA_VERSION VERSION_EQUAL "9.2")) message(STATUS "Please install clang 7.0 or higher.") endif() endif() diff --git a/hipamd/hipify-clang/src/HipifyAction.cpp b/hipamd/hipify-clang/src/HipifyAction.cpp index 7e5ff4357d..21b16e0699 100644 --- a/hipamd/hipify-clang/src/HipifyAction.cpp +++ b/hipamd/hipify-clang/src/HipifyAction.cpp @@ -428,7 +428,11 @@ public: void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token& include_token, StringRef file_name, bool is_angled, clang::CharSourceRange filename_range, const clang::FileEntry* file, StringRef search_path, StringRef relative_path, - const clang::Module* imported) override { + const clang::Module* imported +#if LLVM_VERSION_MAJOR > 6 + , clang::SrcMgr::CharacteristicKind FileType +#endif + ) override { hipifyAction.InclusionDirective(hash_loc, include_token, file_name, is_angled, filename_range, file, search_path, relative_path, imported); } From 4e0d07f07aec7baf8ee15bd090c0975cf282445f Mon Sep 17 00:00:00 2001 
From: Evgeny Mankov Date: Sat, 14 Jul 2018 16:08:19 +0300 Subject: [PATCH 40/40] [HIPIFY] Support of cudaComputeMode / CUcomputemode + update docs + fix typo in hip_runtime_api.h --- ...A_Driver_API_functions_supported_by_HIP.md | 7 ++++++- ..._Runtime_API_functions_supported_by_HIP.md | 10 +++++----- hipamd/hipify-clang/src/CUDA2HipMap.cpp | 20 +++++++++---------- hipamd/include/hip/hip_runtime_api.h | 2 +- 4 files changed, 22 insertions(+), 17 deletions(-) diff --git a/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index c1c30ae019..7e806886c6 100644 --- a/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -231,6 +231,11 @@ | 0x02 |*`CU_MEMORYTYPE_DEVICE`* | | | 0x03 |*`CU_MEMORYTYPE_ARRAY`* | | | 0x04 |*`CU_MEMORYTYPE_UNIFIED`* | | +| enum |***`CUcomputemode`*** |***`hipComputeMode`*** | +| 0 |*`CU_COMPUTEMODE_DEFAULT`* |*`hipComputeModeDefault`* | +| 1 |*`CU_COMPUTEMODE_EXCLUSIVE`* |*`hipComputeModeExclusive`* | +| 2 |*`CU_COMPUTEMODE_PROHIBITED`* |*`hipComputeModeProhibited`* | +| 3 |*`CU_COMPUTEMODE_EXCLUSIVE_PROCESS`* |*`hipComputeModeExclusiveProcess`* | | enum |***`CUoccupancy_flags`*** | | | 0x00 |*`CU_OCCUPANCY_DEFAULT`* | | | 0x01 |*`CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE`* | | @@ -243,7 +248,7 @@ | 6 |*`CU_POINTER_ATTRIBUTE_SYNC_MEMOPS`* | | | 7 |*`CU_POINTER_ATTRIBUTE_BUFFER_ID`* | | | 8 |*`CU_POINTER_ATTRIBUTE_IS_MANAGED`* | | -| enum |***`CUmemorytype`*** | | +| enum |***`CUresourcetype`*** | | | 0x00 |*`CU_RESOURCE_TYPE_ARRAY`* | | | 0x01 |*`CU_RESOURCE_TYPE_MIPMAPPED_ARRAY`* | | | 0x02 |*`CU_RESOURCE_TYPE_LINEAR`* | | diff --git a/hipamd/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/hipamd/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index 65528da7fb..dca2683b12 100644 --- 
a/hipamd/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/hipamd/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -418,11 +418,11 @@ | 1 |*`cudaChannelFormatKindUnsigned`* |*`hipChannelFormatKindUnsigned`* | | 2 |*`cudaChannelFormatKindFloat`* |*`hipChannelFormatKindFloat`* | | 3 |*`cudaChannelFormatKindNone`* |*`hipChannelFormatKindNone`* | -| enum |***`cudaComputeMode`*** | | -| 0 |*`cudaComputeModeDefault`* | | -| 1 |*`cudaComputeModeExclusive`* | | -| 2 |*`cudaComputeModeProhibited`* | | -| 3 |*`cudaComputeModeExclusiveProcess`* | | +| enum |***`cudaComputeMode`*** |***`hipComputeMode`*** | +| 0 |*`cudaComputeModeDefault`* |*`hipComputeModeDefault`* | +| 1 |*`cudaComputeModeExclusive`* |*`hipComputeModeExclusive`* | +| 2 |*`cudaComputeModeProhibited`* |*`hipComputeModeProhibited`* | +| 3 |*`cudaComputeModeExclusiveProcess`* |*`hipComputeModeExclusiveProcess`* | | enum |***`cudaDeviceAttr`*** |***`hipDeviceAttribute_t`*** | | 1 |*`cudaDevAttrMaxThreadsPerBlock`* |*`hipDeviceAttributeMaxThreadsPerBlock`* | | 2 |*`cudaDevAttrMaxBlockDimX`* |*`hipDeviceAttributeMaxBlockDimX`* | diff --git a/hipamd/hipify-clang/src/CUDA2HipMap.cpp b/hipamd/hipify-clang/src/CUDA2HipMap.cpp index 7d10b35e48..588642ccb5 100644 --- a/hipamd/hipify-clang/src/CUDA2HipMap.cpp +++ b/hipamd/hipify-clang/src/CUDA2HipMap.cpp @@ -24,7 +24,7 @@ const std::map CUDA_TYPE_NAME_MAP{ {"CUaddress_mode", {"hipAddress_mode", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, {"CUarray_cubemap_face", {"hipArray_cubemap_face", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, {"CUarray_format", {"hipArray_format", CONV_TYPE, API_DRIVER}}, - {"CUcomputemode", {"hipComputemode", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // API_RUNTIME ANALOGUE (cudaComputeMode) + {"CUcomputemode", {"hipComputeMode", CONV_TYPE, API_DRIVER}}, // API_RUNTIME ANALOGUE (cudaComputeMode) {"CUmem_advise", {"hipMemAdvise", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // API_RUNTIME ANALOGUE (cudaComputeMode) 
{"CUmem_range_attribute", {"hipMemRangeAttribute", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // API_RUNTIME ANALOGUE (cudaMemRangeAttribute) {"CUctx_flags", {"hipCctx_flags", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, @@ -236,7 +236,7 @@ const std::map CUDA_TYPE_NAME_MAP{ {"cudaDeviceAttr", {"hipDeviceAttribute_t", CONV_TYPE, API_RUNTIME}}, // API_DRIVER ANALOGUE (CUdevice_attribute) {"cudaDeviceProp", {"hipDeviceProp_t", CONV_TYPE, API_RUNTIME}}, {"cudaDeviceP2PAttr", {"hipDeviceP2PAttribute", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // API_DRIVER ANALOGUE (CUdevice_P2PAttribute) - {"cudaComputeMode", {"hipComputeMode", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // API_DRIVER ANALOGUE (CUcomputemode) + {"cudaComputeMode", {"hipComputeMode", CONV_TYPE, API_RUNTIME}}, // API_DRIVER ANALOGUE (CUcomputemode) {"cudaFuncCache", {"hipFuncCache_t", CONV_CACHE, API_RUNTIME}}, // API_Driver ANALOGUE (CUfunc_cache) {"cudaFuncAttributes", {"hipFuncAttributes", CONV_EXEC, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaSharedMemConfig", {"hipSharedMemConfig", CONV_TYPE, API_RUNTIME}}, @@ -628,10 +628,10 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CU_AD_FORMAT_FLOAT", {"HIP_AD_FORMAT_FLOAT", CONV_TYPE, API_DRIVER}}, // 0x20 // CUcomputemode enum - {"CU_COMPUTEMODE_DEFAULT", {"hipComputeModeDefault", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0 // API_RUNTIME ANALOGUE (cudaComputeModeDefault = 0) - {"CU_COMPUTEMODE_EXCLUSIVE", {"hipComputeModeExclusive", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 1 // API_RUNTIME ANALOGUE (cudaComputeModeExclusive = 1) - {"CU_COMPUTEMODE_PROHIBITED", {"hipComputeModeProhibited", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 2 // API_RUNTIME ANALOGUE (cudaComputeModeProhibited = 2) - {"CU_COMPUTEMODE_EXCLUSIVE_PROCESS", {"hipComputeModeExclusiveProcess", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 3 // API_RUNTIME ANALOGUE (cudaComputeModeExclusiveProcess = 3) + {"CU_COMPUTEMODE_DEFAULT", {"hipComputeModeDefault", CONV_TYPE, API_DRIVER}}, // 0 // 
API_RUNTIME ANALOGUE (cudaComputeModeDefault = 0) + {"CU_COMPUTEMODE_EXCLUSIVE", {"hipComputeModeExclusive", CONV_TYPE, API_DRIVER}}, // 1 // API_RUNTIME ANALOGUE (cudaComputeModeExclusive = 1) + {"CU_COMPUTEMODE_PROHIBITED", {"hipComputeModeProhibited", CONV_TYPE, API_DRIVER}}, // 2 // API_RUNTIME ANALOGUE (cudaComputeModeProhibited = 2) + {"CU_COMPUTEMODE_EXCLUSIVE_PROCESS", {"hipComputeModeExclusiveProcess", CONV_TYPE, API_DRIVER}}, // 3 // API_RUNTIME ANALOGUE (cudaComputeModeExclusiveProcess = 3) // Memory advise values // {"CUmem_advise_enum", {"hipMemAdvise", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, @@ -1698,10 +1698,10 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudaDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, // API_DRIVER ANALOGUE (cuDeviceGetP2PAttribute) // enum cudaComputeMode - {"cudaComputeModeDefault", {"hipComputeModeDefault", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_DEFAULT = 0) - {"cudaComputeModeExclusive", {"hipComputeModeExclusive", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE = 1) - {"cudaComputeModeProhibited", {"hipComputeModeProhibited", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_PROHIBITED = 2) - {"cudaComputeModeExclusiveProcess", {"hipComputeModeExclusiveProcess", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3) + {"cudaComputeModeDefault", {"hipComputeModeDefault", CONV_TYPE, API_RUNTIME}}, // 0 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_DEFAULT = 0) + {"cudaComputeModeExclusive", {"hipComputeModeExclusive", CONV_TYPE, API_RUNTIME}}, // 1 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE = 1) + {"cudaComputeModeProhibited", {"hipComputeModeProhibited", CONV_TYPE, API_RUNTIME}}, // 2 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_PROHIBITED = 2) + {"cudaComputeModeExclusiveProcess", 
{"hipComputeModeExclusiveProcess", CONV_TYPE, API_RUNTIME}}, // 3 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3) // Device Flags {"cudaGetDeviceFlags", {"hipGetDeviceFlags", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, diff --git a/hipamd/include/hip/hip_runtime_api.h b/hipamd/include/hip/hip_runtime_api.h index 2ff562cc01..cd7af65265 100644 --- a/hipamd/include/hip/hip_runtime_api.h +++ b/hipamd/include/hip/hip_runtime_api.h @@ -297,7 +297,7 @@ enum hipComputeMode { hipComputeModeDefault = 0, hipComputeModeExclusive = 1, hipComputeModeProhibited = 2, - hipComputeModeExcusiveProcess = 3 + hipComputeModeExclusiveProcess = 3 }; /**