Let's try this again...

This commit is contained in:
Alex Voicu
2018-06-25 17:49:50 +01:00
vanhempi 859133a045
commit 9d91b802a5
5 muutettua tiedostoa jossa 646 lisäystä ja 208 poistoa
+523 -48
Näytä tiedosto
@@ -34,6 +34,8 @@ THE SOFTWARE.
#include "hip/hcc_detail/host_defines.h"
#include <type_traits>
#if defined(__clang__)
#define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n)))
#elif defined(__GNUC__) // N.B.: GCC does not support .xyzw syntax.
@@ -43,65 +45,538 @@ THE SOFTWARE.
__attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T))))
#endif
typedef unsigned char uchar1 __NATIVE_VECTOR__(1, unsigned char);
typedef unsigned char uchar2 __NATIVE_VECTOR__(2, unsigned char);
typedef unsigned char uchar3 __NATIVE_VECTOR__(3, unsigned char);
typedef unsigned char uchar4 __NATIVE_VECTOR__(4, unsigned char);
#if defined(__cplusplus)
template<typename T, unsigned int n> struct HIP_vector_base;
typedef char char1 __NATIVE_VECTOR__(1, char);
typedef char char2 __NATIVE_VECTOR__(2, char);
typedef char char3 __NATIVE_VECTOR__(3, char);
typedef char char4 __NATIVE_VECTOR__(4, char);
template<typename T>
struct HIP_vector_base<T, 1> {
typedef T Native_vec_ __NATIVE_VECTOR__(1, T);
typedef unsigned short ushort1 __NATIVE_VECTOR__(1, unsigned short);
typedef unsigned short ushort2 __NATIVE_VECTOR__(2, unsigned short);
typedef unsigned short ushort3 __NATIVE_VECTOR__(3, unsigned short);
typedef unsigned short ushort4 __NATIVE_VECTOR__(4, unsigned short);
union {
Native_vec_ data;
struct {
typename std::decay<
decltype(std::declval<Native_vec_>().x)>::type x;
};
};
};
typedef short short1 __NATIVE_VECTOR__(1, short);
typedef short short2 __NATIVE_VECTOR__(2, short);
typedef short short3 __NATIVE_VECTOR__(3, short);
typedef short short4 __NATIVE_VECTOR__(4, short);
template<typename T>
struct HIP_vector_base<T, 2> {
typedef T Native_vec_ __NATIVE_VECTOR__(2, T);
typedef unsigned int uint1 __NATIVE_VECTOR__(1, unsigned int);
typedef unsigned int uint2 __NATIVE_VECTOR__(2, unsigned int);
typedef unsigned int uint3 __NATIVE_VECTOR__(3, unsigned int);
typedef unsigned int uint4 __NATIVE_VECTOR__(4, unsigned int);
union {
Native_vec_ data;
struct {
typename std::decay<
decltype(std::declval<Native_vec_>().x)>::type x;
typename std::decay<
decltype(std::declval<Native_vec_>().y)>::type y;
};
};
};
typedef int int1 __NATIVE_VECTOR__(1, int);
typedef int int2 __NATIVE_VECTOR__(2, int);
typedef int int3 __NATIVE_VECTOR__(3, int);
typedef int int4 __NATIVE_VECTOR__(4, int);
template<typename T>
struct HIP_vector_base<T, 3> {
typedef T Native_vec_ __NATIVE_VECTOR__(3, T);
typedef unsigned long ulong1 __NATIVE_VECTOR__(1, unsigned long);
typedef unsigned long ulong2 __NATIVE_VECTOR__(2, unsigned long);
typedef unsigned long ulong3 __NATIVE_VECTOR__(3, unsigned long);
typedef unsigned long ulong4 __NATIVE_VECTOR__(4, unsigned long);
union {
Native_vec_ data;
struct {
typename std::decay<
decltype(std::declval<Native_vec_>().x)>::type x;
typename std::decay<
decltype(std::declval<Native_vec_>().y)>::type y;
typename std::decay<
decltype(std::declval<Native_vec_>().z)>::type z;
};
};
};
typedef long long1 __NATIVE_VECTOR__(1, long);
typedef long long2 __NATIVE_VECTOR__(2, long);
typedef long long3 __NATIVE_VECTOR__(3, long);
typedef long long4 __NATIVE_VECTOR__(4, long);
template<typename T>
struct HIP_vector_base<T, 4> {
typedef T Native_vec_ __NATIVE_VECTOR__(4, T);
typedef unsigned long long ulonglong1 __NATIVE_VECTOR__(1, unsigned long long);
typedef unsigned long long ulonglong2 __NATIVE_VECTOR__(2, unsigned long long);
typedef unsigned long long ulonglong3 __NATIVE_VECTOR__(3, unsigned long long);
typedef unsigned long long ulonglong4 __NATIVE_VECTOR__(4, unsigned long long);
union {
Native_vec_ data;
struct {
typename std::decay<
decltype(std::declval<Native_vec_>().x)>::type x;
typename std::decay<
decltype(std::declval<Native_vec_>().y)>::type y;
typename std::decay<
decltype(std::declval<Native_vec_>().z)>::type z;
typename std::decay<
decltype(std::declval<Native_vec_>().w)>::type w;
};
};
};
typedef long long longlong1 __NATIVE_VECTOR__(1, long long);
typedef long long longlong2 __NATIVE_VECTOR__(2, long long);
typedef long long longlong3 __NATIVE_VECTOR__(3, long long);
typedef long long longlong4 __NATIVE_VECTOR__(4, long long);
template<typename T, unsigned int rank>
struct HIP_vector_type : public HIP_vector_base<T, rank> {
using HIP_vector_base<T, rank>::data;
using typename HIP_vector_base<T, rank>::Native_vec_;
typedef float float1 __NATIVE_VECTOR__(1, float);
typedef float float2 __NATIVE_VECTOR__(2, float);
typedef float float3 __NATIVE_VECTOR__(3, float);
typedef float float4 __NATIVE_VECTOR__(4, float);
__host__ __device__
HIP_vector_type() = default;
template<
typename U,
typename std::enable_if<
std::is_convertible<U, T>{}>::type* = nullptr>
__host__ __device__
explicit
HIP_vector_type(U x) noexcept { data = Native_vec_(x); }
template< // TODO: constrain based on type as well.
typename... Us,
typename std::enable_if<sizeof...(Us) == rank>::type* = nullptr>
__host__ __device__
HIP_vector_type(Us... xs) noexcept { data = Native_vec_{xs...}; }
__host__ __device__
HIP_vector_type(const HIP_vector_type&) = default;
__host__ __device__
HIP_vector_type(HIP_vector_type&&) = default;
__host__ __device__
~HIP_vector_type() = default;
typedef double double1 __NATIVE_VECTOR__(1, double);
typedef double double2 __NATIVE_VECTOR__(2, double);
typedef double double3 __NATIVE_VECTOR__(3, double);
typedef double double4 __NATIVE_VECTOR__(4, double);
__host__ __device__
HIP_vector_type& operator=(const HIP_vector_type&) = default;
__host__ __device__
HIP_vector_type& operator=(HIP_vector_type&&) = default;
// Operators
__host__ __device__
HIP_vector_type& operator++() noexcept
{
data += Native_vec_(1);
return *this;
}
__host__ __device__
HIP_vector_type operator++(int) noexcept
{
auto tmp(*this);
++*this;
return tmp;
}
__host__ __device__
HIP_vector_type& operator--() noexcept
{
data -= Native_vec_(1);
return *this;
}
__host__ __device__
HIP_vector_type operator--(int) noexcept
{
auto tmp(*this);
--*this;
return tmp;
}
__host__ __device__
HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
{
data += x.data;
return *this;
}
__host__ __device__
HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
{
data -= x.data;
return *this;
}
__host__ __device__
HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
{
data *= x.data;
return *this;
}
__host__ __device__
HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
{
data /= x.data;
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
__host__ __device__
HIP_vector_type operator-() noexcept
{
auto tmp(*this);
tmp.data = -tmp.data;
return tmp;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__host__ __device__
HIP_vector_type operator~() noexcept
{
HIP_vector_type r{*this};
r.data = ~r.data;
return r;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__host__ __device__
HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
{
data %= x.data;
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__host__ __device__
HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
{
data ^= x.data;
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__host__ __device__
HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
{
data |= x.data;
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__host__ __device__
HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
{
data &= x.data;
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__host__ __device__
HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
{
data >>= x.data;
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__host__ __device__
HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
{
data <<= x.data;
return *this;
}
};
template<typename T, unsigned int n>
__host__ __device__
inline
HIP_vector_type<T, n> operator+(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} += y;
}
template<typename T, unsigned int n>
__host__ __device__
inline
HIP_vector_type<T, n> operator-(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} -= y;
}
template<typename T, unsigned int n>
__host__ __device__
inline
HIP_vector_type<T, n> operator*(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} *= y;
}
template<typename T, unsigned int n>
__host__ __device__
inline
HIP_vector_type<T, n> operator/(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} /= y;
}
template<typename T, unsigned int n>
__host__ __device__
inline
bool operator==(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
auto tmp = x.data == y.data;
for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
return true;
}
template<typename T, unsigned int n>
__host__ __device__
inline
bool operator!=(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return !(x == y);
}
template<typename T, unsigned int n>
__host__ __device__
inline
bool operator<(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
auto tmp = x.data < y.data;
for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
return true;
}
template<typename T, unsigned int n>
__host__ __device__
inline
bool operator>(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return y < x;
}
template<typename T, unsigned int n>
__host__ __device__
inline
bool operator<=(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return !(y < x);
}
template<typename T, unsigned int n>
__host__ __device__
inline
bool operator>=(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return !(x < y);
}
template<
typename T,
unsigned int n,
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
inline
HIP_vector_type<T, n> operator%(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} %= y;
}
template<
typename T,
unsigned int n,
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
inline
HIP_vector_type<T, n> operator^(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} ^= y;
}
template<
typename T,
unsigned int n,
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
inline
HIP_vector_type<T, n> operator|(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} |= y;
}
template<
typename T,
unsigned int n,
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
inline
HIP_vector_type<T, n> operator&(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} &= y;
}
template<
typename T,
unsigned int n,
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
inline
HIP_vector_type<T, n> operator>>(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} >>= y;
}
template<
typename T,
unsigned int n,
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
inline
HIP_vector_type<T, n> operator<<(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} <<= y;
}
// TODO: the following are rather dubious in terms of general utility.
template<typename T, unsigned int n>
inline
bool operator||(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
auto tmp = x.data || y.data;
for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
return true;
}
template<typename T, unsigned int n>
inline
bool operator&&(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
auto tmp = x.data && y.data;
for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
return true;
}
#define __MAKE_VECTOR_TYPE__(CUDA_name, T, n) \
using CUDA_name = HIP_vector_type<T, n>;
#else
typedef unsigned char uchar1 __NATIVE_VECTOR__(1, unsigned char);
typedef unsigned char uchar2 __NATIVE_VECTOR__(2, unsigned char);
typedef unsigned char uchar3 __NATIVE_VECTOR__(3, unsigned char);
typedef unsigned char uchar4 __NATIVE_VECTOR__(4, unsigned char);
typedef char char1 __NATIVE_VECTOR__(1, char);
typedef char char2 __NATIVE_VECTOR__(2, char);
typedef char char3 __NATIVE_VECTOR__(3, char);
typedef char char4 __NATIVE_VECTOR__(4, char);
typedef unsigned short ushort1 __NATIVE_VECTOR__(1, unsigned short);
typedef unsigned short ushort2 __NATIVE_VECTOR__(2, unsigned short);
typedef unsigned short ushort3 __NATIVE_VECTOR__(3, unsigned short);
typedef unsigned short ushort4 __NATIVE_VECTOR__(4, unsigned short);
typedef short short1 __NATIVE_VECTOR__(1, short);
typedef short short2 __NATIVE_VECTOR__(2, short);
typedef short short3 __NATIVE_VECTOR__(3, short);
typedef short short4 __NATIVE_VECTOR__(4, short);
typedef unsigned int uint1 __NATIVE_VECTOR__(1, unsigned int);
typedef unsigned int uint2 __NATIVE_VECTOR__(2, unsigned int);
typedef unsigned int uint3 __NATIVE_VECTOR__(3, unsigned int);
typedef unsigned int uint4 __NATIVE_VECTOR__(4, unsigned int);
typedef int int1 __NATIVE_VECTOR__(1, int);
typedef int int2 __NATIVE_VECTOR__(2, int);
typedef int int3 __NATIVE_VECTOR__(3, int);
typedef int int4 __NATIVE_VECTOR__(4, int);
typedef unsigned long ulong1 __NATIVE_VECTOR__(1, unsigned long);
typedef unsigned long ulong2 __NATIVE_VECTOR__(2, unsigned long);
typedef unsigned long ulong3 __NATIVE_VECTOR__(3, unsigned long);
typedef unsigned long ulong4 __NATIVE_VECTOR__(4, unsigned long);
typedef long long1 __NATIVE_VECTOR__(1, long);
typedef long long2 __NATIVE_VECTOR__(2, long);
typedef long long3 __NATIVE_VECTOR__(3, long);
typedef long long4 __NATIVE_VECTOR__(4, long);
typedef unsigned long long ulonglong1 __NATIVE_VECTOR__(1, unsigned long long);
typedef unsigned long long ulonglong2 __NATIVE_VECTOR__(2, unsigned long long);
typedef unsigned long long ulonglong3 __NATIVE_VECTOR__(3, unsigned long long);
typedef unsigned long long ulonglong4 __NATIVE_VECTOR__(4, unsigned long long);
typedef long long longlong1 __NATIVE_VECTOR__(1, long long);
typedef long long longlong2 __NATIVE_VECTOR__(2, long long);
typedef long long longlong3 __NATIVE_VECTOR__(3, long long);
typedef long long longlong4 __NATIVE_VECTOR__(4, long long);
typedef float float1 __NATIVE_VECTOR__(1, float);
typedef float float2 __NATIVE_VECTOR__(2, float);
typedef float float3 __NATIVE_VECTOR__(3, float);
typedef float float4 __NATIVE_VECTOR__(4, float);
typedef double double1 __NATIVE_VECTOR__(1, double);
typedef double double2 __NATIVE_VECTOR__(2, double);
typedef double double3 __NATIVE_VECTOR__(3, double);
typedef double double4 __NATIVE_VECTOR__(4, double);
#endif
__MAKE_VECTOR_TYPE__(uchar1, unsigned char, 1);
__MAKE_VECTOR_TYPE__(uchar2, unsigned char, 2);
__MAKE_VECTOR_TYPE__(uchar3, unsigned char, 3);
__MAKE_VECTOR_TYPE__(uchar4, unsigned char, 4);
__MAKE_VECTOR_TYPE__(char1, char, 1);
__MAKE_VECTOR_TYPE__(char2, char, 2);
__MAKE_VECTOR_TYPE__(char3, char, 3);
__MAKE_VECTOR_TYPE__(char4, char, 4);
__MAKE_VECTOR_TYPE__(ushort1, unsigned short, 1);
__MAKE_VECTOR_TYPE__(ushort2, unsigned short, 2);
__MAKE_VECTOR_TYPE__(ushort3, unsigned short, 3);
__MAKE_VECTOR_TYPE__(ushort4, unsigned short, 4);
__MAKE_VECTOR_TYPE__(short1, short, 1);
__MAKE_VECTOR_TYPE__(short2, short, 2);
__MAKE_VECTOR_TYPE__(short3, short, 3);
__MAKE_VECTOR_TYPE__(short4, short, 4);
__MAKE_VECTOR_TYPE__(uint1, unsigned int, 1);
__MAKE_VECTOR_TYPE__(uint2, unsigned int, 2);
__MAKE_VECTOR_TYPE__(uint3, unsigned int, 3);
__MAKE_VECTOR_TYPE__(uint4, unsigned int, 4);
__MAKE_VECTOR_TYPE__(int1, int, 1);
__MAKE_VECTOR_TYPE__(int2, int, 2);
__MAKE_VECTOR_TYPE__(int3, int, 3);
__MAKE_VECTOR_TYPE__(int4, int, 4);
__MAKE_VECTOR_TYPE__(ulong1, unsigned long, 1);
__MAKE_VECTOR_TYPE__(ulong2, unsigned long, 2);
__MAKE_VECTOR_TYPE__(ulong3, unsigned long, 3);
__MAKE_VECTOR_TYPE__(ulong4, unsigned long, 4);
__MAKE_VECTOR_TYPE__(long1, long, 1);
__MAKE_VECTOR_TYPE__(long2, long, 2);
__MAKE_VECTOR_TYPE__(long3, long, 3);
__MAKE_VECTOR_TYPE__(long4, long, 4);
__MAKE_VECTOR_TYPE__(ulonglong1, unsigned long long, 1);
__MAKE_VECTOR_TYPE__(ulonglong2, unsigned long long, 2);
__MAKE_VECTOR_TYPE__(ulonglong3, unsigned long long, 3);
__MAKE_VECTOR_TYPE__(ulonglong4, unsigned long long, 4);
__MAKE_VECTOR_TYPE__(longlong1, long long, 1);
__MAKE_VECTOR_TYPE__(longlong2, long long, 2);
__MAKE_VECTOR_TYPE__(longlong3, long long, 3);
__MAKE_VECTOR_TYPE__(longlong4, long long, 4);
__MAKE_VECTOR_TYPE__(float1, float, 1);
__MAKE_VECTOR_TYPE__(float2, float, 2);
__MAKE_VECTOR_TYPE__(float3, float, 3);
__MAKE_VECTOR_TYPE__(float4, float, 4);
__MAKE_VECTOR_TYPE__(double1, double, 1);
__MAKE_VECTOR_TYPE__(double2, double, 2);
__MAKE_VECTOR_TYPE__(double3, double, 3);
__MAKE_VECTOR_TYPE__(double4, double, 4);
#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
__device__ __host__ \
+18 -18
Näytä tiedosto
@@ -355,33 +355,33 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask
__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) {
char4 out;
unsigned one1 = in1.a & MASK1;
unsigned one2 = in2.a & MASK1;
out.a = (one1 + one2) & MASK1;
one1 = in1.a & MASK2;
one2 = in2.a & MASK2;
out.a = out.a | ((one1 + one2) & MASK2);
unsigned one1 = in1.w & MASK1;
unsigned one2 = in2.w & MASK1;
out.w = (one1 + one2) & MASK1;
one1 = in1.w & MASK2;
one2 = in2.w & MASK2;
out.w = out.w | ((one1 + one2) & MASK2);
return out;
}
__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
char4 out;
unsigned one1 = in1.a & MASK1;
unsigned one2 = in2.a & MASK1;
out.a = (one1 - one2) & MASK1;
one1 = in1.a & MASK2;
one2 = in2.a & MASK2;
out.a = out.a | ((one1 - one2) & MASK2);
unsigned one1 = in1.w & MASK1;
unsigned one2 = in2.w & MASK1;
out.w = (one1 - one2) & MASK1;
one1 = in1.w & MASK2;
one2 = in2.w & MASK2;
out.w = out.w | ((one1 - one2) & MASK2);
return out;
}
__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
char4 out;
unsigned one1 = in1.a & MASK1;
unsigned one2 = in2.a & MASK1;
out.a = (one1 * one2) & MASK1;
one1 = in1.a & MASK2;
one2 = in2.a & MASK2;
out.a = out.a | ((one1 * one2) & MASK2);
unsigned one1 = in1.w & MASK1;
unsigned one2 = in2.w & MASK1;
out.w = (one1 * one2) & MASK1;
one1 = in1.w & MASK2;
one2 = in2.w & MASK2;
out.w = out.w | ((one1 * one2) & MASK2);
return out;
}
+51 -53
Näytä tiedosto
@@ -52,20 +52,20 @@ template<
__device__
bool integer_unary_tests(V& f1, V& f2) {
f1 %= f2;
if (!cmp(f1, 0)) return false;
if (f1 != V{0}) return false;
f1 &= f2;
if (!cmp(f1, 0)) return false;
if (f1 != V{0}) return false;
f1 |= f2;
if (!cmp(f1, 1)) return false;
if (f1 != V{1}) return false;
f1 ^= f2;
if (!cmp(f1, 0)) return false;
f1.x = 1;
if (f1 != V{0}) return false;
f1 = V{1};
f1 <<= f2;
if (!cmp(f1, 2)) return false;
if (f1 != V{2}) return false;
f1 >>= f2;
if (!cmp(f1, 1)) return false;
if (f1 != V{1}) return false;
f2 = ~f1;
return cmp(f2, ~1);
return f2 == V{~1};
}
template<
@@ -74,17 +74,17 @@ template<
__device__
bool integer_binary_tests(V& f1, V& f2, V& f3) {
f3 = f1 % f2;
if (!cmp(f3, 0)) return false;
if (f3 != V{0}) return false;
f1 = f3 & f2;
if (!cmp(f1, 0)) return false;
if (f1 != V{0}) return false;
f2 = f1 ^ f3;
if (!cmp(f2, 0)) return false;
f1.x = 1;
f2.x = 2;
if (f2 != V{0}) return false;
f1 = V{1};
f2 = V{2};
f3 = f1 << f2;
if (!cmp(f3, 4)) return false;
if (f3 != V{4}) return false;
f2 = f3 >> f1;
if (!cmp(f2, 2)) return false;
return f2 == V{2};
}
template<typename V>
@@ -107,60 +107,58 @@ bool constructor_tests() {
template<typename V>
bool TestVectorType() {
V f1(1);
V f2(1);
V f1{1};
V f2{1};
V f3 = f1 + f2;
if (!cmp(f3, 2)) return false;
if (f3 != V{2}) return false;
f2 = f3 - f1;
if (!cmp(f2, 1)) return false;
if (f2 != V{1}) return false;
f1 = f2 * f3;
if (!cmp(f1, 2)) return false;
if (f1 != V{2}) return false;
f2 = f1 / f3;
if (!cmp(f2, 2 / 2)) return false;
if (f2 != V{1}) return false;
if (!integer_binary_tests(f1, f2, f3)) return false;
f1 = V(2);
f2 = V(1);
f1 = V{2};
f2 = V{1};
f1 += f2;
if (!cmp(f1, 3)) return false;
if (f1 != V{3}) return false;
f1 -= f2;
if (!cmp(f1, 2)) return false;
if (f1 != V{2}) return false;
f1 *= f2;
if (!cmp(f1, 2)) return false;
if (f1 != V{2}) return false;
f1 /= f2;
if (!cmp(f1, 2)) return false;
if (f1 != V{2}) return false;
if (!integer_unary_tests(f1, f2)) return false;
#if false // We do not enable nullary increment / decrement yet.
f1 = V(2);
f2 = f1++;
if (!cmp(f1, 3)) return false;
if (!cmp(f2, 2)) return false;
f2 = f1--;
if (!cmp(f2, 3)) return false;
if (!cmp(f1, 2)) return false;
f2 = ++f1;
if (!cmp(f1, 3)) return false;
if (!cmp(f2, 3)) return false;
f2 = --f1;
if (!cmp(f1, 2)) return false;
if (!cmp(f2, 2)) return false;
#endif
f1 = V{2};
f2 = f1++;
if (f1 != V{3}) return false;
if (f2 != V{2}) return false;
f2 = f1--;
if (f2 != V{3}) return false;
if (f1 != V{2}) return false;
f2 = ++f1;
if (f1 != V{3}) return false;
if (f2 != V{3}) return false;
f2 = --f1;
if (f1 != V{2}) return false;
if (f2 != V{2}) return false;
if (!constructor_tests<V>()) return false;
f1 = V(3);
f2 = V(4);
f3 = V(3);
if (cmp(f1 == f2, true)) return false;
if (cmp(f1 != f2, false)) return false;
if (cmp(f1 < f2, false)) return false;
if (cmp(f2 > f1, false)) return false;
if (cmp(f1 >= f3, false)) return false;
if (cmp(f1 <= f3, false)) return false;
f1 = V{3};
f2 = V{4};
f3 = V{3};
if (f1 == f2) return false;
if (!(f1 != f2)) return false;
if (!(f1 < f2)) return false;
if (!(f2 > f1)) return false;
if (!(f1 >= f3)) return false;
if (!(f1 <= f3)) return false;
if (cmp(f1 && f2, false)) return false;
if (cmp(f1 || f2, false)) return false;
if (!(f1 && f2)) return false;
if (!(f1 || f2)) return false;
return true;
}
@@ -51,20 +51,23 @@ template<
__device__
bool integer_unary_tests(V& f1, V& f2) {
f1 %= f2;
if (!cmp(f1, 0)) return false;
if (f1 != V{0}) return false;
f1 &= f2;
if (!cmp(f1, 0)) return false;
if (f1 != V{0}) return false;
f1 |= f2;
if (!cmp(f1, 1)) return false;
if (f1 != V{1}) return false;
f1 ^= f2;
if (!cmp(f1, 0)) return false;
f1.x = 1;
if (f1 != V{0}) return false;
f1 = V{1};
f1 <<= f2;
if (!cmp(f1, 2)) return false;
if (f1 != V{2}) return false;
f1 >>= f2;
if (!cmp(f1, 1)) return false;
if (f1 != V{1}) return false;
f2 = ~f1;
return cmp(f2, ~1);
return f2 == V{~1};
return true;
}
template<
@@ -81,74 +84,72 @@ template<
__device__
bool integer_binary_tests(V& f1, V& f2, V& f3) {
f3 = f1 % f2;
if (!cmp(f3, 0)) return false;
if (f3 != V{0}) return false;
f1 = f3 & f2;
if (!cmp(f1, 0)) return false;
if (f1 != V{0}) return false;
f2 = f1 ^ f3;
if (!cmp(f2, 0)) return false;
f1.x = 1;
f2.x = 2;
if (f2 != V{0}) return false;
f1 = V{1};
f2 = V{2};
f3 = f1 << f2;
if (!cmp(f3, 4)) return false;
if (f3 != V{4}) return false;
f2 = f3 >> f1;
if (!cmp(f2, 2)) return false;
return f2 == V{2};
}
template<typename V>
__device__
bool TestVectorType() {
V f1(1);
V f2(1);
V f1{1};
V f2{1};
V f3 = f1 + f2;
if (!cmp(f3, 2)) return false;
if (f3 != V{2}) return false;
f2 = f3 - f1;
if (!cmp(f2, 1)) return false;
if (f2 != V{1}) return false;
f1 = f2 * f3;
if (!cmp(f1, 2)) return false;
if (f1 != V{2}) return false;
f2 = f1 / f3;
if (!cmp(f2, 2 / 2)) return false;
if (f2 != V{1}) return false;
if (!integer_binary_tests(f1, f2, f3)) return false;
f1 = V(2);
f2 = V(1);
f1 = V{2};
f2 = V{1};
f1 += f2;
if (!cmp(f1, 3)) return false;
if (f1 != V{3}) return false;
f1 -= f2;
if (!cmp(f1, 2)) return false;
if (f1 != V{2}) return false;
f1 *= f2;
if (!cmp(f1, 2)) return false;
if (f1 != V{2}) return false;
f1 /= f2;
if (!cmp(f1, 2)) return false;
if (f1 != V{2}) return false;
if (!integer_unary_tests(f1, f2)) return false;
#if false // We do not enable nullary increment / decrement yet.
f1 = V(2);
f2 = f1++;
if (!cmp(f1, 3)) return false;
if (!cmp(f2, 2)) return false;
f2 = f1--;
if (!cmp(f2, 3)) return false;
if (!cmp(f1, 2)) return false;
f2 = ++f1;
if (!cmp(f1, 3)) return false;
if (!cmp(f2, 3)) return false;
f2 = --f1;
if (!cmp(f1, 2)) return false;
if (!cmp(f2, 2)) return false;
#endif
f1 = V{2};
f2 = f1++;
if (f1 != V{3}) return false;
if (f2 != V{2}) return false;
f2 = f1--;
if (f2 != V{3}) return false;
if (f1 != V{2}) return false;
f2 = ++f1;
if (f1 != V{3}) return false;
if (f2 != V{3}) return false;
f2 = --f1;
if (f1 != V{2}) return false;
if (f2 != V{2}) return false;
f1 = V(3);
f2 = V(4);
f3 = V(3);
if (cmp(f1 == f2, true)) return false;
if (cmp(f1 != f2, false)) return false;
if (cmp(f1 < f2, false)) return false;
if (cmp(f2 > f1, false)) return false;
if (cmp(f1 >= f3, false)) return false;
if (cmp(f1 <= f3, false)) return false;
f1 = V{3};
f2 = V{4};
f3 = V{3};
if (f1 == f2) return false;
if (!(f1 != f2)) return false;
if (!(f1 < f2)) return false;
if (!(f2 > f1)) return false;
if (!(f1 >= f3)) return false;
if (!(f1 <= f3)) return false;
if (cmp(f1 && f2, false)) return false;
if (cmp(f1 || f2, false)) return false;
if (!(f1 && f2)) return false;
if (!(f1 || f2)) return false;
return true;
}
@@ -66,40 +66,4 @@ bool is_vec() {
((dimension == 2) ? decltype(is_vec2(std::declval<T>())){} :
((dimension == 3) ? decltype(is_vec3(std::declval<T>())){} :
decltype(is_vec4(std::declval<T>())){}));
}
template<typename T, typename U, Enable_if_t<is_vec<T, 1>()>* = nullptr>
__host__ __device__
inline
bool cmp(const T& x, U expected) {
const auto r = x == T(expected);
return r.x != 0;
}
template<typename T, typename U, Enable_if_t<is_vec<T, 2>()>* = nullptr>
__host__ __device__
inline
bool cmp(const T& x, U expected) {
const auto r = x == T(expected);
return r.x != 0 && r.y != 0;
}
template<typename T, typename U, Enable_if_t<is_vec<T, 3>()>* = nullptr>
__host__ __device__
inline
bool cmp(const T& x, U expected) {
const auto r = x == T(expected);
return r.x != 0 && r.y != 0 && r.z != 0;
}
template<typename T, typename U, Enable_if_t<is_vec<T, 4>()>* = nullptr>
__host__ __device__
inline
bool cmp(const T& x, U expected) {
const auto r = x == T(expected);
return r.x != 0 && r.y != 0 && r.z != 0 && r.w != 0;
}