Let's try this again...
This commit is contained in:
@@ -34,6 +34,8 @@ THE SOFTWARE.
|
||||
|
||||
#include "hip/hcc_detail/host_defines.h"
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#if defined(__clang__)
|
||||
#define __NATIVE_VECTOR__(n, ...) __attribute__((ext_vector_type(n)))
|
||||
#elif defined(__GNUC__) // N.B.: GCC does not support .xyzw syntax.
|
||||
@@ -43,65 +45,538 @@ THE SOFTWARE.
|
||||
__attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T))))
|
||||
#endif
|
||||
|
||||
typedef unsigned char uchar1 __NATIVE_VECTOR__(1, unsigned char);
|
||||
typedef unsigned char uchar2 __NATIVE_VECTOR__(2, unsigned char);
|
||||
typedef unsigned char uchar3 __NATIVE_VECTOR__(3, unsigned char);
|
||||
typedef unsigned char uchar4 __NATIVE_VECTOR__(4, unsigned char);
|
||||
#if defined(__cplusplus)
|
||||
template<typename T, unsigned int n> struct HIP_vector_base;
|
||||
|
||||
typedef char char1 __NATIVE_VECTOR__(1, char);
|
||||
typedef char char2 __NATIVE_VECTOR__(2, char);
|
||||
typedef char char3 __NATIVE_VECTOR__(3, char);
|
||||
typedef char char4 __NATIVE_VECTOR__(4, char);
|
||||
template<typename T>
|
||||
struct HIP_vector_base<T, 1> {
|
||||
typedef T Native_vec_ __NATIVE_VECTOR__(1, T);
|
||||
|
||||
typedef unsigned short ushort1 __NATIVE_VECTOR__(1, unsigned short);
|
||||
typedef unsigned short ushort2 __NATIVE_VECTOR__(2, unsigned short);
|
||||
typedef unsigned short ushort3 __NATIVE_VECTOR__(3, unsigned short);
|
||||
typedef unsigned short ushort4 __NATIVE_VECTOR__(4, unsigned short);
|
||||
union {
|
||||
Native_vec_ data;
|
||||
struct {
|
||||
typename std::decay<
|
||||
decltype(std::declval<Native_vec_>().x)>::type x;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
typedef short short1 __NATIVE_VECTOR__(1, short);
|
||||
typedef short short2 __NATIVE_VECTOR__(2, short);
|
||||
typedef short short3 __NATIVE_VECTOR__(3, short);
|
||||
typedef short short4 __NATIVE_VECTOR__(4, short);
|
||||
template<typename T>
|
||||
struct HIP_vector_base<T, 2> {
|
||||
typedef T Native_vec_ __NATIVE_VECTOR__(2, T);
|
||||
|
||||
typedef unsigned int uint1 __NATIVE_VECTOR__(1, unsigned int);
|
||||
typedef unsigned int uint2 __NATIVE_VECTOR__(2, unsigned int);
|
||||
typedef unsigned int uint3 __NATIVE_VECTOR__(3, unsigned int);
|
||||
typedef unsigned int uint4 __NATIVE_VECTOR__(4, unsigned int);
|
||||
union {
|
||||
Native_vec_ data;
|
||||
struct {
|
||||
typename std::decay<
|
||||
decltype(std::declval<Native_vec_>().x)>::type x;
|
||||
typename std::decay<
|
||||
decltype(std::declval<Native_vec_>().y)>::type y;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
typedef int int1 __NATIVE_VECTOR__(1, int);
|
||||
typedef int int2 __NATIVE_VECTOR__(2, int);
|
||||
typedef int int3 __NATIVE_VECTOR__(3, int);
|
||||
typedef int int4 __NATIVE_VECTOR__(4, int);
|
||||
template<typename T>
|
||||
struct HIP_vector_base<T, 3> {
|
||||
typedef T Native_vec_ __NATIVE_VECTOR__(3, T);
|
||||
|
||||
typedef unsigned long ulong1 __NATIVE_VECTOR__(1, unsigned long);
|
||||
typedef unsigned long ulong2 __NATIVE_VECTOR__(2, unsigned long);
|
||||
typedef unsigned long ulong3 __NATIVE_VECTOR__(3, unsigned long);
|
||||
typedef unsigned long ulong4 __NATIVE_VECTOR__(4, unsigned long);
|
||||
union {
|
||||
Native_vec_ data;
|
||||
struct {
|
||||
typename std::decay<
|
||||
decltype(std::declval<Native_vec_>().x)>::type x;
|
||||
typename std::decay<
|
||||
decltype(std::declval<Native_vec_>().y)>::type y;
|
||||
typename std::decay<
|
||||
decltype(std::declval<Native_vec_>().z)>::type z;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
typedef long long1 __NATIVE_VECTOR__(1, long);
|
||||
typedef long long2 __NATIVE_VECTOR__(2, long);
|
||||
typedef long long3 __NATIVE_VECTOR__(3, long);
|
||||
typedef long long4 __NATIVE_VECTOR__(4, long);
|
||||
template<typename T>
|
||||
struct HIP_vector_base<T, 4> {
|
||||
typedef T Native_vec_ __NATIVE_VECTOR__(4, T);
|
||||
|
||||
typedef unsigned long long ulonglong1 __NATIVE_VECTOR__(1, unsigned long long);
|
||||
typedef unsigned long long ulonglong2 __NATIVE_VECTOR__(2, unsigned long long);
|
||||
typedef unsigned long long ulonglong3 __NATIVE_VECTOR__(3, unsigned long long);
|
||||
typedef unsigned long long ulonglong4 __NATIVE_VECTOR__(4, unsigned long long);
|
||||
union {
|
||||
Native_vec_ data;
|
||||
struct {
|
||||
typename std::decay<
|
||||
decltype(std::declval<Native_vec_>().x)>::type x;
|
||||
typename std::decay<
|
||||
decltype(std::declval<Native_vec_>().y)>::type y;
|
||||
typename std::decay<
|
||||
decltype(std::declval<Native_vec_>().z)>::type z;
|
||||
typename std::decay<
|
||||
decltype(std::declval<Native_vec_>().w)>::type w;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
typedef long long longlong1 __NATIVE_VECTOR__(1, long long);
|
||||
typedef long long longlong2 __NATIVE_VECTOR__(2, long long);
|
||||
typedef long long longlong3 __NATIVE_VECTOR__(3, long long);
|
||||
typedef long long longlong4 __NATIVE_VECTOR__(4, long long);
|
||||
template<typename T, unsigned int rank>
|
||||
struct HIP_vector_type : public HIP_vector_base<T, rank> {
|
||||
using HIP_vector_base<T, rank>::data;
|
||||
using typename HIP_vector_base<T, rank>::Native_vec_;
|
||||
|
||||
typedef float float1 __NATIVE_VECTOR__(1, float);
|
||||
typedef float float2 __NATIVE_VECTOR__(2, float);
|
||||
typedef float float3 __NATIVE_VECTOR__(3, float);
|
||||
typedef float float4 __NATIVE_VECTOR__(4, float);
|
||||
__host__ __device__
|
||||
HIP_vector_type() = default;
|
||||
template<
|
||||
typename U,
|
||||
typename std::enable_if<
|
||||
std::is_convertible<U, T>{}>::type* = nullptr>
|
||||
__host__ __device__
|
||||
explicit
|
||||
HIP_vector_type(U x) noexcept { data = Native_vec_(x); }
|
||||
template< // TODO: constrain based on type as well.
|
||||
typename... Us,
|
||||
typename std::enable_if<sizeof...(Us) == rank>::type* = nullptr>
|
||||
__host__ __device__
|
||||
HIP_vector_type(Us... xs) noexcept { data = Native_vec_{xs...}; }
|
||||
__host__ __device__
|
||||
HIP_vector_type(const HIP_vector_type&) = default;
|
||||
__host__ __device__
|
||||
HIP_vector_type(HIP_vector_type&&) = default;
|
||||
__host__ __device__
|
||||
~HIP_vector_type() = default;
|
||||
|
||||
typedef double double1 __NATIVE_VECTOR__(1, double);
|
||||
typedef double double2 __NATIVE_VECTOR__(2, double);
|
||||
typedef double double3 __NATIVE_VECTOR__(3, double);
|
||||
typedef double double4 __NATIVE_VECTOR__(4, double);
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator=(const HIP_vector_type&) = default;
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator=(HIP_vector_type&&) = default;
|
||||
|
||||
// Operators
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator++() noexcept
|
||||
{
|
||||
data += Native_vec_(1);
|
||||
return *this;
|
||||
}
|
||||
__host__ __device__
|
||||
HIP_vector_type operator++(int) noexcept
|
||||
{
|
||||
auto tmp(*this);
|
||||
++*this;
|
||||
return tmp;
|
||||
}
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator--() noexcept
|
||||
{
|
||||
data -= Native_vec_(1);
|
||||
return *this;
|
||||
}
|
||||
__host__ __device__
|
||||
HIP_vector_type operator--(int) noexcept
|
||||
{
|
||||
auto tmp(*this);
|
||||
--*this;
|
||||
return tmp;
|
||||
}
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
data += x.data;
|
||||
return *this;
|
||||
}
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
data -= x.data;
|
||||
return *this;
|
||||
}
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
data *= x.data;
|
||||
return *this;
|
||||
}
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
data /= x.data;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
|
||||
__host__ __device__
|
||||
HIP_vector_type operator-() noexcept
|
||||
{
|
||||
auto tmp(*this);
|
||||
tmp.data = -tmp.data;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__host__ __device__
|
||||
HIP_vector_type operator~() noexcept
|
||||
{
|
||||
HIP_vector_type r{*this};
|
||||
r.data = ~r.data;
|
||||
return r;
|
||||
}
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
data %= x.data;
|
||||
return *this;
|
||||
}
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
data ^= x.data;
|
||||
return *this;
|
||||
}
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
data |= x.data;
|
||||
return *this;
|
||||
}
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
data &= x.data;
|
||||
return *this;
|
||||
}
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
data >>= x.data;
|
||||
return *this;
|
||||
}
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__host__ __device__
|
||||
HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
data <<= x.data;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
__host__ __device__
|
||||
inline
|
||||
HIP_vector_type<T, n> operator+(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} += y;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
__host__ __device__
|
||||
inline
|
||||
HIP_vector_type<T, n> operator-(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} -= y;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
__host__ __device__
|
||||
inline
|
||||
HIP_vector_type<T, n> operator*(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} *= y;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
__host__ __device__
|
||||
inline
|
||||
HIP_vector_type<T, n> operator/(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} /= y;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool operator==(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
auto tmp = x.data == y.data;
|
||||
for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool operator!=(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return !(x == y);
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool operator<(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
auto tmp = x.data < y.data;
|
||||
for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool operator>(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return y < x;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool operator<=(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return !(y < x);
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool operator>=(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return !(x < y);
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int n,
|
||||
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
||||
inline
|
||||
HIP_vector_type<T, n> operator%(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} %= y;
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int n,
|
||||
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
||||
inline
|
||||
HIP_vector_type<T, n> operator^(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} ^= y;
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int n,
|
||||
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
||||
inline
|
||||
HIP_vector_type<T, n> operator|(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} |= y;
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int n,
|
||||
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
||||
inline
|
||||
HIP_vector_type<T, n> operator&(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} &= y;
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int n,
|
||||
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
||||
inline
|
||||
HIP_vector_type<T, n> operator>>(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} >>= y;
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
unsigned int n,
|
||||
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
||||
inline
|
||||
HIP_vector_type<T, n> operator<<(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} <<= y;
|
||||
}
|
||||
|
||||
// TODO: the following are rather dubious in terms of general utility.
|
||||
template<typename T, unsigned int n>
|
||||
inline
|
||||
bool operator||(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
auto tmp = x.data || y.data;
|
||||
for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
inline
|
||||
bool operator&&(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
auto tmp = x.data && y.data;
|
||||
for (auto i = 0u; i != n; ++i) if (tmp[i] == 0) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
#define __MAKE_VECTOR_TYPE__(CUDA_name, T, n) \
|
||||
using CUDA_name = HIP_vector_type<T, n>;
|
||||
#else
|
||||
typedef unsigned char uchar1 __NATIVE_VECTOR__(1, unsigned char);
|
||||
typedef unsigned char uchar2 __NATIVE_VECTOR__(2, unsigned char);
|
||||
typedef unsigned char uchar3 __NATIVE_VECTOR__(3, unsigned char);
|
||||
typedef unsigned char uchar4 __NATIVE_VECTOR__(4, unsigned char);
|
||||
|
||||
typedef char char1 __NATIVE_VECTOR__(1, char);
|
||||
typedef char char2 __NATIVE_VECTOR__(2, char);
|
||||
typedef char char3 __NATIVE_VECTOR__(3, char);
|
||||
typedef char char4 __NATIVE_VECTOR__(4, char);
|
||||
|
||||
typedef unsigned short ushort1 __NATIVE_VECTOR__(1, unsigned short);
|
||||
typedef unsigned short ushort2 __NATIVE_VECTOR__(2, unsigned short);
|
||||
typedef unsigned short ushort3 __NATIVE_VECTOR__(3, unsigned short);
|
||||
typedef unsigned short ushort4 __NATIVE_VECTOR__(4, unsigned short);
|
||||
|
||||
typedef short short1 __NATIVE_VECTOR__(1, short);
|
||||
typedef short short2 __NATIVE_VECTOR__(2, short);
|
||||
typedef short short3 __NATIVE_VECTOR__(3, short);
|
||||
typedef short short4 __NATIVE_VECTOR__(4, short);
|
||||
|
||||
typedef unsigned int uint1 __NATIVE_VECTOR__(1, unsigned int);
|
||||
typedef unsigned int uint2 __NATIVE_VECTOR__(2, unsigned int);
|
||||
typedef unsigned int uint3 __NATIVE_VECTOR__(3, unsigned int);
|
||||
typedef unsigned int uint4 __NATIVE_VECTOR__(4, unsigned int);
|
||||
|
||||
typedef int int1 __NATIVE_VECTOR__(1, int);
|
||||
typedef int int2 __NATIVE_VECTOR__(2, int);
|
||||
typedef int int3 __NATIVE_VECTOR__(3, int);
|
||||
typedef int int4 __NATIVE_VECTOR__(4, int);
|
||||
|
||||
typedef unsigned long ulong1 __NATIVE_VECTOR__(1, unsigned long);
|
||||
typedef unsigned long ulong2 __NATIVE_VECTOR__(2, unsigned long);
|
||||
typedef unsigned long ulong3 __NATIVE_VECTOR__(3, unsigned long);
|
||||
typedef unsigned long ulong4 __NATIVE_VECTOR__(4, unsigned long);
|
||||
|
||||
typedef long long1 __NATIVE_VECTOR__(1, long);
|
||||
typedef long long2 __NATIVE_VECTOR__(2, long);
|
||||
typedef long long3 __NATIVE_VECTOR__(3, long);
|
||||
typedef long long4 __NATIVE_VECTOR__(4, long);
|
||||
|
||||
typedef unsigned long long ulonglong1 __NATIVE_VECTOR__(1, unsigned long long);
|
||||
typedef unsigned long long ulonglong2 __NATIVE_VECTOR__(2, unsigned long long);
|
||||
typedef unsigned long long ulonglong3 __NATIVE_VECTOR__(3, unsigned long long);
|
||||
typedef unsigned long long ulonglong4 __NATIVE_VECTOR__(4, unsigned long long);
|
||||
|
||||
typedef long long longlong1 __NATIVE_VECTOR__(1, long long);
|
||||
typedef long long longlong2 __NATIVE_VECTOR__(2, long long);
|
||||
typedef long long longlong3 __NATIVE_VECTOR__(3, long long);
|
||||
typedef long long longlong4 __NATIVE_VECTOR__(4, long long);
|
||||
|
||||
typedef float float1 __NATIVE_VECTOR__(1, float);
|
||||
typedef float float2 __NATIVE_VECTOR__(2, float);
|
||||
typedef float float3 __NATIVE_VECTOR__(3, float);
|
||||
typedef float float4 __NATIVE_VECTOR__(4, float);
|
||||
|
||||
typedef double double1 __NATIVE_VECTOR__(1, double);
|
||||
typedef double double2 __NATIVE_VECTOR__(2, double);
|
||||
typedef double double3 __NATIVE_VECTOR__(3, double);
|
||||
typedef double double4 __NATIVE_VECTOR__(4, double);
|
||||
#endif
|
||||
|
||||
__MAKE_VECTOR_TYPE__(uchar1, unsigned char, 1);
|
||||
__MAKE_VECTOR_TYPE__(uchar2, unsigned char, 2);
|
||||
__MAKE_VECTOR_TYPE__(uchar3, unsigned char, 3);
|
||||
__MAKE_VECTOR_TYPE__(uchar4, unsigned char, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(char1, char, 1);
|
||||
__MAKE_VECTOR_TYPE__(char2, char, 2);
|
||||
__MAKE_VECTOR_TYPE__(char3, char, 3);
|
||||
__MAKE_VECTOR_TYPE__(char4, char, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(ushort1, unsigned short, 1);
|
||||
__MAKE_VECTOR_TYPE__(ushort2, unsigned short, 2);
|
||||
__MAKE_VECTOR_TYPE__(ushort3, unsigned short, 3);
|
||||
__MAKE_VECTOR_TYPE__(ushort4, unsigned short, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(short1, short, 1);
|
||||
__MAKE_VECTOR_TYPE__(short2, short, 2);
|
||||
__MAKE_VECTOR_TYPE__(short3, short, 3);
|
||||
__MAKE_VECTOR_TYPE__(short4, short, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(uint1, unsigned int, 1);
|
||||
__MAKE_VECTOR_TYPE__(uint2, unsigned int, 2);
|
||||
__MAKE_VECTOR_TYPE__(uint3, unsigned int, 3);
|
||||
__MAKE_VECTOR_TYPE__(uint4, unsigned int, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(int1, int, 1);
|
||||
__MAKE_VECTOR_TYPE__(int2, int, 2);
|
||||
__MAKE_VECTOR_TYPE__(int3, int, 3);
|
||||
__MAKE_VECTOR_TYPE__(int4, int, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(ulong1, unsigned long, 1);
|
||||
__MAKE_VECTOR_TYPE__(ulong2, unsigned long, 2);
|
||||
__MAKE_VECTOR_TYPE__(ulong3, unsigned long, 3);
|
||||
__MAKE_VECTOR_TYPE__(ulong4, unsigned long, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(long1, long, 1);
|
||||
__MAKE_VECTOR_TYPE__(long2, long, 2);
|
||||
__MAKE_VECTOR_TYPE__(long3, long, 3);
|
||||
__MAKE_VECTOR_TYPE__(long4, long, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(ulonglong1, unsigned long long, 1);
|
||||
__MAKE_VECTOR_TYPE__(ulonglong2, unsigned long long, 2);
|
||||
__MAKE_VECTOR_TYPE__(ulonglong3, unsigned long long, 3);
|
||||
__MAKE_VECTOR_TYPE__(ulonglong4, unsigned long long, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(longlong1, long long, 1);
|
||||
__MAKE_VECTOR_TYPE__(longlong2, long long, 2);
|
||||
__MAKE_VECTOR_TYPE__(longlong3, long long, 3);
|
||||
__MAKE_VECTOR_TYPE__(longlong4, long long, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(float1, float, 1);
|
||||
__MAKE_VECTOR_TYPE__(float2, float, 2);
|
||||
__MAKE_VECTOR_TYPE__(float3, float, 3);
|
||||
__MAKE_VECTOR_TYPE__(float4, float, 4);
|
||||
|
||||
__MAKE_VECTOR_TYPE__(double1, double, 1);
|
||||
__MAKE_VECTOR_TYPE__(double2, double, 2);
|
||||
__MAKE_VECTOR_TYPE__(double3, double, 3);
|
||||
__MAKE_VECTOR_TYPE__(double4, double, 4);
|
||||
|
||||
#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
|
||||
__device__ __host__ \
|
||||
|
||||
@@ -355,33 +355,33 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask
|
||||
|
||||
__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) {
|
||||
char4 out;
|
||||
unsigned one1 = in1.a & MASK1;
|
||||
unsigned one2 = in2.a & MASK1;
|
||||
out.a = (one1 + one2) & MASK1;
|
||||
one1 = in1.a & MASK2;
|
||||
one2 = in2.a & MASK2;
|
||||
out.a = out.a | ((one1 + one2) & MASK2);
|
||||
unsigned one1 = in1.w & MASK1;
|
||||
unsigned one2 = in2.w & MASK1;
|
||||
out.w = (one1 + one2) & MASK1;
|
||||
one1 = in1.w & MASK2;
|
||||
one2 = in2.w & MASK2;
|
||||
out.w = out.w | ((one1 + one2) & MASK2);
|
||||
return out;
|
||||
}
|
||||
|
||||
__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
|
||||
char4 out;
|
||||
unsigned one1 = in1.a & MASK1;
|
||||
unsigned one2 = in2.a & MASK1;
|
||||
out.a = (one1 - one2) & MASK1;
|
||||
one1 = in1.a & MASK2;
|
||||
one2 = in2.a & MASK2;
|
||||
out.a = out.a | ((one1 - one2) & MASK2);
|
||||
unsigned one1 = in1.w & MASK1;
|
||||
unsigned one2 = in2.w & MASK1;
|
||||
out.w = (one1 - one2) & MASK1;
|
||||
one1 = in1.w & MASK2;
|
||||
one2 = in2.w & MASK2;
|
||||
out.w = out.w | ((one1 - one2) & MASK2);
|
||||
return out;
|
||||
}
|
||||
|
||||
__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
|
||||
char4 out;
|
||||
unsigned one1 = in1.a & MASK1;
|
||||
unsigned one2 = in2.a & MASK1;
|
||||
out.a = (one1 * one2) & MASK1;
|
||||
one1 = in1.a & MASK2;
|
||||
one2 = in2.a & MASK2;
|
||||
out.a = out.a | ((one1 * one2) & MASK2);
|
||||
unsigned one1 = in1.w & MASK1;
|
||||
unsigned one2 = in2.w & MASK1;
|
||||
out.w = (one1 * one2) & MASK1;
|
||||
one1 = in1.w & MASK2;
|
||||
one2 = in2.w & MASK2;
|
||||
out.w = out.w | ((one1 * one2) & MASK2);
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -52,20 +52,20 @@ template<
|
||||
__device__
|
||||
bool integer_unary_tests(V& f1, V& f2) {
|
||||
f1 %= f2;
|
||||
if (!cmp(f1, 0)) return false;
|
||||
if (f1 != V{0}) return false;
|
||||
f1 &= f2;
|
||||
if (!cmp(f1, 0)) return false;
|
||||
if (f1 != V{0}) return false;
|
||||
f1 |= f2;
|
||||
if (!cmp(f1, 1)) return false;
|
||||
if (f1 != V{1}) return false;
|
||||
f1 ^= f2;
|
||||
if (!cmp(f1, 0)) return false;
|
||||
f1.x = 1;
|
||||
if (f1 != V{0}) return false;
|
||||
f1 = V{1};
|
||||
f1 <<= f2;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
f1 >>= f2;
|
||||
if (!cmp(f1, 1)) return false;
|
||||
if (f1 != V{1}) return false;
|
||||
f2 = ~f1;
|
||||
return cmp(f2, ~1);
|
||||
return f2 == V{~1};
|
||||
}
|
||||
|
||||
template<
|
||||
@@ -74,17 +74,17 @@ template<
|
||||
__device__
|
||||
bool integer_binary_tests(V& f1, V& f2, V& f3) {
|
||||
f3 = f1 % f2;
|
||||
if (!cmp(f3, 0)) return false;
|
||||
if (f3 != V{0}) return false;
|
||||
f1 = f3 & f2;
|
||||
if (!cmp(f1, 0)) return false;
|
||||
if (f1 != V{0}) return false;
|
||||
f2 = f1 ^ f3;
|
||||
if (!cmp(f2, 0)) return false;
|
||||
f1.x = 1;
|
||||
f2.x = 2;
|
||||
if (f2 != V{0}) return false;
|
||||
f1 = V{1};
|
||||
f2 = V{2};
|
||||
f3 = f1 << f2;
|
||||
if (!cmp(f3, 4)) return false;
|
||||
if (f3 != V{4}) return false;
|
||||
f2 = f3 >> f1;
|
||||
if (!cmp(f2, 2)) return false;
|
||||
return f2 == V{2};
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
@@ -107,60 +107,58 @@ bool constructor_tests() {
|
||||
|
||||
template<typename V>
|
||||
bool TestVectorType() {
|
||||
V f1(1);
|
||||
V f2(1);
|
||||
V f1{1};
|
||||
V f2{1};
|
||||
V f3 = f1 + f2;
|
||||
if (!cmp(f3, 2)) return false;
|
||||
if (f3 != V{2}) return false;
|
||||
f2 = f3 - f1;
|
||||
if (!cmp(f2, 1)) return false;
|
||||
if (f2 != V{1}) return false;
|
||||
f1 = f2 * f3;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
f2 = f1 / f3;
|
||||
if (!cmp(f2, 2 / 2)) return false;
|
||||
if (f2 != V{1}) return false;
|
||||
if (!integer_binary_tests(f1, f2, f3)) return false;
|
||||
|
||||
f1 = V(2);
|
||||
f2 = V(1);
|
||||
f1 = V{2};
|
||||
f2 = V{1};
|
||||
f1 += f2;
|
||||
if (!cmp(f1, 3)) return false;
|
||||
if (f1 != V{3}) return false;
|
||||
f1 -= f2;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
f1 *= f2;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
f1 /= f2;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
if (!integer_unary_tests(f1, f2)) return false;
|
||||
|
||||
#if false // We do not enable nullary increment / decrement yet.
|
||||
f1 = V(2);
|
||||
f2 = f1++;
|
||||
if (!cmp(f1, 3)) return false;
|
||||
if (!cmp(f2, 2)) return false;
|
||||
f2 = f1--;
|
||||
if (!cmp(f2, 3)) return false;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
f2 = ++f1;
|
||||
if (!cmp(f1, 3)) return false;
|
||||
if (!cmp(f2, 3)) return false;
|
||||
f2 = --f1;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (!cmp(f2, 2)) return false;
|
||||
#endif
|
||||
f1 = V{2};
|
||||
f2 = f1++;
|
||||
if (f1 != V{3}) return false;
|
||||
if (f2 != V{2}) return false;
|
||||
f2 = f1--;
|
||||
if (f2 != V{3}) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
f2 = ++f1;
|
||||
if (f1 != V{3}) return false;
|
||||
if (f2 != V{3}) return false;
|
||||
f2 = --f1;
|
||||
if (f1 != V{2}) return false;
|
||||
if (f2 != V{2}) return false;
|
||||
|
||||
if (!constructor_tests<V>()) return false;
|
||||
|
||||
f1 = V(3);
|
||||
f2 = V(4);
|
||||
f3 = V(3);
|
||||
if (cmp(f1 == f2, true)) return false;
|
||||
if (cmp(f1 != f2, false)) return false;
|
||||
if (cmp(f1 < f2, false)) return false;
|
||||
if (cmp(f2 > f1, false)) return false;
|
||||
if (cmp(f1 >= f3, false)) return false;
|
||||
if (cmp(f1 <= f3, false)) return false;
|
||||
f1 = V{3};
|
||||
f2 = V{4};
|
||||
f3 = V{3};
|
||||
if (f1 == f2) return false;
|
||||
if (!(f1 != f2)) return false;
|
||||
if (!(f1 < f2)) return false;
|
||||
if (!(f2 > f1)) return false;
|
||||
if (!(f1 >= f3)) return false;
|
||||
if (!(f1 <= f3)) return false;
|
||||
|
||||
if (cmp(f1 && f2, false)) return false;
|
||||
if (cmp(f1 || f2, false)) return false;
|
||||
if (!(f1 && f2)) return false;
|
||||
if (!(f1 || f2)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -51,20 +51,23 @@ template<
|
||||
__device__
|
||||
bool integer_unary_tests(V& f1, V& f2) {
|
||||
f1 %= f2;
|
||||
if (!cmp(f1, 0)) return false;
|
||||
if (f1 != V{0}) return false;
|
||||
|
||||
f1 &= f2;
|
||||
if (!cmp(f1, 0)) return false;
|
||||
if (f1 != V{0}) return false;
|
||||
f1 |= f2;
|
||||
if (!cmp(f1, 1)) return false;
|
||||
if (f1 != V{1}) return false;
|
||||
f1 ^= f2;
|
||||
if (!cmp(f1, 0)) return false;
|
||||
f1.x = 1;
|
||||
if (f1 != V{0}) return false;
|
||||
f1 = V{1};
|
||||
f1 <<= f2;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
f1 >>= f2;
|
||||
if (!cmp(f1, 1)) return false;
|
||||
if (f1 != V{1}) return false;
|
||||
f2 = ~f1;
|
||||
return cmp(f2, ~1);
|
||||
return f2 == V{~1};
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<
|
||||
@@ -81,74 +84,72 @@ template<
|
||||
__device__
|
||||
bool integer_binary_tests(V& f1, V& f2, V& f3) {
|
||||
f3 = f1 % f2;
|
||||
if (!cmp(f3, 0)) return false;
|
||||
if (f3 != V{0}) return false;
|
||||
f1 = f3 & f2;
|
||||
if (!cmp(f1, 0)) return false;
|
||||
if (f1 != V{0}) return false;
|
||||
f2 = f1 ^ f3;
|
||||
if (!cmp(f2, 0)) return false;
|
||||
f1.x = 1;
|
||||
f2.x = 2;
|
||||
if (f2 != V{0}) return false;
|
||||
f1 = V{1};
|
||||
f2 = V{2};
|
||||
f3 = f1 << f2;
|
||||
if (!cmp(f3, 4)) return false;
|
||||
if (f3 != V{4}) return false;
|
||||
f2 = f3 >> f1;
|
||||
if (!cmp(f2, 2)) return false;
|
||||
return f2 == V{2};
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
__device__
|
||||
bool TestVectorType() {
|
||||
V f1(1);
|
||||
V f2(1);
|
||||
V f1{1};
|
||||
V f2{1};
|
||||
V f3 = f1 + f2;
|
||||
if (!cmp(f3, 2)) return false;
|
||||
if (f3 != V{2}) return false;
|
||||
f2 = f3 - f1;
|
||||
if (!cmp(f2, 1)) return false;
|
||||
if (f2 != V{1}) return false;
|
||||
f1 = f2 * f3;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
f2 = f1 / f3;
|
||||
if (!cmp(f2, 2 / 2)) return false;
|
||||
if (f2 != V{1}) return false;
|
||||
if (!integer_binary_tests(f1, f2, f3)) return false;
|
||||
|
||||
f1 = V(2);
|
||||
f2 = V(1);
|
||||
f1 = V{2};
|
||||
f2 = V{1};
|
||||
f1 += f2;
|
||||
if (!cmp(f1, 3)) return false;
|
||||
if (f1 != V{3}) return false;
|
||||
f1 -= f2;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
f1 *= f2;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
f1 /= f2;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
if (!integer_unary_tests(f1, f2)) return false;
|
||||
|
||||
#if false // We do not enable nullary increment / decrement yet.
|
||||
f1 = V(2);
|
||||
f2 = f1++;
|
||||
if (!cmp(f1, 3)) return false;
|
||||
if (!cmp(f2, 2)) return false;
|
||||
f2 = f1--;
|
||||
if (!cmp(f2, 3)) return false;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
f2 = ++f1;
|
||||
if (!cmp(f1, 3)) return false;
|
||||
if (!cmp(f2, 3)) return false;
|
||||
f2 = --f1;
|
||||
if (!cmp(f1, 2)) return false;
|
||||
if (!cmp(f2, 2)) return false;
|
||||
#endif
|
||||
f1 = V{2};
|
||||
f2 = f1++;
|
||||
if (f1 != V{3}) return false;
|
||||
if (f2 != V{2}) return false;
|
||||
f2 = f1--;
|
||||
if (f2 != V{3}) return false;
|
||||
if (f1 != V{2}) return false;
|
||||
f2 = ++f1;
|
||||
if (f1 != V{3}) return false;
|
||||
if (f2 != V{3}) return false;
|
||||
f2 = --f1;
|
||||
if (f1 != V{2}) return false;
|
||||
if (f2 != V{2}) return false;
|
||||
|
||||
f1 = V(3);
|
||||
f2 = V(4);
|
||||
f3 = V(3);
|
||||
if (cmp(f1 == f2, true)) return false;
|
||||
if (cmp(f1 != f2, false)) return false;
|
||||
if (cmp(f1 < f2, false)) return false;
|
||||
if (cmp(f2 > f1, false)) return false;
|
||||
if (cmp(f1 >= f3, false)) return false;
|
||||
if (cmp(f1 <= f3, false)) return false;
|
||||
f1 = V{3};
|
||||
f2 = V{4};
|
||||
f3 = V{3};
|
||||
if (f1 == f2) return false;
|
||||
if (!(f1 != f2)) return false;
|
||||
if (!(f1 < f2)) return false;
|
||||
if (!(f2 > f1)) return false;
|
||||
if (!(f1 >= f3)) return false;
|
||||
if (!(f1 <= f3)) return false;
|
||||
|
||||
if (cmp(f1 && f2, false)) return false;
|
||||
if (cmp(f1 || f2, false)) return false;
|
||||
if (!(f1 && f2)) return false;
|
||||
if (!(f1 || f2)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -66,40 +66,4 @@ bool is_vec() {
|
||||
((dimension == 2) ? decltype(is_vec2(std::declval<T>())){} :
|
||||
((dimension == 3) ? decltype(is_vec3(std::declval<T>())){} :
|
||||
decltype(is_vec4(std::declval<T>())){}));
|
||||
}
|
||||
|
||||
template<typename T, typename U, Enable_if_t<is_vec<T, 1>()>* = nullptr>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool cmp(const T& x, U expected) {
|
||||
const auto r = x == T(expected);
|
||||
|
||||
return r.x != 0;
|
||||
}
|
||||
|
||||
template<typename T, typename U, Enable_if_t<is_vec<T, 2>()>* = nullptr>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool cmp(const T& x, U expected) {
|
||||
const auto r = x == T(expected);
|
||||
|
||||
return r.x != 0 && r.y != 0;
|
||||
}
|
||||
|
||||
template<typename T, typename U, Enable_if_t<is_vec<T, 3>()>* = nullptr>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool cmp(const T& x, U expected) {
|
||||
const auto r = x == T(expected);
|
||||
|
||||
return r.x != 0 && r.y != 0 && r.z != 0;
|
||||
}
|
||||
|
||||
template<typename T, typename U, Enable_if_t<is_vec<T, 4>()>* = nullptr>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool cmp(const T& x, U expected) {
|
||||
const auto r = x == T(expected);
|
||||
|
||||
return r.x != 0 && r.y != 0 && r.z != 0 && r.w != 0;
|
||||
}
|
||||
Viittaa uudesa ongelmassa
Block a user