SWDEV-527851 SWDEV-527890 SWDEV-529456 - Make HIP vector simple struct (#356)

* Make HIP vector simple struct

Change-Id: I8442c2cc9af26b2a3c7d6719e3348df1593e83b3

* Update make_vector_type

Change-Id: Ic5060994a08baa4262c2a4b09fcbe6bc74276720
Этот коммит содержится в:
Belton-Schure, Aidan
2025-05-21 17:00:55 +01:00
коммит произвёл GitHub
родитель e9dbd7c99d
Коммит 3cd3b3ffc5
5 изменённых файлов: 364 добавлений и 419 удалений
+232 -345
Просмотреть файл
@@ -143,27 +143,34 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
} // Namespace hip_impl.
template<typename T, unsigned int n> struct HIP_vector_base;
template <typename T, unsigned int rank> struct HIP_vector_type;
template <typename T, unsigned int n>
__attribute__((always_inline)) __HOST_DEVICE__ typename HIP_vector_base<T, n>::Native_vec_&
get_native_vector(HIP_vector_base<T, n>& base_vec) {
static_assert(sizeof(base_vec) == sizeof(typename HIP_vector_base<T, n>::Native_vec_));
return *reinterpret_cast<typename HIP_vector_base<T, n>::Native_vec_*>(&base_vec.x);
};
template <typename T, unsigned int n>
__attribute__((
always_inline)) __HOST_DEVICE__ const typename HIP_vector_base<T, n>::Native_vec_&
get_native_vector(const HIP_vector_base<T, n>& base_vec) {
static_assert(sizeof(base_vec) == sizeof(typename HIP_vector_base<T, n>::Native_vec_));
return *reinterpret_cast<const typename HIP_vector_base<T, n>::Native_vec_*>(&base_vec.x);
};
template<typename T>
struct HIP_vector_base<T, 1> {
using Native_vec_ = __NATIVE_VECTOR__(1, T);
union {
Native_vec_ data;
struct {
T x;
};
};
T x;
using value_type = T;
__HOST_DEVICE__
HIP_vector_base() = default;
__HOST_DEVICE__
explicit
constexpr
HIP_vector_base(T x_) noexcept : data{x_} {}
__HOST_DEVICE__
constexpr
HIP_vector_base(const HIP_vector_base&) = default;
__HOST_DEVICE__
@@ -175,43 +182,23 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_base& operator=(const HIP_vector_base&) = default;
};
template<typename T>
struct HIP_vector_base<T, 2> {
using Native_vec_ = __NATIVE_VECTOR__(2, T);
template <typename T> struct alignas(2 * sizeof(T)) HIP_vector_base<T, 2> {
using Native_vec_ = __NATIVE_VECTOR__(2, T);
union
#if !__has_attribute(ext_vector_type)
alignas(hip_impl::next_pot(2 * sizeof(T)))
#endif
{
Native_vec_ data;
struct {
T x;
T y;
};
};
T x, y;
using value_type = T;
using value_type = T;
__HOST_DEVICE__
HIP_vector_base() = default;
__HOST_DEVICE__
explicit
constexpr
HIP_vector_base(T x_) noexcept : data{x_, x_} {}
__HOST_DEVICE__
constexpr
HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
__HOST_DEVICE__
constexpr
HIP_vector_base(const HIP_vector_base&) = default;
__HOST_DEVICE__
constexpr
HIP_vector_base(HIP_vector_base&&) = default;
__HOST_DEVICE__
~HIP_vector_base() = default;
__HOST_DEVICE__
HIP_vector_base& operator=(const HIP_vector_base&) = default;
__HOST_DEVICE__
HIP_vector_base() = default;
__HOST_DEVICE__
constexpr HIP_vector_base(const HIP_vector_base&) = default;
__HOST_DEVICE__
constexpr HIP_vector_base(HIP_vector_base&&) = default;
__HOST_DEVICE__
~HIP_vector_base() = default;
__HOST_DEVICE__
HIP_vector_base& operator=(const HIP_vector_base&) = default;
};
template<typename T>
@@ -364,27 +351,13 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
}
};
union {
Native_vec_ data;
struct {
T x;
T y;
T z;
};
};
T x, y, z;
using value_type = T;
__HOST_DEVICE__
HIP_vector_base() = default;
__HOST_DEVICE__
explicit
constexpr
HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
__HOST_DEVICE__
constexpr
HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
__HOST_DEVICE__
constexpr
HIP_vector_base(const HIP_vector_base&) = default;
__HOST_DEVICE__
@@ -399,320 +372,237 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_base& operator=(HIP_vector_base&&) = default;
};
template<typename T>
struct HIP_vector_base<T, 4> {
using Native_vec_ = __NATIVE_VECTOR__(4, T);
template <typename T> struct alignas(4 * sizeof(T)) HIP_vector_base<T, 4> {
using Native_vec_ = __NATIVE_VECTOR__(4, T);
union
#if !__has_attribute(ext_vector_type)
alignas(hip_impl::next_pot(4 * sizeof(T)))
#endif
{
Native_vec_ data;
struct {
T x;
T y;
T z;
T w;
};
};
T x, y, z, w;
using value_type = T;
using value_type = T;
__HOST_DEVICE__
HIP_vector_base() = default;
__HOST_DEVICE__
explicit
constexpr
HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
__HOST_DEVICE__
constexpr
HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
__HOST_DEVICE__
constexpr
HIP_vector_base(const HIP_vector_base&) = default;
__HOST_DEVICE__
constexpr
HIP_vector_base(HIP_vector_base&&) = default;
__HOST_DEVICE__
~HIP_vector_base() = default;
__HOST_DEVICE__
HIP_vector_base& operator=(const HIP_vector_base&) = default;
__HOST_DEVICE__
HIP_vector_base() = default;
__HOST_DEVICE__
constexpr HIP_vector_base(const HIP_vector_base&) = default;
__HOST_DEVICE__
constexpr HIP_vector_base(HIP_vector_base&&) = default;
__HOST_DEVICE__
~HIP_vector_base() = default;
__HOST_DEVICE__
HIP_vector_base& operator=(const HIP_vector_base&) = default;
};
template<typename T, unsigned int rank>
template <typename T, size_t rank, size_t... indices>
constexpr inline __HOST_DEVICE__ HIP_vector_type<T, rank> make_vector_type_impl(
T val, std::index_sequence<indices...>) noexcept {
// Fills vec with vals, and ignores the indices
return HIP_vector_type<T, rank>{((void)indices, val)...};
}
template <typename T, unsigned int rank>
constexpr inline __HOST_DEVICE__ HIP_vector_type<T, rank> make_vector_type(T val) {
return make_vector_type_impl<T, rank>(val, std::make_index_sequence<rank>{});
}
template <typename T, unsigned int rank>
struct HIP_vector_type : public HIP_vector_base<T, rank> {
using HIP_vector_base<T, rank>::data;
using typename HIP_vector_base<T, rank>::Native_vec_;
using typename HIP_vector_base<T, rank>::Native_vec_;
__HOST_DEVICE__
HIP_vector_type() = default;
template<
typename U,
typename std::enable_if<
std::is_convertible<U, T>::value>::type* = nullptr>
__HOST_DEVICE__
explicit
constexpr
HIP_vector_type(U x_) noexcept
: HIP_vector_base<T, rank>{static_cast<T>(x_)}
{}
template< // TODO: constrain based on type as well.
typename... Us,
typename std::enable_if<
(rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
__HOST_DEVICE__
constexpr
HIP_vector_type(Us... xs) noexcept
: HIP_vector_base<T, rank>{static_cast<T>(xs)...}
{}
__HOST_DEVICE__
constexpr
HIP_vector_type(const HIP_vector_type&) = default;
__HOST_DEVICE__
constexpr
HIP_vector_type(HIP_vector_type&&) = default;
__HOST_DEVICE__
~HIP_vector_type() = default;
__HOST_DEVICE__
HIP_vector_type() = default;
template <typename U,
typename std::enable_if<std::is_convertible<U, T>::value>::type* = nullptr>
__HOST_DEVICE__ explicit constexpr HIP_vector_type(U x_) noexcept
: HIP_vector_base<T, rank>{static_cast<T>(x_)} {}
template < // TODO: constrain based on type as well.
typename... Us,
typename std::enable_if<(rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
__HOST_DEVICE__ constexpr HIP_vector_type(Us... xs) noexcept
: HIP_vector_base<T, rank>{static_cast<T>(xs)...} {}
__HOST_DEVICE__
constexpr HIP_vector_type(const HIP_vector_type&) = default;
__HOST_DEVICE__
constexpr HIP_vector_type(HIP_vector_type&&) = default;
__HOST_DEVICE__
~HIP_vector_type() = default;
__HOST_DEVICE__
HIP_vector_type& operator=(const HIP_vector_type&) = default;
__HOST_DEVICE__
HIP_vector_type& operator=(HIP_vector_type&&) = default;
__HOST_DEVICE__
HIP_vector_type& operator=(const HIP_vector_type&) = default;
__HOST_DEVICE__
HIP_vector_type& operator=(HIP_vector_type&&) = default;
// Operators
__HOST_DEVICE__
HIP_vector_type& operator++() noexcept
{
return *this += HIP_vector_type{1};
}
__HOST_DEVICE__
HIP_vector_type operator++(int) noexcept
{
auto tmp(*this);
++*this;
return tmp;
}
// Operators
__HOST_DEVICE__
HIP_vector_type& operator++() noexcept {
HIP_vector_type unity = make_vector_type<T, rank>(1);
return *this += unity;
}
__HOST_DEVICE__
HIP_vector_type operator++(int) noexcept {
auto tmp(*this);
++*this;
return tmp;
}
__HOST_DEVICE__
HIP_vector_type& operator--() noexcept
{
return *this -= HIP_vector_type{1};
}
__HOST_DEVICE__
HIP_vector_type operator--(int) noexcept
{
auto tmp(*this);
--*this;
return tmp;
}
__HOST_DEVICE__
HIP_vector_type& operator--() noexcept {
HIP_vector_type unity = make_vector_type<T, rank>(1);
return *this -= unity;
}
__HOST_DEVICE__
HIP_vector_type operator--(int) noexcept {
auto tmp(*this);
--*this;
return tmp;
}
__HOST_DEVICE__
HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
{
__HOST_DEVICE__
HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept {
#if __HIP_USE_NATIVE_VECTOR__
data += x.data;
get_native_vector(*this) += get_native_vector(x);
#else
for (auto i = 0u; i != rank; ++i) data[i] += x.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] += get_native_vector(x)[i];
#endif
return *this;
}
template<
typename U,
typename std::enable_if<
std::is_convertible<U, T>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type& operator+=(U x) noexcept
{
return *this += HIP_vector_type{x};
}
return *this;
}
template <typename U, typename std::enable_if<std::is_convertible<U, T>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type& operator+=(U x) noexcept {
return *this += make_vector_type<T, rank>(x);
}
__HOST_DEVICE__
HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
{
__HOST_DEVICE__
HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept {
#if __HIP_USE_NATIVE_VECTOR__
data -= x.data;
get_native_vector(*this) -= get_native_vector(x);
#else
for (auto i = 0u; i != rank; ++i) data[i] -= x.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] -= get_native_vector(x)[i];
#endif
return *this;
}
template<
typename U,
typename std::enable_if<
std::is_convertible<U, T>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type& operator-=(U x) noexcept
{
return *this -= HIP_vector_type{x};
}
return *this;
}
template <typename U, typename std::enable_if<std::is_convertible<U, T>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type& operator-=(U x) noexcept {
return *this -= make_vector_type<T, rank>(x);
}
__HOST_DEVICE__
HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
{
__HOST_DEVICE__
HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept {
#if __HIP_USE_NATIVE_VECTOR__
data *= x.data;
get_native_vector(*this) *= get_native_vector(x);
#else
for (auto i = 0u; i != rank; ++i) data[i] *= x.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] *= get_native_vector(x)[i];
#endif
return *this;
}
return *this;
}
friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*(
HIP_vector_type x, const HIP_vector_type& y) noexcept
{
return HIP_vector_type{ x } *= y;
}
friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*(
HIP_vector_type x, const HIP_vector_type& y) noexcept {
return HIP_vector_type{x} *= y;
}
template<
typename U,
typename std::enable_if<
std::is_convertible<U, T>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type& operator*=(U x) noexcept
{
return *this *= HIP_vector_type{x};
}
template <typename U, typename std::enable_if<std::is_convertible<U, T>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type& operator*=(U x) noexcept {
return *this *= make_vector_type<T, rank>(x);
}
friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/(
HIP_vector_type x, const HIP_vector_type& y) noexcept
{
return HIP_vector_type{ x } /= y;
}
friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/(
HIP_vector_type x, const HIP_vector_type& y) noexcept {
return HIP_vector_type{x} /= y;
}
__HOST_DEVICE__
HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
{
__HOST_DEVICE__
HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept {
#if __HIP_USE_NATIVE_VECTOR__
data /= x.data;
get_native_vector(*this) /= get_native_vector(x);
#else
for (auto i = 0u; i != rank; ++i) data[i] /= x.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] /= get_native_vector(x)[i];
#endif
return *this;
}
template<
typename U,
typename std::enable_if<
std::is_convertible<U, T>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type& operator/=(U x) noexcept
{
return *this /= HIP_vector_type{x};
}
return *this;
}
template <typename U, typename std::enable_if<std::is_convertible<U, T>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type& operator/=(U x) noexcept {
return *this /= make_vector_type<T, rank>(x);
}
template<
typename U = T,
typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type operator-() const noexcept
{
auto tmp(*this);
template <typename U = T, typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type operator-() const noexcept {
auto tmp(*this);
#if __HIP_USE_NATIVE_VECTOR__
tmp.data = -tmp.data;
get_native_vector(tmp) = -get_native_vector(tmp);
#else
for (auto i = 0u; i != rank; ++i) tmp.data[i] = -tmp.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(tmp)[i] = -get_native_vector(tmp)[i];
#endif
return tmp;
}
return tmp;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type operator~() const noexcept
{
HIP_vector_type r{*this};
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type operator~() const noexcept {
HIP_vector_type r{*this};
#if __HIP_USE_NATIVE_VECTOR__
r.data = ~r.data;
get_native_vector(r) = ~get_native_vector(r);
#else
for (auto i = 0u; i != rank; ++i) r.data[i] = ~r.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(r)[i] = ~get_native_vector(r)[i];
#endif
return r;
}
return r;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
{
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept {
#if __HIP_USE_NATIVE_VECTOR__
data %= x.data;
get_native_vector(*this) %= get_native_vector(x);
#else
for (auto i = 0u; i != rank; ++i) data[i] %= x.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] %= get_native_vector(x)[i];
#endif
return *this;
}
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
{
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept {
#if __HIP_USE_NATIVE_VECTOR__
data ^= x.data;
get_native_vector(*this) ^= get_native_vector(x);
#else
for (auto i = 0u; i != rank; ++i) data[i] ^= x.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] ^= get_native_vector(x)[i];
#endif
return *this;
}
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
{
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept {
#if __HIP_USE_NATIVE_VECTOR__
data |= x.data;
get_native_vector(*this) |= get_native_vector(x);
#else
for (auto i = 0u; i != rank; ++i) data[i] |= x.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] |= get_native_vector(x)[i];
#endif
return *this;
}
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
{
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept {
#if __HIP_USE_NATIVE_VECTOR__
data &= x.data;
get_native_vector(*this) &= get_native_vector(x);
#else
for (auto i = 0u; i != rank; ++i) data[i] &= x.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] &= get_native_vector(x)[i];
#endif
return *this;
}
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
{
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept {
#if __HIP_USE_NATIVE_VECTOR__
data >>= x.data;
get_native_vector(*this) >>= get_native_vector(x);
#else
for (auto i = 0u; i != rank; ++i) data[i] >>= x.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] >>= get_native_vector(x)[i];
#endif
return *this;
}
return *this;
}
template<
typename U = T,
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__
HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
{
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
__HOST_DEVICE__ HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept {
#if __HIP_USE_NATIVE_VECTOR__
data <<= x.data;
get_native_vector(*this) <<= get_native_vector(x);
#else
for (auto i = 0u; i != rank; ++i) data[i] <<= x.data[i];
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] <<= get_native_vector(x)[i];
#endif
return *this;
}
return *this;
}
};
template<typename T, unsigned int n>
@@ -731,7 +621,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator+(
const HIP_vector_type<T, n>& x, U y) noexcept
{
return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
return HIP_vector_type<T, n>{x} += make_vector_type<T, n>(y);
}
template<typename T, unsigned int n, typename U>
__HOST_DEVICE__
@@ -740,7 +630,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator+(
U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} += y;
return make_vector_type<T, n>(x) += y;
}
template<typename T, unsigned int n>
@@ -759,7 +649,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator-(
const HIP_vector_type<T, n>& x, U y) noexcept
{
return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
return HIP_vector_type<T, n>{x} -= make_vector_type<T, n>(y);
}
template<typename T, unsigned int n, typename U>
__HOST_DEVICE__
@@ -768,7 +658,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator-(
U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} -= y;
return make_vector_type<T, n>(x) -= y;
}
template<typename T, unsigned int n, typename U>
@@ -778,7 +668,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator*(
const HIP_vector_type<T, n>& x, U y) noexcept
{
return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
return HIP_vector_type<T, n>{x} *= make_vector_type<T, n>(y);
}
template<typename T, unsigned int n, typename U>
__HOST_DEVICE__
@@ -787,7 +677,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator*(
U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} *= y;
return make_vector_type<T, n>(x) *= y;
}
template<typename T, unsigned int n, typename U>
@@ -797,7 +687,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator/(
const HIP_vector_type<T, n>& x, U y) noexcept
{
return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
return HIP_vector_type<T, n>{x} /= make_vector_type<T, n>(y);
}
template<typename T, unsigned int n, typename U>
__HOST_DEVICE__
@@ -806,17 +696,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator/(
U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} /= y;
}
template<typename V>
__HOST_DEVICE__
inline
constexpr
bool _hip_compare(const V& x, const V& y, int n) noexcept
{
return
(n == -1) ? true : ((x[n] != y[n]) ? false : _hip_compare(x, y, n - 1));
return make_vector_type<T, n>(x) /= y;
}
template<typename T, unsigned int n>
@@ -826,15 +706,22 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
bool operator==(
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
{
return _hip_compare(x.data, y.data, n - 1);
bool isTrue = true;
const auto& native_x = get_native_vector(x);
const auto& native_y = get_native_vector(y);
for (unsigned int i = 0; i < n; ++i) {
isTrue = (isTrue && (native_x[i] == native_y[i]));
}
return isTrue;
}
template<typename T, unsigned int n, typename U>
__HOST_DEVICE__
inline
constexpr
bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
{
return x == HIP_vector_type<T, n>{y};
return x == make_vector_type<T, n>(y);
}
template<typename T, unsigned int n, typename U>
__HOST_DEVICE__
@@ -842,7 +729,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
constexpr
bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} == y;
return make_vector_type<T, n>(x) == y;
}
template<typename T, unsigned int n>
@@ -894,7 +781,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator%(
const HIP_vector_type<T, n>& x, U y) noexcept
{
return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
return HIP_vector_type<T, n>{x} %= make_vector_type<T, n>(y);
}
template<
typename T,
@@ -907,7 +794,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator%(
U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} %= y;
return make_vector_type<T, n>(x) %= y;
}
template<
@@ -933,7 +820,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator^(
const HIP_vector_type<T, n>& x, U y) noexcept
{
return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
return HIP_vector_type<T, n>{x} ^= make_vector_type<T, n>(y);
}
template<
typename T,
@@ -946,7 +833,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator^(
U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} ^= y;
return make_vector_type<T, n>(x) ^= y;
}
template<
@@ -972,7 +859,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator|(
const HIP_vector_type<T, n>& x, U y) noexcept
{
return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
return HIP_vector_type<T, n>{x} |= make_vector_type<T, n>(y);
}
template<
typename T,
@@ -985,7 +872,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator|(
U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} |= y;
return make_vector_type<T, n>(x) |= y;
}
template<
@@ -1011,7 +898,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator&(
const HIP_vector_type<T, n>& x, U y) noexcept
{
return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
return HIP_vector_type<T, n>{x} &= make_vector_type<T, n>(y);
}
template<
typename T,
@@ -1024,7 +911,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator&(
U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} &= y;
return make_vector_type<T, n>(x) &= y;
}
template<
@@ -1050,7 +937,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator>>(
const HIP_vector_type<T, n>& x, U y) noexcept
{
return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
return HIP_vector_type<T, n>{x} >>= make_vector_type<T, n>(y);
}
template<
typename T,
@@ -1063,7 +950,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator>>(
U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} >>= y;
return make_vector_type<T, n>(x) >>= y;
}
template<
@@ -1089,7 +976,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator<<(
const HIP_vector_type<T, n>& x, U y) noexcept
{
return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
return HIP_vector_type<T, n>{x} <<= make_vector_type<T, n>(y);
}
template<
typename T,
@@ -1103,7 +990,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
HIP_vector_type<T, n> operator<<(
U x, const HIP_vector_type<T, n>& y) noexcept
{
return HIP_vector_type<T, n>{x} <<= y;
return make_vector_type<T, n>(x) <<= y;
}
/*
+4 -4
Просмотреть файл
@@ -52,22 +52,22 @@ THE SOFTWARE.
__DEVICE__
inline
int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
return __ockl_sdot2(a.data, b.data, c, saturate);
return __ockl_sdot2(get_native_vector(a), get_native_vector(b), c, saturate);
}
__DEVICE__
inline
uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
return __ockl_udot2(a.data, b.data, c, saturate);
return __ockl_udot2(get_native_vector(a), get_native_vector(b), c, saturate);
}
__DEVICE__
inline
int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
return __ockl_sdot4(a.data, b.data, c, saturate);
return __ockl_sdot4(get_native_vector(a), get_native_vector(b), c, saturate);
}
__DEVICE__
inline
uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
return __ockl_udot4(a.data, b.data, c, saturate);
return __ockl_udot4(get_native_vector(a), get_native_vector(b), c, saturate);
}
__DEVICE__
inline
+20 -10
Просмотреть файл
@@ -151,8 +151,9 @@ template <
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
static __device__ __hip_img_chk__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y) {
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
int2 coords{x, y};
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
auto tmp = __ockl_image_load_2D(i, int2(x, y).data);
auto tmp = __ockl_image_load_2D(i, get_native_vector(coords));
*data = __hipMapFrom<T>(tmp);
}
@@ -170,9 +171,10 @@ template <
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
static __device__ __hip_img_chk__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y) {
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
int2 coords{x, y};
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
auto tmp = __hipMapTo<float4::Native_vec_>(data);
__ockl_image_store_2D(i, int2(x, y).data, tmp);
__ockl_image_store_2D(i, get_native_vector(coords), tmp);
}
/** \brief Reads the value from the three-dimensional surface at coordinate
@@ -190,8 +192,9 @@ template <
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
static __device__ __hip_img_chk__ void surf3Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, int z) {
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
int4 coords{x, y, z, 0};
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i));
auto tmp = __ockl_image_load_3D(i, int4(x, y, z, 0).data);
auto tmp = __ockl_image_load_3D(i, get_native_vector(coords));
*data = __hipMapFrom<T>(tmp);
}
@@ -210,9 +213,10 @@ template <
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
static __device__ __hip_img_chk__ void surf3Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int z) {
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
int4 coords{x, y, z, 0};
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i));
auto tmp = __hipMapTo<float4::Native_vec_>(data);
__ockl_image_store_3D(i, int4(x, y, z, 0).data, tmp);
__ockl_image_store_3D(i, get_native_vector(coords), tmp);
}
/** \brief Reads the value from the one-dimensional layered surface at
@@ -268,8 +272,9 @@ template <
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
static __device__ __hip_img_chk__ void surf2DLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int layer) {
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
int2 coords{x, y};
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
auto tmp = __ockl_image_load_lod_2D(i, int2(x, y).data, layer);
auto tmp = __ockl_image_load_lod_2D(i, get_native_vector(coords), layer);
*data = __hipMapFrom<T>(tmp);
}
@@ -288,9 +293,10 @@ template <
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
static __device__ __hip_img_chk__ void surf2DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int layer) {
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
int2 coords{x, y};
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
auto tmp = __hipMapTo<float4::Native_vec_>(data);
__ockl_image_store_lod_2D(i, int2(x, y).data, layer, tmp);
__ockl_image_store_lod_2D(i, get_native_vector(coords), layer, tmp);
}
/** \brief Reads the value from the cubemap surface at coordinate x, y and
@@ -308,8 +314,9 @@ template <
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
static __device__ __hip_img_chk__ void surfCubemapread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face) {
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
int2 coords{x, y};
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
auto tmp = __ockl_image_load_CM(i, int2(x, y).data, face);
auto tmp = __ockl_image_load_CM(i, get_native_vector(coords), face);
*data = __hipMapFrom<T>(tmp);
}
@@ -328,9 +335,10 @@ template <
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
static __device__ __hip_img_chk__ void surfCubemapwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int face) {
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
int2 coords{x, y};
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
auto tmp = __hipMapTo<float4::Native_vec_>(data);
__ockl_image_store_CM(i, int2(x, y).data, face, tmp);
__ockl_image_store_CM(i, get_native_vector(coords), face, tmp);
}
/** \brief Reads the value from the layered cubemap surface at coordinate x, y
@@ -350,8 +358,9 @@ template <
static __device__ __hip_img_chk__ void surfCubemapLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face,
int layer) {
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
int2 coords{x, y};
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
auto tmp = __ockl_image_load_lod_CM(i, int2(x, y).data, face, layer);
auto tmp = __ockl_image_load_lod_CM(i, get_native_vector(coords), face, layer);
*data = __hipMapFrom<T>(tmp);
}
@@ -372,9 +381,10 @@ template <
static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T* data, hipSurfaceObject_t surfObj, int x, int y, int face,
int layer) {
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
int2 coords{x, y};
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
auto tmp = __hipMapTo<float4::Native_vec_>(data);
__ockl_image_store_lod_CM(i, int2(x, y).data, face, layer, tmp);
__ockl_image_store_lod_CM(i, get_native_vector(coords), face, layer, tmp);
}
// Doxygen end group SurfaceAPI
+52 -28
Просмотреть файл
@@ -260,7 +260,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2D(texture<T, hipTextureType2D, readMode> t, float x, float y)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data);
float2 coords{x, y};
auto tmp = __ockl_image_sample_2D(i, s, get_native_vector(coords));
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -268,7 +269,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DLayered(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
float2 coords{x, layer};
auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords));
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -276,7 +278,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLayered(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
float4 coords{x, y, layer, 0.0f};
auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords));
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -284,7 +287,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex3D(texture<T, hipTextureType3D, readMode> t, float x, float y, float z)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
float4 coords{x, y, z, 0.0f};
auto tmp = __ockl_image_sample_3D(i, s, get_native_vector(coords));
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -292,7 +296,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemap(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
float4 coords{x, y, z, 0.0f};
auto tmp = __ockl_image_sample_CM(i, s, get_native_vector(coords));
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -308,7 +313,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLod(texture<T, hipTextureType2D, readMode> t, float x, float y, float level)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
float2 coords{x, y};
auto tmp = __ockl_image_sample_lod_2D(i, s, get_native_vector(coords), level);
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -316,7 +322,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DLayeredLod(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float level)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
float2 coords{x, layer};
auto tmp = __ockl_image_sample_lod_1Da(i, s, get_native_vector(coords), level);
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -324,7 +331,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLayeredLod(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float level)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level);
float4 coords{x, y, layer, 0.0f};
auto tmp = __ockl_image_sample_lod_2Da(i, s, get_native_vector(coords), level);
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -332,7 +340,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex3DLod(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float level)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
float4 coords{x, y, z, 0.0f};
auto tmp = __ockl_image_sample_lod_3D(i, s, get_native_vector(coords), level);
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -340,7 +349,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapLod(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z, float level)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
float4 coords{x, y, z, 0.0f};
auto tmp = __ockl_image_sample_lod_CM(i, s, get_native_vector(coords), level);
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -348,7 +358,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapLayered(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data);
float4 coords{x, y, z, layer};
auto tmp = __ockl_image_sample_CMa(i, s, get_native_vector(coords));
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -356,7 +367,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapLayeredLod(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer, float level)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level);
float4 coords{x, y, z, layer};
auto tmp = __ockl_image_sample_lod_CMa(i, s, get_native_vector(coords), level);
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -365,8 +377,9 @@ static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> t
{
TEXTURE_PARAMETERS_INIT;
// TODO missing in device libs.
// auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
// return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
// auto tmp = __ockl_image_sample_grad_CM(i, s, get_native_vector(float4(x, y, z, 0.0f)),
// get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x,
// dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
return {};
}
@@ -375,8 +388,9 @@ static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> t
{
TEXTURE_PARAMETERS_INIT;
// TODO missing in device libs.
// auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
// return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
// auto tmp = __ockl_image_sample_grad_CMa(i, s, get_native_vector(float4(x, y, z, layer)),
// get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x,
// dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
return {};
}
@@ -392,7 +406,9 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DGrad(texture<T, hipTextureType2D, readMode> t, float x, float y, float2 dPdx, float2 dPdy)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
float2 coords{x, y};
auto tmp = __ockl_image_sample_grad_2D(i, s, get_native_vector(coords), get_native_vector(dPdx),
get_native_vector(dPdy));
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -400,7 +416,8 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DLayeredGrad(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float dPdx, float dPdy)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy);
float2 coords{x, layer};
auto tmp = __ockl_image_sample_grad_1Da(i, s, get_native_vector(coords), dPdx, dPdy);
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -408,7 +425,9 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLayeredGrad(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float2 dPdx, float2 dPdy)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
float4 coords{x, y, layer, 0.0f};
auto tmp = __ockl_image_sample_grad_2Da(i, s, get_native_vector(coords),
get_native_vector(dPdx), get_native_vector(dPdy));
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -416,7 +435,11 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex3DGrad(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float4 dPdx, float4 dPdy)
{
TEXTURE_PARAMETERS_INIT;
auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
float4 coords{x, y, z, 0.0f};
float4 gradx{dPdx.x, dPdx.y, dPdx.z, 0.0f};
float4 grady{dPdy.x, dPdy.y, dPdy.z, 0.0f};
auto tmp = __ockl_image_sample_grad_3D(i, s, get_native_vector(coords),
get_native_vector(gradx), get_native_vector(grady));
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
}
@@ -467,22 +490,23 @@ template <typename T, hipTextureReadMode readMode>
static __forceinline__ __device__ __hip_img_chk__ __hip_tex2dgather_ret_t<T, readMode> tex2Dgather(texture<T, hipTextureType2D, readMode> t, float x, float y, int comp=0)
{
TEXTURE_PARAMETERS_INIT;
float2 coords{x, y};
switch (comp) {
case 1: {
auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data);
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
auto tmp = __ockl_image_gather4g_2D(i, s, get_native_vector(coords));
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
}
case 2: {
auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data);
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
auto tmp = __ockl_image_gather4b_2D(i, s, get_native_vector(coords));
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
}
case 3: {
auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data);
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
auto tmp = __ockl_image_gather4a_2D(i, s, get_native_vector(coords));
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
}
default: {
auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data);
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
auto tmp = __ockl_image_gather4r_2D(i, s, get_native_vector(coords));
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
}
}
return {};
+56 -32
Просмотреть файл
@@ -78,7 +78,8 @@ template <
static __device__ __hip_img_chk__ T tex2D(hipTextureObject_t textureObject, float x, float y)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data);
float2 coords{x, y};
auto tmp = __ockl_image_sample_2D(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
}
@@ -96,7 +97,8 @@ template <
static __device__ __hip_img_chk__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
float4 coords{x, y, z, 0.0f};
auto tmp = __ockl_image_sample_3D(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
}
@@ -114,7 +116,8 @@ template <
static __device__ __hip_img_chk__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
float2 coords{x, layer};
auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
}
@@ -132,7 +135,8 @@ template <
static __device__ __hip_img_chk__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
float4 coords{x, y, layer, 0.0f};
auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
}
@@ -150,7 +154,8 @@ template <
static __device__ __hip_img_chk__ T texCubemap(hipTextureObject_t textureObject, float x, float y, float z)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
float4 coords{x, y, z, 0.0f};
auto tmp = __ockl_image_sample_CM(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
}
@@ -168,7 +173,8 @@ template <
static __device__ __hip_img_chk__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data);
float4 coords{x, y, z, layer};
auto tmp = __ockl_image_sample_CMa(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
}
@@ -186,26 +192,27 @@ template <
static __device__ __hip_img_chk__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0)
{
TEXTURE_OBJECT_PARAMETERS_INIT
float2 coords{x, y};
switch (comp) {
case 1: {
auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data);
return __hipMapFrom<T>(tmp);
break;
auto tmp = __ockl_image_gather4r_2D(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
break;
}
case 2: {
auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data);
return __hipMapFrom<T>(tmp);
break;
auto tmp = __ockl_image_gather4g_2D(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
break;
}
case 3: {
auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data);
return __hipMapFrom<T>(tmp);
break;
auto tmp = __ockl_image_gather4b_2D(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
break;
}
default: {
auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data);
return __hipMapFrom<T>(tmp);
break;
auto tmp = __ockl_image_gather4a_2D(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
break;
}
}
return {};
@@ -243,7 +250,8 @@ template <
static __device__ __hip_img_chk__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
float2 coords{x, y};
auto tmp = __ockl_image_sample_lod_2D(i, s, get_native_vector(coords), level);
return __hipMapFrom<T>(tmp);
}
@@ -261,7 +269,8 @@ template <
static __device__ __hip_img_chk__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
float4 coords{x, y, z, 0.0f};
auto tmp = __ockl_image_sample_lod_3D(i, s, get_native_vector(coords), level);
return __hipMapFrom<T>(tmp);
}
@@ -279,7 +288,8 @@ template <
static __device__ __hip_img_chk__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
float2 coords{x, layer};
auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
}
@@ -297,7 +307,8 @@ template <
static __device__ __hip_img_chk__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
float4 coords{x, y, layer, 0.0f};
auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords));
return __hipMapFrom<T>(tmp);
}
@@ -315,7 +326,8 @@ template <
static __device__ __hip_img_chk__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
float4 coords{x, y, z, 0.0f};
auto tmp = __ockl_image_sample_lod_CM(i, s, get_native_vector(coords), level);
return __hipMapFrom<T>(tmp);
}
@@ -334,8 +346,9 @@ static __device__ __hip_img_chk__ T texCubemapGrad(hipTextureObject_t textureObj
{
TEXTURE_OBJECT_PARAMETERS_INIT
// TODO missing in device libs.
// auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
// return __hipMapFrom<T>(tmp);
// auto tmp = __ockl_image_sample_grad_CM(i, s, get_native_vector(float4(x, y, z, 0.0f)),
// get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x,
// dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<T>(tmp);
return {};
}
@@ -353,7 +366,8 @@ template <
static __device__ __hip_img_chk__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level);
float4 coords{x, y, z, layer};
auto tmp = __ockl_image_sample_lod_CMa(i, s, get_native_vector(coords), level);
return __hipMapFrom<T>(tmp);
}
@@ -389,7 +403,9 @@ template <
static __device__ __hip_img_chk__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
float2 coords{x, y};
auto tmp = __ockl_image_sample_grad_2D(i, s, get_native_vector(coords), get_native_vector(dPdx),
get_native_vector(dPdy));
return __hipMapFrom<T>(tmp);
}
@@ -407,7 +423,11 @@ template <
static __device__ __hip_img_chk__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
float4 coords{x, y, z, 0.0f};
float4 gradx{dPdy.x, dPdy.y, dPdy.z, 0.0f};
float4 grady{dPdy.x, dPdy.y, dPdy.z, 0.0f};
auto tmp = __ockl_image_sample_grad_3D(i, s, get_native_vector(coords),
get_native_vector(gradx), get_native_vector(grady));
return __hipMapFrom<T>(tmp);
}
@@ -425,7 +445,8 @@ template <
static __device__ __hip_img_chk__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy);
float2 coords{x, layer};
auto tmp = __ockl_image_sample_grad_1Da(i, s, get_native_vector(coords), dPdx, dPdy);
return __hipMapFrom<T>(tmp);
}
@@ -443,7 +464,9 @@ template <
static __device__ __hip_img_chk__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
{
TEXTURE_OBJECT_PARAMETERS_INIT
auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
float4 coords{x, y, layer, 0.0f};
auto tmp = __ockl_image_sample_grad_2Da(i, s, get_native_vector(coords),
get_native_vector(dPdx), get_native_vector(dPdy));
return __hipMapFrom<T>(tmp);
}
@@ -462,8 +485,9 @@ static __device__ __hip_img_chk__ T texCubemapLayeredGrad(hipTextureObject_t te
{
TEXTURE_OBJECT_PARAMETERS_INIT
// TODO missing in device libs.
// auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
// return __hipMapFrom<T>(tmp);
// auto tmp = __ockl_image_sample_grad_CMa(i, s, get_native_vector(float4(x, y, z, layer)),
// get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x,
// dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<T>(tmp);
return {};
}