SWDEV-527851 SWDEV-527890 SWDEV-529456 - Make HIP vector simple struct (#356)
* Make HIP vector simple struct Change-Id: I8442c2cc9af26b2a3c7d6719e3348df1593e83b3 * Update make_vector_type Change-Id: Ic5060994a08baa4262c2a4b09fcbe6bc74276720
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
e9dbd7c99d
Коммит
3cd3b3ffc5
@@ -143,27 +143,34 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
} // Namespace hip_impl.
|
||||
|
||||
template<typename T, unsigned int n> struct HIP_vector_base;
|
||||
template <typename T, unsigned int rank> struct HIP_vector_type;
|
||||
|
||||
template <typename T, unsigned int n>
|
||||
__attribute__((always_inline)) __HOST_DEVICE__ typename HIP_vector_base<T, n>::Native_vec_&
|
||||
get_native_vector(HIP_vector_base<T, n>& base_vec) {
|
||||
static_assert(sizeof(base_vec) == sizeof(typename HIP_vector_base<T, n>::Native_vec_));
|
||||
return *reinterpret_cast<typename HIP_vector_base<T, n>::Native_vec_*>(&base_vec.x);
|
||||
};
|
||||
|
||||
template <typename T, unsigned int n>
|
||||
__attribute__((
|
||||
always_inline)) __HOST_DEVICE__ const typename HIP_vector_base<T, n>::Native_vec_&
|
||||
get_native_vector(const HIP_vector_base<T, n>& base_vec) {
|
||||
static_assert(sizeof(base_vec) == sizeof(typename HIP_vector_base<T, n>::Native_vec_));
|
||||
return *reinterpret_cast<const typename HIP_vector_base<T, n>::Native_vec_*>(&base_vec.x);
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct HIP_vector_base<T, 1> {
|
||||
using Native_vec_ = __NATIVE_VECTOR__(1, T);
|
||||
|
||||
union {
|
||||
Native_vec_ data;
|
||||
struct {
|
||||
T x;
|
||||
};
|
||||
};
|
||||
T x;
|
||||
|
||||
using value_type = T;
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_base() = default;
|
||||
__HOST_DEVICE__
|
||||
explicit
|
||||
constexpr
|
||||
HIP_vector_base(T x_) noexcept : data{x_} {}
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_base(const HIP_vector_base&) = default;
|
||||
__HOST_DEVICE__
|
||||
@@ -175,43 +182,23 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_base& operator=(const HIP_vector_base&) = default;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct HIP_vector_base<T, 2> {
|
||||
using Native_vec_ = __NATIVE_VECTOR__(2, T);
|
||||
template <typename T> struct alignas(2 * sizeof(T)) HIP_vector_base<T, 2> {
|
||||
using Native_vec_ = __NATIVE_VECTOR__(2, T);
|
||||
|
||||
union
|
||||
#if !__has_attribute(ext_vector_type)
|
||||
alignas(hip_impl::next_pot(2 * sizeof(T)))
|
||||
#endif
|
||||
{
|
||||
Native_vec_ data;
|
||||
struct {
|
||||
T x;
|
||||
T y;
|
||||
};
|
||||
};
|
||||
T x, y;
|
||||
|
||||
using value_type = T;
|
||||
using value_type = T;
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_base() = default;
|
||||
__HOST_DEVICE__
|
||||
explicit
|
||||
constexpr
|
||||
HIP_vector_base(T x_) noexcept : data{x_, x_} {}
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_base(const HIP_vector_base&) = default;
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_base(HIP_vector_base&&) = default;
|
||||
__HOST_DEVICE__
|
||||
~HIP_vector_base() = default;
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_base& operator=(const HIP_vector_base&) = default;
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_base() = default;
|
||||
__HOST_DEVICE__
|
||||
constexpr HIP_vector_base(const HIP_vector_base&) = default;
|
||||
__HOST_DEVICE__
|
||||
constexpr HIP_vector_base(HIP_vector_base&&) = default;
|
||||
__HOST_DEVICE__
|
||||
~HIP_vector_base() = default;
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_base& operator=(const HIP_vector_base&) = default;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
@@ -364,27 +351,13 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
}
|
||||
};
|
||||
|
||||
union {
|
||||
Native_vec_ data;
|
||||
struct {
|
||||
T x;
|
||||
T y;
|
||||
T z;
|
||||
};
|
||||
};
|
||||
T x, y, z;
|
||||
|
||||
using value_type = T;
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_base() = default;
|
||||
__HOST_DEVICE__
|
||||
explicit
|
||||
constexpr
|
||||
HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_base(const HIP_vector_base&) = default;
|
||||
__HOST_DEVICE__
|
||||
@@ -399,320 +372,237 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_base& operator=(HIP_vector_base&&) = default;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct HIP_vector_base<T, 4> {
|
||||
using Native_vec_ = __NATIVE_VECTOR__(4, T);
|
||||
template <typename T> struct alignas(4 * sizeof(T)) HIP_vector_base<T, 4> {
|
||||
using Native_vec_ = __NATIVE_VECTOR__(4, T);
|
||||
|
||||
union
|
||||
#if !__has_attribute(ext_vector_type)
|
||||
alignas(hip_impl::next_pot(4 * sizeof(T)))
|
||||
#endif
|
||||
{
|
||||
Native_vec_ data;
|
||||
struct {
|
||||
T x;
|
||||
T y;
|
||||
T z;
|
||||
T w;
|
||||
};
|
||||
};
|
||||
T x, y, z, w;
|
||||
|
||||
using value_type = T;
|
||||
using value_type = T;
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_base() = default;
|
||||
__HOST_DEVICE__
|
||||
explicit
|
||||
constexpr
|
||||
HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_base(const HIP_vector_base&) = default;
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_base(HIP_vector_base&&) = default;
|
||||
__HOST_DEVICE__
|
||||
~HIP_vector_base() = default;
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_base& operator=(const HIP_vector_base&) = default;
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_base() = default;
|
||||
__HOST_DEVICE__
|
||||
constexpr HIP_vector_base(const HIP_vector_base&) = default;
|
||||
__HOST_DEVICE__
|
||||
constexpr HIP_vector_base(HIP_vector_base&&) = default;
|
||||
__HOST_DEVICE__
|
||||
~HIP_vector_base() = default;
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_base& operator=(const HIP_vector_base&) = default;
|
||||
};
|
||||
|
||||
template<typename T, unsigned int rank>
|
||||
template <typename T, size_t rank, size_t... indices>
|
||||
constexpr inline __HOST_DEVICE__ HIP_vector_type<T, rank> make_vector_type_impl(
|
||||
T val, std::index_sequence<indices...>) noexcept {
|
||||
// Fills vec with vals, and ignores the indices
|
||||
return HIP_vector_type<T, rank>{((void)indices, val)...};
|
||||
}
|
||||
|
||||
template <typename T, unsigned int rank>
|
||||
constexpr inline __HOST_DEVICE__ HIP_vector_type<T, rank> make_vector_type(T val) {
|
||||
return make_vector_type_impl<T, rank>(val, std::make_index_sequence<rank>{});
|
||||
}
|
||||
|
||||
template <typename T, unsigned int rank>
|
||||
struct HIP_vector_type : public HIP_vector_base<T, rank> {
|
||||
using HIP_vector_base<T, rank>::data;
|
||||
using typename HIP_vector_base<T, rank>::Native_vec_;
|
||||
using typename HIP_vector_base<T, rank>::Native_vec_;
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type() = default;
|
||||
template<
|
||||
typename U,
|
||||
typename std::enable_if<
|
||||
std::is_convertible<U, T>::value>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
explicit
|
||||
constexpr
|
||||
HIP_vector_type(U x_) noexcept
|
||||
: HIP_vector_base<T, rank>{static_cast<T>(x_)}
|
||||
{}
|
||||
template< // TODO: constrain based on type as well.
|
||||
typename... Us,
|
||||
typename std::enable_if<
|
||||
(rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_type(Us... xs) noexcept
|
||||
: HIP_vector_base<T, rank>{static_cast<T>(xs)...}
|
||||
{}
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_type(const HIP_vector_type&) = default;
|
||||
__HOST_DEVICE__
|
||||
constexpr
|
||||
HIP_vector_type(HIP_vector_type&&) = default;
|
||||
__HOST_DEVICE__
|
||||
~HIP_vector_type() = default;
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type() = default;
|
||||
template <typename U,
|
||||
typename std::enable_if<std::is_convertible<U, T>::value>::type* = nullptr>
|
||||
__HOST_DEVICE__ explicit constexpr HIP_vector_type(U x_) noexcept
|
||||
: HIP_vector_base<T, rank>{static_cast<T>(x_)} {}
|
||||
template < // TODO: constrain based on type as well.
|
||||
typename... Us,
|
||||
typename std::enable_if<(rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
|
||||
__HOST_DEVICE__ constexpr HIP_vector_type(Us... xs) noexcept
|
||||
: HIP_vector_base<T, rank>{static_cast<T>(xs)...} {}
|
||||
__HOST_DEVICE__
|
||||
constexpr HIP_vector_type(const HIP_vector_type&) = default;
|
||||
__HOST_DEVICE__
|
||||
constexpr HIP_vector_type(HIP_vector_type&&) = default;
|
||||
__HOST_DEVICE__
|
||||
~HIP_vector_type() = default;
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator=(const HIP_vector_type&) = default;
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator=(HIP_vector_type&&) = default;
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator=(const HIP_vector_type&) = default;
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator=(HIP_vector_type&&) = default;
|
||||
|
||||
// Operators
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator++() noexcept
|
||||
{
|
||||
return *this += HIP_vector_type{1};
|
||||
}
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type operator++(int) noexcept
|
||||
{
|
||||
auto tmp(*this);
|
||||
++*this;
|
||||
return tmp;
|
||||
}
|
||||
// Operators
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator++() noexcept {
|
||||
HIP_vector_type unity = make_vector_type<T, rank>(1);
|
||||
return *this += unity;
|
||||
}
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type operator++(int) noexcept {
|
||||
auto tmp(*this);
|
||||
++*this;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator--() noexcept
|
||||
{
|
||||
return *this -= HIP_vector_type{1};
|
||||
}
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type operator--(int) noexcept
|
||||
{
|
||||
auto tmp(*this);
|
||||
--*this;
|
||||
return tmp;
|
||||
}
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator--() noexcept {
|
||||
HIP_vector_type unity = make_vector_type<T, rank>(1);
|
||||
return *this -= unity;
|
||||
}
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type operator--(int) noexcept {
|
||||
auto tmp(*this);
|
||||
--*this;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept {
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
data += x.data;
|
||||
get_native_vector(*this) += get_native_vector(x);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) data[i] += x.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] += get_native_vector(x)[i];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
template<
|
||||
typename U,
|
||||
typename std::enable_if<
|
||||
std::is_convertible<U, T>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator+=(U x) noexcept
|
||||
{
|
||||
return *this += HIP_vector_type{x};
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
template <typename U, typename std::enable_if<std::is_convertible<U, T>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type& operator+=(U x) noexcept {
|
||||
return *this += make_vector_type<T, rank>(x);
|
||||
}
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept {
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
data -= x.data;
|
||||
get_native_vector(*this) -= get_native_vector(x);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) data[i] -= x.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] -= get_native_vector(x)[i];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
template<
|
||||
typename U,
|
||||
typename std::enable_if<
|
||||
std::is_convertible<U, T>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator-=(U x) noexcept
|
||||
{
|
||||
return *this -= HIP_vector_type{x};
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
template <typename U, typename std::enable_if<std::is_convertible<U, T>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type& operator-=(U x) noexcept {
|
||||
return *this -= make_vector_type<T, rank>(x);
|
||||
}
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept {
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
data *= x.data;
|
||||
get_native_vector(*this) *= get_native_vector(x);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) data[i] *= x.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] *= get_native_vector(x)[i];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*(
|
||||
HIP_vector_type x, const HIP_vector_type& y) noexcept
|
||||
{
|
||||
return HIP_vector_type{ x } *= y;
|
||||
}
|
||||
friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*(
|
||||
HIP_vector_type x, const HIP_vector_type& y) noexcept {
|
||||
return HIP_vector_type{x} *= y;
|
||||
}
|
||||
|
||||
template<
|
||||
typename U,
|
||||
typename std::enable_if<
|
||||
std::is_convertible<U, T>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator*=(U x) noexcept
|
||||
{
|
||||
return *this *= HIP_vector_type{x};
|
||||
}
|
||||
template <typename U, typename std::enable_if<std::is_convertible<U, T>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type& operator*=(U x) noexcept {
|
||||
return *this *= make_vector_type<T, rank>(x);
|
||||
}
|
||||
|
||||
friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/(
|
||||
HIP_vector_type x, const HIP_vector_type& y) noexcept
|
||||
{
|
||||
return HIP_vector_type{ x } /= y;
|
||||
}
|
||||
friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/(
|
||||
HIP_vector_type x, const HIP_vector_type& y) noexcept {
|
||||
return HIP_vector_type{x} /= y;
|
||||
}
|
||||
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept {
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
data /= x.data;
|
||||
get_native_vector(*this) /= get_native_vector(x);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) data[i] /= x.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] /= get_native_vector(x)[i];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
template<
|
||||
typename U,
|
||||
typename std::enable_if<
|
||||
std::is_convertible<U, T>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator/=(U x) noexcept
|
||||
{
|
||||
return *this /= HIP_vector_type{x};
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
template <typename U, typename std::enable_if<std::is_convertible<U, T>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type& operator/=(U x) noexcept {
|
||||
return *this /= make_vector_type<T, rank>(x);
|
||||
}
|
||||
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type operator-() const noexcept
|
||||
{
|
||||
auto tmp(*this);
|
||||
template <typename U = T, typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type operator-() const noexcept {
|
||||
auto tmp(*this);
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
tmp.data = -tmp.data;
|
||||
get_native_vector(tmp) = -get_native_vector(tmp);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) tmp.data[i] = -tmp.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(tmp)[i] = -get_native_vector(tmp)[i];
|
||||
#endif
|
||||
return tmp;
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type operator~() const noexcept
|
||||
{
|
||||
HIP_vector_type r{*this};
|
||||
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type operator~() const noexcept {
|
||||
HIP_vector_type r{*this};
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
r.data = ~r.data;
|
||||
get_native_vector(r) = ~get_native_vector(r);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) r.data[i] = ~r.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(r)[i] = ~get_native_vector(r)[i];
|
||||
#endif
|
||||
return r;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept {
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
data %= x.data;
|
||||
get_native_vector(*this) %= get_native_vector(x);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) data[i] %= x.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] %= get_native_vector(x)[i];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept {
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
data ^= x.data;
|
||||
get_native_vector(*this) ^= get_native_vector(x);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) data[i] ^= x.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] ^= get_native_vector(x)[i];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept {
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
data |= x.data;
|
||||
get_native_vector(*this) |= get_native_vector(x);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) data[i] |= x.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] |= get_native_vector(x)[i];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept {
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
data &= x.data;
|
||||
get_native_vector(*this) &= get_native_vector(x);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) data[i] &= x.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] &= get_native_vector(x)[i];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept {
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
data >>= x.data;
|
||||
get_native_vector(*this) >>= get_native_vector(x);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) data[i] >>= x.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] >>= get_native_vector(x)[i];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<
|
||||
typename U = T,
|
||||
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__
|
||||
HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
|
||||
{
|
||||
template <typename U = T, typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
||||
__HOST_DEVICE__ HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept {
|
||||
#if __HIP_USE_NATIVE_VECTOR__
|
||||
data <<= x.data;
|
||||
get_native_vector(*this) <<= get_native_vector(x);
|
||||
#else
|
||||
for (auto i = 0u; i != rank; ++i) data[i] <<= x.data[i];
|
||||
for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] <<= get_native_vector(x)[i];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
@@ -731,7 +621,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator+(
|
||||
const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
|
||||
return HIP_vector_type<T, n>{x} += make_vector_type<T, n>(y);
|
||||
}
|
||||
template<typename T, unsigned int n, typename U>
|
||||
__HOST_DEVICE__
|
||||
@@ -740,7 +630,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator+(
|
||||
U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} += y;
|
||||
return make_vector_type<T, n>(x) += y;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
@@ -759,7 +649,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator-(
|
||||
const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
|
||||
return HIP_vector_type<T, n>{x} -= make_vector_type<T, n>(y);
|
||||
}
|
||||
template<typename T, unsigned int n, typename U>
|
||||
__HOST_DEVICE__
|
||||
@@ -768,7 +658,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator-(
|
||||
U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} -= y;
|
||||
return make_vector_type<T, n>(x) -= y;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n, typename U>
|
||||
@@ -778,7 +668,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator*(
|
||||
const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
|
||||
return HIP_vector_type<T, n>{x} *= make_vector_type<T, n>(y);
|
||||
}
|
||||
template<typename T, unsigned int n, typename U>
|
||||
__HOST_DEVICE__
|
||||
@@ -787,7 +677,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator*(
|
||||
U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} *= y;
|
||||
return make_vector_type<T, n>(x) *= y;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n, typename U>
|
||||
@@ -797,7 +687,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator/(
|
||||
const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
|
||||
return HIP_vector_type<T, n>{x} /= make_vector_type<T, n>(y);
|
||||
}
|
||||
template<typename T, unsigned int n, typename U>
|
||||
__HOST_DEVICE__
|
||||
@@ -806,17 +696,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator/(
|
||||
U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} /= y;
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
__HOST_DEVICE__
|
||||
inline
|
||||
constexpr
|
||||
bool _hip_compare(const V& x, const V& y, int n) noexcept
|
||||
{
|
||||
return
|
||||
(n == -1) ? true : ((x[n] != y[n]) ? false : _hip_compare(x, y, n - 1));
|
||||
return make_vector_type<T, n>(x) /= y;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
@@ -826,15 +706,22 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
bool operator==(
|
||||
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return _hip_compare(x.data, y.data, n - 1);
|
||||
bool isTrue = true;
|
||||
const auto& native_x = get_native_vector(x);
|
||||
const auto& native_y = get_native_vector(y);
|
||||
for (unsigned int i = 0; i < n; ++i) {
|
||||
isTrue = (isTrue && (native_x[i] == native_y[i]));
|
||||
}
|
||||
return isTrue;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n, typename U>
|
||||
__HOST_DEVICE__
|
||||
inline
|
||||
constexpr
|
||||
bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return x == HIP_vector_type<T, n>{y};
|
||||
return x == make_vector_type<T, n>(y);
|
||||
}
|
||||
template<typename T, unsigned int n, typename U>
|
||||
__HOST_DEVICE__
|
||||
@@ -842,7 +729,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
constexpr
|
||||
bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} == y;
|
||||
return make_vector_type<T, n>(x) == y;
|
||||
}
|
||||
|
||||
template<typename T, unsigned int n>
|
||||
@@ -894,7 +781,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator%(
|
||||
const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
|
||||
return HIP_vector_type<T, n>{x} %= make_vector_type<T, n>(y);
|
||||
}
|
||||
template<
|
||||
typename T,
|
||||
@@ -907,7 +794,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator%(
|
||||
U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} %= y;
|
||||
return make_vector_type<T, n>(x) %= y;
|
||||
}
|
||||
|
||||
template<
|
||||
@@ -933,7 +820,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator^(
|
||||
const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
|
||||
return HIP_vector_type<T, n>{x} ^= make_vector_type<T, n>(y);
|
||||
}
|
||||
template<
|
||||
typename T,
|
||||
@@ -946,7 +833,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator^(
|
||||
U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} ^= y;
|
||||
return make_vector_type<T, n>(x) ^= y;
|
||||
}
|
||||
|
||||
template<
|
||||
@@ -972,7 +859,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator|(
|
||||
const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
|
||||
return HIP_vector_type<T, n>{x} |= make_vector_type<T, n>(y);
|
||||
}
|
||||
template<
|
||||
typename T,
|
||||
@@ -985,7 +872,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator|(
|
||||
U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} |= y;
|
||||
return make_vector_type<T, n>(x) |= y;
|
||||
}
|
||||
|
||||
template<
|
||||
@@ -1011,7 +898,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator&(
|
||||
const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
|
||||
return HIP_vector_type<T, n>{x} &= make_vector_type<T, n>(y);
|
||||
}
|
||||
template<
|
||||
typename T,
|
||||
@@ -1024,7 +911,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator&(
|
||||
U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} &= y;
|
||||
return make_vector_type<T, n>(x) &= y;
|
||||
}
|
||||
|
||||
template<
|
||||
@@ -1050,7 +937,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator>>(
|
||||
const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
|
||||
return HIP_vector_type<T, n>{x} >>= make_vector_type<T, n>(y);
|
||||
}
|
||||
template<
|
||||
typename T,
|
||||
@@ -1063,7 +950,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator>>(
|
||||
U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} >>= y;
|
||||
return make_vector_type<T, n>(x) >>= y;
|
||||
}
|
||||
|
||||
template<
|
||||
@@ -1089,7 +976,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator<<(
|
||||
const HIP_vector_type<T, n>& x, U y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
|
||||
return HIP_vector_type<T, n>{x} <<= make_vector_type<T, n>(y);
|
||||
}
|
||||
template<
|
||||
typename T,
|
||||
@@ -1103,7 +990,7 @@ template <typename __T> struct is_scalar : public integral_constant<bool, __is_s
|
||||
HIP_vector_type<T, n> operator<<(
|
||||
U x, const HIP_vector_type<T, n>& y) noexcept
|
||||
{
|
||||
return HIP_vector_type<T, n>{x} <<= y;
|
||||
return make_vector_type<T, n>(x) <<= y;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -52,22 +52,22 @@ THE SOFTWARE.
|
||||
__DEVICE__
|
||||
inline
|
||||
int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
|
||||
return __ockl_sdot2(a.data, b.data, c, saturate);
|
||||
return __ockl_sdot2(get_native_vector(a), get_native_vector(b), c, saturate);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
|
||||
return __ockl_udot2(a.data, b.data, c, saturate);
|
||||
return __ockl_udot2(get_native_vector(a), get_native_vector(b), c, saturate);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
|
||||
return __ockl_sdot4(a.data, b.data, c, saturate);
|
||||
return __ockl_sdot4(get_native_vector(a), get_native_vector(b), c, saturate);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
|
||||
return __ockl_udot4(a.data, b.data, c, saturate);
|
||||
return __ockl_udot4(get_native_vector(a), get_native_vector(b), c, saturate);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
|
||||
@@ -151,8 +151,9 @@ template <
|
||||
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ __hip_img_chk__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y) {
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
|
||||
int2 coords{x, y};
|
||||
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
|
||||
auto tmp = __ockl_image_load_2D(i, int2(x, y).data);
|
||||
auto tmp = __ockl_image_load_2D(i, get_native_vector(coords));
|
||||
*data = __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -170,9 +171,10 @@ template <
|
||||
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ __hip_img_chk__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y) {
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
|
||||
int2 coords{x, y};
|
||||
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
|
||||
auto tmp = __hipMapTo<float4::Native_vec_>(data);
|
||||
__ockl_image_store_2D(i, int2(x, y).data, tmp);
|
||||
__ockl_image_store_2D(i, get_native_vector(coords), tmp);
|
||||
}
|
||||
|
||||
/** \brief Reads the value from the three-dimensional surface at coordinate
|
||||
@@ -190,8 +192,9 @@ template <
|
||||
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ __hip_img_chk__ void surf3Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, int z) {
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
|
||||
int4 coords{x, y, z, 0};
|
||||
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i));
|
||||
auto tmp = __ockl_image_load_3D(i, int4(x, y, z, 0).data);
|
||||
auto tmp = __ockl_image_load_3D(i, get_native_vector(coords));
|
||||
*data = __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -210,9 +213,10 @@ template <
|
||||
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ __hip_img_chk__ void surf3Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int z) {
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
|
||||
int4 coords{x, y, z, 0};
|
||||
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i));
|
||||
auto tmp = __hipMapTo<float4::Native_vec_>(data);
|
||||
__ockl_image_store_3D(i, int4(x, y, z, 0).data, tmp);
|
||||
__ockl_image_store_3D(i, get_native_vector(coords), tmp);
|
||||
}
|
||||
|
||||
/** \brief Reads the value from the one-dimensional layered surface at
|
||||
@@ -268,8 +272,9 @@ template <
|
||||
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ __hip_img_chk__ void surf2DLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int layer) {
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
|
||||
int2 coords{x, y};
|
||||
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
|
||||
auto tmp = __ockl_image_load_lod_2D(i, int2(x, y).data, layer);
|
||||
auto tmp = __ockl_image_load_lod_2D(i, get_native_vector(coords), layer);
|
||||
*data = __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -288,9 +293,10 @@ template <
|
||||
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ __hip_img_chk__ void surf2DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int layer) {
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
|
||||
int2 coords{x, y};
|
||||
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
|
||||
auto tmp = __hipMapTo<float4::Native_vec_>(data);
|
||||
__ockl_image_store_lod_2D(i, int2(x, y).data, layer, tmp);
|
||||
__ockl_image_store_lod_2D(i, get_native_vector(coords), layer, tmp);
|
||||
}
|
||||
|
||||
/** \brief Reads the value from the cubemap surface at coordinate x, y and
|
||||
@@ -308,8 +314,9 @@ template <
|
||||
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ __hip_img_chk__ void surfCubemapread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face) {
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
|
||||
int2 coords{x, y};
|
||||
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
|
||||
auto tmp = __ockl_image_load_CM(i, int2(x, y).data, face);
|
||||
auto tmp = __ockl_image_load_CM(i, get_native_vector(coords), face);
|
||||
*data = __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -328,9 +335,10 @@ template <
|
||||
typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
|
||||
static __device__ __hip_img_chk__ void surfCubemapwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int face) {
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
|
||||
int2 coords{x, y};
|
||||
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
|
||||
auto tmp = __hipMapTo<float4::Native_vec_>(data);
|
||||
__ockl_image_store_CM(i, int2(x, y).data, face, tmp);
|
||||
__ockl_image_store_CM(i, get_native_vector(coords), face, tmp);
|
||||
}
|
||||
|
||||
/** \brief Reads the value from the layered cubemap surface at coordinate x, y
|
||||
@@ -350,8 +358,9 @@ template <
|
||||
static __device__ __hip_img_chk__ void surfCubemapLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face,
|
||||
int layer) {
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
|
||||
int2 coords{x, y};
|
||||
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
|
||||
auto tmp = __ockl_image_load_lod_CM(i, int2(x, y).data, face, layer);
|
||||
auto tmp = __ockl_image_load_lod_CM(i, get_native_vector(coords), face, layer);
|
||||
*data = __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -372,9 +381,10 @@ template <
|
||||
static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T* data, hipSurfaceObject_t surfObj, int x, int y, int face,
|
||||
int layer) {
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT
|
||||
int2 coords{x, y};
|
||||
x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
|
||||
auto tmp = __hipMapTo<float4::Native_vec_>(data);
|
||||
__ockl_image_store_lod_CM(i, int2(x, y).data, face, layer, tmp);
|
||||
__ockl_image_store_lod_CM(i, get_native_vector(coords), face, layer, tmp);
|
||||
}
|
||||
|
||||
// Doxygen end group SurfaceAPI
|
||||
|
||||
@@ -260,7 +260,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2D(texture<T, hipTextureType2D, readMode> t, float x, float y)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data);
|
||||
float2 coords{x, y};
|
||||
auto tmp = __ockl_image_sample_2D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -268,7 +269,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DLayered(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
|
||||
float2 coords{x, layer};
|
||||
auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -276,7 +278,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLayered(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
|
||||
float4 coords{x, y, layer, 0.0f};
|
||||
auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -284,7 +287,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex3D(texture<T, hipTextureType3D, readMode> t, float x, float y, float z)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
|
||||
float4 coords{x, y, z, 0.0f};
|
||||
auto tmp = __ockl_image_sample_3D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -292,7 +296,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemap(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
|
||||
float4 coords{x, y, z, 0.0f};
|
||||
auto tmp = __ockl_image_sample_CM(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -308,7 +313,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLod(texture<T, hipTextureType2D, readMode> t, float x, float y, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
|
||||
float2 coords{x, y};
|
||||
auto tmp = __ockl_image_sample_lod_2D(i, s, get_native_vector(coords), level);
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -316,7 +322,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DLayeredLod(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
|
||||
float2 coords{x, layer};
|
||||
auto tmp = __ockl_image_sample_lod_1Da(i, s, get_native_vector(coords), level);
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -324,7 +331,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLayeredLod(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level);
|
||||
float4 coords{x, y, layer, 0.0f};
|
||||
auto tmp = __ockl_image_sample_lod_2Da(i, s, get_native_vector(coords), level);
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -332,7 +340,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex3DLod(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
|
||||
float4 coords{x, y, z, 0.0f};
|
||||
auto tmp = __ockl_image_sample_lod_3D(i, s, get_native_vector(coords), level);
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -340,7 +349,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapLod(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
|
||||
float4 coords{x, y, z, 0.0f};
|
||||
auto tmp = __ockl_image_sample_lod_CM(i, s, get_native_vector(coords), level);
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -348,7 +358,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapLayered(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data);
|
||||
float4 coords{x, y, z, layer};
|
||||
auto tmp = __ockl_image_sample_CMa(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -356,7 +367,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapLayeredLod(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer, float level)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level);
|
||||
float4 coords{x, y, z, layer};
|
||||
auto tmp = __ockl_image_sample_lod_CMa(i, s, get_native_vector(coords), level);
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -365,8 +377,9 @@ static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> t
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
// TODO missing in device libs.
|
||||
// auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
// return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
// auto tmp = __ockl_image_sample_grad_CM(i, s, get_native_vector(float4(x, y, z, 0.0f)),
|
||||
// get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x,
|
||||
// dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -375,8 +388,9 @@ static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> t
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
// TODO missing in device libs.
|
||||
// auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
// return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
// auto tmp = __ockl_image_sample_grad_CMa(i, s, get_native_vector(float4(x, y, z, layer)),
|
||||
// get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x,
|
||||
// dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -392,7 +406,9 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DGrad(texture<T, hipTextureType2D, readMode> t, float x, float y, float2 dPdx, float2 dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
|
||||
float2 coords{x, y};
|
||||
auto tmp = __ockl_image_sample_grad_2D(i, s, get_native_vector(coords), get_native_vector(dPdx),
|
||||
get_native_vector(dPdy));
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -400,7 +416,8 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DLayeredGrad(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float dPdx, float dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy);
|
||||
float2 coords{x, layer};
|
||||
auto tmp = __ockl_image_sample_grad_1Da(i, s, get_native_vector(coords), dPdx, dPdy);
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -408,7 +425,9 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLayeredGrad(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float2 dPdx, float2 dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
|
||||
float4 coords{x, y, layer, 0.0f};
|
||||
auto tmp = __ockl_image_sample_grad_2Da(i, s, get_native_vector(coords),
|
||||
get_native_vector(dPdx), get_native_vector(dPdy));
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -416,7 +435,11 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex3DGrad(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
float4 coords{x, y, z, 0.0f};
|
||||
float4 gradx{dPdx.x, dPdx.y, dPdx.z, 0.0f};
|
||||
float4 grady{dPdy.x, dPdy.y, dPdy.z, 0.0f};
|
||||
auto tmp = __ockl_image_sample_grad_3D(i, s, get_native_vector(coords),
|
||||
get_native_vector(gradx), get_native_vector(grady));
|
||||
return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
|
||||
@@ -467,22 +490,23 @@ template <typename T, hipTextureReadMode readMode>
|
||||
static __forceinline__ __device__ __hip_img_chk__ __hip_tex2dgather_ret_t<T, readMode> tex2Dgather(texture<T, hipTextureType2D, readMode> t, float x, float y, int comp=0)
|
||||
{
|
||||
TEXTURE_PARAMETERS_INIT;
|
||||
float2 coords{x, y};
|
||||
switch (comp) {
|
||||
case 1: {
|
||||
auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data);
|
||||
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
|
||||
auto tmp = __ockl_image_gather4g_2D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
case 2: {
|
||||
auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data);
|
||||
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
|
||||
auto tmp = __ockl_image_gather4b_2D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
case 3: {
|
||||
auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data);
|
||||
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
|
||||
auto tmp = __ockl_image_gather4a_2D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
default: {
|
||||
auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data);
|
||||
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
|
||||
auto tmp = __ockl_image_gather4r_2D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<__hip_tex2dgather_ret_t<T, readMode>>(tmp);
|
||||
}
|
||||
}
|
||||
return {};
|
||||
|
||||
@@ -78,7 +78,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex2D(hipTextureObject_t textureObject, float x, float y)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data);
|
||||
float2 coords{x, y};
|
||||
auto tmp = __ockl_image_sample_2D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -96,7 +97,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
|
||||
float4 coords{x, y, z, 0.0f};
|
||||
auto tmp = __ockl_image_sample_3D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -114,7 +116,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
|
||||
float2 coords{x, layer};
|
||||
auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -132,7 +135,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
|
||||
float4 coords{x, y, layer, 0.0f};
|
||||
auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -150,7 +154,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T texCubemap(hipTextureObject_t textureObject, float x, float y, float z)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
|
||||
float4 coords{x, y, z, 0.0f};
|
||||
auto tmp = __ockl_image_sample_CM(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -168,7 +173,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data);
|
||||
float4 coords{x, y, z, layer};
|
||||
auto tmp = __ockl_image_sample_CMa(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -186,26 +192,27 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
float2 coords{x, y};
|
||||
switch (comp) {
|
||||
case 1: {
|
||||
auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data);
|
||||
return __hipMapFrom<T>(tmp);
|
||||
break;
|
||||
auto tmp = __ockl_image_gather4r_2D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data);
|
||||
return __hipMapFrom<T>(tmp);
|
||||
break;
|
||||
auto tmp = __ockl_image_gather4g_2D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data);
|
||||
return __hipMapFrom<T>(tmp);
|
||||
break;
|
||||
auto tmp = __ockl_image_gather4b_2D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data);
|
||||
return __hipMapFrom<T>(tmp);
|
||||
break;
|
||||
auto tmp = __ockl_image_gather4a_2D(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return {};
|
||||
@@ -243,7 +250,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
|
||||
float2 coords{x, y};
|
||||
auto tmp = __ockl_image_sample_lod_2D(i, s, get_native_vector(coords), level);
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -261,7 +269,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
|
||||
float4 coords{x, y, z, 0.0f};
|
||||
auto tmp = __ockl_image_sample_lod_3D(i, s, get_native_vector(coords), level);
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -279,7 +288,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
|
||||
float2 coords{x, layer};
|
||||
auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -297,7 +307,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
|
||||
float4 coords{x, y, layer, 0.0f};
|
||||
auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -315,7 +326,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
|
||||
float4 coords{x, y, z, 0.0f};
|
||||
auto tmp = __ockl_image_sample_lod_CM(i, s, get_native_vector(coords), level);
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -334,8 +346,9 @@ static __device__ __hip_img_chk__ T texCubemapGrad(hipTextureObject_t textureObj
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
// TODO missing in device libs.
|
||||
// auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
// return __hipMapFrom<T>(tmp);
|
||||
// auto tmp = __ockl_image_sample_grad_CM(i, s, get_native_vector(float4(x, y, z, 0.0f)),
|
||||
// get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x,
|
||||
// dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<T>(tmp);
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -353,7 +366,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level);
|
||||
float4 coords{x, y, z, layer};
|
||||
auto tmp = __ockl_image_sample_lod_CMa(i, s, get_native_vector(coords), level);
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -389,7 +403,9 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
|
||||
float2 coords{x, y};
|
||||
auto tmp = __ockl_image_sample_grad_2D(i, s, get_native_vector(coords), get_native_vector(dPdx),
|
||||
get_native_vector(dPdy));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -407,7 +423,11 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
float4 coords{x, y, z, 0.0f};
|
||||
float4 gradx{dPdy.x, dPdy.y, dPdy.z, 0.0f};
|
||||
float4 grady{dPdy.x, dPdy.y, dPdy.z, 0.0f};
|
||||
auto tmp = __ockl_image_sample_grad_3D(i, s, get_native_vector(coords),
|
||||
get_native_vector(gradx), get_native_vector(grady));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -425,7 +445,8 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy);
|
||||
float2 coords{x, layer};
|
||||
auto tmp = __ockl_image_sample_grad_1Da(i, s, get_native_vector(coords), dPdx, dPdy);
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -443,7 +464,9 @@ template <
|
||||
static __device__ __hip_img_chk__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
|
||||
float4 coords{x, y, layer, 0.0f};
|
||||
auto tmp = __ockl_image_sample_grad_2Da(i, s, get_native_vector(coords),
|
||||
get_native_vector(dPdx), get_native_vector(dPdy));
|
||||
return __hipMapFrom<T>(tmp);
|
||||
}
|
||||
|
||||
@@ -462,8 +485,9 @@ static __device__ __hip_img_chk__ T texCubemapLayeredGrad(hipTextureObject_t te
|
||||
{
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT
|
||||
// TODO missing in device libs.
|
||||
// auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
|
||||
// return __hipMapFrom<T>(tmp);
|
||||
// auto tmp = __ockl_image_sample_grad_CMa(i, s, get_native_vector(float4(x, y, z, layer)),
|
||||
// get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x,
|
||||
// dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<T>(tmp);
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user