diff --git a/hipamd/include/hip/amd_detail/amd_hip_vector_types.h b/hipamd/include/hip/amd_detail/amd_hip_vector_types.h index 220f2189fc..9b472c534b 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_vector_types.h +++ b/hipamd/include/hip/amd_detail/amd_hip_vector_types.h @@ -143,27 +143,34 @@ template struct is_scalar : public integral_constant struct HIP_vector_base; + template struct HIP_vector_type; + + template + __attribute__((always_inline)) __HOST_DEVICE__ typename HIP_vector_base::Native_vec_& + get_native_vector(HIP_vector_base& base_vec) { + static_assert(sizeof(base_vec) == sizeof(typename HIP_vector_base::Native_vec_)); + return *reinterpret_cast::Native_vec_*>(&base_vec.x); + }; + + template + __attribute__(( + always_inline)) __HOST_DEVICE__ const typename HIP_vector_base::Native_vec_& + get_native_vector(const HIP_vector_base& base_vec) { + static_assert(sizeof(base_vec) == sizeof(typename HIP_vector_base::Native_vec_)); + return *reinterpret_cast::Native_vec_*>(&base_vec.x); + }; template struct HIP_vector_base { using Native_vec_ = __NATIVE_VECTOR__(1, T); - union { - Native_vec_ data; - struct { - T x; - }; - }; + T x; using value_type = T; __HOST_DEVICE__ HIP_vector_base() = default; __HOST_DEVICE__ - explicit - constexpr - HIP_vector_base(T x_) noexcept : data{x_} {} - __HOST_DEVICE__ constexpr HIP_vector_base(const HIP_vector_base&) = default; __HOST_DEVICE__ @@ -175,43 +182,23 @@ template struct is_scalar : public integral_constant - struct HIP_vector_base { - using Native_vec_ = __NATIVE_VECTOR__(2, T); + template struct alignas(2 * sizeof(T)) HIP_vector_base { + using Native_vec_ = __NATIVE_VECTOR__(2, T); - union - #if !__has_attribute(ext_vector_type) - alignas(hip_impl::next_pot(2 * sizeof(T))) - #endif - { - Native_vec_ data; - struct { - T x; - T y; - }; - }; + T x, y; - using value_type = T; + using value_type = T; - __HOST_DEVICE__ - HIP_vector_base() = default; - __HOST_DEVICE__ - explicit - constexpr - HIP_vector_base(T x_) noexcept : data{x_, x_} {} - __HOST_DEVICE__ - constexpr - HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {} - __HOST_DEVICE__ - constexpr - HIP_vector_base(const HIP_vector_base&) = default; - __HOST_DEVICE__ - constexpr - HIP_vector_base(HIP_vector_base&&) = default; - __HOST_DEVICE__ - ~HIP_vector_base() = default; - __HOST_DEVICE__ - HIP_vector_base& operator=(const HIP_vector_base&) = default; + __HOST_DEVICE__ + HIP_vector_base() = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(const HIP_vector_base&) = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(HIP_vector_base&&) = default; + __HOST_DEVICE__ + ~HIP_vector_base() = default; + __HOST_DEVICE__ + HIP_vector_base& operator=(const HIP_vector_base&) = default; }; template @@ -364,27 +351,13 @@ template struct is_scalar : public integral_constant struct is_scalar : public integral_constant - struct HIP_vector_base { - using Native_vec_ = __NATIVE_VECTOR__(4, T); + template struct alignas(4 * sizeof(T)) HIP_vector_base { + using Native_vec_ = __NATIVE_VECTOR__(4, T); - union - #if !__has_attribute(ext_vector_type) - alignas(hip_impl::next_pot(4 * sizeof(T))) - #endif - { - Native_vec_ data; - struct { - T x; - T y; - T z; - T w; - }; - }; + T x, y, z, w; - using value_type = T; + using value_type = T; - __HOST_DEVICE__ - HIP_vector_base() = default; - __HOST_DEVICE__ - explicit - constexpr - HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {} - __HOST_DEVICE__ - constexpr - HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {} - __HOST_DEVICE__ - constexpr - HIP_vector_base(const HIP_vector_base&) = default; - __HOST_DEVICE__ - constexpr - HIP_vector_base(HIP_vector_base&&) = default; - __HOST_DEVICE__ - ~HIP_vector_base() = default; - __HOST_DEVICE__ - HIP_vector_base& operator=(const HIP_vector_base&) = default; + __HOST_DEVICE__ + HIP_vector_base() = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(const HIP_vector_base&) = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(HIP_vector_base&&) = default; + __HOST_DEVICE__ + ~HIP_vector_base() = default; + __HOST_DEVICE__ + HIP_vector_base& operator=(const HIP_vector_base&) = default; }; - template + template + constexpr inline __HOST_DEVICE__ HIP_vector_type make_vector_type_impl( + T val, std::index_sequence) noexcept { + // Fills vec with vals, and ignores the indices + return HIP_vector_type{((void)indices, val)...}; + } + + template + constexpr inline __HOST_DEVICE__ HIP_vector_type make_vector_type(T val) { + return make_vector_type_impl(val, std::make_index_sequence{}); + } + + template struct HIP_vector_type : public HIP_vector_base { - using HIP_vector_base::data; - using typename HIP_vector_base::Native_vec_; + using typename HIP_vector_base::Native_vec_; - __HOST_DEVICE__ - HIP_vector_type() = default; - template< - typename U, - typename std::enable_if< - std::is_convertible::value>::type* = nullptr> - __HOST_DEVICE__ - explicit - constexpr - HIP_vector_type(U x_) noexcept - : HIP_vector_base{static_cast(x_)} - {} - template< // TODO: constrain based on type as well. - typename... Us, - typename std::enable_if< - (rank > 1) && sizeof...(Us) == rank>::type* = nullptr> - __HOST_DEVICE__ - constexpr - HIP_vector_type(Us... xs) noexcept - : HIP_vector_base{static_cast(xs)...} - {} - __HOST_DEVICE__ - constexpr - HIP_vector_type(const HIP_vector_type&) = default; - __HOST_DEVICE__ - constexpr - HIP_vector_type(HIP_vector_type&&) = default; - __HOST_DEVICE__ - ~HIP_vector_type() = default; + __HOST_DEVICE__ + HIP_vector_type() = default; + template ::value>::type* = nullptr> + __HOST_DEVICE__ explicit constexpr HIP_vector_type(U x_) noexcept + : HIP_vector_base{static_cast(x_)} {} + template < // TODO: constrain based on type as well. + typename... Us, + typename std::enable_if<(rank > 1) && sizeof...(Us) == rank>::type* = nullptr> + __HOST_DEVICE__ constexpr HIP_vector_type(Us... xs) noexcept + : HIP_vector_base{static_cast(xs)...} {} + __HOST_DEVICE__ + constexpr HIP_vector_type(const HIP_vector_type&) = default; + __HOST_DEVICE__ + constexpr HIP_vector_type(HIP_vector_type&&) = default; + __HOST_DEVICE__ + ~HIP_vector_type() = default; - __HOST_DEVICE__ - HIP_vector_type& operator=(const HIP_vector_type&) = default; - __HOST_DEVICE__ - HIP_vector_type& operator=(HIP_vector_type&&) = default; + __HOST_DEVICE__ + HIP_vector_type& operator=(const HIP_vector_type&) = default; + __HOST_DEVICE__ + HIP_vector_type& operator=(HIP_vector_type&&) = default; - // Operators - __HOST_DEVICE__ - HIP_vector_type& operator++() noexcept - { - return *this += HIP_vector_type{1}; - } - __HOST_DEVICE__ - HIP_vector_type operator++(int) noexcept - { - auto tmp(*this); - ++*this; - return tmp; - } + // Operators + __HOST_DEVICE__ + HIP_vector_type& operator++() noexcept { + HIP_vector_type unity = make_vector_type(1); + return *this += unity; + } + __HOST_DEVICE__ + HIP_vector_type operator++(int) noexcept { + auto tmp(*this); + ++*this; + return tmp; + } - __HOST_DEVICE__ - HIP_vector_type& operator--() noexcept - { - return *this -= HIP_vector_type{1}; - } - __HOST_DEVICE__ - HIP_vector_type operator--(int) noexcept - { - auto tmp(*this); - --*this; - return tmp; - } + __HOST_DEVICE__ + HIP_vector_type& operator--() noexcept { + HIP_vector_type unity = make_vector_type(1); + return *this -= unity; + } + __HOST_DEVICE__ + HIP_vector_type operator--(int) noexcept { + auto tmp(*this); + --*this; + return tmp; + } - __HOST_DEVICE__ - HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept - { + __HOST_DEVICE__ + HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept { #if __HIP_USE_NATIVE_VECTOR__ - data += x.data; + get_native_vector(*this) += get_native_vector(x); #else - for (auto i = 0u; i != rank; ++i) data[i] += x.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] += get_native_vector(x)[i]; #endif - return *this; - } - template< - typename U, - typename std::enable_if< - std::is_convertible{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type& operator+=(U x) noexcept - { - return *this += HIP_vector_type{x}; - } + return *this; + } + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator+=(U x) noexcept { + return *this += make_vector_type(x); + } - __HOST_DEVICE__ - HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept - { + __HOST_DEVICE__ + HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept { #if __HIP_USE_NATIVE_VECTOR__ - data -= x.data; + get_native_vector(*this) -= get_native_vector(x); #else - for (auto i = 0u; i != rank; ++i) data[i] -= x.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] -= get_native_vector(x)[i]; #endif - return *this; - } - template< - typename U, - typename std::enable_if< - std::is_convertible{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type& operator-=(U x) noexcept - { - return *this -= HIP_vector_type{x}; - } + return *this; + } + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator-=(U x) noexcept { + return *this -= make_vector_type(x); + } - __HOST_DEVICE__ - HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept - { + __HOST_DEVICE__ + HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept { #if __HIP_USE_NATIVE_VECTOR__ - data *= x.data; + get_native_vector(*this) *= get_native_vector(x); #else - for (auto i = 0u; i != rank; ++i) data[i] *= x.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] *= get_native_vector(x)[i]; #endif - return *this; - } + return *this; + } - friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*( - HIP_vector_type x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{ x } *= y; - } + friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*( + HIP_vector_type x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} *= y; + } - template< - typename U, - typename std::enable_if< - std::is_convertible{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type& operator*=(U x) noexcept - { - return *this *= HIP_vector_type{x}; - } + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator*=(U x) noexcept { + return *this *= make_vector_type(x); + } - friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/( - HIP_vector_type x, const HIP_vector_type& y) noexcept - { - return HIP_vector_type{ x } /= y; - } + friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/( + HIP_vector_type x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} /= y; + } - __HOST_DEVICE__ - HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept - { + __HOST_DEVICE__ + HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept { #if __HIP_USE_NATIVE_VECTOR__ - data /= x.data; + get_native_vector(*this) /= get_native_vector(x); #else - for (auto i = 0u; i != rank; ++i) data[i] /= x.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] /= get_native_vector(x)[i]; #endif - return *this; - } - template< - typename U, - typename std::enable_if< - std::is_convertible{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type& operator/=(U x) noexcept - { - return *this /= HIP_vector_type{x}; - } + return *this; + } + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator/=(U x) noexcept { + return *this /= make_vector_type(x); + } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type operator-() const noexcept - { - auto tmp(*this); + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type operator-() const noexcept { + auto tmp(*this); #if __HIP_USE_NATIVE_VECTOR__ - tmp.data = -tmp.data; + get_native_vector(tmp) = -get_native_vector(tmp); #else - for (auto i = 0u; i != rank; ++i) tmp.data[i] = -tmp.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(tmp)[i] = -get_native_vector(tmp)[i]; #endif - return tmp; - } + return tmp; + } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type operator~() const noexcept - { - HIP_vector_type r{*this}; + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type operator~() const noexcept { + HIP_vector_type r{*this}; #if __HIP_USE_NATIVE_VECTOR__ - r.data = ~r.data; + get_native_vector(r) = ~get_native_vector(r); #else - for (auto i = 0u; i != rank; ++i) r.data[i] = ~r.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(r)[i] = ~get_native_vector(r)[i]; #endif - return r; - } + return r; + } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept - { + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept { #if __HIP_USE_NATIVE_VECTOR__ - data %= x.data; + get_native_vector(*this) %= get_native_vector(x); #else - for (auto i = 0u; i != rank; ++i) data[i] %= x.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] %= get_native_vector(x)[i]; #endif - return *this; - } + return *this; + } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept - { + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept { #if __HIP_USE_NATIVE_VECTOR__ - data ^= x.data; + get_native_vector(*this) ^= get_native_vector(x); #else - for (auto i = 0u; i != rank; ++i) data[i] ^= x.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] ^= get_native_vector(x)[i]; #endif - return *this; - } + return *this; + } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept - { + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept { #if __HIP_USE_NATIVE_VECTOR__ - data |= x.data; + get_native_vector(*this) |= get_native_vector(x); #else - for (auto i = 0u; i != rank; ++i) data[i] |= x.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] |= get_native_vector(x)[i]; #endif - return *this; - } + return *this; + } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept - { + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept { #if __HIP_USE_NATIVE_VECTOR__ - data &= x.data; + get_native_vector(*this) &= get_native_vector(x); #else - for (auto i = 0u; i != rank; ++i) data[i] &= x.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] &= get_native_vector(x)[i]; #endif - return *this; - } + return *this; + } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept - { + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept { #if __HIP_USE_NATIVE_VECTOR__ - data >>= x.data; + get_native_vector(*this) >>= get_native_vector(x); #else - for (auto i = 0u; i != rank; ++i) data[i] >>= x.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] >>= get_native_vector(x)[i]; #endif - return *this; - } + return *this; + } - template< - typename U = T, - typename std::enable_if{}>::type* = nullptr> - __HOST_DEVICE__ - HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept - { + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept { #if __HIP_USE_NATIVE_VECTOR__ - data <<= x.data; + get_native_vector(*this) <<= get_native_vector(x); #else - for (auto i = 0u; i != rank; ++i) data[i] <<= x.data[i]; + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] <<= get_native_vector(x)[i]; #endif - return *this; - } + return *this; + } }; template @@ -731,7 +621,7 @@ template struct is_scalar : public integral_constant operator+( const HIP_vector_type& x, U y) noexcept { - return HIP_vector_type{x} += HIP_vector_type{y}; + return HIP_vector_type{x} += make_vector_type(y); } template __HOST_DEVICE__ @@ -740,7 +630,7 @@ template struct is_scalar : public integral_constant operator+( U x, const HIP_vector_type& y) noexcept { - return HIP_vector_type{x} += y; + return make_vector_type(x) += y; } template @@ -759,7 +649,7 @@ template struct is_scalar : public integral_constant operator-( const HIP_vector_type& x, U y) noexcept { - return HIP_vector_type{x} -= HIP_vector_type{y}; + return HIP_vector_type{x} -= make_vector_type(y); } template __HOST_DEVICE__ @@ -768,7 +658,7 @@ template struct is_scalar : public integral_constant operator-( U x, const HIP_vector_type& y) noexcept { - return HIP_vector_type{x} -= y; + return make_vector_type(x) -= y; } template @@ -778,7 +668,7 @@ template struct is_scalar : public integral_constant operator*( const HIP_vector_type& x, U y) noexcept { - return HIP_vector_type{x} *= HIP_vector_type{y}; + return HIP_vector_type{x} *= make_vector_type(y); } template __HOST_DEVICE__ @@ -787,7 +677,7 @@ template struct is_scalar : public integral_constant operator*( U x, const HIP_vector_type& y) noexcept { - return HIP_vector_type{x} *= y; + return make_vector_type(x) *= y; } template @@ -797,7 +687,7 @@ template struct is_scalar : public integral_constant operator/( const HIP_vector_type& x, U y) noexcept { - return HIP_vector_type{x} /= HIP_vector_type{y}; + return HIP_vector_type{x} /= make_vector_type(y); } template __HOST_DEVICE__ @@ -806,17 +696,7 @@ template struct is_scalar : public integral_constant operator/( U x, const HIP_vector_type& y) noexcept { - return HIP_vector_type{x} /= y; - } - - template - __HOST_DEVICE__ - inline - constexpr - bool _hip_compare(const V& x, const V& y, int n) noexcept - { - return - (n == -1) ? true : ((x[n] != y[n]) ? false : _hip_compare(x, y, n - 1)); + return make_vector_type(x) /= y; } template @@ -826,15 +706,22 @@ template struct is_scalar : public integral_constant& x, const HIP_vector_type& y) noexcept { - return _hip_compare(x.data, y.data, n - 1); + bool isTrue = true; + const auto& native_x = get_native_vector(x); + const auto& native_y = get_native_vector(y); + for (unsigned int i = 0; i < n; ++i) { + isTrue = (isTrue && (native_x[i] == native_y[i])); + } + return isTrue; } + template __HOST_DEVICE__ inline constexpr bool operator==(const HIP_vector_type& x, U y) noexcept { - return x == HIP_vector_type{y}; + return x == make_vector_type(y); } template __HOST_DEVICE__ @@ -842,7 +729,7 @@ template struct is_scalar : public integral_constant& y) noexcept { - return HIP_vector_type{x} == y; + return make_vector_type(x) == y; } template @@ -894,7 +781,7 @@ template struct is_scalar : public integral_constant operator%( const HIP_vector_type& x, U y) noexcept { - return HIP_vector_type{x} %= HIP_vector_type{y}; + return HIP_vector_type{x} %= make_vector_type(y); } template< typename T, @@ -907,7 +794,7 @@ template struct is_scalar : public integral_constant operator%( U x, const HIP_vector_type& y) noexcept { - return HIP_vector_type{x} %= y; + return make_vector_type(x) %= y; } template< @@ -933,7 +820,7 @@ template struct is_scalar : public integral_constant operator^( const HIP_vector_type& x, U y) noexcept { - return HIP_vector_type{x} ^= HIP_vector_type{y}; + return HIP_vector_type{x} ^= make_vector_type(y); } template< typename T, @@ -946,7 +833,7 @@ template struct is_scalar : public integral_constant operator^( U x, const HIP_vector_type& y) noexcept { - return HIP_vector_type{x} ^= y; + return make_vector_type(x) ^= y; } template< @@ -972,7 +859,7 @@ template struct is_scalar : public integral_constant operator|( const HIP_vector_type& x, U y) noexcept { - return HIP_vector_type{x} |= HIP_vector_type{y}; + return HIP_vector_type{x} |= make_vector_type(y); } template< typename T, @@ -985,7 +872,7 @@ template struct is_scalar : public integral_constant operator|( U x, const HIP_vector_type& y) noexcept { - return HIP_vector_type{x} |= y; + return make_vector_type(x) |= y; } template< @@ -1011,7 +898,7 @@ template struct is_scalar : public integral_constant operator&( const HIP_vector_type& x, U y) noexcept { - return HIP_vector_type{x} &= HIP_vector_type{y}; + return HIP_vector_type{x} &= make_vector_type(y); } template< typename T, @@ -1024,7 +911,7 @@ template struct is_scalar : public integral_constant operator&( U x, const HIP_vector_type& y) noexcept { - return HIP_vector_type{x} &= y; + return make_vector_type(x) &= y; } template< @@ -1050,7 +937,7 @@ template struct is_scalar : public integral_constant operator>>( const HIP_vector_type& x, U y) noexcept { - return HIP_vector_type{x} >>= HIP_vector_type{y}; + return HIP_vector_type{x} >>= make_vector_type(y); } template< typename T, @@ -1063,7 +950,7 @@ template struct is_scalar : public integral_constant operator>>( U x, const HIP_vector_type& y) noexcept { - return HIP_vector_type{x} >>= y; + return make_vector_type(x) >>= y; } template< @@ -1089,7 +976,7 @@ template struct is_scalar : public integral_constant operator<<( const HIP_vector_type& x, U y) noexcept { - return HIP_vector_type{x} <<= HIP_vector_type{y}; + return HIP_vector_type{x} <<= make_vector_type(y); } template< typename T, @@ -1103,7 +990,7 @@ template struct is_scalar : public integral_constant operator<<( U x, const HIP_vector_type& y) noexcept { - return HIP_vector_type{x} <<= y; + return make_vector_type(x) <<= y; } /* diff --git a/hipamd/include/hip/amd_detail/amd_math_functions.h b/hipamd/include/hip/amd_detail/amd_math_functions.h index ee84ef9e8a..663c71fa43 100644 --- a/hipamd/include/hip/amd_detail/amd_math_functions.h +++ b/hipamd/include/hip/amd_detail/amd_math_functions.h @@ -52,22 +52,22 @@ THE SOFTWARE. __DEVICE__ inline int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) { - return __ockl_sdot2(a.data, b.data, c, saturate); + return __ockl_sdot2(get_native_vector(a), get_native_vector(b), c, saturate); } __DEVICE__ inline uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) { - return __ockl_udot2(a.data, b.data, c, saturate); + return __ockl_udot2(get_native_vector(a), get_native_vector(b), c, saturate); } __DEVICE__ inline int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) { - return __ockl_sdot4(a.data, b.data, c, saturate); + return __ockl_sdot4(get_native_vector(a), get_native_vector(b), c, saturate); } __DEVICE__ inline uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) { - return __ockl_udot4(a.data, b.data, c, saturate); + return __ockl_udot4(get_native_vector(a), get_native_vector(b), c, saturate); } __DEVICE__ inline diff --git a/hipamd/include/hip/amd_detail/amd_surface_functions.h b/hipamd/include/hip/amd_detail/amd_surface_functions.h index a4919f0533..c931cf62d1 100644 --- a/hipamd/include/hip/amd_detail/amd_surface_functions.h +++ b/hipamd/include/hip/amd_detail/amd_surface_functions.h @@ -151,8 +151,9 @@ template < typename std::enable_if<__hip_is_tex_surf_channel_type::value>::type* = nullptr> static __device__ __hip_img_chk__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y) { __HIP_SURFACE_OBJECT_PARAMETERS_INIT + int2 coords{x, y}; x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); - auto tmp = __ockl_image_load_2D(i, int2(x, y).data); + auto tmp = __ockl_image_load_2D(i, get_native_vector(coords)); *data = __hipMapFrom(tmp); } @@ -170,9 +171,10 @@ template < typename std::enable_if<__hip_is_tex_surf_channel_type::value>::type* = nullptr> static __device__ __hip_img_chk__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y) { __HIP_SURFACE_OBJECT_PARAMETERS_INIT + int2 coords{x, y}; x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); auto tmp = __hipMapTo(data); - __ockl_image_store_2D(i, int2(x, y).data, tmp); + __ockl_image_store_2D(i, get_native_vector(coords), tmp); } /** \brief Reads the value from the three-dimensional surface at coordinate @@ -190,8 +192,9 @@ template < typename std::enable_if<__hip_is_tex_surf_channel_type::value>::type* = nullptr> static __device__ __hip_img_chk__ void surf3Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, int z) { __HIP_SURFACE_OBJECT_PARAMETERS_INIT + int4 coords{x, y, z, 0}; x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); - auto tmp = __ockl_image_load_3D(i, int4(x, y, z, 0).data); + auto tmp = __ockl_image_load_3D(i, get_native_vector(coords)); *data = __hipMapFrom(tmp); } @@ -210,9 +213,10 @@ template < typename std::enable_if<__hip_is_tex_surf_channel_type::value>::type* = nullptr> static __device__ __hip_img_chk__ void surf3Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int z) { __HIP_SURFACE_OBJECT_PARAMETERS_INIT + int4 coords{x, y, z, 0}; x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); auto tmp = __hipMapTo(data); - __ockl_image_store_3D(i, int4(x, y, z, 0).data, tmp); + __ockl_image_store_3D(i, get_native_vector(coords), tmp); } /** \brief Reads the value from the one-dimensional layered surface at @@ -268,8 +272,9 @@ template < typename std::enable_if<__hip_is_tex_surf_channel_type::value>::type* = nullptr> static __device__ __hip_img_chk__ void surf2DLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int layer) { __HIP_SURFACE_OBJECT_PARAMETERS_INIT + int2 coords{x, y}; x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); - auto tmp = __ockl_image_load_lod_2D(i, int2(x, y).data, layer); + auto tmp = __ockl_image_load_lod_2D(i, get_native_vector(coords), layer); *data = __hipMapFrom(tmp); } @@ -288,9 +293,10 @@ template < typename std::enable_if<__hip_is_tex_surf_channel_type::value>::type* = nullptr> static __device__ __hip_img_chk__ void surf2DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int layer) { __HIP_SURFACE_OBJECT_PARAMETERS_INIT + int2 coords{x, y}; x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); auto tmp = __hipMapTo(data); - __ockl_image_store_lod_2D(i, int2(x, y).data, layer, tmp); + __ockl_image_store_lod_2D(i, get_native_vector(coords), layer, tmp); } /** \brief Reads the value from the cubemap surface at coordinate x, y and @@ -308,8 +314,9 @@ template < typename std::enable_if<__hip_is_tex_surf_channel_type::value>::type* = nullptr> static __device__ __hip_img_chk__ void surfCubemapread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face) { __HIP_SURFACE_OBJECT_PARAMETERS_INIT + int2 coords{x, y}; x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); - auto tmp = __ockl_image_load_CM(i, int2(x, y).data, face); + auto tmp = __ockl_image_load_CM(i, get_native_vector(coords), face); *data = __hipMapFrom(tmp); } @@ -328,9 +335,10 @@ template < typename std::enable_if<__hip_is_tex_surf_channel_type::value>::type* = nullptr> static __device__ __hip_img_chk__ void surfCubemapwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int face) { __HIP_SURFACE_OBJECT_PARAMETERS_INIT + int2 coords{x, y}; x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); auto tmp = __hipMapTo(data); - __ockl_image_store_CM(i, int2(x, y).data, face, tmp); + __ockl_image_store_CM(i, get_native_vector(coords), face, tmp); } /** \brief Reads the value from the layered cubemap surface at coordinate x, y @@ -350,8 +358,9 @@ template < static __device__ __hip_img_chk__ void surfCubemapLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face, int layer) { __HIP_SURFACE_OBJECT_PARAMETERS_INIT + int2 coords{x, y}; x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); - auto tmp = __ockl_image_load_lod_CM(i, int2(x, y).data, face, layer); + auto tmp = __ockl_image_load_lod_CM(i, get_native_vector(coords), face, layer); *data = __hipMapFrom(tmp); } @@ -372,9 +381,10 @@ template < static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T* data, hipSurfaceObject_t surfObj, int x, int y, int face, int layer) { __HIP_SURFACE_OBJECT_PARAMETERS_INIT + int2 coords{x, y}; x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); auto tmp = __hipMapTo(data); - __ockl_image_store_lod_CM(i, int2(x, y).data, face, layer, tmp); + __ockl_image_store_lod_CM(i, get_native_vector(coords), face, layer, tmp); } // Doxygen end group SurfaceAPI diff --git a/hipamd/include/hip/amd_detail/texture_fetch_functions.h b/hipamd/include/hip/amd_detail/texture_fetch_functions.h index c4dcbe78d8..4de7f5ebd9 100644 --- a/hipamd/include/hip/amd_detail/texture_fetch_functions.h +++ b/hipamd/include/hip/amd_detail/texture_fetch_functions.h @@ -260,7 +260,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2D(texture t, float x, float y) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data); + float2 coords{x, y}; + auto tmp = __ockl_image_sample_2D(i, s, get_native_vector(coords)); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -268,7 +269,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLayered(texture t, float x, int layer) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords)); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -276,7 +278,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLayered(texture t, float x, float y, int layer) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords)); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -284,7 +287,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex3D(texture t, float x, float y, float z) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_3D(i, s, get_native_vector(coords)); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -292,7 +296,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemap(texture t, float x, float y, float z) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data); + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_CM(i, s, get_native_vector(coords)); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -308,7 +313,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLod(texture t, float x, float y, float level) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + float2 coords{x, y}; + auto tmp = __ockl_image_sample_lod_2D(i, s, get_native_vector(coords), level); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -316,7 +322,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLayeredLod(texture t, float x, int layer, float level) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_lod_1Da(i, s, get_native_vector(coords), level); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -324,7 +331,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLayeredLod(texture t, float x, float y, int layer, float level) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level); + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_lod_2Da(i, s, get_native_vector(coords), level); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -332,7 +340,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex3DLod(texture t, float x, float y, float z, float level) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level); + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_lod_3D(i, s, get_native_vector(coords), level); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -340,7 +349,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapLod(texture t, float x, float y, float z, float level) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level); + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_lod_CM(i, s, get_native_vector(coords), level); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -348,7 +358,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapLayered(texture t, float x, float y, float z, int layer) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data); + float4 coords{x, y, z, layer}; + auto tmp = __ockl_image_sample_CMa(i, s, get_native_vector(coords)); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -356,7 +367,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapLayeredLod(texture t, float x, float y, float z, int layer, float level) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level); + float4 coords{x, y, z, layer}; + auto tmp = __ockl_image_sample_lod_CMa(i, s, get_native_vector(coords), level); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -365,8 +377,9 @@ static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t t { TEXTURE_PARAMETERS_INIT; // TODO missing in device libs. - // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); - // return __hipMapFrom<__hip_tex_ret_t>(tmp); + // auto tmp = __ockl_image_sample_grad_CM(i, s, get_native_vector(float4(x, y, z, 0.0f)), + // get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x, + // dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<__hip_tex_ret_t>(tmp); return {}; } @@ -375,8 +388,9 @@ static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t t { TEXTURE_PARAMETERS_INIT; // TODO missing in device libs. - // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); - // return __hipMapFrom<__hip_tex_ret_t>(tmp); + // auto tmp = __ockl_image_sample_grad_CMa(i, s, get_native_vector(float4(x, y, z, layer)), + // get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x, + // dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<__hip_tex_ret_t>(tmp); return {}; } @@ -392,7 +406,9 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DGrad(texture t, float x, float y, float2 dPdx, float2 dPdy) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + float2 coords{x, y}; + auto tmp = __ockl_image_sample_grad_2D(i, s, get_native_vector(coords), get_native_vector(dPdx), + get_native_vector(dPdy)); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -400,7 +416,8 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLayeredGrad(texture t, float x, int layer, float dPdx, float dPdy) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy); + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_grad_1Da(i, s, get_native_vector(coords), dPdx, dPdy); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -408,7 +425,9 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLayeredGrad(texture t, float x, float y, int layer, float2 dPdx, float2 dPdy) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_grad_2Da(i, s, get_native_vector(coords), + get_native_vector(dPdx), get_native_vector(dPdy)); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -416,7 +435,11 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex3DGrad(texture t, float x, float y, float z, float4 dPdx, float4 dPdy) { TEXTURE_PARAMETERS_INIT; - auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + float4 coords{x, y, z, 0.0f}; + float4 gradx{dPdx.x, dPdx.y, dPdx.z, 0.0f}; + float4 grady{dPdy.x, dPdy.y, dPdy.z, 0.0f}; + auto tmp = __ockl_image_sample_grad_3D(i, s, get_native_vector(coords), + get_native_vector(gradx), get_native_vector(grady)); return __hipMapFrom<__hip_tex_ret_t>(tmp); } @@ -467,22 +490,23 @@ template static __forceinline__ __device__ __hip_img_chk__ __hip_tex2dgather_ret_t tex2Dgather(texture t, float x, float y, int comp=0) { TEXTURE_PARAMETERS_INIT; + float2 coords{x, y}; switch (comp) { case 1: { - auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data); - return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); + auto tmp = __ockl_image_gather4g_2D(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); } case 2: { - auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data); - return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); + auto tmp = __ockl_image_gather4b_2D(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); } case 3: { - auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data); - return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); + auto tmp = __ockl_image_gather4a_2D(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); } default: { - auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data); - return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); + auto tmp = __ockl_image_gather4r_2D(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); } } return {}; diff --git a/hipamd/include/hip/amd_detail/texture_indirect_functions.h b/hipamd/include/hip/amd_detail/texture_indirect_functions.h index 1e435018df..bbc46ccdff 100644 --- a/hipamd/include/hip/amd_detail/texture_indirect_functions.h +++ b/hipamd/include/hip/amd_detail/texture_indirect_functions.h @@ -78,7 +78,8 @@ template < static __device__ __hip_img_chk__ T tex2D(hipTextureObject_t textureObject, float x, float y) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data); + float2 coords{x, y}; + auto tmp = __ockl_image_sample_2D(i, s, get_native_vector(coords)); return __hipMapFrom(tmp); } @@ -96,7 +97,8 @@ template < static __device__ __hip_img_chk__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_3D(i, s, get_native_vector(coords)); return __hipMapFrom(tmp); } @@ -114,7 +116,8 @@ template < static __device__ __hip_img_chk__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords)); return __hipMapFrom(tmp); } @@ -132,7 +135,8 @@ template < static __device__ __hip_img_chk__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords)); return __hipMapFrom(tmp); } @@ -150,7 +154,8 @@ template < static __device__ __hip_img_chk__ T texCubemap(hipTextureObject_t textureObject, float x, float y, float z) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data); + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_CM(i, s, get_native_vector(coords)); return __hipMapFrom(tmp); } @@ -168,7 +173,8 @@ template < static __device__ __hip_img_chk__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data); + float4 coords{x, y, z, layer}; + auto tmp = __ockl_image_sample_CMa(i, s, get_native_vector(coords)); return __hipMapFrom(tmp); } @@ -186,26 +192,27 @@ template < static __device__ __hip_img_chk__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0) { TEXTURE_OBJECT_PARAMETERS_INIT + float2 coords{x, y}; switch (comp) { case 1: { - auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data); - return __hipMapFrom(tmp); - break; + auto tmp = __ockl_image_gather4r_2D(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); + break; } case 2: { - auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data); - return __hipMapFrom(tmp); - break; + auto tmp = __ockl_image_gather4g_2D(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); + break; } case 3: { - auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data); - return __hipMapFrom(tmp); - break; + auto tmp = __ockl_image_gather4b_2D(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); + break; } default: { - auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data); - return __hipMapFrom(tmp); - break; + auto tmp = __ockl_image_gather4a_2D(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); + break; } } return {}; @@ -243,7 +250,8 @@ template < static __device__ __hip_img_chk__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + float2 coords{x, y}; + auto tmp = __ockl_image_sample_lod_2D(i, s, get_native_vector(coords), level); return __hipMapFrom(tmp); } @@ -261,7 +269,8 @@ template < static __device__ __hip_img_chk__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level); + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_lod_3D(i, s, get_native_vector(coords), level); return __hipMapFrom(tmp); } @@ -279,7 +288,8 @@ template < static __device__ __hip_img_chk__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords)); return __hipMapFrom(tmp); } @@ -297,7 +307,8 @@ template < static __device__ __hip_img_chk__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords)); return __hipMapFrom(tmp); } @@ -315,7 +326,8 @@ template < static __device__ __hip_img_chk__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level); + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_lod_CM(i, s, get_native_vector(coords), level); return __hipMapFrom(tmp); } @@ -334,8 +346,9 @@ static __device__ __hip_img_chk__ T texCubemapGrad(hipTextureObject_t textureObj { TEXTURE_OBJECT_PARAMETERS_INIT // TODO missing in device libs. - // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); - // return __hipMapFrom(tmp); + // auto tmp = __ockl_image_sample_grad_CM(i, s, get_native_vector(float4(x, y, z, 0.0f)), + // get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x, + // dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom(tmp); return {}; } @@ -353,7 +366,8 @@ template < static __device__ __hip_img_chk__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level); + float4 coords{x, y, z, layer}; + auto tmp = __ockl_image_sample_lod_CMa(i, s, get_native_vector(coords), level); return __hipMapFrom(tmp); } @@ -389,7 +403,9 @@ template < static __device__ __hip_img_chk__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + float2 coords{x, y}; + auto tmp = __ockl_image_sample_grad_2D(i, s, get_native_vector(coords), get_native_vector(dPdx), + get_native_vector(dPdy)); return __hipMapFrom(tmp); } @@ -407,7 +423,11 @@ template < static __device__ __hip_img_chk__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + float4 coords{x, y, z, 0.0f}; + float4 gradx{dPdy.x, dPdy.y, dPdy.z, 0.0f}; + float4 grady{dPdy.x, dPdy.y, dPdy.z, 0.0f}; + auto tmp = __ockl_image_sample_grad_3D(i, s, get_native_vector(coords), + get_native_vector(gradx), get_native_vector(grady)); return __hipMapFrom(tmp); } @@ -425,7 +445,8 @@ template < static __device__ __hip_img_chk__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy); + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_grad_1Da(i, s, get_native_vector(coords), dPdx, dPdy); return __hipMapFrom(tmp); } @@ -443,7 +464,9 @@ template < static __device__ __hip_img_chk__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy) { TEXTURE_OBJECT_PARAMETERS_INIT - auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_grad_2Da(i, s, get_native_vector(coords), + get_native_vector(dPdx), get_native_vector(dPdy)); return __hipMapFrom(tmp); } @@ -462,8 +485,9 @@ static __device__ __hip_img_chk__ T texCubemapLayeredGrad(hipTextureObject_t te { TEXTURE_OBJECT_PARAMETERS_INIT // TODO missing in device libs. - // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); - // return __hipMapFrom(tmp); + // auto tmp = __ockl_image_sample_grad_CMa(i, s, get_native_vector(float4(x, y, z, layer)), + // get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x, + // dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom(tmp); return {}; }