Merge pull request #535 from ROCm-Developer-Tools/feature_native_vector_types
Feature native vector types
This commit is contained in:
@@ -408,34 +408,34 @@ double __shfl_xor(double var, int lane_mask, int width = warpSize) {
|
||||
|
||||
// Adds the `w` members of two char4 values one masked field at a time.
//
// The bits selected by MASK1 and the bits selected by MASK2 are summed in two
// separate steps; each sum is masked again and the two partial results are
// OR-ed together into `out.w`. MASK1 and MASK2 are defined outside this block.
// NOTE(review): presumably the two masks select alternating byte lanes so that
// a carry out of one lane is dropped instead of spilling into its neighbour -
// confirm against the MASK1/MASK2 definitions.
// NOTE(review): only `out.w` is assigned here; no other member of `out` is
// written by this function.
__device__ static inline char4 __hip_hc_add8pk(char4 in1, char4 in2) {
    char4 out;
    // First pass: fields selected by MASK1.
    unsigned one1 = in1.w & MASK1;
    unsigned one2 = in2.w & MASK1;
    out.w = (one1 + one2) & MASK1;
    // Second pass: fields selected by MASK2, merged into the first result.
    one1 = in1.w & MASK2;
    one2 = in2.w & MASK2;
    out.w = out.w | ((one1 + one2) & MASK2);
    return out;
}
|
||||
|
||||
// Subtracts the `w` member of `in2` from the `w` member of `in1` one masked
// field at a time.
//
// The bits selected by MASK1 and the bits selected by MASK2 are subtracted in
// two separate steps; each difference is masked again and the two partial
// results are OR-ed together into `out.w`. MASK1 and MASK2 are defined outside
// this block.
// NOTE(review): presumably the two masks select alternating byte lanes so that
// a borrow in one lane cannot disturb its neighbour - confirm against the
// MASK1/MASK2 definitions.
// NOTE(review): only `out.w` is assigned here; no other member of `out` is
// written by this function.
__device__ static inline char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
    char4 out;
    // First pass: fields selected by MASK1.
    unsigned one1 = in1.w & MASK1;
    unsigned one2 = in2.w & MASK1;
    out.w = (one1 - one2) & MASK1;
    // Second pass: fields selected by MASK2, merged into the first result.
    one1 = in1.w & MASK2;
    one2 = in2.w & MASK2;
    out.w = out.w | ((one1 - one2) & MASK2);
    return out;
}
|
||||
|
||||
// Multiplies the `w` members of two char4 values one masked field at a time.
//
// The bits selected by MASK1 and the bits selected by MASK2 are multiplied in
// two separate steps; each product is masked again and the two partial results
// are OR-ed together into `out.w`. MASK1 and MASK2 are defined outside this
// block.
// NOTE(review): whether masking the product yields a true per-lane multiply
// depends on the MASK1/MASK2 bit layout, which is not visible here - confirm.
// NOTE(review): only `out.w` is assigned here; no other member of `out` is
// written by this function.
__device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
    char4 out;
    // First pass: fields selected by MASK1.
    unsigned one1 = in1.w & MASK1;
    unsigned one2 = in2.w & MASK1;
    out.w = (one1 * one2) & MASK1;
    // Second pass: fields selected by MASK2, merged into the first result.
    one1 = in1.w & MASK2;
    one2 = in2.w & MASK2;
    out.w = out.w | ((one1 * one2) & MASK2);
    return out;
}
|
||||
|
||||
|
||||
File diff soppresso perché troppo grande
Carica Diff
@@ -110,47 +110,47 @@ union TData {
|
||||
|
||||
// Return-statement helpers for the texture fetch functions.
//
// Each macro expands to a complete `return ...;` statement that reads from a
// variable named `texel`, which must be in scope where the macro is expanded.
// The `.i` member feeds the char/short/int variants, `.u` feeds the unsigned
// variants and `.f` feeds the float variants. Vector results are assembled
// through the make_<type>N() helpers; every macro has exactly one definition.

#define TEXTURE_RETURN_UNSIGNED return texel.u.x;

// One-component results.
#define TEXTURE_RETURN_CHAR_X return make_char1(texel.i.x);
#define TEXTURE_RETURN_UCHAR_X return make_uchar1(texel.u.x);
#define TEXTURE_RETURN_SHORT_X return make_short1(texel.i.x);
#define TEXTURE_RETURN_USHORT_X return make_ushort1(texel.u.x);
#define TEXTURE_RETURN_INT_X return make_int1(texel.i.x);
#define TEXTURE_RETURN_UINT_X return make_uint1(texel.u.x);
#define TEXTURE_RETURN_FLOAT_X return make_float1(texel.f.x);

// Two-component results.
#define TEXTURE_RETURN_CHAR_XY return make_char2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_UCHAR_XY return make_uchar2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_SHORT_XY return make_short2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_USHORT_XY return make_ushort2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_INT_XY return make_int2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_UINT_XY return make_uint2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_FLOAT_XY return make_float2(texel.f.x, texel.f.y);

// Four-component results.
#define TEXTURE_RETURN_CHAR_XYZW return make_char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_UCHAR_XYZW return make_uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_SHORT_XYZW return make_short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_USHORT_XYZW return make_ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_INT_XYZW return make_int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_UINT_XYZW return make_uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_FLOAT_XYZW return make_float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w);
|
||||
|
||||
extern "C" {
|
||||
hc::short_vector::float4::vector_value_type __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT* i,
|
||||
|
||||
File diff soppresso perché troppo grande
Carica Diff
File diff soppresso perché troppo grande
Carica Diff
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
// Shorthand for `typename std::enable_if<...>::type`: names `Result` when
// `condition` is true and is ill-formed (SFINAE-friendly) otherwise.
template<bool condition, typename Result = void>
using Enable_if_t = typename std::enable_if<condition, Result>::type;
|
||||
|
||||
// Catch-all overloads for the is_vecN detectors. The C-style variadic
// parameter accepts any argument, and each declaration reports
// std::false_type. They are declarations only (no definitions), so they are
// usable solely in unevaluated contexts such as decltype().
__host__ __device__ std::false_type is_vec1(...);
__host__ __device__ std::false_type is_vec2(...);
__host__ __device__ std::false_type is_vec3(...);
__host__ __device__ std::false_type is_vec4(...);
|
||||
|
||||
template<typename T>
|
||||
__host__ __device__
|
||||
auto is_vec4(const T&) -> decltype(std::declval<T>().xyzw, std::true_type{});
|
||||
template<
|
||||
typename T, Enable_if_t<decltype(!is_vec4(std::declval<T>())){}>* = nullptr>
|
||||
__host__ __device__
|
||||
auto is_vec3(const T&) -> decltype(std::declval<T>().xyz, std::true_type{});
|
||||
template<
|
||||
typename T,
|
||||
Enable_if_t<
|
||||
!decltype(is_vec4(std::declval<T>())){} &&
|
||||
!decltype(is_vec3(std::declval<T>())){}>* = nullptr>
|
||||
__host__ __device__
|
||||
auto is_vec2(const T&) -> decltype(std::declval<T>().xy, std::true_type{});
|
||||
template<
|
||||
typename T,
|
||||
Enable_if_t<
|
||||
!decltype(is_vec4(std::declval<T>())){} &&
|
||||
!decltype(is_vec3(std::declval<T>())){} &&
|
||||
!decltype(is_vec2(std::declval<T>())){}>* = nullptr>
|
||||
__host__ __device__
|
||||
auto is_vec1(const T&) -> decltype(std::declval<T>().x, std::true_type{});
|
||||
|
||||
template<typename T, int dimension>
|
||||
__host__ __device__
|
||||
constexpr
|
||||
bool is_vec() {
|
||||
return (dimension == 1) ? decltype(is_vec1(std::declval<T>())){} :
|
||||
((dimension == 2) ? decltype(is_vec2(std::declval<T>())){} :
|
||||
((dimension == 3) ? decltype(is_vec3(std::declval<T>())){} :
|
||||
decltype(is_vec4(std::declval<T>())){}));
|
||||
}
|
||||
Fai riferimento in un nuovo problema
Block a user