Merge pull request #535 from ROCm-Developer-Tools/feature_native_vector_types

Feature native vector types

[ROCm/hip commit: 1918ffdda5]
Este commit está contenido en:
Maneesh Gupta
2018-07-17 10:12:08 +05:30
cometido por GitHub
Se han modificado 6 ficheros con 1000 adiciones y 15967 borrados
@@ -408,34 +408,34 @@ double __shfl_xor(double var, int lane_mask, int width = warpSize) {
// Packed add on the w components of two char4 values.
//
// The sum is computed twice: once on the bits selected by MASK1 and once on the bits
// selected by MASK2. Each partial sum is masked again before the two are OR-ed together,
// so bits produced outside the selected field are discarded.
//
// Only out.w is assigned; the remaining components of `out` are whatever char4's default
// initialization leaves them as.
// NOTE(review): MASK1 / MASK2 are defined outside this view - presumably complementary
// lane masks; confirm against their definitions.
__device__ static inline char4 __hip_hc_add8pk(char4 in1, char4 in2) {
    char4 out;
    // First pass: fields selected by MASK1.
    unsigned one1 = in1.w & MASK1;
    unsigned one2 = in2.w & MASK1;
    out.w = (one1 + one2) & MASK1;
    // Second pass: fields selected by MASK2, merged into the first result.
    one1 = in1.w & MASK2;
    one2 = in2.w & MASK2;
    out.w = out.w | ((one1 + one2) & MASK2);
    return out;
}
// Packed subtract (in1 - in2) on the w components of two char4 values.
//
// The difference is computed twice: once on the bits selected by MASK1 and once on the
// bits selected by MASK2. Each partial result is masked again before the two are OR-ed
// together, so bits produced outside the selected field (e.g. by a borrow) are discarded.
//
// Only out.w is assigned; the remaining components of `out` are whatever char4's default
// initialization leaves them as.
// NOTE(review): MASK1 / MASK2 are defined outside this view - presumably complementary
// lane masks; confirm against their definitions.
__device__ static inline char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
    char4 out;
    // First pass: fields selected by MASK1.
    unsigned one1 = in1.w & MASK1;
    unsigned one2 = in2.w & MASK1;
    out.w = (one1 - one2) & MASK1;
    // Second pass: fields selected by MASK2, merged into the first result.
    one1 = in1.w & MASK2;
    one2 = in2.w & MASK2;
    out.w = out.w | ((one1 - one2) & MASK2);
    return out;
}
// Packed multiply on the w components of two char4 values.
//
// The product is computed twice: once on the bits selected by MASK1 and once on the bits
// selected by MASK2. Each partial product is masked again before the two are OR-ed
// together, so bits produced outside the selected field are discarded.
//
// Only out.w is assigned; the remaining components of `out` are whatever char4's default
// initialization leaves them as.
// NOTE(review): MASK1 / MASK2 are defined outside this view - presumably complementary
// lane masks; confirm against their definitions.
__device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
    char4 out;
    // First pass: fields selected by MASK1.
    unsigned one1 = in1.w & MASK1;
    unsigned one2 = in2.w & MASK1;
    out.w = (one1 * one2) & MASK1;
    // Second pass: fields selected by MASK2, merged into the first result.
    one1 = in1.w & MASK2;
    one2 = in2.w & MASK2;
    out.w = out.w | ((one1 * one2) & MASK2);
    return out;
}
La diferencia del archivo ha sido suprimida porque es demasiado grande Cargar Diff
@@ -110,47 +110,47 @@ union TData {
// Return-statement macros. Each one expands to a complete `return ...;` statement built
// from fields of a variable named `texel`, which must be in scope at the expansion site.
// The `.i`, `.u` and `.f` members are read for the signed, unsigned and float variants
// respectively (presumably the views of union TData - confirm against its definition).
//
// Every vector result is built through the corresponding make_<type>() factory function;
// each macro is defined exactly once.

// Scalar result.
#define TEXTURE_RETURN_UNSIGNED return texel.u.x;

// One-component results.
#define TEXTURE_RETURN_CHAR_X return make_char1(texel.i.x);
#define TEXTURE_RETURN_UCHAR_X return make_uchar1(texel.u.x);
#define TEXTURE_RETURN_SHORT_X return make_short1(texel.i.x);
#define TEXTURE_RETURN_USHORT_X return make_ushort1(texel.u.x);
#define TEXTURE_RETURN_INT_X return make_int1(texel.i.x);
#define TEXTURE_RETURN_UINT_X return make_uint1(texel.u.x);
#define TEXTURE_RETURN_FLOAT_X return make_float1(texel.f.x);

// Two-component results.
#define TEXTURE_RETURN_CHAR_XY return make_char2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_UCHAR_XY return make_uchar2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_SHORT_XY return make_short2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_USHORT_XY return make_ushort2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_INT_XY return make_int2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_UINT_XY return make_uint2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_FLOAT_XY return make_float2(texel.f.x, texel.f.y);

// Four-component results.
#define TEXTURE_RETURN_CHAR_XYZW return make_char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_UCHAR_XYZW return make_uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_SHORT_XYZW return make_short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_USHORT_XYZW return make_ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_INT_XYZW return make_int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_UINT_XYZW return make_uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_FLOAT_XYZW return make_float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w);
extern "C" {
hc::short_vector::float4::vector_value_type __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT* i,
La diferencia del archivo ha sido suprimida porque es demasiado grande Cargar Diff
La diferencia del archivo ha sido suprimida porque es demasiado grande Cargar Diff
@@ -0,0 +1,69 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <type_traits>
// Shorthand for `typename std::enable_if<Condition, Result>::type`.
// Names `Result` (void by default) when Condition is true; names no type otherwise, which
// removes the enclosing template from overload resolution (SFINAE).
template<bool Condition, typename Result = void>
using Enable_if_t = typename std::enable_if<Condition, Result>::type;
// Catch-all overloads of the is_vecN detectors. The C-style variadic parameter accepts any
// argument, so these are the overloads chosen when the templated detector of the same name
// is not viable; the std::false_type return type then reports "not an N-component vector".
// Only declarations are given here - the functions are meant to be named inside decltype.
__host__ __device__ std::false_type is_vec1(...);
__host__ __device__ std::false_type is_vec2(...);
__host__ __device__ std::false_type is_vec3(...);
__host__ __device__ std::false_type is_vec4(...);
// Detects 4-component vectors: this overload is viable only when an object of type Vec has
// a member named `xyzw`. In that case it is preferred over the variadic is_vec4(...)
// overload and the result type is std::true_type. Declaration only; use inside decltype.
template<typename Vec>
__host__ __device__ auto is_vec4(const Vec&)
    -> decltype(std::declval<Vec>().xyzw, std::true_type{});
// Detects 3-component vectors: this overload is viable only when T is not classified as a
// 4-component vector AND an object of type T has a member named `xyz`. When viable, it is
// preferred over the variadic is_vec3(...) overload and the result type is std::true_type.
// Declaration only; use inside decltype.
//
// The negation has to be applied to the trait *value*, outside decltype:
// `decltype(!is_vec4(...)){}` would name `bool` and value-initialize it to false, which
// disables this overload for every T.
template<
    typename T, Enable_if_t<!decltype(is_vec4(std::declval<T>())){}>* = nullptr>
__host__ __device__
auto is_vec3(const T&) -> decltype(std::declval<T>().xyz, std::true_type{});
template<
typename T,
Enable_if_t<
!decltype(is_vec4(std::declval<T>())){} &&
!decltype(is_vec3(std::declval<T>())){}>* = nullptr>
__host__ __device__
auto is_vec2(const T&) -> decltype(std::declval<T>().xy, std::true_type{});
template<
typename T,
Enable_if_t<
!decltype(is_vec4(std::declval<T>())){} &&
!decltype(is_vec3(std::declval<T>())){} &&
!decltype(is_vec2(std::declval<T>())){}>* = nullptr>
__host__ __device__
auto is_vec1(const T&) -> decltype(std::declval<T>().x, std::true_type{});
// Reports, as a compile-time constant, whether T is classified as a vector with the
// requested number of components.
//
// Template parameters:
//   T         - type to classify.
//   dimension - component count to test for; 1, 2, 3 and 4 are supported.
//
// Returns the result of is_vec1 / is_vec2 / is_vec3 / is_vec4 for dimension 1 / 2 / 3 / 4.
// Any other dimension yields false rather than being treated as a 4-component query.
template<typename T, int dimension>
__host__ __device__
constexpr
bool is_vec() {
    // A single return expression keeps this a valid C++11 constexpr function.
    // ::value reads the bool directly, so the conditional operator never has to reconcile
    // std::true_type with std::false_type operands.
    return (dimension == 1) ? decltype(is_vec1(std::declval<T>()))::value :
           (dimension == 2) ? decltype(is_vec2(std::declval<T>()))::value :
           (dimension == 3) ? decltype(is_vec3(std::declval<T>()))::value :
           (dimension == 4) ? decltype(is_vec4(std::declval<T>()))::value :
                              false;
}