Re-sync with upstream.

[ROCm/hip commit: 417869821d]
This commit is contained in:
Alex Voicu
2018-06-01 15:49:05 +01:00
16 ha cambiato i file con 2753 aggiunte e 17020 eliminazioni
+1 -2
Vedi File
@@ -195,7 +195,6 @@ if(HIP_PLATFORM STREQUAL "hcc")
set(SOURCE_FILES_DEVICE
src/device_util.cpp
src/hip_ldg.cpp
src/hip_fp16.cpp
src/device_functions.cpp)
execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS)
@@ -239,7 +238,7 @@ endif()
# Install hip_hcc if platform is hcc
if(HIP_PLATFORM STREQUAL "hcc")
install(TARGETS hip_hcc_static hip_hcc hip_device DESTINATION lib)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_hc.ll ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_hc_gfx803.ll DESTINATION lib)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_hc.ll DESTINATION lib)
# Install .hipInfo
install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib)
-3
Vedi File
@@ -492,7 +492,6 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){
$HIPCXXFLAGS .= $GPU_ARCH_ARG;;
}
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX803__=1 ";
$ENV{HCC_EXTRA_LIBRARIES_GFX803}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
}
if ($target_gfx900 eq 1) {
$GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx900";
@@ -501,12 +500,10 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){
$HIPCXXFLAGS .= $GPU_ARCH_ARG;;
}
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 ";
$ENV{HCC_EXTRA_LIBRARIES_GFX900}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
}
if ($target_gfx906 eq 1) {
$HIPLDFLAGS .= " --amdgpu-target=gfx906";
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX906__=1 ";
$ENV{HCC_EXTRA_LIBRARIES_GFX906}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
}
}
File diff soppresso perché troppo grande Carica Diff
@@ -0,0 +1,257 @@
#pragma once
#if defined(__cplusplus)
#include <cstring>
#endif
struct __half_raw {
unsigned short x;
};
struct __half2_raw {
unsigned short x;
unsigned short y;
};
#if defined(__cplusplus)
struct __half;
__half __float2half(float);
float __half2float(__half);
// BEGIN STRUCT __HALF
struct __half {
protected:
unsigned short __x;
public:
// CREATORS
__half() = default;
__half(const __half_raw& x) : __x{x.x} {}
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
__half(float x) : __x{__float2half(x).__x} {}
__half(double x) : __x{__float2half(x).__x} {}
#endif
__half(const __half&) = default;
__half(__half&&) = default;
~__half() = default;
// MANIPULATORS
__half& operator=(const __half&) = default;
__half& operator=(__half&&) = default;
__half& operator=(const __half_raw& x) { __x = x.x; return *this; }
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
__half& operator=(float x)
{
__x = __float2half(x).__x;
return *this;
}
__half& operator=(double x)
{
return *this = static_cast<float>(x);
}
#endif
// ACCESSORS
operator float() const { return __half2float(*this); }
operator __half_raw() const { return __half_raw{__x}; }
};
// END STRUCT __HALF
// BEGIN STRUCT __HALF2
struct __half2 {
protected:
__half x;
__half y;
public:
// CREATORS
__half2() = default;
__half2(const __half2_raw& ix)
:
x{reinterpret_cast<const __half&>(ix.x)},
y{reinterpret_cast<const __half&>(ix.y)}
{}
__half2(const __half& ix, const __half& iy) : x{ix}, y{iy} {}
__half2(const __half2&) = default;
__half2(__half2&&) = default;
~__half2() = default;
// MANIPULATORS
__half2& operator=(const __half2&) = default;
__half2& operator=(__half2&&) = default;
__half2& operator=(const __half2_raw& ix)
{
x = reinterpret_cast<const __half_raw&>(ix.x);
y = reinterpret_cast<const __half_raw&>(ix.y);
return *this;
}
// ACCESSORS
operator __half2_raw() const
{
return __half2_raw{
reinterpret_cast<const unsigned short&>(x),
reinterpret_cast<const unsigned short&>(y)};
}
};
// END STRUCT __HALF2
namespace
{
inline
unsigned short __internal_float2half(
float flt, unsigned int& sgn, unsigned int& rem)
{
unsigned int x{};
std::memcpy(&x, &flt, sizeof(flt));
unsigned int u = (x & 0x7fffffffU);
sgn = ((x >> 16) & 0x8000U);
// NaN/+Inf/-Inf
if (u >= 0x7f800000U) {
rem = 0;
return static_cast<unsigned short>(
(u == 0x7f800000U) ? (sgn | 0x7c00U) : 0x7fffU);
}
// Overflows
if (u > 0x477fefffU) {
rem = 0x80000000U;
return static_cast<unsigned short>(sgn | 0x7bffU);
}
// Normal numbers
if (u >= 0x38800000U) {
rem = u << 19;
u -= 0x38000000U;
return static_cast<unsigned short>(sgn | (u >> 13));
}
// +0/-0
if (u < 0x33000001U) {
rem = u;
return static_cast<unsigned short>(sgn);
}
// Denormal numbers
unsigned int exponent = u >> 23;
unsigned int mantissa = (u & 0x7fffffU);
unsigned int shift = 0x7eU - exponent;
mantissa |= 0x800000U;
rem = mantissa << (32 - shift);
return static_cast<unsigned short>(sgn | (mantissa >> shift));
}
inline
__half __float2half(float x)
{
__half_raw r;
unsigned int sgn{};
unsigned int rem{};
r.x = __internal_float2half(x, sgn, rem);
if (rem > 0x80000000U || (rem == 0x80000000U && (r.x & 0x1))) ++r.x;
return r;
}
inline
__half __float2half_rn(float x) { return __float2half(x); }
inline
__half __float2half_rz(float x)
{
__half_raw r;
unsigned int sgn{};
unsigned int rem{};
r.x = __internal_float2half(x, sgn, rem);
return r;
}
inline
__half __float2half_rd(float x)
{
__half_raw r;
unsigned int sgn{};
unsigned int rem{};
r.x = __internal_float2half(x, sgn, rem);
if (rem && sgn) ++r.x;
return r;
}
inline
__half __float2half_ru(float x)
{
__half_raw r;
unsigned int sgn{};
unsigned int rem{};
r.x = __internal_float2half(x, sgn, rem);
if (rem && !sgn) ++r.x;
return r;
}
inline
__half2 __float2half2_rn(float x)
{
return __half2{__float2half_rn(x), __float2half_rn(x)};
}
inline
__half2 __floats2half2_rn(float x, float y)
{
return __half2{__float2half_rn(x), __float2half_rn(y)};
}
inline
float __internal_half2float(unsigned short x)
{
unsigned int sign = ((x >> 15) & 1);
unsigned int exponent = ((x >> 10) & 0x1f);
unsigned int mantissa = ((x & 0x3ff) << 13);
if (exponent == 0x1fU) { /* NaN or Inf */
mantissa = (mantissa ? (sign = 0, 0x7fffffU) : 0);
exponent = 0xffU;
} else if (!exponent) { /* Denorm or Zero */
if (mantissa) {
unsigned int msb;
exponent = 0x71U;
do {
msb = (mantissa & 0x400000U);
mantissa <<= 1; /* normalize */
--exponent;
} while (!msb);
mantissa &= 0x7fffffU; /* 1.mantissa is implicit */
}
} else {
exponent += 0x70U;
}
unsigned int u = ((sign << 31) | (exponent << 23) | mantissa);
float f;
memcpy(&f, &u, sizeof(u));
return f;
}
inline
float __half2float(__half x)
{
return __internal_half2float(static_cast<__half_raw>(x).x);
}
inline
float __low2float(__half2 x)
{
return __internal_half2float(static_cast<__half2_raw>(x).x);
}
inline
float __high2float(__half2 x)
{
return __internal_half2float(static_cast<__half2_raw>(x).y);
}
} // Anonymous namespace.
#if !defined(HIP_NO_HALF)
using half = __half;
using half2 = __half2;
#endif
#endif // defined(__cplusplus)
@@ -0,0 +1,76 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
// /*
// Half Math Functions
// */
extern "C"
{
__attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
_Float16 __ocml_cos_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_exp_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16);
__attribute__((const)) _Float16 __ocml_floor_f16(_Float16);
__attribute__((const))
_Float16 __ocml_fma_f16(_Float16, _Float16, _Float16);
__attribute__((const)) int __ocml_isinf_f16(_Float16);
__attribute__((const)) int __ocml_isnan_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_log_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_log10_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_log2_f16(_Float16);
__attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16);
__attribute__((const)) _Float16 __ocml_rint_f16(_Float16);
__attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16);
_Float16 __ocml_sin_f16(_Float16);
__attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16);
__attribute__((const)) _Float16 __ocml_trunc_f16(_Float16);
typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
typedef short __2i16 __attribute__((ext_vector_type(2)));
__attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
__2f16 __ocml_cos_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16);
__attribute__((const)) __2f16 __ocml_floor_2f16(__2f16);
__attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16);
__attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16);
__attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_log_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16);
inline
__2f16 __llvm_amdgcn_rcp_2f16(__2f16 x) // Not currently exposed by ROCDL.
{
return __2f16{__llvm_amdgcn_rcp_f16(x.x), __llvm_amdgcn_rcp_f16(x.y)};
}
__attribute__((const)) __2f16 __ocml_rint_2f16(__2f16);
__attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16);
__2f16 __ocml_sin_2f16(__2f16);
__attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
__attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
}
File diff soppresso perché troppo grande Carica Diff
@@ -110,47 +110,47 @@ union TData {
#define TEXTURE_RETURN_UNSIGNED return texel.u.x;
#define TEXTURE_RETURN_CHAR_X return char1(texel.i.x);
#define TEXTURE_RETURN_CHAR_X return make_char1(texel.i.x);
#define TEXTURE_RETURN_UCHAR_X return uchar1(texel.u.x);
#define TEXTURE_RETURN_UCHAR_X return make_uchar1(texel.u.x);
#define TEXTURE_RETURN_SHORT_X return short1(texel.i.x);
#define TEXTURE_RETURN_SHORT_X return make_short1(texel.i.x);
#define TEXTURE_RETURN_USHORT_X return ushort1(texel.u.x);
#define TEXTURE_RETURN_USHORT_X return make_ushort1(texel.u.x);
#define TEXTURE_RETURN_INT_X return int1(texel.i.x);
#define TEXTURE_RETURN_INT_X return make_int1(texel.i.x);
#define TEXTURE_RETURN_UINT_X return uint1(texel.u.x);
#define TEXTURE_RETURN_UINT_X return make_uint1(texel.u.x);
#define TEXTURE_RETURN_FLOAT_X return float1(texel.f.x);
#define TEXTURE_RETURN_FLOAT_X return make_float1(texel.f.x);
#define TEXTURE_RETURN_CHAR_XY return char2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_CHAR_XY return make_char2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_UCHAR_XY return uchar2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_UCHAR_XY return make_uchar2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_SHORT_XY return short2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_SHORT_XY return make_short2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_USHORT_XY return ushort2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_USHORT_XY return make_ushort2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_INT_XY return int2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_INT_XY return make_int2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_UINT_XY return uint2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_UINT_XY return make_uint2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_FLOAT_XY return float2(texel.f.x, texel.f.y);
#define TEXTURE_RETURN_FLOAT_XY return make_float2(texel.f.x, texel.f.y);
#define TEXTURE_RETURN_CHAR_XYZW return char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_CHAR_XYZW return make_char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_UCHAR_XYZW return uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_UCHAR_XYZW return make_uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_SHORT_XYZW return short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_SHORT_XYZW return make_short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_USHORT_XYZW return ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_USHORT_XYZW return make_ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_INT_XYZW return int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_INT_XYZW return make_int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_UINT_XYZW return uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_UINT_XYZW return make_uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_FLOAT_XYZW return float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w);
#define TEXTURE_RETURN_FLOAT_XYZW return make_float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w);
extern "C" {
hc::short_vector::float4::vector_value_type __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT* i,
+1 -1
Vedi File
@@ -5,7 +5,7 @@ install(FILES @PROJECT_BINARY_DIR@/libhip_hcc.so DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/libhip_hcc_static.a DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/libhip_device.a DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib)
install(FILES @hip_SOURCE_DIR@/src/hip_hc.ll @hip_SOURCE_DIR@/src/hip_hc_gfx803.ll DESTINATION lib)
install(FILES @hip_SOURCE_DIR@/src/hip_hc.ll DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip)
install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip)
-399
Vedi File
@@ -1,399 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hcc_detail/hip_fp16.h"
struct hipHalfHolder {
union {
__half h;
unsigned short s;
};
};
__device__ __half __hadd(__half a, __half b) { return a + b; }
__device__ __half __hadd_sat(__half a, __half b) { return a + b; }
__device__ __half __hfma(__half a, __half b, __half c) { return a * b + c; }
__device__ __half __hfma_sat(__half a, __half b, __half c) { return a * b + c; }
__device__ __half __hmul(__half a, __half b) { return a * b; }
__device__ __half __hmul_sat(__half a, __half b) { return a * b; }
__device__ __half __hneg(__half a) { return -a; }
__device__ __half __hsub(__half a, __half b) { return a - b; }
__device__ __half __hsub_sat(__half a, __half b) { return a - b; }
__device__ __half hdiv(__half a, __half b) { return a / b; }
/*
Half comparision Functions
*/
__device__ bool __heq(__half a, __half b) { return a == b ? true : false; }
__device__ bool __hge(__half a, __half b) { return a >= b ? true : false; }
__device__ bool __hgt(__half a, __half b) { return a > b ? true : false; }
__device__ bool __hisinf(__half a) {
hipHalfHolder hH;
hH.h = a;
// mask with 0x7fff to drop the sign bit
// 0x7c00 is bit pattern for inf (exp = 11111, significand = 0)
return ((hH.s & 0x7fff) == 0x7c00) ? true : false;
}
__device__ bool __hisnan(__half a) {
hipHalfHolder hH;
hH.h = a;
// mask with 0x7fff to drop the sign bit
// 0x7cXX is bit pattern for inf (exp = 11111, significand = 0)
return ((hH.s & 0x7fff) > 0x7c00) ? true : false;
}
__device__ bool __hle(__half a, __half b) { return a <= b ? true : false; }
__device__ bool __hlt(__half a, __half b) { return a < b ? true : false; }
__device__ bool __hne(__half a, __half b) { return a != b ? true : false; }
/*
Half2 Comparision Functions
*/
__device__ bool __hbeq2(__half2 a, __half2 b) {
return (a.x == b.x ? true : false) && (a.y == b.y ? true : false);
}
__device__ bool __hbge2(__half2 a, __half2 b) {
return (a.x >= b.x ? true : false) && (a.y >= b.y ? true : false);
}
__device__ bool __hbgt2(__half2 a, __half2 b) {
return (a.x > b.x ? true : false) && (a.y > b.y ? true : false);
}
__device__ bool __hble2(__half2 a, __half2 b) {
return (a.x <= b.x ? true : false) && (a.y <= b.y ? true : false);
}
__device__ bool __hblt2(__half2 a, __half2 b) {
return (a.x < b.x ? true : false) && (a.y < b.y ? true : false);
}
__device__ bool __hbne2(__half2 a, __half2 b) {
return (a.x != b.x ? true : false) && (a.y != b.y ? true : false);
}
__device__ __half2 __heq2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x == b.x) ? (__half)1 : (__half)0;
c.y = (a.y == b.y) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hge2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x >= b.x) ? (__half)1 : (__half)0;
c.y = (a.y >= b.y) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hgt2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x > b.x) ? (__half)1 : (__half)0;
c.y = (a.y > b.y) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hisnan2(__half2 a) {
__half2 c;
c.x = (__hisnan(a.x)) ? (__half)1 : (__half)0;
c.y = (__hisnan(a.y)) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hle2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x <= b.x) ? (__half)1 : (__half)0;
c.y = (a.y <= b.y) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hlt2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x < b.x) ? (__half)1 : (__half)0;
c.y = (a.y < b.y) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hne2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x != b.x) ? (__half)1 : (__half)0;
c.y = (a.y != b.y) ? (__half)1 : (__half)0;
return c;
}
/*
Conversion instructions
*/
__device__ __half2 __float22half2_rn(const float2 a) {
__half2 b;
b.x = (__half)a.x;
b.y = (__half)a.y;
return b;
}
__device__ __half __float2half(const float a) { return (__half)a; }
__device__ __half2 __float2half2_rn(const float a) {
__half2 b;
b.x = (__half)a;
b.y = (__half)a;
return b;
}
__device__ __half __float2half_rd(const float a) { return (__half)a; }
__device__ __half __float2half_rn(const float a) { return (__half)a; }
__device__ __half __float2half_ru(const float a) { return (__half)a; }
__device__ __half __float2half_rz(const float a) { return (__half)a; }
__device__ __half2 __floats2half2_rn(const float a, const float b) {
__half2 c;
c.x = (__half)a;
c.y = (__half)b;
return c;
}
__device__ float2 __half22float2(const __half2 a) {
float2 b;
b.x = (float)a.x;
b.y = (float)a.y;
return b;
}
__device__ float __half2float(const __half a) { return (float)a; }
__device__ __half2 half2half2(const __half a) {
__half2 b;
b.x = a;
b.y = a;
return b;
}
__device__ int __half2int_rd(__half h) { return (int)h; }
__device__ int __half2int_rn(__half h) { return (int)h; }
__device__ int __half2int_ru(__half h) { return (int)h; }
__device__ int __half2int_rz(__half h) { return (int)h; }
__device__ long long int __half2ll_rd(__half h) { return (long long int)h; }
__device__ long long int __half2ll_rn(__half h) { return (long long int)h; }
__device__ long long int __half2ll_ru(__half h) { return (long long int)h; }
__device__ long long int __half2ll_rz(__half h) { return (long long int)h; }
__device__ short __half2short_rd(__half h) { return (short)h; }
__device__ short __half2short_rn(__half h) { return (short)h; }
__device__ short __half2short_ru(__half h) { return (short)h; }
__device__ short __half2short_rz(__half h) { return (short)h; }
__device__ unsigned int __half2uint_rd(__half h) { return (unsigned int)h; }
__device__ unsigned int __half2uint_rn(__half h) { return (unsigned int)h; }
__device__ unsigned int __half2uint_ru(__half h) { return (unsigned int)h; }
__device__ unsigned int __half2uint_rz(__half h) { return (unsigned int)h; }
__device__ unsigned long long int __half2ull_rd(__half h) { return (unsigned long long)h; }
__device__ unsigned long long int __half2ull_rn(__half h) { return (unsigned long long)h; }
__device__ unsigned long long int __half2ull_ru(__half h) { return (unsigned long long)h; }
__device__ unsigned long long int __half2ull_rz(__half h) { return (unsigned long long)h; }
__device__ unsigned short int __half2ushort_rd(__half h) { return (unsigned short int)h; }
__device__ unsigned short int __half2ushort_rn(__half h) { return (unsigned short int)h; }
__device__ unsigned short int __half2ushort_ru(__half h) { return (unsigned short int)h; }
__device__ unsigned short int __half2ushort_rz(__half h) { return (unsigned short int)h; }
__device__ short int __half_as_short(const __half h) {
hipHalfHolder hH;
hH.h = h;
return (short)hH.s;
}
__device__ unsigned short int __half_as_ushort(const __half h) {
hipHalfHolder hH;
hH.h = h;
return hH.s;
}
__device__ __half2 __halves2half2(const __half a, const __half b) {
__half2 c;
c.x = a;
c.y = b;
return c;
}
__device__ float __high2float(const __half2 a) { return (float)a.y; }
__device__ __half __high2half(const __half2 a) { return a.y; }
__device__ __half2 __high2half2(const __half2 a) {
__half2 b;
b.x = a.y;
b.y = a.y;
return b;
}
__device__ __half2 __highs2half2(const __half2 a, const __half2 b) {
__half2 c;
c.x = a.y;
c.y = b.y;
return c;
}
__device__ __half __int2half_rd(int i) { return (__half)i; }
__device__ __half __int2half_rn(int i) { return (__half)i; }
__device__ __half __int2half_ru(int i) { return (__half)i; }
__device__ __half __int2half_rz(int i) { return (__half)i; }
__device__ __half __ll2half_rd(long long int i) { return (__half)i; }
__device__ __half __ll2half_rn(long long int i) { return (__half)i; }
__device__ __half __ll2half_ru(long long int i) { return (__half)i; }
__device__ __half __ll2half_rz(long long int i) { return (__half)i; }
__device__ float __low2float(const __half2 a) { return (float)a.x; }
__device__ __half __low2half(const __half2 a) { return a.x; }
__device__ __half2 __low2half2(const __half2 a, const __half2 b) {
__half2 c;
c.x = a.x;
c.y = b.x;
return c;
}
__device__ __half2 __low2half2(const __half2 a) {
__half2 b;
b.x = a.x;
b.y = a.x;
return b;
}
__device__ __half2 __lowhigh2highlow(const __half2 a) {
__half2 b;
b.x = a.y;
b.y = a.x;
return b;
}
__device__ __half2 __lows2half2(const __half2 a, const __half2 b) {
__half2 c;
c.x = a.x;
c.y = b.x;
return c;
}
__device__ __half __short2half_rd(short int i) { return (__half)i; }
__device__ __half __short2half_rn(short int i) { return (__half)i; }
__device__ __half __short2half_ru(short int i) { return (__half)i; }
__device__ __half __short2half_rz(short int i) { return (__half)i; }
__device__ __half __uint2half_rd(unsigned int i) { return (__half)i; }
__device__ __half __uint2half_rn(unsigned int i) { return (__half)i; }
__device__ __half __uint2half_ru(unsigned int i) { return (__half)i; }
__device__ __half __uint2half_rz(unsigned int i) { return (__half)i; }
__device__ __half __ull2half_rd(unsigned long long int i) { return (__half)i; }
__device__ __half __ull2half_rn(unsigned long long int i) { return (__half)i; }
__device__ __half __ull2half_ru(unsigned long long int i) { return (__half)i; }
__device__ __half __ull2half_rz(unsigned long long int i) { return (__half)i; }
__device__ __half __ushort2half_rd(unsigned short int i) { return (__half)i; }
__device__ __half __ushort2half_rn(unsigned short int i) { return (__half)i; }
__device__ __half __ushort2half_ru(unsigned short int i) { return (__half)i; }
__device__ __half __ushort2half_rz(unsigned short int i) { return (__half)i; }
__device__ __half __ushort_as_half(const unsigned short int i) {
hipHalfHolder hH;
hH.s = i;
return hH.h;
}
/*
Soft Implementation. Use it for backup.
*/
static const unsigned sign_val = 0x8000;
static const __half __half_value_one_float = {0x3C00};
static const __half __half_value_zero_float = {0x0};
static const unsigned __half_pos_inf = 0x7C00;
static const unsigned __half_neg_inf = 0xFC00;
typedef struct {
union {
float f;
unsigned u;
};
} struct_float;
-123
Vedi File
@@ -1,123 +0,0 @@
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--amdhsa"
define <2 x half> @__hip_hc_ir_hadd2_int(<2 x half> %a, <2 x half> %b) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = bitcast <2 x half> %b to i32
%3 = tail call i32 asm sideeffect "v_add_f16 $0, $1, $2","=v,v,v"(i32 %1, i32 %2)
tail call void asm sideeffect "v_add_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %3, i32 %1, i32 %2)
%4 = bitcast i32 %3 to <2 x half>
ret <2 x half> %4
}
define <2 x half> @__hip_hc_ir_hfma2_int(<2 x half> %a, <2 x half> %b, <2 x half> %c) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = bitcast <2 x half> %b to i32
%3 = bitcast <2 x half> %c to i32
%4 = tail call i32 asm sideeffect "v_mad_f16 $0, $1, $2, $3","=v,v,v,v"(i32 %1, i32 %2, i32 %3)
tail call void asm sideeffect "v_mul_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %4, i32 %1, i32 %2)
tail call void asm sideeffect "v_add_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %4, i32 %4, i32 %3)
%5 = bitcast i32 %4 to <2 x half>
ret <2 x half> %5
}
define <2 x half> @__hip_hc_ir_hmul2_int(<2 x half> %a, <2 x half> %b) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = bitcast <2 x half> %b to i32
%3 = tail call i32 asm sideeffect "v_mul_f16 $0, $1, $2","=v,v,v"(i32 %1, i32 %2)
tail call void asm sideeffect "v_mul_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %3, i32 %1, i32 %2)
%4 = bitcast i32 %3 to <2 x half>
ret <2 x half> %4
}
define <2 x half> @__hip_hc_ir_hsub2_int(<2 x half> %a, <2 x half> %b) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = bitcast <2 x half> %b to i32
%3 = tail call i32 asm sideeffect "v_sub_f16 $0, $1, $2","=v,v,v"(i32 %1, i32 %2)
tail call void asm sideeffect "v_sub_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %3, i32 %1, i32 %2)
%4 = bitcast i32 %3 to <2 x half>
ret <2 x half> %4
}
define <2 x half> @__hip_hc_ir_h2ceil_int(<2 x half> %a) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = tail call i32 asm sideeffect "v_ceil_f16 $0, $1","=v,v"(i32 %1)
tail call void asm sideeffect "v_ceil_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
%3 = bitcast i32 %2 to <2 x half>
ret <2 x half> %3
}
define <2 x half> @__hip_hc_ir_h2cos_int(<2 x half> %a) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = tail call i32 asm sideeffect "v_cos_f16 $0, $1","=v,v"(i32 %1)
tail call void asm sideeffect "v_cos_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
%3 = bitcast i32 %2 to <2 x half>
ret <2 x half> %3
}
define <2 x half> @__hip_hc_ir_h2exp2_int(<2 x half> %a) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = tail call i32 asm sideeffect "v_exp_f16 $0, $1","=v,v"(i32 %1)
tail call void asm sideeffect "v_exp_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
%3 = bitcast i32 %2 to <2 x half>
ret <2 x half> %3
}
define <2 x half> @__hip_hc_ir_h2floor_int(<2 x half> %a) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = tail call i32 asm sideeffect "v_floor_f16 $0, $1","=v,v"(i32 %1)
tail call void asm sideeffect "v_floor_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
%3 = bitcast i32 %2 to <2 x half>
ret <2 x half> %3
}
define <2 x half> @__hip_hc_ir_h2log2_int(<2 x half> %a) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = tail call i32 asm sideeffect "v_log_f16 $0, $1","=v,v"(i32 %1)
tail call void asm sideeffect "v_log_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
%3 = bitcast i32 %2 to <2 x half>
ret <2 x half> %3
}
define <2 x half> @__hip_hc_ir_h2rcp_int(<2 x half> %a) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = tail call i32 asm sideeffect "v_rcp_f16 $0, $1","=v,v"(i32 %1)
tail call void asm sideeffect "v_rcp_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
%3 = bitcast i32 %2 to <2 x half>
ret <2 x half> %3
}
define <2 x half> @__hip_hc_ir_h2rsqrt_int(<2 x half> %a) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = tail call i32 asm sideeffect "v_rsq_f16 $0, $1","=v,v"(i32 %1)
tail call void asm sideeffect "v_rsq_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
%3 = bitcast i32 %2 to <2 x half>
ret <2 x half> %3
}
define <2 x half> @__hip_hc_ir_h2sin_int(<2 x half> %a) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = tail call i32 asm sideeffect "v_sin_f16 $0, $1","=v,v"(i32 %1)
tail call void asm sideeffect "v_sin_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
%3 = bitcast i32 %2 to <2 x half>
ret <2 x half> %3
}
define <2 x half> @__hip_hc_ir_h2sqrt_int(<2 x half> %a) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = tail call i32 asm sideeffect "v_sqrt_f16 $0, $1","=v,v"(i32 %1)
tail call void asm sideeffect "v_sqrt_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
%3 = bitcast i32 %2 to <2 x half>
ret <2 x half> %3
}
define <2 x half> @__hip_hc_ir_h2trunc_int(<2 x half> %a) #1 {
%1 = bitcast <2 x half> %a to i32
%2 = tail call i32 asm sideeffect "v_trunc_f16 $0, $1","=v,v"(i32 %1)
tail call void asm sideeffect "v_trunc_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
%3 = bitcast i32 %2 to <2 x half>
ret <2 x half> %3
}
attributes #1 = { alwaysinline nounwind }
+41 -25
Vedi File
@@ -1586,23 +1586,22 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea
return e;
};
int isLockedPointer(const void *ptr)
hipError_t getLockedPointer(const void *hostPtr, size_t dataLen, void **devicePtrPtr)
{
hsa_amd_pointer_info_t info;
int isLocked = 0;
hc::accelerator acc;
info.size = sizeof(info);
hsa_status_t hsa_status = hsa_amd_pointer_info(const_cast<void*>(ptr), &info, nullptr, nullptr, nullptr);
if(hsa_status != HSA_STATUS_SUCCESS) {
return -1;
}
if((info.type == HSA_EXT_POINTER_TYPE_HSA) || (info.type == HSA_EXT_POINTER_TYPE_LOCKED)) {
isLocked = 1;
}
return isLocked;
}
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
#else
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
#endif
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
if (status == AM_SUCCESS) {
*devicePtrPtr = (char*)amPointerInfo._devicePointer;
return(hipSuccess);
};
return(hipErrorHostMemoryNotRegistered);
};
// TODO - review and optimize
hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
@@ -1611,12 +1610,20 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
int isLocked = 0;
if(kind == hipMemcpyHostToDevice) {
isLocked = isLockedPointer(src);
void *pinnedPtr=NULL;
void *actualSrc = (void*)src;
void *actualDest = dst;
if(kind == hipMemcpyHostToDevice ) {
if(getLockedPointer((void*)src, spitch, &pinnedPtr) == hipSuccess ){
isLocked = 1;
actualSrc = pinnedPtr;
}
} else if(kind == hipMemcpyDeviceToHost) {
isLocked = isLockedPointer(dst);
if(getLockedPointer((void*)dst, dpitch, &pinnedPtr) == hipSuccess ){
isLocked = 1;
actualDest = pinnedPtr;
}
}
hc::completion_future marker;
hipError_t e = hipSuccess;
@@ -1624,12 +1631,12 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch
stream->locked_copySync((void*)dst, (void*)src, width*height, kind, false);
} else {
try {
if(isLocked) {
if(!isLocked) {
for (int i = 0; i < height; ++i)
stream->locked_copySync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind);
} else {
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), static_cast<const uint32_t*> (src), width, height, dpitch, spitch);
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (actualDest), static_cast<const uint32_t*> (actualSrc), width, height, dpitch, spitch);
stream->locked_wait();
}
} catch (ihipException& ex) {
@@ -1654,10 +1661,19 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue);
hipError_t e = hipSuccess;
int isLocked = 0;
if(kind == hipMemcpyHostToDevice) {
isLocked = isLockedPointer(src);
void *pinnedPtr=NULL;
void *actualSrc = (void*)src;
void *actualDest = dst;
if(kind == hipMemcpyHostToDevice ) {
if(getLockedPointer((void*)src, spitch, &pinnedPtr) == hipSuccess ){
isLocked = 1;
actualSrc = pinnedPtr;
}
} else if(kind == hipMemcpyDeviceToHost) {
isLocked = isLockedPointer(dst);
if(getLockedPointer((void*)dst, dpitch, &pinnedPtr) == hipSuccess ){
isLocked = 1;
actualDest = pinnedPtr;
}
}
if((width == dpitch) && (width == spitch)) {
hip_internal::memcpyAsync(dst, src, width*height, kind, stream);
@@ -1668,7 +1684,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind, stream);
} else{
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), static_cast<const uint32_t*> (src), width, height, dpitch, spitch);
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (actualDest), static_cast<const uint32_t*> (actualSrc), width, height, dpitch, spitch);
}
} catch (ihipException& ex) {
e = ex._code;
@@ -23,56 +23,67 @@ THE SOFTWARE.
* HIT_END
*/
#include <iostream>
#include <hip/hip_fp16.h>
#include "hip/hip_runtime.h"
#include "test_common.h"
#define LEN 64
#define HALF_SIZE 64 * sizeof(__half)
#define HALF2_SIZE 64 * sizeof(__half2)
#include "test_common.h"
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
__global__ void __halfMath(hipLaunchParm lp, __half* A, __half* B, __half* C) {
int tx = threadIdx.x;
__half a = A[tx];
__half b = B[tx];
__half c = C[tx];
c = __hadd(a, c);
c = __hadd_sat(b, c);
c = __hfma(a, c, b);
c = __hfma_sat(b, c, a);
c = __hsub(a, c);
c = __hsub_sat(b, c);
c = __hmul(a, c);
c = __hmul_sat(b, c);
c = hdiv(a, c);
__global__
void __halfMath(bool* result, __half a) {
result[0] = __heq(__hadd(a, __half{1}), __half{2});
result[0] = __heq(__hadd_sat(a, __half{1}), __half{1}) && result[0];
result[0] = __heq(__hfma(a, __half{2}, __half{3}), __half{5}) && result[0];
result[0] =
__heq(__hfma_sat(a, __half{2}, __half{3}), __half{1}) && result[0];
result[0] = __heq(__hsub(a, __half{1}), __half{0}) && result[0];
result[0] = __heq(__hsub_sat(a, __half{2}), __half{0}) && result[0];
result[0] = __heq(__hmul(a, __half{2}), __half{2}) && result[0];
result[0] = __heq(__hmul_sat(a, __half{2}), __half{1}) && result[0];
result[0] = __heq(__hdiv(a, __half{2}), __half{0.5}) && result[0];
}
__global__ void __half2Math(hipLaunchParm lp, __half2* A, __half2* B, __half2* C) {
int tx = threadIdx.x;
__half2 a = A[tx];
__half2 b = B[tx];
__half2 c = C[tx];
c = __hadd2(a, c);
c = __hadd2_sat(b, c);
c = __hfma2(a, c, b);
c = __hfma2_sat(b, c, a);
c = __hsub2(a, c);
c = __hsub2_sat(b, c);
c = __hmul2(a, c);
c = __hmul2_sat(b, c);
__device__
bool to_bool(const __half2& x)
{
auto r = static_cast<const __half2_raw&>(x);
return r.data.x != 0 && r.data.y != 0;
}
__global__ void kernel_hisnan(hipLaunchParm lp, __half* input, int* output) {
int tx = threadIdx.x;
output[tx] = __hisnan(input[tx]);
__global__
void __half2Math(bool* result, __half2 a) {
result[0] =
to_bool(__heq2(__hadd2(a, __half2{1, 1}), __half2{2, 2}));
result[0] = to_bool(__heq2(__hadd2_sat(a, __half2{1, 1}), __half2{1, 1})) &&
result[0];
result[0] = to_bool(__heq2(
__hfma2(a, __half2{2, 2}, __half2{3, 3}), __half2{5, 5})) && result[0];
result[0] = to_bool(__heq2(
__hfma2_sat(a, __half2{2, 2}, __half2{3, 3}), __half2{1, 1})) && result[0];
result[0] = to_bool(__heq2(__hsub2(a, __half2{1, 1}), __half2{0, 0})) &&
result[0];
result[0] = to_bool(__heq2(__hsub2_sat(a, __half2{2, 2}), __half2{0, 0})) &&
result[0];
result[0] = to_bool(__heq2(__hmul2(a, __half2{2, 2}), __half2{2, 2})) &&
result[0];
result[0] = to_bool(__heq2(__hmul2_sat(a, __half2{2, 2}), __half2{1, 1})) &&
result[0];
result[0] = to_bool(__heq2(__h2div(a, __half2{2, 2}), __half2{0.5, 0.5})) &&
result[0];
}
__global__ void kernel_hisinf(hipLaunchParm lp, __half* input, int* output) {
int tx = threadIdx.x;
output[tx] = __hisinf(input[tx]);
__global__
void kernel_hisnan(__half* input, int* output) {
int tx = threadIdx.x;
output[tx] = __hisnan(input[tx]);
}
__global__
void kernel_hisinf(__half* input, int* output) {
int tx = threadIdx.x;
output[tx] = __hisinf(input[tx]);
}
#endif
@@ -93,7 +104,8 @@ void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
hipMalloc((void**)&outputGPU, memsize);
// launch the kernel
hipLaunchKernel(kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
hipLaunchKernelGGL(
kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
// copy output from device
int* outputCPU = (int*) malloc(memsize);
@@ -103,12 +115,18 @@ void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
for (int i=0; i<NUM_INPUTS; i++) {
if ((2 <= i) && (i <= 5)) { // inputs are nan, output should be true
if (outputCPU[i] == 0) {
failed("__hisnan() returned false for %f (input idx = %d)\n", inputCPU[i], i);
failed(
"__hisnan() returned false for %f (input idx = %d)\n",
static_cast<float>(inputCPU[i]),
i);
}
}
else { // inputs are NOT nan, output should be false
if (outputCPU[i] != 0) {
failed("__hisnan() returned true for %f (input idx = %d)\n", inputCPU[i], i);
failed(
"__hisnan() returned true for %f (input idx = %d)\n",
static_cast<float>(inputCPU[i]),
i);
}
}
}
@@ -129,7 +147,8 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
hipMalloc((void**)&outputGPU, memsize);
// launch the kernel
hipLaunchKernel(kernel_hisinf, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
hipLaunchKernelGGL(
kernel_hisinf, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
// copy output from device
int* outputCPU = (int*) malloc(memsize);
@@ -139,12 +158,18 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
for (int i=0; i<NUM_INPUTS; i++) {
if ((0 <= i) && (i <= 1)) { // inputs are inf, output should be true
if (outputCPU[i] == 0) {
failed("__hisinf() returned false for %f (input idx = %d)\n", inputCPU[i], i);
failed(
"__hisinf() returned false for %f (input idx = %d)\n",
static_cast<float>(inputCPU[i]),
i);
}
}
else { // inputs are NOT inf, output should be false
if (outputCPU[i] != 0) {
failed("__hisinf() returned true for %f (input idx = %d)\n", inputCPU[i], i);
failed(
"__hisinf() returned true for %f (input idx = %d)\n",
static_cast<float>(inputCPU[i]),
i);
}
}
}
@@ -160,11 +185,11 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
void checkFunctional() {
// allocate memory
// allocate memory
const int NUM_INPUTS = 16;
auto memsize = NUM_INPUTS * sizeof(__half);
__half* inputCPU = (__half*) malloc(memsize);
// populate inputs
inputCPU[0] = host_ushort_as_half(0x7c00); // inf
inputCPU[1] = host_ushort_as_half(0xfc00); // -inf
@@ -203,25 +228,27 @@ void checkFunctional() {
}
int main() {
__half *A, *B, *C;
hipMalloc(&A, HALF_SIZE);
hipMalloc(&B, HALF_SIZE);
hipMalloc(&C, HALF_SIZE);
hipLaunchKernel(__halfMath, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A, B, C);
hipFree(A);
hipFree(B);
hipFree(C);
__half2 *A2, *B2, *C2;
hipMalloc(&A2, HALF2_SIZE);
hipMalloc(&B2, HALF2_SIZE);
hipMalloc(&C2, HALF2_SIZE);
hipLaunchKernel(__half2Math, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A2, B2, C2);
hipFree(A2);
hipFree(B2);
hipFree(C2);
bool* result{nullptr};
hipHostMalloc(&result, sizeof(result));
// run some functional checks
checkFunctional();
passed();
result[0] = false;
hipLaunchKernelGGL(
__halfMath, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half{1});
hipDeviceSynchronize();
if (!result[0]) { failed("Failed __half tests."); }
result[0] = false;
hipLaunchKernelGGL(
__half2Math, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half2{1, 1});
hipDeviceSynchronize();
if (!result[0]) { failed("Failed __half2 tests."); }
hipHostFree(result);
// run some functional checks
checkFunctional();
passed();
}
@@ -0,0 +1,184 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
* RUN: %t
* HIT_END
*/
#include <hip/hip_fp16.h>
#include "hip/hip_runtime.h"
#include "test_common.h"
#include <type_traits>
using namespace std;
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
__global__
void __halfTest(bool* result, __half a) {
// Construction
static_assert(is_default_constructible<__half>{}, "");
static_assert(is_copy_constructible<__half>{}, "");
static_assert(is_move_constructible<__half>{}, "");
static_assert(is_constructible<__half, float>{}, "");
static_assert(is_constructible<__half, double>{}, "");
static_assert(is_constructible<__half, unsigned short>{}, "");
static_assert(is_constructible<__half, short>{}, "");
static_assert(is_constructible<__half, unsigned int>{}, "");
static_assert(is_constructible<__half, int>{}, "");
static_assert(is_constructible<__half, unsigned long>{}, "");
static_assert(is_constructible<__half, long>{}, "");
static_assert(is_constructible<__half, long long>{}, "");
static_assert(is_constructible<__half, unsigned long long>{}, "");
static_assert(is_constructible<__half, __half_raw>{}, "");
// Assignment
static_assert(is_copy_assignable<__half>{}, "");
static_assert(is_move_assignable<__half>{}, "");
static_assert(is_assignable<__half, float>{}, "");
static_assert(is_assignable<__half, double>{}, "");
static_assert(is_assignable<__half, unsigned short>{}, "");
static_assert(is_assignable<__half, short>{}, "");
static_assert(is_assignable<__half, unsigned int>{}, "");
static_assert(is_assignable<__half, int>{}, "");
static_assert(is_assignable<__half, unsigned long>{}, "");
static_assert(is_assignable<__half, long>{}, "");
static_assert(is_assignable<__half, long long>{}, "");
static_assert(is_assignable<__half, unsigned long long>{}, "");
static_assert(is_assignable<__half, __half_raw>{}, "");
static_assert(is_assignable<__half, volatile __half_raw&>{}, "");
static_assert(is_assignable<__half, volatile __half_raw&&>{}, "");
// Conversion
static_assert(is_convertible<__half, float>{}, "");
static_assert(is_convertible<__half, unsigned short>{}, "");
static_assert(is_convertible<__half, short>{}, "");
static_assert(is_convertible<__half, unsigned int>{}, "");
static_assert(is_convertible<__half, int>{}, "");
static_assert(is_convertible<__half, unsigned long>{}, "");
static_assert(is_convertible<__half, long>{}, "");
static_assert(is_convertible<__half, long long>{}, "");
static_assert(is_convertible<__half, bool>{}, "");
static_assert(is_convertible<__half, unsigned long long>{}, "");
static_assert(is_convertible<__half, __half_raw>{}, "");
static_assert(is_convertible<__half, volatile __half_raw>{}, "");
// Nullary
result[0] = __heq(a, +a) && result[0];
result[0] = __heq(__hneg(a), -a) && result[0];
// Unary arithmetic
result[0] = __heq(a += 0, a) && result[0];
result[0] = __heq(a -= 0, a) && result[0];
result[0] = __heq(a *= 1, a) && result[0];
result[0] = __heq(a /= 1, a) && result[0];
// Binary arithmetic
result[0] = __heq((a + a), __hadd(a, a)) && result[0];
result[0] = __heq((a - a), __hsub(a, a)) && result[0];
result[0] = __heq((a * a), __hmul(a, a)) && result[0];
result[0] = __heq((a / a), __hdiv(a, a)) && result[0];
// Relations
result[0] = (a == a) && result[0];
result[0] = !(a != a) && result[0];
result[0] = (a <= a) && result[0];
result[0] = (a >= a) && result[0];
result[0] = !(a < a) && result[0];
result[0] = !(a > a) && result[0];
}
__device__
bool to_bool(const __half2& x)
{
auto r = static_cast<const __half2_raw&>(x);
return r.data.x != 0 && r.data.y != 0;
}
__global__
void __half2Test(bool* result, __half2 a) {
// Construction
static_assert(is_default_constructible<__half2>{}, "");
static_assert(is_copy_constructible<__half2>{}, "");
static_assert(is_move_constructible<__half2>{}, "");
static_assert(is_constructible<__half2, __half, __half>{}, "");
static_assert(is_constructible<__half2, __half2_raw>{}, "");
// Assignment
static_assert(is_copy_assignable<__half2>{}, "");
static_assert(is_move_assignable<__half2>{}, "");
static_assert(is_assignable<__half2, __half2_raw>{}, "");
// Conversion
static_assert(is_convertible<__half2, __half2_raw>{}, "");
// Nullary
result[0] = to_bool(__heq2(a, +a)) && result[0];
result[0] = to_bool(__heq2(__hneg2(a), -a)) && result[0];
// Unary arithmetic
result[0] = to_bool(__heq2(a += 0, a)) && result[0];
result[0] = to_bool(__heq2(a -= 0, a)) && result[0];
result[0] = to_bool(__heq2(a *= 1, a)) && result[0];
result[0] = to_bool(__heq2(a /= 1, a)) && result[0];
// Binary arithmetic
result[0] = to_bool(__heq2((a + a), __hadd2(a, a))) && result[0];
result[0] = to_bool(__heq2((a - a), __hsub2(a, a))) && result[0];
result[0] = to_bool(__heq2((a * a), __hmul2(a, a))) && result[0];
result[0] = to_bool(__heq2((a / a), __h2div(a, a))) && result[0];
// Relations
result[0] = (a == a) && result[0];
result[0] = !(a != a) && result[0];
result[0] = (a <= a) && result[0];
result[0] = (a >= a) && result[0];
result[0] = !(a < a) && result[0];
result[0] = !(a > a) && result[0];
}
#endif
int main() {
bool* result{nullptr};
hipHostMalloc(&result, 1);
result[0] = true;
hipLaunchKernelGGL(
__halfTest, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half{1});
hipDeviceSynchronize();
if (!result[0]) { failed("Failed __half tests."); }
result[0] = true;
hipLaunchKernelGGL(
__half2Test, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half2{1, 1});
hipDeviceSynchronize();
if (!result[0]) { failed("Failed __half2 tests."); }
hipHostFree(result);
passed();
}
File diff soppresso perché troppo grande Carica Diff
File diff soppresso perché troppo grande Carica Diff
@@ -0,0 +1,105 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <type_traits>
template<bool b, typename T = void>
using Enable_if_t = typename std::enable_if<b, T>::type;
__host__ __device__
std::false_type is_vec4(...);
__host__ __device__
std::false_type is_vec3(...);
__host__ __device__
std::false_type is_vec2(...);
__host__ __device__
std::false_type is_vec1(...);
template<typename T>
__host__ __device__
auto is_vec4(const T&) -> decltype(std::declval<T>().xyzw, std::true_type{});
template<
typename T, Enable_if_t<decltype(!is_vec4(std::declval<T>())){}>* = nullptr>
__host__ __device__
auto is_vec3(const T&) -> decltype(std::declval<T>().xyz, std::true_type{});
template<
typename T,
Enable_if_t<
!decltype(is_vec4(std::declval<T>())){} &&
!decltype(is_vec3(std::declval<T>())){}>* = nullptr>
__host__ __device__
auto is_vec2(const T&) -> decltype(std::declval<T>().xy, std::true_type{});
template<
typename T,
Enable_if_t<
!decltype(is_vec4(std::declval<T>())){} &&
!decltype(is_vec3(std::declval<T>())){} &&
!decltype(is_vec2(std::declval<T>())){}>* = nullptr>
__host__ __device__
auto is_vec1(const T&) -> decltype(std::declval<T>().x, std::true_type{});
template<typename T, int dimension>
__host__ __device__
constexpr
bool is_vec() {
return (dimension == 1) ? decltype(is_vec1(std::declval<T>())){} :
((dimension == 2) ? decltype(is_vec2(std::declval<T>())){} :
((dimension == 3) ? decltype(is_vec3(std::declval<T>())){} :
decltype(is_vec4(std::declval<T>())){}));
}
template<typename T, typename U, Enable_if_t<is_vec<T, 1>()>* = nullptr>
__host__ __device__
inline
bool cmp(const T& x, U expected) {
const auto r = x == T(expected);
return r.x != 0;
}
template<typename T, typename U, Enable_if_t<is_vec<T, 2>()>* = nullptr>
__host__ __device__
inline
bool cmp(const T& x, U expected) {
const auto r = x == T(expected);
return r.x != 0 && r.y != 0;
}
template<typename T, typename U, Enable_if_t<is_vec<T, 3>()>* = nullptr>
__host__ __device__
inline
bool cmp(const T& x, U expected) {
const auto r = x == T(expected);
return r.x != 0 && r.y != 0 && r.z != 0;
}
template<typename T, typename U, Enable_if_t<is_vec<T, 4>()>* = nullptr>
__host__ __device__
inline
bool cmp(const T& x, U expected) {
const auto r = x == T(expected);
return r.x != 0 && r.y != 0 && r.z != 0 && r.w != 0;
}